Commit 0d90d638 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-f2fs-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, a couple of sysfs entries were introduced to tune the
  f2fs at runtime.

  In addition, f2fs starts to support inline_data and improves the
  read/write performance in some workloads by refactoring bio-related
  flows.

  This patch-set includes the following major enhancement patches.
   - support inline_data
   - refactor bio operations such as merge operations and rw type
     assignment
   - enhance the direct IO path
   - enhance bio operations
   - truncate a node page when it becomes obsolete
   - add sysfs entries: small_discards, max_victim_search, and
     in-place-update
   - add a sysfs entry to control max_victim_search

  The other bug fixes are as follows.
   - fix a bug in truncate_partial_nodes
   - avoid warnings during sparse and build process
   - fix error handling flows
   - fix potential bit overflows

  And, there are a bunch of cleanups"

* tag 'for-f2fs-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (95 commits)
  f2fs: drop obsolete node page when it is truncated
  f2fs: introduce NODE_MAPPING for code consistency
  f2fs: remove the orphan block page array
  f2fs: add help function META_MAPPING
  f2fs: move a branch for code redability
  f2fs: call mark_inode_dirty to flush dirty pages
  f2fs: clean checkpatch warnings
  f2fs: missing REQ_META and REQ_PRIO when sync_meta_pages(META_FLUSH)
  f2fs: avoid f2fs_balance_fs call during pageout
  f2fs: add delimiter to seperate name and value in debug phrase
  f2fs: use spinlock rather than mutex for better speed
  f2fs: move alloc new orphan node out of lock protection region
  f2fs: move grabing orphan pages out of protection region
  f2fs: remove the needless parameter of f2fs_wait_on_page_writeback
  f2fs: update documents and a MAINTAINERS entry
  f2fs: add a sysfs entry to control max_victim_search
  f2fs: improve write performance under frequent fsync calls
  f2fs: avoid to read inline data except first page
  f2fs: avoid to left uninitialized data in page when read inline data
  f2fs: fix truncate_partial_nodes bug
  ...
parents 1d32bdaf bf39c00a
......@@ -24,3 +24,34 @@ Date: July 2013
Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
Description:
Controls the victim selection policy for garbage collection.
What: /sys/fs/f2fs/<disk>/reclaim_segments
Date: October 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the issue rate of segment discard commands.
What: /sys/fs/f2fs/<disk>/ipu_policy
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the in-place-update policy.
What: /sys/fs/f2fs/<disk>/min_ipu_util
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the FS utilization condition for the in-place-update
policies.
What: /sys/fs/f2fs/<disk>/max_small_discards
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the issue rate of small discard commands.
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the number of trials to find a victim segment.
......@@ -120,6 +120,8 @@ active_logs=%u Support configuring the number of active logs. In the
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
does not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
inline_data Enable the inline data feature: New created small(<~3.4k)
files can be written into inode block.
================================================================================
DEBUGFS ENTRIES
......@@ -171,6 +173,28 @@ Files in /sys/fs/f2fs/<devname>
conduct checkpoint to reclaim the prefree segments
to free segments. By default, 100 segments, 200MB.
max_small_discards This parameter controls the number of discard
commands that consist small blocks less than 2MB.
The candidates to be discarded are cached until
checkpoint is triggered, and issued during the
checkpoint. By default, it is disabled with 0.
ipu_policy This parameter controls the policy of in-place
updates in f2fs. There are five policies:
0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR,
2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL,
4: F2FS_IPU_DISABLE.
min_ipu_util This parameter controls the threshold to trigger
in-place-updates. The number indicates percentage
of the filesystem utilization, and used by
F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
max_victim_search This parameter controls the number of trials to
find a victim segment when conducting SSR and
cleaning operations. The default value is 4096
which covers 8GB block address range.
================================================================================
USAGE
================================================================================
......
......@@ -3634,6 +3634,7 @@ W: http://en.wikipedia.org/wiki/F2FS
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
S: Maintained
F: Documentation/filesystems/f2fs.txt
F: Documentation/ABI/testing/sysfs-fs-f2fs
F: fs/f2fs/
F: include/linux/f2fs_fs.h
......
obj-$(CONFIG_F2FS_FS) += f2fs.o
f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
......
......@@ -30,7 +30,7 @@ static struct kmem_cache *inode_entry_slab;
*/
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct address_space *mapping = sbi->meta_inode->i_mapping;
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
page = grab_cache_page(mapping, index);
......@@ -50,7 +50,7 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
*/
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct address_space *mapping = sbi->meta_inode->i_mapping;
struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
repeat:
page = grab_cache_page(mapping, index);
......@@ -61,11 +61,12 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
if (PageUptodate(page))
goto out;
if (f2fs_readpage(sbi, page, index, READ_SYNC))
if (f2fs_submit_page_bio(sbi, page, index,
READ_SYNC | REQ_META | REQ_PRIO))
goto repeat;
lock_page(page);
if (page->mapping != mapping) {
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
......@@ -81,13 +82,12 @@ static int f2fs_write_meta_page(struct page *page,
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
/* Should not write any meta pages, if any IO error was occurred */
if (wbc->for_reclaim || sbi->por_doing ||
is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
dec_page_count(sbi, F2FS_DIRTY_META);
wbc->pages_skipped++;
set_page_dirty(page);
return AOP_WRITEPAGE_ACTIVATE;
}
if (unlikely(sbi->por_doing ||
is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
goto redirty_out;
if (wbc->for_reclaim)
goto redirty_out;
wait_on_page_writeback(page);
......@@ -95,24 +95,31 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
redirty_out:
dec_page_count(sbi, F2FS_DIRTY_META);
wbc->pages_skipped++;
set_page_dirty(page);
return AOP_WRITEPAGE_ACTIVATE;
}
static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
struct block_device *bdev = sbi->sb->s_bdev;
int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
long written;
if (wbc->for_kupdate)
return 0;
if (get_pages(sbi, F2FS_DIRTY_META) == 0)
/* collect a number of dirty meta pages and write together */
if (get_pages(sbi, F2FS_DIRTY_META) < nrpages)
return 0;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
written = sync_meta_pages(sbi, META, nrpages);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write -= written;
return 0;
......@@ -121,7 +128,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
{
struct address_space *mapping = sbi->meta_inode->i_mapping;
struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = LONG_MAX;
struct pagevec pvec;
long nwritten = 0;
......@@ -136,7 +143,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
if (unlikely(nr_pages == 0))
break;
for (i = 0; i < nr_pages; i++) {
......@@ -149,7 +156,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
unlock_page(page);
break;
}
if (nwritten++ >= nr_to_write)
nwritten++;
if (unlikely(nwritten >= nr_to_write))
break;
}
pagevec_release(&pvec);
......@@ -157,7 +165,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
}
if (nwritten)
f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
f2fs_submit_merged_bio(sbi, type, WRITE);
return nwritten;
}
......@@ -186,31 +194,24 @@ const struct address_space_operations f2fs_meta_aops = {
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
unsigned int max_orphans;
int err = 0;
/*
* considering 512 blocks in a segment 5 blocks are needed for cp
* and log segment summaries. Remaining blocks are used to keep
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*507 orphan entries
*/
max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
mutex_lock(&sbi->orphan_inode_mutex);
if (sbi->n_orphans >= max_orphans)
spin_lock(&sbi->orphan_inode_lock);
if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
else
sbi->n_orphans++;
mutex_unlock(&sbi->orphan_inode_mutex);
spin_unlock(&sbi->orphan_inode_lock);
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
mutex_lock(&sbi->orphan_inode_mutex);
spin_lock(&sbi->orphan_inode_lock);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
mutex_unlock(&sbi->orphan_inode_mutex);
spin_unlock(&sbi->orphan_inode_lock);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
......@@ -218,27 +219,30 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct list_head *head, *this;
struct orphan_inode_entry *new = NULL, *orphan = NULL;
mutex_lock(&sbi->orphan_inode_mutex);
new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
new->ino = ino;
spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
list_for_each(this, head) {
orphan = list_entry(this, struct orphan_inode_entry, list);
if (orphan->ino == ino)
goto out;
if (orphan->ino == ino) {
spin_unlock(&sbi->orphan_inode_lock);
kmem_cache_free(orphan_entry_slab, new);
return;
}
if (orphan->ino > ino)
break;
orphan = NULL;
}
new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
new->ino = ino;
/* add new_oentry into list which is sorted by inode number */
if (orphan)
list_add(&new->list, this->prev);
else
list_add_tail(&new->list, head);
out:
mutex_unlock(&sbi->orphan_inode_mutex);
spin_unlock(&sbi->orphan_inode_lock);
}
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
......@@ -246,7 +250,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct list_head *head;
struct orphan_inode_entry *orphan;
mutex_lock(&sbi->orphan_inode_mutex);
spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
......@@ -257,7 +261,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
break;
}
}
mutex_unlock(&sbi->orphan_inode_mutex);
spin_unlock(&sbi->orphan_inode_lock);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
......@@ -270,12 +274,12 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
iput(inode);
}
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blkaddr, i, j;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
return 0;
return;
sbi->por_doing = true;
start_blk = __start_cp_addr(sbi) + 1;
......@@ -295,29 +299,39 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
sbi->por_doing = false;
return 0;
return;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
struct list_head *head, *this, *next;
struct list_head *head;
struct f2fs_orphan_block *orphan_blk = NULL;
struct page *page = NULL;
unsigned int nentries = 0;
unsigned short index = 1;
unsigned short orphan_blocks;
orphan_blocks = (unsigned short)((sbi->n_orphans +
unsigned short index;
unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
struct page *page = NULL;
struct orphan_inode_entry *orphan = NULL;
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
mutex_lock(&sbi->orphan_inode_mutex);
index = 1;
spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_safe(this, next, head) {
struct orphan_inode_entry *orphan;
list_for_each_entry(orphan, head, list) {
if (!page) {
page = find_get_page(META_MAPPING(sbi), start_blk++);
f2fs_bug_on(!page);
orphan_blk =
(struct f2fs_orphan_block *)page_address(page);
memset(orphan_blk, 0, sizeof(*orphan_blk));
f2fs_put_page(page, 0);
}
orphan = list_entry(this, struct orphan_inode_entry, list);
orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
if (nentries == F2FS_ORPHANS_PER_BLOCK) {
/*
......@@ -331,29 +345,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
set_page_dirty(page);
f2fs_put_page(page, 1);
index++;
start_blk++;
nentries = 0;
page = NULL;
}
if (page)
goto page_exist;
}
page = grab_meta_page(sbi, start_blk);
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
memset(orphan_blk, 0, sizeof(*orphan_blk));
page_exist:
orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
if (page) {
orphan_blk->blk_addr = cpu_to_le16(index);
orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
orphan_blk->entry_count = cpu_to_le32(nentries);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
if (!page)
goto end;
orphan_blk->blk_addr = cpu_to_le16(index);
orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
orphan_blk->entry_count = cpu_to_le32(nentries);
set_page_dirty(page);
f2fs_put_page(page, 1);
end:
mutex_unlock(&sbi->orphan_inode_mutex);
spin_unlock(&sbi->orphan_inode_lock);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
......@@ -428,7 +433,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
/* The second checkpoint pack should start at the next segment */
cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
cp_start_blk_no += ((unsigned long long)1) <<
le32_to_cpu(fsb->log_blocks_per_seg);
cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
if (cp1 && cp2) {
......@@ -465,7 +471,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
if (entry->inode == inode)
if (unlikely(entry->inode == inode))
return -EEXIST;
}
list_add_tail(&new->list, head);
......@@ -513,8 +519,8 @@ void add_dirty_dir_inode(struct inode *inode)
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct list_head *head = &sbi->dir_inode_list;
struct list_head *this;
struct list_head *this, *head;
if (!S_ISDIR(inode->i_mode))
return;
......@@ -525,6 +531,7 @@ void remove_dirty_dir_inode(struct inode *inode)
return;
}
head = &sbi->dir_inode_list;
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
......@@ -546,11 +553,13 @@ void remove_dirty_dir_inode(struct inode *inode)
struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct list_head *head = &sbi->dir_inode_list;
struct list_head *this;
struct list_head *this, *head;
struct inode *inode = NULL;
spin_lock(&sbi->dir_inode_lock);
head = &sbi->dir_inode_list;
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
......@@ -565,11 +574,13 @@ struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
struct list_head *head = &sbi->dir_inode_list;
struct list_head *head;
struct dir_inode_entry *entry;
struct inode *inode;
retry:
spin_lock(&sbi->dir_inode_lock);
head = &sbi->dir_inode_list;
if (list_empty(head)) {
spin_unlock(&sbi->dir_inode_lock);
return;
......@@ -585,7 +596,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
* We should submit bio, since it exists several
* wribacking dentry pages in the freeing inode.
*/
f2fs_submit_bio(sbi, DATA, true);
f2fs_submit_merged_bio(sbi, DATA, WRITE);
}
goto retry;
}
......@@ -760,8 +771,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* wait for previous submitted node/meta pages writeback */
wait_on_all_pages_writeback(sbi);
filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
......@@ -770,7 +781,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
clear_prefree_segments(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
......@@ -791,9 +802,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
f2fs_submit_bio(sbi, DATA, true);
f2fs_submit_bio(sbi, NODE, true);
f2fs_submit_bio(sbi, META, true);
f2fs_submit_merged_bio(sbi, DATA, WRITE);
f2fs_submit_merged_bio(sbi, NODE, WRITE);
f2fs_submit_merged_bio(sbi, META, WRITE);
/*
* update checkpoint pack index
......@@ -818,20 +829,28 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
void init_orphan_info(struct f2fs_sb_info *sbi)
{
mutex_init(&sbi->orphan_inode_mutex);
spin_lock_init(&sbi->orphan_inode_lock);
INIT_LIST_HEAD(&sbi->orphan_inode_list);
sbi->n_orphans = 0;
/*
* considering 512 blocks in a segment 8 blocks are needed for cp
* and log segment summaries. Remaining blocks are used to keep
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
* F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
{
orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
sizeof(struct orphan_inode_entry), NULL);
if (unlikely(!orphan_entry_slab))
if (!orphan_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry), NULL);
if (unlikely(!inode_entry_slab)) {
if (!inode_entry_slab) {
kmem_cache_destroy(orphan_entry_slab);
return -ENOMEM;
}
......
......@@ -24,6 +24,195 @@
#include "segment.h"
#include <trace/events/f2fs.h>
static void f2fs_read_end_io(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
do {
struct page *page = bvec->bv_page;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
if (unlikely(!uptodate)) {
ClearPageUptodate(page);
SetPageError(page);
} else {
SetPageUptodate(page);
}
unlock_page(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
}
static void f2fs_write_end_io(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb);
do {
struct page *page = bvec->bv_page;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
if (unlikely(!uptodate)) {
SetPageError(page);
set_bit(AS_EIO, &page->mapping->flags);
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
}
end_page_writeback(page);
dec_page_count(sbi, F2FS_WRITEBACK);
} while (bvec >= bio->bi_io_vec);
if (bio->bi_private)
complete(bio->bi_private);
if (!get_pages(sbi, F2FS_WRITEBACK) &&
!list_empty(&sbi->cp_wait.task_list))
wake_up(&sbi->cp_wait);
bio_put(bio);
}
/*
* Low-level block read/write IO operations.
*/
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
int npages, bool is_read)
{
struct bio *bio;
/* No failure on bio allocation */
bio = bio_alloc(GFP_NOIO, npages);
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
return bio;
}
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
int rw;
if (!io->bio)
return;
rw = fio->rw;
if (is_read_io(rw)) {
trace_f2fs_submit_read_bio(io->sbi->sb, rw,
fio->type, io->bio);
submit_bio(rw, io->bio);
} else {
trace_f2fs_submit_write_bio(io->sbi->sb, rw,
fio->type, io->bio);
/*
* META_FLUSH is only from the checkpoint procedure, and we
* should wait this metadata bio for FS consistency.
*/
if (fio->type == META_FLUSH) {
DECLARE_COMPLETION_ONSTACK(wait);
io->bio->bi_private = &wait;
submit_bio(rw, io->bio);
wait_for_completion(&wait);
} else {
submit_bio(rw, io->bio);
}
}
io->bio = NULL;
}
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
enum page_type type, int rw)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io;
io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
mutex_lock(&io->io_mutex);
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
mutex_unlock(&io->io_mutex);
}
/*
* Fill the locked page with data located in the block address.
* Return unlocked page.
*/
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
block_t blk_addr, int rw)
{
struct bio *bio;
trace_f2fs_submit_page_bio(page, blk_addr, rw);
/* Allocate a new bio */
bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
bio_put(bio);
f2fs_put_page(page, 1);
return -EFAULT;
}
submit_bio(rw, bio);
return 0;
}
void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
block_t blk_addr, struct f2fs_io_info *fio)
{
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
bool is_read = is_read_io(fio->rw);
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
verify_block_addr(sbi, blk_addr);
mutex_lock(&io->io_mutex);
if (!is_read)
inc_page_count(sbi, F2FS_WRITEBACK);
if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
io->fio.rw != fio->rw))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
io->fio = *fio;
}
if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
PAGE_CACHE_SIZE) {
__submit_merged_bio(io);
goto alloc_new;
}
io->last_block_in_bio = blk_addr;
mutex_unlock(&io->io_mutex);
trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
}
/*
* Lock ordering for the change of data block address:
* ->data_page
......@@ -37,7 +226,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
struct page *node_page = dn->node_page;
unsigned int ofs_in_node = dn->ofs_in_node;
f2fs_wait_on_page_writeback(node_page, NODE, false);
f2fs_wait_on_page_writeback(node_page, NODE);
rn = F2FS_NODE(node_page);
......@@ -51,19 +240,39 @@ int reserve_new_block(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
if (!inc_valid_block_count(sbi, dn->inode, 1))
if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
__set_data_blkaddr(dn, NEW_ADDR);
dn->data_blkaddr = NEW_ADDR;
mark_inode_dirty(dn->inode);
sync_inode_page(dn);
return 0;
}
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
bool need_put = dn->inode_page ? false : true;
int err;
/* if inode_page exists, index should be zero */
f2fs_bug_on(!need_put && index);
err = get_dnode_of_data(dn, index, ALLOC_NODE);
if (err)
return err;
if (dn->data_blkaddr == NULL_ADDR)
err = reserve_new_block(dn);
if (err || need_put)
f2fs_put_dnode(dn);
return err;
}
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
struct buffer_head *bh_result)
{
......@@ -71,6 +280,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
if (is_inode_flag_set(fi, FI_NO_EXTENT))
return 0;
read_lock(&fi->ext.ext_lock);
if (fi->ext.len == 0) {
read_unlock(&fi->ext.ext_lock);
......@@ -109,6 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs, start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
int need_update = true;
f2fs_bug_on(blk_addr == NEW_ADDR);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
......@@ -117,6 +330,9 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
/* Update the page address in the parent node */
__set_data_blkaddr(dn, blk_addr);
if (is_inode_flag_set(fi, FI_NO_EXTENT))
return;
write_lock(&fi->ext.ext_lock);
start_fofs = fi->ext.fofs;
......@@ -163,14 +379,21 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
fofs - start_fofs + 1;
fi->ext.len -= fofs - start_fofs + 1;
}
goto end_update;
} else {
need_update = false;
}
write_unlock(&fi->ext.ext_lock);
return;
/* Finally, if the extent is very fragmented, let's drop the cache. */
if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
fi->ext.len = 0;
set_inode_flag(fi, FI_NO_EXTENT);
need_update = true;
}
end_update:
write_unlock(&fi->ext.ext_lock);
sync_inode_page(dn);
if (need_update)
sync_inode_page(dn);
return;
}
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
......@@ -196,7 +419,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return ERR_PTR(-ENOENT);
/* By fallocate(), there is no cached page, but with NEW_ADDR */
if (dn.data_blkaddr == NEW_ADDR)
if (unlikely(dn.data_blkaddr == NEW_ADDR))
return ERR_PTR(-EINVAL);
page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
......@@ -208,11 +431,14 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return page;
}
err = f2fs_readpage(sbi, page, dn.data_blkaddr,
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
sync ? READ_SYNC : READA);
if (err)
return ERR_PTR(err);
if (sync) {
wait_on_page_locked(page);
if (!PageUptodate(page)) {
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 0);
return ERR_PTR(-EIO);
}
......@@ -246,7 +472,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
}
f2fs_put_dnode(&dn);
if (dn.data_blkaddr == NULL_ADDR) {
if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
f2fs_put_page(page, 1);
return ERR_PTR(-ENOENT);
}
......@@ -266,16 +492,16 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
return page;
}
err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
if (err)
return ERR_PTR(err);
lock_page(page);
if (!PageUptodate(page)) {
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
if (page->mapping != mapping) {
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
......@@ -286,12 +512,12 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
* Caller ensures that this data page is never allocated.
* A new zero-filled data page is allocated in the page cache.
*
* Also, caller should grab and release a mutex by calling mutex_lock_op() and
* mutex_unlock_op().
* Note that, npage is set only by make_empty_dir.
* Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
* Note that, ipage is set only by make_empty_dir.
*/
struct page *get_new_data_page(struct inode *inode,
struct page *npage, pgoff_t index, bool new_i_size)
struct page *ipage, pgoff_t index, bool new_i_size)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
......@@ -299,24 +525,16 @@ struct page *get_new_data_page(struct inode *inode,
struct dnode_of_data dn;
int err;
set_new_dnode(&dn, inode, npage, npage, 0);
err = get_dnode_of_data(&dn, index, ALLOC_NODE);
set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, index);
if (err)
return ERR_PTR(err);
if (dn.data_blkaddr == NULL_ADDR) {
if (reserve_new_block(&dn)) {
if (!npage)
f2fs_put_dnode(&dn);
return ERR_PTR(-ENOSPC);
}
}
if (!npage)
f2fs_put_dnode(&dn);
repeat:
page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
if (!page) {
err = -ENOMEM;
goto put_err;
}
if (PageUptodate(page))
return page;
......@@ -325,15 +543,18 @@ struct page *get_new_data_page(struct inode *inode,
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else {
err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
if (err)
return ERR_PTR(err);
goto put_err;
lock_page(page);
if (!PageUptodate(page)) {
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
err = -EIO;
goto put_err;
}
if (page->mapping != mapping) {
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
......@@ -344,140 +565,187 @@ struct page *get_new_data_page(struct inode *inode,
i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
/* Only the directory inode sets new_i_size */
set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
mark_inode_dirty_sync(inode);
}
return page;
}
static void read_end_io(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
do {
struct page *page = bvec->bv_page;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
if (uptodate) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
SetPageError(page);
}
unlock_page(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
put_err:
f2fs_put_dnode(&dn);
return ERR_PTR(err);
}
/*
* Fill the locked page with data located in the block address.
* Return unlocked page.
*/
int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
block_t blk_addr, int type)
static int __allocate_data_block(struct dnode_of_data *dn)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct bio *bio;
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
struct f2fs_summary sum;
block_t new_blkaddr;
struct node_info ni;
int type;
trace_f2fs_readpage(page, blk_addr, type);
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
down_read(&sbi->bio_sem);
__set_data_blkaddr(dn, NEW_ADDR);
dn->data_blkaddr = NEW_ADDR;
/* Allocate a new bio */
bio = f2fs_bio_alloc(bdev, 1);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
/* Initialize the bio */
bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
bio->bi_end_io = read_end_io;
type = CURSEG_WARM_DATA;
if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
bio_put(bio);
up_read(&sbi->bio_sem);
f2fs_put_page(page, 1);
return -EFAULT;
}
allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
submit_bio(type, bio);
up_read(&sbi->bio_sem);
/* direct IO doesn't use extent cache to maximize the performance */
set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
update_extent_cache(new_blkaddr, dn);
clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
dn->data_blkaddr = new_blkaddr;
return 0;
}
/*
* This function should be used by the data read flow only where it
* does not check the "create" flag that indicates block allocation.
* The reason for this special functionality is to exploit VFS readahead
* mechanism.
* get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
* If original data blocks are allocated, then give them to blockdev.
* Otherwise,
* a. preallocate requested block addresses
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
static int get_data_block_ro(struct inode *inode, sector_t iblock,
static int get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blkbits = inode->i_sb->s_blocksize_bits;
unsigned maxblocks = bh_result->b_size >> blkbits;
struct dnode_of_data dn;
pgoff_t pgofs;
int err;
int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
pgoff_t pgofs, end_offset;
int err = 0, ofs = 1;
bool allocated = false;
/* Get the page offset from the block offset(iblock) */
pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
if (check_extent_cache(inode, pgofs, bh_result)) {
trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
return 0;
}
if (check_extent_cache(inode, pgofs, bh_result))
goto out;
if (create)
f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
err = get_dnode_of_data(&dn, pgofs, mode);
if (err) {
trace_f2fs_get_data_block(inode, iblock, bh_result, err);
return (err == -ENOENT) ? 0 : err;
if (err == -ENOENT)
err = 0;
goto unlock_out;
}
if (dn.data_blkaddr == NEW_ADDR)
goto put_out;
if (dn.data_blkaddr != NULL_ADDR) {
map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
} else if (create) {
err = __allocate_data_block(&dn);
if (err)
goto put_out;
allocated = true;
map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
} else {
goto put_out;
}
/* It does not support data allocation */
f2fs_bug_on(create);
end_offset = IS_INODE(dn.node_page) ?
ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
bh_result->b_size = (((size_t)1) << blkbits);
dn.ofs_in_node++;
pgofs++;
get_next:
if (dn.ofs_in_node >= end_offset) {
if (allocated)
sync_inode_page(&dn);
allocated = false;
f2fs_put_dnode(&dn);
if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
int i;
unsigned int end_offset;
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, pgofs, mode);
if (err) {
if (err == -ENOENT)
err = 0;
goto unlock_out;
}
if (dn.data_blkaddr == NEW_ADDR)
goto put_out;
end_offset = IS_INODE(dn.node_page) ?
ADDRS_PER_INODE(F2FS_I(inode)) :
ADDRS_PER_BLOCK;
clear_buffer_new(bh_result);
ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
}
if (maxblocks > (bh_result->b_size >> blkbits)) {
block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
if (blkaddr == NULL_ADDR && create) {
err = __allocate_data_block(&dn);
if (err)
goto sync_out;
allocated = true;
blkaddr = dn.data_blkaddr;
}
/* Give more consecutive addresses for the read ahead */
for (i = 0; i < end_offset - dn.ofs_in_node; i++)
if (((datablock_addr(dn.node_page,
dn.ofs_in_node + i))
!= (dn.data_blkaddr + i)) || maxblocks == i)
break;
map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
bh_result->b_size = (i << blkbits);
if (blkaddr == (bh_result->b_blocknr + ofs)) {
ofs++;
dn.ofs_in_node++;
pgofs++;
bh_result->b_size += (((size_t)1) << blkbits);
goto get_next;
}
}
sync_out:
if (allocated)
sync_inode_page(&dn);
put_out:
f2fs_put_dnode(&dn);
trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
return 0;
unlock_out:
if (create)
f2fs_unlock_op(sbi);
out:
trace_f2fs_get_data_block(inode, iblock, bh_result, err);
return err;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
{
return mpage_readpage(page, get_data_block_ro);
struct inode *inode = page->mapping->host;
int ret;
/* If the file has inline data, try to read it directlly */
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
else
ret = mpage_readpage(page, get_data_block);
return ret;
}
static int f2fs_read_data_pages(struct file *file,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
struct inode *inode = file->f_mapping->host;
/* If the file has inline data, skip readpages */
if (f2fs_has_inline_data(inode))
return 0;
return mpage_readpages(mapping, pages, nr_pages, get_data_block);
}
int do_write_data_page(struct page *page)
int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
struct inode *inode = page->mapping->host;
block_t old_blk_addr, new_blk_addr;
block_t old_blkaddr, new_blkaddr;
struct dnode_of_data dn;
int err = 0;
......@@ -486,10 +754,10 @@ int do_write_data_page(struct page *page)
if (err)
return err;
old_blk_addr = dn.data_blkaddr;
old_blkaddr = dn.data_blkaddr;
/* This page is already truncated */
if (old_blk_addr == NULL_ADDR)
if (old_blkaddr == NULL_ADDR)
goto out_writepage;
set_page_writeback(page);
......@@ -498,15 +766,13 @@ int do_write_data_page(struct page *page)
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
if (unlikely(old_blk_addr != NEW_ADDR &&
if (unlikely(old_blkaddr != NEW_ADDR &&
!is_cold_data(page) &&
need_inplace_update(inode))) {
rewrite_data_page(F2FS_SB(inode->i_sb), page,
old_blk_addr);
rewrite_data_page(page, old_blkaddr, fio);
} else {
write_data_page(inode, page, &dn,
old_blk_addr, &new_blk_addr);
update_extent_cache(new_blk_addr, &dn);
write_data_page(page, &dn, &new_blkaddr, fio);
update_extent_cache(new_blkaddr, &dn);
}
out_writepage:
f2fs_put_dnode(&dn);
......@@ -521,9 +787,13 @@ static int f2fs_write_data_page(struct page *page,
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long) i_size)
>> PAGE_CACHE_SHIFT;
unsigned offset;
unsigned offset = 0;
bool need_balance_fs = false;
int err = 0;
struct f2fs_io_info fio = {
.type = DATA,
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
};
if (page->index < end_index)
goto write;
......@@ -543,7 +813,7 @@ static int f2fs_write_data_page(struct page *page,
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
if (sbi->por_doing) {
if (unlikely(sbi->por_doing)) {
err = AOP_WRITEPAGE_ACTIVATE;
goto redirty_out;
}
......@@ -552,10 +822,18 @@ static int f2fs_write_data_page(struct page *page,
if (S_ISDIR(inode->i_mode)) {
dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(inode);
err = do_write_data_page(page);
err = do_write_data_page(page, &fio);
} else {
f2fs_lock_op(sbi);
err = do_write_data_page(page);
if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
err = f2fs_write_inline_data(inode, page, offset);
f2fs_unlock_op(sbi);
goto out;
} else {
err = do_write_data_page(page, &fio);
}
f2fs_unlock_op(sbi);
need_balance_fs = true;
}
......@@ -564,8 +842,10 @@ static int f2fs_write_data_page(struct page *page,
else if (err)
goto redirty_out;
if (wbc->for_reclaim)
f2fs_submit_bio(sbi, DATA, true);
if (wbc->for_reclaim) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
need_balance_fs = false;
}
clear_cold_data(page);
out:
......@@ -617,7 +897,8 @@ static int f2fs_write_data_pages(struct address_space *mapping,
ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
if (locked)
mutex_unlock(&sbi->writepages);
f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
f2fs_submit_merged_bio(sbi, DATA, WRITE);
remove_dirty_dir_inode(inode);
......@@ -638,27 +919,28 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
f2fs_balance_fs(sbi);
repeat:
err = f2fs_convert_inline_data(inode, pos + len);
if (err)
return err;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
f2fs_lock_op(sbi);
if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
goto inline_data;
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, index, ALLOC_NODE);
if (err)
goto err;
if (dn.data_blkaddr == NULL_ADDR)
err = reserve_new_block(&dn);
f2fs_put_dnode(&dn);
if (err)
goto err;
err = f2fs_reserve_block(&dn, index);
f2fs_unlock_op(sbi);
if (err) {
f2fs_put_page(page, 1);
return err;
}
inline_data:
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
......@@ -674,15 +956,19 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
if (f2fs_has_inline_data(inode))
err = f2fs_read_inline_data(inode, page);
else
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
if (err)
return err;
lock_page(page);
if (!PageUptodate(page)) {
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return -EIO;
}
if (page->mapping != mapping) {
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
......@@ -691,11 +977,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
clear_cold_data(page);
return 0;
err:
f2fs_unlock_op(sbi);
f2fs_put_page(page, 1);
return err;
}
static int f2fs_write_end(struct file *file,
......@@ -714,23 +995,43 @@ static int f2fs_write_end(struct file *file,
update_inode_page(inode);
}
unlock_page(page);
page_cache_release(page);
f2fs_put_page(page, 1);
return copied;
}
static int check_direct_IO(struct inode *inode, int rw,
const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
int i;
if (rw == READ)
return 0;
if (offset & blocksize_mask)
return -EINVAL;
for (i = 0; i < nr_segs; i++)
if (iov[i].iov_len & blocksize_mask)
return -EINVAL;
return 0;
}
static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
if (rw == WRITE)
/* Let buffer I/O handle the inline data case. */
if (f2fs_has_inline_data(inode))
return 0;
if (check_direct_IO(inode, rw, iov, offset, nr_segs))
return 0;
/* Needs synchronization with the cleaner */
return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
get_data_block_ro);
get_data_block);
}
static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
......@@ -759,6 +1060,8 @@ static int f2fs_set_data_page_dirty(struct page *page)
trace_f2fs_set_page_dirty(page, DATA);
SetPageUptodate(page);
mark_inode_dirty(inode);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
set_dirty_dir_page(inode, page);
......@@ -769,7 +1072,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping, block, get_data_block_ro);
return generic_block_bmap(mapping, block, get_data_block);
}
const struct address_space_operations f2fs_dblock_aops = {
......
......@@ -24,7 +24,7 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
static struct dentry *debugfs_root;
static struct dentry *f2fs_debugfs_root;
static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
......@@ -45,14 +45,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->valid_count = valid_user_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
si->inline_inode = sbi->inline_inode;
si->utilization = utilization(sbi);
si->free_segs = free_segments(sbi);
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
si->node_pages = sbi->node_inode->i_mapping->nrpages;
si->meta_pages = sbi->meta_inode->i_mapping->nrpages;
si->node_pages = NODE_MAPPING(sbi)->nrpages;
si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->sits = SIT_I(sbi)->dirty_sentries;
si->fnids = NM_I(sbi)->fcnt;
......@@ -165,9 +166,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* free nids */
si->cache_mem = NM_I(sbi)->fcnt;
si->cache_mem += NM_I(sbi)->nat_cnt;
npages = sbi->node_inode->i_mapping->nrpages;
npages = NODE_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = sbi->meta_inode->i_mapping->nrpages;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
......@@ -200,6 +201,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "Other: %u)\n - Data: %u\n",
si->valid_node_count - si->valid_inode_count,
si->valid_count - si->valid_node_count);
seq_printf(s, " - Inline_data Inode: %u\n",
si->inline_inode);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
......@@ -242,14 +245,14 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - node blocks : %d\n", si->node_blks);
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
seq_printf(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - nodes %4d in %4d\n",
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents %4d in dirs:%4d\n",
seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
seq_printf(s, " - meta %4d in %4d\n",
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - NATs %5d > %lu\n",
seq_printf(s, " - NATs: %5d > %lu\n",
si->nats, NM_WOUT_THRESHOLD);
seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
si->sits, si->fnids);
......@@ -340,14 +343,32 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
void __init f2fs_create_root_stats(void)
{
debugfs_root = debugfs_create_dir("f2fs", NULL);
if (debugfs_root)
debugfs_create_file("status", S_IRUGO, debugfs_root,
NULL, &stat_fops);
struct dentry *file;
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
goto bail;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
if (!file)
goto free_debugfs_dir;
return;
free_debugfs_dir:
debugfs_remove(f2fs_debugfs_root);
bail:
f2fs_debugfs_root = NULL;
return;
}
void f2fs_destroy_root_stats(void)
{
debugfs_remove_recursive(debugfs_root);
debugfs_root = NULL;
if (!f2fs_debugfs_root)
return;
debugfs_remove_recursive(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
}
......@@ -190,9 +190,6 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
if (namelen > F2FS_NAME_LEN)
return NULL;
if (npages == 0)
return NULL;
......@@ -259,20 +256,17 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
/* update parent inode number before releasing dentry page */
F2FS_I(inode)->i_pino = dir->i_ino;
f2fs_put_page(page, 1);
}
static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
struct f2fs_node *rn;
struct f2fs_inode *ri;
/* copy name info. to this inode page */
rn = F2FS_NODE(ipage);
rn->i.i_namelen = cpu_to_le32(name->len);
memcpy(rn->i.i_name, name->name, name->len);
ri = F2FS_INODE(ipage);
ri->i_namelen = cpu_to_le32(name->len);
memcpy(ri->i_name, name->name, name->len);
set_page_dirty(ipage);
}
......@@ -348,11 +342,11 @@ static struct page *init_inode_metadata(struct inode *inode,
err = f2fs_init_acl(inode, dir, page);
if (err)
goto error;
goto put_error;
err = f2fs_init_security(inode, dir, name, page);
if (err)
goto error;
goto put_error;
wait_on_page_writeback(page);
} else {
......@@ -376,8 +370,9 @@ static struct page *init_inode_metadata(struct inode *inode,
}
return page;
error:
put_error:
f2fs_put_page(page, 1);
error:
remove_inode_page(inode);
return ERR_PTR(err);
}
......@@ -393,6 +388,8 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
if (F2FS_I(dir)->i_current_depth != current_depth) {
F2FS_I(dir)->i_current_depth = current_depth;
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
......@@ -400,8 +397,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
update_inode_page(dir);
else
mark_inode_dirty(dir);
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
......@@ -432,10 +427,11 @@ static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
}
/*
* Caller should grab and release a mutex by calling mutex_lock_op() and
* mutex_unlock_op().
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode)
{
unsigned int bit_pos;
unsigned int level;
......@@ -461,7 +457,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in
}
start:
if (current_depth == MAX_DIR_HASH_DEPTH)
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
/* Increase the depth, if required */
......@@ -554,14 +550,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode && S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
update_inode_page(dir);
} else {
mark_inode_dirty(dir);
}
if (inode) {
if (S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
update_inode_page(dir);
}
inode->i_ctime = CURRENT_TIME;
drop_nlink(inode);
if (S_ISDIR(inode->i_mode)) {
......@@ -636,7 +629,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
for ( ; n < npages; n++) {
for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n);
if (IS_ERR(dentry_page))
continue;
......
......@@ -22,8 +22,10 @@
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(condition) BUG_ON(condition)
#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
#else
#define f2fs_bug_on(condition)
#define f2fs_down_write(x, y) down_write(x)
#endif
/*
......@@ -37,6 +39,7 @@
#define F2FS_MOUNT_POSIX_ACL 0x00000020
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
......@@ -97,6 +100,13 @@ struct dir_inode_entry {
struct inode *inode; /* vfs inode pointer */
};
/* for the list of blockaddresses to be discarded */
struct discard_entry {
struct list_head list; /* list head */
block_t blkaddr; /* block address to be discarded */
int len; /* # of consecutive blocks of the discard */
};
/* for the list of fsync inodes, used only during recovery */
struct fsync_inode_entry {
struct list_head list; /* list head */
......@@ -155,13 +165,15 @@ enum {
LOOKUP_NODE, /* look up a node without readahead */
LOOKUP_NODE_RA, /*
* look up a node with readahead called
* by get_datablock_ro.
* by get_data_block.
*/
};
#define F2FS_LINK_MAX 32000 /* maximum link count per file */
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
struct extent_info {
rwlock_t ext_lock; /* rwlock for consistency */
unsigned int fofs; /* start offset in a file */
......@@ -308,6 +320,14 @@ struct f2fs_sm_info {
/* a threshold to reclaim prefree segments */
unsigned int rec_prefree_segments;
/* for small discard management */
struct list_head discard_list; /* 4KB discard list */
int nr_discards; /* # of discards in the list */
int max_discards; /* max. discards to be issued */
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
};
/*
......@@ -338,6 +358,7 @@ enum count_type {
* with waiting the bio's completion
* ... Only can be used with META.
*/
#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
enum page_type {
DATA,
NODE,
......@@ -346,6 +367,20 @@ enum page_type {
META_FLUSH,
};
struct f2fs_io_info {
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
};
#define is_read_io(rw) (((rw) & 1) == READ)
struct f2fs_bio_info {
struct f2fs_sb_info *sbi; /* f2fs superblock */
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
struct mutex io_mutex; /* mutex for bio */
};
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
......@@ -359,9 +394,10 @@ struct f2fs_sb_info {
/* for segment-related operations */
struct f2fs_sm_info *sm_info; /* segment manager */
struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */
sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */
struct rw_semaphore bio_sem; /* IO semaphore */
/* for bio operations */
struct f2fs_bio_info read_io; /* for read bios */
struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
......@@ -376,8 +412,9 @@ struct f2fs_sb_info {
/* for orphan inode management */
struct list_head orphan_inode_list; /* orphan inode list */
struct mutex orphan_inode_mutex; /* for orphan inode list */
spinlock_t orphan_inode_lock; /* for orphan inode list */
unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */
/* for directory inode management */
struct list_head dir_inode_list; /* dir inode list */
......@@ -414,6 +451,9 @@ struct f2fs_sb_info {
struct f2fs_gc_kthread *gc_thread; /* GC thread */
unsigned int cur_victim_sec; /* current victim section num */
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
/*
* for stat information.
* one is for the LFS mode, and the other is for the SSR mode.
......@@ -423,6 +463,7 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
int inline_inode; /* # of inline_data inodes */
int bg_gc; /* background gc calls */
unsigned int n_dirty_dirs; /* # of dir inodes */
#endif
......@@ -462,6 +503,11 @@ static inline struct f2fs_node *F2FS_NODE(struct page *page)
return (struct f2fs_node *)page_address(page);
}
static inline struct f2fs_inode *F2FS_INODE(struct page *page)
{
return &((struct f2fs_node *)page_address(page))->i;
}
static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
{
return (struct f2fs_nm_info *)(sbi->nm_info);
......@@ -487,6 +533,16 @@ static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
}
static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi)
{
return sbi->meta_inode->i_mapping;
}
static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
{
return sbi->node_inode->i_mapping;
}
static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
{
sbi->s_dirty = 1;
......@@ -534,7 +590,7 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex);
f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
}
static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
......@@ -548,7 +604,7 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
WARN_ON((nid >= NM_I(sbi)->max_nid));
if (nid >= NM_I(sbi)->max_nid)
if (unlikely(nid >= NM_I(sbi)->max_nid))
return -EINVAL;
return 0;
}
......@@ -561,9 +617,9 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
static inline int F2FS_HAS_BLOCKS(struct inode *inode)
{
if (F2FS_I(inode)->i_xattr_nid)
return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1);
return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
else
return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS);
return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
}
static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
......@@ -574,7 +630,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
valid_block_count =
sbi->total_valid_block_count + (block_t)count;
if (valid_block_count > sbi->user_block_count) {
if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
......@@ -585,7 +641,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
return true;
}
static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
blkcnt_t count)
{
......@@ -595,7 +651,6 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
inode->i_blocks -= count;
sbi->total_valid_block_count -= (block_t)count;
spin_unlock(&sbi->stat_lock);
return 0;
}
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
......@@ -686,50 +741,48 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
}
static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode,
unsigned int count)
struct inode *inode)
{
block_t valid_block_count;
unsigned int valid_node_count;
spin_lock(&sbi->stat_lock);
valid_block_count = sbi->total_valid_block_count + (block_t)count;
sbi->alloc_valid_block_count += (block_t)count;
valid_node_count = sbi->total_valid_node_count + count;
if (valid_block_count > sbi->user_block_count) {
valid_block_count = sbi->total_valid_block_count + 1;
if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
if (valid_node_count > sbi->total_node_count) {
valid_node_count = sbi->total_valid_node_count + 1;
if (unlikely(valid_node_count > sbi->total_node_count)) {
spin_unlock(&sbi->stat_lock);
return false;
}
if (inode)
inode->i_blocks += count;
sbi->total_valid_node_count = valid_node_count;
sbi->total_valid_block_count = valid_block_count;
inode->i_blocks++;
sbi->alloc_valid_block_count++;
sbi->total_valid_node_count++;
sbi->total_valid_block_count++;
spin_unlock(&sbi->stat_lock);
return true;
}
static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode,
unsigned int count)
struct inode *inode)
{
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi->total_valid_block_count < count);
f2fs_bug_on(sbi->total_valid_node_count < count);
f2fs_bug_on(inode->i_blocks < count);
f2fs_bug_on(!sbi->total_valid_block_count);
f2fs_bug_on(!sbi->total_valid_node_count);
f2fs_bug_on(!inode->i_blocks);
inode->i_blocks -= count;
sbi->total_valid_node_count -= count;
sbi->total_valid_block_count -= (block_t)count;
inode->i_blocks--;
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
spin_unlock(&sbi->stat_lock);
}
......@@ -751,13 +804,12 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->stat_lock);
}
static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->stat_lock);
f2fs_bug_on(!sbi->total_valid_inode_count);
sbi->total_valid_inode_count--;
spin_unlock(&sbi->stat_lock);
return 0;
}
static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
......@@ -771,7 +823,7 @@ static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
static inline void f2fs_put_page(struct page *page, int unlock)
{
if (!page || IS_ERR(page))
if (!page)
return;
if (unlock) {
......@@ -876,7 +928,9 @@ enum {
FI_NO_ALLOC, /* should not allocate any blocks */
FI_UPDATE_DIR, /* should update inode block for consistency */
FI_DELAY_IPUT, /* used for the recovery */
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
......@@ -914,6 +968,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
{
if (ri->i_inline & F2FS_INLINE_XATTR)
set_inode_flag(fi, FI_INLINE_XATTR);
if (ri->i_inline & F2FS_INLINE_DATA)
set_inode_flag(fi, FI_INLINE_DATA);
}
static inline void set_raw_inline(struct f2fs_inode_info *fi,
......@@ -923,6 +979,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
if (is_inode_flag_set(fi, FI_INLINE_XATTR))
ri->i_inline |= F2FS_INLINE_XATTR;
if (is_inode_flag_set(fi, FI_INLINE_DATA))
ri->i_inline |= F2FS_INLINE_DATA;
}
static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
......@@ -948,6 +1006,18 @@ static inline int inline_xattr_size(struct inode *inode)
return 0;
}
static inline int f2fs_has_inline_data(struct inode *inode)
{
return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
}
static inline void *inline_data_addr(struct page *page)
{
struct f2fs_inode *ri;
ri = (struct f2fs_inode *)page_address(page);
return (void *)&(ri->i_addr[1]);
}
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
......@@ -958,6 +1028,7 @@ static inline int f2fs_readonly(struct super_block *sb)
*/
int f2fs_sync_file(struct file *, loff_t, loff_t, int);
void truncate_data_blocks(struct dnode_of_data *);
int truncate_blocks(struct inode *, u64);
void f2fs_truncate(struct inode *);
int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int f2fs_setattr(struct dentry *, struct iattr *);
......@@ -1027,7 +1098,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *);
int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
int remove_inode_page(struct inode *);
void remove_inode_page(struct inode *);
struct page *new_inode_page(struct inode *, const struct qstr *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t);
......@@ -1059,19 +1130,19 @@ void clear_prefree_segments(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
struct bio *f2fs_bio_alloc(struct block_device *, int);
void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool);
void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
void write_meta_page(struct f2fs_sb_info *, struct page *);
void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
block_t, block_t *);
void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
block_t, block_t *);
void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t);
void write_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_io_info *, unsigned int, block_t, block_t *);
void write_data_page(struct page *, struct dnode_of_data *, block_t *,
struct f2fs_io_info *);
void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
void recover_data_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
void rewrite_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
void allocate_data_block(struct f2fs_sb_info *, struct page *,
block_t, block_t *, struct f2fs_summary *, int);
void f2fs_wait_on_page_writeback(struct page *, enum page_type);
void write_data_summaries(struct f2fs_sb_info *, block_t);
void write_node_summaries(struct f2fs_sb_info *, block_t);
int lookup_journal_in_cursum(struct f2fs_summary_block *,
......@@ -1079,6 +1150,8 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *,
void flush_sit_entries(struct f2fs_sb_info *);
int build_segment_manager(struct f2fs_sb_info *);
void destroy_segment_manager(struct f2fs_sb_info *);
int __init create_segment_manager_caches(void);
void destroy_segment_manager_caches(void);
/*
* checkpoint.c
......@@ -1090,7 +1163,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
int recover_orphan_inodes(struct f2fs_sb_info *);
void recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void set_dirty_dir_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *);
......@@ -1105,13 +1178,17 @@ void destroy_checkpoint_caches(void);
/*
* data.c
*/
void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int);
void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t,
struct f2fs_io_info *);
int reserve_new_block(struct dnode_of_data *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
void update_extent_cache(block_t, struct dnode_of_data *);
struct page *find_data_page(struct inode *, pgoff_t, bool);
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int);
int do_write_data_page(struct page *);
int do_write_data_page(struct page *, struct f2fs_io_info *);
/*
* gc.c
......@@ -1144,7 +1221,7 @@ struct f2fs_stat_info {
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, sits, fnids;
int total_count, utilization;
int bg_gc;
int bg_gc, inline_inode;
unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
......@@ -1164,7 +1241,7 @@ struct f2fs_stat_info {
static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
{
return (struct f2fs_stat_info*)sbi->stat_info;
return (struct f2fs_stat_info *)sbi->stat_info;
}
#define stat_inc_call_count(si) ((si)->call_count++)
......@@ -1173,6 +1250,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
#define stat_inc_inline_inode(inode) \
do { \
if (f2fs_has_inline_data(inode)) \
((F2FS_SB(inode->i_sb))->inline_inode++); \
} while (0)
#define stat_dec_inline_inode(inode) \
do { \
if (f2fs_has_inline_data(inode)) \
((F2FS_SB(inode->i_sb))->inline_inode--); \
} while (0)
#define stat_inc_seg_type(sbi, curseg) \
((sbi)->segment_count[(curseg)->alloc_type]++)
#define stat_inc_block_count(sbi, curseg) \
......@@ -1216,6 +1304,8 @@ void f2fs_destroy_root_stats(void);
#define stat_dec_dirty_dir(sbi)
#define stat_inc_total_hit(sb)
#define stat_inc_read_hit(sb)
#define stat_inc_inline_inode(inode)
#define stat_dec_inline_inode(inode)
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_seg_count(si, type)
......@@ -1238,4 +1328,13 @@ extern const struct address_space_operations f2fs_meta_aops;
extern const struct inode_operations f2fs_dir_inode_operations;
extern const struct inode_operations f2fs_symlink_inode_operations;
extern const struct inode_operations f2fs_special_inode_operations;
/*
* inline.c
*/
bool f2fs_may_inline(struct inode *);
int f2fs_read_inline_data(struct inode *, struct page *);
int f2fs_convert_inline_data(struct inode *, pgoff_t);
int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
int recover_inline_data(struct inode *, struct page *);
#endif
......@@ -33,7 +33,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct page *page = vmf->page;
struct inode *inode = file_inode(vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
block_t old_blk_addr;
struct dnode_of_data dn;
int err;
......@@ -44,30 +43,16 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
/* block allocation */
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, page->index, ALLOC_NODE);
if (err) {
f2fs_unlock_op(sbi);
goto out;
}
old_blk_addr = dn.data_blkaddr;
if (old_blk_addr == NULL_ADDR) {
err = reserve_new_block(&dn);
if (err) {
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
goto out;
}
}
f2fs_put_dnode(&dn);
err = f2fs_reserve_block(&dn, page->index);
f2fs_unlock_op(sbi);
if (err)
goto out;
file_update_time(vma->vm_file);
lock_page(page);
if (page->mapping != inode->i_mapping ||
if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) ||
!PageUptodate(page)) {
!PageUptodate(page))) {
unlock_page(page);
err = -EFAULT;
goto out;
......@@ -130,12 +115,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret = 0;
bool need_cp = false;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.sync_mode = WB_SYNC_NONE,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
if (f2fs_readonly(inode->i_sb))
if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
trace_f2fs_sync_file_enter(inode);
......@@ -217,7 +202,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
raw_node = F2FS_NODE(dn->node_page);
addr = blkaddr_in_node(raw_node) + ofs;
for ( ; count > 0; count--, addr++, dn->ofs_in_node++) {
for (; count > 0; count--, addr++, dn->ofs_in_node++) {
block_t blkaddr = le32_to_cpu(*addr);
if (blkaddr == NULL_ADDR)
continue;
......@@ -256,7 +241,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
return;
lock_page(page);
if (page->mapping != inode->i_mapping) {
if (unlikely(page->mapping != inode->i_mapping)) {
f2fs_put_page(page, 1);
return;
}
......@@ -266,21 +251,24 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
f2fs_put_page(page, 1);
}
static int truncate_blocks(struct inode *inode, u64 from)
int truncate_blocks(struct inode *inode, u64 from)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
unsigned int blocksize = inode->i_sb->s_blocksize;
struct dnode_of_data dn;
pgoff_t free_from;
int count = 0;
int err;
int count = 0, err = 0;
trace_f2fs_truncate_blocks_enter(inode, from);
if (f2fs_has_inline_data(inode))
goto done;
free_from = (pgoff_t)
((from + blocksize - 1) >> (sbi->log_blocksize));
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
if (err) {
......@@ -308,7 +296,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
free_next:
err = truncate_inode_blocks(inode, free_from);
f2fs_unlock_op(sbi);
done:
/* lastly zero out the first data page */
truncate_partial_data_page(inode, from);
......@@ -382,6 +370,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
err = f2fs_convert_inline_data(inode, attr->ia_size);
if (err)
return err;
truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode);
f2fs_balance_fs(F2FS_SB(inode->i_sb));
......@@ -459,12 +451,16 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
return 0;
}
static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t pg_start, pg_end;
loff_t off_start, off_end;
int ret = 0;
ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1);
if (ret)
return ret;
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
......@@ -499,12 +495,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
}
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
i_size_read(inode) <= (offset + len)) {
i_size_write(inode, offset);
mark_inode_dirty(inode);
}
return ret;
}
......@@ -521,6 +511,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (ret)
return ret;
ret = f2fs_convert_inline_data(inode, offset + len);
if (ret)
return ret;
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
......@@ -532,22 +526,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
break;
}
if (dn.data_blkaddr == NULL_ADDR) {
ret = reserve_new_block(&dn);
if (ret) {
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
break;
}
}
f2fs_put_dnode(&dn);
ret = f2fs_reserve_block(&dn, index);
f2fs_unlock_op(sbi);
if (ret)
break;
if (pg_start == pg_end)
new_size = offset + len;
......@@ -578,7 +560,7 @@ static long f2fs_fallocate(struct file *file, int mode,
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE)
ret = punch_hole(inode, offset, len, mode);
ret = punch_hole(inode, offset, len);
else
ret = expand_inode_data(inode, offset, len, mode);
......
......@@ -119,7 +119,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
kfree(gc_th);
sbi->gc_thread = NULL;
}
out:
return err;
}
......@@ -164,8 +163,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->ofs_unit = sbi->segs_per_sec;
}
if (p->max_search > MAX_VICTIM_SEARCH)
p->max_search = MAX_VICTIM_SEARCH;
if (p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
}
......@@ -429,7 +428,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
/* set page dirty and write it */
if (gc_type == FG_GC) {
f2fs_wait_on_page_writeback(node_page, NODE, true);
f2fs_wait_on_page_writeback(node_page, NODE);
set_page_dirty(node_page);
} else {
if (!PageWriteback(node_page))
......@@ -521,6 +520,11 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static void move_data_page(struct inode *inode, struct page *page, int gc_type)
{
struct f2fs_io_info fio = {
.type = DATA,
.rw = WRITE_SYNC,
};
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
......@@ -529,7 +533,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
} else {
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
f2fs_wait_on_page_writeback(page, DATA, true);
f2fs_wait_on_page_writeback(page, DATA);
if (clear_page_dirty_for_io(page) &&
S_ISDIR(inode->i_mode)) {
......@@ -537,7 +541,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
inode_dec_dirty_dents(inode);
}
set_cold_data(page);
do_write_data_page(page);
do_write_data_page(page, &fio);
clear_cold_data(page);
}
out:
......@@ -631,7 +635,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
goto next_step;
if (gc_type == FG_GC) {
f2fs_submit_bio(sbi, DATA, true);
f2fs_submit_merged_bio(sbi, DATA, WRITE);
/*
* In the case of FG_GC, it'd be better to reclaim this victim
......@@ -664,8 +668,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
/* read segment summary of victim */
sum_page = get_sum_page(sbi, segno);
if (IS_ERR(sum_page))
return;
blk_start_plug(&plug);
......@@ -697,7 +699,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&ilist);
gc_more:
if (!(sbi->sb->s_flags & MS_ACTIVE))
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
......
......@@ -20,7 +20,7 @@
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
/* Search max. number of dirty segments to select a victim segment */
#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */
#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
struct f2fs_gc_kthread {
struct task_struct *f2fs_gc_task;
......
/*
* fs/f2fs/inline.c
* Copyright (c) 2013, Intel Corporation
* Authors: Huajun Li <huajun.li@intel.com>
* Haicheng Li <haicheng.li@intel.com>
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
bool f2fs_may_inline(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
block_t nr_blocks;
loff_t i_size;
if (!test_opt(sbi, INLINE_DATA))
return false;
nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
if (inode->i_blocks > nr_blocks)
return false;
i_size = i_size_read(inode);
if (i_size > MAX_INLINE_DATA)
return false;
return true;
}
int f2fs_read_inline_data(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *ipage;
void *src_addr, *dst_addr;
if (page->index) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
goto out;
}
ipage = get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
/* Copy the whole inline data block */
src_addr = inline_data_addr(ipage);
dst_addr = kmap(page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
kunmap(page);
f2fs_put_page(ipage, 1);
out:
SetPageUptodate(page);
unlock_page(page);
return 0;
}
static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
{
int err;
struct page *ipage;
struct dnode_of_data dn;
void *src_addr, *dst_addr;
block_t new_blk_addr;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_io_info fio = {
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
};
f2fs_lock_op(sbi);
ipage = get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
/*
* i_addr[0] is not used for inline data,
* so reserving new block will not destroy inline data
*/
set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, 0);
if (err) {
f2fs_unlock_op(sbi);
return err;
}
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
/* Copy the whole inline data block */
src_addr = inline_data_addr(ipage);
dst_addr = kmap(page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
kunmap(page);
SetPageUptodate(page);
/* write data page to try to make data consistent */
set_page_writeback(page);
write_data_page(page, &dn, &new_blk_addr, &fio);
update_extent_cache(new_blk_addr, &dn);
f2fs_wait_on_page_writeback(page, DATA);
/* clear inline data and flag after data writeback */
zero_user_segment(ipage, INLINE_DATA_OFFSET,
INLINE_DATA_OFFSET + MAX_INLINE_DATA);
clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
stat_dec_inline_inode(inode);
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
return err;
}
int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
{
struct page *page;
int err;
if (!f2fs_has_inline_data(inode))
return 0;
else if (to_size <= MAX_INLINE_DATA)
return 0;
page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS);
if (!page)
return -ENOMEM;
err = __f2fs_convert_inline_data(inode, page);
f2fs_put_page(page, 1);
return err;
}
int f2fs_write_inline_data(struct inode *inode,
struct page *page, unsigned size)
{
void *src_addr, *dst_addr;
struct page *ipage;
struct dnode_of_data dn;
int err;
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
ipage = dn.inode_page;
zero_user_segment(ipage, INLINE_DATA_OFFSET,
INLINE_DATA_OFFSET + MAX_INLINE_DATA);
src_addr = kmap(page);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, size);
kunmap(page);
/* Release the first data block if it is allocated */
if (!f2fs_has_inline_data(inode)) {
truncate_data_blocks_range(&dn, 1);
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
stat_inc_inline_inode(inode);
}
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
return 0;
}
int recover_inline_data(struct inode *inode, struct page *npage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode *ri = NULL;
void *src_addr, *dst_addr;
struct page *ipage;
/*
* The inline_data recovery policy is as follows.
* [prev.] [next] of inline_data flag
* o o -> recover inline_data
* o x -> remove inline_data, and then recover data blocks
* x o -> remove inline_data, and then recover inline_data
* x x -> recover data blocks
*/
if (IS_INODE(npage))
ri = F2FS_INODE(npage);
if (f2fs_has_inline_data(inode) &&
ri && ri->i_inline & F2FS_INLINE_DATA) {
process_inline:
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(IS_ERR(ipage));
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
return -1;
}
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(IS_ERR(ipage));
zero_user_segment(ipage, INLINE_DATA_OFFSET,
INLINE_DATA_OFFSET + MAX_INLINE_DATA);
clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
} else if (ri && ri->i_inline & F2FS_INLINE_DATA) {
truncate_blocks(inode, 0);
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
goto process_inline;
}
return 0;
}
......@@ -42,9 +42,11 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
if (ri->i_addr[0])
inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
inode->i_rdev =
old_decode_dev(le32_to_cpu(ri->i_addr[0]));
else
inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
inode->i_rdev =
new_decode_dev(le32_to_cpu(ri->i_addr[1]));
}
}
......@@ -52,11 +54,13 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
{
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
if (old_valid_dev(inode->i_rdev)) {
ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev));
ri->i_addr[0] =
cpu_to_le32(old_encode_dev(inode->i_rdev));
ri->i_addr[1] = 0;
} else {
ri->i_addr[0] = 0;
ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev));
ri->i_addr[1] =
cpu_to_le32(new_encode_dev(inode->i_rdev));
ri->i_addr[2] = 0;
}
}
......@@ -67,7 +71,6 @@ static int do_read_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_node *rn;
struct f2fs_inode *ri;
/* Check if ino is within scope */
......@@ -81,8 +84,7 @@ static int do_read_inode(struct inode *inode)
if (IS_ERR(node_page))
return PTR_ERR(node_page);
rn = F2FS_NODE(node_page);
ri = &(rn->i);
ri = F2FS_INODE(node_page);
inode->i_mode = le16_to_cpu(ri->i_mode);
i_uid_write(inode, le32_to_cpu(ri->i_uid));
......@@ -175,13 +177,11 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
void update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_node *rn;
struct f2fs_inode *ri;
f2fs_wait_on_page_writeback(node_page, NODE, false);
f2fs_wait_on_page_writeback(node_page, NODE);
rn = F2FS_NODE(node_page);
ri = &(rn->i);
ri = F2FS_INODE(node_page);
ri->i_mode = cpu_to_le16(inode->i_mode);
ri->i_advise = F2FS_I(inode)->i_advise;
......@@ -281,6 +281,7 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_lock_op(sbi);
remove_inode_page(inode);
stat_dec_inline_inode(inode);
f2fs_unlock_op(sbi);
sb_end_intwrite(inode->i_sb);
......
......@@ -424,11 +424,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
new_inode->i_ctime = CURRENT_TIME;
if (old_dir_entry)
drop_nlink(new_inode);
drop_nlink(new_inode);
mark_inode_dirty(new_inode);
if (!new_inode->i_nlink)
add_orphan_inode(sbi, new_inode->i_ino);
......@@ -457,11 +459,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
update_inode_page(old_inode);
} else {
kunmap(old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
drop_nlink(old_dir);
mark_inode_dirty(old_dir);
update_inode_page(old_dir);
}
......
......@@ -87,17 +87,19 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
*/
static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
{
struct address_space *mapping = sbi->meta_inode->i_mapping;
struct address_space *mapping = META_MAPPING(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct blk_plug plug;
struct page *page;
pgoff_t index;
int i;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
blk_start_plug(&plug);
for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
if (nid >= nm_i->max_nid)
if (unlikely(nid >= nm_i->max_nid))
nid = 0;
index = current_nat_addr(sbi, nid);
......@@ -105,15 +107,15 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
if (!page)
continue;
if (PageUptodate(page)) {
mark_page_accessed(page);
f2fs_put_page(page, 1);
continue;
}
if (f2fs_readpage(sbi, page, index, READ))
continue;
f2fs_submit_page_mbio(sbi, page, index, &fio);
mark_page_accessed(page);
f2fs_put_page(page, 0);
}
blk_finish_plug(&plug);
f2fs_submit_merged_bio(sbi, META, READ);
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
......@@ -391,8 +393,8 @@ static int get_node_path(struct f2fs_inode_info *fi, long block,
/*
* Caller should call f2fs_put_dnode(dn).
* Also, it should grab and release a mutex by calling mutex_lock_op() and
* mutex_unlock_op() only if ro is not set RDONLY_NODE.
* Also, it should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op() only if ro is not set RDONLY_NODE.
* In the case of RDONLY_NODE, we don't need to care about mutex.
*/
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
......@@ -502,7 +504,7 @@ static void truncate_node(struct dnode_of_data *dn)
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, dn->inode, 1);
dec_valid_node_count(sbi, dn->inode);
set_node_addr(sbi, &ni, NULL_ADDR);
if (dn->nid == dn->inode->i_ino) {
......@@ -516,6 +518,10 @@ static void truncate_node(struct dnode_of_data *dn)
F2FS_SET_SB_DIRT(sbi);
f2fs_put_page(dn->node_page, 1);
invalidate_mapping_pages(NODE_MAPPING(sbi),
dn->node_page->index, dn->node_page->index);
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
}
......@@ -631,19 +637,19 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
return 0;
/* get indirect nodes in the path */
for (i = 0; i < depth - 1; i++) {
for (i = 0; i < idx + 1; i++) {
/* refernece count'll be increased */
pages[i] = get_node_page(sbi, nid[i]);
if (IS_ERR(pages[i])) {
depth = i + 1;
err = PTR_ERR(pages[i]);
idx = i - 1;
goto fail;
}
nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
}
/* free direct nodes linked to a partial indirect node */
for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
child_nid = get_nid(pages[idx], i, false);
if (!child_nid)
continue;
......@@ -654,7 +660,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
set_nid(pages[idx], i, 0, false);
}
if (offset[depth - 1] == 0) {
if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
truncate_node(dn);
......@@ -662,9 +668,10 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
f2fs_put_page(pages[idx], 1);
}
offset[idx]++;
offset[depth - 1] = 0;
offset[idx + 1] = 0;
idx--;
fail:
for (i = depth - 3; i >= 0; i--)
for (i = idx; i >= 0; i--)
f2fs_put_page(pages[i], 1);
trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
......@@ -678,11 +685,10 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct address_space *node_mapping = sbi->node_inode->i_mapping;
int err = 0, cont = 1;
int level, offset[4], noffset[4];
unsigned int nofs = 0;
struct f2fs_node *rn;
struct f2fs_inode *ri;
struct dnode_of_data dn;
struct page *page;
......@@ -699,7 +705,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
set_new_dnode(&dn, inode, page, NULL, 0);
unlock_page(page);
rn = F2FS_NODE(page);
ri = F2FS_INODE(page);
switch (level) {
case 0:
case 1:
......@@ -709,7 +715,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
nofs = noffset[1];
if (!offset[level - 1])
goto skip_partial;
err = truncate_partial_nodes(&dn, &rn->i, offset, level);
err = truncate_partial_nodes(&dn, ri, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
nofs += 1 + NIDS_PER_BLOCK;
......@@ -718,7 +724,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
nofs = 5 + 2 * NIDS_PER_BLOCK;
if (!offset[level - 1])
goto skip_partial;
err = truncate_partial_nodes(&dn, &rn->i, offset, level);
err = truncate_partial_nodes(&dn, ri, offset, level);
if (err < 0 && err != -ENOENT)
goto fail;
break;
......@@ -728,7 +734,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
skip_partial:
while (cont) {
dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
switch (offset[0]) {
case NODE_DIR1_BLOCK:
case NODE_DIR2_BLOCK:
......@@ -751,14 +757,14 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
if (err < 0 && err != -ENOENT)
goto fail;
if (offset[1] == 0 &&
rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
lock_page(page);
if (page->mapping != node_mapping) {
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto restart;
}
wait_on_page_writeback(page);
rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
}
......@@ -794,38 +800,34 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
set_new_dnode(&dn, inode, page, npage, nid);
if (page)
dn.inode_page_locked = 1;
dn.inode_page_locked = true;
truncate_node(&dn);
return 0;
}
/*
* Caller should grab and release a mutex by calling mutex_lock_op() and
* mutex_unlock_op().
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
int remove_inode_page(struct inode *inode)
void remove_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *page;
nid_t ino = inode->i_ino;
struct dnode_of_data dn;
int err;
page = get_node_page(sbi, ino);
if (IS_ERR(page))
return PTR_ERR(page);
return;
err = truncate_xattr_node(inode, page);
if (err) {
if (truncate_xattr_node(inode, page)) {
f2fs_put_page(page, 1);
return err;
return;
}
/* 0 is possible, after f2fs_new_inode() is failed */
f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
set_new_dnode(&dn, inode, page, page, ino);
truncate_node(&dn);
return 0;
}
struct page *new_inode_page(struct inode *inode, const struct qstr *name)
......@@ -843,19 +845,18 @@ struct page *new_node_page(struct dnode_of_data *dn,
unsigned int ofs, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
struct address_space *mapping = sbi->node_inode->i_mapping;
struct node_info old_ni, new_ni;
struct page *page;
int err;
if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
page = grab_cache_page(mapping, dn->nid);
page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
if (!page)
return ERR_PTR(-ENOMEM);
if (!inc_valid_node_count(sbi, dn->inode, 1)) {
if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
err = -ENOSPC;
goto fail;
}
......@@ -898,14 +899,14 @@ struct page *new_node_page(struct dnode_of_data *dn,
* LOCKED_PAGE: f2fs_put_page(page, 1)
* error: nothing
*/
static int read_node_page(struct page *page, int type)
static int read_node_page(struct page *page, int rw)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
struct node_info ni;
get_node_info(sbi, page->index, &ni);
if (ni.blk_addr == NULL_ADDR) {
if (unlikely(ni.blk_addr == NULL_ADDR)) {
f2fs_put_page(page, 1);
return -ENOENT;
}
......@@ -913,7 +914,7 @@ static int read_node_page(struct page *page, int type)
if (PageUptodate(page))
return LOCKED_PAGE;
return f2fs_readpage(sbi, page, ni.blk_addr, type);
return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
}
/*
......@@ -921,18 +922,17 @@ static int read_node_page(struct page *page, int type)
*/
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct address_space *mapping = sbi->node_inode->i_mapping;
struct page *apage;
int err;
apage = find_get_page(mapping, nid);
apage = find_get_page(NODE_MAPPING(sbi), nid);
if (apage && PageUptodate(apage)) {
f2fs_put_page(apage, 0);
return;
}
f2fs_put_page(apage, 0);
apage = grab_cache_page(mapping, nid);
apage = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!apage)
return;
......@@ -945,11 +945,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
struct address_space *mapping = sbi->node_inode->i_mapping;
struct page *page;
int err;
repeat:
page = grab_cache_page(mapping, nid);
page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
......@@ -960,11 +959,11 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
goto got_it;
lock_page(page);
if (!PageUptodate(page)) {
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
if (page->mapping != mapping) {
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
......@@ -981,7 +980,6 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
struct page *get_node_page_ra(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
struct address_space *mapping = sbi->node_inode->i_mapping;
struct blk_plug plug;
struct page *page;
int err, i, end;
......@@ -992,7 +990,7 @@ struct page *get_node_page_ra(struct page *parent, int start)
if (!nid)
return ERR_PTR(-ENOENT);
repeat:
page = grab_cache_page(mapping, nid);
page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
......@@ -1017,12 +1015,12 @@ struct page *get_node_page_ra(struct page *parent, int start)
blk_finish_plug(&plug);
lock_page(page);
if (page->mapping != mapping) {
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
page_hit:
if (!PageUptodate(page)) {
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
......@@ -1048,7 +1046,6 @@ void sync_inode_page(struct dnode_of_data *dn)
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
struct writeback_control *wbc)
{
struct address_space *mapping = sbi->node_inode->i_mapping;
pgoff_t index, end;
struct pagevec pvec;
int step = ino ? 2 : 0;
......@@ -1062,7 +1059,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
while (index <= end) {
int i, nr_pages;
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
......@@ -1095,7 +1092,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
else if (!trylock_page(page))
continue;
if (unlikely(page->mapping != mapping)) {
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
unlock_page(page);
continue;
......@@ -1122,7 +1119,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
}
mapping->a_ops->writepage(page, wbc);
NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
wrote++;
if (--wbc->nr_to_write == 0)
......@@ -1143,31 +1140,31 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
}
if (wrote)
f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
f2fs_submit_merged_bio(sbi, NODE, WRITE);
return nwritten;
}
int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
struct address_space *mapping = sbi->node_inode->i_mapping;
pgoff_t index = 0, end = LONG_MAX;
struct pagevec pvec;
int nr_pages;
int ret2 = 0, ret = 0;
pagevec_init(&pvec, 0);
while ((index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_WRITEBACK,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
unsigned i;
while (index <= end) {
int i, nr_pages;
nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
PAGECACHE_TAG_WRITEBACK,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
/* until radix tree lookup accepts end_index */
if (page->index > end)
if (unlikely(page->index > end))
continue;
if (ino && ino_of_node(page) == ino) {
......@@ -1180,9 +1177,9 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
cond_resched();
}
if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
ret2 = -ENOSPC;
if (test_and_clear_bit(AS_EIO, &mapping->flags))
if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
ret2 = -EIO;
if (!ret)
ret = ret2;
......@@ -1196,8 +1193,12 @@ static int f2fs_write_node_page(struct page *page,
nid_t nid;
block_t new_addr;
struct node_info ni;
struct f2fs_io_info fio = {
.type = NODE,
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
};
if (sbi->por_doing)
if (unlikely(sbi->por_doing))
goto redirty_out;
wait_on_page_writeback(page);
......@@ -1209,7 +1210,7 @@ static int f2fs_write_node_page(struct page *page,
get_node_info(sbi, nid, &ni);
/* This page is already truncated */
if (ni.blk_addr == NULL_ADDR) {
if (unlikely(ni.blk_addr == NULL_ADDR)) {
dec_page_count(sbi, F2FS_DIRTY_NODES);
unlock_page(page);
return 0;
......@@ -1220,7 +1221,7 @@ static int f2fs_write_node_page(struct page *page,
mutex_lock(&sbi->node_write);
set_page_writeback(page);
write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr);
dec_page_count(sbi, F2FS_DIRTY_NODES);
mutex_unlock(&sbi->node_write);
......@@ -1255,6 +1256,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
/* if mounting is failed, skip writing node pages */
wbc->nr_to_write = 3 * max_hw_blocks(sbi);
wbc->sync_mode = WB_SYNC_NONE;
sync_node_pages(sbi, 0, wbc);
wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) -
wbc->nr_to_write);
......@@ -1333,7 +1335,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
return -1;
/* 0 nid should not be used */
if (nid == 0)
if (unlikely(nid == 0))
return 0;
if (build) {
......@@ -1386,7 +1388,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
if (start_nid >= nm_i->max_nid)
if (unlikely(start_nid >= nm_i->max_nid))
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
......@@ -1420,7 +1422,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
if (nid >= nm_i->max_nid)
if (unlikely(nid >= nm_i->max_nid))
nid = 0;
if (i++ == FREE_NID_PAGES)
......@@ -1454,7 +1456,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct free_nid *i = NULL;
struct list_head *this;
retry:
if (sbi->total_valid_node_count + 1 >= nm_i->max_nid)
if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
return false;
spin_lock(&nm_i->free_nid_list_lock);
......@@ -1535,13 +1537,12 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct address_space *mapping = sbi->node_inode->i_mapping;
struct f2fs_node *src, *dst;
struct f2fs_inode *src, *dst;
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
ipage = grab_cache_page(mapping, ino);
ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
if (!ipage)
return -ENOMEM;
......@@ -1552,19 +1553,19 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
src = F2FS_NODE(page);
dst = F2FS_NODE(ipage);
src = F2FS_INODE(page);
dst = F2FS_INODE(ipage);
memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
dst->i.i_size = 0;
dst->i.i_blocks = cpu_to_le64(1);
dst->i.i_links = cpu_to_le32(1);
dst->i.i_xattr_nid = 0;
memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
dst->i_size = 0;
dst->i_blocks = cpu_to_le64(1);
dst->i_links = cpu_to_le32(1);
dst->i_xattr_nid = 0;
new_ni = old_ni;
new_ni.ino = ino;
if (!inc_valid_node_count(sbi, NULL, 1))
if (unlikely(!inc_valid_node_count(sbi, NULL)))
WARN_ON(1);
set_node_addr(sbi, &new_ni, NEW_ADDR);
inc_valid_inode_count(sbi);
......@@ -1572,47 +1573,88 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
return 0;
}
/*
* ra_sum_pages() merge contiguous pages into one bio and submit.
* these pre-readed pages are linked in pages list.
*/
static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
int start, int nrpages)
{
struct page *page;
int page_idx = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
for (; page_idx < start + nrpages; page_idx++) {
/* alloc temporal page for read node summary info*/
page = alloc_page(GFP_F2FS_ZERO);
if (!page) {
struct page *tmp;
list_for_each_entry_safe(page, tmp, pages, lru) {
list_del(&page->lru);
unlock_page(page);
__free_pages(page, 0);
}
return -ENOMEM;
}
lock_page(page);
page->index = page_idx;
list_add_tail(&page->lru, pages);
}
list_for_each_entry(page, pages, lru)
f2fs_submit_page_mbio(sbi, page, page->index, &fio);
f2fs_submit_merged_bio(sbi, META, READ);
return 0;
}
int restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
struct page *page;
struct page *page, *tmp;
block_t addr;
int i, last_offset;
/* alloc temporal page for read node */
page = alloc_page(GFP_NOFS | __GFP_ZERO);
if (!page)
return -ENOMEM;
lock_page(page);
int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
int i, last_offset, nrpages, err = 0;
LIST_HEAD(page_list);
/* scan the node segment */
last_offset = sbi->blocks_per_seg;
addr = START_BLOCK(sbi, segno);
sum_entry = &sum->entries[0];
for (i = 0; i < last_offset; i++, sum_entry++) {
/*
* In order to read next node page,
* we must clear PageUptodate flag.
*/
ClearPageUptodate(page);
for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
nrpages = min(last_offset - i, bio_blocks);
if (f2fs_readpage(sbi, page, addr, READ_SYNC))
goto out;
/* read ahead node pages */
err = ra_sum_pages(sbi, &page_list, addr, nrpages);
if (err)
return err;
lock_page(page);
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
addr++;
list_for_each_entry_safe(page, tmp, &page_list, lru) {
lock_page(page);
if (unlikely(!PageUptodate(page))) {
err = -EIO;
} else {
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
sum_entry++;
}
list_del(&page->lru);
unlock_page(page);
__free_pages(page, 0);
}
}
unlock_page(page);
out:
__free_pages(page, 0);
return 0;
return err;
}
static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
......
......@@ -224,7 +224,13 @@ static inline block_t next_blkaddr_of_node(struct page *node_page)
* | `- direct node (5 + N => 5 + 2N - 1)
* `- double indirect node (5 + 2N)
* `- indirect node (6 + 2N)
* `- direct node (x(N + 1))
* `- direct node
* ......
* `- indirect node ((6 + 2N) + x(N + 1))
* `- direct node
* ......
* `- indirect node ((6 + 2N) + (N - 1)(N + 1))
* `- direct node
*/
static inline bool IS_DNODE(struct page *node_page)
{
......
......@@ -40,8 +40,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
static int recover_dentry(struct page *ipage, struct inode *inode)
{
struct f2fs_node *raw_node = F2FS_NODE(ipage);
struct f2fs_inode *raw_inode = &(raw_node->i);
struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
struct qstr name;
......@@ -62,6 +61,12 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
name.len = le32_to_cpu(raw_inode->i_namelen);
name.name = raw_inode->i_name;
if (unlikely(name.len > F2FS_NAME_LEN)) {
WARN_ON(1);
err = -ENAMETOOLONG;
goto out;
}
retry:
de = f2fs_find_entry(dir, &name, &page);
if (de && inode->i_ino == le32_to_cpu(de->ino))
......@@ -90,17 +95,16 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
kunmap(page);
f2fs_put_page(page, 0);
out:
f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: "
"ino = %x, name = %s, dir = %lx, err = %d",
ino_of_node(ipage), raw_inode->i_name,
f2fs_msg(inode->i_sb, KERN_NOTICE,
"%s: ino = %x, name = %s, dir = %lx, err = %d",
__func__, ino_of_node(ipage), raw_inode->i_name,
IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
static int recover_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_node *raw_node = F2FS_NODE(node_page);
struct f2fs_inode *raw_inode = &(raw_node->i);
struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
if (!IS_INODE(node_page))
return 0;
......@@ -143,9 +147,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
while (1) {
struct fsync_inode_entry *entry;
err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
if (err)
goto out;
return err;
lock_page(page);
......@@ -191,9 +195,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
}
unlock_page(page);
out:
__free_pages(page, 0);
return err;
}
......@@ -293,6 +298,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct node_info ni;
int err = 0, recovered = 0;
if (recover_inline_data(inode, page))
goto out;
start = start_bidx_of_node(ofs_of_node(page), fi);
if (IS_INODE(page))
end = start + ADDRS_PER_INODE(fi);
......@@ -300,12 +308,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
end = start + ADDRS_PER_BLOCK;
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
f2fs_unlock_op(sbi);
return err;
goto out;
}
wait_on_page_writeback(dn.node_page);
......@@ -356,10 +365,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
err:
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, "
"recovered_data = %d blocks, err = %d",
inode->i_ino, recovered, err);
out:
f2fs_msg(sbi->sb, KERN_NOTICE,
"recover_data: ino = %lx, recovered = %d blocks, err = %d",
inode->i_ino, recovered, err);
return err;
}
......@@ -377,7 +386,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* read node page */
page = alloc_page(GFP_NOFS | __GFP_ZERO);
page = alloc_page(GFP_F2FS_ZERO);
if (!page)
return -ENOMEM;
......@@ -386,9 +395,9 @@ static int recover_data(struct f2fs_sb_info *sbi,
while (1) {
struct fsync_inode_entry *entry;
err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
if (err)
goto out;
return err;
lock_page(page);
......@@ -412,8 +421,8 @@ static int recover_data(struct f2fs_sb_info *sbi,
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
}
unlock_page(page);
out:
__free_pages(page, 0);
if (!err)
......@@ -429,7 +438,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry), NULL);
if (unlikely(!fsync_entry_slab))
if (!fsync_entry_slab)
return -ENOMEM;
INIT_LIST_HEAD(&inode_list);
......
......@@ -14,12 +14,163 @@
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include <trace/events/f2fs.h>
#define __reverse_ffz(x) __reverse_ffs(~(x))
static struct kmem_cache *discard_entry_slab;
/*
* __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
* MSB and LSB are reversed in a byte by f2fs_set_bit.
*/
static inline unsigned long __reverse_ffs(unsigned long word)
{
int num = 0;
#if BITS_PER_LONG == 64
if ((word & 0xffffffff) == 0) {
num += 32;
word >>= 32;
}
#endif
if ((word & 0xffff) == 0) {
num += 16;
word >>= 16;
}
if ((word & 0xff) == 0) {
num += 8;
word >>= 8;
}
if ((word & 0xf0) == 0)
num += 4;
else
word >>= 4;
if ((word & 0xc) == 0)
num += 2;
else
word >>= 2;
if ((word & 0x2) == 0)
num += 1;
return num;
}
/*
* __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue
* f2fs_set_bit makes MSB and LSB reversed in a byte.
* Example:
* LSB <--> MSB
* f2fs_set_bit(0, bitmap) => 0000 0001
* f2fs_set_bit(7, bitmap) => 1000 0000
*/
static unsigned long __find_rev_next_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
unsigned long mask, submask;
unsigned long quot, rest;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (!offset)
goto aligned;
tmp = *(p++);
quot = (offset >> 3) << 3;
rest = offset & 0x7;
mask = ~0UL << quot;
submask = (unsigned char)(0xff << rest) >> rest;
submask <<= quot;
mask &= submask;
tmp &= mask;
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
aligned:
while (size & ~(BITS_PER_LONG-1)) {
tmp = *(p++);
if (tmp)
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __reverse_ffs(tmp);
}
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
unsigned long mask, submask;
unsigned long quot, rest;
if (offset >= size)
return size;
size -= result;
offset %= BITS_PER_LONG;
if (!offset)
goto aligned;
tmp = *(p++);
quot = (offset >> 3) << 3;
rest = offset & 0x7;
mask = ~(~0UL << quot);
submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
submask <<= quot;
mask += submask;
tmp |= mask;
if (size < BITS_PER_LONG)
goto found_first;
if (~tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
aligned:
while (size & ~(BITS_PER_LONG - 1)) {
tmp = *(p++);
if (~tmp)
goto found_middle;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size)
return result;
tmp = *p;
found_first:
tmp |= ~0UL << size;
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. */
found_middle:
return result + __reverse_ffz(tmp);
}
/*
* This function balances dirty node and dentry pages.
* In addition, it controls garbage collection.
......@@ -116,6 +267,56 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
mutex_unlock(&dirty_i->seglist_lock);
}
static void f2fs_issue_discard(struct f2fs_sb_info *sbi,
block_t blkstart, block_t blklen)
{
sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
}
static void add_discard_addrs(struct f2fs_sb_info *sbi,
unsigned int segno, struct seg_entry *se)
{
struct list_head *head = &SM_I(sbi)->discard_list;
struct discard_entry *new;
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
int max_blocks = sbi->blocks_per_seg;
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
unsigned long dmap[entries];
unsigned int start = 0, end = -1;
int i;
if (!test_opt(sbi, DISCARD))
return;
/* zero block will be discarded through the prefree list */
if (!se->valid_blocks || se->valid_blocks == max_blocks)
return;
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
for (i = 0; i < entries; i++)
dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
start = __find_rev_next_bit(dmap, max_blocks, end + 1);
if (start >= max_blocks)
break;
end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
INIT_LIST_HEAD(&new->list);
new->blkaddr = START_BLOCK(sbi, segno) + start;
new->len = end - start;
list_add_tail(&new->list, head);
SM_I(sbi)->nr_discards += end - start;
}
}
/*
* Should call clear_prefree_segments after checkpoint is done.
*/
......@@ -138,6 +339,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
struct list_head *head = &(SM_I(sbi)->discard_list);
struct list_head *this, *next;
struct discard_entry *entry;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int total_segs = TOTAL_SEGS(sbi);
......@@ -160,14 +364,19 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
if (!test_opt(sbi, DISCARD))
continue;
blkdev_issue_discard(sbi->sb->s_bdev,
START_BLOCK(sbi, start) <<
sbi->log_sectors_per_block,
(1 << (sbi->log_sectors_per_block +
sbi->log_blocks_per_seg)) * (end - start),
GFP_NOFS, 0);
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
(end - start) << sbi->log_blocks_per_seg);
}
mutex_unlock(&dirty_i->seglist_lock);
/* send small discards */
list_for_each_safe(this, next, head) {
entry = list_entry(this, struct discard_entry, list);
f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
list_del(&entry->list);
SM_I(sbi)->nr_discards -= entry->len;
kmem_cache_free(discard_entry_slab, entry);
}
}
static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
......@@ -459,13 +668,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg, block_t start)
{
struct seg_entry *se = get_seg_entry(sbi, seg->segno);
block_t ofs;
for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
&& !f2fs_test_bit(ofs, se->cur_valid_map))
break;
}
seg->next_blkoff = ofs;
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
unsigned long target_map[entries];
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
int i, pos;
for (i = 0; i < entries; i++)
target_map[i] = ckpt_map[i] | cur_map[i];
pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
seg->next_blkoff = pos;
}
/*
......@@ -573,148 +787,6 @@ static const struct segment_allocation default_salloc_ops = {
.allocate_segment = allocate_segment_by_default,
};
static void f2fs_end_io_write(struct bio *bio, int err)
{
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct bio_private *p = bio->bi_private;
do {
struct page *page = bvec->bv_page;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
if (!uptodate) {
SetPageError(page);
if (page->mapping)
set_bit(AS_EIO, &page->mapping->flags);
set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
p->sbi->sb->s_flags |= MS_RDONLY;
}
end_page_writeback(page);
dec_page_count(p->sbi, F2FS_WRITEBACK);
} while (bvec >= bio->bi_io_vec);
if (p->is_sync)
complete(p->wait);
if (!get_pages(p->sbi, F2FS_WRITEBACK) &&
!list_empty(&p->sbi->cp_wait.task_list))
wake_up(&p->sbi->cp_wait);
kfree(p);
bio_put(bio);
}
struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
{
struct bio *bio;
/* No failure on bio allocation */
bio = bio_alloc(GFP_NOIO, npages);
bio->bi_bdev = bdev;
bio->bi_private = NULL;
return bio;
}
static void do_submit_bio(struct f2fs_sb_info *sbi,
enum page_type type, bool sync)
{
int rw = sync ? WRITE_SYNC : WRITE;
enum page_type btype = type > META ? META : type;
if (type >= META_FLUSH)
rw = WRITE_FLUSH_FUA;
if (btype == META)
rw |= REQ_META;
if (sbi->bio[btype]) {
struct bio_private *p = sbi->bio[btype]->bi_private;
p->sbi = sbi;
sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]);
if (type == META_FLUSH) {
DECLARE_COMPLETION_ONSTACK(wait);
p->is_sync = true;
p->wait = &wait;
submit_bio(rw, sbi->bio[btype]);
wait_for_completion(&wait);
} else {
p->is_sync = false;
submit_bio(rw, sbi->bio[btype]);
}
sbi->bio[btype] = NULL;
}
}
void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
{
down_write(&sbi->bio_sem);
do_submit_bio(sbi, type, sync);
up_write(&sbi->bio_sem);
}
static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
block_t blk_addr, enum page_type type)
{
struct block_device *bdev = sbi->sb->s_bdev;
int bio_blocks;
verify_block_addr(sbi, blk_addr);
down_write(&sbi->bio_sem);
inc_page_count(sbi, F2FS_WRITEBACK);
if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
do_submit_bio(sbi, type, false);
alloc_new:
if (sbi->bio[type] == NULL) {
struct bio_private *priv;
retry:
priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
if (!priv) {
cond_resched();
goto retry;
}
bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks);
sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
sbi->bio[type]->bi_private = priv;
/*
* The end_io will be assigned at the sumbission phase.
* Until then, let bio_add_page() merge consecutive IOs as much
* as possible.
*/
}
if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
PAGE_CACHE_SIZE) {
do_submit_bio(sbi, type, false);
goto alloc_new;
}
sbi->last_block_in_bio[type] = blk_addr;
up_write(&sbi->bio_sem);
trace_f2fs_submit_write_page(page, blk_addr, type);
}
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool sync)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
if (PageWriteback(page)) {
f2fs_submit_bio(sbi, type, sync);
wait_on_page_writeback(page);
}
}
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
......@@ -782,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
return __get_segment_type_6(page, p_type);
}
static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, enum page_type p_type)
void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
unsigned int old_cursegno;
int type;
type = __get_segment_type(page, p_type);
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
......@@ -824,49 +894,64 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
if (p_type == NODE)
if (page && IS_NODESEG(type))
fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
/* writeout dirty page into bdev */
submit_write_page(sbi, page, *new_blkaddr, p_type);
mutex_unlock(&curseg->curseg_mutex);
}
static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(page, fio->type);
allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
/* writeout dirty page into bdev */
f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
}
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_io_info fio = {
.type = META,
.rw = WRITE_SYNC | REQ_META | REQ_PRIO
};
set_page_writeback(page);
submit_write_page(sbi, page, page->index, META);
f2fs_submit_page_mbio(sbi, page, page->index, &fio);
}
void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_io_info *fio,
unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
{
struct f2fs_summary sum;
set_summary(&sum, nid, 0, 0);
do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
}
void write_data_page(struct inode *inode, struct page *page,
struct dnode_of_data *dn, block_t old_blkaddr,
block_t *new_blkaddr)
void write_data_page(struct page *page, struct dnode_of_data *dn,
block_t *new_blkaddr, struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
struct f2fs_summary sum;
struct node_info ni;
f2fs_bug_on(old_blkaddr == NULL_ADDR);
f2fs_bug_on(dn->data_blkaddr == NULL_ADDR);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
do_write_page(sbi, page, old_blkaddr,
new_blkaddr, &sum, DATA);
do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
}
void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blk_addr)
void rewrite_data_page(struct page *page, block_t old_blkaddr,
struct f2fs_io_info *fio)
{
submit_write_page(sbi, page, old_blk_addr, DATA);
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
}
void recover_data_page(struct f2fs_sb_info *sbi,
......@@ -925,6 +1010,10 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
unsigned int segno, old_cursegno;
block_t next_blkaddr = next_blkaddr_of_node(page);
unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
struct f2fs_io_info fio = {
.type = NODE,
.rw = WRITE_SYNC,
};
curseg = CURSEG_I(sbi, type);
......@@ -953,8 +1042,8 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
/* rewrite node page */
set_page_writeback(page);
submit_write_page(sbi, page, new_blkaddr, NODE);
f2fs_submit_bio(sbi, NODE, true);
f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
f2fs_submit_merged_bio(sbi, NODE, WRITE);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
locate_dirty_segment(sbi, old_cursegno);
......@@ -964,6 +1053,16 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
mutex_unlock(&curseg->curseg_mutex);
}
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
if (PageWriteback(page)) {
f2fs_submit_merged_bio(sbi, type, WRITE);
wait_on_page_writeback(page);
}
}
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
......@@ -1314,6 +1413,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
/* add discard candidates */
if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
add_discard_addrs(sbi, segno, se);
if (flushed)
goto to_sit_page;
......@@ -1480,41 +1583,94 @@ static int build_curseg(struct f2fs_sb_info *sbi)
return restore_curseg_summaries(sbi);
}
static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
block_t blk_addr, prev_blk_addr = 0;
int sit_blk_cnt = SIT_BLK_CNT(sbi);
int blkno = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
if (blkno != start && prev_blk_addr + 1 != blk_addr)
break;
prev_blk_addr = blk_addr;
repeat:
page = grab_cache_page(mapping, blk_addr);
if (!page) {
cond_resched();
goto repeat;
}
if (PageUptodate(page)) {
mark_page_accessed(page);
f2fs_put_page(page, 1);
continue;
}
f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
mark_page_accessed(page);
f2fs_put_page(page, 0);
}
f2fs_submit_merged_bio(sbi, META, READ);
return blkno - start;
}
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
unsigned int start;
int sit_blk_cnt = SIT_BLK_CNT(sbi);
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
for (start = 0; start < TOTAL_SEGS(sbi); start++) {
struct seg_entry *se = &sit_i->sentries[start];
struct f2fs_sit_block *sit_blk;
struct f2fs_sit_entry sit;
struct page *page;
int i;
mutex_lock(&curseg->curseg_mutex);
for (i = 0; i < sits_in_cursum(sum); i++) {
if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
sit = sit_in_journal(sum, i);
mutex_unlock(&curseg->curseg_mutex);
goto got_it;
do {
readed = ra_sit_pages(sbi, start_blk, nrpages);
start = start_blk * sit_i->sents_per_block;
end = (start_blk + readed) * sit_i->sents_per_block;
for (; start < end && start < TOTAL_SEGS(sbi); start++) {
struct seg_entry *se = &sit_i->sentries[start];
struct f2fs_sit_block *sit_blk;
struct f2fs_sit_entry sit;
struct page *page;
mutex_lock(&curseg->curseg_mutex);
for (i = 0; i < sits_in_cursum(sum); i++) {
if (le32_to_cpu(segno_in_journal(sum, i))
== start) {
sit = sit_in_journal(sum, i);
mutex_unlock(&curseg->curseg_mutex);
goto got_it;
}
}
}
mutex_unlock(&curseg->curseg_mutex);
page = get_current_sit_page(sbi, start);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
mutex_unlock(&curseg->curseg_mutex);
page = get_current_sit_page(sbi, start);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
got_it:
check_block_count(sbi, start, &sit);
seg_info_from_raw_sit(se, &sit);
if (sbi->segs_per_sec > 1) {
struct sec_entry *e = get_sec_entry(sbi, start);
e->valid_blocks += se->valid_blocks;
check_block_count(sbi, start, &sit);
seg_info_from_raw_sit(se, &sit);
if (sbi->segs_per_sec > 1) {
struct sec_entry *e = get_sec_entry(sbi, start);
e->valid_blocks += se->valid_blocks;
}
}
}
start_blk += readed;
} while (start_blk < sit_blk_cnt);
}
static void init_free_segmap(struct f2fs_sb_info *sbi)
......@@ -1644,6 +1800,12 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
sm_info->ipu_policy = F2FS_IPU_DISABLE;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
INIT_LIST_HEAD(&sm_info->discard_list);
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
err = build_sit_info(sbi);
if (err)
......@@ -1760,3 +1922,17 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
sbi->sm_info = NULL;
kfree(sm_info);
}
int __init create_segment_manager_caches(void)
{
discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
sizeof(struct discard_entry), NULL);
if (!discard_entry_slab)
return -ENOMEM;
return 0;
}
void destroy_segment_manager_caches(void)
{
kmem_cache_destroy(discard_entry_slab);
}
......@@ -20,13 +20,8 @@
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
#define IS_DATASEG(t) \
((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \
(t == CURSEG_WARM_DATA))
#define IS_NODESEG(t) \
((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \
(t == CURSEG_WARM_NODE))
#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
#define IS_CURSEG(sbi, seg) \
((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
......@@ -83,25 +78,20 @@
(segno / SIT_ENTRY_PER_BLOCK)
#define START_SEGNO(sit_i, segno) \
(SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr) \
(BITS_TO_LONGS(nr) * sizeof(unsigned long))
#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
#define TOTAL_SECS(sbi) (sbi->total_sections)
#define SECTOR_FROM_BLOCK(sbi, blk_addr) \
(blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
(((sector_t)blk_addr) << (sbi)->log_sectors_per_block)
#define SECTOR_TO_BLOCK(sbi, sectors) \
(sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
(sectors >> (sbi)->log_sectors_per_block)
#define MAX_BIO_BLOCKS(max_hw_blocks) \
(min((int)max_hw_blocks, BIO_MAX_PAGES))
/* during checkpoint, bio_private is used to synchronize the last bio */
struct bio_private {
struct f2fs_sb_info *sbi;
bool is_sync;
void *wait;
};
/*
* indicate a block allocation direction: RIGHT and LEFT.
* RIGHT means allocating new sections towards the end of volume.
......@@ -458,8 +448,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
return ((prefree_segments(sbi) / sbi->segs_per_sec)
+ free_sections(sbi) < overprovision_sections(sbi));
return (prefree_segments(sbi) / sbi->segs_per_sec)
+ free_sections(sbi) < overprovision_sections(sbi);
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
......@@ -467,38 +457,71 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
if (sbi->por_doing)
if (unlikely(sbi->por_doing))
return false;
return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
reserved_sections(sbi)));
return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
reserved_sections(sbi));
}
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
{
return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments);
return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
}
static inline int utilization(struct f2fs_sb_info *sbi)
{
return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count);
return div_u64((u64)valid_user_blocks(sbi) * 100,
sbi->user_block_count);
}
/*
* Sometimes f2fs may be better to drop out-of-place update policy.
* So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write
* data in the original place likewise other traditional file systems.
* But, currently set 100 in percentage, which means it is disabled.
* See below need_inplace_update().
* And, users can control the policy through sysfs entries.
* There are five policies with triggering conditions as follows.
* F2FS_IPU_FORCE - all the time,
* F2FS_IPU_SSR - if SSR mode is activated,
* F2FS_IPU_UTIL - if FS utilization is over threashold,
* F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over
* threashold,
* F2FS_IPUT_DISABLE - disable IPU. (=default option)
*/
#define MIN_IPU_UTIL 100
#define DEF_MIN_IPU_UTIL 70
enum {
F2FS_IPU_FORCE,
F2FS_IPU_SSR,
F2FS_IPU_UTIL,
F2FS_IPU_SSR_UTIL,
F2FS_IPU_DISABLE,
};
static inline bool need_inplace_update(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
/* IPU can be done only for the user data */
if (S_ISDIR(inode->i_mode))
return false;
if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL)
switch (SM_I(sbi)->ipu_policy) {
case F2FS_IPU_FORCE:
return true;
case F2FS_IPU_SSR:
if (need_SSR(sbi))
return true;
break;
case F2FS_IPU_UTIL:
if (utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
break;
case F2FS_IPU_SSR_UTIL:
if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
break;
case F2FS_IPU_DISABLE:
break;
}
return false;
}
......
......@@ -50,6 +50,7 @@ enum {
Opt_active_logs,
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_inline_data,
Opt_err,
};
......@@ -65,6 +66,7 @@ static match_table_t f2fs_tokens = {
{Opt_active_logs, "active_logs=%u"},
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_err, NULL},
};
......@@ -72,6 +74,7 @@ static match_table_t f2fs_tokens = {
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
F2FS_SBI, /* struct f2fs_sb_info */
};
struct f2fs_attr {
......@@ -89,6 +92,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
else if (struct_type == F2FS_SBI)
return (unsigned char *)sbi;
return NULL;
}
......@@ -175,6 +180,10 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
......@@ -183,6 +192,10 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_no_gc_sleep_time),
ATTR_LIST(gc_idle),
ATTR_LIST(reclaim_segments),
ATTR_LIST(max_small_discards),
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(max_victim_search),
NULL,
};
......@@ -311,6 +324,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_disable_ext_identify:
set_opt(sbi, DISABLE_EXT_IDENTIFY);
break;
case Opt_inline_data:
set_opt(sbi, INLINE_DATA);
break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
......@@ -325,7 +341,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
{
struct f2fs_inode_info *fi;
fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO);
fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
if (!fi)
return NULL;
......@@ -508,7 +524,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
#endif
if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
......@@ -518,7 +535,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main);
unsigned int total_segs =
le32_to_cpu(sbi->raw_super->segment_count_main);
int i;
for (i = 0; i < total_segs; i++) {
......@@ -618,7 +636,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct inode *inode;
if (ino < F2FS_ROOT_INO(sbi))
if (unlikely(ino < F2FS_ROOT_INO(sbi)))
return ERR_PTR(-ESTALE);
/*
......@@ -629,7 +647,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
inode = f2fs_iget(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
if (unlikely(generation && inode->i_generation != generation)) {
/* we didn't find the right inode.. */
iput(inode);
return ERR_PTR(-ESTALE);
......@@ -732,10 +750,10 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
if (fsmeta >= total)
if (unlikely(fsmeta >= total))
return 1;
if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
}
......@@ -763,6 +781,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
sbi->cur_victim_sec = NULL_SECNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
......@@ -798,9 +817,10 @@ static int read_raw_super_block(struct super_block *sb,
/* sanity checking of raw super */
if (sanity_check_raw_super(sb, *raw_super)) {
brelse(*raw_super_buf);
f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
"in %dth superblock", block + 1);
if(block == 0) {
f2fs_msg(sb, KERN_ERR,
"Can't find valid F2FS filesystem in %dth superblock",
block + 1);
if (block == 0) {
block++;
goto retry;
} else {
......@@ -818,6 +838,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
struct buffer_head *raw_super_buf;
struct inode *root;
long err = -EINVAL;
int i;
/* allocate memory for f2fs-specific super block info */
sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
......@@ -825,7 +846,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
/* set a block size */
if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
goto free_sbi;
}
......@@ -874,7 +895,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->node_write);
sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
init_rwsem(&sbi->bio_sem);
mutex_init(&sbi->read_io.io_mutex);
sbi->read_io.sbi = sbi;
sbi->read_io.bio = NULL;
for (i = 0; i < NR_PAGE_TYPE; i++) {
mutex_init(&sbi->write_io[i].io_mutex);
sbi->write_io[i].sbi = sbi;
sbi->write_io[i].bio = NULL;
}
init_rwsem(&sbi->cp_rwsem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
......@@ -939,9 +969,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
}
/* if there are nt orphan nodes free them */
err = -EINVAL;
if (recover_orphan_inodes(sbi))
goto free_node_inode;
recover_orphan_inodes(sbi);
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
......@@ -950,8 +978,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = PTR_ERR(root);
goto free_node_inode;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size)
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
err = -EINVAL;
goto free_root_inode;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
......@@ -1053,7 +1083,7 @@ static int __init init_inodecache(void)
{
f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
sizeof(struct f2fs_inode_info), NULL);
if (f2fs_inode_cachep == NULL)
if (!f2fs_inode_cachep)
return -ENOMEM;
return 0;
}
......@@ -1078,9 +1108,12 @@ static int __init init_f2fs_fs(void)
err = create_node_manager_caches();
if (err)
goto free_inodecache;
err = create_gc_caches();
err = create_segment_manager_caches();
if (err)
goto free_node_manager_caches;
err = create_gc_caches();
if (err)
goto free_segment_manager_caches;
err = create_checkpoint_caches();
if (err)
goto free_gc_caches;
......@@ -1102,6 +1135,8 @@ static int __init init_f2fs_fs(void)
destroy_checkpoint_caches();
free_gc_caches:
destroy_gc_caches();
free_segment_manager_caches:
destroy_segment_manager_caches();
free_node_manager_caches:
destroy_node_manager_caches();
free_inodecache:
......@@ -1117,6 +1152,7 @@ static void __exit exit_f2fs_fs(void)
unregister_filesystem(&f2fs_fs_type);
destroy_checkpoint_caches();
destroy_gc_caches();
destroy_segment_manager_caches();
destroy_node_manager_caches();
destroy_inodecache();
kset_unregister(f2fs_kset);
......
......@@ -522,7 +522,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
if (found)
free = free + ENTRY_SIZE(here);
if (free < newsize) {
if (unlikely(free < newsize)) {
error = -ENOSPC;
goto exit;
}
......
......@@ -153,6 +153,13 @@ struct f2fs_extent {
#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5)
#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */
#define F2FS_INLINE_DATA 0x02 /* file inline data flag */
#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \
F2FS_INLINE_XATTR_ADDRS - 1))
#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \
- sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1))
struct f2fs_inode {
__le16 i_mode; /* file mode */
......
......@@ -16,15 +16,28 @@
{ META, "META" }, \
{ META_FLUSH, "META_FLUSH" })
#define show_bio_type(type) \
__print_symbolic(type, \
{ READ, "READ" }, \
{ READA, "READAHEAD" }, \
{ READ_SYNC, "READ_SYNC" }, \
{ WRITE, "WRITE" }, \
{ WRITE_SYNC, "WRITE_SYNC" }, \
{ WRITE_FLUSH, "WRITE_FLUSH" }, \
{ WRITE_FUA, "WRITE_FUA" })
#define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA))
#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO))
#define show_bio_type(type) show_bio_base(type), show_bio_extra(type)
#define show_bio_base(type) \
__print_symbolic(F2FS_BIO_MASK(type), \
{ READ, "READ" }, \
{ READA, "READAHEAD" }, \
{ READ_SYNC, "READ_SYNC" }, \
{ WRITE, "WRITE" }, \
{ WRITE_SYNC, "WRITE_SYNC" }, \
{ WRITE_FLUSH, "WRITE_FLUSH" }, \
{ WRITE_FUA, "WRITE_FUA" }, \
{ WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" })
#define show_bio_extra(type) \
__print_symbolic(F2FS_BIO_EXTRA_MASK(type), \
{ REQ_META, "(M)" }, \
{ REQ_PRIO, "(P)" }, \
{ REQ_META | REQ_PRIO, "(MP)" }, \
{ 0, " \b" })
#define show_data_type(type) \
__print_symbolic(type, \
......@@ -421,7 +434,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,
__entry->err)
);
TRACE_EVENT_CONDITION(f2fs_readpage,
TRACE_EVENT_CONDITION(f2fs_submit_page_bio,
TP_PROTO(struct page *page, sector_t blkaddr, int type),
......@@ -446,7 +459,7 @@ TRACE_EVENT_CONDITION(f2fs_readpage,
),
TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
"blkaddr = 0x%llx, bio_type = %s",
"blkaddr = 0x%llx, bio_type = %s%s",
show_dev_ino(__entry),
(unsigned long)__entry->index,
(unsigned long long)__entry->blkaddr,
......@@ -598,36 +611,54 @@ TRACE_EVENT(f2fs_reserve_new_block,
__entry->ofs_in_node)
);
TRACE_EVENT(f2fs_do_submit_bio,
DECLARE_EVENT_CLASS(f2fs__submit_bio,
TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio),
TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
TP_ARGS(sb, btype, sync, bio),
TP_ARGS(sb, rw, type, bio),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, btype)
__field(bool, sync)
__field(int, rw)
__field(int, type)
__field(sector_t, sector)
__field(unsigned int, size)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->btype = btype;
__entry->sync = sync;
__entry->rw = rw;
__entry->type = type;
__entry->sector = bio->bi_sector;
__entry->size = bio->bi_size;
),
TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u",
TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u",
show_dev(__entry),
show_block_type(__entry->btype),
__entry->sync ? "sync" : "no sync",
show_bio_type(__entry->rw),
show_block_type(__entry->type),
(unsigned long long)__entry->sector,
__entry->size)
);
DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio,
TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
TP_ARGS(sb, rw, type, bio),
TP_CONDITION(bio)
);
DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
TP_ARGS(sb, rw, type, bio),
TP_CONDITION(bio)
);
DECLARE_EVENT_CLASS(f2fs__page,
TP_PROTO(struct page *page, int type),
......@@ -674,15 +705,16 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
TP_ARGS(page, type)
);
TRACE_EVENT(f2fs_submit_write_page,
TRACE_EVENT(f2fs_submit_page_mbio,
TP_PROTO(struct page *page, block_t blk_addr, int type),
TP_PROTO(struct page *page, int rw, int type, block_t blk_addr),
TP_ARGS(page, blk_addr, type),
TP_ARGS(page, rw, type, blk_addr),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(int, rw)
__field(int, type)
__field(pgoff_t, index)
__field(block_t, block)
......@@ -691,13 +723,15 @@ TRACE_EVENT(f2fs_submit_write_page,
TP_fast_assign(
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->ino = page->mapping->host->i_ino;
__entry->rw = rw;
__entry->type = type;
__entry->index = page->index;
__entry->block = blk_addr;
),
TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx",
TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx",
show_dev_ino(__entry),
show_bio_type(__entry->rw),
show_block_type(__entry->type),
(unsigned long)__entry->index,
(unsigned long long)__entry->block)
......@@ -727,6 +761,29 @@ TRACE_EVENT(f2fs_write_checkpoint,
__entry->msg)
);
TRACE_EVENT(f2fs_issue_discard,
TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen),
TP_ARGS(sb, blkstart, blklen),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(block_t, blkstart)
__field(block_t, blklen)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->blkstart = blkstart;
__entry->blklen = blklen;
),
TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx",
show_dev(__entry),
(unsigned long long)__entry->blkstart,
(unsigned long long)__entry->blklen)
);
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment