Commit 30211125 authored by Linus Torvalds

Merge tag 'for-f2fs-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "This patch-set includes the following major enhancement patches.
   - introduce large directory support
   - introduce f2fs_issue_flush to merge redundant flush commands
   - merge write IOs as much as possible aligned to the segment
   - add sysfs entries to tune the f2fs configuration
   - use radix_tree for the free_nid_list to reduce in-memory operations
   - remove costly bit operations in f2fs_find_entry
   - enhance the readahead flow for CP/NAT/SIT/SSA blocks

  The other bug fixes are as follows:
   - recover xattr node blocks correctly after sudden-power-cut
   - fix to calculate the maximum number of node ids
   - enhance to handle many error cases

  And, there are a bunch of cleanups"

* tag 'for-f2fs-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (62 commits)
  f2fs: fix wrong statistics of inline data
  f2fs: check the acl's validity before setting
  f2fs: introduce f2fs_issue_flush to avoid redundant flush issue
  f2fs: fix to cover io->bio with io_rwsem
  f2fs: fix error path when fail to read inline data
  f2fs: use list_for_each_entry{_safe} for simplyfying code
  f2fs: avoid free slab cache under spinlock
  f2fs: avoid unneeded lookup when xattr name length is too long
  f2fs: avoid unnecessary bio submit when wait page writeback
  f2fs: return -EIO when node id is not matched
  f2fs: avoid RECLAIM_FS-ON-W warning
  f2fs: skip unnecessary node writes during fsync
  f2fs: introduce fi->i_sem to protect fi's info
  f2fs: change reclaim rate in percentage
  f2fs: add missing documentation for dir_level
  f2fs: remove unnecessary threshold
  f2fs: throttle the memory footprint with a sysfs entry
  f2fs: avoid to drop nat entries due to the negative nr_shrink
  f2fs: call f2fs_wait_on_page_writeback instead of native function
  f2fs: introduce nr_pages_to_write for segment alignment
  ...
parents 0af9fb63 48b230a5
......@@ -55,3 +55,15 @@ Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the number of trials to find a victim segment.
What: /sys/fs/f2fs/<disk>/dir_level
Date: March 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the directory level for large directories.
What: /sys/fs/f2fs/<disk>/ram_thresh
Date: March 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the memory footprint used by f2fs.
......@@ -122,6 +122,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs
inline_xattr Enable the inline xattrs feature.
inline_data Enable the inline data feature: Newly created small (<~3.4k)
files can be written into the inode block.
flush_merge Merge concurrent cache_flush commands as much as possible
to eliminate redundant command issues. If the underlying
device handles the cache_flush command relatively slowly,
enabling this option is recommended.
================================================================================
DEBUGFS ENTRIES
......@@ -169,9 +173,11 @@ Files in /sys/fs/f2fs/<devname>
reclaim_segments This parameter controls the number of prefree
segments to be reclaimed. If the number of prefree
segments is larger than this number, f2fs tries to
conduct checkpoint to reclaim the prefree segments
to free segments. By default, 100 segments, 200MB.
segments is larger than the number of segments
in the proportion to the percentage over total
volume size, f2fs tries to conduct checkpoint to
reclaim the prefree segments to free segments.
By default, 5% over total # of segments.
max_small_discards This parameter controls the number of discard
commands that consist of small blocks less than 2MB.
......@@ -195,6 +201,17 @@ Files in /sys/fs/f2fs/<devname>
cleaning operations. The default value is 4096
which covers 8GB block address range.
dir_level This parameter controls the directory level to
support large directories. If a directory holds a large
number of files, increasing this dir_level value can
reduce the file lookup latency. Otherwise, decrease this
value to reduce the space overhead. The default value is 0.
ram_thresh This parameter controls the memory footprint used
by free nids and cached nat entries. By default,
10 is set, which indicates 10 MB / 1 GB RAM.
================================================================================
USAGE
================================================================================
......@@ -444,9 +461,11 @@ The number of blocks and buckets are determined by,
# of blocks in level #n = |
`- 4, Otherwise
,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2,
,- 2^ (n + dir_level),
| if n < MAX_DIR_HASH_DEPTH / 2,
# of buckets in level #n = |
`- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise
`- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
Otherwise
When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
......
......@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
if (retval > 0) {
value = kmalloc(retval, GFP_KERNEL);
value = kmalloc(retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
retval = f2fs_getxattr(inode, name_index, "", value, retval);
......@@ -203,6 +203,12 @@ static int __f2fs_set_acl(struct inode *inode, int type,
size_t size = 0;
int error;
if (acl) {
error = posix_acl_valid(acl);
if (error < 0)
return error;
}
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
......
......@@ -33,14 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
page = grab_cache_page(mapping, index);
page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
if (!page) {
cond_resched();
goto repeat;
}
/* We wait writeback only inside grab_meta_page() */
wait_on_page_writeback(page);
SetPageUptodate(page);
return page;
}
......@@ -75,23 +73,102 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return page;
}
/*
 * Report the block-count limit associated with a readahead @type.
 *
 * META_NAT yields max_nid / NAT_ENTRY_PER_BLOCK, META_SIT yields
 * SIT_BLK_CNT(sbi), and META_SSA / META_CP yield 0.  Any other
 * @type value is treated as a programming error and hits BUG().
 */
inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
{
	if (type == META_NAT)
		return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;

	if (type == META_SIT)
		return SIT_BLK_CNT(sbi);

	if (type == META_SSA || type == META_CP)
		return 0;

	/* unknown readahead type */
	BUG();
}
/*
 * Readahead CP/NAT/SIT/SSA pages
 *
 * Walks up to @nrpages block indices beginning at @start and submits a
 * merged META read for every page of the meta mapping that is not yet
 * uptodate.  How an index is translated into a block address depends
 * on @type:
 *   - META_NAT: the index wraps back to 0 once it reaches the limit
 *     reported by get_max_meta_blks();
 *   - META_SIT: the walk stops at that limit, or as soon as the block
 *     address is not exactly one past the previous one;
 *   - META_SSA / META_CP: the index itself is the block address.
 * Any other @type hits BUG().
 *
 * Pending merged reads are always submitted before returning.  The
 * return value is the final index minus @start (after a META_NAT wrap
 * this is computed from the wrapped index).
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
{
	struct f2fs_io_info fio = {
		.type = META,
		.rw = READ_SYNC | REQ_META | REQ_PRIO
	};
	int limit = get_max_meta_blks(sbi, type);
	block_t last_addr = 0;
	int idx;

	for (idx = start; nrpages-- > 0; idx++) {
		struct page *pg;
		block_t addr;
		int uptodate;

		if (type == META_NAT) {
			/* get nat block addr */
			if (unlikely(idx >= limit))
				idx = 0;
			addr = current_nat_addr(sbi,
					idx * NAT_ENTRY_PER_BLOCK);
		} else if (type == META_SIT) {
			/* get sit block addr */
			if (unlikely(idx >= limit))
				break;
			addr = current_sit_addr(sbi,
					idx * SIT_ENTRY_PER_BLOCK);
			/* only keep going while the addresses stay contiguous */
			if (idx != start && last_addr + 1 != addr)
				break;
			last_addr = addr;
		} else if (type == META_SSA || type == META_CP) {
			/* get ssa/cp block addr */
			addr = idx;
		} else {
			BUG();
		}

		pg = grab_cache_page(META_MAPPING(sbi), addr);
		if (!pg)
			continue;

		/* pages that are already uptodate need no read */
		uptodate = PageUptodate(pg);
		if (!uptodate)
			f2fs_submit_page_mbio(sbi, pg, addr, &fio);

		mark_page_accessed(pg);
		/* second argument is 1 for an uptodate page, 0 after a submit */
		f2fs_put_page(pg, uptodate ? 1 : 0);
	}

	f2fs_submit_merged_bio(sbi, META, READ);
	return idx - start;
}
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
/* Should not write any meta pages, if any IO error was occurred */
if (unlikely(sbi->por_doing ||
is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
if (unlikely(sbi->por_doing))
goto redirty_out;
if (wbc->for_reclaim)
goto redirty_out;
wait_on_page_writeback(page);
/* Should not write any meta pages, if any IO error was occurred */
if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
goto no_write;
f2fs_wait_on_page_writeback(page, META);
write_meta_page(sbi, page);
no_write:
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
......@@ -99,6 +176,7 @@ static int f2fs_write_meta_page(struct page *page,
redirty_out:
dec_page_count(sbi, F2FS_DIRTY_META);
wbc->pages_skipped++;
account_page_redirty(page);
set_page_dirty(page);
return AOP_WRITEPAGE_ACTIVATE;
}
......@@ -107,21 +185,23 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
long written;
if (wbc->for_kupdate)
return 0;
long diff, written;
/* collect a number of dirty meta pages and write together */
if (get_pages(sbi, F2FS_DIRTY_META) < nrpages)
return 0;
if (wbc->for_kupdate ||
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
goto skip_write;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
written = sync_meta_pages(sbi, META, nrpages);
diff = nr_pages_to_write(sbi, META, wbc);
written = sync_meta_pages(sbi, META, wbc->nr_to_write);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write -= written;
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
skip_write:
wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
return 0;
}
......@@ -148,10 +228,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
lock_page(page);
f2fs_bug_on(page->mapping != mapping);
f2fs_bug_on(!PageDirty(page));
clear_page_dirty_for_io(page);
if (unlikely(page->mapping != mapping)) {
continue_unlock:
unlock_page(page);
continue;
}
if (!PageDirty(page)) {
/* someone wrote it for us */
goto continue_unlock;
}
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
if (f2fs_write_meta_page(page, &wbc)) {
unlock_page(page);
break;
......@@ -216,16 +308,15 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct list_head *head, *this;
struct orphan_inode_entry *new = NULL, *orphan = NULL;
struct list_head *head;
struct orphan_inode_entry *new, *orphan;
new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
new->ino = ino;
spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
list_for_each(this, head) {
orphan = list_entry(this, struct orphan_inode_entry, list);
list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
spin_unlock(&sbi->orphan_inode_lock);
kmem_cache_free(orphan_entry_slab, new);
......@@ -234,14 +325,10 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
if (orphan->ino > ino)
break;
orphan = NULL;
}
/* add new_oentry into list which is sorted by inode number */
if (orphan)
list_add(&new->list, this->prev);
else
list_add_tail(&new->list, head);
/* add new orphan entry into list which is sorted by inode number */
list_add_tail(&new->list, &orphan->list);
spin_unlock(&sbi->orphan_inode_lock);
}
......@@ -255,10 +342,11 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
list_del(&orphan->list);
kmem_cache_free(orphan_entry_slab, orphan);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
break;
spin_unlock(&sbi->orphan_inode_lock);
kmem_cache_free(orphan_entry_slab, orphan);
return;
}
}
spin_unlock(&sbi->orphan_inode_lock);
......@@ -285,6 +373,8 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
start_blk = __start_cp_addr(sbi) + 1;
orphan_blkaddr = __start_sum_addr(sbi) - 1;
ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
for (i = 0; i < orphan_blkaddr; i++) {
struct page *page = get_meta_page(sbi, start_blk + i);
struct f2fs_orphan_block *orphan_blk;
......@@ -466,14 +556,12 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct list_head *head = &sbi->dir_inode_list;
struct list_head *this;
struct dir_inode_entry *entry;
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
list_for_each_entry(entry, head, list)
if (unlikely(entry->inode == inode))
return -EEXIST;
}
list_add_tail(&new->list, head);
stat_inc_dirty_dir(sbi);
return 0;
......@@ -483,6 +571,7 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct dir_inode_entry *new;
int ret = 0;
if (!S_ISDIR(inode->i_mode))
return;
......@@ -492,13 +581,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
if (__add_dirty_inode(inode, new))
kmem_cache_free(inode_entry_slab, new);
inc_page_count(sbi, F2FS_DIRTY_DENTS);
ret = __add_dirty_inode(inode, new);
inode_inc_dirty_dents(inode);
SetPagePrivate(page);
spin_unlock(&sbi->dir_inode_lock);
if (ret)
kmem_cache_free(inode_entry_slab, new);
}
void add_dirty_dir_inode(struct inode *inode)
......@@ -506,44 +595,47 @@ void add_dirty_dir_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct dir_inode_entry *new =
f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
int ret = 0;
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
if (__add_dirty_inode(inode, new))
kmem_cache_free(inode_entry_slab, new);
ret = __add_dirty_inode(inode, new);
spin_unlock(&sbi->dir_inode_lock);
if (ret)
kmem_cache_free(inode_entry_slab, new);
}
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct list_head *this, *head;
struct list_head *head;
struct dir_inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
return;
spin_lock(&sbi->dir_inode_lock);
if (atomic_read(&F2FS_I(inode)->dirty_dents)) {
if (get_dirty_dents(inode)) {
spin_unlock(&sbi->dir_inode_lock);
return;
}
head = &sbi->dir_inode_list;
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
list_for_each_entry(entry, head, list) {
if (entry->inode == inode) {
list_del(&entry->list);
kmem_cache_free(inode_entry_slab, entry);
stat_dec_dirty_dir(sbi);
break;
spin_unlock(&sbi->dir_inode_lock);
kmem_cache_free(inode_entry_slab, entry);
goto done;
}
}
spin_unlock(&sbi->dir_inode_lock);
done:
/* Only from the recovery routine */
if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
......@@ -554,15 +646,14 @@ void remove_dirty_dir_inode(struct inode *inode)
struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct list_head *this, *head;
struct list_head *head;
struct inode *inode = NULL;
struct dir_inode_entry *entry;
spin_lock(&sbi->dir_inode_lock);
head = &sbi->dir_inode_list;
list_for_each(this, head) {
struct dir_inode_entry *entry;
entry = list_entry(this, struct dir_inode_entry, list);
list_for_each_entry(entry, head, list) {
if (entry->inode->i_ino == ino) {
inode = entry->inode;
break;
......@@ -589,7 +680,7 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
inode = igrab(entry->inode);
spin_unlock(&sbi->dir_inode_lock);
if (inode) {
filemap_flush(inode->i_mapping);
filemap_fdatawrite(inode->i_mapping);
iput(inode);
} else {
/*
......@@ -824,6 +915,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
unblock_operations(sbi);
mutex_unlock(&sbi->cp_mutex);
stat_inc_cp_count(sbi->stat_info);
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
......@@ -845,11 +937,11 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
int __init create_checkpoint_caches(void)
{
orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
sizeof(struct orphan_inode_entry), NULL);
sizeof(struct orphan_inode_entry));
if (!orphan_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry), NULL);
sizeof(struct dir_inode_entry));
if (!inode_entry_slab) {
kmem_cache_destroy(orphan_entry_slab);
return -ENOMEM;
......
......@@ -45,7 +45,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)
static void f2fs_write_end_io(struct bio *bio, int err)
{
struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb);
struct f2fs_sb_info *sbi = bio->bi_private;
struct bio_vec *bvec;
int i;
......@@ -55,15 +55,16 @@ static void f2fs_write_end_io(struct bio *bio, int err)
if (unlikely(err)) {
SetPageError(page);
set_bit(AS_EIO, &page->mapping->flags);
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
f2fs_stop_checkpoint(sbi);
}
end_page_writeback(page);
dec_page_count(sbi, F2FS_WRITEBACK);
}
if (bio->bi_private)
complete(bio->bi_private);
if (sbi->wait_io) {
complete(sbi->wait_io);
sbi->wait_io = NULL;
}
if (!get_pages(sbi, F2FS_WRITEBACK) &&
!list_empty(&sbi->cp_wait.task_list))
......@@ -86,6 +87,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
bio->bi_private = sbi;
return bio;
}
......@@ -113,7 +115,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
*/
if (fio->type == META_FLUSH) {
DECLARE_COMPLETION_ONSTACK(wait);
io->bio->bi_private = &wait;
io->sbi->wait_io = &wait;
submit_bio(rw, io->bio);
wait_for_completion(&wait);
} else {
......@@ -132,7 +134,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
mutex_lock(&io->io_mutex);
down_write(&io->io_rwsem);
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
......@@ -140,7 +142,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
mutex_unlock(&io->io_mutex);
up_write(&io->io_rwsem);
}
/*
......@@ -178,7 +180,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
verify_block_addr(sbi, blk_addr);
mutex_lock(&io->io_mutex);
down_write(&io->io_rwsem);
if (!is_read)
inc_page_count(sbi, F2FS_WRITEBACK);
......@@ -202,7 +204,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
io->last_block_in_bio = blk_addr;
mutex_unlock(&io->io_mutex);
up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
}
......@@ -797,48 +799,36 @@ static int f2fs_write_data_page(struct page *page,
*/
offset = i_size & (PAGE_CACHE_SIZE - 1);
if ((page->index >= end_index + 1) || !offset) {
if (S_ISDIR(inode->i_mode)) {
dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(inode);
}
inode_dec_dirty_dents(inode);
goto out;
}
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
if (unlikely(sbi->por_doing)) {
err = AOP_WRITEPAGE_ACTIVATE;
if (unlikely(sbi->por_doing))
goto redirty_out;
}
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(inode);
err = do_write_data_page(page, &fio);
} else {
f2fs_lock_op(sbi);
if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
err = f2fs_write_inline_data(inode, page, offset);
f2fs_unlock_op(sbi);
goto out;
} else {
err = do_write_data_page(page, &fio);
}
goto done;
}
f2fs_unlock_op(sbi);
if (!wbc->for_reclaim)
need_balance_fs = true;
}
if (err == -ENOENT)
goto out;
else if (err)
else if (has_not_enough_free_secs(sbi, 0))
goto redirty_out;
if (wbc->for_reclaim) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
need_balance_fs = false;
}
f2fs_lock_op(sbi);
if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
err = f2fs_write_inline_data(inode, page, offset);
else
err = do_write_data_page(page, &fio);
f2fs_unlock_op(sbi);
done:
if (err && err != -ENOENT)
goto redirty_out;
clear_cold_data(page);
out:
......@@ -849,12 +839,11 @@ static int f2fs_write_data_page(struct page *page,
redirty_out:
wbc->pages_skipped++;
account_page_redirty(page);
set_page_dirty(page);
return err;
return AOP_WRITEPAGE_ACTIVATE;
}
#define MAX_DESIRED_PAGES_WP 4096
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
......@@ -871,17 +860,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
bool locked = false;
int ret;
long excess_nrtw = 0, desired_nrtw;
long diff;
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
return 0;
if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
desired_nrtw = MAX_DESIRED_PAGES_WP;
excess_nrtw = desired_nrtw - wbc->nr_to_write;
wbc->nr_to_write = desired_nrtw;
}
if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
goto skip_write;
diff = nr_pages_to_write(sbi, DATA, wbc);
if (!S_ISDIR(inode->i_mode)) {
mutex_lock(&sbi->writepages);
......@@ -895,8 +884,12 @@ static int f2fs_write_data_pages(struct address_space *mapping,
remove_dirty_dir_inode(inode);
wbc->nr_to_write -= excess_nrtw;
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
skip_write:
wbc->pages_skipped += get_dirty_dents(inode);
return 0;
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
......@@ -949,13 +942,19 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
if (f2fs_has_inline_data(inode))
if (f2fs_has_inline_data(inode)) {
err = f2fs_read_inline_data(inode, page);
else
if (err) {
page_cache_release(page);
return err;
}
} else {
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
if (err)
return err;
if (err)
return err;
}
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
......@@ -1031,11 +1030,8 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
unsigned int length)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
dec_page_count(sbi, F2FS_DIRTY_DENTS);
if (PageDirty(page))
inode_dec_dirty_dents(inode);
}
ClearPagePrivate(page);
}
......
......@@ -86,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno, vblocks;
int ndirty = 0;
......@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
hblks_per_sec = blks_per_sec / 2;
mutex_lock(&sit_i->sentry_lock);
for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
dist = abs(vblocks - hblks_per_sec);
......@@ -105,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
ndirty++;
}
}
mutex_unlock(&sit_i->sentry_lock);
dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
si->bimodal = bimodal / dist;
if (si->dirty_count)
......@@ -236,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
seq_printf(s, "CP calls: %d\n", si->cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d\n", si->data_segs);
......@@ -252,10 +250,10 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_dent, si->ndirty_dirs);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - NATs: %5d > %lu\n",
si->nats, NM_WOUT_THRESHOLD);
seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
si->sits, si->fnids);
seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
si->nats, si->sits);
seq_printf(s, " - free_nids: %9d\n",
si->fnids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
......
......@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
>> PAGE_CACHE_SHIFT;
}
static unsigned int dir_buckets(unsigned int level)
static unsigned int dir_buckets(unsigned int level, int dir_level)
{
if (level < MAX_DIR_HASH_DEPTH / 2)
return 1 << level;
return 1 << (level + dir_level);
else
return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
}
static unsigned int bucket_blocks(unsigned int level)
......@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
static unsigned long dir_block_index(unsigned int level, unsigned int idx)
static unsigned long dir_block_index(unsigned int level,
int dir_level, unsigned int idx)
{
unsigned long i;
unsigned long bidx = 0;
for (i = 0; i < level; i++)
bidx += dir_buckets(i) * bucket_blocks(i);
bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
bidx += idx * bucket_blocks(level);
return bidx;
}
......@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos, end_pos, next_pos;
unsigned long bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
int slots;
const void *dentry_bits = &dentry_blk->dentry_bitmap;
int max_len = 0;
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK, 0);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
if (!test_bit_le(bit_pos, dentry_bits)) {
if (bit_pos == 0)
max_len = 1;
else if (!test_bit_le(bit_pos - 1, dentry_bits))
max_len++;
bit_pos++;
continue;
}
de = &dentry_blk->dentry[bit_pos];
slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
if (early_match_name(name, namelen, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
name, namelen)) {
......@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
goto found;
}
}
next_pos = bit_pos + slots;
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_DENTRY_IN_BLOCK, next_pos);
if (bit_pos >= NR_DENTRY_IN_BLOCK)
end_pos = NR_DENTRY_IN_BLOCK;
else
end_pos = bit_pos;
if (*max_slots < end_pos - next_pos)
*max_slots = end_pos - next_pos;
if (max_len > *max_slots) {
*max_slots = max_len;
max_len = 0;
}
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
de = NULL;
kunmap(dentry_page);
found:
if (max_len > *max_slots)
*max_slots = max_len;
return de;
}
......@@ -141,10 +145,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
nbucket = dir_buckets(level);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
le32_to_cpu(namehash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
......@@ -248,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
lock_page(page);
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, DATA);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
kunmap(page);
......@@ -347,14 +352,11 @@ static struct page *init_inode_metadata(struct inode *inode,
err = f2fs_init_security(inode, dir, name, page);
if (err)
goto put_error;
wait_on_page_writeback(page);
} else {
page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
if (IS_ERR(page))
return page;
wait_on_page_writeback(page);
set_cold_node(inode, page);
}
......@@ -372,6 +374,10 @@ static struct page *init_inode_metadata(struct inode *inode,
put_error:
f2fs_put_page(page, 1);
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
truncate_blocks(inode, 0);
remove_dirty_dir_inode(inode);
error:
remove_inode_page(inode);
return ERR_PTR(err);
......@@ -395,9 +401,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
update_inode_page(dir);
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
......@@ -464,10 +467,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
if (level == current_depth)
++current_depth;
nbucket = dir_buckets(level);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
(le32_to_cpu(dentry_hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
......@@ -487,8 +491,9 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
++level;
goto start;
add_dentry:
wait_on_page_writeback(dentry_page);
f2fs_wait_on_page_writeback(dentry_page, DATA);
down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, name);
if (IS_ERR(page)) {
err = PTR_ERR(page);
......@@ -511,7 +516,12 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
update_parent_metadata(dir, inode, current_depth);
fail:
clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
up_write(&F2FS_I(inode)->i_sem);
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
update_inode_page(dir);
clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
return err;
......@@ -528,13 +538,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
unsigned int bit_pos;
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
void *kaddr = page_address(page);
int i;
lock_page(page);
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, DATA);
dentry_blk = (struct f2fs_dentry_block *)kaddr;
bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
......@@ -551,6 +560,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode) {
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
down_write(&F2FS_I(inode)->i_sem);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
update_inode_page(dir);
......@@ -561,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
drop_nlink(inode);
i_size_write(inode, 0);
}
up_write(&F2FS_I(inode)->i_sem);
update_inode_page(inode);
if (inode->i_nlink == 0)
......@@ -573,7 +587,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
truncate_hole(dir, page->index, page->index + 1);
clear_page_dirty_for_io(page);
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(dir);
}
f2fs_put_page(page, 1);
......
......@@ -40,6 +40,7 @@
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
......@@ -88,6 +89,16 @@ enum {
SIT_BITMAP
};
/*
 * For CP/NAT/SIT/SSA readahead
 *
 * These values are the "type" argument accepted by ra_meta_pages() and
 * get_max_meta_blks(); they select how a block index is turned into a
 * block address during readahead.
 */
enum {
META_CP,	/* CP blocks: index is used directly as the block address */
META_NAT,	/* NAT blocks: address looked up via current_nat_addr() */
META_SIT,	/* SIT blocks: address looked up via current_sit_addr() */
META_SSA	/* SSA blocks: index is used directly as the block address */
};
/* for the list of orphan inodes */
struct orphan_inode_entry {
struct list_head list; /* list head */
......@@ -187,16 +198,20 @@ struct extent_info {
#define FADVISE_COLD_BIT 0x01
#define FADVISE_LOST_PINO_BIT 0x02
#define DEF_DIR_LEVEL 0
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
unsigned char i_dir_level; /* use for dentry level for large dir */
unsigned int i_current_depth; /* use only in directory structure */
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
/* Use below internally in f2fs */
unsigned long flags; /* use to pass per-file flags */
struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_dents; /* # of dirty dentry pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
......@@ -229,6 +244,7 @@ struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
unsigned int ram_thresh; /* control the memory footprint */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
......@@ -238,6 +254,7 @@ struct f2fs_nm_info {
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
struct list_head free_nid_list; /* a list for free nids */
spinlock_t free_nid_list_lock; /* protect free nid list */
unsigned int fcnt; /* the number of free node id */
......@@ -300,6 +317,12 @@ enum {
NO_CHECK_TYPE
};
/*
 * One queued flush command. struct f2fs_sm_info keeps pointers to these in
 * its issue_list, dispatch_list and issue_tail fields.
 */
struct flush_cmd {
struct flush_cmd *next; /* next command in the singly linked list */
struct completion wait; /* NOTE(review): presumably completed once the flush has been issued - confirm */
int ret; /* NOTE(review): presumably the flush result handed back to the waiter - confirm */
};
struct f2fs_sm_info {
struct sit_info *sit_info; /* whole segment information */
struct free_segmap_info *free_info; /* free segment information */
......@@ -328,6 +351,14 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
/* for flush command control */
struct task_struct *f2fs_issue_flush; /* flush thread */
wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
struct flush_cmd *issue_list; /* list for command issue */
struct flush_cmd *dispatch_list; /* list for command dispatch */
spinlock_t issue_lock; /* for issue list lock */
struct flush_cmd *issue_tail; /* list tail of issue list */
};
/*
......@@ -378,7 +409,7 @@ struct f2fs_bio_info {
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
struct mutex io_mutex; /* mutex for bio */
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
struct f2fs_sb_info {
......@@ -398,6 +429,7 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info read_io; /* for read bios */
struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
struct completion *wait_io; /* for completion bios */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
......@@ -407,7 +439,6 @@ struct f2fs_sb_info {
struct mutex node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
bool por_doing; /* recovery is doing or not */
bool on_build_free_nids; /* build_free_nids is doing */
wait_queue_head_t cp_wait;
/* for orphan inode management */
......@@ -436,6 +467,7 @@ struct f2fs_sb_info {
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
int active_logs; /* # of active logs */
int dir_level; /* directory level */
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
......@@ -622,6 +654,11 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode)
return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
}
static inline bool f2fs_has_xattr_block(unsigned int ofs)
{
return ofs == XATTR_NODE_OFFSET;
}
static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode, blkcnt_t count)
{
......@@ -661,6 +698,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_dents(struct inode *inode)
{
inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_inc(&F2FS_I(inode)->dirty_dents);
}
......@@ -671,6 +709,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_dec_dirty_dents(struct inode *inode)
{
if (!S_ISDIR(inode->i_mode))
return;
dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_dec(&F2FS_I(inode)->dirty_dents);
}
......@@ -679,6 +721,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
return atomic_read(&sbi->nr_pages[count_type]);
}
static inline int get_dirty_dents(struct inode *inode)
{
return atomic_read(&F2FS_I(inode)->dirty_dents);
}
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
unsigned int pages_per_sec = sbi->segs_per_sec *
......@@ -689,11 +736,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
{
block_t ret;
spin_lock(&sbi->stat_lock);
ret = sbi->total_valid_block_count;
spin_unlock(&sbi->stat_lock);
return ret;
return sbi->total_valid_block_count;
}
static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
......@@ -789,11 +832,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
{
unsigned int ret;
spin_lock(&sbi->stat_lock);
ret = sbi->total_valid_node_count;
spin_unlock(&sbi->stat_lock);
return ret;
return sbi->total_valid_node_count;
}
static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
......@@ -814,11 +853,7 @@ static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
{
unsigned int ret;
spin_lock(&sbi->stat_lock);
ret = sbi->total_valid_inode_count;
spin_unlock(&sbi->stat_lock);
return ret;
return sbi->total_valid_inode_count;
}
static inline void f2fs_put_page(struct page *page, int unlock)
......@@ -844,9 +879,9 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
}
static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
size_t size, void (*ctor)(void *))
size_t size)
{
return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
}
static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
......@@ -983,24 +1018,28 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
ri->i_inline |= F2FS_INLINE_DATA;
}
/*
 * Return the result of testing the FI_INLINE_XATTR inode flag on @inode.
 */
static inline int f2fs_has_inline_xattr(struct inode *inode)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	return is_inode_flag_set(fi, FI_INLINE_XATTR);
}
static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
{
if (is_inode_flag_set(fi, FI_INLINE_XATTR))
if (f2fs_has_inline_xattr(&fi->vfs_inode))
return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
return DEF_ADDRS_PER_INODE;
}
static inline void *inline_xattr_addr(struct page *page)
{
struct f2fs_inode *ri;
ri = (struct f2fs_inode *)page_address(page);
struct f2fs_inode *ri = F2FS_INODE(page);
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
F2FS_INLINE_XATTR_ADDRS]);
}
static inline int inline_xattr_size(struct inode *inode)
{
if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR))
if (f2fs_has_inline_xattr(inode))
return F2FS_INLINE_XATTR_ADDRS << 2;
else
return 0;
......@@ -1013,8 +1052,7 @@ static inline int f2fs_has_inline_data(struct inode *inode)
static inline void *inline_data_addr(struct page *page)
{
struct f2fs_inode *ri;
ri = (struct f2fs_inode *)page_address(page);
struct f2fs_inode *ri = F2FS_INODE(page);
return (void *)&(ri->i_addr[1]);
}
......@@ -1023,6 +1061,12 @@ static inline int f2fs_readonly(struct super_block *sb)
return sb->s_flags & MS_RDONLY;
}
/*
 * Set CP_ERROR_FLAG on the checkpoint of @sbi, then turn on MS_RDONLY in the
 * flags of its super block.
 */
static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
{
	/* flag the checkpoint first, exactly as before */
	set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);

	sbi->sb->s_flags = sbi->sb->s_flags | MS_RDONLY;
}
#define get_inode_mode(i) \
((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
......@@ -1048,7 +1092,7 @@ void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
int try_to_free_nats(struct f2fs_sb_info *, int);
void update_inode(struct inode *, struct page *);
int update_inode_page(struct inode *);
void update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
void f2fs_evict_inode(struct inode *);
......@@ -1097,6 +1141,7 @@ struct dnode_of_data;
struct node_info;
int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
......@@ -1115,6 +1160,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
void recover_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, struct node_info *, block_t);
bool recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
struct f2fs_summary_block *);
......@@ -1129,7 +1175,9 @@ void destroy_node_manager_caches(void);
*/
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
......@@ -1162,6 +1210,7 @@ void destroy_segment_manager_caches(void);
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
......@@ -1231,7 +1280,7 @@ struct f2fs_stat_info {
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
int dirty_count, node_pages, meta_pages;
int prefree_count, call_count;
int prefree_count, call_count, cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
int tot_blks, data_blks, node_blks;
int curseg[NR_CURSEG_TYPE];
......@@ -1248,6 +1297,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
return (struct f2fs_stat_info *)sbi->stat_info;
}
#define stat_inc_cp_count(si) ((si)->cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
......@@ -1302,6 +1352,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_cp_count(si)
#define stat_inc_call_count(si)
#define stat_inc_bggc_count(si)
#define stat_inc_dirty_dir(sbi)
......
......@@ -76,7 +76,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
trace_f2fs_vm_page_mkwrite(page, DATA);
mapped:
/* fill the page */
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, DATA);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(err);
......@@ -111,11 +111,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int ret = 0;
bool need_cp = false;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
......@@ -133,7 +134,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
mutex_lock(&inode->i_mutex);
down_read(&fi->i_sem);
/*
* Both of fdatasync() and fsync() are able to be recovered from
......@@ -150,25 +151,33 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
up_read(&fi->i_sem);
if (need_cp) {
nid_t pino;
F2FS_I(inode)->xattr_ver = 0;
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
down_write(&fi->i_sem);
F2FS_I(inode)->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
F2FS_I(inode)->i_pino = pino;
file_got_pino(inode);
up_write(&fi->i_sem);
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
goto out;
} else {
up_write(&fi->i_sem);
}
} else {
/* if there is no written node page, write its inode page */
while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
if (fsync_mark_done(sbi, inode->i_ino))
goto out;
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
......@@ -177,10 +186,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
if (ret)
goto out;
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
mutex_unlock(&inode->i_mutex);
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
......@@ -245,7 +253,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
f2fs_put_page(page, 1);
return;
}
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
f2fs_put_page(page, 1);
......@@ -422,7 +430,7 @@ static void fill_zero(struct inode *inode, pgoff_t index,
f2fs_unlock_op(sbi);
if (!IS_ERR(page)) {
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
......@@ -560,6 +568,8 @@ static long f2fs_fallocate(struct file *file, int mode,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
mutex_lock(&inode->i_mutex);
if (mode & FALLOC_FL_PUNCH_HOLE)
ret = punch_hole(inode, offset, len);
else
......@@ -569,6 +579,9 @@ static long f2fs_fallocate(struct file *file, int mode,
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
mutex_unlock(&inode->i_mutex);
trace_f2fs_fallocate(inode, mode, offset, len, ret);
return ret;
}
......
......@@ -531,15 +531,10 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
set_page_dirty(page);
set_cold_data(page);
} else {
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
f2fs_wait_on_page_writeback(page, DATA);
if (clear_page_dirty_for_io(page) &&
S_ISDIR(inode->i_mode)) {
dec_page_count(sbi, F2FS_DIRTY_DENTS);
if (clear_page_dirty_for_io(page))
inode_dec_dirty_dents(inode);
}
set_cold_data(page);
do_write_data_page(page, &fio);
clear_cold_data(page);
......@@ -701,6 +696,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
goto stop;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
gc_type = FG_GC;
......@@ -711,6 +708,11 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
goto stop;
ret = 0;
/* readahead multiple SSA blocks that have contiguous addresses */
if (sbi->segs_per_sec > 1)
ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
META_SSA);
for (i = 0; i < sbi->segs_per_sec; i++)
do_garbage_collect(sbi, segno + i, &ilist, gc_type);
......@@ -740,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
int __init create_gc_caches(void)
{
winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
sizeof(struct inode_entry), NULL);
sizeof(struct inode_entry));
if (!winode_slab)
return -ENOMEM;
return 0;
......
......@@ -45,8 +45,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
}
ipage = get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage))
if (IS_ERR(ipage)) {
unlock_page(page);
return PTR_ERR(ipage);
}
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
......
......@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode)
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
get_extent_info(&fi->ext, ri->i_ext);
get_inline_info(fi, ri);
......@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
ri->i_dir_level = F2FS_I(inode)->i_dir_level;
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
......@@ -212,24 +214,29 @@ void update_inode(struct inode *inode, struct page *node_page)
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
}
int update_inode_page(struct inode *inode)
void update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *node_page;
retry:
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page))
return PTR_ERR(node_page);
if (IS_ERR(node_page)) {
int err = PTR_ERR(node_page);
if (err == -ENOMEM) {
cond_resched();
goto retry;
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi);
}
return;
}
update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
return 0;
}
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int ret;
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
......@@ -243,13 +250,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* during the urgent cleaning time when running out of free sections.
*/
f2fs_lock_op(sbi);
ret = update_inode_page(inode);
update_inode_page(inode);
f2fs_unlock_op(sbi);
if (wbc)
f2fs_balance_fs(sbi);
return ret;
return 0;
}
/*
......@@ -266,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode)
inode->i_ino == F2FS_META_INO(sbi))
goto no_delete;
f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents));
f2fs_bug_on(get_dirty_dents(inode));
remove_dirty_dir_inode(inode);
if (inode->i_nlink || is_bad_inode(inode))
......
......@@ -207,6 +207,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
inode = f2fs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
stat_inc_inline_inode(inode);
}
return d_splice_alias(inode, dentry);
......@@ -424,12 +426,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
down_write(&F2FS_I(old_inode)->i_sem);
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
up_write(&F2FS_I(old_inode)->i_sem);
new_inode->i_ctime = CURRENT_TIME;
down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
drop_nlink(new_inode);
drop_nlink(new_inode);
up_write(&F2FS_I(new_inode)->i_sem);
mark_inode_dirty(new_inode);
if (!new_inode->i_nlink)
......@@ -459,7 +466,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
down_write(&F2FS_I(old_inode)->i_sem);
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
up_write(&F2FS_I(old_inode)->i_sem);
update_inode_page(old_inode);
} else {
kunmap(old_dir_page);
......
......@@ -21,9 +21,27 @@
#include "segment.h"
#include <trace/events/f2fs.h>
/*
 * True while build_lock of the given node manager info is held, i.e. while
 * some task is inside the build_free_nids() section guarded by that mutex.
 *
 * The argument is used (and parenthesized) instead of relying on a variable
 * that happens to be called nm_i in the caller's scope.
 */
#define on_build_free_nids(nmi) mutex_is_locked(&(nmi)->build_lock)
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
{
struct sysinfo val;
unsigned long mem_size = 0;
si_meminfo(&val);
if (type == FREE_NIDS)
mem_size = nm_i->fcnt * sizeof(struct free_nid);
else if (type == NAT_ENTRIES)
mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
mem_size >>= 12;
/* give 50:50 memory for free nids and nat caches respectively */
return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
}
static void clear_node_page_dirty(struct page *page)
{
struct address_space *mapping = page->mapping;
......@@ -82,42 +100,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
return dst_page;
}
/*
* Readahead NAT pages
*/
static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
{
struct address_space *mapping = META_MAPPING(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct page *page;
pgoff_t index;
int i;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
if (unlikely(nid >= nm_i->max_nid))
nid = 0;
index = current_nat_addr(sbi, nid);
page = grab_cache_page(mapping, index);
if (!page)
continue;
if (PageUptodate(page)) {
mark_page_accessed(page);
f2fs_put_page(page, 1);
continue;
}
f2fs_submit_page_mbio(sbi, page, index, &fio);
mark_page_accessed(page);
f2fs_put_page(page, 0);
}
f2fs_submit_merged_bio(sbi, META, READ);
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
return radix_tree_lookup(&nm_i->nat_root, n);
......@@ -151,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
return is_cp;
}
/*
 * Report the fsync_done flag of the cached nat entry for @nid.
 *
 * The lookup and the read of the flag both happen with nat_tree_lock held
 * for reading. Returns false when no entry for @nid is in the nat cache.
 */
bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *ne;
	bool done;

	read_lock(&nm_i->nat_tree_lock);
	ne = __lookup_nat_cache(nm_i, nid);
	done = ne ? ne->fsync_done : false;
	read_unlock(&nm_i->nat_tree_lock);

	return done;
}
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
......@@ -164,6 +160,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
}
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
new->checkpointed = true;
list_add_tail(&new->list, &nm_i->nat_entries);
nm_i->nat_cnt++;
return new;
......@@ -185,13 +182,12 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
nat_set_ino(e, le32_to_cpu(ne->ino));
nat_set_version(e, ne->version);
e->checkpointed = true;
}
write_unlock(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
block_t new_blkaddr)
block_t new_blkaddr, bool fsync_done)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
......@@ -205,7 +201,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
goto retry;
}
e->ni = *ni;
e->checkpointed = true;
f2fs_bug_on(ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
/*
......@@ -217,9 +212,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
f2fs_bug_on(ni->blk_addr != NULL_ADDR);
}
if (new_blkaddr == NEW_ADDR)
e->checkpointed = false;
/* sanity check */
f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
......@@ -239,6 +231,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
__set_nat_cache_dirty(nm_i, e);
/* update fsync_mark if its inode nat entry is still alive */
e = __lookup_nat_cache(nm_i, ni->ino);
if (e)
e->fsync_done = fsync_done;
write_unlock(&nm_i->nat_tree_lock);
}
......@@ -246,7 +243,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD)
if (available_free_memory(nm_i, NAT_ENTRIES))
return 0;
write_lock(&nm_i->nat_tree_lock);
......@@ -505,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn)
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, dn->inode);
set_node_addr(sbi, &ni, NULL_ADDR);
set_node_addr(sbi, &ni, NULL_ADDR, false);
if (dn->nid == dn->inode->i_ino) {
remove_orphan_inode(sbi, dn->nid);
......@@ -763,7 +760,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
f2fs_put_page(page, 1);
goto restart;
}
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, NODE);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
......@@ -852,7 +849,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
dn->nid, AOP_FLAG_NOFS);
if (!page)
return ERR_PTR(-ENOMEM);
......@@ -867,14 +865,14 @@ struct page *new_node_page(struct dnode_of_data *dn,
f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
new_ni = old_ni;
new_ni.ino = dn->inode->i_ino;
set_node_addr(sbi, &new_ni, NEW_ADDR);
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
set_page_dirty(page);
if (ofs == XATTR_NODE_OFFSET)
if (f2fs_has_xattr_block(ofs))
F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
dn->node_page = page;
......@@ -948,7 +946,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
struct page *page;
int err;
repeat:
page = grab_cache_page(NODE_MAPPING(sbi), nid);
page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
nid, AOP_FLAG_NOFS);
if (!page)
return ERR_PTR(-ENOMEM);
......@@ -959,7 +958,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
goto got_it;
lock_page(page);
if (unlikely(!PageUptodate(page))) {
if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
......@@ -968,7 +967,6 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
goto repeat;
}
got_it:
f2fs_bug_on(nid != nid_of_node(page));
mark_page_accessed(page);
return page;
}
......@@ -1168,7 +1166,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
continue;
if (ino && ino_of_node(page) == ino) {
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, NODE);
if (TestClearPageError(page))
ret = -EIO;
}
......@@ -1201,7 +1199,7 @@ static int f2fs_write_node_page(struct page *page,
if (unlikely(sbi->por_doing))
goto redirty_out;
wait_on_page_writeback(page);
f2fs_wait_on_page_writeback(page, NODE);
/* get old block addr of this node page */
nid = nid_of_node(page);
......@@ -1222,7 +1220,7 @@ static int f2fs_write_node_page(struct page *page,
mutex_lock(&sbi->node_write);
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
mutex_unlock(&sbi->node_write);
unlock_page(page);
......@@ -1231,35 +1229,32 @@ static int f2fs_write_node_page(struct page *page,
redirty_out:
dec_page_count(sbi, F2FS_DIRTY_NODES);
wbc->pages_skipped++;
account_page_redirty(page);
set_page_dirty(page);
return AOP_WRITEPAGE_ACTIVATE;
}
/*
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
* By default, 512 pages (2MB) * 3 node types is more reasonable.
*/
#define COLLECT_DIRTY_NODES 1536
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
long nr_to_write = wbc->nr_to_write;
long diff;
/* balancing f2fs's metadata in background */
f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
return 0;
if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
goto skip_write;
/* if mounting has failed, skip writing node pages */
wbc->nr_to_write = 3 * max_hw_blocks(sbi);
diff = nr_pages_to_write(sbi, NODE, wbc);
wbc->sync_mode = WB_SYNC_NONE;
sync_node_pages(sbi, 0, wbc);
wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) -
wbc->nr_to_write);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return 0;
skip_write:
wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
return 0;
}
......@@ -1307,22 +1302,17 @@ const struct address_space_operations f2fs_node_aops = {
.releasepage = f2fs_release_node_page,
};
static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
nid_t n)
{
struct list_head *this;
struct free_nid *i;
list_for_each(this, head) {
i = list_entry(this, struct free_nid, list);
if (i->nid == n)
return i;
}
return NULL;
return radix_tree_lookup(&nm_i->free_nid_root, n);
}
static void __del_from_free_nid_list(struct free_nid *i)
static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
struct free_nid *i)
{
list_del(&i->list);
kmem_cache_free(free_nid_slab, i);
radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
......@@ -1331,7 +1321,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
struct nat_entry *ne;
bool allocated = false;
if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
if (!available_free_memory(nm_i, FREE_NIDS))
return -1;
/* 0 nid should not be used */
......@@ -1342,7 +1332,8 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
/* do not add allocated nids */
read_lock(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
if (ne &&
(!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true;
read_unlock(&nm_i->nat_tree_lock);
if (allocated)
......@@ -1354,7 +1345,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
i->state = NID_NEW;
spin_lock(&nm_i->free_nid_list_lock);
if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
spin_unlock(&nm_i->free_nid_list_lock);
kmem_cache_free(free_nid_slab, i);
return 0;
......@@ -1368,13 +1359,19 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct free_nid *i;
bool need_free = false;
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
i = __lookup_free_nid_list(nm_i, nid);
if (i && i->state == NID_NEW) {
__del_from_free_nid_list(i);
__del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
need_free = true;
}
spin_unlock(&nm_i->free_nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
}
static void scan_nat_page(struct f2fs_nm_info *nm_i,
......@@ -1413,7 +1410,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
return;
/* readahead nat pages to be scanned */
ra_nat_pages(sbi, nid);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
......@@ -1454,7 +1451,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
struct list_head *this;
retry:
if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
return false;
......@@ -1462,13 +1458,11 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
spin_lock(&nm_i->free_nid_list_lock);
/* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !sbi->on_build_free_nids) {
if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
f2fs_bug_on(list_empty(&nm_i->free_nid_list));
list_for_each(this, &nm_i->free_nid_list) {
i = list_entry(this, struct free_nid, list);
list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
break;
}
f2fs_bug_on(i->state != NID_NEW);
*nid = i->nid;
......@@ -1481,9 +1475,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
/* Let's scan nat pages and its caches to get free nids */
mutex_lock(&nm_i->build_lock);
sbi->on_build_free_nids = true;
build_free_nids(sbi);
sbi->on_build_free_nids = false;
mutex_unlock(&nm_i->build_lock);
goto retry;
}
......@@ -1497,10 +1489,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
struct free_nid *i;
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(!i || i->state != NID_ALLOC);
__del_from_free_nid_list(i);
__del_from_free_nid_list(nm_i, i);
spin_unlock(&nm_i->free_nid_list_lock);
kmem_cache_free(free_nid_slab, i);
}
/*
......@@ -1510,20 +1504,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
bool need_free = false;
if (!nid)
return;
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(!i || i->state != NID_ALLOC);
if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
__del_from_free_nid_list(i);
if (!available_free_memory(nm_i, FREE_NIDS)) {
__del_from_free_nid_list(nm_i, i);
need_free = true;
} else {
i->state = NID_NEW;
nm_i->fcnt++;
}
spin_unlock(&nm_i->free_nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
}
void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
......@@ -1531,10 +1530,83 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
block_t new_blkaddr)
{
rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
set_node_addr(sbi, ni, new_blkaddr);
set_node_addr(sbi, ni, new_blkaddr, false);
clear_node_page_dirty(page);
}
/*
 * Copy the inline xattr area found in @page into the node page that
 * currently backs @inode.
 *
 * The copy is skipped unless all of the following hold: the in-memory inode
 * has inline xattrs, @page is an inode page, and the raw inode stored in
 * @page has F2FS_INLINE_XATTR set in i_inline.
 */
void recover_inline_xattr(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct f2fs_inode *raw;
	struct page *ipage;
	void *from, *to;
	size_t len;

	if (!f2fs_has_inline_xattr(inode) || !IS_INODE(page))
		return;

	raw = F2FS_INODE(page);
	if (!(raw->i_inline & F2FS_INLINE_XATTR))
		return;

	/* failing to get the inode's own node page is treated as a bug */
	ipage = get_node_page(sbi, inode->i_ino);
	f2fs_bug_on(IS_ERR(ipage));

	to = inline_xattr_addr(ipage);
	from = inline_xattr_addr(page);
	len = inline_xattr_size(inode);
	memcpy(to, from, len);

	update_inode(inode, ipage);
	f2fs_put_page(ipage, 1);
}
/*
 * Recover xattr state for @inode from the node page @page located at
 * @blkaddr.
 *
 * Inline xattrs are always handed to recover_inline_xattr() first.  If @page
 * is not an xattr node block, nothing else is done and false is returned.
 * Otherwise the inode's previous xattr nid (if any) is released, the nid of
 * @page becomes the inode's xattr nid, and its node address is pointed at
 * @blkaddr; true is returned.
 */
bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	nid_t old_xnid = F2FS_I(inode)->i_xattr_nid;
	nid_t xnid = nid_of_node(page);
	struct node_info ni;

	recover_inline_xattr(inode, page);

	if (!f2fs_has_xattr_block(ofs_of_node(page)))
		return false;

	/* 1: invalidate the previous xattr nid, when there is one */
	if (old_xnid) {
		/* Deallocate node address */
		get_node_info(sbi, old_xnid, &ni);
		f2fs_bug_on(ni.blk_addr == NULL_ADDR);
		invalidate_blocks(sbi, ni.blk_addr);
		dec_valid_node_count(sbi, inode);
		set_node_addr(sbi, &ni, NULL_ADDR, false);
	}

	/* 2: allocate new xattr nid */
	if (unlikely(!inc_valid_node_count(sbi, inode)))
		f2fs_bug_on(1);

	remove_free_nid(NM_I(sbi), xnid);
	get_node_info(sbi, xnid, &ni);
	ni.ino = inode->i_ino;
	set_node_addr(sbi, &ni, NEW_ADDR, false);
	F2FS_I(inode)->i_xattr_nid = xnid;

	/* 3: update xattr blkaddr */
	refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
	set_node_addr(sbi, &ni, blkaddr, false);

	update_inode_page(inode);
	return true;
}
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *src, *dst;
......@@ -1567,7 +1639,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
if (unlikely(!inc_valid_node_count(sbi, NULL)))
WARN_ON(1);
set_node_addr(sbi, &new_ni, NEW_ADDR);
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
inc_valid_inode_count(sbi);
f2fs_put_page(ipage, 1);
return 0;
......@@ -1590,15 +1662,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
for (; page_idx < start + nrpages; page_idx++) {
/* alloc temporal page for read node summary info*/
page = alloc_page(GFP_F2FS_ZERO);
if (!page) {
struct page *tmp;
list_for_each_entry_safe(page, tmp, pages, lru) {
list_del(&page->lru);
unlock_page(page);
__free_pages(page, 0);
}
return -ENOMEM;
}
if (!page)
break;
lock_page(page);
page->index = page_idx;
......@@ -1609,7 +1674,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
f2fs_submit_page_mbio(sbi, page, page->index, &fio);
f2fs_submit_merged_bio(sbi, META, READ);
return 0;
return page_idx - start;
}
int restore_node_summary(struct f2fs_sb_info *sbi,
......@@ -1628,15 +1694,17 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
addr = START_BLOCK(sbi, segno);
sum_entry = &sum->entries[0];
for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
nrpages = min(last_offset - i, bio_blocks);
/* read ahead node pages */
err = ra_sum_pages(sbi, &page_list, addr, nrpages);
if (err)
return err;
nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
if (!nrpages)
return -ENOMEM;
list_for_each_entry_safe(page, tmp, &page_list, lru) {
if (err)
goto skip;
lock_page(page);
if (unlikely(!PageUptodate(page))) {
......@@ -1648,9 +1716,9 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
sum_entry->ofs_in_node = 0;
sum_entry++;
}
list_del(&page->lru);
unlock_page(page);
skip:
list_del(&page->lru);
__free_pages(page, 0);
}
}
......@@ -1709,7 +1777,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
struct list_head *cur, *n;
struct nat_entry *ne, *cur;
struct page *page = NULL;
struct f2fs_nat_block *nat_blk = NULL;
nid_t start_nid = 0, end_nid = 0;
......@@ -1721,18 +1789,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
mutex_lock(&curseg->curseg_mutex);
/* 1) flush dirty nat caches */
list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
struct nat_entry *ne;
list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
nid_t nid;
struct f2fs_nat_entry raw_ne;
int offset = -1;
block_t new_blkaddr;
ne = list_entry(cur, struct nat_entry, list);
nid = nat_get_nid(ne);
if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;
nid = nat_get_nid(ne);
if (flushed)
goto to_nat_page;
......@@ -1783,16 +1850,12 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
} else {
write_lock(&nm_i->nat_tree_lock);
__clear_nat_cache_dirty(nm_i, ne);
ne->checkpointed = true;
write_unlock(&nm_i->nat_tree_lock);
}
}
if (!flushed)
mutex_unlock(&curseg->curseg_mutex);
f2fs_put_page(page, 1);
/* 2) shrink nat caches if necessary */
try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
}
static int init_node_manager(struct f2fs_sb_info *sbi)
......@@ -1807,10 +1870,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
/* segment_count_nat includes pair segment so divide to 2. */
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
......@@ -1864,8 +1931,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
f2fs_bug_on(i->state == NID_ALLOC);
__del_from_free_nid_list(i);
__del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock);
kmem_cache_free(free_nid_slab, i);
spin_lock(&nm_i->free_nid_list_lock);
}
f2fs_bug_on(nm_i->fcnt);
spin_unlock(&nm_i->free_nid_list_lock);
......@@ -1875,11 +1945,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
for (idx = 0; idx < found; idx++) {
struct nat_entry *e = natvec[idx];
nid = nat_get_nid(e) + 1;
__del_from_nat_cache(nm_i, e);
}
nid = nat_get_nid(natvec[found - 1]) + 1;
for (idx = 0; idx < found; idx++)
__del_from_nat_cache(nm_i, natvec[idx]);
}
f2fs_bug_on(nm_i->nat_cnt);
write_unlock(&nm_i->nat_tree_lock);
......@@ -1892,12 +1960,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
int __init create_node_manager_caches(void)
{
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
sizeof(struct nat_entry), NULL);
sizeof(struct nat_entry));
if (!nat_entry_slab)
return -ENOMEM;
free_nid_slab = f2fs_kmem_cache_create("free_nid",
sizeof(struct free_nid), NULL);
sizeof(struct free_nid));
if (!free_nid_slab) {
kmem_cache_destroy(nat_entry_slab);
return -ENOMEM;
......
......@@ -17,14 +17,11 @@
/* # of pages to perform readahead before building free nids */
#define FREE_NID_PAGES 4
/* maximum # of free node ids to produce during build_free_nids */
#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE 128
/* maximum cached nat entries to manage memory footprint */
#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK)
/* control the memory footprint threshold (10MB per 1GB ram) */
#define DEF_RAM_THRESHOLD 10
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
......@@ -45,6 +42,7 @@ struct node_info {
/*
 * One cached NAT entry: the in-memory node_info for a node id, together
 * with the list linkage and the two state flags described on the fields.
 */
struct nat_entry {
struct list_head list; /* for clean or dirty nat list */
bool checkpointed; /* whether it is checkpointed or not */
bool fsync_done; /* whether the latest node has fsync mark */
struct node_info ni; /* in-memory node information */
};
......@@ -58,9 +56,15 @@ struct nat_entry {
#define nat_set_version(nat, v) (nat->ni.version = v)
#define __set_nat_cache_dirty(nm_i, ne) \
list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
do { \
ne->checkpointed = false; \
list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
} while (0);
#define __clear_nat_cache_dirty(nm_i, ne) \
list_move_tail(&ne->list, &nm_i->nat_entries);
do { \
ne->checkpointed = true; \
list_move_tail(&ne->list, &nm_i->nat_entries); \
} while (0);
#define inc_node_version(version) (++version)
static inline void node_info_from_raw_nat(struct node_info *ni,
......@@ -71,6 +75,11 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
ni->version = raw_ne->version;
}
/*
 * Selects which node-manager object class a memory check refers to;
 * passed as the second argument of available_free_memory().
 */
enum nid_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES /* indicates the cached nat entry */
};
/*
* For free nid management
*/
......@@ -236,7 +245,7 @@ static inline bool IS_DNODE(struct page *node_page)
{
unsigned int ofs = ofs_of_node(node_page);
if (ofs == XATTR_NODE_OFFSET)
if (f2fs_has_xattr_block(ofs))
return false;
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
......
......@@ -27,14 +27,12 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
nid_t ino)
{
struct list_head *this;
struct fsync_inode_entry *entry;
list_for_each(this, head) {
entry = list_entry(this, struct fsync_inode_entry, list);
list_for_each_entry(entry, head, list)
if (entry->inode->i_ino == ino)
return entry;
}
return NULL;
}
......@@ -136,7 +134,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* read node page */
page = alloc_page(GFP_F2FS_ZERO);
......@@ -218,13 +216,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
{
struct seg_entry *sentry;
unsigned int segno = GET_SEGNO(sbi, blkaddr);
unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
(sbi->blocks_per_seg - 1);
unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
struct f2fs_summary_block *sum_node;
struct f2fs_summary sum;
struct page *sum_page, *node_page;
nid_t ino, nid;
void *kaddr;
struct inode *inode;
struct page *node_page;
unsigned int offset;
block_t bidx;
int i;
......@@ -238,18 +235,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
struct curseg_info *curseg = CURSEG_I(sbi, i);
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
break;
goto got_it;
}
}
if (i > CURSEG_COLD_DATA) {
struct page *sum_page = get_sum_page(sbi, segno);
struct f2fs_summary_block *sum_node;
kaddr = page_address(sum_page);
sum_node = (struct f2fs_summary_block *)kaddr;
sum = sum_node->entries[blkoff];
f2fs_put_page(sum_page, 1);
}
sum_page = get_sum_page(sbi, segno);
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
sum = sum_node->entries[blkoff];
f2fs_put_page(sum_page, 1);
got_it:
/* Use the locked dnode page and inode */
nid = le32_to_cpu(sum.nid);
if (dn->inode->i_ino == nid) {
......@@ -301,6 +295,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (recover_inline_data(inode, page))
goto out;
if (recover_xattr_data(inode, page, blkaddr))
goto out;
start = start_bidx_of_node(ofs_of_node(page), fi);
if (IS_INODE(page))
end = start + ADDRS_PER_INODE(fi);
......@@ -317,7 +314,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
goto out;
}
wait_on_page_writeback(dn.node_page);
f2fs_wait_on_page_writeback(dn.node_page, NODE);
get_node_info(sbi, dn.nid, &ni);
f2fs_bug_on(ni.ino != ino_of_node(page));
......@@ -437,7 +434,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
bool need_writecp = false;
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry), NULL);
sizeof(struct fsync_inode_entry));
if (!fsync_entry_slab)
return -ENOMEM;
......
......@@ -13,6 +13,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
......@@ -24,6 +25,7 @@
#define __reverse_ffz(x) __reverse_ffs(~(x))
static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *flush_cmd_slab;
/*
* __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
......@@ -195,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
f2fs_sync_fs(sbi->sb, true);
}
/*
 * Kernel thread body that services merged flush requests.
 *
 * @data is the struct f2fs_sb_info the thread was started for.  On each pass
 * the commands queued on sm_i->issue_list are moved to sm_i->dispatch_list
 * under issue_lock, a single empty WRITE_FLUSH bio is submitted to the
 * backing device, and the result of that one flush is stored in every
 * command of the batch before the command is completed.  The thread then
 * sleeps until new commands are queued or it is asked to stop.
 *
 * Returns 0 once kthread_should_stop() reports true.
 */
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct f2fs_sm_info *sm_i = SM_I(sbi);
	wait_queue_head_t *q = &sm_i->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	/* take over the whole pending list as the batch to dispatch */
	spin_lock(&sm_i->issue_lock);
	if (sm_i->issue_list) {
		sm_i->dispatch_list = sm_i->issue_list;
		sm_i->issue_list = sm_i->issue_tail = NULL;
	}
	spin_unlock(&sm_i->issue_lock);

	if (sm_i->dispatch_list) {
		struct bio *bio = bio_alloc(GFP_NOIO, 0);
		struct flush_cmd *cmd, *next;
		int ret;

		bio->bi_bdev = sbi->sb->s_bdev;
		ret = submit_bio_wait(WRITE_FLUSH, bio);

		for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
			cmd->ret = ret;
			/* fetch ->next first: the waiter frees cmd after completion */
			next = cmd->next;
			complete(&cmd->wait);
		}

		/*
		 * submit_bio_wait() leaves our reference on the bio in place;
		 * drop it here so one bio is not leaked per flush batch.
		 */
		bio_put(bio);
		sm_i->dispatch_list = NULL;
	}

	wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
	goto repeat;
}
/*
 * Flush the backing block device's cache and wait for the result.
 *
 * Without the FLUSH_MERGE mount option this is a plain blkdev_issue_flush().
 * With it, a flush_cmd is appended to sm_i->issue_list, the flush wait queue
 * is woken when no batch is currently being dispatched, and the caller
 * blocks until its command has been completed.
 *
 * Returns the blkdev_issue_flush() result, or the value left in the
 * command's ->ret field when it was completed.
 */
int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_i = SM_I(sbi);
	struct flush_cmd *req;
	int err;

	if (!test_opt(sbi, FLUSH_MERGE))
		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);

	req = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
	init_completion(&req->wait);
	req->ret = 0;
	req->next = NULL;

	/* append the request to the tail of the pending list */
	spin_lock(&sm_i->issue_lock);
	if (!sm_i->issue_list)
		sm_i->issue_list = req;
	else
		sm_i->issue_tail->next = req;
	sm_i->issue_tail = req;
	spin_unlock(&sm_i->issue_lock);

	if (!sm_i->dispatch_list)
		wake_up(&sm_i->flush_wait_queue);

	wait_for_completion(&req->wait);
	err = req->ret;
	kmem_cache_free(flush_cmd_slab, req);

	return err;
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
......@@ -340,8 +409,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
struct list_head *head = &(SM_I(sbi)->discard_list);
struct list_head *this, *next;
struct discard_entry *entry;
struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int total_segs = TOTAL_SEGS(sbi);
......@@ -370,8 +438,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
/* send small discards */
list_for_each_safe(this, next, head) {
entry = list_entry(this, struct discard_entry, list);
list_for_each_entry_safe(entry, this, head, list) {
f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
list_del(&entry->list);
SM_I(sbi)->nr_discards -= entry->len;
......@@ -405,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
se = get_seg_entry(sbi, segno);
new_vblocks = se->valid_blocks + del;
offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
(new_vblocks > sbi->blocks_per_seg)));
......@@ -434,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
get_sec_entry(sbi, segno)->valid_blocks += del;
}
static void refresh_sit_entry(struct f2fs_sb_info *sbi,
block_t old_blkaddr, block_t new_blkaddr)
void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
update_sit_entry(sbi, new_blkaddr, 1);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
update_sit_entry(sbi, old_blkaddr, -1);
update_sit_entry(sbi, new, 1);
if (GET_SEGNO(sbi, old) != NULL_SEGNO)
update_sit_entry(sbi, old, -1);
locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
......@@ -881,17 +950,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
if (!__has_curseg_space(sbi, type))
sit_i->s_ops->allocate_segment(sbi, type, false);
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
if (!__has_curseg_space(sbi, type))
sit_i->s_ops->allocate_segment(sbi, type, false);
locate_dirty_segment(sbi, old_cursegno);
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
if (page && IS_NODESEG(type))
......@@ -987,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
change_curseg(sbi, type, true);
}
curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
(sbi->blocks_per_seg - 1);
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
locate_dirty_segment(sbi, old_cursegno);
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
......@@ -1028,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
curseg->next_segno = segno;
change_curseg(sbi, type, true);
}
curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
(sbi->blocks_per_seg - 1);
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
/* change the current log to the next block addr in advance */
......@@ -1037,28 +1100,50 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
curseg->next_segno = next_segno;
change_curseg(sbi, type, true);
}
curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
(sbi->blocks_per_seg - 1);
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
/* rewrite node page */
set_page_writeback(page);
f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
f2fs_submit_merged_bio(sbi, NODE, WRITE);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
locate_dirty_segment(sbi, old_cursegno);
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
}
/*
 * Report whether @page is one of the pages held in the pending write bio
 * for @type.
 *
 * The write_io slot is chosen with PAGE_TYPE_OF_BIO(@type) and inspected
 * with its io_rwsem held for read.  Returns false when no bio is pending
 * or @page is not among its segments.
 */
static inline bool is_merged_page(struct f2fs_sb_info *sbi,
					struct page *page, enum page_type type)
{
	struct f2fs_bio_info *io = &sbi->write_io[PAGE_TYPE_OF_BIO(type)];
	struct bio_vec *bvec;
	bool found = false;
	int i;

	down_read(&io->io_rwsem);
	if (io->bio) {
		bio_for_each_segment_all(bvec, io->bio, i) {
			if (bvec->bv_page == page) {
				found = true;
				break;
			}
		}
	}
	up_read(&io->io_rwsem);

	return found;
}
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
if (PageWriteback(page)) {
f2fs_submit_merged_bio(sbi, type, WRITE);
if (is_merged_page(sbi, page, type))
f2fs_submit_merged_bio(sbi, type, WRITE);
wait_on_page_writeback(page);
}
}
......@@ -1167,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
if (restore_node_summary(sbi, segno, sum)) {
int err;
err = restore_node_summary(sbi, segno, sum);
if (err) {
f2fs_put_page(new, 1);
return -EINVAL;
return err;
}
}
}
......@@ -1190,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
int type = CURSEG_HOT_DATA;
int err;
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
/* restore for compacted data summary */
......@@ -1198,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
type = CURSEG_HOT_NODE;
}
for (; type <= CURSEG_COLD_NODE; type++)
if (read_normal_summaries(sbi, type))
return -EINVAL;
for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
if (err)
return err;
}
return 0;
}
......@@ -1583,47 +1675,6 @@ static int build_curseg(struct f2fs_sb_info *sbi)
return restore_curseg_summaries(sbi);
}
static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
block_t blk_addr, prev_blk_addr = 0;
int sit_blk_cnt = SIT_BLK_CNT(sbi);
int blkno = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
if (blkno != start && prev_blk_addr + 1 != blk_addr)
break;
prev_blk_addr = blk_addr;
repeat:
page = grab_cache_page(mapping, blk_addr);
if (!page) {
cond_resched();
goto repeat;
}
if (PageUptodate(page)) {
mark_page_accessed(page);
f2fs_put_page(page, 1);
continue;
}
f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
mark_page_accessed(page);
f2fs_put_page(page, 0);
}
f2fs_submit_merged_bio(sbi, META, READ);
return blkno - start;
}
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
......@@ -1635,7 +1686,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
do {
readed = ra_sit_pages(sbi, start_blk, nrpages);
readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
start = start_blk * sit_i->sents_per_block;
end = (start_blk + readed) * sit_i->sents_per_block;
......@@ -1781,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
dev_t dev = sbi->sb->s_bdev->bd_dev;
struct f2fs_sm_info *sm_info;
int err;
......@@ -1799,7 +1851,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
sm_info->rec_prefree_segments = sm_info->main_segments *
DEF_RECLAIM_PREFREE_SEGMENTS / 100;
sm_info->ipu_policy = F2FS_IPU_DISABLE;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
......@@ -1807,6 +1860,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
if (test_opt(sbi, FLUSH_MERGE)) {
spin_lock_init(&sm_info->issue_lock);
init_waitqueue_head(&sm_info->flush_wait_queue);
sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(sm_info->f2fs_issue_flush))
return PTR_ERR(sm_info->f2fs_issue_flush);
}
err = build_sit_info(sbi);
if (err)
return err;
......@@ -1915,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
struct f2fs_sm_info *sm_info = SM_I(sbi);
if (!sm_info)
return;
if (sm_info->f2fs_issue_flush)
kthread_stop(sm_info->f2fs_issue_flush);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
......@@ -1926,13 +1991,20 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
int __init create_segment_manager_caches(void)
{
discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
sizeof(struct discard_entry), NULL);
sizeof(struct discard_entry));
if (!discard_entry_slab)
return -ENOMEM;
flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
sizeof(struct flush_cmd));
if (!flush_cmd_slab) {
kmem_cache_destroy(discard_entry_slab);
return -ENOMEM;
}
return 0;
}
/* Destroy both slab caches owned by the segment manager. */
void destroy_segment_manager_caches(void)
{
	kmem_cache_destroy(flush_cmd_slab);
	kmem_cache_destroy(discard_entry_slab);
}
......@@ -14,7 +14,7 @@
#define NULL_SEGNO ((unsigned int)(~0))
#define NULL_SECNO ((unsigned int)(~0))
#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */
#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
......@@ -57,6 +57,9 @@
((blk_addr) - SM_I(sbi)->seg0_blkaddr)
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
......@@ -377,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
static inline block_t written_block_count(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
block_t vblocks;
mutex_lock(&sit_i->sentry_lock);
vblocks = sit_i->written_valid_blocks;
mutex_unlock(&sit_i->sentry_lock);
return vblocks;
return SIT_I(sbi)->written_valid_blocks;
}
static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int free_segs;
read_lock(&free_i->segmap_lock);
free_segs = free_i->free_segments;
read_unlock(&free_i->segmap_lock);
return free_segs;
return FREE_I(sbi)->free_segments;
}
static inline int reserved_segments(struct f2fs_sb_info *sbi)
......@@ -406,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int free_secs;
read_lock(&free_i->segmap_lock);
free_secs = free_i->free_sections;
read_unlock(&free_i->segmap_lock);
return free_secs;
return FREE_I(sbi)->free_sections;
}
static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
......@@ -682,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
struct request_queue *q = bdev_get_queue(bdev);
return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
}
/*
 * Gathering dirty pages and writing them at once lets us submit one big bio
 * without interfering with other data writes.  This returns the per-type
 * page count used for that purpose:
 *   DATA: one segment worth of blocks,
 *   NODE: three segments worth of blocks,
 *   META: MAX_BIO_BLOCKS() of the device's max hardware blocks,
 *   anything else: 0.
 */
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
	switch (type) {
	case DATA:
		return sbi->blocks_per_seg;
	case NODE:
		return 3 * sbi->blocks_per_seg;
	case META:
		return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
	default:
		return 0;
	}
}
/*
 * For non-sync writeback, replace wbc->nr_to_write with a type-dependent
 * target so that writes are issued in larger, better aligned batches.
 *
 * Returns 0 and leaves @wbc untouched when sync_mode is not WB_SYNC_NONE.
 * Otherwise returns the difference between the new target and the value
 * @wbc carried on entry.
 */
static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
					struct writeback_control *wbc)
{
	long requested, target;

	if (wbc->sync_mode != WB_SYNC_NONE)
		return 0;

	requested = wbc->nr_to_write;

	switch (type) {
	case DATA:
		target = 4096;
		break;
	case NODE:
		target = 3 * max_hw_blocks(sbi);
		break;
	default:
		target = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
		break;
	}

	wbc->nr_to_write = target;
	return target - requested;
}
......@@ -51,6 +51,7 @@ enum {
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_inline_data,
Opt_flush_merge,
Opt_err,
};
......@@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = {
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_flush_merge, "flush_merge"},
{Opt_err, NULL},
};
......@@ -74,6 +76,7 @@ static match_table_t f2fs_tokens = {
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
};
......@@ -92,6 +95,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
else if (struct_type == NM_INFO)
return (unsigned char *)NM_I(sbi);
else if (struct_type == F2FS_SBI)
return (unsigned char *)sbi;
return NULL;
......@@ -183,7 +188,9 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
......@@ -196,6 +203,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
NULL,
};
......@@ -256,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)
if (!name)
return -ENOMEM;
if (!strncmp(name, "on", 2))
if (strlen(name) == 2 && !strncmp(name, "on", 2))
set_opt(sbi, BG_GC);
else if (!strncmp(name, "off", 3))
else if (strlen(name) == 3 && !strncmp(name, "off", 3))
clear_opt(sbi, BG_GC);
else {
kfree(name);
......@@ -327,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_inline_data:
set_opt(sbi, INLINE_DATA);
break;
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
......@@ -353,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
fi->i_current_depth = 1;
fi->i_advise = 0;
rwlock_init(&fi->ext.ext_lock);
init_rwsem(&fi->i_sem);
set_inode_flag(fi, FI_NEW_INODE);
if (test_opt(F2FS_SB(sb), INLINE_XATTR))
set_inode_flag(fi, FI_INLINE_XATTR);
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
return &fi->vfs_inode;
}
......@@ -526,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
if (test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
......@@ -539,13 +557,22 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
le32_to_cpu(sbi->raw_super->segment_count_main);
int i;
seq_puts(seq, "format: segment_type|valid_blocks\n"
"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
for (i = 0; i < total_segs; i++) {
seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1));
if (i != 0 && (i % 10) == 0)
seq_puts(seq, "\n");
struct seg_entry *se = get_seg_entry(sbi, i);
if ((i % 10) == 0)
seq_printf(seq, "%-5d", i);
seq_printf(seq, "%d|%-3u", se->type,
get_valid_blocks(sbi, i, 1));
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
seq_puts(seq, " ");
seq_putc(seq, ' ');
}
return 0;
}
......@@ -640,6 +667,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
if (unlikely(ino < F2FS_ROOT_INO(sbi)))
return ERR_PTR(-ESTALE);
if (unlikely(ino >= NM_I(sbi)->max_nid))
return ERR_PTR(-ESTALE);
/*
* f2fs_iget isn't quite right if the inode is currently unallocated!
......@@ -787,6 +816,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
sbi->dir_level = DEF_DIR_LEVEL;
}
/*
......@@ -898,11 +929,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
mutex_init(&sbi->read_io.io_mutex);
init_rwsem(&sbi->read_io.io_rwsem);
sbi->read_io.sbi = sbi;
sbi->read_io.bio = NULL;
for (i = 0; i < NR_PAGE_TYPE; i++) {
mutex_init(&sbi->write_io[i].io_mutex);
init_rwsem(&sbi->write_io[i].io_rwsem);
sbi->write_io[i].sbi = sbi;
sbi->write_io[i].bio = NULL;
}
......@@ -991,28 +1022,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_root_inode;
}
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
err = recover_fsync_data(sbi);
if (err)
f2fs_msg(sb, KERN_ERR,
"Cannot recover all fsync data errno=%ld", err);
}
/*
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
if (!(sb->s_flags & MS_RDONLY)) {
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
goto free_gc;
}
err = f2fs_build_stats(sbi);
if (err)
goto free_gc;
goto free_root_inode;
if (f2fs_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
......@@ -1034,17 +1046,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
"%s", sb->s_id);
if (err)
goto fail;
goto free_proc;
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
err = recover_fsync_data(sbi);
if (err)
f2fs_msg(sb, KERN_ERR,
"Cannot recover all fsync data errno=%ld", err);
}
/*
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
if (!(sb->s_flags & MS_RDONLY)) {
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
goto free_kobj;
}
return 0;
fail:
free_kobj:
kobject_del(&sbi->s_kobj);
free_proc:
if (sbi->s_proc) {
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry(sb->s_id, f2fs_proc_root);
}
f2fs_destroy_stats(sbi);
free_gc:
stop_gc_thread(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
......@@ -1084,7 +1115,7 @@ MODULE_ALIAS_FS("f2fs");
static int __init init_inodecache(void)
{
f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
sizeof(struct f2fs_inode_info), NULL);
sizeof(struct f2fs_inode_info));
if (!f2fs_inode_cachep)
return -ENOMEM;
return 0;
......
......@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_KERNEL);
txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
if (!txattr_addr)
return NULL;
......@@ -407,6 +407,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
if (name == NULL)
return -EINVAL;
name_len = strlen(name);
if (name_len > F2FS_NAME_LEN)
return -ERANGE;
base_addr = read_all_xattrs(inode, NULL);
if (!base_addr)
......@@ -590,7 +592,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
/* protect xattr_ver */
down_write(&F2FS_I(inode)->i_sem);
err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
return err;
......
......@@ -183,7 +183,7 @@ struct f2fs_inode {
__le32 i_pino; /* parent inode number */
__le32 i_namelen; /* file name length */
__u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */
__u8 i_reserved2; /* for backward compatibility */
__u8 i_dir_level; /* dentry_level for large dir */
struct f2fs_extent i_ext; /* caching a largest extent */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment