Commit 5e745b04, authored by Aneesh Kumar K.V, committed by Theodore Ts'o

ext4: Fix small file fragmentation

For small file block allocations, mballoc uses per-CPU prealloc
space.  Use the goal block when searching for the right prealloc
space.  Also make sure ext4_da_writepages tries to write
all the pages of a small file in a single attempt.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 91246c00
...@@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) ...@@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
static int ext4_da_writepages(struct address_space *mapping, static int ext4_da_writepages(struct address_space *mapping,
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
struct inode *inode = mapping->host;
handle_t *handle = NULL; handle_t *handle = NULL;
int needed_blocks;
int ret = 0;
long to_write;
loff_t range_start = 0; loff_t range_start = 0;
long pages_skipped = 0; struct inode *inode = mapping->host;
int needed_blocks, ret = 0, nr_to_writebump = 0;
long to_write, pages_skipped = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
/* /*
* No pages to write? This is mainly a kludge to avoid starting * No pages to write? This is mainly a kludge to avoid starting
...@@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping,
*/ */
if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0; return 0;
/*
 * Make sure nr_to_write is >= sbi->s_mb_stream_request.
 * This ensures that the blocks of a small file are
 * allocated in a single attempt, so that small files
 * get less fragmented.
 */
if (wbc->nr_to_write < sbi->s_mb_stream_request) {
nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
wbc->nr_to_write = sbi->s_mb_stream_request;
}
if (!wbc->range_cyclic) if (!wbc->range_cyclic)
/* /*
...@@ -2377,7 +2386,7 @@ static int ext4_da_writepages(struct address_space *mapping, ...@@ -2377,7 +2386,7 @@ static int ext4_da_writepages(struct address_space *mapping,
} }
out_writepages: out_writepages:
wbc->nr_to_write = to_write; wbc->nr_to_write = to_write - nr_to_writebump;
wbc->range_start = range_start; wbc->range_start = range_start;
return ret; return ret;
} }
......
...@@ -3281,6 +3281,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, ...@@ -3281,6 +3281,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
} }
/*
 * Choose whichever of two group prealloc spaces starts closer to the
 * goal block.
 *
 * @goal_block: block number the allocation would like to be near
 * @pa:         candidate prealloc space being examined
 * @cpa:        closest prealloc space found so far, or NULL if none yet
 *
 * Returns the chosen prealloc space.  The returned space holds one
 * pa_count reference taken here: when @pa is chosen its count is raised
 * and the reference previously taken on @cpa (if any) is dropped; when
 * @cpa is kept no counter is touched.  On a tie @pa is chosen.
 */
static struct ext4_prealloc_space *
ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
			struct ext4_prealloc_space *pa,
			struct ext4_prealloc_space *cpa)
{
	ext4_fsblk_t cur_distance, new_distance;

	/* First candidate seen: take a reference and make it the best so far. */
	if (cpa == NULL) {
		atomic_inc(&pa->pa_count);
		return pa;
	}

	/*
	 * Compute |goal_block - pa_pstart| directly in ext4_fsblk_t.
	 * abs() is an int-oriented helper; feeding it the difference of two
	 * wide block numbers (presumably unsigned 64-bit -- confirm against
	 * the ext4_fsblk_t typedef) can truncate or misinterpret the value
	 * and pick the wrong prealloc space.
	 */
	if (goal_block > cpa->pa_pstart)
		cur_distance = goal_block - cpa->pa_pstart;
	else
		cur_distance = cpa->pa_pstart - goal_block;

	if (goal_block > pa->pa_pstart)
		new_distance = goal_block - pa->pa_pstart;
	else
		new_distance = pa->pa_pstart - goal_block;

	if (cur_distance < new_distance)
		return cpa;

	/* drop the previous reference */
	atomic_dec(&cpa->pa_count);
	atomic_inc(&pa->pa_count);
	return pa;
}
/* /*
* search goal blocks in preallocated space * search goal blocks in preallocated space
*/ */
...@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) ...@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
int order, i; int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg; struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa; struct ext4_prealloc_space *pa, *cpa = NULL;
ext4_fsblk_t goal_block;
/* only data can be preallocated */ /* only data can be preallocated */
if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
...@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) ...@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
/* The max size of hash table is PREALLOC_TB_SIZE */ /* The max size of hash table is PREALLOC_TB_SIZE */
order = PREALLOC_TB_SIZE - 1; order = PREALLOC_TB_SIZE - 1;
goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
ac->ac_g_ex.fe_start +
le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
/*
 * Search for the prealloc space that has the
 * minimal distance from the goal block.
 */
for (i = order; i < PREALLOC_TB_SIZE; i++) { for (i = order; i < PREALLOC_TB_SIZE; i++) {
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
...@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) ...@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
spin_lock(&pa->pa_lock); spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0 && if (pa->pa_deleted == 0 &&
pa->pa_free >= ac->ac_o_ex.fe_len) { pa->pa_free >= ac->ac_o_ex.fe_len) {
atomic_inc(&pa->pa_count);
ext4_mb_use_group_pa(ac, pa); cpa = ext4_mb_check_group_pa(goal_block,
spin_unlock(&pa->pa_lock); pa, cpa);
ac->ac_criteria = 20;
rcu_read_unlock();
return 1;
} }
spin_unlock(&pa->pa_lock); spin_unlock(&pa->pa_lock);
} }
rcu_read_unlock(); rcu_read_unlock();
} }
if (cpa) {
ext4_mb_use_group_pa(ac, cpa);
ac->ac_criteria = 20;
return 1;
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment