Commit cbd7584e authored by Jan Kara, committed by Theodore Ts'o

ext4: fix block reservation for bigalloc filesystems

For bigalloc filesystems we have to check whether a newly requested inode
block is already part of a cluster for which we already have a delayed
allocation reservation. This check happens in ext4_ext_map_blocks(), and
that function sets EXT4_MAP_FROM_CLUSTER if that's the case. However, if
ext4_da_map_blocks() finds information about the block in the extent
cache, we don't call into ext4_ext_map_blocks() and thus we always end up
getting a new reservation even if the space for the cluster is already
reserved. This results in overreservation and premature ENOSPC reports.

Fix the problem by checking for an existing cluster reservation directly
in ext4_da_map_blocks(). That simplifies the logic and actually allows us
to get rid of the EXT4_MAP_FROM_CLUSTER flag completely.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent 0756b908
...@@ -158,17 +158,8 @@ struct ext4_allocation_request { ...@@ -158,17 +158,8 @@ struct ext4_allocation_request {
#define EXT4_MAP_MAPPED (1 << BH_Mapped) #define EXT4_MAP_MAPPED (1 << BH_Mapped)
#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY (1 << BH_Boundary) #define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
* ext4_map_blocks wants to know whether or not the underlying cluster has
* already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
* the requested mapping was from previously mapped (or delayed allocated)
* cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
* should never appear on buffer_head's state flags.
*/
#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
EXT4_MAP_FROM_CLUSTER)
struct ext4_map_blocks { struct ext4_map_blocks {
ext4_fsblk_t m_pblk; ext4_fsblk_t m_pblk;
...@@ -2789,16 +2780,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, ...@@ -2789,16 +2780,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
/* mmp.c */ /* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
/*
* Note that these flags will never ever appear in a buffer_head's state flag.
* See EXT4_MAP_... to see where this is used.
*/
enum ext4_state_bits {
BH_AllocFromCluster /* allocated blocks were part of already
* allocated cluster. */
= BH_JBDPrivateStart
};
/* /*
* Add new method to test whether block and inode bitmaps are properly * Add new method to test whether block and inode bitmaps are properly
* initialized. With uninit_bg reading the block from disk is not enough * initialized. With uninit_bg reading the block from disk is not enough
......
...@@ -4282,6 +4282,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4282,6 +4282,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ext4_io_end_t *io = ext4_inode_aio(inode); ext4_io_end_t *io = ext4_inode_aio(inode);
ext4_lblk_t cluster_offset; ext4_lblk_t cluster_offset;
int set_unwritten = 0; int set_unwritten = 0;
bool map_from_cluster = false;
ext_debug("blocks %u/%u requested for inode %lu\n", ext_debug("blocks %u/%u requested for inode %lu\n",
map->m_lblk, map->m_len, inode->i_ino); map->m_lblk, map->m_len, inode->i_ino);
...@@ -4358,10 +4359,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4358,10 +4359,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
} }
} }
if ((sbi->s_cluster_ratio > 1) &&
ext4_find_delalloc_cluster(inode, map->m_lblk))
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
/* /*
* requested block isn't allocated yet; * requested block isn't allocated yet;
* we couldn't try to create block if create flag is zero * we couldn't try to create block if create flag is zero
...@@ -4379,7 +4376,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4379,7 +4376,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
/* /*
* Okay, we need to do block allocation. * Okay, we need to do block allocation.
*/ */
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk); newex.ee_block = cpu_to_le32(map->m_lblk);
cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
...@@ -4391,7 +4387,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4391,7 +4387,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
ar.len = allocated = map->m_len; ar.len = allocated = map->m_len;
newblock = map->m_pblk; newblock = map->m_pblk;
map->m_flags |= EXT4_MAP_FROM_CLUSTER; map_from_cluster = true;
goto got_allocated_blocks; goto got_allocated_blocks;
} }
...@@ -4412,7 +4408,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4412,7 +4408,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
ar.len = allocated = map->m_len; ar.len = allocated = map->m_len;
newblock = map->m_pblk; newblock = map->m_pblk;
map->m_flags |= EXT4_MAP_FROM_CLUSTER; map_from_cluster = true;
goto got_allocated_blocks; goto got_allocated_blocks;
} }
...@@ -4538,7 +4534,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ...@@ -4538,7 +4534,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/ */
reserved_clusters = get_reserved_cluster_alloc(inode, reserved_clusters = get_reserved_cluster_alloc(inode,
map->m_lblk, allocated); map->m_lblk, allocated);
if (map->m_flags & EXT4_MAP_FROM_CLUSTER) { if (map_from_cluster) {
if (reserved_clusters) { if (reserved_clusters) {
/* /*
* We have clusters reserved for this range. * We have clusters reserved for this range.
......
...@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, ...@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
} }
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
up_read((&EXT4_I(inode)->i_data_sem)); up_read((&EXT4_I(inode)->i_data_sem));
/*
* Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
* because it shouldn't be marked in es_map->m_flags.
*/
map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
/* /*
* We don't check m_len because extent will be collapsed in status * We don't check m_len because extent will be collapsed in status
...@@ -1434,19 +1429,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1434,19 +1429,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* file system block. * file system block.
*/ */
down_read(&EXT4_I(inode)->i_data_sem); down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_has_inline_data(inode)) { if (ext4_has_inline_data(inode))
/*
* We will soon create blocks for this page, and let
* us pretend as if the blocks aren't allocated yet.
* In case of clusters, we have to handle the work
* of mapping from cluster so that the reserved space
* is calculated properly.
*/
if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
ext4_find_delalloc_cluster(inode, map->m_lblk))
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
retval = 0; retval = 0;
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
retval = ext4_ext_map_blocks(NULL, inode, map, retval = ext4_ext_map_blocks(NULL, inode, map,
EXT4_GET_BLOCKS_NO_PUT_HOLE); EXT4_GET_BLOCKS_NO_PUT_HOLE);
else else
...@@ -1465,7 +1450,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1465,7 +1450,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
* then we don't need to reserve it again. However we still need * then we don't need to reserve it again. However we still need
* to reserve metadata for every block we're going to write. * to reserve metadata for every block we're going to write.
*/ */
if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
!ext4_find_delalloc_cluster(inode, map->m_lblk)) {
ret = ext4_da_reserve_space(inode, iblock); ret = ext4_da_reserve_space(inode, iblock);
if (ret) { if (ret) {
/* not enough space to reserve */ /* not enough space to reserve */
...@@ -1481,11 +1467,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1481,11 +1467,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
goto out_unlock; goto out_unlock;
} }
/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
* and it should not appear on the bh->b_state.
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
map_bh(bh, inode->i_sb, invalid_block); map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh); set_buffer_new(bh);
set_buffer_delay(bh); set_buffer_delay(bh);
......
...@@ -50,8 +50,7 @@ struct extent_status; ...@@ -50,8 +50,7 @@ struct extent_status;
{ EXT4_MAP_NEW, "N" }, \ { EXT4_MAP_NEW, "N" }, \
{ EXT4_MAP_MAPPED, "M" }, \ { EXT4_MAP_MAPPED, "M" }, \
{ EXT4_MAP_UNWRITTEN, "U" }, \ { EXT4_MAP_UNWRITTEN, "U" }, \
{ EXT4_MAP_BOUNDARY, "B" }, \ { EXT4_MAP_BOUNDARY, "B" })
{ EXT4_MAP_FROM_CLUSTER, "C" })
#define show_free_flags(flags) __print_flags(flags, "|", \ #define show_free_flags(flags) __print_flags(flags, "|", \
{ EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \ { EXT4_FREE_BLOCKS_METADATA, "METADATA" }, \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment