Commit 9fe67149, authored by Eric Whitney, committed by Theodore Ts'o

ext4: adjust reserved cluster count when removing extents

Modify ext4_ext_remove_space() and the code it calls to correct the
reserved cluster count for pending reservations (delayed allocated
clusters shared with allocated blocks) when a block range is removed
from the extent tree.  Pending reservations may be found for the clusters
at the ends of written or unwritten extents when a block range is removed.
If a physical cluster at the end of an extent is freed, it's necessary
to increment the reserved cluster count to maintain correct accounting
if the corresponding logical cluster is shared with at least one
delayed and unwritten extent as found in the extents status tree.

Add a new function, ext4_rereserve_cluster(), to reapply a reservation
on a delayed allocated cluster sharing blocks with a freed allocated
cluster.  To avoid ENOSPC on reservation, a flag is applied to
ext4_free_blocks() to briefly defer updating the freeclusters counter
when an allocated cluster is freed.  This prevents another thread
from allocating the freed block before the reservation can be reapplied.

Redefine the partial cluster object as a struct to carry more state
information and to clarify the code using it.

Adjust the conditional code structure in ext4_ext_remove_space to
reduce the indentation level in the main body of the code to improve
readability.

Signed-off-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent b6bf9171
...@@ -628,6 +628,7 @@ enum { ...@@ -628,6 +628,7 @@ enum {
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
#define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040
/* /*
* ioctl commands * ioctl commands
......
...@@ -119,6 +119,19 @@ struct ext4_ext_path { ...@@ -119,6 +119,19 @@ struct ext4_ext_path {
struct buffer_head *p_bh; struct buffer_head *p_bh;
}; };
/*
* Used to record a portion of a cluster found at the beginning or end
* of an extent while traversing the extent tree during space removal.
* A partial cluster may be removed if it does not contain blocks shared
* with extents that aren't being deleted (tofree state). Otherwise,
* it cannot be removed (nofree state).
*
* NOTE(review): the meaning of the "initial" state is not spelled out
* here; presumably it means no partial cluster has been recorded yet -
* confirm against ext4_ext_remove_space() and the code it calls.
*
* The ext4_remove_blocks, ext4_ext_rm_leaf and ext4_ext_remove_space_done
* tracepoints record the state as an int and print it with %d, so the
* enumerator order below (initial = 0, tofree = 1, nofree = 2) is visible
* in trace output.
*/
struct partial_cluster {
ext4_fsblk_t pclu; /* physical cluster number */
ext4_lblk_t lblk; /* logical block number within logical cluster */
/* whether the recorded partial cluster may be freed; see comment above */
enum {initial, tofree, nofree} state;
};
/* /*
* structure for external API * structure for external API
*/ */
......
This diff is collapsed.
...@@ -4915,9 +4915,17 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, ...@@ -4915,9 +4915,17 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
&sbi->s_flex_groups[flex_group].free_clusters); &sbi->s_flex_groups[flex_group].free_clusters);
} }
/*
* On a bigalloc file system, defer the quota release and the
* s_freeclusters_counter update to the caller (ext4_ext_remove_space
* and friends) so they can determine if a cluster freed here should
* be rereserved.
*/
if (!(flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)) {
if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); percpu_counter_add(&sbi->s_freeclusters_counter,
count_clusters);
}
ext4_mb_unload_buddy(&e4b); ext4_mb_unload_buddy(&e4b);
......
...@@ -17,6 +17,7 @@ struct mpage_da_data; ...@@ -17,6 +17,7 @@ struct mpage_da_data;
struct ext4_map_blocks; struct ext4_map_blocks;
struct extent_status; struct extent_status;
struct ext4_fsmap; struct ext4_fsmap;
struct partial_cluster;
#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) #define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
...@@ -2037,19 +2038,21 @@ TRACE_EVENT(ext4_ext_show_extent, ...@@ -2037,19 +2038,21 @@ TRACE_EVENT(ext4_ext_show_extent,
TRACE_EVENT(ext4_remove_blocks, TRACE_EVENT(ext4_remove_blocks,
TP_PROTO(struct inode *inode, struct ext4_extent *ex, TP_PROTO(struct inode *inode, struct ext4_extent *ex,
ext4_lblk_t from, ext4_fsblk_t to, ext4_lblk_t from, ext4_fsblk_t to,
long long partial_cluster), struct partial_cluster *pc),
TP_ARGS(inode, ex, from, to, partial_cluster), TP_ARGS(inode, ex, from, to, pc),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
__field( ino_t, ino ) __field( ino_t, ino )
__field( ext4_lblk_t, from ) __field( ext4_lblk_t, from )
__field( ext4_lblk_t, to ) __field( ext4_lblk_t, to )
__field( long long, partial )
__field( ext4_fsblk_t, ee_pblk ) __field( ext4_fsblk_t, ee_pblk )
__field( ext4_lblk_t, ee_lblk ) __field( ext4_lblk_t, ee_lblk )
__field( unsigned short, ee_len ) __field( unsigned short, ee_len )
__field( ext4_fsblk_t, pc_pclu )
__field( ext4_lblk_t, pc_lblk )
__field( int, pc_state)
), ),
TP_fast_assign( TP_fast_assign(
...@@ -2057,14 +2060,16 @@ TRACE_EVENT(ext4_remove_blocks, ...@@ -2057,14 +2060,16 @@ TRACE_EVENT(ext4_remove_blocks,
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->from = from; __entry->from = from;
__entry->to = to; __entry->to = to;
__entry->partial = partial_cluster;
__entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_lblk = le32_to_cpu(ex->ee_block);
__entry->ee_len = ext4_ext_get_actual_len(ex); __entry->ee_len = ext4_ext_get_actual_len(ex);
__entry->pc_pclu = pc->pclu;
__entry->pc_lblk = pc->lblk;
__entry->pc_state = pc->state;
), ),
TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
"from %u to %u partial_cluster %lld", "from %u to %u partial [pclu %lld lblk %u state %d]",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (unsigned long) __entry->ino,
(unsigned) __entry->ee_lblk, (unsigned) __entry->ee_lblk,
...@@ -2072,45 +2077,53 @@ TRACE_EVENT(ext4_remove_blocks, ...@@ -2072,45 +2077,53 @@ TRACE_EVENT(ext4_remove_blocks,
(unsigned short) __entry->ee_len, (unsigned short) __entry->ee_len,
(unsigned) __entry->from, (unsigned) __entry->from,
(unsigned) __entry->to, (unsigned) __entry->to,
(long long) __entry->partial) (long long) __entry->pc_pclu,
(unsigned int) __entry->pc_lblk,
(int) __entry->pc_state)
); );
TRACE_EVENT(ext4_ext_rm_leaf, TRACE_EVENT(ext4_ext_rm_leaf,
TP_PROTO(struct inode *inode, ext4_lblk_t start, TP_PROTO(struct inode *inode, ext4_lblk_t start,
struct ext4_extent *ex, struct ext4_extent *ex,
long long partial_cluster), struct partial_cluster *pc),
TP_ARGS(inode, start, ex, partial_cluster), TP_ARGS(inode, start, ex, pc),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
__field( ino_t, ino ) __field( ino_t, ino )
__field( long long, partial )
__field( ext4_lblk_t, start ) __field( ext4_lblk_t, start )
__field( ext4_lblk_t, ee_lblk ) __field( ext4_lblk_t, ee_lblk )
__field( ext4_fsblk_t, ee_pblk ) __field( ext4_fsblk_t, ee_pblk )
__field( short, ee_len ) __field( short, ee_len )
__field( ext4_fsblk_t, pc_pclu )
__field( ext4_lblk_t, pc_lblk )
__field( int, pc_state)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = inode->i_sb->s_dev; __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->partial = partial_cluster;
__entry->start = start; __entry->start = start;
__entry->ee_lblk = le32_to_cpu(ex->ee_block); __entry->ee_lblk = le32_to_cpu(ex->ee_block);
__entry->ee_pblk = ext4_ext_pblock(ex); __entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_len = ext4_ext_get_actual_len(ex); __entry->ee_len = ext4_ext_get_actual_len(ex);
__entry->pc_pclu = pc->pclu;
__entry->pc_lblk = pc->lblk;
__entry->pc_state = pc->state;
), ),
TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
"partial_cluster %lld", "partial [pclu %lld lblk %u state %d]",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (unsigned long) __entry->ino,
(unsigned) __entry->start, (unsigned) __entry->start,
(unsigned) __entry->ee_lblk, (unsigned) __entry->ee_lblk,
(unsigned long long) __entry->ee_pblk, (unsigned long long) __entry->ee_pblk,
(unsigned short) __entry->ee_len, (unsigned short) __entry->ee_len,
(long long) __entry->partial) (long long) __entry->pc_pclu,
(unsigned int) __entry->pc_lblk,
(int) __entry->pc_state)
); );
TRACE_EVENT(ext4_ext_rm_idx, TRACE_EVENT(ext4_ext_rm_idx,
...@@ -2168,9 +2181,9 @@ TRACE_EVENT(ext4_ext_remove_space, ...@@ -2168,9 +2181,9 @@ TRACE_EVENT(ext4_ext_remove_space,
TRACE_EVENT(ext4_ext_remove_space_done, TRACE_EVENT(ext4_ext_remove_space_done,
TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end, TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end,
int depth, long long partial, __le16 eh_entries), int depth, struct partial_cluster *pc, __le16 eh_entries),
TP_ARGS(inode, start, end, depth, partial, eh_entries), TP_ARGS(inode, start, end, depth, pc, eh_entries),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
...@@ -2178,7 +2191,9 @@ TRACE_EVENT(ext4_ext_remove_space_done, ...@@ -2178,7 +2191,9 @@ TRACE_EVENT(ext4_ext_remove_space_done,
__field( ext4_lblk_t, start ) __field( ext4_lblk_t, start )
__field( ext4_lblk_t, end ) __field( ext4_lblk_t, end )
__field( int, depth ) __field( int, depth )
__field( long long, partial ) __field( ext4_fsblk_t, pc_pclu )
__field( ext4_lblk_t, pc_lblk )
__field( int, pc_state )
__field( unsigned short, eh_entries ) __field( unsigned short, eh_entries )
), ),
...@@ -2188,18 +2203,23 @@ TRACE_EVENT(ext4_ext_remove_space_done, ...@@ -2188,18 +2203,23 @@ TRACE_EVENT(ext4_ext_remove_space_done,
__entry->start = start; __entry->start = start;
__entry->end = end; __entry->end = end;
__entry->depth = depth; __entry->depth = depth;
__entry->partial = partial; __entry->pc_pclu = pc->pclu;
__entry->pc_lblk = pc->lblk;
__entry->pc_state = pc->state;
__entry->eh_entries = le16_to_cpu(eh_entries); __entry->eh_entries = le16_to_cpu(eh_entries);
), ),
TP_printk("dev %d,%d ino %lu since %u end %u depth %d partial %lld " TP_printk("dev %d,%d ino %lu since %u end %u depth %d "
"partial [pclu %lld lblk %u state %d] "
"remaining_entries %u", "remaining_entries %u",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (unsigned long) __entry->ino,
(unsigned) __entry->start, (unsigned) __entry->start,
(unsigned) __entry->end, (unsigned) __entry->end,
__entry->depth, __entry->depth,
(long long) __entry->partial, (long long) __entry->pc_pclu,
(unsigned int) __entry->pc_lblk,
(int) __entry->pc_state,
(unsigned short) __entry->eh_entries) (unsigned short) __entry->eh_entries)
); );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment