Commit ff9cb1c4 authored by Theodore Ts'o's avatar Theodore Ts'o

Merge branch 'for_linus' into for_linus_merged

Conflicts:
	fs/ext4/ioctl.c
parents e4e11180 d50f2ab6
...@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls ...@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls
behaviour may change in the future as it is behaviour may change in the future as it is
not necessary and has been done this way only not necessary and has been done this way only
for sake of simplicity. for sake of simplicity.
EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The new number
of blocks of the resized filesystem is passed in via a
64-bit integer argument. The kernel allocates the
bitmaps and inode tables itself, so the userspace tool
only has to pass the new number of blocks.
.............................................................................. ..............................................................................
References References
......
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#include <trace/events/ext4.h> #include <trace/events/ext4.h>
static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group);
/* /*
* balloc.c contains the blocks allocation and deallocation routines * balloc.c contains the blocks allocation and deallocation routines
*/ */
...@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) ...@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
* This function returns the number of file system metadata clusters at * This function returns the number of file system metadata clusters at
* the beginning of a block group, including the reserved gdt blocks. * the beginning of a block group, including the reserved gdt blocks.
*/ */
unsigned ext4_num_base_meta_clusters(struct super_block *sb, static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group) ext4_group_t block_group)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
......
...@@ -511,6 +511,14 @@ struct ext4_new_group_data { ...@@ -511,6 +511,14 @@ struct ext4_new_group_data {
__u32 free_blocks_count; __u32 free_blocks_count;
}; };
/* Indexes used to index group tables in ext4_new_group_data */
enum {
BLOCK_BITMAP = 0, /* block bitmap */
INODE_BITMAP, /* inode bitmap */
INODE_TABLE, /* inode tables */
GROUP_TABLE_COUNT,
};
/* /*
* Flags used by ext4_map_blocks() * Flags used by ext4_map_blocks()
*/ */
...@@ -575,6 +583,7 @@ struct ext4_new_group_data { ...@@ -575,6 +583,7 @@ struct ext4_new_group_data {
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#if defined(__KERNEL__) && defined(CONFIG_COMPAT) #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* /*
...@@ -957,12 +966,13 @@ struct ext4_inode_info { ...@@ -957,12 +966,13 @@ struct ext4_inode_info {
#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
EXT4_MOUNT2_##opt) EXT4_MOUNT2_##opt)
#define ext4_set_bit __test_and_set_bit_le #define ext4_test_and_set_bit __test_and_set_bit_le
#define ext4_set_bit __set_bit_le
#define ext4_set_bit_atomic ext2_set_bit_atomic #define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_clear_bit __test_and_clear_bit_le #define ext4_test_and_clear_bit __test_and_clear_bit_le
#define ext4_clear_bit __clear_bit_le
#define ext4_clear_bit_atomic ext2_clear_bit_atomic #define ext4_clear_bit_atomic ext2_clear_bit_atomic
#define ext4_test_bit test_bit_le #define ext4_test_bit test_bit_le
#define ext4_find_first_zero_bit find_first_zero_bit_le
#define ext4_find_next_zero_bit find_next_zero_bit_le #define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le #define ext4_find_next_bit find_next_bit_le
...@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) ...@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
...@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) ...@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
...@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb, ...@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb,
extern unsigned ext4_free_clusters_after_init(struct super_block *sb, extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group, ext4_group_t block_group,
struct ext4_group_desc *gdp); struct ext4_group_desc *gdp);
extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group);
extern unsigned ext4_num_overhead_clusters(struct super_block *sb, extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
ext4_group_t block_group, ext4_group_t block_group,
struct ext4_group_desc *gdp); struct ext4_group_desc *gdp);
...@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode); ...@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode); extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length);
extern int ext4_discard_partial_page_buffers(handle_t *handle, extern int ext4_discard_partial_page_buffers(handle_t *handle,
struct address_space *mapping, loff_t from, struct address_space *mapping, loff_t from,
loff_t length, int flags); loff_t length, int flags);
extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
struct inode *inode, struct page *page, loff_t from,
loff_t length, int flags);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode, extern void ext4_da_update_reserve_space(struct inode *inode,
...@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb, ...@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb,
extern int ext4_group_extend(struct super_block *sb, extern int ext4_group_extend(struct super_block *sb,
struct ext4_super_block *es, struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count); ext4_fsblk_t n_blocks_count);
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
/* super.c */ /* super.c */
extern void *ext4_kvmalloc(size_t size, gfp_t flags); extern void *ext4_kvmalloc(size_t size, gfp_t flags);
......
...@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode, ...@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t i, pg_lblk; ext4_lblk_t i, pg_lblk;
pgoff_t index; pgoff_t index;
if (!test_opt(inode->i_sb, DELALLOC))
return 0;
/* reverse search won't work if fs block size is less than page size */ /* reverse search won't work if fs block size is less than page size */
if (inode->i_blkbits < PAGE_CACHE_SHIFT) if (inode->i_blkbits < PAGE_CACHE_SHIFT)
search_hint_reverse = 0; search_hint_reverse = 0;
...@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ...@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
int err = 0; int err = 0;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
"block %llu, max_blocks %u, flags %d, allocated %u", "block %llu, max_blocks %u, flags %x, allocated %u\n",
inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
flags, allocated); flags, allocated);
ext4_ext_show_leaf(inode, path); ext4_ext_show_leaf(inode, path);
...@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, ...@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
ext4_lblk_t ex_cluster_start, ex_cluster_end; ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start, rr_cluster_end; ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext4_ext_pblock(ex); ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
unsigned short ee_len = ext4_ext_get_actual_len(ex); unsigned short ee_len = ext4_ext_get_actual_len(ex);
...@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb, ...@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb,
/* The requested region passed into ext4_map_blocks() */ /* The requested region passed into ext4_map_blocks() */
rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
if ((rr_cluster_start == ex_cluster_end) || if ((rr_cluster_start == ex_cluster_end) ||
(rr_cluster_start == ex_cluster_start)) { (rr_cluster_start == ex_cluster_start)) {
......
...@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ...@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
fatal = ext4_journal_get_write_access(handle, bh2); fatal = ext4_journal_get_write_access(handle, bh2);
} }
ext4_lock_group(sb, block_group); ext4_lock_group(sb, block_group);
cleared = ext4_clear_bit(bit, bitmap_bh->b_data); cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
if (fatal || !cleared) { if (fatal || !cleared) {
ext4_unlock_group(sb, block_group); ext4_unlock_group(sb, block_group);
goto out; goto out;
...@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ...@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t real_ngroups = ext4_get_groups_count(sb); ext4_group_t real_ngroups = ext4_get_groups_count(sb);
int inodes_per_group = EXT4_INODES_PER_GROUP(sb); int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei; unsigned int freei, avefreei, grp_free;
ext4_fsblk_t freeb, avefreec; ext4_fsblk_t freeb, avefreec;
unsigned int ndirs; unsigned int ndirs;
int max_dirs, min_inodes; int max_dirs, min_inodes;
...@@ -477,8 +477,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ...@@ -477,8 +477,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
grp = (parent_group + i) % ngroups; grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL); desc = ext4_get_group_desc(sb, grp, NULL);
if (desc && ext4_free_inodes_count(sb, desc) && grp_free = ext4_free_inodes_count(sb, desc);
ext4_free_inodes_count(sb, desc) >= avefreei) { if (desc && grp_free && grp_free >= avefreei) {
*group = grp; *group = grp;
return 0; return 0;
} }
...@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb, ...@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb,
*/ */
down_read(&grp->alloc_sem); down_read(&grp->alloc_sem);
ext4_lock_group(sb, group); ext4_lock_group(sb, group);
if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
/* not a free inode */ /* not a free inode */
retval = 1; retval = 1;
goto err_ret; goto err_ret;
...@@ -885,8 +885,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, ...@@ -885,8 +885,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
if (IS_DIRSYNC(inode)) if (IS_DIRSYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) { if (insert_inode_locked(inode) < 0) {
err = -EINVAL; /*
goto fail_drop; * Likely a bitmap corruption causing inode to be allocated
* twice.
*/
err = -EIO;
goto fail;
} }
spin_lock(&sbi->s_next_gen_lock); spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++; inode->i_generation = sbi->s_next_generation++;
......
...@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); ...@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
static int __ext4_journalled_writepage(struct page *page, unsigned int len); static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
struct inode *inode, struct page *page, loff_t from,
loff_t length, int flags);
/* /*
* Test whether an inode is a fast symlink. * Test whether an inode is a fast symlink.
...@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ...@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (!io_end || !size) if (!io_end || !size)
goto out; goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p" ext_debug("ext4_end_io_dio(): io_end 0x%p "
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n", "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
iocb->private, io_end->inode->i_ino, iocb, offset, iocb->private, io_end->inode->i_ino, iocb, offset,
size); size);
...@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, ...@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
* *
* Returns zero on success or negative on failure. * Returns zero on success or negative on failure.
*/ */
int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
struct inode *inode, struct page *page, loff_t from, struct inode *inode, struct page *page, loff_t from,
loff_t length, int flags) loff_t length, int flags)
{ {
...@@ -3300,126 +3303,6 @@ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, ...@@ -3300,126 +3303,6 @@ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
return err; return err;
} }
/*
 * ext4_block_truncate_page() zeroes the part of a mapping that lies between
 * file offset `from' and the end of the block containing `from'.
 * Truncate needs this: the tail of that block has to be physically zeroed
 * so that stale data does not reappear if the file is grown again later.
 *
 * Returns whatever ext4_block_zero_page_range() returns for that range.
 */
int ext4_block_truncate_page(handle_t *handle,
		struct address_space *mapping, loff_t from)
{
	unsigned blocksize = mapping->host->i_sb->s_blocksize;
	unsigned page_off = from & (PAGE_CACHE_SIZE-1);
	/* bytes remaining from `from' up to the end of its block */
	unsigned tail = blocksize - (page_off & (blocksize - 1));

	return ext4_block_zero_page_range(handle, mapping, from, tail);
}
/*
 * ext4_block_zero_page_range() zeroes out a mapping of length 'length'
 * starting from file offset 'from'. The range to be zeroed must
 * be contained within one block. If the specified range exceeds
 * the end of the block it will be shortened to the end of the block
 * that corresponds to 'from'.
 *
 * Returns 0 on success (including when the buffer is freed or the block
 * turns out to be unmapped, in which case nothing is zeroed), -ENOMEM if
 * the page cannot be found or created, -EIO if the block cannot be read,
 * or the error from the journalling helpers in data-journalling mode.
 */
int ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
{
ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1);
unsigned blocksize, max, pos;
ext4_lblk_t iblock;
struct inode *inode = mapping->host;
struct buffer_head *bh;
struct page *page;
int err = 0;
/* Look up (or create) the page cache page that contains 'from' */
page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
mapping_gfp_mask(mapping) & ~__GFP_FS);
if (!page)
return -ENOMEM;
blocksize = inode->i_sb->s_blocksize;
/* max: number of bytes from 'offset' to the end of its block */
max = blocksize - (offset & (blocksize - 1));
/*
 * correct length if it does not fall between
 * 'from' and the end of the block
 */
if (length > max || length < 0)
length = max;
/* iblock starts as the logical block number of the first block in the page */
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
/* Find the buffer that contains "offset" */
bh = page_buffers(page);
pos = blocksize;
while (offset >= pos) {
bh = bh->b_this_page;
iblock++;
pos += blocksize;
}
/* Every "goto unlock" below up to the read returns this 0 */
err = 0;
if (buffer_freed(bh)) {
BUFFER_TRACE(bh, "freed: skip");
goto unlock;
}
if (!buffer_mapped(bh)) {
BUFFER_TRACE(bh, "unmapped");
/* return value is not checked; the buffer_mapped() test below decides */
ext4_get_block(inode, iblock, bh, 0);
/* unmapped? It's a hole - nothing to do */
if (!buffer_mapped(bh)) {
BUFFER_TRACE(bh, "still unmapped");
goto unlock;
}
}
/* Ok, it's mapped. Make sure it's up-to-date */
if (PageUptodate(page))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
/* preset the error so a failed read can jump straight to unlock */
err = -EIO;
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
goto unlock;
}
if (ext4_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext4_journal_get_write_access(handle, bh);
if (err)
goto unlock;
}
zero_user(page, offset, length);
BUFFER_TRACE(bh, "zeroed end of block");
/* clear the -EIO preset left over from a successful read */
err = 0;
if (ext4_should_journal_data(inode)) {
err = ext4_handle_dirty_metadata(handle, inode, bh);
} else
mark_buffer_dirty(bh);
unlock:
/* common exit: drop the page lock and the page reference */
unlock_page(page);
page_cache_release(page);
return err;
}
int ext4_can_truncate(struct inode *inode) int ext4_can_truncate(struct inode *inode)
{ {
if (S_ISREG(inode->i_mode)) if (S_ISREG(inode->i_mode))
...@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return 0; return 0;
if (is_journal_aborted(journal)) if (is_journal_aborted(journal))
return -EROFS; return -EROFS;
/* We have to allocate physical blocks for delalloc blocks
* before flushing the journal; otherwise the delalloc blocks can
* no longer be allocated. Moreover, a truncate on delalloc blocks
* could trigger a BUG by flushing delalloc blocks in the journal.
* There are no delalloc blocks in non-journal data mode.
*/
if (val && test_opt(inode->i_sb, DELALLOC)) {
err = ext4_alloc_da_blocks(inode);
if (err < 0)
return err;
}
jbd2_journal_lock_updates(journal); jbd2_journal_lock_updates(journal);
jbd2_journal_flush(journal);
/* /*
* OK, there are no updates running now, and all cached data is * OK, there are no updates running now, and all cached data is
...@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if (val) if (val)
ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
else else {
jbd2_journal_flush(journal);
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode); ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "ext4.h" #include "ext4.h"
#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{ {
struct inode *inode = filp->f_dentry->d_inode; struct inode *inode = filp->f_dentry->d_inode;
...@@ -186,19 +188,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -186,19 +188,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err) if (err)
return err; return err;
if (get_user(n_blocks_count, (__u32 __user *)arg)) if (get_user(n_blocks_count, (__u32 __user *)arg)) {
return -EFAULT; err = -EFAULT;
goto group_extend_out;
}
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR, ext4_msg(sb, KERN_ERR,
"Online resizing not supported with bigalloc"); "Online resizing not supported with bigalloc");
return -EOPNOTSUPP; err = -EOPNOTSUPP;
goto group_extend_out;
} }
err = mnt_want_write_file(filp); err = mnt_want_write_file(filp);
if (err) if (err)
return err; goto group_extend_out;
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
if (EXT4_SB(sb)->s_journal) { if (EXT4_SB(sb)->s_journal) {
...@@ -209,8 +214,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -209,8 +214,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err == 0) if (err == 0)
err = err2; err = err2;
mnt_drop_write_file(filp); mnt_drop_write_file(filp);
group_extend_out:
ext4_resize_end(sb); ext4_resize_end(sb);
return err; return err;
} }
...@@ -251,8 +256,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -251,8 +256,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = ext4_move_extents(filp, donor_filp, me.orig_start, err = ext4_move_extents(filp, donor_filp, me.orig_start,
me.donor_start, me.len, &me.moved_len); me.donor_start, me.len, &me.moved_len);
mnt_drop_write_file(filp); mnt_drop_write_file(filp);
if (me.moved_len > 0) mnt_drop_write(filp->f_path.mnt);
file_remove_suid(donor_filp);
if (copy_to_user((struct move_extent __user *)arg, if (copy_to_user((struct move_extent __user *)arg,
&me, sizeof(me))) &me, sizeof(me)))
...@@ -271,19 +275,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -271,19 +275,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err; return err;
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
sizeof(input))) sizeof(input))) {
return -EFAULT; err = -EFAULT;
goto group_add_out;
}
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR, ext4_msg(sb, KERN_ERR,
"Online resizing not supported with bigalloc"); "Online resizing not supported with bigalloc");
return -EOPNOTSUPP; err = -EOPNOTSUPP;
goto group_add_out;
} }
err = mnt_want_write_file(filp); err = mnt_want_write_file(filp);
if (err) if (err)
return err; goto group_add_out;
err = ext4_group_add(sb, &input); err = ext4_group_add(sb, &input);
if (EXT4_SB(sb)->s_journal) { if (EXT4_SB(sb)->s_journal) {
...@@ -294,8 +301,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -294,8 +301,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (err == 0) if (err == 0)
err = err2; err = err2;
mnt_drop_write_file(filp); mnt_drop_write_file(filp);
group_add_out:
ext4_resize_end(sb); ext4_resize_end(sb);
return err; return err;
} }
...@@ -335,6 +342,60 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -335,6 +342,60 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return err; return err;
} }
case EXT4_IOC_RESIZE_FS: {
ext4_fsblk_t n_blocks_count;
struct super_block *sb = inode->i_sb;
int err = 0, err2 = 0;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not (yet) supported with bigalloc");
return -EOPNOTSUPP;
}
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_META_BG)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not (yet) supported with meta_bg");
return -EOPNOTSUPP;
}
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
sizeof(__u64))) {
return -EFAULT;
}
if (n_blocks_count > MAX_32_NUM &&
!EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_64BIT)) {
ext4_msg(sb, KERN_ERR,
"File system only supports 32-bit block numbers");
return -EOPNOTSUPP;
}
err = ext4_resize_begin(sb);
if (err)
return err;
err = mnt_want_write(filp->f_path.mnt);
if (err)
goto resizefs_out;
err = ext4_resize_fs(sb, n_blocks_count);
if (EXT4_SB(sb)->s_journal) {
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
}
if (err == 0)
err = err2;
mnt_drop_write(filp->f_path.mnt);
resizefs_out:
ext4_resize_end(sb);
return err;
}
case FITRIM: case FITRIM:
{ {
struct request_queue *q = bdev_get_queue(sb->s_bdev); struct request_queue *q = bdev_get_queue(sb->s_bdev);
...@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
} }
case EXT4_IOC_MOVE_EXT: case EXT4_IOC_MOVE_EXT:
case FITRIM: case FITRIM:
case EXT4_IOC_RESIZE_FS:
break; break;
default: default:
return -ENOIOCTLCMD; return -ENOIOCTLCMD;
......
...@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, ...@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
ext4_group_t group; ext4_group_t group;
ext4_grpblk_t bit; ext4_grpblk_t bit;
trace_ext4_mb_release_group_pa(pa); trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0); BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0); BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
......
...@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb, ...@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb,
return err; return err;
} }
/*
 * ext4_new_flex_group_data is used by the 64bit-resize interface to add
 * one flex group at a time.
 *
 * @groups and @bg_flags are separately allocated arrays (see
 * alloc_flex_gd() / free_flex_gd()); entry i of each describes the same
 * new group.
 */
struct ext4_new_flex_group_data {
/* new_group_data for the groups in the flex group */
struct ext4_new_group_data *groups;
/* block group flags of the groups in @groups */
__u16 *bg_flags;
/* number of groups in @groups */
ext4_group_t count;
};
/*
 * alloc_flex_gd() allocates an ext4_new_flex_group_data able to describe
 * @flexbg_size groups.
 *
 * @flexbg_size: number of entries to allocate in the ->groups and
 *               ->bg_flags arrays; it is also stored in ->count.
 *
 * The array sizes are validated before being handed to kmalloc(): a
 * @flexbg_size whose byte size would wrap, or that does not fit in
 * ->count, fails the allocation instead of producing undersized arrays.
 *
 * Returns NULL on failure, otherwise the address of the allocated
 * structure, which is released with free_flex_gd().
 */
static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
{
	struct ext4_new_flex_group_data *flex_gd;

	flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
	if (flex_gd == NULL)
		goto out3;

	/*
	 * Reject sizes whose byte count would overflow.  The ->groups
	 * element is larger than the __u16 ->bg_flags element, so this one
	 * check covers both multiplications below.
	 */
	if (flexbg_size > ~(size_t)0 / sizeof(struct ext4_new_group_data))
		goto out2;

	/* Reject sizes that would be truncated when stored in ->count. */
	flex_gd->count = flexbg_size;
	if (flex_gd->count != flexbg_size)
		goto out2;

	flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
				  flexbg_size, GFP_NOFS);
	if (flex_gd->groups == NULL)
		goto out2;

	flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
	if (flex_gd->bg_flags == NULL)
		goto out1;

	return flex_gd;

out1:
	kfree(flex_gd->groups);
out2:
	kfree(flex_gd);
out3:
	return NULL;
}
/*
 * free_flex_gd() releases a structure obtained from alloc_flex_gd(): the
 * two member arrays first, then the container itself.
 *
 * @flex_gd is dereferenced unconditionally, so it must not be NULL.
 */
static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
{
	/* The member arrays are independent allocations. */
	kfree(flex_gd->groups);
	kfree(flex_gd->bg_flags);

	kfree(flex_gd);
}
/*
 * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
 * and inode tables for a flex group.
 *
 * This function is used by 64bit-resize. Note that this function allocates
 * group tables starting from the 1st of the groups contained in @flex_gd,
 * which may be only part of a flex group.
 *
 * Blocks are handed out from contiguous runs: a run begins after the
 * superblock/GDT overhead of a group and is extended over the following
 * groups until one that carries a superblock is reached. When a run is
 * used up, the function jumps back to next_group and continues from the
 * group where the run stopped, resuming with the tables still unplaced.
 *
 * @sb: super block of fs to which the groups belong
 * @flex_gd: the groups being added; for each entry of ->groups the
 *           block_bitmap, inode_bitmap and inode_table fields are set, and
 *           free_blocks_count of the group hosting a table is reduced
 * @flexbg_size: flex group size; when > 1 the groups must all lie in the
 *               same flex group, and EXT4_BG_BLOCK_UNINIT is cleared in
 *               ->bg_flags for every group that hosts a table
 */
static void ext4_alloc_group_tables(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
int flexbg_size)
{
struct ext4_new_group_data *group_data = flex_gd->groups;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ext4_fsblk_t start_blk;
ext4_fsblk_t last_blk;
ext4_group_t src_group;
/*
 * Next entry still needing a block bitmap / inode bitmap / inode table.
 * These are initialised once and keep their value across "goto next_group".
 */
ext4_group_t bb_index = 0;
ext4_group_t ib_index = 0;
ext4_group_t it_index = 0;
ext4_group_t group;
ext4_group_t last_group;
unsigned overhead;
BUG_ON(flex_gd->count == 0 || group_data == NULL);
src_group = group_data[0].group;
last_group = src_group + flex_gd->count - 1;
/* first and last group must fall into the same flex group */
BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
(last_group & ~(flexbg_size - 1))));
next_group:
/* Start a new run of blocks at src_group */
group = group_data[0].group;
start_blk = ext4_group_first_block_no(sb, src_group);
last_blk = start_blk + group_data[src_group - group].blocks_count;
/* Skip the superblock, GDT and reserved GDT blocks if the group has them */
overhead = ext4_bg_has_super(sb, src_group) ?
(1 + ext4_bg_num_gdb(sb, src_group) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
start_blk += overhead;
/*
 * NOTE(review): if this point is reached with src_group past the last
 * group, group_data[] has already been indexed above before this check
 * fires - confirm whether that read can go out of bounds.
 */
BUG_ON(src_group >= group_data[0].group + flex_gd->count);
/* We collect contiguous blocks as much as possible. */
src_group++;
for (; src_group <= last_group; src_group++)
if (!ext4_bg_has_super(sb, src_group))
last_blk += group_data[src_group - group].blocks_count;
else
break;
/* Allocate block bitmaps */
for (; bb_index < flex_gd->count; bb_index++) {
/* run exhausted: continue from the group where it stopped */
if (start_blk >= last_blk)
goto next_group;
group_data[bb_index].block_bitmap = start_blk++;
/* charge the block to the group that physically contains it */
ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
if (flexbg_size > 1)
flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
}
/* Allocate inode bitmaps */
for (; ib_index < flex_gd->count; ib_index++) {
if (start_blk >= last_blk)
goto next_group;
group_data[ib_index].inode_bitmap = start_blk++;
ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
if (flexbg_size > 1)
flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
}
/* Allocate inode tables */
for (; it_index < flex_gd->count; it_index++) {
/* an inode table needs s_itb_per_group blocks left in the run */
if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
goto next_group;
group_data[it_index].inode_table = start_blk;
/* the whole table is charged to the group holding its first block */
ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
group -= group_data[0].group;
group_data[group].free_blocks_count -=
EXT4_SB(sb)->s_itb_per_group;
if (flexbg_size > 1)
flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
start_blk += EXT4_SB(sb)->s_itb_per_group;
}
/* With the DEBUG mount option, log the resulting per-group layout */
if (test_opt(sb, DEBUG)) {
int i;
group = group_data[0].group;
printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
"%d groups, flexbg size is %d:\n", flex_gd->count,
flexbg_size);
for (i = 0; i < flex_gd->count; i++) {
printk(KERN_DEBUG "adding %s group %u: %u "
"blocks (%d free)\n",
ext4_bg_has_super(sb, group + i) ? "normal" :
"no-super", group + i,
group_data[i].blocks_count,
group_data[i].free_blocks_count);
}
}
}
static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
ext4_fsblk_t blk) ext4_fsblk_t blk)
{ {
...@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) ...@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
} }
/* /*
* Set up the block and inode bitmaps, and the inode table for the new group. * set_flexbg_block_bitmap() mark @count blocks starting from @block used.
*
* Helper function for setup_new_flex_group_blocks(), which uses it to mark
* the blocks taken by the group tables as used in the block bitmaps.
*
* @sb: super block
* @handle: journal handle
* @flex_gd: flex group data
*/
static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
			struct ext4_new_flex_group_data *flex_gd,
			ext4_fsblk_t block, ext4_group_t count)
{
	ext4_group_t count2;

	ext4_debug("mark blocks [%llu/%u] used\n", block, count);

	/*
	 * Walk the range [block, block + count) one block bitmap at a time:
	 * count2 is the part of the remaining range handled in this pass.
	 */
	for (count2 = count; count > 0; count -= count2, block += count2) {
		ext4_fsblk_t start;
		struct buffer_head *bh;
		ext4_group_t group;
		int err;

		/* Find the group owning @block and make it relative to flex_gd. */
		ext4_get_group_no_and_offset(sb, block, &group, NULL);
		start = ext4_group_first_block_no(sb, group);
		group -= flex_gd->groups[0].group;

		/* Clamp this pass to the bits left in this group's bitmap block. */
		count2 = sb->s_blocksize * 8 - (block - start);
		if (count2 > count)
			count2 = count;

		/*
		 * A group whose block bitmap is still flagged uninitialized is
		 * skipped; that is only expected for a single-group flex_gd.
		 */
		if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
			BUG_ON(flex_gd->count > 1);
			continue;
		}

		err = extend_or_restart_transaction(handle, 1);
		if (err)
			return err;

		bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
		if (!bh)
			return -EIO;

		err = ext4_journal_get_write_access(handle, bh);
		if (err) {
			/* Drop the reference taken by sb_getblk() before bailing. */
			brelse(bh);
			return err;
		}
		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
			   block - start, count2);
		ext4_set_bits(bh->b_data, block - start, count2);

		err = ext4_handle_dirty_metadata(handle, NULL, bh);
		/* Release the buffer on success and on failure alike. */
		brelse(bh);
		if (unlikely(err))
			return err;
	}

	return 0;
}
/*
* Set up the block and inode bitmaps, and the inode table for the new groups.
* This doesn't need to be part of the main transaction, since we are only * This doesn't need to be part of the main transaction, since we are only
* changing blocks outside the actual filesystem. We still do journaling to * changing blocks outside the actual filesystem. We still do journaling to
* ensure the recovery is correct in case of a failure just after resize. * ensure the recovery is correct in case of a failure just after resize.
* If any part of this fails, we simply abort the resize. * If any part of this fails, we simply abort the resize.
*
* setup_new_flex_group_blocks handles a flex group as follow:
* 1. copy super block and GDT, and initialize group tables if necessary.
* In this step, we only set bits in blocks bitmaps for blocks taken by
* super block and GDT.
* 2. allocate group tables in block bitmaps, that is, set bits in block
* bitmap for blocks taken by group tables.
*/ */
static int setup_new_group_blocks(struct super_block *sb, static int setup_new_flex_group_blocks(struct super_block *sb,
struct ext4_new_group_data *input) struct ext4_new_flex_group_data *flex_gd)
{ {
int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
ext4_fsblk_t start;
ext4_fsblk_t block;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); struct ext4_super_block *es = sbi->s_es;
int reserved_gdb = ext4_bg_has_super(sb, input->group) ? struct ext4_new_group_data *group_data = flex_gd->groups;
le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; __u16 *bg_flags = flex_gd->bg_flags;
unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
struct buffer_head *bh;
handle_t *handle; handle_t *handle;
ext4_fsblk_t block; ext4_group_t group, count;
ext4_grpblk_t bit; struct buffer_head *bh = NULL;
int i; int reserved_gdb, i, j, err = 0, err2;
int err = 0, err2;
BUG_ON(!flex_gd->count || !group_data ||
group_data[0].group != sbi->s_groups_count);
reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
/* This transaction may be extended/restarted along the way */ /* This transaction may be extended/restarted along the way */
handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
if (IS_ERR(handle)) if (IS_ERR(handle))
return PTR_ERR(handle); return PTR_ERR(handle);
BUG_ON(input->group != sbi->s_groups_count); group = group_data[0].group;
for (i = 0; i < flex_gd->count; i++, group++) {
unsigned long gdblocks;
/* Copy all of the GDT blocks into the backup in this group */ gdblocks = ext4_bg_num_gdb(sb, group);
for (i = 0, bit = 1, block = start + 1; start = ext4_group_first_block_no(sb, group);
i < gdblocks; i++, block++, bit++) {
struct buffer_head *gdb;
ext4_debug("update backup group %#04llx (+%d)\n", block, bit); /* Copy all of the GDT blocks into the backup in this group */
err = extend_or_restart_transaction(handle, 1); for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
if (err) struct buffer_head *gdb;
goto exit_journal;
gdb = sb_getblk(sb, block); ext4_debug("update backup group %#04llx\n", block);
if (!gdb) { err = extend_or_restart_transaction(handle, 1);
err = -EIO; if (err)
goto exit_journal; goto out;
}
if ((err = ext4_journal_get_write_access(handle, gdb))) { gdb = sb_getblk(sb, block);
if (!gdb) {
err = -EIO;
goto out;
}
err = ext4_journal_get_write_access(handle, gdb);
if (err) {
brelse(gdb);
goto out;
}
memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
gdb->b_size);
set_buffer_uptodate(gdb);
err = ext4_handle_dirty_metadata(handle, NULL, gdb);
if (unlikely(err)) {
brelse(gdb);
goto out;
}
brelse(gdb); brelse(gdb);
goto exit_journal;
} }
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb); /* Zero out all of the reserved backup group descriptor
err = ext4_handle_dirty_metadata(handle, NULL, gdb); * table blocks
if (unlikely(err)) { */
brelse(gdb); if (ext4_bg_has_super(sb, group)) {
goto exit_journal; err = sb_issue_zeroout(sb, gdblocks + start + 1,
reserved_gdb, GFP_NOFS);
if (err)
goto out;
} }
brelse(gdb);
}
/* Zero out all of the reserved backup group descriptor table blocks */ /* Initialize group tables of the grop @group */
ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
block, sbi->s_itb_per_group); goto handle_bb;
err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
GFP_NOFS);
if (err)
goto exit_journal;
err = extend_or_restart_transaction(handle, 2); /* Zero out all of the inode table blocks */
if (err) block = group_data[i].inode_table;
goto exit_journal; ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
GFP_NOFS);
if (err)
goto out;
bh = bclean(handle, sb, input->block_bitmap); handle_bb:
if (IS_ERR(bh)) { if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
err = PTR_ERR(bh); goto handle_ib;
goto exit_journal;
}
if (ext4_bg_has_super(sb, input->group)) { /* Initialize block bitmap of the @group */
ext4_debug("mark backup group tables %#04llx (+0)\n", start); block = group_data[i].block_bitmap;
ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); err = extend_or_restart_transaction(handle, 1);
} if (err)
goto out;
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, bh = bclean(handle, sb, block);
input->block_bitmap - start); if (IS_ERR(bh)) {
ext4_set_bit(input->block_bitmap - start, bh->b_data); err = PTR_ERR(bh);
ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, goto out;
input->inode_bitmap - start); }
ext4_set_bit(input->inode_bitmap - start, bh->b_data); if (ext4_bg_has_super(sb, group)) {
ext4_debug("mark backup superblock %#04llx (+0)\n",
/* Zero out all of the inode table blocks */ start);
block = input->inode_table; ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 1);
block, sbi->s_itb_per_group); }
err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); ext4_mark_bitmap_end(group_data[i].blocks_count,
if (err) sb->s_blocksize * 8, bh->b_data);
goto exit_bh; err = ext4_handle_dirty_metadata(handle, NULL, bh);
ext4_set_bits(bh->b_data, input->inode_table - start, if (err)
sbi->s_itb_per_group); goto out;
brelse(bh);
handle_ib:
if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
continue;
ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, /* Initialize inode bitmap of the @group */
bh->b_data); block = group_data[i].inode_bitmap;
err = ext4_handle_dirty_metadata(handle, NULL, bh); err = extend_or_restart_transaction(handle, 1);
if (unlikely(err)) { if (err)
ext4_std_error(sb, err); goto out;
goto exit_bh; /* Mark unused entries in inode bitmap used */
bh = bclean(handle, sb, block);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
goto out;
}
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
sb->s_blocksize * 8, bh->b_data);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (err)
goto out;
brelse(bh);
} }
brelse(bh); bh = NULL;
/* Mark unused entries in inode bitmap used */
ext4_debug("clear inode bitmap %#04llx (+%llu)\n", /* Mark group tables in block bitmap */
input->inode_bitmap, input->inode_bitmap - start); for (j = 0; j < GROUP_TABLE_COUNT; j++) {
if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { count = group_table_count[j];
err = PTR_ERR(bh); start = (&group_data[0].block_bitmap)[j];
goto exit_journal; block = start;
for (i = 1; i < flex_gd->count; i++) {
block += group_table_count[j];
if (block == (&group_data[i].block_bitmap)[j]) {
count += group_table_count[j];
continue;
}
err = set_flexbg_block_bitmap(sb, handle,
flex_gd, start, count);
if (err)
goto out;
count = group_table_count[j];
start = group_data[i].block_bitmap;
block = start;
}
if (count) {
err = set_flexbg_block_bitmap(sb, handle,
flex_gd, start, count);
if (err)
goto out;
}
} }
ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, out:
bh->b_data);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (unlikely(err))
ext4_std_error(sb, err);
exit_bh:
brelse(bh); brelse(bh);
err2 = ext4_journal_stop(handle);
exit_journal: if (err2 && !err)
if ((err2 = ext4_journal_stop(handle)) && !err)
err = err2; err = err2;
return err; return err;
...@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, ...@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
* groups in current filesystem that have BACKUPS, or -ve error code. * groups in current filesystem that have BACKUPS, or -ve error code.
*/ */
static int verify_reserved_gdb(struct super_block *sb, static int verify_reserved_gdb(struct super_block *sb,
ext4_group_t end,
struct buffer_head *primary) struct buffer_head *primary)
{ {
const ext4_fsblk_t blk = primary->b_blocknr; const ext4_fsblk_t blk = primary->b_blocknr;
const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
unsigned three = 1; unsigned three = 1;
unsigned five = 5; unsigned five = 5;
unsigned seven = 7; unsigned seven = 7;
...@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ...@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
if (!gdb_bh) if (!gdb_bh)
return -EIO; return -EIO;
gdbackups = verify_reserved_gdb(sb, gdb_bh); gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
if (gdbackups < 0) { if (gdbackups < 0) {
err = gdbackups; err = gdbackups;
goto exit_bh; goto exit_bh;
...@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, ...@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
err = -EIO; err = -EIO;
goto exit_bh; goto exit_bh;
} }
if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { gdbackups = verify_reserved_gdb(sb, group, primary[res]);
if (gdbackups < 0) {
brelse(primary[res]); brelse(primary[res]);
err = gdbackups; err = gdbackups;
goto exit_bh; goto exit_bh;
...@@ -735,6 +1021,348 @@ static void update_backups(struct super_block *sb, ...@@ -735,6 +1021,348 @@ static void update_backups(struct super_block *sb,
} }
} }
/*
 * ext4_add_new_descs() prepares the group descriptor table for @count new
 * group descriptors, starting with the descriptor of @group.
 *
 * @handle: journal handle
 * @sb: super block
 * @group: the group no. of the first group desc to be added
 * @resize_inode: the resize inode
 * @count: number of group descriptors to be added
 *
 * Returns 0 on success, or the first error reported by a helper; the
 * remaining groups are not processed after an error.
 */
static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
			      ext4_group_t group, struct inode *resize_inode,
			      ext4_group_t count)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int n, err = 0;

	for (n = 0; n < count; n++, group++) {
		int resv_gdb = 0;
		int desc_off = group % EXT4_DESC_PER_BLOCK(sb);
		int desc_blk = group / EXT4_DESC_PER_BLOCK(sb);

		/* Only groups carrying a superblock backup have reserved GDT blocks. */
		if (ext4_bg_has_super(sb, group))
			resv_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);

		/*
		 * Two mutually exclusive cases:
		 *  - the descriptor is the first one of a descriptor block:
		 *    add_new_gdb() is called for that block;
		 *  - the descriptor lands in an existing descriptor block: get
		 *    journal write access to it and, for a group with reserved
		 *    GDT blocks and GDT backups, call reserve_backup_gdb().
		 * Doing both for one group would mean more complex code.
		 */
		if (!desc_off) {
			err = add_new_gdb(handle, resize_inode, group);
		} else {
			struct buffer_head *gdb_bh = sbi->s_group_desc[desc_blk];

			err = ext4_journal_get_write_access(handle, gdb_bh);
			if (!err && resv_gdb && ext4_bg_num_gdb(sb, group))
				err = reserve_backup_gdb(handle, resize_inode,
							 group);
		}

		if (err)
			break;
	}

	return err;
}
/*
 * ext4_setup_new_descs() fills in the on-disk group descriptors of the
 * groups described by @flex_gd.
 *
 * @handle: journal handle
 * @sb: super block
 * @flex_gd: the new groups and their flags
 *
 * For every new group the descriptor is zeroed, populated from the matching
 * ext4_new_group_data entry and bg_flags value, checksummed, and its
 * descriptor block is marked dirty; then ext4_mb_add_groupinfo() is called
 * for the group.  Returns 0 on success or the first error encountered.
 */
static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
				struct ext4_new_flex_group_data *flex_gd)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int idx, err = 0;

	for (idx = 0; idx < flex_gd->count; idx++) {
		struct ext4_new_group_data *gd = &flex_gd->groups[idx];
		ext4_group_t group = gd->group;
		int gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
		int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
		struct buffer_head *gdb_bh;
		struct ext4_group_desc *gdp;

		/*
		 * No get_write_access() here: the descriptor block is expected
		 * to have been prepared already (ext4_add_new_descs() takes
		 * write access on existing descriptor blocks).
		 */
		gdb_bh = sbi->s_group_desc[gdb_num];

		/* Locate and reset the descriptor slot of the new group */
		gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
						 gdb_off * EXT4_DESC_SIZE(sb));
		memset(gdp, 0, EXT4_DESC_SIZE(sb));

		ext4_block_bitmap_set(sb, gdp, gd->block_bitmap);
		ext4_inode_bitmap_set(sb, gdp, gd->inode_bitmap);
		ext4_inode_table_set(sb, gdp, gd->inode_table);
		ext4_free_group_clusters_set(sb, gdp,
					     EXT4_B2C(sbi, gd->free_blocks_count));
		ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
		gdp->bg_flags = cpu_to_le16(flex_gd->bg_flags[idx]);
		/* Checksum last, once every other field is final */
		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);

		err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
		if (unlikely(err)) {
			ext4_std_error(sb, err);
			break;
		}

		/*
		 * We can allocate memory for mb_alloc based on the new group
		 * descriptor
		 */
		err = ext4_mb_add_groupinfo(sb, group, gdp);
		if (err)
			break;
	}

	return err;
}
/*
* ext4_update_super() updates the super block so that the newly added
* groups can be seen by the filesystem.
*
* The function sums the block and free-block counts of all groups in
* @flex_gd, grows the superblock's block and inode counts, issues a write
* barrier, bumps sbi->s_groups_count and only then updates the reserved
* block count and the free cluster/inode counters.  It returns nothing and
* does not itself mark the superblock dirty.
*
* @sb: super block
* @flex_gd: new added groups
*/
static void ext4_update_super(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd)
{
ext4_fsblk_t blocks_count = 0;
ext4_fsblk_t free_blocks = 0;
ext4_fsblk_t reserved_blocks = 0;
struct ext4_new_group_data *group_data = flex_gd->groups;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
int i;
BUG_ON(flex_gd->count == 0 || group_data == NULL);
/*
* Make the new blocks and inodes valid next. We do this before
* increasing the group count so that once the group is enabled,
* all of its blocks and inodes are already valid.
*
* We always allocate group-by-group, then block-by-block or
* inode-by-inode within a group, so enabling these
* blocks/inodes before the group is live won't actually let us
* allocate the new space yet.
*/
/* Total up the blocks and free blocks contributed by the new groups. */
for (i = 0; i < flex_gd->count; i++) {
blocks_count += group_data[i].blocks_count;
free_blocks += group_data[i].free_blocks_count;
}
/*
* Scale the reservation with the added space: take the current reserved
* share as a whole-number percentage of the current block count, then
* apply that percentage to the number of blocks being added.
*
* NOTE(review): do_div() is passed the 64-bit ext4_blocks_count(es) as
* its divisor; do_div() is presumably limited to a 32-bit divisor, so
* confirm this cannot be reached with more than 2^32 - 1 blocks.
*/
reserved_blocks = ext4_r_blocks_count(es) * 100;
do_div(reserved_blocks, ext4_blocks_count(es));
reserved_blocks *= blocks_count;
do_div(reserved_blocks, 100);
ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
flex_gd->count);
/*
* We need to protect s_groups_count against other CPUs seeing
* inconsistent state in the superblock.
*
* The precise rules we use are:
*
* * Writers must perform a smp_wmb() after updating all
* dependent data and before modifying the groups count
*
* * Readers must perform an smp_rmb() after reading the groups
* count and before reading any dependent data.
*
* NB. These rules can be relaxed when checking the group count
* while freeing data, as we can only allocate from a block
* group after serialising against the group count, and we can
* only then free after serialising in turn against that
* allocation.
*/
smp_wmb();
/* Update the global fs size fields */
sbi->s_groups_count += flex_gd->count;
/* Update the reserved block counts only once the new group is
* active. */
ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
reserved_blocks);
/* Update the free space counts */
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_B2C(sbi, free_blocks));
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
/*
* With flex_bg enabled, credit the whole addition to the flex group of
* the first new group.
*/
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
sbi->s_log_groups_per_flex) {
ext4_group_t flex_group;
flex_group = ext4_flex_group(sbi, group_data[0].group);
atomic_add(EXT4_B2C(sbi, free_blocks),
&sbi->s_flex_groups[flex_group].free_clusters);
atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
&sbi->s_flex_groups[flex_group].free_inodes);
}
/* The %u printed after "added group" is the number of groups added. */
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: added group %u:"
"%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
blocks_count, free_blocks, reserved_blocks);
}
/*
 * Add a flex group to an fs.  Ensure we handle all possible error conditions
 * _before_ we start modifying the filesystem, because we cannot abort the
 * transaction and not have it write the data to disk.
 *
 * @sb: super block of the filesystem being grown
 * @resize_inode: resize inode, passed on to ext4_add_new_descs()
 * @flex_gd: the groups to add; flex_gd->groups[0].group must equal the
 *           current sbi->s_groups_count
 *
 * The new groups' metadata blocks are set up first, in their own transaction
 * (setup_new_flex_group_blocks()).  A second transaction then prepares the
 * descriptor blocks, fills in the descriptors and updates the superblock.
 * On success the superblock and every touched descriptor block are copied to
 * their backups.
 *
 * Returns 0 on success or the first error encountered.
 */
static int ext4_flex_group_add(struct super_block *sb,
			       struct inode *resize_inode,
			       struct ext4_new_flex_group_data *flex_gd)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t o_blocks_count;
	ext4_grpblk_t last;
	ext4_group_t group;
	handle_t *handle;
	unsigned reserved_gdb;
	int err = 0, err2 = 0, credit;

	BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);

	reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
	o_blocks_count = ext4_blocks_count(es);

	/* The current end of the filesystem must sit on a group boundary. */
	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
	BUG_ON(last);

	err = setup_new_flex_group_blocks(sb, flex_gd);
	if (err)
		goto exit;

	/*
	 * We will always be modifying at least the superblock and GDT
	 * block.  If we are adding a group past the last current GDT block,
	 * we will also modify the inode and the dindirect block.  If we
	 * are adding a group with superblock/GDT backups we will also
	 * modify each of the reserved GDT dindirect blocks.
	 */
	credit = flex_gd->count * 4 + reserved_gdb;
	handle = ext4_journal_start_sb(sb, credit);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto exit;
	}

	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
	if (err)
		goto exit_journal;

	group = flex_gd->groups[0].group;
	BUG_ON(group != EXT4_SB(sb)->s_groups_count);
	err = ext4_add_new_descs(handle, sb, group,
				 resize_inode, flex_gd->count);
	if (err)
		goto exit_journal;

	err = ext4_setup_new_descs(handle, sb, flex_gd);
	if (err)
		goto exit_journal;

	ext4_update_super(sb, flex_gd);

	err = ext4_handle_dirty_super(handle, sb);

exit_journal:
	err2 = ext4_journal_stop(handle);
	if (!err)
		err = err2;

	if (!err) {
		/*
		 * s_group_desc[] is indexed by descriptor block, i.e. by
		 * group / EXT4_DESC_PER_BLOCK (as in ext4_setup_new_descs()),
		 * not by group / EXT4_BLOCKS_PER_GROUP.  Back up each
		 * descriptor block that received a new descriptor exactly once.
		 */
		ext4_group_t gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
		ext4_group_t gdb_last = (group + flex_gd->count - 1) /
					EXT4_DESC_PER_BLOCK(sb);

		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
			       sizeof(struct ext4_super_block));
		for (; gdb_num <= gdb_last; gdb_num++) {
			struct buffer_head *gdb_bh = sbi->s_group_desc[gdb_num];

			update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
				       gdb_bh->b_size);
		}
	}
exit:
	return err;
}
/*
 * ext4_setup_next_flex_gd() fills @flex_gd with the next batch of groups to
 * add while growing the filesystem to @n_blocks_count blocks.
 *
 * @sb: super block
 * @flex_gd: output; groups[], bg_flags[] and count are filled in
 * @n_blocks_count: block count the filesystem is being grown to
 * @flexbg_size: number of groups per flex group
 *
 * The batch starts at the first group past the current end of the filesystem
 * and stops at the end of that group's flex group, or at the last group
 * needed for @n_blocks_count, whichever comes first.
 *
 * Returns 0 if the filesystem already has @n_blocks_count blocks (nothing is
 * set up), 1 otherwise.
 */
static int ext4_setup_next_flex_gd(struct super_block *sb,
				   struct ext4_new_flex_group_data *flex_gd,
				   ext4_fsblk_t n_blocks_count,
				   unsigned long flexbg_size)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct ext4_new_group_data *group_data = flex_gd->groups;
	ext4_grpblk_t blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
	ext4_fsblk_t o_blocks_count = ext4_blocks_count(es);
	ext4_group_t group, n_group, last_group;
	ext4_grpblk_t last;
	unsigned long i;
	int has_gdt_csum;

	if (n_blocks_count == o_blocks_count)
		return 0;

	/* The filesystem currently ends on a group boundary. */
	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
	BUG_ON(last);

	/* From here on @last is the offset of the final block in @n_group. */
	ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);

	/* End of the flex group holding @group, clamped to the final group. */
	last_group = group | (flexbg_size - 1);
	if (last_group > n_group)
		last_group = n_group;

	flex_gd->count = last_group - group + 1;

	has_gdt_csum = EXT4_HAS_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_GDT_CSUM);

	for (i = 0; i < flex_gd->count; i++) {
		int overhead = 0;

		group_data[i].group = group + i;
		group_data[i].blocks_count = blocks_per_group;

		/* Superblock backup, GDT blocks and reserved GDT blocks. */
		if (ext4_bg_has_super(sb, group + i))
			overhead = 1 + ext4_bg_num_gdb(sb, group + i) +
				   le16_to_cpu(es->s_reserved_gdt_blocks);
		group_data[i].free_blocks_count = blocks_per_group - overhead;

		if (has_gdt_csum)
			flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
					       EXT4_BG_INODE_UNINIT;
		else
			flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
	}

	if (last_group == n_group) {
		/* We need to initialize block bitmap of last group. */
		if (has_gdt_csum)
			flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;

		/* The final group may be shorter than a full group. */
		if (last != blocks_per_group - 1) {
			group_data[i - 1].blocks_count = last + 1;
			group_data[i - 1].free_blocks_count -=
					blocks_per_group - last - 1;
		}
	}

	return 1;
}
/* Add group descriptor data to an existing or new group descriptor block. /* Add group descriptor data to an existing or new group descriptor block.
* Ensure we handle all possible error conditions _before_ we start modifying * Ensure we handle all possible error conditions _before_ we start modifying
* the filesystem, because we cannot abort the transaction and not have it * the filesystem, because we cannot abort the transaction and not have it
...@@ -750,16 +1378,15 @@ static void update_backups(struct super_block *sb, ...@@ -750,16 +1378,15 @@ static void update_backups(struct super_block *sb,
*/ */
int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
{ {
struct ext4_new_flex_group_data flex_gd;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
int reserved_gdb = ext4_bg_has_super(sb, input->group) ? int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
le16_to_cpu(es->s_reserved_gdt_blocks) : 0; le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
struct buffer_head *primary = NULL;
struct ext4_group_desc *gdp;
struct inode *inode = NULL; struct inode *inode = NULL;
handle_t *handle;
int gdb_off, gdb_num; int gdb_off, gdb_num;
int err, err2; int err;
__u16 bg_flags = 0;
gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
...@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ...@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
} }
if ((err = verify_group_input(sb, input))) err = verify_group_input(sb, input);
goto exit_put; if (err)
goto out;
if ((err = setup_new_group_blocks(sb, input))) flex_gd.count = 1;
goto exit_put; flex_gd.groups = input;
flex_gd.bg_flags = &bg_flags;
err = ext4_flex_group_add(sb, inode, &flex_gd);
out:
iput(inode);
return err;
} /* ext4_group_add */
/* /*
* We will always be modifying at least the superblock and a GDT * extend a group without checking assuming that checking has been done.
* block. If we are adding a group past the last current GDT block, */
* we will also modify the inode and the dindirect block. If we static int ext4_group_extend_no_check(struct super_block *sb,
* are adding a group with superblock/GDT backups we will also ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
* modify each of the reserved GDT dindirect blocks. {
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
handle_t *handle;
int err = 0, err2;
/* We will update the superblock, one block bitmap, and
* one group descriptor via ext4_group_add_blocks().
*/ */
handle = ext4_journal_start_sb(sb, handle = ext4_journal_start_sb(sb, 3);
ext4_bg_has_super(sb, input->group) ?
3 + reserved_gdb : 4);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
err = PTR_ERR(handle); err = PTR_ERR(handle);
goto exit_put; ext4_warning(sb, "error %d on journal start", err);
return err;
} }
if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
goto exit_journal; if (err) {
ext4_warning(sb, "error %d on journal write access", err);
/* goto errout;
* We will only either add reserved group blocks to a backup group
* or remove reserved blocks for the first group in a new group block.
* Doing both would be mean more complex code, and sane people don't
* use non-sparse filesystems anymore. This is already checked above.
*/
if (gdb_off) {
primary = sbi->s_group_desc[gdb_num];
if ((err = ext4_journal_get_write_access(handle, primary)))
goto exit_journal;
if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
err = reserve_backup_gdb(handle, inode, input->group);
if (err)
goto exit_journal;
}
} else {
/*
* Note that we can access new group descriptor block safely
* only if add_new_gdb() succeeds.
*/
err = add_new_gdb(handle, inode, input->group);
if (err)
goto exit_journal;
primary = sbi->s_group_desc[gdb_num];
} }
/* ext4_blocks_count_set(es, o_blocks_count + add);
* OK, now we've set up the new group. Time to make it active. ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
* o_blocks_count + add);
* so we have to be safe wrt. concurrent accesses the group /* We add the blocks to the bitmap and set the group need init bit */
* data. So we need to be careful to set all of the relevant err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
* group descriptor data etc. *before* we enable the group.
*
* The key field here is sbi->s_groups_count: as long as
* that retains its old value, nobody is going to access the new
* group.
*
* So first we update all the descriptor metadata for the new
* group; then we update the total disk blocks count; then we
* update the groups count to enable the group; then finally we
* update the free space counts so that the system can start
* using the new disk blocks.
*/
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)((char *)primary->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
memset(gdp, 0, EXT4_DESC_SIZE(sb));
ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
/*
* We can allocate memory for mb_alloc based on the new group
* descriptor
*/
err = ext4_mb_add_groupinfo(sb, input->group, gdp);
if (err) if (err)
goto exit_journal; goto errout;
/*
* Make the new blocks and inodes valid next. We do this before
* increasing the group count so that once the group is enabled,
* all of its blocks and inodes are already valid.
*
* We always allocate group-by-group, then block-by-block or
* inode-by-inode within a group, so enabling these
* blocks/inodes before the group is live won't actually let us
* allocate the new space yet.
*/
ext4_blocks_count_set(es, ext4_blocks_count(es) +
input->blocks_count);
le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
/*
* We need to protect s_groups_count against other CPUs seeing
* inconsistent state in the superblock.
*
* The precise rules we use are:
*
* * Writers must perform a smp_wmb() after updating all dependent
* data and before modifying the groups count
*
* * Readers must perform an smp_rmb() after reading the groups count
* and before reading any dependent data.
*
* NB. These rules can be relaxed when checking the group count
* while freeing data, as we can only allocate from a block
* group after serialising against the group count, and we can
* only then free after serialising in turn against that
* allocation.
*/
smp_wmb();
/* Update the global fs size fields */
sbi->s_groups_count++;
err = ext4_handle_dirty_metadata(handle, NULL, primary);
if (unlikely(err)) {
ext4_std_error(sb, err);
goto exit_journal;
}
/* Update the reserved block counts only once the new group is
* active. */
ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
input->reserved_blocks);
/* Update the free space counts */
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_B2C(sbi, input->free_blocks_count));
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb));
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
sbi->s_log_groups_per_flex) {
ext4_group_t flex_group;
flex_group = ext4_flex_group(sbi, input->group);
atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
&sbi->s_flex_groups[flex_group].free_clusters);
atomic_add(EXT4_INODES_PER_GROUP(sb),
&sbi->s_flex_groups[flex_group].free_inodes);
}
ext4_handle_dirty_super(handle, sb); ext4_handle_dirty_super(handle, sb);
ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
exit_journal: o_blocks_count + add);
if ((err2 = ext4_journal_stop(handle)) && !err) errout:
err2 = ext4_journal_stop(handle);
if (err2 && !err)
err = err2; err = err2;
if (!err && primary) {
update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, if (!err) {
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block)); sizeof(struct ext4_super_block));
update_backups(sb, primary->b_blocknr, primary->b_data,
primary->b_size);
} }
exit_put:
iput(inode);
return err; return err;
} /* ext4_group_add */ }
/* /*
* Extend the filesystem to the new number of blocks specified. This entry * Extend the filesystem to the new number of blocks specified. This entry
...@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ...@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_grpblk_t last; ext4_grpblk_t last;
ext4_grpblk_t add; ext4_grpblk_t add;
struct buffer_head *bh; struct buffer_head *bh;
handle_t *handle; int err;
int err, err2;
ext4_group_t group; ext4_group_t group;
o_blocks_count = ext4_blocks_count(es); o_blocks_count = ext4_blocks_count(es);
...@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ...@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
} }
brelse(bh); brelse(bh);
/* We will update the superblock, one block bitmap, and err = ext4_group_extend_no_check(sb, o_blocks_count, add);
* one group descriptor via ext4_free_blocks(). return err;
*/ } /* ext4_group_extend */
handle = ext4_journal_start_sb(sb, 3);
if (IS_ERR(handle)) { /*
err = PTR_ERR(handle); * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
ext4_warning(sb, "error %d on journal start", err); *
goto exit_put; * @sb: super block of the fs to be resized
* @n_blocks_count: the number of blocks resides in the resized fs
*/
int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
{
struct ext4_new_flex_group_data *flex_gd = NULL;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct buffer_head *bh;
struct inode *resize_inode;
ext4_fsblk_t o_blocks_count;
ext4_group_t o_group;
ext4_group_t n_group;
ext4_grpblk_t offset;
unsigned long n_desc_blocks;
unsigned long o_desc_blocks;
unsigned long desc_blocks;
int err = 0, flexbg_size = 1;
o_blocks_count = ext4_blocks_count(es);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
"upto %llu blocks\n", o_blocks_count, n_blocks_count);
if (n_blocks_count < o_blocks_count) {
/* On-line shrinking not supported */
ext4_warning(sb, "can't shrink FS - resize aborted");
return -EINVAL;
} }
if ((err = ext4_journal_get_write_access(handle, if (n_blocks_count == o_blocks_count)
EXT4_SB(sb)->s_sbh))) { /* Nothing need to do */
ext4_warning(sb, "error %d on journal write access", err); return 0;
ext4_journal_stop(handle);
goto exit_put; ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
EXT4_DESC_PER_BLOCK(sb);
o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
desc_blocks = n_desc_blocks - o_desc_blocks;
if (desc_blocks &&
(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
ext4_warning(sb, "No reserved GDT blocks, can't resize");
return -EPERM;
} }
ext4_blocks_count_set(es, o_blocks_count + add);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */
err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
ext4_handle_dirty_super(handle, sb);
ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
err2 = ext4_journal_stop(handle);
if (!err && err2)
err = err2;
if (err) resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
goto exit_put; if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
}
/* See if the device is actually as big as what was requested */
bh = sb_bread(sb, n_blocks_count - 1);
if (!bh) {
ext4_warning(sb, "can't read last block, resize aborted");
return -ENOSPC;
}
brelse(bh);
if (offset != 0) {
/* extend the last group */
ext4_grpblk_t add;
add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
err = ext4_group_extend_no_check(sb, o_blocks_count, add);
if (err)
goto out;
}
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
es->s_log_groups_per_flex)
flexbg_size = 1 << es->s_log_groups_per_flex;
o_blocks_count = ext4_blocks_count(es);
if (o_blocks_count == n_blocks_count)
goto out;
flex_gd = alloc_flex_gd(flexbg_size);
if (flex_gd == NULL) {
err = -ENOMEM;
goto out;
}
/* Add flex groups. Note that a regular group is a
* flex group with 1 group.
*/
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
flexbg_size)) {
ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
err = ext4_flex_group_add(sb, resize_inode, flex_gd);
if (unlikely(err))
break;
}
out:
if (flex_gd)
free_flex_gd(flex_gd);
iput(resize_inode);
if (test_opt(sb, DEBUG)) if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
ext4_blocks_count(es)); "upto %llu blocks\n", o_blocks_count, n_blocks_count);
update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block));
exit_put:
return err; return err;
} /* ext4_group_extend */ }
...@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root) ...@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
} }
if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
seq_printf(seq, ",max_batch_time=%u", seq_printf(seq, ",max_batch_time=%u",
(unsigned) sbi->s_min_batch_time); (unsigned) sbi->s_max_batch_time);
} }
/* /*
...@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb) ...@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
struct ext4_group_desc *gdp = NULL; struct ext4_group_desc *gdp = NULL;
ext4_group_t flex_group_count; ext4_group_t flex_group_count;
ext4_group_t flex_group; ext4_group_t flex_group;
int groups_per_flex = 0; unsigned int groups_per_flex = 0;
size_t size; size_t size;
int i; int i;
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
groups_per_flex = 1 << sbi->s_log_groups_per_flex; if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
if (groups_per_flex < 2) {
sbi->s_log_groups_per_flex = 0; sbi->s_log_groups_per_flex = 0;
return 1; return 1;
} }
groups_per_flex = 1 << sbi->s_log_groups_per_flex;
/* We allocate both existing and potentially added groups */ /* We allocate both existing and potentially added groups */
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
...@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ...@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* of the filesystem. * of the filesystem.
*/ */
if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
ext4_msg(sb, KERN_WARNING, "bad geometry: first data" ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
"block %u is beyond end of filesystem (%llu)", "block %u is beyond end of filesystem (%llu)",
le32_to_cpu(es->s_first_data_block), le32_to_cpu(es->s_first_data_block),
ext4_blocks_count(es)); ext4_blocks_count(es));
......
...@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name, ...@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
name, value, size, flags); name, value, size, flags);
} }
int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, static int
void *fs_info) ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *fs_info)
{ {
const struct xattr *xattr; const struct xattr *xattr;
handle_t *handle = fs_info; handle_t *handle = fs_info;
......
...@@ -429,6 +429,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -429,6 +429,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(3, "JBD2: commit phase 1\n"); jbd_debug(3, "JBD2: commit phase 1\n");
/*
* Clear the revoked flag to reflect that there are no revoked buffers
* in the next transaction which is going to be started.
*/
jbd2_clear_buffer_revoked_flags(journal);
/* /*
* Switch to a new revoke table. * Switch to a new revoke table.
*/ */
......
...@@ -47,6 +47,10 @@ ...@@ -47,6 +47,10 @@
* overwriting the new data. We don't even need to clear the revoke * overwriting the new data. We don't even need to clear the revoke
* bit here. * bit here.
* *
* We cache the revoke status of a buffer in the current transaction in the
* buffer's b_state bits. As the name says, the RevokeValid flag indicates that
* the cached revoke status of a buffer is valid and can be relied upon.
*
* Revoke information on buffers is a tri-state value: * Revoke information on buffers is a tri-state value:
* *
* RevokeValid clear: no cached revoke status, need to look it up * RevokeValid clear: no cached revoke status, need to look it up
...@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) ...@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
return did_revoke; return did_revoke;
} }
/*
 * jbd2_clear_buffer_revoked_flags - clear the revoked flag on buffers that
 * have a record in the journal's current revoke table.
 *
 * @journal: journal whose j_revoke table is walked
 *
 * Every hash chain of journal->j_revoke is visited.  For each revoke record
 * the buffer for record->blocknr on journal->j_fs_dev is looked up; when the
 * lookup yields a buffer, its revoked flag is cleared and the buffer is
 * released again.  This reflects that there are no revoked buffers in the
 * next transaction which is going to be started.
 */
void jbd2_clear_buffer_revoked_flags(journal_t *journal)
{
	struct jbd2_revoke_table_s *table = journal->j_revoke;
	struct list_head *pos;
	int bucket;

	for (bucket = 0; bucket < table->hash_size; bucket++) {
		list_for_each(pos, &table->hash_table[bucket]) {
			/*
			 * The cast treats the list node as the start of the
			 * revoke record it is embedded in.
			 */
			struct jbd2_revoke_record_s *rec =
				(struct jbd2_revoke_record_s *)pos;
			struct buffer_head *bh =
				__find_get_block(journal->j_fs_dev,
						 rec->blocknr,
						 journal->j_blocksize);

			/* Nothing to clear when the lookup finds no buffer. */
			if (!bh)
				continue;

			clear_buffer_revoked(bh);
			__brelse(bh);
		}
	}
}
/* journal_switch_revoke table select j_revoke for next transaction /* journal_switch_revoke table select j_revoke for next transaction
* we do not want to suspend any processing until all revokes are * we do not want to suspend any processing until all revokes are
* written -bzzz * written -bzzz
......
...@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal) ...@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal)
break; break;
spin_lock(&transaction->t_handle_lock); spin_lock(&transaction->t_handle_lock);
prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
if (!atomic_read(&transaction->t_updates)) { if (!atomic_read(&transaction->t_updates)) {
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
finish_wait(&journal->j_wait_updates, &wait);
break; break;
} }
prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
......
...@@ -1151,6 +1151,7 @@ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); ...@@ -1151,6 +1151,7 @@ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
extern void jbd2_journal_clear_revoke(journal_t *); extern void jbd2_journal_clear_revoke(journal_t *);
extern void jbd2_journal_switch_revoke_table(journal_t *journal); extern void jbd2_journal_switch_revoke_table(journal_t *journal);
extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);
/* /*
* The log thread user interface: * The log thread user interface:
......
...@@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ...@@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
); );
TRACE_EVENT(ext4_mb_release_group_pa, TRACE_EVENT(ext4_mb_release_group_pa,
TP_PROTO(struct ext4_prealloc_space *pa), TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa),
TP_ARGS(pa), TP_ARGS(sb, pa),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
...@@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa, ...@@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa,
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = pa->pa_inode->i_sb->s_dev; __entry->dev = sb->s_dev;
__entry->pa_pstart = pa->pa_pstart; __entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len; __entry->pa_len = pa->pa_len;
), ),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment