Commit adefe11c authored by Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: add missing ext4_journal_stop()
  ext4: ext4_find_next_zero_bit needs an aligned address on some arch
  ext4: set EXT4_EXTENTS_FL only for directory and regular files
  ext4: Don't mark filesystem error if fallocate fails
  ext4: Fix BUG when writing to an unitialized extent
  ext4: Don't use ext4_dec_count() if not needed
  ext4: modify block allocation algorithm for the last group
  ext4: Don't claim block from group which has corrupt bitmap
  ext4: Get journal write access before modifying the extent tree
  ext4: Fix memory and buffer head leak in callers to ext4_ext_find_extent()
  ext4: Don't leave behind a half-created inode if ext4_mkdir() fails
  ext4: Fix kernel BUG at fs/ext4/mballoc.c:910!
  ext4: Fix locking hierarchy violation in ext4_fallocate()
  Remove incorrect BKL comments in ext4
parents 3d6ce332 5606bf5d
......@@ -46,7 +46,7 @@ const struct file_operations ext4_dir_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file, /* BKL held */
.fsync = ext4_sync_file,
.release = ext4_release_dir,
};
......
......@@ -148,6 +148,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
{
struct ext4_inode_info *ei = EXT4_I(inode);
ext4_fsblk_t bg_start;
ext4_fsblk_t last_block;
ext4_grpblk_t colour;
int depth;
......@@ -169,8 +170,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
/* OK. use inode's group */
bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
colour = (current->pid % 16) *
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
colour = (current->pid % 16) *
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
else
colour = (current->pid % 16) * ((last_block - bg_start) / 16);
return bg_start + colour + block;
}
......@@ -349,7 +355,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
#define ext4_ext_show_leaf(inode,path)
#endif
static void ext4_ext_drop_refs(struct ext4_ext_path *path)
void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
int depth = path->p_depth;
int i;
......@@ -2168,6 +2174,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
newblock = iblock - ee_block + ext_pblock(ex);
ex2 = ex;
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/* ex1: ee_block to iblock - 1 : uninitialized */
if (iblock > ee_block) {
ex1 = ex;
......@@ -2200,16 +2210,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
newdepth = ext_depth(inode);
if (newdepth != depth) {
depth = newdepth;
path = ext4_ext_find_extent(inode, iblock, NULL);
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, iblock, path);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
goto out;
}
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex2 != &newex)
ex2 = ex;
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
}
allocated = max_blocks;
}
......@@ -2230,9 +2244,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex2->ee_len = cpu_to_le16(allocated);
if (ex2 != ex)
goto insert;
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/*
* New (initialized) extent starts from the first block
* in the current extent. i.e., ex2 == ex
......@@ -2276,9 +2287,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
}
/*
* Block allocation/map/preallocation routine for extents based files
*
*
* Need to be called with
* down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
* (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
*
* return > 0, number of blocks already mapped/allocated
* if create == 0 and these are pre-allocated blocks
* buffer head is unmapped
* otherwise blocks are mapped
*
* return = 0, if plain look up failed (blocks have not been allocated)
* buffer head is unmapped
*
* return < 0, error case.
*/
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock,
......@@ -2623,7 +2647,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
* modify 1 super block, 1 block bitmap and 1 group descriptor.
*/
credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
down_write((&EXT4_I(inode)->i_data_sem));
mutex_lock(&inode->i_mutex);
retry:
while (ret >= 0 && ret < max_blocks) {
block = block + ret;
......@@ -2634,16 +2658,17 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
break;
}
ret = ext4_ext_get_blocks(handle, inode, block,
ret = ext4_get_blocks_wrap(handle, inode, block,
max_blocks, &map_bh,
EXT4_CREATE_UNINITIALIZED_EXT, 0);
WARN_ON(ret <= 0);
if (ret <= 0) {
ext4_error(inode->i_sb, "ext4_fallocate",
"ext4_ext_get_blocks returned error: "
"inode#%lu, block=%u, max_blocks=%lu",
#ifdef EXT4FS_DEBUG
WARN_ON(ret <= 0);
printk(KERN_ERR "%s: ext4_ext_get_blocks "
"returned error inode#%lu, block=%u, "
"max_blocks=%lu", __func__,
inode->i_ino, block, max_blocks);
ret = -EIO;
#endif
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
break;
......@@ -2680,7 +2705,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
up_write((&EXT4_I(inode)->i_data_sem));
/*
* Time to update the file size.
* Update only when preallocation was requested beyond the file size.
......@@ -2692,21 +2716,18 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
* if no error, we assume preallocation succeeded
* completely
*/
mutex_lock(&inode->i_mutex);
i_size_write(inode, offset + len);
EXT4_I(inode)->i_disksize = i_size_read(inode);
mutex_unlock(&inode->i_mutex);
} else if (ret < 0 && nblocks) {
/* Handle partial allocation scenario */
loff_t newsize;
mutex_lock(&inode->i_mutex);
newsize = (nblocks << blkbits) + i_size_read(inode);
i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
EXT4_I(inode)->i_disksize = i_size_read(inode);
mutex_unlock(&inode->i_mutex);
}
}
mutex_unlock(&inode->i_mutex);
return ret > 0 ? ret2 : ret;
}
......@@ -702,7 +702,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
ei->i_dir_start_lookup = 0;
ei->i_disksize = 0;
ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL;
/*
* Don't inherit extent flag from directory. We set extent flag on
* newly created directory and file only if -o extent mount option is
* specified
*/
ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
if (S_ISLNK(mode))
ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
/* dirsync only applies to directories */
......@@ -745,12 +750,15 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
goto fail_free_drop;
}
if (test_opt(sb, EXTENTS)) {
EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
ext4_ext_tree_init(handle, inode);
err = ext4_update_incompat_feature(handle, sb,
EXT4_FEATURE_INCOMPAT_EXTENTS);
if (err)
goto fail;
/* set extent flag only for directory and file */
if (S_ISDIR(mode) || S_ISREG(mode)) {
EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
ext4_ext_tree_init(handle, inode);
err = ext4_update_incompat_feature(handle, sb,
EXT4_FEATURE_INCOMPAT_EXTENTS);
if (err)
goto fail;
}
}
ext4_debug("allocating inode %lu\n", inode->i_ino);
......
......@@ -403,6 +403,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
__le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
__le32 *p;
ext4_fsblk_t bg_start;
ext4_fsblk_t last_block;
ext4_grpblk_t colour;
/* Try to find previous block */
......@@ -420,8 +421,13 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
* into the same cylinder group then.
*/
bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
colour = (current->pid % 16) *
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
colour = (current->pid % 16) *
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
else
colour = (current->pid % 16) * ((last_block - bg_start) / 16);
return bg_start + colour;
}
......@@ -768,7 +774,6 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
*
* `handle' can be NULL if create == 0.
*
* The BKL may not be held on entry here. Be sure to take it early.
* return > 0, # of blocks mapped or allocated.
* return = 0, if plain lookup failed.
* return < 0, error case.
......@@ -903,11 +908,38 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
*/
#define DIO_CREDITS 25
/*
*
*
* ext4 get_block() wrapper function
* It will do a look up first, and return if the blocks are already mapped.
* Otherwise it takes the write lock of i_data_sem, allocates blocks,
* stores the allocated blocks in the result buffer head and marks it
* mapped.
*
* If the file is extents based, it will call ext4_ext_get_blocks().
* Otherwise, it calls ext4_get_blocks_handle() to handle indirect mapping
* based files.
*
* On success, it returns the number of blocks mapped or allocated.
* If create == 0 and the blocks are pre-allocated and uninitialized,
* the result buffer head is unmapped. If create == 1, it will make sure
* the buffer head is mapped.
*
* It returns 0 if plain look up failed (blocks have not been allocated), in
* that case, the buffer head is unmapped.
*
* It returns the error in case of allocation failure.
*/
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
int create, int extend_disksize)
{
int retval;
clear_buffer_mapped(bh);
/*
* Try to see if we can get the block without requesting
* for new file system block.
......@@ -921,12 +953,26 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
inode, block, max_blocks, bh, 0, 0);
}
up_read((&EXT4_I(inode)->i_data_sem));
if (!create || (retval > 0))
/* If it is only a block(s) look up */
if (!create)
return retval;
/*
* Return if the blocks have already been allocated.
*
* Note that if blocks have been preallocated,
* ext4_ext_get_blocks() called with create = 0 returns
* with the buffer head unmapped.
*/
if (retval > 0 && buffer_mapped(bh))
return retval;
/*
* We need to allocate new blocks which will result
* in i_data update
* New blocks allocate and/or writing to uninitialized extent
* will possibly result in updating i_data, so we take
* the write lock of i_data_sem, and call get_blocks()
* with create == 1 flag.
*/
down_write((&EXT4_I(inode)->i_data_sem));
/*
......
......@@ -627,21 +627,19 @@ static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
return block;
}
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
#define mb_correct_addr_and_bit(bit, addr) \
{ \
bit += ((unsigned long) addr & 7UL) << 3; \
addr = (void *) ((unsigned long) addr & ~7UL); \
}
*bit += ((unsigned long) addr & 7UL) << 3;
addr = (void *) ((unsigned long) addr & ~7UL);
#elif BITS_PER_LONG == 32
#define mb_correct_addr_and_bit(bit, addr) \
{ \
bit += ((unsigned long) addr & 3UL) << 3; \
addr = (void *) ((unsigned long) addr & ~3UL); \
}
*bit += ((unsigned long) addr & 3UL) << 3;
addr = (void *) ((unsigned long) addr & ~3UL);
#else
#error "how many bits you are?!"
#endif
return addr;
}
static inline int mb_test_bit(int bit, void *addr)
{
......@@ -649,34 +647,54 @@ static inline int mb_test_bit(int bit, void *addr)
* ext4_test_bit on architecture like powerpc
* needs unsigned long aligned address
*/
mb_correct_addr_and_bit(bit, addr);
addr = mb_correct_addr_and_bit(&bit, addr);
return ext4_test_bit(bit, addr);
}
static inline void mb_set_bit(int bit, void *addr)
{
mb_correct_addr_and_bit(bit, addr);
addr = mb_correct_addr_and_bit(&bit, addr);
ext4_set_bit(bit, addr);
}
static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
{
mb_correct_addr_and_bit(bit, addr);
addr = mb_correct_addr_and_bit(&bit, addr);
ext4_set_bit_atomic(lock, bit, addr);
}
static inline void mb_clear_bit(int bit, void *addr)
{
mb_correct_addr_and_bit(bit, addr);
addr = mb_correct_addr_and_bit(&bit, addr);
ext4_clear_bit(bit, addr);
}
static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
{
mb_correct_addr_and_bit(bit, addr);
addr = mb_correct_addr_and_bit(&bit, addr);
ext4_clear_bit_atomic(lock, bit, addr);
}
/*
 * Wrapper around ext4_find_next_zero_bit() for a bitmap whose address may
 * not be unsigned long aligned (some architectures need an aligned address).
 *
 * mb_correct_addr_and_bit() rounds @addr down to an aligned address and
 * reports, in bits, how far the original address sat past it.  That offset
 * is added to both @max and @start for the search, then subtracted from the
 * result so the returned index is again relative to the caller's @addr.
 */
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{
	int shift = 0;
	void *aligned = mb_correct_addr_and_bit(&shift, addr);

	return ext4_find_next_zero_bit(aligned, max + shift, start + shift) - shift;
}
/*
 * Wrapper around ext4_find_next_bit() for a bitmap whose address may not be
 * unsigned long aligned.
 *
 * The address is rounded down by mb_correct_addr_and_bit(), which also
 * yields the bit offset of the original address from the aligned one.  The
 * search limits @max and @start are moved up by that offset, and the offset
 * is removed from the result so it stays relative to the caller's @addr.
 */
static inline int mb_find_next_bit(void *addr, int max, int start)
{
	int shift = 0;
	void *aligned = mb_correct_addr_and_bit(&shift, addr);

	return ext4_find_next_bit(aligned, max + shift, start + shift) - shift;
}
static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
{
char *bb;
......@@ -906,7 +924,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
unsigned short chunk;
unsigned short border;
BUG_ON(len >= EXT4_BLOCKS_PER_GROUP(sb));
BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
border = 2 << sb->s_blocksize_bits;
......@@ -946,12 +964,12 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
/* initialize buddy from bitmap which is aggregation
* of on-disk bitmap and preallocations */
i = ext4_find_next_zero_bit(bitmap, max, 0);
i = mb_find_next_zero_bit(bitmap, max, 0);
grp->bb_first_free = i;
while (i < max) {
fragments++;
first = i;
i = ext4_find_next_bit(bitmap, max, i);
i = mb_find_next_bit(bitmap, max, i);
len = i - first;
free += len;
if (len > 1)
......@@ -959,7 +977,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
else
grp->bb_counters[0]++;
if (i < max)
i = ext4_find_next_zero_bit(bitmap, max, i);
i = mb_find_next_zero_bit(bitmap, max, i);
}
grp->bb_fragments = fragments;
......@@ -967,6 +985,10 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
ext4_error(sb, __FUNCTION__,
"EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
group, free, grp->bb_free);
/*
* If we intend to continue, we consider the group descriptor
* corrupt and update bb_free using the bitmap value
*/
grp->bb_free = free;
}
......@@ -1778,7 +1800,7 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
buddy = mb_find_buddy(e4b, i, &max);
BUG_ON(buddy == NULL);
k = ext4_find_next_zero_bit(buddy, max, 0);
k = mb_find_next_zero_bit(buddy, max, 0);
BUG_ON(k >= max);
ac->ac_found++;
......@@ -1818,11 +1840,11 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
i = e4b->bd_info->bb_first_free;
while (free && ac->ac_status == AC_STATUS_CONTINUE) {
i = ext4_find_next_zero_bit(bitmap,
i = mb_find_next_zero_bit(bitmap,
EXT4_BLOCKS_PER_GROUP(sb), i);
if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
/*
* IF we corrupt the bitmap we won't find any
* IF we have corrupt bitmap, we won't find any
* free blocks even though group info says
* we have free blocks
*/
......@@ -1838,6 +1860,12 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
ext4_error(sb, __FUNCTION__, "%d free blocks as per "
"group info. But got %d blocks\n",
free, ex.fe_len);
/*
* The number of free blocks differs. This mostly
* indicates that the bitmap is corrupt. So exit
* without claiming the space.
*/
break;
}
ext4_mb_measure_extent(ac, &ex, e4b);
......@@ -3740,10 +3768,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
}
while (bit < end) {
bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit);
bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
if (bit >= end)
break;
next = ext4_find_next_bit(bitmap_bh->b_data, end, bit);
next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
if (next > end)
next = end;
start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
......@@ -3771,6 +3799,10 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
(unsigned long) pa->pa_len);
ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
free, pa->pa_free);
/*
* pa is already deleted so we use the value obtained
* from the bitmap and continue.
*/
}
atomic_add(free, &sbi->s_mb_discarded);
if (ac)
......
......@@ -43,6 +43,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
if (IS_ERR(path)) {
retval = PTR_ERR(path);
path = NULL;
goto err_out;
}
......@@ -74,6 +75,10 @@ static int finish_range(handle_t *handle, struct inode *inode,
}
retval = ext4_ext_insert_extent(handle, inode, path, &newext);
err_out:
if (path) {
ext4_ext_drop_refs(path);
kfree(path);
}
lb->first_pblock = 0;
return retval;
}
......
......@@ -1804,12 +1804,8 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
inode->i_fop = &ext4_dir_operations;
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext4_bread (handle, inode, 0, 1, &err);
if (!dir_block) {
ext4_dec_count(handle, inode); /* is this nlink == 0? */
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
}
if (!dir_block)
goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
ext4_journal_get_write_access(handle, dir_block);
de = (struct ext4_dir_entry_2 *) dir_block->b_data;
......@@ -1832,7 +1828,8 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
ext4_mark_inode_dirty(handle, inode);
err = ext4_add_entry (handle, dentry, inode);
if (err) {
inode->i_nlink = 0;
out_clear_inode:
clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
......@@ -2164,7 +2161,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry)
dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
ext4_update_dx_flag(dir);
ext4_mark_inode_dirty(handle, dir);
ext4_dec_count(handle, inode);
drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
inode->i_ctime = ext4_current_time(inode);
......@@ -2214,7 +2211,7 @@ static int ext4_symlink (struct inode * dir,
err = __page_symlink(inode, symname, l,
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) {
ext4_dec_count(handle, inode);
clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
......@@ -2223,7 +2220,6 @@ static int ext4_symlink (struct inode * dir,
inode->i_op = &ext4_fast_symlink_inode_operations;
memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
inode->i_size = l-1;
EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
}
EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_add_nondir(handle, dentry, inode);
......@@ -2407,7 +2403,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
ext4_dec_count(handle, old_dir);
if (new_inode) {
/* checked empty_dir above, can't have another parent,
* ext3_dec_count() won't work for many-linked dirs */
* ext4_dec_count() won't work for many-linked dirs */
new_inode->i_nlink = 0;
} else {
ext4_inc_count(handle, new_dir);
......
......@@ -1037,6 +1037,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_warning(sb, __FUNCTION__,
"multiple resizers run on filesystem!");
unlock_super(sb);
ext4_journal_stop(handle);
err = -EBUSY;
goto exit_put;
}
......
......@@ -227,5 +227,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
#endif /* _LINUX_EXT4_EXTENTS */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment