Commit f41bfc94 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "A collection of bug fixes destined for stable and some printk cleanups
  and a patch so that instead of BUG'ing we use the ext4_error()
  framework to mark the file system is corrupted"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: add explicit casts when masking cluster sizes
  ext4: fix deadlock when writing in ENOSPC conditions
  jbd2: rename obsoleted msg JBD->JBD2
  jbd2: revise KERN_EMERG error messages
  jbd2: don't BUG but return ENOSPC if a handle runs out of space
  ext4: Do not reserve clusters when fs doesn't support extents
  ext4: fix del_timer() misuse for ->s_err_report
  ext4: check for overlapping extents in ext4_valid_extent_entries()
  ext4: fix use-after-free in ext4_mb_new_blocks
  ext4: call ext4_error_inode() if jbd2_journal_dirty_metadata() fails
parents c5fdd531 f5a44db5
......@@ -268,6 +268,16 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
/* Mask out the low bits to get the starting block of the cluster */
#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/* Get the cluster offset */
#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/*
* Structure of a blocks group descriptor
......
......@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
if (WARN_ON_ONCE(err)) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
ext4_error_inode(inode, where, line,
bh->b_blocknr,
"journal_dirty_metadata failed: "
"handle type %u started at line %u, "
"credits %u/%u, errcode %d",
handle->h_type,
handle->h_line_no,
handle->h_requested_credits,
handle->h_buffer_credits, err);
}
} else {
if (inode)
......
......@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
ext4_lblk_t last = lblock + len - 1;
if (len == 0)
if (lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
......@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
if (depth == 0) {
/* leaf entries */
struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
ext4_fsblk_t pblock = 0;
ext4_lblk_t lblock = 0;
ext4_lblk_t prev = 0;
int len = 0;
while (entries) {
if (!ext4_valid_extent(inode, ext))
return 0;
/* Check for overlapping extents */
lblock = le32_to_cpu(ext->ee_block);
len = ext4_ext_get_actual_len(ext);
if ((lblock <= prev) && prev) {
pblock = ext4_ext_pblock(ext);
es->s_last_error_block = cpu_to_le64(pblock);
return 0;
}
ext++;
entries--;
prev = lblock + len - 1;
}
} else {
struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
......@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
b2 = le32_to_cpu(path[depth].p_ext->ee_block);
b2 &= ~(sbi->s_cluster_ratio - 1);
b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
/*
* get the next allocated block if the extent in the path
......@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
b2 = ext4_ext_next_allocated_block(path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
b2 &= ~(sbi->s_cluster_ratio - 1);
b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/
......@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* extent, we have to mark the cluster as used (store negative
* cluster number in partial_cluster).
*/
unaligned = pblk & (sbi->s_cluster_ratio - 1);
unaligned = EXT4_PBLK_COFF(sbi, pblk);
if (unaligned && (ee_len == num) &&
(*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
*partial_cluster = EXT4_B2C(sbi, pblk);
......@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* accidentally freeing it later on
*/
pblk = ext4_ext_pblock(ex);
if (pblk & (sbi->s_cluster_ratio - 1))
if (EXT4_PBLK_COFF(sbi, pblk))
*partial_cluster =
-((long long)EXT4_B2C(sbi, pblk));
ex--;
......@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
......@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
/* Check towards left side */
c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
if (c_offset) {
lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
lblk_to = lblk_from + c_offset - 1;
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
......@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
/* Now check towards right. */
c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
if (allocated_clusters && c_offset) {
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
......@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
......@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
(rr_cluster_start == ex_cluster_start)) {
if (rr_cluster_start == ex_cluster_end)
ee_start += ee_len - 1;
map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
c_offset;
map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
map->m_len = min(map->m_len,
(unsigned) sbi->s_cluster_ratio - c_offset);
/*
......@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
cluster_offset = EXT4_LBLK_CMASK(sbi, map->m_lblk);
/*
* If we are doing bigalloc, check to see if the extent returned
......@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* needed so that future calls to get_implied_cluster_alloc()
* work correctly.
*/
offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
ar.goal -= offset;
ar.logical -= offset;
......
......@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file,
*/
static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
......@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
......@@ -1238,10 +1236,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
cond_resched();
goto repeat;
}
return -ENOSPC;
}
ei->i_reserved_meta_blocks += md_needed;
......@@ -1255,7 +1249,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
......@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
......@@ -1297,10 +1289,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
cond_resched();
goto repeat;
}
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
......
......@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
{
struct ext4_prealloc_space *pa;
pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
BUG_ON(atomic_read(&pa->pa_count));
BUG_ON(pa->pa_deleted == 0);
kmem_cache_free(ext4_pspace_cachep, pa);
}
......@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
ext4_group_t grp;
ext4_fsblk_t grp_blk;
if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
return;
/* in this short window concurrent discard can set pa_deleted */
spin_lock(&pa->pa_lock);
if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
spin_unlock(&pa->pa_lock);
return;
}
if (pa->pa_deleted == 1) {
spin_unlock(&pa->pa_lock);
return;
......@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
......@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* blocks at the beginning or the end unless we are explicitly
* requested to avoid doing so.
*/
overflow = block & (sbi->s_cluster_ratio - 1);
overflow = EXT4_PBLK_COFF(sbi, block);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
overflow = sbi->s_cluster_ratio - overflow;
......@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
count += overflow;
}
}
overflow = count & (sbi->s_cluster_ratio - 1);
overflow = EXT4_LBLK_COFF(sbi, count);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
if (count > overflow)
......
......@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb)
}
ext4_es_unregister_shrinker(sbi);
del_timer(&sbi->s_err_report);
del_timer_sync(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
......@@ -3316,10 +3316,18 @@ int ext4_calculate_overhead(struct super_block *sb)
}
static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
/*
* There's no need to reserve anything when we aren't using extents.
* The space estimates are exact, there are no unwritten extents,
* hole punching doesn't need new metadata... This is needed especially
* to keep ext2/3 backward compatibility.
*/
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
return 0;
/*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
......@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
EXT4_SB(sb)->s_cluster_bits;
do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
......@@ -4071,10 +4080,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"available");
}
err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
"reserved pool", ext4_calculate_resv_clusters(sbi));
"reserved pool", ext4_calculate_resv_clusters(sb));
goto failed_mount4a;
}
......@@ -4184,7 +4193,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
failed_mount3:
ext4_es_unregister_shrinker(sbi);
del_timer(&sbi->s_err_report);
del_timer_sync(&sbi->s_err_report);
if (sbi->s_flex_groups)
ext4_kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
......
......@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
read_lock(&journal->j_state_lock);
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_EMERG
printk(KERN_ERR
"%s: error: j_commit_request=%d, tid=%d\n",
__func__, journal->j_commit_request, tid);
}
......@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
}
read_unlock(&journal->j_state_lock);
if (unlikely(is_journal_aborted(journal))) {
printk(KERN_EMERG "journal commit I/O error\n");
if (unlikely(is_journal_aborted(journal)))
err = -EIO;
}
return err;
}
......@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal)
if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
/* Can't have checksum v1 and v2 on at the same time! */
printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 "
printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
"at the same time!\n");
goto out;
}
if (!jbd2_verify_csum_type(journal, sb)) {
printk(KERN_ERR "JBD: Unknown checksum type\n");
printk(KERN_ERR "JBD2: Unknown checksum type\n");
goto out;
}
......@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal)
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD: Cannot load crc32c driver.\n");
printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
err = PTR_ERR(journal->j_chksum_driver);
journal->j_chksum_driver = NULL;
goto out;
......@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal)
/* Check superblock checksum */
if (!jbd2_superblock_csum_verify(journal, sb)) {
printk(KERN_ERR "JBD: journal checksum error\n");
printk(KERN_ERR "JBD2: journal checksum error\n");
goto out;
}
......@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
journal->j_chksum_driver = crypto_alloc_shash("crc32c",
0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD: Cannot load crc32c "
printk(KERN_ERR "JBD2: Cannot load crc32c "
"driver.\n");
journal->j_chksum_driver = NULL;
return 0;
......@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void)
#ifdef CONFIG_JBD2_DEBUG
int n = atomic_read(&nr_journal_heads);
if (n)
printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
#endif
jbd2_remove_jbd_stats_proc_entry();
jbd2_journal_destroy_caches();
......
......@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal,
be32_to_cpu(tmp->h_sequence))) {
brelse(obh);
success = -EIO;
printk(KERN_ERR "JBD: Invalid "
printk(KERN_ERR "JBD2: Invalid "
"checksum recovering "
"block %llu in log\n",
blocknr);
......
......@@ -932,7 +932,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS);
if (!frozen_buffer) {
printk(KERN_EMERG
printk(KERN_ERR
"%s: OOM for frozen_buffer\n",
__func__);
JBUFFER_TRACE(jh, "oom!");
......@@ -1166,7 +1166,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
if (!jh->b_committed_data) {
committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
printk(KERN_ERR "%s: No memory for committed data\n",
__func__);
err = -ENOMEM;
goto out;
......@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* once a transaction -bzzz
*/
jh->b_modified = 1;
J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
if (handle->h_buffer_credits <= 0) {
ret = -ENOSPC;
goto out_unlock_bh;
}
handle->h_buffer_credits--;
}
......@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "fastpath");
if (unlikely(jh->b_transaction !=
journal->j_running_transaction)) {
printk(KERN_EMERG "JBD: %s: "
printk(KERN_ERR "JBD2: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_running_transaction (%p, %u)",
journal->j_devname,
......@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "already on other transaction");
if (unlikely(jh->b_transaction !=
journal->j_committing_transaction)) {
printk(KERN_EMERG "JBD: %s: "
printk(KERN_ERR "JBD2: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_committing_transaction (%p, %u)",
journal->j_devname,
......@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
ret = -EINVAL;
}
if (unlikely(jh->b_next_transaction != transaction)) {
printk(KERN_EMERG "JBD: %s: "
printk(KERN_ERR "JBD2: %s: "
"jh->b_next_transaction (%llu, %p, %u) != "
"transaction (%p, %u)",
journal->j_devname,
......@@ -1373,7 +1376,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
WARN_ON(ret); /* All errors are bugs, so dump the stack */
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment