Commit 19883bd9 authored by Theodore Ts'o

ext4: avoid reusing recently deleted inodes in no journal mode

In no journal mode, if an inode has recently been deleted, we
shouldn't reuse it right away.  Otherwise it's possible, after an
unclean shutdown, to hit a situation where a recently deleted inode
gets reused for some other purpose before the inode table block has
been written to disk.  If the new file's directory entry has already
been written out, then after the crash that directory entry will be
pointing at the old (deleted) inode contents.

E2fsck will make sure the file system is consistent after the
unclean shutdown.  However, if the recently deleted inode is a
character mode device, or an inode with the immutable bit set, even
after the file system has been fixed up by e2fsck, it can be
possible for a *.pyc file to be pointing at a character mode
device, and when python tries to open the *.pyc file, Hilarity
Ensues.  We could change all of userspace to be very suspicious
about stat'ing files before opening them, and clearing the
immutable flag if necessary --- or we can just avoid reusing an
inode number if it has been recently deleted.

Google-Bug-Id: 10017573
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 0e202704
...@@ -624,6 +624,51 @@ static int find_group_other(struct super_block *sb, struct inode *parent, ...@@ -624,6 +624,51 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
return -1; return -1;
} }
/*
 * In no journal mode, if an inode has recently been deleted, we want
 * to avoid reusing it until we're reasonably sure the inode table
 * block has been written back to disk.  (Yes, these values are
 * somewhat arbitrary...)
 *
 * Both values are in seconds.  RECENTCY_MIN is the base window after
 * the recorded deletion time; RECENTCY_DIRTY is added on top of it
 * while the buffer holding the inode is still dirty.
 */
#define RECENTCY_MIN	5
#define RECENTCY_DIRTY	30

/*
 * recently_deleted - check whether an on-disk inode slot was freed recently
 * @sb:    super block of the file system
 * @group: block group whose inode table is consulted
 * @ino:   index of the inode inside that group's inode table (the caller
 *         passes the bit index it found in the group's inode bitmap)
 *
 * Returns 1 if the cached copy of the inode carries a deletion time that
 * lies in the past but is still within the recentcy window, 0 otherwise.
 * 0 is also returned when the group descriptor cannot be obtained or when
 * the inode table block is not present (or not up to date) in the buffer
 * cache.
 */
static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
{
	struct ext4_group_desc *gdp;
	struct ext4_inode *raw_inode;
	struct buffer_head *bh;
	unsigned long dtime, now;
	int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	int offset, ret = 0, recentcy = RECENTCY_MIN;

	gdp = ext4_get_group_desc(sb, group, NULL);
	if (unlikely(!gdp))
		return 0;

	/*
	 * Only look the block up; do not instantiate a buffer for it.
	 * sb_getblk() would create a new buffer head when the block is
	 * not cached, which is wasted work here since a missing buffer
	 * already answers the question.
	 */
	bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
			       (ino / inodes_per_block));
	if (!bh || !buffer_uptodate(bh))
		/*
		 * If the block is not in the buffer cache, then it
		 * must have been written out.
		 */
		goto out;

	offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
	raw_inode = (struct ext4_inode *) (bh->b_data + offset);
	dtime = le32_to_cpu(raw_inode->i_dtime);
	now = get_seconds();

	/* A dirty buffer has not reached disk yet, so wait longer. */
	if (buffer_dirty(bh))
		recentcy += RECENTCY_DIRTY;

	/*
	 * dtime == 0 means no deletion time is recorded; a dtime that is
	 * not strictly in the past is ignored as well.
	 */
	if (dtime && (dtime < now) && (now < dtime + recentcy))
		ret = 1;
out:
	brelse(bh);	/* brelse() tolerates a NULL buffer head */
	return ret;
}
/* /*
* There are two policies for allocating an inode. If the new inode is * There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both * a directory, then a forward search is made for a block group with both
...@@ -741,6 +786,11 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, ...@@ -741,6 +786,11 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
"inode=%lu", ino + 1); "inode=%lu", ino + 1);
continue; continue;
} }
if ((EXT4_SB(sb)->s_journal == NULL) &&
recently_deleted(sb, group, ino)) {
ino++;
goto next_inode;
}
if (!handle) { if (!handle) {
BUG_ON(nblocks <= 0); BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no, handle = __ext4_journal_start_sb(dir->i_sb, line_no,
...@@ -764,6 +814,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, ...@@ -764,6 +814,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ino++; /* the inode bitmap is zero-based */ ino++; /* the inode bitmap is zero-based */
if (!ret2) if (!ret2)
goto got; /* we grabbed the inode! */ goto got; /* we grabbed the inode! */
next_inode:
if (ino < EXT4_INODES_PER_GROUP(sb)) if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group; goto repeat_in_this_group;
next_group: next_group:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment