Commit 77a9874a authored by Andrew Morton, committed by James Bottomley

[PATCH] use spinlocking in the ext2 inode allocator

From Alex Tomas and myself

It is identical in concept to the block allocator change.  It uses the same
hashed spinlock.
parent c14c1a44
...@@ -63,6 +63,66 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group) ...@@ -63,6 +63,66 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group)
return bh; return bh;
} }
/*
* Speculatively reserve an inode in a blockgroup which used to have some
* spare ones. Later, when we come to actually claim the inode in the bitmap
* it may be that it was taken. In that case the allocator will undo this
* reservation and try again.
*
* The inode allocator does not physically alter the superblock. But we still
* set sb->s_dirt, because the superblock was "logically" altered - we need to
* go and add up the free inodes counts again and flush out the superblock.
*/
static void ext2_reserve_inode(struct super_block *sb, int group, int dir)
{
struct ext2_group_desc * desc;
struct buffer_head *bh;
desc = ext2_get_group_desc(sb, group, &bh);
if (!desc) {
ext2_error(sb, "ext2_reserve_inode",
"can't get descriptor for group %d", group);
return;
}
spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
desc->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
if (dir)
desc->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
if (dir)
percpu_counter_inc(&EXT2_SB(sb)->s_dirs_counter);
sb->s_dirt = 1;
mark_buffer_dirty(bh);
}
static void ext2_release_inode(struct super_block *sb, int group, int dir)
{
struct ext2_group_desc * desc;
struct buffer_head *bh;
desc = ext2_get_group_desc(sb, group, &bh);
if (!desc) {
ext2_error(sb, "ext2_release_inode",
"can't get descriptor for group %d", group);
return;
}
spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
desc->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
if (dir)
desc->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
if (dir)
percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
sb->s_dirt = 1;
mark_buffer_dirty(bh);
}
/* /*
* NOTE! When we get the inode, we're the only people * NOTE! When we get the inode, we're the only people
* that have access to it, and as such there are no * that have access to it, and as such there are no
...@@ -85,10 +145,8 @@ void ext2_free_inode (struct inode * inode) ...@@ -85,10 +145,8 @@ void ext2_free_inode (struct inode * inode)
int is_directory; int is_directory;
unsigned long ino; unsigned long ino;
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
unsigned long block_group; unsigned long block_group;
unsigned long bit; unsigned long bit;
struct ext2_group_desc * desc;
struct ext2_super_block * es; struct ext2_super_block * es;
ino = inode->i_ino; ino = inode->i_ino;
...@@ -105,7 +163,6 @@ void ext2_free_inode (struct inode * inode) ...@@ -105,7 +163,6 @@ void ext2_free_inode (struct inode * inode)
DQUOT_DROP(inode); DQUOT_DROP(inode);
} }
lock_super (sb);
es = EXT2_SB(sb)->s_es; es = EXT2_SB(sb)->s_es;
is_directory = S_ISDIR(inode->i_mode); is_directory = S_ISDIR(inode->i_mode);
...@@ -126,32 +183,17 @@ void ext2_free_inode (struct inode * inode) ...@@ -126,32 +183,17 @@ void ext2_free_inode (struct inode * inode)
goto error_return; goto error_return;
/* Ok, now we can actually update the inode bitmaps.. */ /* Ok, now we can actually update the inode bitmaps.. */
if (!ext2_clear_bit(bit, bitmap_bh->b_data)) if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
bit, (void *) bitmap_bh->b_data))
ext2_error (sb, "ext2_free_inode", ext2_error (sb, "ext2_free_inode",
"bit already cleared for inode %lu", ino); "bit already cleared for inode %lu", ino);
else { else
desc = ext2_get_group_desc (sb, block_group, &bh2); ext2_release_inode(sb, block_group, is_directory);
if (desc) {
desc->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
if (is_directory) {
desc->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
EXT2_SB(sb)->s_dir_count--;
}
}
mark_buffer_dirty(bh2);
es->s_free_inodes_count =
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
}
mark_buffer_dirty(bitmap_bh); mark_buffer_dirty(bitmap_bh);
if (sb->s_flags & MS_SYNCHRONOUS) if (sb->s_flags & MS_SYNCHRONOUS)
sync_dirty_buffer(bitmap_bh); sync_dirty_buffer(bitmap_bh);
sb->s_dirt = 1;
error_return: error_return:
brelse(bitmap_bh); brelse(bitmap_bh);
unlock_super (sb);
} }
/* /*
...@@ -211,9 +253,8 @@ static void ext2_preread_inode(struct inode *inode) ...@@ -211,9 +253,8 @@ static void ext2_preread_inode(struct inode *inode)
*/ */
static int find_group_dir(struct super_block *sb, struct inode *parent) static int find_group_dir(struct super_block *sb, struct inode *parent)
{ {
struct ext2_super_block * es = EXT2_SB(sb)->s_es;
int ngroups = EXT2_SB(sb)->s_groups_count; int ngroups = EXT2_SB(sb)->s_groups_count;
int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups; int avefreei = ext2_count_free_inodes(sb) / ngroups;
struct ext2_group_desc *desc, *best_desc = NULL; struct ext2_group_desc *desc, *best_desc = NULL;
struct buffer_head *bh, *best_bh = NULL; struct buffer_head *bh, *best_bh = NULL;
int group, best_group = -1; int group, best_group = -1;
...@@ -234,11 +275,9 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) ...@@ -234,11 +275,9 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
} }
if (!best_desc) if (!best_desc)
return -1; return -1;
best_desc->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1); ext2_reserve_inode(sb, best_group, 1);
best_desc->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1);
mark_buffer_dirty(best_bh);
return best_group; return best_group;
} }
...@@ -277,16 +316,23 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -277,16 +316,23 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
struct ext2_super_block *es = sbi->s_es; struct ext2_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count; int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT2_INODES_PER_GROUP(sb); int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups; int freei;
int free_blocks = percpu_counter_read(&sbi->s_freeblocks_counter); int avefreei;
int avefreeb = free_blocks / ngroups; int free_blocks;
int avefreeb;
int blocks_per_dir; int blocks_per_dir;
int ndirs = sbi->s_dir_count; int ndirs;
int max_debt, max_dirs, min_blocks, min_inodes; int max_debt, max_dirs, min_blocks, min_inodes;
int group = -1, i; int group = -1, i;
struct ext2_group_desc *desc; struct ext2_group_desc *desc;
struct buffer_head *bh; struct buffer_head *bh;
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
avefreei = freei / ngroups;
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
avefreeb = free_blocks / ngroups;
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
if ((parent == sb->s_root->d_inode) || if ((parent == sb->s_root->d_inode) ||
(parent->i_flags & EXT2_TOPDIR_FL)) { (parent->i_flags & EXT2_TOPDIR_FL)) {
struct ext2_group_desc *best_desc = NULL; struct ext2_group_desc *best_desc = NULL;
...@@ -321,7 +367,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -321,7 +367,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
goto fallback; goto fallback;
} }
blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - free_blocks) / ndirs; if (ndirs == 0)
ndirs = 1; /* percpu_counters are approximate... */
blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs;
max_dirs = ndirs / ngroups + inodes_per_group / 16; max_dirs = ndirs / ngroups + inodes_per_group / 16;
min_inodes = avefreei - inodes_per_group / 4; min_inodes = avefreei - inodes_per_group / 4;
...@@ -364,12 +413,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -364,12 +413,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
return -1; return -1;
found: found:
desc->bg_free_inodes_count = ext2_reserve_inode(sb, group, 1);
cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
desc->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1);
sbi->s_dir_count++;
mark_buffer_dirty(bh);
return group; return group;
} }
...@@ -431,9 +475,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent) ...@@ -431,9 +475,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
return -1; return -1;
found: found:
desc->bg_free_inodes_count = ext2_reserve_inode(sb, group, 0);
cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1);
mark_buffer_dirty(bh);
return group; return group;
} }
...@@ -456,7 +499,6 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -456,7 +499,6 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ei = EXT2_I(inode); ei = EXT2_I(inode);
lock_super (sb);
es = EXT2_SB(sb)->s_es; es = EXT2_SB(sb)->s_es;
repeat: repeat:
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
...@@ -480,7 +522,12 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -480,7 +522,12 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
EXT2_INODES_PER_GROUP(sb)); EXT2_INODES_PER_GROUP(sb));
if (i >= EXT2_INODES_PER_GROUP(sb)) if (i >= EXT2_INODES_PER_GROUP(sb))
goto bad_count; goto bad_count;
ext2_set_bit(i, bitmap_bh->b_data); if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group),
i, (void *) bitmap_bh->b_data)) {
brelse(bitmap_bh);
ext2_release_inode(sb, group, S_ISDIR(mode));
goto repeat;
}
mark_buffer_dirty(bitmap_bh); mark_buffer_dirty(bitmap_bh);
if (sb->s_flags & MS_SYNCHRONOUS) if (sb->s_flags & MS_SYNCHRONOUS)
...@@ -497,8 +544,7 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -497,8 +544,7 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
goto fail2; goto fail2;
} }
es->s_free_inodes_count = percpu_counter_mod(&EXT2_SB(sb)->s_freeinodes_counter, -1);
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
...@@ -510,7 +556,6 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -510,7 +556,6 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
} }
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
sb->s_dirt = 1; sb->s_dirt = 1;
inode->i_uid = current->fsuid; inode->i_uid = current->fsuid;
if (test_opt (sb, GRPID)) if (test_opt (sb, GRPID))
...@@ -551,7 +596,6 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -551,7 +596,6 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
inode->i_generation = EXT2_SB(sb)->s_next_generation++; inode->i_generation = EXT2_SB(sb)->s_next_generation++;
insert_inode_hash(inode); insert_inode_hash(inode);
unlock_super(sb);
if(DQUOT_ALLOC_INODE(inode)) { if(DQUOT_ALLOC_INODE(inode)) {
DQUOT_DROP(inode); DQUOT_DROP(inode);
goto fail3; goto fail3;
...@@ -573,15 +617,8 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -573,15 +617,8 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
return ERR_PTR(err); return ERR_PTR(err);
fail2: fail2:
desc = ext2_get_group_desc (sb, group, &bh2); ext2_release_inode(sb, group, S_ISDIR(mode));
desc->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
if (S_ISDIR(mode))
desc->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
mark_buffer_dirty(bh2);
fail: fail:
unlock_super(sb);
make_bad_inode(inode); make_bad_inode(inode);
iput(inode); iput(inode);
return ERR_PTR(err); return ERR_PTR(err);
...@@ -604,16 +641,19 @@ struct inode * ext2_new_inode(struct inode * dir, int mode) ...@@ -604,16 +641,19 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
unsigned long ext2_count_free_inodes (struct super_block * sb) unsigned long ext2_count_free_inodes (struct super_block * sb)
{ {
struct ext2_group_desc *desc;
unsigned long desc_count = 0;
int i;
#ifdef EXT2FS_DEBUG #ifdef EXT2FS_DEBUG
struct ext2_super_block * es; struct ext2_super_block * es;
unsigned long desc_count = 0, bitmap_count = 0; unsigned long bitmap_count = 0;
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
int i; int i;
lock_super (sb); lock_super (sb);
es = EXT2_SB(sb)->s_es; es = EXT2_SB(sb)->s_es;
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
struct ext2_group_desc *desc;
unsigned x; unsigned x;
desc = ext2_get_group_desc (sb, i, NULL); desc = ext2_get_group_desc (sb, i, NULL);
...@@ -632,11 +672,18 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) ...@@ -632,11 +672,18 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
} }
brelse(bitmap_bh); brelse(bitmap_bh);
printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); percpu_counter_read(EXT2_SB(sb)->s_freeinodes_counter),
desc_count, bitmap_count);
unlock_super(sb); unlock_super(sb);
return desc_count; return desc_count;
#else #else
return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_inodes_count); for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
desc = ext2_get_group_desc (sb, i, NULL);
if (!desc)
continue;
desc_count += le16_to_cpu(desc->bg_free_inodes_count);
}
return desc_count;
#endif #endif
} }
...@@ -686,7 +733,8 @@ void ext2_check_inodes_bitmap (struct super_block * sb) ...@@ -686,7 +733,8 @@ void ext2_check_inodes_bitmap (struct super_block * sb)
bitmap_count += x; bitmap_count += x;
} }
brelse(bitmap_bh); brelse(bitmap_bh);
if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count) if (percpu_counter_read(EXT2_SB(sb)->s_freeinodes_counter) !=
bitmap_count)
ext2_error(sb, "ext2_check_inodes_bitmap", ext2_error(sb, "ext2_check_inodes_bitmap",
"Wrong free inodes count in super block, " "Wrong free inodes count in super block, "
"stored = %lu, counted = %lu", "stored = %lu, counted = %lu",
......
...@@ -770,6 +770,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -770,6 +770,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount; goto failed_mount;
} }
percpu_counter_init(&sbi->s_freeblocks_counter); percpu_counter_init(&sbi->s_freeblocks_counter);
percpu_counter_init(&sbi->s_freeinodes_counter);
percpu_counter_init(&sbi->s_dirs_counter);
bgl_lock_init(&sbi->s_blockgroup_lock); bgl_lock_init(&sbi->s_blockgroup_lock);
sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
GFP_KERNEL); GFP_KERNEL);
...@@ -794,7 +796,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -794,7 +796,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2; goto failed_mount2;
} }
sbi->s_gdb_count = db_count; sbi->s_gdb_count = db_count;
sbi->s_dir_count = ext2_count_dirs(sb);
get_random_bytes(&sbi->s_next_generation, sizeof(u32)); get_random_bytes(&sbi->s_next_generation, sizeof(u32));
/* /*
* set up enough so that it can read an inode * set up enough so that it can read an inode
...@@ -818,6 +819,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ...@@ -818,6 +819,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
percpu_counter_mod(&sbi->s_freeblocks_counter, percpu_counter_mod(&sbi->s_freeblocks_counter,
ext2_count_free_blocks(sb)); ext2_count_free_blocks(sb));
percpu_counter_mod(&sbi->s_freeinodes_counter,
ext2_count_free_inodes(sb));
percpu_counter_mod(&sbi->s_dirs_counter,
ext2_count_dirs(sb));
return 0; return 0;
failed_mount2: failed_mount2:
for (i = 0; i < db_count; i++) for (i = 0; i < db_count; i++)
...@@ -845,6 +850,7 @@ static void ext2_commit_super (struct super_block * sb, ...@@ -845,6 +850,7 @@ static void ext2_commit_super (struct super_block * sb,
static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
{ {
es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
es->s_wtime = cpu_to_le32(get_seconds()); es->s_wtime = cpu_to_le32(get_seconds());
mark_buffer_dirty(EXT2_SB(sb)->s_sbh); mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
sync_dirty_buffer(EXT2_SB(sb)->s_sbh); sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
...@@ -874,6 +880,7 @@ void ext2_write_super (struct super_block * sb) ...@@ -874,6 +880,7 @@ void ext2_write_super (struct super_block * sb)
es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
~EXT2_VALID_FS); ~EXT2_VALID_FS);
es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
es->s_mtime = cpu_to_le32(get_seconds()); es->s_mtime = cpu_to_le32(get_seconds());
ext2_sync_super(sb, es); ext2_sync_super(sb, es);
} else } else
......
...@@ -49,6 +49,8 @@ struct ext2_sb_info { ...@@ -49,6 +49,8 @@ struct ext2_sb_info {
unsigned long s_dir_count; unsigned long s_dir_count;
u8 *s_debts; u8 *s_debts;
struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct blockgroup_lock s_blockgroup_lock; struct blockgroup_lock s_blockgroup_lock;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment