Commit c14c1a44 authored by Andrew Morton's avatar Andrew Morton Committed by James Bottomley

[PATCH] use spinlocking in the ext2 block allocator

From Alex Tomas and myself

ext2 currently uses lock_super() to protect the filesystem's in-core block
allocation bitmaps.

On big SMP machines the contention on that semaphore is causing high context
switch rates, large amounts of idle time and reduced throughput.

The context switch rate can also worsen block allocation: if several tasks
are trying to allocate blocks inside the same blockgroup for different files,
madly rotating between those tasks will cause the files' blocks to be
intermingled.

On SDET and dbench-style workloads (lots of tasks doing lots of allocation)
this patch (and a similar one for the inode allocator) improves throughput on
an 8-way by ~15%.  On 16-way NUMAQ the speedup is 150%.

What we do is to remove the lock altogether and just rely on the atomic
semantics of test_and_set_bit(): if the allocator sees a block was free it
runs test_and_set_bit().  If that fails, then we raced and the allocator will
go and look for another block.

Of course, we don't really use test_and_set_bit() because that
isn't endian-independent.  New atomic endian-independent functions are
introduced: ext2_set_bit_atomic() and ext2_clear_bit_atomic().  We do not
need ext2_test_bit_atomic(), since even if ext2_test_bit() returns the wrong
result, that error will be detected and naturally handled in the subsequent
ext2_set_bit_atomic().

For little-endian machines the new atomic ops map directly onto the
test_and_set_bit(), etc.

For big-endian machines we provide the architecture's implementation with the
address of a spinlock which can be taken around the nonatomic ext2_set_bit().
 The spinlocks are hashed, and the hash is scaled according to the machine
size.  Architectures are free to implement optimised versions of
ext2_set_bit_atomic() and ext2_clear_bit_atomic().
parent c9db333a
This diff is collapsed.
......@@ -278,7 +278,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
int avefreeb = le32_to_cpu(es->s_free_blocks_count) / ngroups;
int free_blocks = percpu_counter_read(&sbi->s_freeblocks_counter);
int avefreeb = free_blocks / ngroups;
int blocks_per_dir;
int ndirs = sbi->s_dir_count;
int max_debt, max_dirs, min_blocks, min_inodes;
......@@ -320,8 +321,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
goto fallback;
}
blocks_per_dir = (le32_to_cpu(es->s_blocks_count) -
le32_to_cpu(es->s_free_blocks_count)) / ndirs;
blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - free_blocks) / ndirs;
max_dirs = ndirs / ngroups + inodes_per_group / 16;
min_inodes = avefreei - inodes_per_group / 4;
......@@ -500,6 +500,7 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
es->s_free_inodes_count =
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
if (S_ISDIR(mode)) {
if (EXT2_SB(sb)->s_debts[group] < 255)
EXT2_SB(sb)->s_debts[group]++;
......@@ -507,6 +508,7 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
if (EXT2_SB(sb)->s_debts[group])
EXT2_SB(sb)->s_debts[group]--;
}
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
sb->s_dirt = 1;
......
......@@ -769,6 +769,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
printk ("EXT2-fs: not enough memory\n");
goto failed_mount;
}
percpu_counter_init(&sbi->s_freeblocks_counter);
bgl_lock_init(&sbi->s_blockgroup_lock);
sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
GFP_KERNEL);
if (!sbi->s_debts) {
......@@ -814,6 +816,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_warning(sb, __FUNCTION__,
"mounting ext3 filesystem as ext2\n");
ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
percpu_counter_mod(&sbi->s_freeblocks_counter,
ext2_count_free_blocks(sb));
return 0;
failed_mount2:
for (i = 0; i < db_count; i++)
......@@ -840,6 +844,7 @@ static void ext2_commit_super (struct super_block * sb,
static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
{
es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
es->s_wtime = cpu_to_le32(get_seconds());
mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
......@@ -868,6 +873,7 @@ void ext2_write_super (struct super_block * sb)
ext2_debug ("setting valid to 0\n");
es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) &
~EXT2_VALID_FS);
es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
es->s_mtime = cpu_to_le32(get_seconds());
ext2_sync_super(sb, es);
} else
......@@ -965,7 +971,7 @@ static int ext2_statfs (struct super_block * sb, struct statfs * buf)
buf->f_type = EXT2_SUPER_MAGIC;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead;
buf->f_bfree = ext2_count_free_blocks (sb);
buf->f_bfree = ext2_count_free_blocks(sb);
buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count))
buf->f_bavail = 0;
......
......@@ -487,7 +487,9 @@ sched_find_first_bit(unsigned long b[3])
/*
 * ext2 bitmap helpers for this architecture: each one is a direct alias
 * for the corresponding native bit operation.
 *
 * The _atomic variants take a lock argument (l) so that every architecture
 * offers the same three-argument interface, but it is ignored here: they
 * expand to test_and_set_bit()/test_and_clear_bit() with only (n, a).
 */
#define ext2_set_bit __test_and_set_bit
#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
#define ext2_clear_bit __test_and_clear_bit
#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
#define ext2_test_bit test_bit
#define ext2_find_first_zero_bit find_first_zero_bit
#define ext2_find_next_zero_bit find_next_zero_bit
......
......@@ -357,8 +357,12 @@ static inline int sched_find_first_bit(unsigned long *b)
*/
/*
 * ext2 bitmap helpers: every bit number is first translated with
 * WORD_BITOFF_TO_LE() and the bitmap pointer is cast to unsigned long *.
 *
 * ext2_set_bit()/ext2_clear_bit() use the double-underscore bitops, while
 * the _atomic variants use test_and_set_bit()/test_and_clear_bit().  The
 * lock argument of the _atomic variants is accepted but not used.
 */
#define ext2_set_bit(nr,p) \
__test_and_set_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
#define ext2_set_bit_atomic(lock,nr,p) \
test_and_set_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
#define ext2_clear_bit(nr,p) \
__test_and_clear_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
#define ext2_clear_bit_atomic(lock,nr,p) \
test_and_clear_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
#define ext2_test_bit(nr,p) \
__test_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p))
#define ext2_find_first_zero_bit(p,sz) \
......
......@@ -360,7 +360,9 @@ static inline int find_next_zero_bit (void * addr, int size, int offset)
#define hweight8(x) generic_hweight8(x)
/*
 * ext2 bitmap helpers for this architecture: plain aliases for the native
 * bit operations.
 *
 * Both ext2_set_bit and ext2_set_bit_atomic end up at test_and_set_bit()
 * (likewise for the clear variants); the lock argument (l) of the _atomic
 * forms is accepted for interface compatibility and then dropped.
 */
#define ext2_set_bit test_and_set_bit
#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
#define ext2_clear_bit test_and_clear_bit
#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
#define ext2_test_bit test_bit
#define ext2_find_first_zero_bit find_first_zero_bit
#define ext2_find_next_zero_bit find_next_zero_bit
......
......@@ -479,8 +479,12 @@ static __inline__ int ffs(int x)
/*
 * ext2 bitmap helpers: thin wrappers that cast the bitmap pointer to
 * unsigned long * and forward to the native bit operations.
 *
 * The _atomic variants accept a lock argument for interface compatibility
 * but do not use it; they forward to test_and_set_bit()/
 * test_and_clear_bit().
 *
 * addr is parenthesized before the cast so that an expression argument
 * such as "base + off" is cast as a whole instead of only its first
 * operand.
 */
#define ext2_set_bit(nr,addr) \
	__test_and_set_bit((nr), (unsigned long *)(addr))
#define ext2_set_bit_atomic(lock,nr,addr) \
	test_and_set_bit((nr), (unsigned long *)(addr))
#define ext2_clear_bit(nr, addr) \
	__test_and_clear_bit((nr), (unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock,nr, addr) \
	test_and_clear_bit((nr), (unsigned long *)(addr))
#define ext2_test_bit(nr, addr) test_bit((nr), (unsigned long *)(addr))
#define ext2_find_first_zero_bit(addr, size) \
	find_first_zero_bit((unsigned long *)(addr), (size))
......
......@@ -453,7 +453,9 @@ find_next_bit (void *addr, unsigned long size, unsigned long offset)
#define __clear_bit(nr, addr) clear_bit(nr, addr)
/*
 * ext2 bitmap helpers for this architecture: plain aliases for the native
 * bit operations.
 *
 * The _atomic variants take a lock argument (l) so that all architectures
 * share the same three-argument interface; it is not used here because the
 * macros expand directly to test_and_set_bit()/test_and_clear_bit().
 */
#define ext2_set_bit test_and_set_bit
#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
#define ext2_clear_bit test_and_clear_bit
#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
/*
 * These two were originally (mis)named without "_bit"; the old spellings
 * are kept as aliases so nothing that picked them up stops building.
 */
#define ext2_set_atomic(l,n,a) ext2_set_bit_atomic(l,n,a)
#define ext2_clear_atomic(l,n,a) ext2_clear_bit_atomic(l,n,a)
#define ext2_test_bit test_bit
#define ext2_find_first_zero_bit find_first_zero_bit
#define ext2_find_next_zero_bit find_next_zero_bit
......
......@@ -365,6 +365,24 @@ ext2_clear_bit (int nr, volatile void *vaddr)
return retval;
}
/*
 * Atomic set/clear for an architecture whose ext2_set_bit() and
 * ext2_clear_bit() are not atomic: the non-atomic operation runs with the
 * caller-supplied spinlock held, and whatever it returns becomes the value
 * of the whole expression.
 *
 * The lock argument is evaluated exactly once, and the macro-local
 * variables use ext2_bit_..._ names so they cannot capture an identifier
 * (for instance "ret") that appears in the nr or addr arguments.
 */
#define ext2_set_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_set_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_clear_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
extern __inline__ int
ext2_test_bit (int nr, const volatile void *vaddr)
{
......
......@@ -402,6 +402,24 @@ extern __inline__ int ext2_clear_bit(int nr, volatile void * addr)
return retval;
}
/*
 * Atomic set/clear for an architecture whose ext2_set_bit() and
 * ext2_clear_bit() are not atomic: the non-atomic operation runs with the
 * caller-supplied spinlock held, and whatever it returns becomes the value
 * of the whole expression.
 *
 * The lock argument is evaluated exactly once, and the macro-local
 * variables use ext2_bit_..._ names so they cannot capture an identifier
 * (for instance "ret") that appears in the nr or addr arguments.
 */
#define ext2_set_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_set_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_clear_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
extern __inline__ int ext2_test_bit(int nr, const volatile void * addr)
{
int mask;
......
......@@ -824,6 +824,24 @@ extern __inline__ int ext2_clear_bit(int nr, void * addr)
return retval;
}
/*
 * Atomic set/clear for an architecture whose ext2_set_bit() and
 * ext2_clear_bit() are not atomic: the non-atomic operation runs with the
 * caller-supplied spinlock held, and whatever it returns becomes the value
 * of the whole expression.
 *
 * The lock argument is evaluated exactly once, and the macro-local
 * variables use ext2_bit_..._ names so they cannot capture an identifier
 * (for instance "ret") that appears in the nr or addr arguments.
 */
#define ext2_set_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_set_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_clear_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
extern __inline__ int ext2_test_bit(int nr, const void * addr)
{
int mask;
......@@ -890,7 +908,9 @@ extern __inline__ unsigned long ext2_find_next_zero_bit(void *addr, unsigned lon
/*
 * Native ext2 byte ordering, just collapse using defines.
 *
 * Every helper forwards to the native bit operation of the same kind.
 * The plain and _atomic set/clear variants expand to the same
 * test_and_set_bit()/test_and_clear_bit() call; the lock argument of the
 * _atomic variants is accepted but never used.
 */
#define ext2_set_bit(nr, addr) test_and_set_bit((nr), (addr))
#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr), (addr))
#define ext2_clear_bit(nr, addr) test_and_clear_bit((nr), (addr))
#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr), (addr))
#define ext2_test_bit(nr, addr) test_bit((nr), (addr))
#define ext2_find_first_zero_bit(addr, size) find_first_zero_bit((addr), (size))
#define ext2_find_next_zero_bit(addr, size, offset) \
......
......@@ -531,6 +531,24 @@ ext2_clear_bit(int nr, void * addr)
return retval;
}
/*
 * Atomic set/clear for an architecture whose ext2_set_bit() and
 * ext2_clear_bit() are not atomic: the non-atomic operation runs with the
 * caller-supplied spinlock held, and whatever it returns becomes the value
 * of the whole expression.
 *
 * The lock argument is evaluated exactly once, and the macro-local
 * variables use ext2_bit_..._ names so they cannot capture an identifier
 * (for instance "ret") that appears in the nr or addr arguments.
 */
#define ext2_set_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_set_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_clear_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
extern inline int
ext2_test_bit(int nr, const void * addr)
{
......@@ -599,7 +617,9 @@ ext2_find_next_zero_bit(void *addr, unsigned long size, unsigned long offset)
/*
 * Native ext2 byte ordering, just collapse using defines.
 *
 * Every helper forwards to the native bit operation of the same kind.
 * The plain and _atomic set/clear variants expand to the same
 * test_and_set_bit()/test_and_clear_bit() call; the lock argument of the
 * _atomic variants is accepted but never used.
 */
#define ext2_set_bit(nr, addr) test_and_set_bit((nr), (addr))
#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr), (addr))
#define ext2_clear_bit(nr, addr) test_and_clear_bit((nr), (addr))
#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr), (addr))
#define ext2_test_bit(nr, addr) test_bit((nr), (addr))
#define ext2_find_first_zero_bit(addr, size) find_first_zero_bit((addr), (size))
#define ext2_find_next_zero_bit(addr, size, offset) \
......
......@@ -389,10 +389,14 @@ static __inline__ unsigned long find_next_bit(unsigned long *addr, unsigned long
*/
/*
 * ext2 set/clear helpers built on the native test_and_set_bit()/
 * test_and_clear_bit() with a remapped bit number.
 *
 * XOR-ing nr with 0x38 flips bits 3-5 of the bit number, i.e. the byte
 * index inside a 64-bit word; 0x18 flips bits 3-4, the byte index inside a
 * 32-bit word.  The constant is chosen by __LP64__.
 * NOTE(review): presumably this maps ext2's little-endian bitmap layout
 * onto a big-endian CPU -- confirm against the surrounding header.
 *
 * The plain and _atomic variants expand identically; the lock argument (l)
 * of the _atomic variants is not used.
 */
#ifdef __LP64__
#define ext2_set_bit(nr, addr) test_and_set_bit((nr) ^ 0x38, addr)
#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x38, addr)
#define ext2_clear_bit(nr, addr) test_and_clear_bit((nr) ^ 0x38, addr)
#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x38, addr)
#else
#define ext2_set_bit(nr, addr) test_and_set_bit((nr) ^ 0x18, addr)
#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x18, addr)
#define ext2_clear_bit(nr, addr) test_and_clear_bit((nr) ^ 0x18, addr)
#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x18, addr)
#endif
#endif /* __KERNEL__ */
......
......@@ -392,7 +392,9 @@ static __inline__ unsigned long find_next_zero_bit(unsigned long * addr,
/*
 * ext2 set/clear helpers: the bit number is XOR-ed with 0x18 (flipping
 * bits 3-4, the byte index inside a 32-bit word) and the bitmap pointer is
 * cast to unsigned long *.
 *
 * ext2_set_bit()/ext2_clear_bit() use the double-underscore bitops; the
 * _atomic variants use test_and_set_bit()/test_and_clear_bit() and ignore
 * their lock argument.
 */
#define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr))
#define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr))
static __inline__ int ext2_test_bit(int nr, __const__ void * addr)
{
......
......@@ -338,6 +338,25 @@ static __inline__ int __test_and_clear_le_bit(unsigned long nr, unsigned long *a
__test_and_set_le_bit((nr),(unsigned long*)addr)
#define ext2_clear_bit(nr, addr) \
__test_and_clear_le_bit((nr),(unsigned long*)addr)
/*
 * Atomic set/clear for an architecture whose ext2_set_bit() and
 * ext2_clear_bit() are not atomic: the non-atomic operation runs with the
 * caller-supplied spinlock held, and whatever it returns becomes the value
 * of the whole expression.
 *
 * The lock argument is evaluated exactly once, and the macro-local
 * variables use ext2_bit_..._ names so they cannot capture an identifier
 * (for instance "ret") that appears in the nr or addr arguments.
 */
#define ext2_set_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_set_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_clear_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr)
#define ext2_find_first_zero_bit(addr, size) \
find_first_zero_le_bit((unsigned long*)addr, size)
......
......@@ -805,8 +805,12 @@ extern __inline__ int fls(int x)
/*
 * ext2 bitmap helpers: the bit number is XOR-ed with 24 (flipping bits
 * 3-4, the byte index inside a 32-bit word) and the bitmap pointer is cast
 * to unsigned long * before the native bit operation is applied.
 *
 * The plain and _atomic set/clear variants expand identically; the lock
 * argument of the _atomic variants is accepted but not used.
 *
 * addr is parenthesized before the cast so that an expression argument
 * such as "base + off" is cast as a whole instead of only its first
 * operand.
 */
#define ext2_set_bit(nr, addr) \
	test_and_set_bit((nr)^24, (unsigned long *)(addr))
#define ext2_set_bit_atomic(lock, nr, addr) \
	test_and_set_bit((nr)^24, (unsigned long *)(addr))
#define ext2_clear_bit(nr, addr) \
	test_and_clear_bit((nr)^24, (unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock, nr, addr) \
	test_and_clear_bit((nr)^24, (unsigned long *)(addr))
#define ext2_test_bit(nr, addr) \
	test_bit((nr)^24, (unsigned long *)(addr))
......
......@@ -838,8 +838,12 @@ extern __inline__ int fls(int x)
/*
 * ext2 bitmap helpers: the bit number is XOR-ed with 56 (flipping bits
 * 3-5, the byte index inside a 64-bit word) and the bitmap pointer is cast
 * to unsigned long * before the native bit operation is applied.
 *
 * The plain and _atomic set/clear variants expand identically; the lock
 * argument of the _atomic variants is accepted but not used.
 *
 * addr is parenthesized before the cast so that an expression argument
 * such as "base + off" is cast as a whole instead of only its first
 * operand.
 */
#define ext2_set_bit(nr, addr) \
	test_and_set_bit((nr)^56, (unsigned long *)(addr))
#define ext2_set_bit_atomic(lock, nr, addr) \
	test_and_set_bit((nr)^56, (unsigned long *)(addr))
#define ext2_clear_bit(nr, addr) \
	test_and_clear_bit((nr)^56, (unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock, nr, addr) \
	test_and_clear_bit((nr)^56, (unsigned long *)(addr))
#define ext2_test_bit(nr, addr) \
	test_bit((nr)^56, (unsigned long *)(addr))
......
......@@ -344,6 +344,24 @@ static __inline__ unsigned long ext2_find_next_zero_bit(void *addr, unsigned lon
}
#endif
/*
 * Atomic set/clear for an architecture whose ext2_set_bit() and
 * ext2_clear_bit() are not atomic: the non-atomic operation runs with the
 * caller-supplied spinlock held, and whatever it returns becomes the value
 * of the whole expression.
 *
 * The lock argument is evaluated exactly once, and the macro-local
 * variables use ext2_bit_..._ names so they cannot capture an identifier
 * (for instance "ret") that appears in the nr or addr arguments.
 */
#define ext2_set_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_set_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_clear_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
/* Bitmap functions for the minix filesystem. */
#define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr)
#define minix_set_bit(nr,addr) set_bit(nr,addr)
......
......@@ -455,6 +455,25 @@ static __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long
#define ext2_set_bit __test_and_set_le_bit
#define ext2_clear_bit __test_and_clear_le_bit
/*
 * Atomic set/clear for an architecture whose ext2_set_bit() and
 * ext2_clear_bit() are not atomic: the non-atomic operation runs with the
 * caller-supplied spinlock held, and whatever it returns becomes the value
 * of the whole expression.
 *
 * The lock argument is evaluated exactly once, and the macro-local
 * variables use ext2_bit_..._ names so they cannot capture an identifier
 * (for instance "ret") that appears in the nr or addr arguments.
 */
#define ext2_set_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_set_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_clear_bit_atomic(lock, nr, addr)			\
	({							\
		__typeof__(lock) ext2_bit_lock_ = (lock);	\
		int ext2_bit_ret_;				\
		spin_lock(ext2_bit_lock_);			\
		ext2_bit_ret_ = ext2_clear_bit((nr), (addr));	\
		spin_unlock(ext2_bit_lock_);			\
		ext2_bit_ret_;					\
	})
#define ext2_test_bit test_le_bit
#define ext2_find_first_zero_bit find_first_zero_le_bit
#define ext2_find_next_zero_bit find_next_zero_le_bit
......
......@@ -351,7 +351,9 @@ static __inline__ unsigned long find_next_zero_le_bit(unsigned long *addr, unsig
#ifdef __KERNEL__
/*
 * ext2 bitmap helpers expressed through the *_le_bit() operations, with
 * the bitmap pointer cast to unsigned long *.
 *
 * The plain and _atomic set/clear variants expand to the very same
 * test_and_set_le_bit()/test_and_clear_le_bit() call; the lock argument of
 * the _atomic variants is accepted but not used.
 * NOTE(review): this relies on the *_le_bit() helpers already being
 * atomic on this architecture -- confirm against their definitions.
 */
#define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
#define ext2_set_bit_atomic(lock,nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr))
#define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock,nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr))
#define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr))
#define ext2_find_first_zero_bit(addr, size) \
find_first_zero_le_bit((unsigned long *)(addr), (size))
......
......@@ -252,7 +252,9 @@ static inline int sched_find_first_bit(unsigned long *b)
#define hweight8(x) generic_hweight8 (x)
/*
 * ext2 bitmap helpers for this architecture: plain aliases for the native
 * bit operations.
 *
 * Both ext2_set_bit and ext2_set_bit_atomic end up at test_and_set_bit()
 * (likewise for the clear variants); the lock argument (l) of the _atomic
 * forms is accepted for interface compatibility and then dropped.
 */
#define ext2_set_bit test_and_set_bit
#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a)
#define ext2_clear_bit test_and_clear_bit
#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a)
#define ext2_test_bit test_bit
#define ext2_find_first_zero_bit find_first_zero_bit
#define ext2_find_next_zero_bit find_next_zero_bit
......
......@@ -487,8 +487,12 @@ static __inline__ int ffs(int x)
/*
 * ext2 bitmap helpers: thin wrappers that cast the bitmap pointer to
 * unsigned long * and forward to the native bit operations.
 *
 * The _atomic variants accept a lock argument for interface compatibility
 * but do not use it; they forward to test_and_set_bit()/
 * test_and_clear_bit().
 *
 * addr is parenthesized before the cast so that an expression argument
 * such as "base + off" is cast as a whole instead of only its first
 * operand.
 */
#define ext2_set_bit(nr,addr) \
	__test_and_set_bit((nr), (unsigned long *)(addr))
#define ext2_set_bit_atomic(lock,nr,addr) \
	test_and_set_bit((nr), (unsigned long *)(addr))
#define ext2_clear_bit(nr, addr) \
	__test_and_clear_bit((nr), (unsigned long *)(addr))
#define ext2_clear_bit_atomic(lock,nr,addr) \
	test_and_clear_bit((nr), (unsigned long *)(addr))
#define ext2_test_bit(nr, addr) test_bit((nr), (unsigned long *)(addr))
#define ext2_find_first_zero_bit(addr, size) \
	find_first_zero_bit((unsigned long *)(addr), (size))
......
......@@ -16,6 +16,9 @@
#ifndef _LINUX_EXT2_FS_SB
#define _LINUX_EXT2_FS_SB
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
/*
* second extended-fs super-block data in memory
*/
......@@ -45,6 +48,8 @@ struct ext2_sb_info {
u32 s_next_generation;
unsigned long s_dir_count;
u8 *s_debts;
struct percpu_counter s_freeblocks_counter;
struct blockgroup_lock s_blockgroup_lock;
};
#endif /* _LINUX_EXT2_FS_SB */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment