Commit 7619fd2b authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] Fix inode size accounting race

Since Jan removed the lock_kernel()s in inode_add_bytes() and
inode_sub_bytes(), these functions have been racy.

One problematic workload has been discovered in which concurrent writepage
and truncate on SMP quickly causes i_blocks to go negative.  writepage() does
not take i_sem, and it seems that for ext2, there are no other locks in
force when inode_add_bytes() is called.

Putting the BKL back in there is not acceptable.  To fix this race I have
added a new spinlock "i_lock" to the inode.

That lock is presently used to protect i_bytes and i_blocks.  We could use it
to protect i_size as well.

The splitting of the used disk space into i_blocks and i_bytes is silly - we
should nuke all that and just have a bare loff_t i_usedbytes.   Later.
parent 7c0f82da
......@@ -176,6 +176,7 @@ void inode_init_once(struct inode *inode)
spin_lock_init(&inode->i_data.private_lock);
INIT_LIST_HEAD(&inode->i_data.i_mmap);
INIT_LIST_HEAD(&inode->i_data.i_mmap_shared);
spin_lock_init(&inode->i_lock);
}
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
......
......@@ -316,3 +316,45 @@ asmlinkage long sys_fstat64(unsigned long fd, struct stat64 * statbuf, long flag
}
#endif /* LFS-64 */
void inode_add_bytes(struct inode *inode, loff_t bytes)
{
spin_lock(&inode->i_lock);
inode->i_blocks += bytes >> 9;
bytes &= 511;
inode->i_bytes += bytes;
if (inode->i_bytes >= 512) {
inode->i_blocks++;
inode->i_bytes -= 512;
}
spin_unlock(&inode->i_lock);
}
void inode_sub_bytes(struct inode *inode, loff_t bytes)
{
spin_lock(&inode->i_lock);
inode->i_blocks -= bytes >> 9;
bytes &= 511;
if (inode->i_bytes < bytes) {
inode->i_blocks--;
inode->i_bytes += 512;
}
inode->i_bytes -= bytes;
spin_unlock(&inode->i_lock);
}
loff_t inode_get_bytes(struct inode *inode)
{
loff_t ret;
spin_lock(&inode->i_lock);
ret = (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
spin_unlock(&inode->i_lock);
return ret;
}
void inode_set_bytes(struct inode *inode, loff_t bytes)
{
inode->i_blocks = bytes >> 9;
inode->i_bytes = bytes & 511;
}
......@@ -371,9 +371,10 @@ struct inode {
struct timespec i_ctime;
unsigned int i_blkbits;
unsigned long i_blksize;
unsigned long i_blocks;
unsigned long i_version;
unsigned long i_blocks;
unsigned short i_bytes;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
struct semaphore i_sem;
struct inode_operations *i_op;
struct file_operations *i_fop; /* former ->i_op->default_file_ops */
......@@ -400,7 +401,7 @@ struct inode {
void *i_security;
__u32 i_generation;
union {
void *generic_ip;
void *generic_ip;
} u;
};
......@@ -412,39 +413,6 @@ struct fown_struct {
void *security;
};
static inline void inode_add_bytes(struct inode *inode, loff_t bytes)
{
inode->i_blocks += bytes >> 9;
bytes &= 511;
inode->i_bytes += bytes;
if (inode->i_bytes >= 512) {
inode->i_blocks++;
inode->i_bytes -= 512;
}
}
static inline void inode_sub_bytes(struct inode *inode, loff_t bytes)
{
inode->i_blocks -= bytes >> 9;
bytes &= 511;
if (inode->i_bytes < bytes) {
inode->i_blocks--;
inode->i_bytes += 512;
}
inode->i_bytes -= bytes;
}
static inline loff_t inode_get_bytes(struct inode *inode)
{
return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
}
static inline void inode_set_bytes(struct inode *inode, loff_t bytes)
{
inode->i_blocks = bytes >> 9;
inode->i_bytes = bytes & 511;
}
/*
* Track a single file's readahead state
*/
......@@ -1277,6 +1245,10 @@ extern int page_symlink(struct inode *inode, const char *symname, int len);
extern struct inode_operations page_symlink_inode_operations;
extern void generic_fillattr(struct inode *, struct kstat *);
extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
void inode_add_bytes(struct inode *inode, loff_t bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes);
loff_t inode_get_bytes(struct inode *inode);
void inode_set_bytes(struct inode *inode, loff_t bytes);
extern int vfs_readdir(struct file *, filldir_t, void *);
......
......@@ -272,6 +272,10 @@ EXPORT_SYMBOL(vfs_fstat);
EXPORT_SYMBOL(vfs_stat);
EXPORT_SYMBOL(vfs_lstat);
EXPORT_SYMBOL(vfs_getattr);
EXPORT_SYMBOL(inode_add_bytes);
EXPORT_SYMBOL(inode_sub_bytes);
EXPORT_SYMBOL(inode_get_bytes);
EXPORT_SYMBOL(inode_set_bytes);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(generic_read_dir);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment