Commit afd64673 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] reiserfs: reiserfs_file_write implementation

From: Oleg Drokin <green@namesys.com>

With the current 'one block at a time' algorithm, writes past the end of a
file are slow because each new file block is separately added into the tree
causing shifting of other items which is CPU expensive.

With this new implementation, if you write into a file in big enough chunks,
it uses half as much CPU.  Also, this version is more SMP-friendly than the
current one.

There are some known-bad applications that break with this patch (i.e. they
start to work very slowly or even hang).

This is because the filesystem returns a large value in the stat.st_blksize
hint (128k instead of 4k).  This tickles a small number of application bugs.
One is KDE's kmail 3.04 (fixed by upgrading to 3.1+) and the other is
sleepycat's database from before 1997.

If you hit a slowdown problem that you believe is related to the increased
"recommended i/o size" value, try to mount your fs with nolargeio=1 mount
option (remount should work too).

This patch exports block_commit_write(), generic_osync_inode() and
remove_suid() to modules.
parent a61638bc
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/reiserfs_fs_sb.h> #include <linux/reiserfs_fs_sb.h>
#include <linux/reiserfs_fs_i.h> #include <linux/reiserfs_fs_i.h>
...@@ -733,7 +734,7 @@ static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint ...@@ -733,7 +734,7 @@ static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint
int rest = amount_needed; int rest = amount_needed;
int nr_allocated; int nr_allocated;
while (rest > 0) { while (rest > 0 && start <= finish) {
nr_allocated = scan_bitmap (hint->th, &start, finish, 1, nr_allocated = scan_bitmap (hint->th, &start, finish, 1,
rest + prealloc_size, !hint->formatted_node, rest + prealloc_size, !hint->formatted_node,
hint->block); hint->block);
...@@ -879,7 +880,9 @@ void reiserfs_claim_blocks_to_be_allocated( ...@@ -879,7 +880,9 @@ void reiserfs_claim_blocks_to_be_allocated(
if ( !blocks ) if ( !blocks )
return; return;
spin_lock(&REISERFS_SB(sb)->bitmap_lock);
REISERFS_SB(sb)->reserved_blocks += blocks; REISERFS_SB(sb)->reserved_blocks += blocks;
spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
} }
/* Unreserve @blocks amount of blocks in fs pointed by @sb */ /* Unreserve @blocks amount of blocks in fs pointed by @sb */
...@@ -896,6 +899,22 @@ void reiserfs_release_claimed_blocks( ...@@ -896,6 +899,22 @@ void reiserfs_release_claimed_blocks(
if ( !blocks ) if ( !blocks )
return; return;
spin_lock(&REISERFS_SB(sb)->bitmap_lock);
REISERFS_SB(sb)->reserved_blocks -= blocks; REISERFS_SB(sb)->reserved_blocks -= blocks;
spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became zero?"); RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became zero?");
} }
/* Estimate how many whole pages can still be written to the filesystem.
   Used for reiserfs_file_write() purposes for now.

   sb - superblock of the filesystem to estimate space for.

   Returns the number of free blocks that are not already reserved,
   converted to pages (blocks >> (PAGE_CACHE_SHIFT - s_blocksize_bits)),
   or 0 when the reserved count meets or exceeds the free count.  Both
   counters are sampled under bitmap_lock so they are read consistently
   with respect to reiserfs_claim_blocks_to_be_allocated() and
   reiserfs_release_claimed_blocks(). */
int reiserfs_can_fit_pages ( struct super_block *sb /* superblock of filesystem
						       to estimate space */ )
{
	int unreserved;

	spin_lock(&REISERFS_SB(sb)->bitmap_lock);
	/* Signed arithmetic on purpose: if more blocks are reserved than are
	   currently free the difference must show up as negative, not wrap
	   around to a huge unsigned value.
	   NOTE(review): assumes the free block count fits in an int, which is
	   already implied by the int return type -- confirm. */
	unreserved = (int) SB_FREE_BLOCKS(sb) - REISERFS_SB(sb)->reserved_blocks;
	spin_unlock(&REISERFS_SB(sb)->bitmap_lock);

	/* Clamp before shifting so that a negative value is never shifted and
	   callers never see a negative page count. */
	if (unreserved <= 0)
		return 0;

	return unreserved >> ( PAGE_CACHE_SHIFT - sb->s_blocksize_bits);
}
This diff is collapsed.
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include <linux/mpage.h> #include <linux/mpage.h>
#include <linux/writeback.h> #include <linux/writeback.h>
extern int reiserfs_default_io_size; /* default io size defined in super.c */
/* args for the create parameter of reiserfs_get_block */ /* args for the create parameter of reiserfs_get_block */
#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ #define GET_BLOCK_CREATE 1 /* add anything you need to find block */
...@@ -908,7 +910,7 @@ static void init_inode (struct inode * inode, struct path * path) ...@@ -908,7 +910,7 @@ static void init_inode (struct inode * inode, struct path * path)
copy_key (INODE_PKEY (inode), &(ih->ih_key)); copy_key (INODE_PKEY (inode), &(ih->ih_key));
inode->i_blksize = PAGE_SIZE; inode->i_blksize = reiserfs_default_io_size;
INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list )); INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list ));
REISERFS_I(inode)->i_flags = 0; REISERFS_I(inode)->i_flags = 0;
...@@ -1598,7 +1600,7 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th, ...@@ -1598,7 +1600,7 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
} }
// these do not go to on-disk stat data // these do not go to on-disk stat data
inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
inode->i_blksize = PAGE_SIZE; inode->i_blksize = reiserfs_default_io_size;
// store in in-core inode the key of stat data and version all // store in in-core inode the key of stat data and version all
// object items will have (directory items will have old offset // object items will have (directory items will have old offset
......
...@@ -532,6 +532,11 @@ static const arg_desc_t tails[] = { ...@@ -532,6 +532,11 @@ static const arg_desc_t tails[] = {
{NULL, 0} {NULL, 0}
}; };
/* Recommended I/O size, copied into inode->i_blksize when an inode is
   set up.  The default is 128k.  There might be broken applications that
   are confused by a value this large; use the nolargeio mount option to
   get the usual i/o size = PAGE_SIZE.
   NOTE(review): this is a single file-scope variable rather than
   per-superblock state, so the option appears to change the value used
   for every reiserfs mount -- confirm that this is intended. */
int reiserfs_default_io_size = 128 * 1024;
/* proceed only one option from a list *cur - string containing of mount options /* proceed only one option from a list *cur - string containing of mount options
opts - array of options which are accepted opts - array of options which are accepted
...@@ -657,6 +662,7 @@ for old setups still work */ ...@@ -657,6 +662,7 @@ for old setups still work */
{"block-allocator", 'a', balloc, -1}, {"block-allocator", 'a', balloc, -1},
{"resize", 'r', 0, -1}, {"resize", 'r', 0, -1},
{"jdev", 'j', 0, -1}, {"jdev", 'j', 0, -1},
{"nolargeio", 'w', 0, -1},
{NULL, 0, 0, -1} {NULL, 0, 0, -1}
}; };
...@@ -688,6 +694,10 @@ for old setups still work */ ...@@ -688,6 +694,10 @@ for old setups still work */
} }
} }
if ( c == 'w' ) {
reiserfs_default_io_size = PAGE_SIZE;
}
if (c == 'j') { if (c == 'j') {
if (arg && *arg && jdev_name) { if (arg && *arg && jdev_name) {
*jdev_name = arg; *jdev_name = arg;
...@@ -1318,6 +1328,7 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent) ...@@ -1318,6 +1328,7 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent)
reiserfs_proc_register( s, "oidmap", reiserfs_oidmap_in_proc ); reiserfs_proc_register( s, "oidmap", reiserfs_oidmap_in_proc );
reiserfs_proc_register( s, "journal", reiserfs_journal_in_proc ); reiserfs_proc_register( s, "journal", reiserfs_journal_in_proc );
init_waitqueue_head (&(sbi->s_wait)); init_waitqueue_head (&(sbi->s_wait));
sbi->bitmap_lock = SPIN_LOCK_UNLOCKED;
return (0); return (0);
......
...@@ -1268,6 +1268,7 @@ struct path var = {ILLEGAL_PATH_ELEMENT_OFFSET, } ...@@ -1268,6 +1268,7 @@ struct path var = {ILLEGAL_PATH_ELEMENT_OFFSET, }
/* Size of pointer to the unformatted node. */ /* Size of pointer to the unformatted node. */
#define UNFM_P_SIZE (sizeof(unp_t)) #define UNFM_P_SIZE (sizeof(unp_t))
#define UNFM_P_SHIFT 2
// in in-core inode key is stored on le form // in in-core inode key is stored on le form
#define INODE_PKEY(inode) ((struct key *)(REISERFS_I(inode)->i_key)) #define INODE_PKEY(inode) ((struct key *)(REISERFS_I(inode)->i_key))
...@@ -1838,7 +1839,7 @@ void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, ...@@ -1838,7 +1839,7 @@ void reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
void padd_item (char * item, int total_length, int length); void padd_item (char * item, int total_length, int length);
/* inode.c */ /* inode.c */
void restart_transaction(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *path);
void reiserfs_read_locked_inode(struct inode * inode, struct reiserfs_iget_args *args) ; void reiserfs_read_locked_inode(struct inode * inode, struct reiserfs_iget_args *args) ;
int reiserfs_find_actor(struct inode * inode, void *p) ; int reiserfs_find_actor(struct inode * inode, void *p) ;
int reiserfs_init_locked_inode(struct inode * inode, void *p) ; int reiserfs_init_locked_inode(struct inode * inode, void *p) ;
...@@ -2111,6 +2112,7 @@ void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th); ...@@ -2111,6 +2112,7 @@ void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th);
#endif #endif
void reiserfs_claim_blocks_to_be_allocated( struct super_block *sb, int blocks); void reiserfs_claim_blocks_to_be_allocated( struct super_block *sb, int blocks);
void reiserfs_release_claimed_blocks( struct super_block *sb, int blocks); void reiserfs_release_claimed_blocks( struct super_block *sb, int blocks);
int reiserfs_can_fit_pages(struct super_block *sb);
/* hashes.c */ /* hashes.c */
__u32 keyed_hash (const signed char *msg, int len); __u32 keyed_hash (const signed char *msg, int len);
......
...@@ -397,6 +397,7 @@ struct reiserfs_sb_info ...@@ -397,6 +397,7 @@ struct reiserfs_sb_info
reiserfs_proc_info_data_t s_proc_info_data; reiserfs_proc_info_data_t s_proc_info_data;
struct proc_dir_entry *procdir; struct proc_dir_entry *procdir;
int reserved_blocks; /* amount of blocks reserved for further allocations */ int reserved_blocks; /* amount of blocks reserved for further allocations */
spinlock_t bitmap_lock; /* this lock is now only used to protect the reserved_blocks variable */
}; };
/* Definitions of reiserfs on-disk properties: */ /* Definitions of reiserfs on-disk properties: */
......
...@@ -223,6 +223,7 @@ EXPORT_SYMBOL(block_sync_page); ...@@ -223,6 +223,7 @@ EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(generic_cont_expand); EXPORT_SYMBOL(generic_cont_expand);
EXPORT_SYMBOL(cont_prepare_write); EXPORT_SYMBOL(cont_prepare_write);
EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_commit_write);
EXPORT_SYMBOL(block_commit_write);
EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_truncate_page);
EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_file_read); EXPORT_SYMBOL(generic_file_read);
...@@ -556,6 +557,8 @@ EXPORT_SYMBOL(buffer_insert_list); ...@@ -556,6 +557,8 @@ EXPORT_SYMBOL(buffer_insert_list);
EXPORT_SYMBOL(make_bad_inode); EXPORT_SYMBOL(make_bad_inode);
EXPORT_SYMBOL(is_bad_inode); EXPORT_SYMBOL(is_bad_inode);
EXPORT_SYMBOL(__inode_dir_notify); EXPORT_SYMBOL(__inode_dir_notify);
EXPORT_SYMBOL(generic_osync_inode);
EXPORT_SYMBOL(remove_suid);
#ifdef CONFIG_UID16 #ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid); EXPORT_SYMBOL(overflowuid);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment