Commit 7924d769 authored by Oleg Drokin

Implemented new block allocator for reiserfs, new tail policy for smaller tails.

New block allocator contains code from Alexander Zarochencev, Jeff Mahoney and Oleg Drokin.
parent d541f7da
/* /*
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
*/ */
/* Reiserfs block (de)allocator, bitmap-based. */
#include <linux/config.h> #include <linux/config.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/reiserfs_fs.h> #include <linux/reiserfs_fs.h>
#include <asm/bitops.h> #include <linux/errno.h>
#include <linux/list.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/kernel.h>
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_fs_sb.h>
#include <linux/reiserfs_fs_i.h>
#define PREALLOCATION_SIZE 9
/* different reiserfs block allocator options */
#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
#define _ALLOC_concentrating_formatted_nodes 0
#define _ALLOC_displacing_large_files 1
#define _ALLOC_displacing_new_packing_localities 2
#define _ALLOC_old_hashed_relocation 3
#define _ALLOC_new_hashed_relocation 4
#define _ALLOC_skip_busy 5
#define _ALLOC_displace_based_on_dirid 6
#define _ALLOC_hashed_formatted_nodes 7
#define _ALLOC_old_way 8
#define _ALLOC_hundredth_slices 9
#define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
#define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
#define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
#define SET_OPTION(optname) \
do { \
reiserfs_warning("reiserfs: option \"%s\" is set\n", #optname); \
set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
} while(0)
#define TEST_OPTION(optname, s) \
test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
/* Translate an absolute block number into bitmap coordinates: the index of
 * the bitmap block that holds its bit (*bmap_nr) and the position of that
 * bit inside the bitmap block (*offset). */
static inline void get_bit_address (struct super_block * s,
				    unsigned long block, int * bmap_nr, int * offset)
{
    /* Every bitmap block carries (s_blocksize << 3) bits, i.e. eight per
     * byte, so the quotient selects the bitmap block. */
    *bmap_nr = block / (s->s_blocksize << 3);

    /* The mask keeps the low bits of the block number; this equals the
     * remainder of the division above provided the bit count is a power
     * of two. */
    *offset = block & ((s->s_blocksize << 3) - 1 );
}
#ifdef CONFIG_REISERFS_CHECK #ifdef CONFIG_REISERFS_CHECK
/* this is a safety check to make sure
** blocks are reused properly. used for debugging only.
**
** this checks, that block can be reused, and it has correct state
** (free or busy)
*/
int is_reusable (struct super_block * s, unsigned long block, int bit_value) int is_reusable (struct super_block * s, unsigned long block, int bit_value)
{ {
int i, j; int i, j;
...@@ -29,74 +66,234 @@ int is_reusable (struct super_block * s, unsigned long block, int bit_value) ...@@ -29,74 +66,234 @@ int is_reusable (struct super_block * s, unsigned long block, int bit_value)
/* it can't be one of the bitmap blocks */ /* it can't be one of the bitmap blocks */
for (i = 0; i < SB_BMAP_NR (s); i ++) for (i = 0; i < SB_BMAP_NR (s); i ++)
if (block == SB_AP_BITMAP (s)[i]->b_blocknr) { if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) {
reiserfs_warning ("vs: 4020: is_reusable: " reiserfs_warning ("vs: 4020: is_reusable: "
"bitmap block %lu(%u) can't be freed or reused\n", "bitmap block %lu(%u) can't be freed or reused\n",
block, SB_BMAP_NR (s)); block, SB_BMAP_NR (s));
return 0; return 0;
} }
i = block / (s->s_blocksize << 3); get_bit_address (s, block, &i, &j);
if (i >= SB_BMAP_NR (s)) { if (i >= SB_BMAP_NR (s)) {
reiserfs_warning ("vs-4030: is_reusable: there is no so many bitmap blocks: " reiserfs_warning ("vs-4030: is_reusable: there is no so many bitmap blocks: "
"block=%lu, bitmap_nr=%d\n", block, i); "block=%lu, bitmap_nr=%d\n", block, i);
return 0; return 0;
} }
j = block % (s->s_blocksize << 3);
if ((bit_value == 0 && if ((bit_value == 0 &&
reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i]->b_data)) || reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
(bit_value == 1 && (bit_value == 1 &&
reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i]->b_data) == 0)) { reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) {
reiserfs_warning ("vs-4040: is_reusable: corresponding bit of block %lu does not " reiserfs_warning ("vs-4040: is_reusable: corresponding bit of block %lu does not "
"match required value (i==%d, j==%d) test_bit==%d\n", "match required value (i==%d, j==%d) test_bit==%d\n",
block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)); block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data));
return 0; return 0;
} }
if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) { if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) {
reiserfs_warning ("vs-4050: is_reusable: this is root block (%u), " reiserfs_warning ("vs-4050: is_reusable: this is root block (%u), "
"it must be busy", SB_ROOT_BLOCK (s)); "it must be busy\n", SB_ROOT_BLOCK (s));
return 0; return 0;
} }
return 1; return 1;
} }
#endif /* CONFIG_REISERFS_CHECK */
/* Ask the journal about bit `off' of bitmap block `bmap'.
 * Returns 1 when reiserfs_in_journal() reports a hit; in that case *next
 * receives the offset the caller should examine next.  Returns 0, leaving
 * *next untouched, when the journal does not report the block. */
static inline int is_block_in_journal (struct super_block * s, int bmap, int
				       off, int *next)
{
    unsigned long hint;

    if (!reiserfs_in_journal (s, bmap, off, 1, &hint))
	return 0;

    if (hint) {
	/* the journal proposed a candidate to try */
	*next = hint;
	PROC_INFO_INC( s, scan_bitmap.in_journal_hint );
    } else {
	/* no proposal: step one bit forward so the caller cannot loop forever */
	(*next) = off + 1;
	PROC_INFO_INC( s, scan_bitmap.in_journal_nohint );
    }
    PROC_INFO_INC( s, scan_bitmap.retry );
    return 1;
}
/* Searches a single bitmap block (index bmap_n) for a window of zero bits
 * whose length is at least `min' and at most `max', looking from bit *beg up
 * to (but not including) bit `boundary'.
 *
 * On success the bits of the window are set, the per-bitmap free_count and
 * first_zero_hint and the super block free-block count are updated, both
 * buffers are marked dirty in the journal, *beg holds the first bit of the
 * window and the window length is returned.  Returns 0 when no suitable
 * window is found.
 *
 * When `unfm' is non-zero every candidate bit is additionally checked
 * against the journal through is_block_in_journal(). */
static int scan_bitmap_block (struct reiserfs_transaction_handle *th,
int bmap_n, int *beg, int boundary, int min, int max, int unfm)
{
struct super_block *s = th->t_super;
struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n];
int end, next;
int org = *beg;
RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)\n",bmap_n, SB_BMAP_NR (s) - 1);
PROC_INFO_INC( s, scan_bitmap.bmap );
/* this is unclear and lacks comments, explain how journal bitmaps
work here for the reader. Convey a sense of the design here. What
is a window? */
/* - I mean `a window of zero bits' as in description of this function - Zam. */
/* NOTE(review): bi is the address of an array element (see its initializer
 * above), so this test looks unable to detect a missing bitmap in general
 * -- confirm whether it can ever fire. */
if ( !bi ) {
printk("Hey, bitmap info pointer is zero for bitmap %d!\n",bmap_n);
return 0;
}
if (buffer_locked (bi->bh)) {
PROC_INFO_INC( s, scan_bitmap.wait );
__wait_on_buffer (bi->bh);
}
/* NOTE(review): the `#endif' at the start of the next line has no matching
 * #if inside this function; it looks like residue from the other column of
 * the diff this text was taken from -- confirm and drop. */
#endif /* CONFIG_REISERFS_CHECK */ /* If we know that first zero bit is only one or first zero bit is
closer to the end of bitmap than our start pointer */
if (bi->first_zero_hint > *beg || bi->free_count == 1)
*beg = bi->first_zero_hint;
while (1) {
cont:
if (bi->free_count < min)
return 0; // No free blocks in this bitmap
/* search for a first zero bit -- beginning of a window */
*beg = reiserfs_find_next_zero_le_bit
((unsigned long*)(bi->bh->b_data), boundary, *beg);
if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
* cannot contain a zero window of minimum size */
return 0;
}
/* on a journal hit is_block_in_journal() stores the next offset to try
 * directly into *beg, so the loop restarts the search from there */
if (unfm && is_block_in_journal(s,bmap_n, *beg, beg))
continue;
/* first zero bit found; we check next bits */
for (end = *beg + 1;; end ++) {
if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) {
next = end;
break;
}
/* finding the other end of zero bit window requires looking into journal structures (in
* case of searching for free blocks for unformatted nodes) */
if (unfm && is_block_in_journal(s, bmap_n, end, &next))
break;
}
/* now (*beg) points to beginning of zero bits window,
* (end) points to one bit after the window end */
if (end - *beg >= min) { /* it seems we have found window of proper size */
int i;
reiserfs_prepare_for_journal (s, bi->bh, 1);
/* try to set all blocks used checking are they still free */
for (i = *beg; i < end; i++) {
/* It seems that we should not check in journal again. */
if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) {
/* bit was set by another process
* while we slept in prepare_for_journal() */
PROC_INFO_INC( s, scan_bitmap.stolen );
if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
* if length of this set is more or equal to `min' */
end = i;
break;
}
/* otherwise we clear all bit were set ... */
while (--i >= *beg)
reiserfs_test_and_clear_le_bit (i, bi->bh->b_data);
reiserfs_restore_prepared_buffer (s, bi->bh);
/* NOTE(review): `max' is also the name of an int parameter of this
 * function; the call below presumably resolves to a function-like max()
 * macro -- confirm. */
*beg = max(org, (int)bi->first_zero_hint);
/* ... and search again in current block from beginning */
goto cont;
}
}
bi->free_count -= (end - *beg);
/* if search started from zero_hint bit, and zero hint have not
changed since, then we need to update first_zero_hint */
if ( bi->first_zero_hint >= *beg)
/* no point in looking for free bit if there is not any */
bi->first_zero_hint = (bi->free_count > 0 ) ?
reiserfs_find_next_zero_le_bit
((unsigned long*)(bi->bh->b_data), s->s_blocksize << 3, end) : (s->s_blocksize << 3);
/* get address of corresponding bit (bitmap block number and offset in it) */ journal_mark_dirty (th, s, bi->bh);
/* NOTE(review): the following line is a function header in the middle of a
 * function body; it looks like residue from the other column of the diff
 * this text was taken from, not part of scan_bitmap_block -- confirm and drop. */
static inline void get_bit_address (struct super_block * s, unsigned long block, int * bmap_nr, int * offset)
/* free block count calculation */
reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1);
PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s));
return end - (*beg);
} else {
/* window too short: resume scanning from the position recorded in `next' */
*beg = next;
}
}
}
/* Tries to find contiguous zero bit window (given size) in given region of
* bitmap and place new blocks there. Returns number of allocated blocks. */
static int scan_bitmap (struct reiserfs_transaction_handle *th,
unsigned long *start, unsigned long finish,
int min, int max, int unfm, unsigned long file_block)
{ {
/* It is in the bitmap block number equal to the block number divided by the number of int nr_allocated=0;
bits in a block. */ struct super_block * s = th->t_super;
*bmap_nr = block / (s->s_blocksize << 3); /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
/* Within that bitmap block it is located at bit offset *offset. */ * - Hans, it is not a block number - Zam. */
*offset = block % (s->s_blocksize << 3);
return; int bm, off;
} int end_bm, end_off;
int off_max = s->s_blocksize << 3;
PROC_INFO_INC( s, scan_bitmap.call );
if ( SB_FREE_BLOCKS(s) <= 0)
return 0; // No point in looking for more free blocks
get_bit_address (s, *start, &bm, &off);
get_bit_address (s, finish, &end_bm, &end_off);
// With this option set first we try to find a bitmap that is at least 10%
// free, and if that fails, then we fall back to old whole bitmap scanning
if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) {
for (;bm < end_bm; bm++, off = 0) {
if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 )
nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
if (nr_allocated)
goto ret;
}
get_bit_address (s, *start, &bm, &off);
}
for (;bm < end_bm; bm++, off = 0) {
nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
if (nr_allocated)
goto ret;
}
nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
ret:
*start = bm * off_max + off;
return nr_allocated;
/* There would be a modest performance benefit if we write a version }
to free a list of blocks at once. -Hans */
/* I wonder if it would be less modest static void _reiserfs_free_block (struct reiserfs_transaction_handle *th,
now that we use journaling. -Hans */ unsigned long block)
static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long block)
{ {
struct super_block * s = th->t_super; struct super_block * s = th->t_super;
struct reiserfs_super_block * rs; struct reiserfs_super_block * rs;
struct buffer_head * sbh; struct buffer_head * sbh;
struct buffer_head ** apbh; struct reiserfs_bitmap_info *apbi;
int nr, offset; int nr, offset;
PROC_INFO_INC( s, free_block ); PROC_INFO_INC( s, free_block );
rs = SB_DISK_SUPER_BLOCK (s); rs = SB_DISK_SUPER_BLOCK (s);
sbh = SB_BUFFER_WITH_SB (s); sbh = SB_BUFFER_WITH_SB (s);
apbh = SB_AP_BITMAP (s); apbi = SB_AP_BITMAP(s);
get_bit_address (s, block, &nr, &offset); get_bit_address (s, block, &nr, &offset);
...@@ -107,15 +304,19 @@ static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, unsign ...@@ -107,15 +304,19 @@ static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, unsign
return; return;
} }
reiserfs_prepare_for_journal(s, apbh[nr], 1 ) ; reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ;
/* clear bit for the given block in bit map */ /* clear bit for the given block in bit map */
if (!reiserfs_test_and_clear_le_bit (offset, apbh[nr]->b_data)) { if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) {
reiserfs_warning ("vs-4080: reiserfs_free_block: " reiserfs_warning ("vs-4080: reiserfs_free_block: "
"free_block (%s:%lu)[dev:blocknr]: bit already cleared\n", "free_block (%s:%lu)[dev:blocknr]: bit already cleared\n",
reiserfs_bdevname (s), block); reiserfs_bdevname (s), block);
} }
journal_mark_dirty (th, s, apbh[nr]); if (offset < apbi[nr].first_zero_hint) {
apbi[nr].first_zero_hint = offset;
}
apbi[nr].free_count ++;
journal_mark_dirty (th, s, apbi[nr].bh);
reiserfs_prepare_for_journal(s, sbh, 1) ; reiserfs_prepare_for_journal(s, sbh, 1) ;
/* update super block */ /* update super block */
...@@ -126,7 +327,8 @@ static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, unsign ...@@ -126,7 +327,8 @@ static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, unsign
} }
void reiserfs_free_block (struct reiserfs_transaction_handle *th, void reiserfs_free_block (struct reiserfs_transaction_handle *th,
unsigned long block) { unsigned long block)
{
struct super_block * s = th->t_super; struct super_block * s = th->t_super;
RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
...@@ -144,571 +346,557 @@ void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th, ...@@ -144,571 +346,557 @@ void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th,
_reiserfs_free_block(th, block) ; _reiserfs_free_block(th, block) ;
} }
/* beginning from offset-th bit in bmap_nr-th bitmap block, static void __discard_prealloc (struct reiserfs_transaction_handle * th,
find_forward finds the closest zero bit. It returns 1 and zero struct reiserfs_inode_info *ei)
bit address (bitmap, offset) if zero bit found or 0 if there is no
zero bit in the forward direction */
/* The function is NOT SCHEDULE-SAFE! */
static int find_forward (struct super_block * s, int * bmap_nr, int * offset, int for_unformatted)
{ {
int i, j; unsigned long save = ei->i_prealloc_block ;
struct buffer_head * bh; #ifdef CONFIG_REISERFS_CHECK
unsigned long block_to_try = 0; if (ei->i_prealloc_count < 0)
unsigned long next_block_to_try = 0 ; reiserfs_warning("zam-4001:%s: inode has negative prealloc blocks count.\n", __FUNCTION__ );
#endif
PROC_INFO_INC( s, find_forward.call ); while (ei->i_prealloc_count > 0) {
reiserfs_free_prealloc_block(th,ei->i_prealloc_block);
for (i = *bmap_nr; i < SB_BMAP_NR (s); i ++, *offset = 0, ei->i_prealloc_block++;
PROC_INFO_INC( s, find_forward.bmap )) { ei->i_prealloc_count --;
/* get corresponding bitmap block */ }
bh = SB_AP_BITMAP (s)[i]; ei->i_prealloc_block = save;
if (buffer_locked (bh)) { list_del_init(&(ei->i_prealloc_list));
PROC_INFO_INC( s, find_forward.wait ); }
__wait_on_buffer (bh);
}
retry:
j = reiserfs_find_next_zero_le_bit ((unsigned long *)bh->b_data,
s->s_blocksize << 3, *offset);
/* wow, this really needs to be redone. We can't allocate a block if
** it is in the journal somehow. reiserfs_in_journal makes a suggestion
** for a good block if the one you ask for is in the journal. Note,
** reiserfs_in_journal might reject the block it suggests. The big
** gain from the suggestion is when a big file has been deleted, and
** many blocks show free in the real bitmap, but are all not free
** in the journal list bitmaps.
**
** this whole system sucks. The bitmaps should reflect exactly what
** can and can't be allocated, and the journal should update them as
** it goes. TODO.
*/
if (j < (s->s_blocksize << 3)) {
block_to_try = (i * (s->s_blocksize << 3)) + j;
/* the block is not in the journal, we can proceed */ /* FIXME: It should be inline function */
if (!(reiserfs_in_journal(s, block_to_try, for_unformatted, &next_block_to_try))) { void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th,
*bmap_nr = i; struct inode * inode)
*offset = j; {
return 1; struct reiserfs_inode_info *ei = REISERFS_I(inode);
if (ei->i_prealloc_count) {
__discard_prealloc(th, ei);
} }
/* the block is in the journal */ }
else if ((j+1) < (s->s_blocksize << 3)) { /* try again */
/* reiserfs_in_journal suggested a new block to try */
if (next_block_to_try > 0) {
int new_i ;
get_bit_address (s, next_block_to_try, &new_i, offset);
PROC_INFO_INC( s, find_forward.in_journal_hint ); void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th)
{
struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
/* block is not in this bitmap. reset i and continue while (!list_empty(plist)) {
** we only reset i if new_i is in a later bitmap. struct reiserfs_inode_info *ei;
*/ ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list);
if (new_i > i) { #ifdef CONFIG_REISERFS_CHECK
i = (new_i - 1 ); /* i gets incremented by the for loop */ if (!ei->i_prealloc_count) {
PROC_INFO_INC( s, find_forward.in_journal_out ); reiserfs_warning("zam-4001:%s: inode is in prealloc list but has no preallocated blocks.\n", __FUNCTION__);
continue ;
} }
} else { #endif
/* no suggestion was made, just try the next block */ __discard_prealloc(th, ei);
*offset = j+1 ;
} }
PROC_INFO_INC( s, find_forward.retry ); }
goto retry ; /* block allocator related options are parsed here */
int reiserfs_parse_alloc_options(struct super_block * s, char * options)
{
char * this_char, * value;
REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */
for (this_char = strsep (&options, ":"); this_char != NULL; ) {
if ((value = strchr (this_char, '=')) != NULL)
*value++ = 0;
if (!strcmp(this_char, "concentrating_formatted_nodes")) {
int temp;
SET_OPTION(concentrating_formatted_nodes);
temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10;
if (temp <= 0 || temp > 100) {
REISERFS_SB(s)->s_alloc_options.border = 10;
} else {
REISERFS_SB(s)->s_alloc_options.border = 100 / temp;
} }
continue;
} }
if (!strcmp(this_char, "displacing_large_files")) {
SET_OPTION(displacing_large_files);
REISERFS_SB(s)->s_alloc_options.large_file_size =
(value && *value) ? simple_strtoul (value, &value, 0) : 16;
continue;
} }
/* zero bit not found */ if (!strcmp(this_char, "displacing_new_packing_localities")) {
return 0; SET_OPTION(displacing_new_packing_localities);
} continue;
};
/* return 0 if no free blocks, else return 1 */ if (!strcmp(this_char, "old_hashed_relocation")) {
/* The function is NOT SCHEDULE-SAFE! SET_OPTION(old_hashed_relocation);
** because the bitmap block we want to change could be locked, and on its continue;
** way to the disk when we want to read it, and because of the
** flush_async_commits. Per bitmap block locks won't help much, and
** really aren't needed, as we retry later on if we try to set the bit
** and it is already set.
*/
static int find_zero_bit_in_bitmap (struct super_block * s,
unsigned long search_start,
int * bmap_nr, int * offset,
int for_unformatted)
{
int retry_count = 0 ;
/* get bit location (bitmap number and bit offset) of search_start block */
get_bit_address (s, search_start, bmap_nr, offset);
/* note that we search forward in the bitmap, benchmarks have shown that it is better to allocate in increasing
sequence, which is probably due to the disk spinning in the forward direction.. */
if (find_forward (s, bmap_nr, offset, for_unformatted) == 0) {
/* there wasn't a free block with number greater than our
starting point, so we are going to go to the beginning of the disk */
retry:
search_start = 0; /* caller will reset search_start for itself also. */
get_bit_address (s, search_start, bmap_nr, offset);
if (find_forward (s, bmap_nr,offset,for_unformatted) == 0) {
if (for_unformatted) { /* why only unformatted nodes? -Hans */
if (retry_count == 0) {
/* we've got a chance that flushing async commits will free up
** some space. Sync then retry
*/
flush_async_commits(s) ;
retry_count++ ;
goto retry ;
} else if (retry_count > 0) {
/* nothing more we can do. Make the others wait, flush
** all log blocks to disk, and flush to their home locations.
** this will free up any blocks held by the journal
*/
SB_JOURNAL(s)->j_must_wait = 1 ;
} }
if (!strcmp(this_char, "new_hashed_relocation")) {
SET_OPTION(new_hashed_relocation);
continue;
} }
return 0;
if (!strcmp(this_char, "hashed_formatted_nodes")) {
SET_OPTION(hashed_formatted_nodes);
continue;
} }
if (!strcmp(this_char, "skip_busy")) {
SET_OPTION(skip_busy);
continue;
} }
return 1;
}
/* get amount_needed free block numbers from scanning the bitmap of if (!strcmp(this_char, "hundredth_slices")) {
free/used blocks. SET_OPTION(hundredth_slices);
continue;
}
Optimize layout by trying to find them starting from search_start if (!strcmp(this_char, "old_way")) {
and moving in increasing blocknr direction. (This was found to be SET_OPTION(old_way);
faster than using a bi-directional elevator_direction, in part continue;
because of disk spin direction, in part because by the time one }
reaches the end of the disk the beginning of the disk is the least
congested).
search_start is the block number of the left if (!strcmp(this_char, "displace_based_on_dirid")) {
semantic neighbor of the node we create. SET_OPTION(displace_based_on_dirid);
continue;
}
return CARRY_ON if everything is ok if (!strcmp(this_char, "preallocmin")) {
return NO_DISK_SPACE if out of disk space REISERFS_SB(s)->s_alloc_options.preallocmin =
return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS if the block we found is not contiguous to the last one (value && *value) ? simple_strtoul (value, &value, 0) : 4;
continue;
}
return block numbers found, in the array free_blocknrs. assumes if (!strcmp(this_char, "preallocsize")) {
that any non-zero entries already present in the array are valid. REISERFS_SB(s)->s_alloc_options.preallocsize =
This feature is perhaps convenient coding when one might not have (value && *value) ? simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE;
used all blocknrs from the last time one called this function, or continue;
perhaps it is an archaism from the days of schedule tracking, one }
of us ought to reread the code that calls this, and analyze whether
it is still the right way to code it.
spare space is used only when priority is set to 1. reiserfsck has reiserfs_warning("zam-4001: %s : unknown option - %s\n", __FUNCTION__ , this_char);
its own reiserfs_new_blocknrs, which can use reserved space return 1;
}
exactly what reserved space? the SPARE_SPACE? if so, please comment reiserfs.h. return 0;
}
Give example of who uses spare space, and say that it is a deadlock static void inline new_hashed_relocation (reiserfs_blocknr_hint_t * hint)
avoidance mechanism. -Hans */ {
char * hash_in;
if (hint->formatted_node) {
hash_in = (char*)&hint->key.k_dir_id;
} else {
if (!hint->inode) {
//hint->search_start = hint->beg;
hash_in = (char*)&hint->key.k_dir_id;
} else
if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
else
hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
}
/* This function is NOT SCHEDULE-SAFE! */ hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
}
static int do_reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, static void inline get_left_neighbor(reiserfs_blocknr_hint_t *hint)
unsigned long * free_blocknrs,
unsigned long search_start,
int amount_needed, int priority,
int for_unformatted,
int for_prealloc)
{ {
struct super_block * s = th->t_super; struct path * path;
int i, j; struct buffer_head * bh;
unsigned long * block_list_start = free_blocknrs; struct item_head * ih;
int init_amount_needed = amount_needed; int pos_in_item;
unsigned long new_block = 0 ; __u32 * item;
if (SB_FREE_BLOCKS (s) < SPARE_SPACE && !priority) if (!hint->path) /* reiserfs code can call this function w/o pointer to path
/* we can answer NO_DISK_SPACE being asked for new block with * structure supplied; then we rely on supplied search_start */
priority 0 */ return;
return NO_DISK_SPACE;
RFALSE( !s, "vs-4090: trying to get new block from nonexistent device"); path = hint->path;
RFALSE( search_start == MAX_B_NUM, bh = get_last_bh(path);
"vs-4100: we are optimizing location based on " RFALSE( !bh, "green-4002: Illegal path specified to get_left_neighbor\n");
"the bogus location of a temp buffer (%lu).", search_start); ih = get_ih(path);
RFALSE( amount_needed < 1 || amount_needed > 2, pos_in_item = path->pos_in_item;
"vs-4110: amount_needed parameter incorrect (%d)", amount_needed); item = get_item (path);
/* We continue the while loop if another process snatches our found hint->search_start = bh->b_blocknr;
* free block from us after we find it but before we successfully
* mark it as in use */ if (!hint->formatted_node && is_indirect_le_ih (ih)) {
/* for indirect item: go to left and look for the first non-hole entry
while (amount_needed--) { in the indirect item */
/* skip over any blocknrs already gotten last time. */ if (pos_in_item == I_UNFM_NUM (ih))
if (*(free_blocknrs) != 0) { pos_in_item--;
RFALSE( is_reusable (s, *free_blocknrs, 1) == 0, // pos_in_item = I_UNFM_NUM (ih) - 1;
"vs-4120: bad blocknr on free_blocknrs list"); while (pos_in_item >= 0) {
free_blocknrs++; int t=get_block_num(item,pos_in_item);
continue; if (t) {
hint->search_start = t;
break;
} }
/* look for zero bits in bitmap */ pos_in_item --;
if (find_zero_bit_in_bitmap(s,search_start, &i, &j,for_unformatted) == 0) {
if (find_zero_bit_in_bitmap(s,search_start,&i,&j, for_unformatted) == 0) {
/* recode without the goto and without
the if. It will require a
duplicate for. This is worth the
code clarity. Your way was
admirable, and just a bit too
clever in saving instructions.:-)
I'd say create a new function, but
that would slow things also, yes?
-Hans */
free_and_return:
for ( ; block_list_start != free_blocknrs; block_list_start++) {
reiserfs_free_block (th, *block_list_start);
*block_list_start = 0;
}
if (for_prealloc)
return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS;
else
return NO_DISK_SPACE;
} }
} else {
} }
/* i and j now contain the results of the search. i = bitmap block /* does result value fit into specified region? */
number containing free block, j = offset in this block. we return;
compute the blocknr which is our result, store it in }
free_blocknrs, and increment the pointer so that on the next
loop we will insert into the next location in the array. Also
in preparation for the next loop, search_start is changed so
that the next search will not rescan the same range but will
start where this search finished. Note that while it is
possible that schedule has occurred and blocks have been freed
in that range, it is perhaps more important that the blocks
returned be near each other than that they be near their other
neighbors, and it also simplifies and speeds the code this way. */
/* journal: we need to make sure the block we are giving out is not /* should be, if formatted node, then try to put on first part of the device
** a log block, horrible things would happen there. specified as number of percent with mount option device, else try to put
*/ on last of device. This is not to say it is good code to do so,
new_block = (i * (s->s_blocksize << 3)) + j; but the effect should be measured. */
if (for_prealloc && (new_block - 1) != search_start) { static void inline set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint)
/* preallocated blocks must be contiguous, bail if we didnt find one. {
** this is not a bug. We want to do the check here, before the b_blocknr_t border = SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
** bitmap block is prepared, and before we set the bit and log the
** bitmap.
**
** If we do the check after this function returns, we have to
** call reiserfs_free_block for new_block, which would be pure
** overhead.
**
** for_prealloc should only be set if the caller can deal with the
** NO_MORE_UNUSED_CONTIGUOUS_BLOCKS return value. This can be
** returned before the disk is actually full
*/
goto free_and_return ;
}
search_start = new_block ;
if (hint->formatted_node)
hint->end = border - 1;
else
hint->beg = border;
}
/* make sure the block is not of journal or reserved area */ static void inline displace_large_file(reiserfs_blocknr_hint_t *hint)
if (is_block_in_log_or_reserved_area(s, search_start)) { {
reiserfs_warning("vs-4130: reiserfs_new_blocknrs: trying to allocate log block %lu\n", if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
search_start) ; hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg);
search_start++ ; else
amount_needed++ ; hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg);
continue ; }
}
static void inline hash_formatted_node(reiserfs_blocknr_hint_t *hint)
{
char * hash_in;
reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ; if (!hint->inode)
hash_in = (char*)&hint->key.k_dir_id;
else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
else
hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
RFALSE( buffer_locked (SB_AP_BITMAP (s)[i]) || hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
is_reusable (s, search_start, 0) == 0, }
"vs-4140: bitmap block is locked or bad block number found");
/* if this bit was already set, we've scheduled, and someone else static int inline this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint)
** has allocated it. loop around and try again {
*/ return hint->block == REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
if (reiserfs_test_and_set_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)) { }
reiserfs_warning("vs-4150: reiserfs_new_blocknrs, block not free");
reiserfs_restore_prepared_buffer(s, SB_AP_BITMAP(s)[i]) ; #ifdef DISPLACE_NEW_PACKING_LOCALITIES
amount_needed++ ; static void inline displace_new_packing_locality (reiserfs_blocknr_hint_t *hint)
continue ; {
struct key * key = &hint->key;
hint->th->displace_new_blocks = 0;
hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg);
}
#endif
static int inline old_hashed_relocation (reiserfs_blocknr_hint_t * hint)
{
unsigned long border;
unsigned long hash_in;
if (hint->formatted_node || hint->inode == NULL) {
return 0;
} }
journal_mark_dirty (th, s, SB_AP_BITMAP (s)[i]);
*free_blocknrs = search_start ; hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
free_blocknrs ++; border = hint->beg + (unsigned long) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1);
if (border > hint->search_start)
hint->search_start = border;
return 1;
} }
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; static int inline old_way (reiserfs_blocknr_hint_t * hint)
/* update free block count in super block */ {
PUT_SB_FREE_BLOCKS( s, SB_FREE_BLOCKS(s) - init_amount_needed ); unsigned long border;
journal_mark_dirty (th, s, SB_BUFFER_WITH_SB (s));
s->s_dirt = 1;
return CARRY_ON; if (hint->formatted_node || hint->inode == NULL) {
} return 0;
}
// this is called only by get_empty_nodes border = hint->beg + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - hint->beg);
int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, if (border > hint->search_start)
unsigned long search_start, int amount_needed) { hint->search_start = border;
return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, amount_needed, 0/*priority*/, 0/*for_formatted*/, 0/*for_prealloc */) ;
return 1;
} }
static void inline hundredth_slices (reiserfs_blocknr_hint_t * hint)
{
struct key * key = &hint->key;
unsigned long slice_start;
// called by get_new_buffer and by reiserfs_get_block with amount_needed == 1 slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100);
int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) {
unsigned long search_start) { hint->search_start = slice_start;
return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, }
1/*amount_needed*/,
0/*priority*/,
1/*for formatted*/,
0/*for prealloc */) ;
} }
#ifdef REISERFS_PREALLOCATE static void inline determine_search_start(reiserfs_blocknr_hint_t *hint,
int amount_needed)
/*
** We pre-allocate 8 blocks. Pre-allocation is used for files > 16 KB only.
** This lowers fragmentation on large files by grabbing a contiguous set of
** blocks at once. It also limits the number of times the bitmap block is
** logged by making X number of allocation changes in a single transaction.
**
** We are using a border to divide the disk into two parts. The first part
** is used for tree blocks, which have a very high turnover rate (they
** are constantly allocated then freed)
**
** The second part of the disk is for the unformatted nodes of larger files.
** Putting them away from the tree blocks lowers fragmentation, and makes
** it easier to group files together. There are a number of different
** allocation schemes being tried right now, each is documented below.
**
** A great deal of the allocator's speed comes because reiserfs_get_block
** sends us the block number of the last unformatted node in the file. Once
** a given block is allocated past the border, we don't collide with the
** blocks near the search_start again.
**
*/
int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th,
struct inode * p_s_inode,
unsigned long * free_blocknrs,
unsigned long search_start)
{ {
struct reiserfs_inode_info *ei = REISERFS_I(p_s_inode); struct super_block *s = hint->th->t_super;
int ret=0, blks_gotten=0; hint->beg = 0;
unsigned long border = 0; hint->end = SB_BLOCK_COUNT(s) - 1;
unsigned long bstart = 0;
unsigned long hash_in, hash_out; /* This is former border algorithm. Now with tunable border offset */
unsigned long saved_search_start=search_start; if (concentrating_formatted_nodes(s))
int allocated[PREALLOCATION_SIZE]; set_border_in_hint(s, hint);
int blks;
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
if (!reiserfs_no_border(th->t_super)) { /* whenever we create a new directory, we displace it. At first we will
/* we default to having the border at the 10% mark of the disk. This hash for location, later we might look for a moderately empty place for
** is an arbitrary decision and it needs tuning. It also needs a limit it */
** to prevent it from taking too much space on huge drives. if (displacing_new_packing_localities(s)
*/ && hint->th->displace_new_blocks) {
bstart = (SB_BLOCK_COUNT(th->t_super) / 10); displace_new_packing_locality(hint);
/* we do not continue determine_search_start,
* if new packing locality is being displaced */
return;
} }
if (!reiserfs_no_unhashed_relocation(th->t_super)) { #endif
/* this is a very simple first attempt at preventing too much grouping
** around the border value. Since k_dir_id is never larger than the
** highest allocated oid, it is far from perfect, and files will tend
** to be grouped towards the start of the border
*/
border = le32_to_cpu(INODE_PKEY(p_s_inode)->k_dir_id) % (SB_BLOCK_COUNT(th->t_super) - bstart - 1) ;
} else if (!reiserfs_hashed_relocation(th->t_super)) {
hash_in = le32_to_cpu((INODE_PKEY(p_s_inode))->k_dir_id);
/* I wonder if the CPU cost of the
hash will obscure the layout
effect? Of course, whether that
effect is good or bad we don't
know.... :-) */
hash_out = keyed_hash(((char *) (&hash_in)), 4);
border = hash_out % (SB_BLOCK_COUNT(th->t_super) - bstart - 1) ;
}
border += bstart ;
allocated[0] = 0 ; /* important. Allows a check later on to see if at
* least one block was allocated. This prevents false
* no disk space returns
*/
if ( (p_s_inode->i_size < 4 * 4096) || /* all persons should feel encouraged to add more special cases here and
!(S_ISREG(p_s_inode->i_mode)) ) * test them */
{
if ( search_start < border
|| (
/* allow us to test whether it is a
good idea to prevent files from
getting too far away from their
packing locality by some unexpected
means. This might be poor code for
directories whose files total
larger than 1/10th of the disk, and
it might be good code for
suffering from old insertions when the disk
was almost full. */
/* changed from !reiserfs_test3(th->t_super), which doesn't
** seem like a good idea. Think about adding blocks to
** a large file. If you've allocated 10% of the disk
** in contiguous blocks, you start over at the border value
** for every new allocation. This throws away all the
** information sent in about the last block that was allocated
** in the file. Not a good general case at all.
** -chris
*/
reiserfs_test4(th->t_super) &&
(search_start > border + (SB_BLOCK_COUNT(th->t_super) / 10))
)
)
search_start=border;
ret = do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, if (displacing_large_files(s) && !hint->formatted_node
1/*amount_needed*/, && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
0/*use reserved blocks for root */, displace_large_file(hint);
1/*for_formatted*/, return;
0/*for prealloc */) ;
return ret;
} }
/* take a block off the prealloc list and return it -Hans */ /* attempt to copy a feature from old block allocator code */
if (ei->i_prealloc_count > 0) { if (TEST_OPTION(old_hashed_relocation, s) && !hint->formatted_node) {
ei->i_prealloc_count--; old_hashed_relocation(hint);
*free_blocknrs = ei->i_prealloc_block++; }
/* if no more preallocated blocks, remove inode from list */ /* if none of our special cases is relevant, use the left neighbor in the
if (! ei->i_prealloc_count) { tree order of the new node we are allocating for */
list_del_init(&ei->i_prealloc_list); if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) {
hash_formatted_node(hint);
return;
} }
return ret; get_left_neighbor(hint);
/* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
new blocks are displaced based on directory ID. Also, if suggested search_start
is less than last preallocated block, we start searching from it, assuming that
HDD dataflow is faster in forward direction */
if ( TEST_OPTION(old_way, s)) {
if (!hint->formatted_node) {
if ( !reiserfs_hashed_relocation(s))
old_way(hint);
else if (!reiserfs_no_unhashed_relocation(s))
old_hashed_relocation(hint);
if ( hint->inode && hint->search_start < REISERFS_I(hint->inode)->i_prealloc_block)
hint->search_start = REISERFS_I(hint->inode)->i_prealloc_block;
}
return;
} }
/* else get a new preallocation for the file */ /* This is an approach proposed by Hans */
reiserfs_discard_prealloc (th, p_s_inode); if ( TEST_OPTION(hundredth_slices, s) && ! (displacing_large_files(s) && !hint->formatted_node)) {
/* this uses the last preallocated block as the search_start. discard hundredth_slices(hint);
** prealloc does not zero out this number. return;
*/
if (search_start <= ei->i_prealloc_block) {
search_start = ei->i_prealloc_block;
} }
/* doing the compare again forces search_start to be >= the border, if (TEST_OPTION(old_hashed_relocation, s))
** even if the file already had prealloction done. This seems extra, old_hashed_relocation(hint);
** and should probably be removed if (TEST_OPTION(new_hashed_relocation, s))
*/ new_hashed_relocation(hint);
if ( search_start < border ) search_start=border; return;
}
/* If the disk free space is already below 10% we should static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
** start looking for the free blocks from the beginning {
** of the partition, before the border line. /* make minimum size a mount option and benchmark both ways */
*/ /* we preallocate blocks only for regular files, specific size */
if ( SB_FREE_BLOCKS(th->t_super) <= (SB_BLOCK_COUNT(th->t_super) / 10) ) { /* benchmark preallocating always and see what happens */
search_start=saved_search_start;
}
*free_blocknrs = 0; hint->prealloc_size = 0;
blks = PREALLOCATION_SIZE-1;
for (blks_gotten=0; blks_gotten<PREALLOCATION_SIZE; blks_gotten++) {
ret = do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, if (!hint->formatted_node && hint->preallocate) {
1/*amount_needed*/, if (S_ISREG(hint->inode->i_mode)
0/*for root reserved*/, && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize)
1/*for_formatted*/, hint->prealloc_size = REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocsize - 1;
(blks_gotten > 0)/*must_be_contiguous*/) ;
/* if we didn't find a block this time, adjust blks to reflect
** the actual number of blocks allocated
*/
if (ret != CARRY_ON) {
blks = blks_gotten > 0 ? (blks_gotten - 1) : 0 ;
break ;
} }
allocated[blks_gotten]= *free_blocknrs; return CARRY_ON;
#ifdef CONFIG_REISERFS_CHECK }
if ( (blks_gotten>0) && (allocated[blks_gotten] - allocated[blks_gotten-1]) != 1 ) {
/* this should be caught by new_blocknrs now, checking code */ /* XXX I know it could be merged with upper-level function;
reiserfs_warning("yura-1, reiserfs_new_unf_blocknrs2: pre-allocated not contiguous set of blocks!\n") ; but may be result function would be too complex. */
reiserfs_free_block(th, allocated[blks_gotten]); static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint,
blks = blks_gotten-1; b_blocknr_t * new_blocknrs,
b_blocknr_t start, b_blocknr_t finish,
int amount_needed, int prealloc_size)
{
int rest = amount_needed;
int nr_allocated;
while (rest > 0) {
nr_allocated = scan_bitmap (hint->th, &start, finish, 1,
rest + prealloc_size, !hint->formatted_node,
hint->block);
if (nr_allocated == 0) /* no new blocks allocated, return */
break; break;
/* fill free_blocknrs array first */
while (rest > 0 && nr_allocated > 0) {
* new_blocknrs ++ = start ++;
rest --; nr_allocated --;
} }
#endif
if (blks_gotten==0) { /* do we have something to fill prealloc. array also ? */
ei->i_prealloc_block = *free_blocknrs; if (nr_allocated > 0) {
/* it means prealloc_size was greater than 0 and we do preallocation */
list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
&SB_JOURNAL(hint->th->t_super)->j_prealloc_list);
REISERFS_I(hint->inode)->i_prealloc_block = start;
REISERFS_I(hint->inode)->i_prealloc_count = nr_allocated;
break;
} }
search_start = *free_blocknrs;
*free_blocknrs = 0;
} }
ei->i_prealloc_count = blks;
*free_blocknrs = ei->i_prealloc_block;
ei->i_prealloc_block++;
/* if inode has preallocated blocks, link him to list */ return (amount_needed - rest);
if (ei->i_prealloc_count) {
list_add(&ei->i_prealloc_list,
&SB_JOURNAL(th->t_super)->j_prealloc_list);
}
/* we did actually manage to get 1 block */
if (ret != CARRY_ON && allocated[0] > 0) {
return CARRY_ON ;
}
/* NO_MORE_UNUSED_CONTIGUOUS_BLOCKS should only mean something to
** the preallocation code. The rest of the filesystem asks for a block
** and should either get it, or know the disk is full. The code
** above should never allow ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCK,
** as it doesn't send for_prealloc = 1 to do_reiserfs_new_blocknrs
** unless it has already successfully allocated at least one block.
** Just in case, we translate into a return value the rest of the
** filesystem can understand.
**
** It is an error to change this without making the
** rest of the filesystem understand NO_MORE_UNUSED_CONTIGUOUS_BLOCKS
** If you consider it a bug to return NO_DISK_SPACE here, fix the rest
** of the fs first.
*/
if (ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCKS) {
#ifdef CONFIG_REISERFS_CHECK
reiserfs_warning("reiser-2015: this shouldn't happen, may cause false out of disk space error");
#endif
return NO_DISK_SPACE;
}
return ret;
} }
static inline int blocknrs_and_prealloc_arrays_from_search_start
static void __discard_prealloc (struct reiserfs_transaction_handle * th, (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed)
struct reiserfs_inode_info *ei)
{ {
unsigned long save = ei->i_prealloc_block ; struct super_block *s = hint->th->t_super;
while (ei->i_prealloc_count > 0) { b_blocknr_t start = hint->search_start;
reiserfs_free_prealloc_block(th,ei->i_prealloc_block); b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
ei->i_prealloc_block++; int second_pass = 0;
ei->i_prealloc_count --; int nr_allocated = 0;
determine_prealloc_size(hint);
while((nr_allocated
+= allocate_without_wrapping_disk(hint, new_blocknrs + nr_allocated, start, finish,
amount_needed - nr_allocated, hint->prealloc_size))
< amount_needed) {
/* not all blocks were successfully allocated yet*/
if (second_pass) { /* it was a second pass; we must free all blocks */
while (nr_allocated --)
reiserfs_free_block(hint->th, new_blocknrs[nr_allocated]);
return NO_DISK_SPACE;
} else { /* refine search parameters for next pass */
second_pass = 1;
finish = start;
start = 0;
continue;
} }
ei->i_prealloc_block = save; }
list_del_init(&(ei->i_prealloc_list)); return CARRY_ON;
} }
/* grab new blocknrs from preallocated list */
void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, /* return amount still needed after using them */
struct inode * inode) static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint,
b_blocknr_t *new_blocknrs, int amount_needed)
{ {
struct reiserfs_inode_info *ei = REISERFS_I(inode); struct inode * inode = hint->inode;
#ifdef CONFIG_REISERFS_CHECK
if (ei->i_prealloc_count < 0) if (REISERFS_I(inode)->i_prealloc_count > 0) {
reiserfs_warning("zam-4001:%s inode has negative prealloc blocks count.\n", __FUNCTION__); while (amount_needed) {
#endif
if (ei->i_prealloc_count > 0) { *new_blocknrs ++ = REISERFS_I(inode)->i_prealloc_block ++;
__discard_prealloc(th, ei); REISERFS_I(inode)->i_prealloc_count --;
amount_needed --;
if (REISERFS_I(inode)->i_prealloc_count <= 0) {
list_del(&REISERFS_I(inode)->i_prealloc_list);
break;
}
} }
} }
/* return amount still needed after using preallocated blocks */
return amount_needed;
}
void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th) int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
b_blocknr_t * new_blocknrs, int amount_needed,
int reserved_by_us /* Amount of blocks we have
already reserved */)
{ {
struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; int initial_amount_needed = amount_needed;
int ret;
while (!list_empty(plist)) { /* Check if there is enough space, taking into account reserved space */
struct reiserfs_inode_info *ei; if ( SB_FREE_BLOCKS(hint->th->t_super) - REISERFS_SB(hint->th->t_super)->reserved_blocks <
ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list); amount_needed - reserved_by_us)
#ifdef CONFIG_REISERFS_CHECK return NO_DISK_SPACE;
if (!ei->i_prealloc_count) { /* should this be if !hint->inode && hint->preallocate? */
reiserfs_warning("zam-4001:%s: inode is in prealloc list but has no preallocated blocks.\n", __FUNCTION__); /* do you mean hint->formatted_node can be removed ? - Zam */
/* hint->formatted_node cannot be removed because we try to access
inode information here, and there is often no inode associated with
metadata allocations - green */
if (!hint->formatted_node && hint->preallocate) {
amount_needed = use_preallocated_list_if_available
(hint, new_blocknrs, amount_needed);
if (amount_needed == 0) /* all blocknrs we need we got from
prealloc. list */
return CARRY_ON;
new_blocknrs += (initial_amount_needed - amount_needed);
}
/* find search start and save it in hint structure */
determine_search_start(hint, amount_needed);
/* allocation itself; fill new_blocknrs and preallocation arrays */
ret = blocknrs_and_prealloc_arrays_from_search_start
(hint, new_blocknrs, amount_needed);
/* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
* need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
* variant) */
if (ret != CARRY_ON) {
while (amount_needed ++ < initial_amount_needed) {
reiserfs_free_block(hint->th, *(--new_blocknrs));
} }
#endif
__discard_prealloc(th, ei);
} }
return ret;
}
/* These 2 functions are here to provide block reservation to the rest of the kernel */
/* Reserve @blocks blocks in the filesystem pointed to by @sb by adding them
   to the superblock's reserved_blocks counter.  The caller must make sure
   that this many blocks are actually available on the filesystem; nothing
   is checked here.
   @sb:     super block of the filesystem where blocks should be reserved
   @blocks: how many blocks to reserve; zero leaves the counter untouched */
void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb, int blocks)
{
    /* only touch the counter when there is something to reserve */
    if (blocks != 0)
        REISERFS_SB(sb)->reserved_blocks += blocks;
}
/* Unreserve @blocks blocks in the filesystem pointed to by @sb by subtracting
   them from the superblock's reserved_blocks counter (the counter that
   reiserfs_claim_blocks_to_be_allocated() increases).
   @sb:     super block of the filesystem whose reservation is reduced
   @blocks: how many blocks to unreserve; zero leaves the counter untouched */
void reiserfs_release_claimed_blocks(
                                struct super_block *sb,
                                int blocks
                                )
{
    /* Fast case, if unreservation is zero - exit immediately. */
    if ( !blocks )
        return;

    REISERFS_SB(sb)->reserved_blocks -= blocks;
    /* the condition checked is "counter below zero", i.e. more blocks were
       released than had been claimed, so the message says negative */
    RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became negative?");
}
#endif
...@@ -795,8 +795,8 @@ static int get_empty_nodes( ...@@ -795,8 +795,8 @@ static int get_empty_nodes(
else /* If we have enough already then there is nothing to do. */ else /* If we have enough already then there is nothing to do. */
return CARRY_ON; return CARRY_ON;
if ( reiserfs_new_blocknrs (p_s_tb->transaction_handle, a_n_blocknrs, if ( reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs,
PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_blocknr, n_amount_needed) == NO_DISK_SPACE ) n_amount_needed) == NO_DISK_SPACE )
return NO_DISK_SPACE; return NO_DISK_SPACE;
/* for each blocknumber we just got, get a buffer and stick it on FEB */ /* for each blocknumber we just got, get a buffer and stick it on FEB */
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
// //
#include <asm/types.h> #include <asm/types.h>
#include <asm/page.h>
...@@ -57,7 +58,6 @@ u32 keyed_hash(const signed char *msg, int len) ...@@ -57,7 +58,6 @@ u32 keyed_hash(const signed char *msg, int len)
u32 pad; u32 pad;
int i; int i;
// assert(len >= 0 && len < 256); // assert(len >= 0 && len < 256);
pad = (u32)len | ((u32)len << 8); pad = (u32)len | ((u32)len << 8);
...@@ -92,7 +92,7 @@ u32 keyed_hash(const signed char *msg, int len) ...@@ -92,7 +92,7 @@ u32 keyed_hash(const signed char *msg, int len)
{ {
//assert(len < 16); //assert(len < 16);
if (len >= 16) if (len >= 16)
*(int *)0 = 0; BUG();
a = (u32)msg[ 0] | a = (u32)msg[ 0] |
(u32)msg[ 1] << 8 | (u32)msg[ 1] << 8 |
...@@ -118,7 +118,7 @@ u32 keyed_hash(const signed char *msg, int len) ...@@ -118,7 +118,7 @@ u32 keyed_hash(const signed char *msg, int len)
{ {
//assert(len < 12); //assert(len < 12);
if (len >= 12) if (len >= 12)
*(int *)0 = 0; BUG();
a = (u32)msg[ 0] | a = (u32)msg[ 0] |
(u32)msg[ 1] << 8 | (u32)msg[ 1] << 8 |
(u32)msg[ 2] << 16| (u32)msg[ 2] << 16|
...@@ -139,7 +139,7 @@ u32 keyed_hash(const signed char *msg, int len) ...@@ -139,7 +139,7 @@ u32 keyed_hash(const signed char *msg, int len)
{ {
//assert(len < 8); //assert(len < 8);
if (len >= 8) if (len >= 8)
*(int *)0 = 0; BUG();
a = (u32)msg[ 0] | a = (u32)msg[ 0] |
(u32)msg[ 1] << 8 | (u32)msg[ 1] << 8 |
(u32)msg[ 2] << 16| (u32)msg[ 2] << 16|
...@@ -156,7 +156,7 @@ u32 keyed_hash(const signed char *msg, int len) ...@@ -156,7 +156,7 @@ u32 keyed_hash(const signed char *msg, int len)
{ {
//assert(len < 4); //assert(len < 4);
if (len >= 4) if (len >= 4)
*(int *)0 = 0; BUG();
a = b = c = d = pad; a = b = c = d = pad;
for(i = 0; i < len; i++) for(i = 0; i < len; i++)
{ {
......
...@@ -156,33 +156,6 @@ static inline void fix_tail_page_for_writing(struct page *page) { ...@@ -156,33 +156,6 @@ static inline void fix_tail_page_for_writing(struct page *page) {
} }
} }
/* Pick the bitmap position from which the search for a new unformatted
   node's block should start.
   For an indirect item, scan the item's block pointers leftwards from
   @pos_in_item (or from the last pointer when @pos_in_item equals
   I_UNFM_NUM(ih)) and return the first non-zero, i.e. non-hole, block
   number.  For any other item type, or when every scanned pointer is a
   hole, return the block number of @bh itself. */
static b_blocknr_t find_tag (struct buffer_head * bh, struct item_head * ih,
                             __u32 * item, int pos_in_item)
{
    int idx;
    __u32 unfm;

    if (is_indirect_le_ih (ih)) {
        idx = pos_in_item;
        /* step back onto the last existing pointer of the item */
        if (idx == I_UNFM_NUM (ih))
            idx --;

        /* walk left until a pointer that is not a hole shows up */
        for (; idx >= 0; idx --) {
            unfm = get_block_num(item, idx) ;
            if (unfm != 0)
                return unfm ;
        }
    }

    /* not an indirect item, or nothing but holes to the left;
       something more complicated could be here */
    return bh->b_blocknr;
}
/* reiserfs_get_block does not need to allocate a block only if it has been /* reiserfs_get_block does not need to allocate a block only if it has been
done already or non-hole position has been found in the indirect item */ done already or non-hole position has been found in the indirect item */
static inline int allocation_needed (int retval, b_blocknr_t allocated, static inline int allocation_needed (int retval, b_blocknr_t allocated,
...@@ -505,24 +478,24 @@ static int convert_tail_for_hole(struct inode *inode, ...@@ -505,24 +478,24 @@ static int convert_tail_for_hole(struct inode *inode,
} }
static inline int _allocate_block(struct reiserfs_transaction_handle *th, static inline int _allocate_block(struct reiserfs_transaction_handle *th,
long block,
struct inode *inode, struct inode *inode,
b_blocknr_t *allocated_block_nr, b_blocknr_t *allocated_block_nr,
unsigned long tag, struct path * path,
int flags) { int flags) {
#ifdef REISERFS_PREALLOCATE #ifdef REISERFS_PREALLOCATE
if (!(flags & GET_BLOCK_NO_ISEM)) { if (!(flags & GET_BLOCK_NO_ISEM)) {
return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, tag); return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block);
} }
#endif #endif
return reiserfs_new_unf_blocknrs (th, allocated_block_nr, tag); return reiserfs_new_unf_blocknrs (th, allocated_block_nr, path, block);
} }
int reiserfs_get_block (struct inode * inode, sector_t block, int reiserfs_get_block (struct inode * inode, sector_t block,
struct buffer_head * bh_result, int create) struct buffer_head * bh_result, int create)
{ {
int repeat, retval; int repeat, retval;
unsigned long tag;
b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long
INITIALIZE_PATH(path); INITIALIZE_PATH(path);
int pos_in_item; int pos_in_item;
...@@ -601,7 +574,6 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -601,7 +574,6 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) { if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
/* we have to allocate block for the unformatted node */ /* we have to allocate block for the unformatted node */
tag = find_tag (bh, ih, item, pos_in_item);
if (!transaction_started) { if (!transaction_started) {
pathrelse(&path) ; pathrelse(&path) ;
journal_begin(&th, inode->i_sb, jbegin_count) ; journal_begin(&th, inode->i_sb, jbegin_count) ;
...@@ -610,7 +582,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -610,7 +582,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
goto research ; goto research ;
} }
repeat = _allocate_block(&th, inode, &allocated_block_nr, tag, create); repeat = _allocate_block(&th, block, inode, &allocated_block_nr, &path, create);
if (repeat == NO_DISK_SPACE) { if (repeat == NO_DISK_SPACE) {
/* restart the transaction to give the journal a chance to free /* restart the transaction to give the journal a chance to free
...@@ -618,7 +590,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -618,7 +590,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
** research if we succeed on the second try ** research if we succeed on the second try
*/ */
restart_transaction(&th, inode, &path) ; restart_transaction(&th, inode, &path) ;
repeat = _allocate_block(&th, inode,&allocated_block_nr,tag,create); repeat = _allocate_block(&th, block, inode, &allocated_block_nr, NULL, create);
if (repeat != NO_DISK_SPACE) { if (repeat != NO_DISK_SPACE) {
goto research ; goto research ;
...@@ -1591,6 +1563,10 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th, ...@@ -1591,6 +1563,10 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
set_inode_sd_version (inode, STAT_DATA_V2); set_inode_sd_version (inode, STAT_DATA_V2);
/* insert the stat data into the tree */ /* insert the stat data into the tree */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
if (REISERFS_I(dir)->new_packing_locality)
th->displace_new_blocks = 1;
#endif
retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd)); retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
if (retval) { if (retval) {
err = retval; err = retval;
...@@ -1598,6 +1574,10 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th, ...@@ -1598,6 +1574,10 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
goto out_bad_inode; goto out_bad_inode;
} }
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
if (!th->displace_new_blocks)
REISERFS_I(dir)->new_packing_locality = 0;
#endif
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
/* insert item with "." and ".." */ /* insert item with "." and ".." */
retval = reiserfs_new_directory (th, &ih, &path_to_key, dir); retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);
......
...@@ -510,14 +510,12 @@ int dump_journal_writers(void) { ...@@ -510,14 +510,12 @@ int dump_journal_writers(void) {
** **
*/ */
int reiserfs_in_journal(struct super_block *p_s_sb, int reiserfs_in_journal(struct super_block *p_s_sb,
unsigned long bl, int search_all, int bmap_nr, int bit_nr, int search_all,
unsigned long *next_zero_bit) { unsigned long *next_zero_bit) {
struct reiserfs_journal_cnode *cn ; struct reiserfs_journal_cnode *cn ;
struct reiserfs_list_bitmap *jb ; struct reiserfs_list_bitmap *jb ;
int i ; int i ;
int bmap_nr = bl / (p_s_sb->s_blocksize << 3) ; unsigned long bl;
int bit_nr = bl % (p_s_sb->s_blocksize << 3) ;
int tmp_bit ;
*next_zero_bit = 0 ; /* always start this at zero. */ *next_zero_bit = 0 ; /* always start this at zero. */
...@@ -537,15 +535,15 @@ int reiserfs_in_journal(struct super_block *p_s_sb, ...@@ -537,15 +535,15 @@ int reiserfs_in_journal(struct super_block *p_s_sb,
jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ; jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ;
if (jb->journal_list && jb->bitmaps[bmap_nr] && if (jb->journal_list && jb->bitmaps[bmap_nr] &&
test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) { test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) {
tmp_bit = find_next_zero_bit((unsigned long *) *next_zero_bit = find_next_zero_bit((unsigned long *)
(jb->bitmaps[bmap_nr]->data), (jb->bitmaps[bmap_nr]->data),
p_s_sb->s_blocksize << 3, bit_nr+1) ; p_s_sb->s_blocksize << 3, bit_nr+1) ;
*next_zero_bit = bmap_nr * (p_s_sb->s_blocksize << 3) + tmp_bit ;
return 1 ; return 1 ;
} }
} }
} }
bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
/* is it in any old transactions? */ /* is it in any old transactions? */
if (search_all && (cn = get_journal_hash_dev(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_hash_table, bl))) { if (search_all && (cn = get_journal_hash_dev(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_hash_table, bl))) {
return 1; return 1;
...@@ -1820,6 +1818,7 @@ static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_tas ...@@ -1820,6 +1818,7 @@ static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_tas
jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ; jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ;
flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ; flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ;
if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 && if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 &&
atomic_read(&(jl->j_commit_left)) == 0) { atomic_read(&(jl->j_commit_left)) == 0) {
kupdate_one_transaction(ct->p_s_sb, jl) ; kupdate_one_transaction(ct->p_s_sb, jl) ;
......
...@@ -661,6 +661,10 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) ...@@ -661,6 +661,10 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
struct reiserfs_transaction_handle th ; struct reiserfs_transaction_handle th ;
int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3;
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
/* set flag that a new packing locality was created and that new blocks for the content of that directory are not displaced yet */
REISERFS_I(dir)->new_packing_locality = 1;
#endif
mode = S_IFDIR | mode; mode = S_IFDIR | mode;
if (!(inode = new_inode(dir->i_sb))) { if (!(inode = new_inode(dir->i_sb))) {
return -ENOMEM ; return -ENOMEM ;
......
...@@ -111,7 +111,7 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset, ...@@ -111,7 +111,7 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset,
#define SF( x ) ( r -> x ) #define SF( x ) ( r -> x )
#define SFP( x ) SF( s_proc_info_data.x ) #define SFP( x ) SF( s_proc_info_data.x )
#define SFPL( x ) SFP( x[ level ] ) #define SFPL( x ) SFP( x[ level ] )
#define SFPF( x ) SFP( find_forward.x ) #define SFPF( x ) SFP( scan_bitmap.x )
#define SFPJ( x ) SFP( journal.x ) #define SFPJ( x ) SFP( journal.x )
#define D2C( x ) le16_to_cpu( x ) #define D2C( x ) le16_to_cpu( x )
...@@ -184,7 +184,7 @@ int reiserfs_super_in_proc( char *buffer, char **start, off_t offset, ...@@ -184,7 +184,7 @@ int reiserfs_super_in_proc( char *buffer, char **start, off_t offset,
reiserfs_no_unhashed_relocation( sb ) ? "NO_UNHASHED_RELOCATION " : "", reiserfs_no_unhashed_relocation( sb ) ? "NO_UNHASHED_RELOCATION " : "",
reiserfs_hashed_relocation( sb ) ? "UNHASHED_RELOCATION " : "", reiserfs_hashed_relocation( sb ) ? "UNHASHED_RELOCATION " : "",
reiserfs_test4( sb ) ? "TEST4 " : "", reiserfs_test4( sb ) ? "TEST4 " : "",
dont_have_tails( sb ) ? "NO_TAILS " : "TAILS ", have_large_tails( sb ) ? "TAILS " : have_small_tails(sb)?"SMALL_TAILS ":"NO_TAILS ",
replay_only( sb ) ? "REPLAY_ONLY " : "", replay_only( sb ) ? "REPLAY_ONLY " : "",
reiserfs_dont_log( sb ) ? "DONT_LOG " : "LOG ", reiserfs_dont_log( sb ) ? "DONT_LOG " : "LOG ",
convert_reiserfs( sb ) ? "CONV " : "", convert_reiserfs( sb ) ? "CONV " : "",
...@@ -314,27 +314,30 @@ int reiserfs_bitmap_in_proc( char *buffer, char **start, off_t offset, ...@@ -314,27 +314,30 @@ int reiserfs_bitmap_in_proc( char *buffer, char **start, off_t offset,
r = REISERFS_SB(sb); r = REISERFS_SB(sb);
len += sprintf( &buffer[ len ], "free_block: %lu\n" len += sprintf( &buffer[ len ], "free_block: %lu\n"
"find_forward:" " scan_bitmap:"
" wait" " wait"
" bmap" " bmap"
" retry" " retry"
" stolen"
" journal_hint" " journal_hint"
" journal_out" "journal_nohint"
"\n" "\n"
" %12lu" " %14lu"
" %12lu" " %14lu"
" %12lu" " %14lu"
" %12lu" " %14lu"
" %12lu" " %14lu"
" %12lu" " %14lu"
" %14lu"
"\n", "\n",
SFP( free_block ), SFP( free_block ),
SFPF( call ), SFPF( call ),
SFPF( wait ), SFPF( wait ),
SFPF( bmap ), SFPF( bmap ),
SFPF( retry ), SFPF( retry ),
SFPF( stolen ),
SFPF( in_journal_hint ), SFPF( in_journal_hint ),
SFPF( in_journal_out ) ); SFPF( in_journal_nohint ) );
procinfo_epilogue( sb ); procinfo_epilogue( sb );
return reiserfs_proc_tail( len, buffer, start, offset, count, eof ); return reiserfs_proc_tail( len, buffer, start, offset, count, eof );
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
int reiserfs_resize (struct super_block * s, unsigned long block_count_new) int reiserfs_resize (struct super_block * s, unsigned long block_count_new)
{ {
struct reiserfs_super_block * sb; struct reiserfs_super_block * sb;
struct buffer_head ** bitmap, * bh; struct reiserfs_bitmap_info *bitmap;
struct buffer_head * bh;
struct reiserfs_transaction_handle th; struct reiserfs_transaction_handle th;
unsigned int bmap_nr_new, bmap_nr; unsigned int bmap_nr_new, bmap_nr;
unsigned int block_r_new, block_r; unsigned int block_r_new, block_r;
...@@ -103,26 +104,29 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new) ...@@ -103,26 +104,29 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new)
/* allocate additional bitmap blocks, reallocate array of bitmap /* allocate additional bitmap blocks, reallocate array of bitmap
* block pointers */ * block pointers */
bitmap = reiserfs_kmalloc(sizeof(struct buffer_head *) * bmap_nr_new, GFP_KERNEL, s); bitmap = vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
if (!bitmap) { if (!bitmap) {
printk("reiserfs_resize: unable to allocate memory.\n"); printk("reiserfs_resize: unable to allocate memory.\n");
return -ENOMEM; return -ENOMEM;
} }
memset (bitmap, 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
for (i = 0; i < bmap_nr; i++) for (i = 0; i < bmap_nr; i++)
bitmap[i] = SB_AP_BITMAP(s)[i]; bitmap[i] = SB_AP_BITMAP(s)[i];
for (i = bmap_nr; i < bmap_nr_new; i++) { for (i = bmap_nr; i < bmap_nr_new; i++) {
bitmap[i] = sb_getblk(s, i * s->s_blocksize * 8); bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8);
memset(bitmap[i]->b_data, 0, sb_blocksize(sb)); memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb));
reiserfs_test_and_set_le_bit(0, bitmap[i]->b_data); reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data);
mark_buffer_dirty(bitmap[i]) ; mark_buffer_dirty(bitmap[i].bh) ;
set_buffer_uptodate(bitmap[i]); set_buffer_uptodate(bitmap[i].bh);
ll_rw_block(WRITE, 1, bitmap + i); ll_rw_block(WRITE, 1, &bitmap[i].bh);
wait_on_buffer(bitmap[i]); wait_on_buffer(bitmap[i].bh);
// update bitmap_info stuff
bitmap[i].first_zero_hint=1;
bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
} }
/* free old bitmap blocks array */ /* free old bitmap blocks array */
reiserfs_kfree(SB_AP_BITMAP(s), vfree(SB_AP_BITMAP(s));
sizeof(struct buffer_head *) * bmap_nr, s);
SB_AP_BITMAP(s) = bitmap; SB_AP_BITMAP(s) = bitmap;
} }
...@@ -130,18 +134,26 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new) ...@@ -130,18 +134,26 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new)
journal_begin(&th, s, 10); journal_begin(&th, s, 10);
/* correct last bitmap blocks in old and new disk layout */ /* correct last bitmap blocks in old and new disk layout */
reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1], 1); reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1);
for (i = block_r; i < s->s_blocksize * 8; i++) for (i = block_r; i < s->s_blocksize * 8; i++)
reiserfs_test_and_clear_le_bit(i, reiserfs_test_and_clear_le_bit(i,
SB_AP_BITMAP(s)[bmap_nr - 1]->b_data); SB_AP_BITMAP(s)[bmap_nr - 1].bh->b_data);
journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1]); SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r;
if ( !SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint)
SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r;
reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1], 1); journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh);
reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1);
for (i = block_r_new; i < s->s_blocksize * 8; i++) for (i = block_r_new; i < s->s_blocksize * 8; i++)
reiserfs_test_and_set_le_bit(i, reiserfs_test_and_set_le_bit(i,
SB_AP_BITMAP(s)[bmap_nr_new - 1]->b_data); SB_AP_BITMAP(s)[bmap_nr_new - 1].bh->b_data);
journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1]); journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh);
SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -= s->s_blocksize * 8 - block_r_new;
/* Extreme case where last bitmap is the only valid block in itself. */
if ( !SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count )
SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0;
/* update super */ /* update super */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ;
free_blocks = SB_FREE_BLOCKS(s); free_blocks = SB_FREE_BLOCKS(s);
......
...@@ -1813,6 +1813,9 @@ int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, ...@@ -1813,6 +1813,9 @@ int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th,
int retval; int retval;
init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size);
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
s_paste_balance.key = p_s_key->on_disk_key;
#endif
while ( (retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == REPEAT_SEARCH ) { while ( (retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == REPEAT_SEARCH ) {
/* file system changed while we were in the fix_nodes */ /* file system changed while we were in the fix_nodes */
...@@ -1823,7 +1826,7 @@ int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, ...@@ -1823,7 +1826,7 @@ int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th,
goto error_out ; goto error_out ;
} }
if (retval == POSITION_FOUND) { if (retval == POSITION_FOUND) {
reiserfs_warning ("PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key); reiserfs_warning ("PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists\n", p_s_key);
retval = -EEXIST ; retval = -EEXIST ;
goto error_out ; goto error_out ;
} }
...@@ -1858,6 +1861,9 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, ...@@ -1858,6 +1861,9 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
int retval; int retval;
init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih));
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
s_ins_balance.key = key->on_disk_key;
#endif
/* /*
if (p_c_body == 0) if (p_c_body == 0)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/time.h> #include <linux/time.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <linux/reiserfs_fs.h> #include <linux/reiserfs_fs.h>
...@@ -374,9 +375,9 @@ static void reiserfs_put_super (struct super_block * s) ...@@ -374,9 +375,9 @@ static void reiserfs_put_super (struct super_block * s)
journal_release(&th, s) ; journal_release(&th, s) ;
for (i = 0; i < SB_BMAP_NR (s); i ++) for (i = 0; i < SB_BMAP_NR (s); i ++)
brelse (SB_AP_BITMAP (s)[i]); brelse (SB_AP_BITMAP (s)[i].bh);
reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); vfree (SB_AP_BITMAP (s));
brelse (SB_BUFFER_WITH_SB (s)); brelse (SB_BUFFER_WITH_SB (s));
...@@ -387,6 +388,11 @@ static void reiserfs_put_super (struct super_block * s) ...@@ -387,6 +388,11 @@ static void reiserfs_put_super (struct super_block * s)
REISERFS_SB(s)->s_kmallocs); REISERFS_SB(s)->s_kmallocs);
} }
if (REISERFS_SB(s)->reserved_blocks != 0) {
reiserfs_warning ("green-2005: reiserfs_put_super: reserved blocks left %d\n",
REISERFS_SB(s)->reserved_blocks);
}
reiserfs_proc_unregister( s, "journal" ); reiserfs_proc_unregister( s, "journal" );
reiserfs_proc_unregister( s, "oidmap" ); reiserfs_proc_unregister( s, "oidmap" );
reiserfs_proc_unregister( s, "on-disk-super" ); reiserfs_proc_unregister( s, "on-disk-super" );
...@@ -518,6 +524,13 @@ const arg_desc_t balloc[] = { ...@@ -518,6 +524,13 @@ const arg_desc_t balloc[] = {
{NULL, -1} {NULL, -1}
}; };
/* Accepted values for the "tails=" mount option; this table is what the
 * "tails" entry of the opts[] array in reiserfs_parse_options() points at.
 * Each row pairs a value string with the number stored next to it:
 * "on" selects REISERFS_LARGETAIL and "small" selects REISERFS_SMALLTAIL.
 * NOTE(review): "off" carries -1, presumably meaning "set no bit" -- confirm
 * against the code in reiserfs_getopt() that consumes this table. */
const arg_desc_t tails[] = {
{"on", REISERFS_LARGETAIL},
{"off", -1},
{"small", REISERFS_SMALLTAIL},
/* End-of-table marker (NULL value string).
 * NOTE(review): the balloc[] table above terminates with {NULL, -1} while
 * this one uses {NULL, 0}; confirm the second field of the terminator is
 * never read. */
{NULL, 0}
};
/* proceed only one option from a list *cur - string containing of mount options /* proceed only one option from a list *cur - string containing of mount options
opts - array of options which are accepted opts - array of options which are accepted
...@@ -525,7 +538,7 @@ const arg_desc_t balloc[] = { ...@@ -525,7 +538,7 @@ const arg_desc_t balloc[] = {
in the input - pointer to the argument is stored here in the input - pointer to the argument is stored here
bit_flags - if option requires to set a certain bit - it is set here bit_flags - if option requires to set a certain bit - it is set here
return -1 if unknown option is found, opt->arg_required otherwise */ return -1 if unknown option is found, opt->arg_required otherwise */
static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg, static int reiserfs_getopt ( struct super_block * s, char ** cur, opt_desc_t * opts, char ** opt_arg,
unsigned long * bit_flags) unsigned long * bit_flags)
{ {
char * p; char * p;
...@@ -548,6 +561,15 @@ static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg, ...@@ -548,6 +561,15 @@ static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg,
(*cur) ++; (*cur) ++;
} }
if ( !strncmp (p, "alloc=", 6) ) {
/* Ugly special case, probably we should redo options parser so that
it can understand several arguments for some options, also so that
it can fill several bitfields with option values. */
reiserfs_parse_alloc_options( s, p + 6);
return 0;
}
/* for every option in the list */ /* for every option in the list */
for (opt = opts; opt->option_name; opt ++) { for (opt = opts; opt->option_name; opt ++) {
if (!strncmp (p, opt->option_name, strlen (opt->option_name))) { if (!strncmp (p, opt->option_name, strlen (opt->option_name))) {
...@@ -612,7 +634,7 @@ static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg, ...@@ -612,7 +634,7 @@ static int reiserfs_getopt (char ** cur, opt_desc_t * opts, char ** opt_arg,
/* returns 0 if something is wrong in option string, 1 - otherwise */ /* returns 0 if something is wrong in option string, 1 - otherwise */
static int reiserfs_parse_options (char * options, /* string given via mount's -o */ static int reiserfs_parse_options (struct super_block * s, char * options, /* string given via mount's -o */
unsigned long * mount_options, unsigned long * mount_options,
/* after the parsing phase, contains the /* after the parsing phase, contains the
collection of bitflags defining what collection of bitflags defining what
...@@ -624,14 +646,14 @@ static int reiserfs_parse_options (char * options, /* string given via mount's - ...@@ -624,14 +646,14 @@ static int reiserfs_parse_options (char * options, /* string given via mount's -
char * arg = NULL; char * arg = NULL;
char * pos; char * pos;
opt_desc_t opts[] = { opt_desc_t opts[] = {
{"notail", 0, 0, NOTAIL}, {"tails", 't', tails, -1},
{"notail", 0, 0, -1}, /* Compatibility stuff, so that -o notail
for old setups still work */
{"conv", 0, 0, REISERFS_CONVERT}, {"conv", 0, 0, REISERFS_CONVERT},
{"attrs", 0, 0, REISERFS_ATTRS}, {"attrs", 0, 0, REISERFS_ATTRS},
{"nolog", 0, 0, -1}, {"nolog", 0, 0, -1},
{"replayonly", 0, 0, REPLAYONLY}, {"replayonly", 0, 0, REPLAYONLY},
{"block-allocator", 'a', balloc, -1}, {"block-allocator", 'a', balloc, -1},
{"resize", 'r', 0, -1}, {"resize", 'r', 0, -1},
{"jdev", 'j', 0, -1}, {"jdev", 'j', 0, -1},
{NULL, 0, 0, -1} {NULL, 0, 0, -1}
...@@ -642,9 +664,12 @@ static int reiserfs_parse_options (char * options, /* string given via mount's - ...@@ -642,9 +664,12 @@ static int reiserfs_parse_options (char * options, /* string given via mount's -
/* use default configuration: create tails, journaling on, no /* use default configuration: create tails, journaling on, no
conversion to newest format */ conversion to newest format */
return 1; return 1;
else
/* Drop defaults to zeroes */
*mount_options = 0;
for (pos = options; pos; ) { for (pos = options; pos; ) {
c = reiserfs_getopt (&pos, opts, &arg, mount_options); c = reiserfs_getopt (s, &pos, opts, &arg, mount_options);
if (c == -1) if (c == -1)
/* wrong option is given */ /* wrong option is given */
return 0; return 0;
...@@ -681,7 +706,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a ...@@ -681,7 +706,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
rs = SB_DISK_SUPER_BLOCK (s); rs = SB_DISK_SUPER_BLOCK (s);
if (!reiserfs_parse_options(arg, &mount_options, &blocks, NULL)) if (!reiserfs_parse_options(s, arg, &mount_options, &blocks, NULL))
return -EINVAL; return -EINVAL;
if(blocks) { if(blocks) {
...@@ -731,32 +756,84 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a ...@@ -731,32 +756,84 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
return 0; return 0;
} }
/* load_bitmap_info_data - build the in-core summary of one bitmap block.
 * @sb - superblock of this filesystem; only its block size is used here
 * @bi - bitmap info to fill in.  bi->bh must already be valid.
 *
 * The bitmap is walked one machine word at a time.  Every clear bit found is
 * added to bi->free_count, and the index of the first clear bit seen is
 * recorded in bi->first_zero_hint.  Words that are entirely set or entirely
 * clear are handled without looking at their individual bits.
 *
 * Both fields are only ever added to / conditionally assigned, never reset,
 * so the result is only meaningful when they are zero on entry.
 *
 * bi->first_zero_hint == 0 is treated as "not set yet", because the bitmap
 * block itself will invariably occupy bit 0 of the bitmap.  The one
 * exception is when free_count is also 0: then there are no free blocks at
 * all and the hint legitimately stays 0.
 */
static void load_bitmap_info_data (struct super_block *sb,
                                   struct reiserfs_bitmap_info *bi)
{
    const unsigned int bits_per_word = sizeof(unsigned long) * 8;
    unsigned long *word;

    for (word = (unsigned long *)bi->bh->b_data;
         (char *)word < (bi->bh->b_data + sb->s_blocksize);
         word++) {
        /* bit index, within this bitmap, of the lowest bit of this word */
        long word_base = ((char *)word - bi->bh->b_data) << 3;
        unsigned int bit;

        /* Only zero bits are counted, so a word of all ones adds nothing. */
        if (*word == ~0UL)
            continue;

        /* A word of all zeroes is accounted for in one step. */
        if (*word == 0) {
            if (bi->first_zero_hint == 0)
                bi->first_zero_hint = word_base;
            bi->free_count += bits_per_word;
            continue;
        }

        /* Mixed word: inspect every bit individually. */
        for (bit = 0; bit < bits_per_word; bit++) {
            if (reiserfs_test_le_bit (bit, word))
                continue;
            bi->free_count++;
            if (bi->first_zero_hint == 0)
                bi->first_zero_hint = word_base + bit;
        }
    }

#ifdef CONFIG_REISERFS_CHECK
    /* Left disabled: this prints a lot of unneeded info on big filesystems.
     * reiserfs_warning ("bitmap loaded from block %d: %d free blocks\n",
     *                   bi->bh->b_blocknr, bi->free_count);
     */
#endif
}
static int read_bitmaps (struct super_block * s) static int read_bitmaps (struct super_block * s)
{ {
int i, bmap_nr; int i, bmap_nr;
SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * SB_BMAP_NR(s), GFP_NOFS, s); SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
if (SB_AP_BITMAP (s) == 0) if (SB_AP_BITMAP (s) == 0)
return 1; return 1;
memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
for (i = 0, bmap_nr = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; for (i = 0, bmap_nr = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1;
i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) {
SB_AP_BITMAP (s)[i] = sb_getblk(s, bmap_nr); SB_AP_BITMAP (s)[i].bh = sb_getblk(s, bmap_nr);
if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh))
ll_rw_block(READ, 1, SB_AP_BITMAP(s) + i); ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh);
} }
for (i = 0; i < SB_BMAP_NR(s); i++) { for (i = 0; i < SB_BMAP_NR(s); i++) {
wait_on_buffer(SB_AP_BITMAP (s)[i]); wait_on_buffer(SB_AP_BITMAP (s)[i].bh);
if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) { if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
reiserfs_warning("sh-2029: reiserfs read_bitmaps: " reiserfs_warning("sh-2029: reiserfs read_bitmaps: "
"bitmap block (#%lu) reading failed\n", "bitmap block (#%lu) reading failed\n",
SB_AP_BITMAP(s)[i]->b_blocknr); SB_AP_BITMAP(s)[i].bh->b_blocknr);
for (i = 0; i < SB_BMAP_NR(s); i++) for (i = 0; i < SB_BMAP_NR(s); i++)
brelse(SB_AP_BITMAP(s)[i]); brelse(SB_AP_BITMAP(s)[i].bh);
reiserfs_kfree(SB_AP_BITMAP(s), sizeof(struct buffer_head *) * SB_BMAP_NR(s), s); vfree(SB_AP_BITMAP(s));
SB_AP_BITMAP(s) = NULL; SB_AP_BITMAP(s) = NULL;
return 1; return 1;
} }
load_bitmap_info_data (s, SB_AP_BITMAP (s) + i);
} }
return 0; return 0;
} }
...@@ -768,16 +845,17 @@ static int read_old_bitmaps (struct super_block * s) ...@@ -768,16 +845,17 @@ static int read_old_bitmaps (struct super_block * s)
int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */
/* read true bitmap */ /* read true bitmap */
SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * sb_bmap_nr(rs), GFP_NOFS, s); SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs));
if (SB_AP_BITMAP (s) == 0) if (SB_AP_BITMAP (s) == 0)
return 1; return 1;
memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * sb_bmap_nr(rs)); memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs));
for (i = 0; i < sb_bmap_nr(rs); i ++) { for (i = 0; i < sb_bmap_nr(rs); i ++) {
SB_AP_BITMAP (s)[i] = sb_bread (s, bmp1 + i); SB_AP_BITMAP (s)[i].bh = sb_bread (s, bmp1 + i);
if (!SB_AP_BITMAP (s)[i]) if (!SB_AP_BITMAP (s)[i].bh)
return 1; return 1;
load_bitmap_info_data (s, SB_AP_BITMAP (s) + i);
} }
return 0; return 0;
...@@ -790,7 +868,7 @@ void check_bitmap (struct super_block * s) ...@@ -790,7 +868,7 @@ void check_bitmap (struct super_block * s)
char * buf; char * buf;
while (i < SB_BLOCK_COUNT (s)) { while (i < SB_BLOCK_COUNT (s)) {
buf = SB_AP_BITMAP (s)[i / (s->s_blocksize * 8)]->b_data; buf = SB_AP_BITMAP (s)[i / (s->s_blocksize * 8)].bh->b_data;
if (!reiserfs_test_le_bit (i % (s->s_blocksize * 8), buf)) if (!reiserfs_test_le_bit (i % (s->s_blocksize * 8), buf))
free ++; free ++;
i ++; i ++;
...@@ -899,10 +977,11 @@ static int reread_meta_blocks(struct super_block *s) { ...@@ -899,10 +977,11 @@ static int reread_meta_blocks(struct super_block *s) {
} }
for (i = 0; i < SB_BMAP_NR(s) ; i++) { for (i = 0; i < SB_BMAP_NR(s) ; i++) {
ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i])) ; ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)) ;
wait_on_buffer(SB_AP_BITMAP(s)[i]) ; wait_on_buffer(SB_AP_BITMAP(s)[i].bh) ;
if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) { if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
printk("reread_meta_blocks, error reading bitmap block number %d at %ld\n", i, SB_AP_BITMAP(s)[i]->b_blocknr) ; printk("reread_meta_blocks, error reading bitmap block number %d at
%ld\n", i, SB_AP_BITMAP(s)[i].bh->b_blocknr) ;
return 1 ; return 1 ;
} }
} }
...@@ -1087,9 +1166,17 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent) ...@@ -1087,9 +1166,17 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent)
} }
s->u.generic_sbp = sbi; s->u.generic_sbp = sbi;
memset (sbi, 0, sizeof (struct reiserfs_sb_info)); memset (sbi, 0, sizeof (struct reiserfs_sb_info));
/* Set default values for options: non-aggressive tails */
REISERFS_SB(s)->s_mount_opt = ( 1 << REISERFS_SMALLTAIL );
/* default block allocator option: skip_busy */
REISERFS_SB(s)->s_alloc_options.bits = ( 1 << 5);
/* If file grew past 4 blocks, start preallocation blocks for it. */
REISERFS_SB(s)->s_alloc_options.preallocmin = 4;
/* Preallocate by 8 blocks (9-1) at once */
REISERFS_SB(s)->s_alloc_options.preallocsize = 9;
jdev_name = NULL; jdev_name = NULL;
if (reiserfs_parse_options ((char *) data, &(sbi->s_mount_opt), &blocks, &jdev_name) == 0) { if (reiserfs_parse_options (s, (char *) data, &(sbi->s_mount_opt), &blocks, &jdev_name) == 0) {
goto error; goto error;
} }
...@@ -1236,10 +1323,10 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent) ...@@ -1236,10 +1323,10 @@ static int reiserfs_fill_super (struct super_block * s, void * data, int silent)
if (SB_DISK_SUPER_BLOCK (s)) { if (SB_DISK_SUPER_BLOCK (s)) {
for (j = 0; j < SB_BMAP_NR (s); j ++) { for (j = 0; j < SB_BMAP_NR (s); j ++) {
if (SB_AP_BITMAP (s)) if (SB_AP_BITMAP (s))
brelse (SB_AP_BITMAP (s)[j]); brelse (SB_AP_BITMAP (s)[j].bh);
} }
if (SB_AP_BITMAP (s)) if (SB_AP_BITMAP (s))
reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); vfree (SB_AP_BITMAP (s));
} }
if (SB_BUFFER_WITH_SB (s)) if (SB_BUFFER_WITH_SB (s))
brelse(SB_BUFFER_WITH_SB (s)); brelse(SB_BUFFER_WITH_SB (s));
......
...@@ -59,7 +59,8 @@ ...@@ -59,7 +59,8 @@
#define USE_INODE_GENERATION_COUNTER #define USE_INODE_GENERATION_COUNTER
#define REISERFS_PREALLOCATE #define REISERFS_PREALLOCATE
#define PREALLOCATION_SIZE 8 #define DISPLACE_NEW_PACKING_LOCALITIES
#define PREALLOCATION_SIZE 9
/* n must be power of 2 */ /* n must be power of 2 */
#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
...@@ -326,7 +327,7 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) ...@@ -326,7 +327,7 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
time cost for a 4 block file and saves an amount of space that is time cost for a 4 block file and saves an amount of space that is
less significant as a percentage of space, or so goes the hypothesis. less significant as a percentage of space, or so goes the hypothesis.
-Hans */ -Hans */
#define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
(\ (\
(!(n_tail_size)) || \ (!(n_tail_size)) || \
(((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \
...@@ -339,6 +340,18 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) ...@@ -339,6 +340,18 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
) )
/* Second tail strategy: keep a tail only when the whole file fits into a
   single DIRECT item.  The macro is true when the tail must instead go into
   an unformatted node, i.e. when there is no tail at all or when the file is
   larger than one direct item can hold.
   The main purpose of this policy is to improve performance by reducing
   seeking.
*/
#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
    ((n_tail_size) == 0 || (n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size))
/* /*
* values for s_umount_state field * values for s_umount_state field
...@@ -1433,6 +1446,10 @@ struct tree_balance ...@@ -1433,6 +1446,10 @@ struct tree_balance
int fs_gen; /* saved value of `reiserfs_generation' counter int fs_gen; /* saved value of `reiserfs_generation' counter
see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
struct key key; /* key pointer, to pass to block allocator or
another low-level subsystem */
#endif
} ; } ;
/* These are modes of balancing */ /* These are modes of balancing */
...@@ -1673,7 +1690,7 @@ int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block ...@@ -1673,7 +1690,7 @@ int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block
int push_journal_writer(char *w) ; int push_journal_writer(char *w) ;
int pop_journal_writer(int windex) ; int pop_journal_writer(int windex) ;
int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ; int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ;
int reiserfs_in_journal(struct super_block *p_s_sb, unsigned long bl, int searchall, unsigned long *next) ; int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr, int searchall, unsigned long *next) ;
int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ;
void flush_async_commits(struct super_block *p_s_sb) ; void flush_async_commits(struct super_block *p_s_sb) ;
...@@ -1818,8 +1835,8 @@ void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, ...@@ -1818,8 +1835,8 @@ void reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
#define file_size(inode) ((inode)->i_size) #define file_size(inode) ((inode)->i_size)
#define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1)) #define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1))
#define tail_has_to_be_packed(inode) (!dont_have_tails ((inode)->i_sb) &&\ #define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\
!STORE_TAIL_IN_UNFM(file_size (inode), tail_size(inode), i_block_size (inode))) !STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 )
void padd_item (char * item, int total_length, int length); void padd_item (char * item, int total_length, int length);
...@@ -2015,22 +2032,87 @@ void make_empty_node (struct buffer_info *); ...@@ -2015,22 +2032,87 @@ void make_empty_node (struct buffer_info *);
struct buffer_head * get_FEB (struct tree_balance *); struct buffer_head * get_FEB (struct tree_balance *);
/* bitmap.c */ /* bitmap.c */
/* Hints for the block allocator.  The structure also serves as a container
 * for the allocator's arguments, such as the inode, search path and
 * transaction handle. */
struct __reiserfs_blocknr_hint {
    struct inode * inode;		/* inode passed to allocator, if we allocate unf. nodes */
    long block;			/* file offset, in blocks */
    struct key key;
    struct path * path;		/* search path, used by allocator to determine search_start
				 * in various ways */
    struct reiserfs_transaction_handle * th; /* transaction handle is needed to log super block and
					      * bitmap block changes */
    b_blocknr_t beg, end;
    b_blocknr_t search_start;	/* a field used to transfer search start value (block number)
				 * between different block allocator procedures
				 * (determine_search_start() and others) */
    int prealloc_size;		/* is set in determine_prealloc_size() function, used by the
				 * underlying functions that do the actual allocation */
    /* The two flags below are declared unsigned on purpose: a one-bit
     * bit-field of plain int may be signed, in which case it can only hold
     * 0 and -1 and an initializer of 1 would read back as -1. */
    unsigned formatted_node:1;	/* the allocator uses different policies for getting disk space for
				 * formatted/unformatted blocks with/without preallocation */
    unsigned preallocate:1;
};

typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t;
int reiserfs_parse_alloc_options (struct super_block *, char *);
int is_reusable (struct super_block * s, unsigned long block, int bit_value); int is_reusable (struct super_block * s, unsigned long block, int bit_value);
void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long); void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long);
int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t * , int, int);
unsigned long * pblocknrs, unsigned long start_from, int amount_needed); extern inline int reiserfs_new_form_blocknrs (struct tree_balance * tb,
int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th, b_blocknr_t *new_blocknrs, int amount_needed)
unsigned long * pblocknr, unsigned long start_from); {
reiserfs_blocknr_hint_t hint = {
th:tb->transaction_handle,
path: tb->tb_path,
inode: NULL,
key: tb->key,
block: 0,
formatted_node:1
};
return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, 0);
}
/* Request a single block number for an unformatted node, with the
 * preallocate flag cleared and no inode attached to the hint.
 * @th            - transaction handle stored in the hint
 * @new_blocknrs  - handed through to reiserfs_allocate_blocknrs()
 * @path          - search path stored in the hint
 * @block         - file offset, in blocks, stored in the hint
 *
 * Returns whatever reiserfs_allocate_blocknrs() returns for a request of
 * one block.  Hint fields not listed below are zero-initialized.
 */
extern inline int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th,
					     b_blocknr_t *new_blocknrs,
					     struct path * path, long block)
{
    reiserfs_blocknr_hint_t hint = {
	.inode = NULL,
	.block = block,
	.path = path,
	.th = th,
	.formatted_node = 0,
	.preallocate = 0
    };

    return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
}
#ifdef REISERFS_PREALLOCATE #ifdef REISERFS_PREALLOCATE
int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, extern inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle *th,
struct inode * inode, struct inode * inode,
unsigned long * pblocknr, b_blocknr_t *new_blocknrs,
unsigned long start_from); struct path * path, long block)
{
reiserfs_blocknr_hint_t hint = {
th: th,
path: path,
inode: inode,
block: block,
formatted_node: 0,
preallocate: 1
};
return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0);
}
void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th,
struct inode * inode); struct inode * inode);
void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th); void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th);
#endif #endif
void reiserfs_claim_blocks_to_be_allocated( struct super_block *sb, int blocks);
void reiserfs_release_claimed_blocks( struct super_block *sb, int blocks);
/* hashes.c */ /* hashes.c */
__u32 keyed_hash (const signed char *msg, int len); __u32 keyed_hash (const signed char *msg, int len);
......
...@@ -37,6 +37,10 @@ struct reiserfs_inode_info { ...@@ -37,6 +37,10 @@ struct reiserfs_inode_info {
struct list_head i_prealloc_list; /* per-transaction list of inodes which struct list_head i_prealloc_list; /* per-transaction list of inodes which
* have preallocated blocks */ * have preallocated blocks */
int new_packing_locality:1; /* new_packig_locality is created; new blocks
* for the contents of this directory should be
* displaced */
/* we use these for fsync or O_SYNC to decide which transaction /* we use these for fsync or O_SYNC to decide which transaction
** needs to be committed in order for this inode to be properly ** needs to be committed in order for this inode to be properly
** flushed */ ** flushed */
......
...@@ -160,7 +160,10 @@ struct reiserfs_transaction_handle { ...@@ -160,7 +160,10 @@ struct reiserfs_transaction_handle {
int t_blocks_allocated ; /* number of blocks this writer allocated */ int t_blocks_allocated ; /* number of blocks this writer allocated */
unsigned long t_trans_id ; /* sanity check, equals the current trans id */ unsigned long t_trans_id ; /* sanity check, equals the current trans id */
struct super_block *t_super ; /* super for this FS when journal_begin was struct super_block *t_super ; /* super for this FS when journal_begin was
called. */ called. saves calls to reiserfs_get_super */
int displace_new_blocks:1; /* if new block allocation occurres, that block
should be displaced from others */
} ; } ;
/* /*
...@@ -254,6 +257,14 @@ struct reiserfs_journal { ...@@ -254,6 +257,14 @@ struct reiserfs_journal {
typedef __u32 (*hashf_t) (const signed char *, int); typedef __u32 (*hashf_t) (const signed char *, int);
/* In-core bookkeeping kept for each bitmap block: the buffer holding the
 * bitmap plus two cached values derived from its contents. */
struct reiserfs_bitmap_info
{
// FIXME: Won't work with block sizes > 8K
/* Index of a clear (free) bit in this bitmap; 0 means "no hint recorded",
 * as described in the comment above load_bitmap_info_data(). */
__u16 first_zero_hint;
/* Number of clear (free) bits in this bitmap. */
__u16 free_count;
struct buffer_head *bh; /* the actual bitmap */
};
struct proc_dir_entry; struct proc_dir_entry;
#if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) #if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO )
...@@ -298,14 +309,15 @@ typedef struct reiserfs_proc_info_data ...@@ -298,14 +309,15 @@ typedef struct reiserfs_proc_info_data
stat_cnt_t need_r_neighbor[ 5 ]; stat_cnt_t need_r_neighbor[ 5 ];
stat_cnt_t free_block; stat_cnt_t free_block;
struct __find_forward_stats { struct __scan_bitmap_stats {
stat_cnt_t call; stat_cnt_t call;
stat_cnt_t wait; stat_cnt_t wait;
stat_cnt_t bmap; stat_cnt_t bmap;
stat_cnt_t retry; stat_cnt_t retry;
stat_cnt_t in_journal_hint; stat_cnt_t in_journal_hint;
stat_cnt_t in_journal_out; stat_cnt_t in_journal_nohint;
} find_forward; stat_cnt_t stolen;
} scan_bitmap;
struct __journal_stats { struct __journal_stats {
stat_cnt_t in_journal; stat_cnt_t in_journal;
stat_cnt_t in_journal_bitmap; stat_cnt_t in_journal_bitmap;
...@@ -335,7 +347,7 @@ struct reiserfs_sb_info ...@@ -335,7 +347,7 @@ struct reiserfs_sb_info
/* both the comment and the choice of /* both the comment and the choice of
name are unclear for s_rs -Hans */ name are unclear for s_rs -Hans */
struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer */ struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer */
struct buffer_head ** s_ap_bitmap; /* array of buffers, holding block bitmap */ struct reiserfs_bitmap_info * s_ap_bitmap;
struct reiserfs_journal *s_journal ; /* pointer to journal information */ struct reiserfs_journal *s_journal ; /* pointer to journal information */
unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
...@@ -348,6 +360,16 @@ struct reiserfs_sb_info ...@@ -348,6 +360,16 @@ struct reiserfs_sb_info
here (currently - NOTAIL, NOLOG, here (currently - NOTAIL, NOLOG,
REPLAYONLY) */ REPLAYONLY) */
struct { /* Block allocator options for this filesystem */
unsigned long bits; /* Bit flags enabling/disabling individual allocator options; read and written through SB_ALLOC_OPTS() using the _ALLOC_* bit numbers */
unsigned long large_file_size; /* Size (in blocks) starting from which we consider a file to be a large one */
int border; /* Percentage of the disk that the border takes */
int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */
int preallocsize; /* Number of blocks we try to preallocate when a file
reaches preallocmin size (in blocks) or
prealloc_list is empty. */
} s_alloc_options;
/* Comment? -Hans */ /* Comment? -Hans */
wait_queue_head_t s_wait; wait_queue_head_t s_wait;
/* To be obsoleted soon by per buffer seals.. -Hans */ /* To be obsoleted soon by per buffer seals.. -Hans */
...@@ -374,6 +396,7 @@ struct reiserfs_sb_info ...@@ -374,6 +396,7 @@ struct reiserfs_sb_info
int s_is_unlinked_ok; int s_is_unlinked_ok;
reiserfs_proc_info_data_t s_proc_info_data; reiserfs_proc_info_data_t s_proc_info_data;
struct proc_dir_entry *procdir; struct proc_dir_entry *procdir;
int reserved_blocks; /* amount of blocks reserved for further allocations */
}; };
/* Definitions of reiserfs on-disk properties: */ /* Definitions of reiserfs on-disk properties: */
...@@ -381,7 +404,8 @@ struct reiserfs_sb_info ...@@ -381,7 +404,8 @@ struct reiserfs_sb_info
#define REISERFS_3_6 1 #define REISERFS_3_6 1
/* Mount options */ /* Mount options */
#define NOTAIL 0 /* -o notail: no tails will be created in a session */ #define REISERFS_LARGETAIL 0 /* large tails will be created in a session */
#define REISERFS_SMALLTAIL 17 /* small (for files less than block size) tails will be created in a session */
#define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */ #define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */
#define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */ #define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */
#define REISERFS_CONVERT 5 /* -o conv: causes conversion of old #define REISERFS_CONVERT 5 /* -o conv: causes conversion of old
...@@ -429,7 +453,8 @@ struct reiserfs_sb_info ...@@ -429,7 +453,8 @@ struct reiserfs_sb_info
#define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) #define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
#define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4)) #define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4))
#define dont_have_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << NOTAIL)) #define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL))
#define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL))
#define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY)) #define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY))
#define reiserfs_dont_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NOLOG)) #define reiserfs_dont_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NOLOG))
#define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5)) #define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment