Commit 6ded22c1 authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba

btrfs: reduce div64 calls by limiting the number of stripes of a chunk to u32

There are quite a few div64 calls inside btrfs_map_block() and its
variants.

Such calls are for @stripe_nr, where @stripe_nr is the number of
stripes before our logical bytenr inside a chunk.

However, we can eliminate such div64 calls by reducing the width of
@stripe_nr from 64 bits to 32 bits.

This is possible because our chunk size is already limited to 10G, with
a fixed stripe length of 64K, so a u32 is definitely large enough to
hold the number of stripes.

With this width reduction, we can get rid of the slower div64 calls and
of the extra warnings on certain 32-bit architectures.

This patch does the following:

- Add a new tree-checker validation of the chunk length
  Make sure no chunk can reach 256G; this limit also acts as a bitflip
  checker.

- Reduce the width from u64 to u32 for @stripe_nr variables

- Replace unnecessary div64 calls with regular 32-bit modulo and division
  32-bit division and modulo are much faster than their 64-bit
  counterparts, and we are finally free of div64 concerns, at least in
  the functions involved.
Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent a97699d1
...@@ -1992,8 +1992,8 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, ...@@ -1992,8 +1992,8 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
for (i = 0; i < map->num_stripes; i++) { for (i = 0; i < map->num_stripes; i++) {
bool already_inserted = false; bool already_inserted = false;
u64 stripe_nr; u32 stripe_nr;
u64 offset; u32 offset;
int j; int j;
if (!in_range(physical, map->stripes[i].physical, if (!in_range(physical, map->stripes[i].physical,
...@@ -2006,16 +2006,14 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, ...@@ -2006,16 +2006,14 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
BTRFS_STRIPE_LEN_MASK; BTRFS_STRIPE_LEN_MASK;
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID10)) { BTRFS_BLOCK_GROUP_RAID10))
stripe_nr = stripe_nr * map->num_stripes + i; stripe_nr = div_u64(stripe_nr * map->num_stripes + i,
stripe_nr = div_u64(stripe_nr, map->sub_stripes); map->sub_stripes);
}
/* /*
* The remaining case would be for RAID56, multiply by * The remaining case would be for RAID56, multiply by
* nr_data_stripes(). Alternatively, just use rmap_len below * nr_data_stripes(). Alternatively, just use rmap_len below
* instead of map->stripe_len * instead of map->stripe_len
*/ */
bytenr = chunk_start + stripe_nr * io_stripe_size + offset; bytenr = chunk_start + stripe_nr * io_stripe_size + offset;
/* Ensure we don't add duplicate addresses */ /* Ensure we don't add duplicate addresses */
......
...@@ -2908,10 +2908,7 @@ static int get_raid56_logic_offset(u64 physical, int num, ...@@ -2908,10 +2908,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
{ {
int i; int i;
int j = 0; int j = 0;
u64 stripe_nr;
u64 last_offset; u64 last_offset;
u32 stripe_index;
u32 rot;
const int data_stripes = nr_data_stripes(map); const int data_stripes = nr_data_stripes(map);
last_offset = (physical - map->stripes[num].physical) * data_stripes; last_offset = (physical - map->stripes[num].physical) * data_stripes;
...@@ -2920,13 +2917,17 @@ static int get_raid56_logic_offset(u64 physical, int num, ...@@ -2920,13 +2917,17 @@ static int get_raid56_logic_offset(u64 physical, int num,
*offset = last_offset; *offset = last_offset;
for (i = 0; i < data_stripes; i++) { for (i = 0; i < data_stripes; i++) {
u32 stripe_nr;
u32 stripe_index;
u32 rot;
*offset = last_offset + (i << BTRFS_STRIPE_LEN_SHIFT); *offset = last_offset + (i << BTRFS_STRIPE_LEN_SHIFT);
stripe_nr = *offset >> BTRFS_STRIPE_LEN_SHIFT; stripe_nr = (u32)(*offset >> BTRFS_STRIPE_LEN_SHIFT) / data_stripes;
stripe_nr = div_u64(stripe_nr, data_stripes);
/* Work out the disk rotation on this stripe-set */ /* Work out the disk rotation on this stripe-set */
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot); rot = stripe_nr % map->num_stripes;
stripe_nr /= map->num_stripes;
/* calculate which stripe this data locates */ /* calculate which stripe this data locates */
rot += i; rot += i;
stripe_index = rot % map->num_stripes; stripe_index = rot % map->num_stripes;
......
...@@ -849,6 +849,20 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, ...@@ -849,6 +849,20 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
stripe_len); stripe_len);
return -EUCLEAN; return -EUCLEAN;
} }
/*
* We artificially limit the chunk size, so that the number of stripes
* inside a chunk can be fit into a U32. The current limit (256G) is
* way too large for real world usage anyway, and it's also much larger
* than our existing limit (10G).
*
* Thus it should be a good way to catch obvious bitflips.
*/
if (unlikely(length >= ((u64)U32_MAX << BTRFS_STRIPE_LEN_SHIFT))) {
chunk_err(leaf, chunk, logical,
"chunk length too large: have %llu limit %llu",
length, (u64)U32_MAX << BTRFS_STRIPE_LEN_SHIFT);
return -EUCLEAN;
}
if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
BTRFS_BLOCK_GROUP_PROFILE_MASK))) { BTRFS_BLOCK_GROUP_PROFILE_MASK))) {
chunk_err(leaf, chunk, logical, chunk_err(leaf, chunk, logical,
......
...@@ -5970,15 +5970,15 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, ...@@ -5970,15 +5970,15 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
struct btrfs_discard_stripe *stripes; struct btrfs_discard_stripe *stripes;
u64 length = *length_ret; u64 length = *length_ret;
u64 offset; u64 offset;
u64 stripe_nr; u32 stripe_nr;
u64 stripe_nr_end; u32 stripe_nr_end;
u32 stripe_cnt;
u64 stripe_end_offset; u64 stripe_end_offset;
u64 stripe_cnt;
u64 stripe_offset; u64 stripe_offset;
u32 stripe_index; u32 stripe_index;
u32 factor = 0; u32 factor = 0;
u32 sub_stripes = 0; u32 sub_stripes = 0;
u64 stripes_per_dev = 0; u32 stripes_per_dev = 0;
u32 remaining_stripes = 0; u32 remaining_stripes = 0;
u32 last_stripe = 0; u32 last_stripe = 0;
int ret; int ret;
...@@ -6031,18 +6031,19 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, ...@@ -6031,18 +6031,19 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
factor = map->num_stripes / sub_stripes; factor = map->num_stripes / sub_stripes;
*num_stripes = min_t(u64, map->num_stripes, *num_stripes = min_t(u64, map->num_stripes,
sub_stripes * stripe_cnt); sub_stripes * stripe_cnt);
stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); stripe_index = stripe_nr % factor;
stripe_nr /= factor;
stripe_index *= sub_stripes; stripe_index *= sub_stripes;
stripes_per_dev = div_u64_rem(stripe_cnt, factor,
&remaining_stripes); remaining_stripes = stripe_cnt % factor;
div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); stripes_per_dev = stripe_cnt / factor;
last_stripe *= sub_stripes; last_stripe = ((stripe_nr_end - 1) % factor) * sub_stripes;
} else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK | } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_DUP)) { BTRFS_BLOCK_GROUP_DUP)) {
*num_stripes = map->num_stripes; *num_stripes = map->num_stripes;
} else { } else {
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, stripe_index = stripe_nr % map->num_stripes;
&stripe_index); stripe_nr /= map->num_stripes;
} }
stripes = kcalloc(*num_stripes, sizeof(*stripes), GFP_NOFS); stripes = kcalloc(*num_stripes, sizeof(*stripes), GFP_NOFS);
...@@ -6298,7 +6299,7 @@ static bool need_full_stripe(enum btrfs_map_op op) ...@@ -6298,7 +6299,7 @@ static bool need_full_stripe(enum btrfs_map_op op)
} }
static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
u64 offset, u64 *stripe_nr, u64 *stripe_offset, u64 offset, u32 *stripe_nr, u64 *stripe_offset,
u64 *full_stripe_start) u64 *full_stripe_start)
{ {
ASSERT(op != BTRFS_MAP_DISCARD); ASSERT(op != BTRFS_MAP_DISCARD);
...@@ -6346,7 +6347,7 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, ...@@ -6346,7 +6347,7 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
} }
static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map, static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
u32 stripe_index, u64 stripe_offset, u64 stripe_nr) u32 stripe_index, u64 stripe_offset, u32 stripe_nr)
{ {
dst->dev = map->stripes[stripe_index].dev; dst->dev = map->stripes[stripe_index].dev;
dst->physical = map->stripes[stripe_index].physical + dst->physical = map->stripes[stripe_index].physical +
...@@ -6363,7 +6364,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6363,7 +6364,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct map_lookup *map; struct map_lookup *map;
u64 map_offset; u64 map_offset;
u64 stripe_offset; u64 stripe_offset;
u64 stripe_nr; u32 stripe_nr;
u32 stripe_index; u32 stripe_index;
int data_stripes; int data_stripes;
int i; int i;
...@@ -6422,8 +6423,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6422,8 +6423,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
num_stripes = 1; num_stripes = 1;
stripe_index = 0; stripe_index = 0;
if (map->type & BTRFS_BLOCK_GROUP_RAID0) { if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, stripe_index = stripe_nr % map->num_stripes;
&stripe_index); stripe_nr /= map->num_stripes;
if (!need_full_stripe(op)) if (!need_full_stripe(op))
mirror_num = 1; mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) { } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
...@@ -6449,8 +6450,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6449,8 +6450,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
u32 factor = map->num_stripes / map->sub_stripes; u32 factor = map->num_stripes / map->sub_stripes;
stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); stripe_index = (stripe_nr % factor) * map->sub_stripes;
stripe_index *= map->sub_stripes; stripe_nr /= factor;
if (need_full_stripe(op)) if (need_full_stripe(op))
num_stripes = map->sub_stripes; num_stripes = map->sub_stripes;
...@@ -6466,9 +6467,16 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6466,9 +6467,16 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
/* push stripe_nr back to the start of the full stripe */ /*
stripe_nr = div64_u64(raid56_full_stripe_start, * Push stripe_nr back to the start of the full stripe
data_stripes << BTRFS_STRIPE_LEN_SHIFT); * For those cases needing a full stripe, @stripe_nr
* is the full stripe number.
*
* Originally we go raid56_full_stripe_start / full_stripe_len,
* but that can be expensive. Here we just divide
* @stripe_nr with @data_stripes.
*/
stripe_nr /= data_stripes;
/* RAID[56] write or recovery. Return all stripes */ /* RAID[56] write or recovery. Return all stripes */
num_stripes = map->num_stripes; num_stripes = map->num_stripes;
...@@ -6486,25 +6494,24 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6486,25 +6494,24 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* Mirror #2 is RAID5 parity block. * Mirror #2 is RAID5 parity block.
* Mirror #3 is RAID6 Q block. * Mirror #3 is RAID6 Q block.
*/ */
stripe_nr = div_u64_rem(stripe_nr, stripe_index = stripe_nr % data_stripes;
data_stripes, &stripe_index); stripe_nr /= data_stripes;
if (mirror_num > 1) if (mirror_num > 1)
stripe_index = data_stripes + mirror_num - 2; stripe_index = data_stripes + mirror_num - 2;
/* We distribute the parity blocks across stripes */ /* We distribute the parity blocks across stripes */
div_u64_rem(stripe_nr + stripe_index, map->num_stripes, stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
&stripe_index);
if (!need_full_stripe(op) && mirror_num <= 1) if (!need_full_stripe(op) && mirror_num <= 1)
mirror_num = 1; mirror_num = 1;
} }
} else { } else {
/* /*
* after this, stripe_nr is the number of stripes on this * After this, stripe_nr is the number of stripes on this
* device we have to walk to find the data, and stripe_index is * device we have to walk to find the data, and stripe_index is
* the number of our device in the stripe array * the number of our device in the stripe array
*/ */
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, stripe_index = stripe_nr % map->num_stripes;
&stripe_index); stripe_nr /= map->num_stripes;
mirror_num = stripe_index + 1; mirror_num = stripe_index + 1;
} }
if (stripe_index >= map->num_stripes) { if (stripe_index >= map->num_stripes) {
...@@ -6566,7 +6573,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6566,7 +6573,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
unsigned rot; unsigned rot;
/* Work out the disk rotation on this stripe-set */ /* Work out the disk rotation on this stripe-set */
div_u64_rem(stripe_nr, num_stripes, &rot); rot = stripe_nr % num_stripes;
/* Fill in the logical address of each stripe */ /* Fill in the logical address of each stripe */
tmp = stripe_nr * data_stripes; tmp = stripe_nr * data_stripes;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment