lab.nexedi.com will be down from Thursday, 20 March 2025, 07:30:00 UTC for a duration of approximately 2 hours

Commit 9b2ff973 authored by Martin K. Petersen, committed by Greg Kroah-Hartman

block: Backport of various I/O topology fixes from 2.6.33 and 2.6.34

block: Backport of various I/O topology fixes from 2.6.33 and 2.6.34

The stacking code incorrectly scaled up the data offset in some cases
causing misaligned devices to report alignment.  Rewrite the stacking
algorithm to remedy this.  

(Upstream commit 9504e086)

The top device misalignment flag would not be set if the added bottom
device was already misaligned as opposed to causing a stacking failure.
    
Also massage the reporting so that an error is only returned if adding
the bottom device caused the misalignment.  I.e. don't return an error
if the top is already flagged as misaligned.

(Upstream commit fe0b393f)


lcm() was defined to take integer-sized arguments.  The supplied
arguments are multiplied, however, causing us to overflow given
sufficiently large input.  That in turn led to incorrect optimal I/O
size reporting in some cases.  Switch lcm() over to unsigned long
similar to gcd() and move the function from blk-settings.c to lib.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
parent e548510b
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/gcd.h> #include <linux/gcd.h>
#include <linux/lcm.h>
#include "blk.h" #include "blk.h"
...@@ -490,18 +491,31 @@ EXPORT_SYMBOL(blk_queue_stack_limits); ...@@ -490,18 +491,31 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
/** /**
* blk_stack_limits - adjust queue_limits for stacked devices * blk_stack_limits - adjust queue_limits for stacked devices
* @t: the stacking driver limits (top) * @t: the stacking driver limits (top device)
* @b: the underlying queue limits (bottom) * @b: the underlying queue limits (bottom, component device)
* @offset: offset to beginning of data within component device * @offset: offset to beginning of data within component device
* *
* Description: * Description:
* Merges two queue_limit structs. Returns 0 if alignment didn't * This function is used by stacking drivers like MD and DM to ensure
* change. Returns -1 if adding the bottom device caused * that all component devices have compatible block sizes and
* misalignment. * alignments. The stacking driver must provide a queue_limits
* struct (top) and then iteratively call the stacking function for
* all component (bottom) devices. The stacking function will
* attempt to combine the values and ensure proper alignment.
*
* Returns 0 if the top and bottom queue_limits are compatible. The
* top device's block sizes and alignment offsets may be adjusted to
* ensure alignment with the bottom device. If no compatible sizes
* and alignments exist, -1 is returned and the resulting top
* queue_limits will have the misaligned flag set to indicate that
* the alignment_offset is undefined.
*/ */
int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset) sector_t offset)
{ {
sector_t alignment;
unsigned int top, bottom, ret = 0;
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
...@@ -518,6 +532,26 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ...@@ -518,6 +532,26 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_segment_size = min_not_zero(t->max_segment_size, t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size); b->max_segment_size);
t->misaligned |= b->misaligned;
alignment = queue_limit_alignment_offset(b, offset);
/* Bottom device has different alignment. Check that it is
* compatible with the current top alignment.
*/
if (t->alignment_offset != alignment) {
top = max(t->physical_block_size, t->io_min)
+ t->alignment_offset;
bottom = max(b->physical_block_size, b->io_min) + alignment;
/* Verify that top and bottom intervals line up */
if (max(top, bottom) & (min(top, bottom) - 1)) {
t->misaligned = 1;
ret = -1;
}
}
t->logical_block_size = max(t->logical_block_size, t->logical_block_size = max(t->logical_block_size,
b->logical_block_size); b->logical_block_size);
...@@ -525,37 +559,46 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ...@@ -525,37 +559,46 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
b->physical_block_size); b->physical_block_size);
t->io_min = max(t->io_min, b->io_min); t->io_min = max(t->io_min, b->io_min);
t->io_opt = lcm(t->io_opt, b->io_opt);
t->no_cluster |= b->no_cluster; t->no_cluster |= b->no_cluster;
/* Bottom device offset aligned? */ /* Physical block size a multiple of the logical block size? */
if (offset && if (t->physical_block_size & (t->logical_block_size - 1)) {
(offset & (b->physical_block_size - 1)) != b->alignment_offset) { t->physical_block_size = t->logical_block_size;
t->misaligned = 1; t->misaligned = 1;
return -1; ret = -1;
} }
/* If top has no alignment offset, inherit from bottom */ /* Minimum I/O a multiple of the physical block size? */
if (!t->alignment_offset) if (t->io_min & (t->physical_block_size - 1)) {
t->alignment_offset = t->io_min = t->physical_block_size;
b->alignment_offset & (b->physical_block_size - 1); t->misaligned = 1;
ret = -1;
}
/* Top device aligned on logical block boundary? */ /* Optimal I/O a multiple of the physical block size? */
if (t->alignment_offset & (t->logical_block_size - 1)) { if (t->io_opt & (t->physical_block_size - 1)) {
t->io_opt = 0;
t->misaligned = 1; t->misaligned = 1;
return -1; ret = -1;
} }
/* Find lcm() of optimal I/O size */ /* Find lowest common alignment_offset */
if (t->io_opt && b->io_opt) t->alignment_offset = lcm(t->alignment_offset, alignment)
t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt); & (max(t->physical_block_size, t->io_min) - 1);
else if (b->io_opt)
t->io_opt = b->io_opt;
/* Verify that optimal I/O size is a multiple of io_min */ /* Verify that new alignment_offset is on a logical block boundary */
if (t->io_min && t->io_opt % t->io_min) if (t->alignment_offset & (t->logical_block_size - 1)) {
return -1; t->misaligned = 1;
ret = -1;
}
return 0; /* Discard */
t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
b->max_discard_sectors);
return ret;
} }
EXPORT_SYMBOL(blk_stack_limits); EXPORT_SYMBOL(blk_stack_limits);
......
#ifndef _LCM_H
#define _LCM_H
#include <linux/compiler.h>
/*
 * lcm - lowest common multiple of @a and @b.
 *
 * When both arguments are non-zero the result is their lowest common
 * multiple.  When exactly one argument is zero the other argument is
 * returned, and when both are zero the result is zero.
 *
 * NOTE(review): __attribute_const__ presumably comes from
 * <linux/compiler.h> and tells the compiler the result depends only on
 * the arguments -- confirm.
 */
unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__;
#endif /* _LCM_H */
...@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o ...@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
string_helpers.o gcd.o string_helpers.o gcd.o lcm.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y) ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject.o += -DDEBUG
......
#include <linux/kernel.h>
#include <linux/gcd.h>
#include <linux/module.h>
/**
 * lcm - lowest common multiple of two values
 * @a: first value
 * @b: second value
 *
 * Returns the lowest common multiple of @a and @b when both are
 * non-zero.  If exactly one argument is zero the other argument is
 * returned unchanged, and if both are zero the result is zero.
 *
 * The division by gcd() is performed before the multiplication.  Since
 * gcd(a, b) divides @a exactly, the result is the same as
 * (a * b) / gcd(a, b), but the intermediate value never exceeds the
 * final result.  Multiplying first can wrap around unsigned long even
 * when the lowest common multiple itself is representable.
 */
unsigned long lcm(unsigned long a, unsigned long b)
{
	if (a && b)
		return (a / gcd(a, b)) * b;
	else if (b)
		return b;

	return a;
}
EXPORT_SYMBOL_GPL(lcm);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment