Commit c80ddb52 authored by Linus Torvalds

Merge tag 'md-3.5' of git://neil.brown.name/md

Pull md updates from NeilBrown:
 "It's been a busy cycle for md - lots of fun stuff here..  if you like
  this kind of thing :-)

  Main features:
   - RAID10 arrays can be reshaped - adding and removing devices and
     changing chunks (not 'far' array though)
   - allow RAID5 arrays to be reshaped with a backup file (not tested
     yet, but the principle works fine for RAID10).
   - arrays can be reshaped while a bitmap is present - you no longer
     need to remove it first
   - SSSE3 support for RAID6 syndrome calculations

  and of course a number of minor fixes etc."

* tag 'md-3.5' of git://neil.brown.name/md: (56 commits)
  md/bitmap: record the space available for the bitmap in the superblock.
  md/raid10: Remove extras after reshape to smaller number of devices.
  md/raid5: improve removal of extra devices after reshape.
  md: check the return of mddev_find()
  MD RAID1: Further conditionalize 'fullsync'
  DM RAID: Use md_error() in place of simply setting Faulty bit
  DM RAID: Record and handle missing devices
  DM RAID: Set recovery flags on resume
  md/raid5: Allow reshape while a bitmap is present.
  md/raid10: resize bitmap when required during reshape.
  md: allow array to be resized while bitmap is present.
  md/bitmap: make sure reshape request are reflected in superblock.
  md/bitmap: add bitmap_resize function to allow bitmap resizing.
  md/bitmap: use DIV_ROUND_UP instead of open-code
  md/bitmap: create a 'struct bitmap_counts' substructure of 'struct bitmap'
  md/bitmap: make bitmap bitops atomic.
  md/bitmap: make _page_attr bitops atomic.
  md/bitmap: merge bitmap_file_unmap and bitmap_file_put.
  md/bitmap: remove async freeing of bitmap file.
  md/bitmap: convert some spin_lock_irqsave to spin_lock_irq
  ...
parents 2c13bc0f 1dff2b87
@@ -115,9 +115,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
+avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)

-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)

 LDFLAGS := -m elf_$(UTS_MACHINE)
...
@@ -861,6 +861,9 @@ static struct xor_block_template xor_block_pIII_sse = {
 	.do_5 = xor_sse_5,
 };

+/* Also try the AVX routines */
+#include "xor_avx.h"
+
 /* Also try the generic routines. */
 #include <asm-generic/xor.h>

@@ -871,6 +874,7 @@ do { \
 		xor_speed(&xor_block_8regs_p); \
 		xor_speed(&xor_block_32regs); \
 		xor_speed(&xor_block_32regs_p); \
+		AVX_XOR_SPEED; \
 		if (cpu_has_xmm) \
 			xor_speed(&xor_block_pIII_sse); \
 		if (cpu_has_mmx) { \
@@ -883,6 +887,6 @@ do { \
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched. */
 #define XOR_SELECT_TEMPLATE(FASTEST) \
-	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+	AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)

 #endif /* _ASM_X86_XOR_32_H */
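With CONFIG_AS_AVX defined, the selection macro simply nests the AVX check around the existing SSE one. An illustrative expansion (ours, not part of the patch) of what XOR_SELECT_TEMPLATE(FASTEST) now resolves to on 32-bit:

	(cpu_has_avx ? &xor_block_avx :
		(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST))

AVX is preferred when the CPU has it, then SSE, and only otherwise whatever template the boot-time benchmark measured as fastest.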
@@ -347,15 +347,21 @@ static struct xor_block_template xor_block_sse = {
 	.do_5 = xor_sse_5,
 };

+/* Also try the AVX routines */
+#include "xor_avx.h"
+
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES			\
 do {						\
+	AVX_XOR_SPEED;				\
 	xor_speed(&xor_block_sse);		\
 } while (0)

 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+	AVX_SELECT(&xor_block_sse)

 #endif /* _ASM_X86_XOR_64_H */
...
xor_avx.h (the new file included above):

#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/i387.h>

#define ALIGN32 __aligned(32)

#define YMM_SAVED_REGS 4

#define YMMS_SAVE \
do { \
	preempt_disable(); \
	cr0 = read_cr0(); \
	clts(); \
	asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
	asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
	asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
	asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
} while (0);
#define YMMS_RESTORE \
do { \
	asm volatile("sfence" : : : "memory"); \
	asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
	asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
	asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
	asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
	write_cr0(cr0); \
	preempt_enable(); \
} while (0);
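These two macros hand-roll a minimal FPU context switch: preempt_disable() keeps a task switch from clobbering live YMM registers, clts() clears CR0.TS so vector instructions don't fault, and only the four registers the routines actually use are spilled. For comparison, a hedged sketch of the generic alternative (not what this patch uses, precisely because it saves far more state):

	/* Generic form -- saves the whole extended FPU state: */
	kernel_fpu_begin();
	/* ... use ymm0-ymm3 ... */
	kernel_fpu_end();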
#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)
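A quick expansion check (ours, not part of the patch): each BLOCK handles one 32-byte YMM register, and BLOCK16() tiles them across a 512-byte span with the register number cycling 0-3:

	BLOCK4(0)  -> BLOCK(0, 0)   BLOCK(32, 1)  BLOCK(64, 2)  BLOCK(96, 3)
	BLOCK4(4)  -> BLOCK(128, 0) BLOCK(160, 1) BLOCK(192, 2) BLOCK(224, 3)
	BLOCK4(8)  -> BLOCK(256, 0) BLOCK(288, 1) BLOCK(320, 2) BLOCK(352, 3)
	BLOCK4(12) -> BLOCK(384, 0) BLOCK(416, 1) BLOCK(448, 2) BLOCK(480, 3)

16 blocks x 32 bytes = 512 bytes per iteration, which is why the functions below compute lines = bytes >> 9 and advance every pointer by 512.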
static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	YMMS_RESTORE
}
static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	YMMS_RESTORE
}
static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16();

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	YMMS_RESTORE
}
static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long cr0, lines = bytes >> 9;
	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;

	YMMS_SAVE

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	YMMS_RESTORE
}
static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

#define AVX_XOR_SPEED \
do { \
	if (cpu_has_avx) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(cpu_has_avx ? &xor_block_avx : FASTEST)

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif
#endif
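For context, a hedged sketch of the consumer side (the real dispatcher lives in crypto/xor.c; this is abridged, not verbatim): the template chosen by XOR_SELECT_TEMPLATE is cached in a static pointer and every XOR request is routed through its do_N hooks.

	/* Abridged sketch of the crypto/xor.c dispatch, for orientation only. */
	static struct xor_block_template *active_template;

	void xor_blocks_sketch(unsigned int src_count, unsigned int bytes,
			       void *dest, void **srcs)
	{
		unsigned long *d = dest, **p = (unsigned long **)srcs;

		if (src_count == 1)
			active_template->do_2(bytes, d, p[0]);
		else if (src_count == 2)
			active_template->do_3(bytes, d, p[0], p[1]);
		/* ... and do_4/do_5 for three and four extra sources ... */
	}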
@@ -21,6 +21,7 @@
 #include <linux/gfp.h>
 #include <linux/raid/xor.h>
 #include <linux/jiffies.h>
+#include <linux/preempt.h>
 #include <asm/xor.h>

 /* The xor routines to use. */
@@ -63,12 +64,14 @@ static void
 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 {
 	int speed;
-	unsigned long now;
+	unsigned long now, j;
 	int i, count, max;

 	tmpl->next = template_list;
 	template_list = tmpl;

+	preempt_disable();
+
 	/*
 	 * Count the number of XORs done during a whole jiffy, and use
 	 * this to calculate the speed of checksumming.  We use a 2-page
@@ -76,9 +79,11 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 	 */
 	max = 0;
 	for (i = 0; i < 5; i++) {
-		now = jiffies;
+		j = jiffies;
 		count = 0;
-		while (jiffies == now) {
+		while ((now = jiffies) == j)
+			cpu_relax();
+		while (time_before(jiffies, now + 1)) {
 			mb(); /* prevent loop optimzation */
 			tmpl->do_2(BENCH_SIZE, b1, b2);
 			mb();
@@ -89,6 +94,8 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
 			max = count;
 	}

+	preempt_enable();
+
 	speed = max * (HZ * BENCH_SIZE / 1024);
 	tmpl->speed = speed;
...
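A worked example of the speed formula, with assumed numbers: take HZ = 1000 and BENCH_SIZE = 4096, so each counted iteration XORs 4 KiB inside a window of one jiffy (1 ms). Then speed = max * (HZ * BENCH_SIZE / 1024) = max * 4000 in KB/s; if the best of the five runs manages max = 800 iterations, the template scores 3,200,000 KB/s, about 3.2 GB/s. The new preempt_disable()/cpu_relax() bracketing exists because a single preemption inside the timed jiffy deflates max and could make the benchmark pick a slower template.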
... (large diff collapsed, not shown)
@@ -111,9 +111,9 @@ typedef __u16 bitmap_counter_t;

 /* use these for bitmap->flags and bitmap->sb->state bit-fields */
 enum bitmap_state {
-	BITMAP_STALE = 0x002,  /* the bitmap file is out of date or had -EIO */
-	BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */
-	BITMAP_HOSTENDIAN = 0x8000,
+	BITMAP_STALE       = 1,  /* the bitmap file is out of date or had -EIO */
+	BITMAP_WRITE_ERROR = 2,  /* A write error has occurred */
+	BITMAP_HOSTENDIAN  = 15,
 };

 /* the superblock at the front of the bitmap file -- little endian */
@@ -128,8 +128,10 @@ typedef struct bitmap_super_s {
 	__le32 chunksize;    /* 52  the bitmap chunk size in bytes */
 	__le32 daemon_sleep; /* 56  seconds between disk flushes */
 	__le32 write_behind; /* 60  number of outstanding write-behind writes */
+	__le32 sectors_reserved; /* 64 number of 512-byte sectors that are
+				  * reserved for the bitmap. */

-	__u8  pad[256 - 64]; /* set to zero */
+	__u8  pad[256 - 68]; /* set to zero */
 } bitmap_super_t;

 /* notes:
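A quick layout check on the new field, worked from the byte offsets in the comments: write_behind occupies bytes 60-63, so sectors_reserved lands at bytes 64-67 and the trailing pad must shrink from 256 - 64 = 192 to 256 - 68 = 188 bytes to keep bitmap_super_t at exactly 256 bytes. The field sits in space that older kernels wrote as zeroes, so a value of 0 naturally reads as "no space recorded" on old metadata.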
@@ -159,36 +161,49 @@ struct bitmap_page {
 	 * pointer and use it as two counters itself
 	 */
 	unsigned int hijacked:1;
+	/*
+	 * If any counter in this page is '1' or '2' - and so could be
+	 * cleared then that page is marked as 'pending'
+	 */
+	unsigned int pending:1;
 	/*
 	 * count of dirty bits on the page
 	 */
-	unsigned int count:31;
+	unsigned int count:30;
 };

 /* the main bitmap structure - one per mddev */
 struct bitmap {
-	struct bitmap_page *bp;
-	unsigned long pages; /* total number of pages in the bitmap */
-	unsigned long missing_pages; /* number of pages not yet allocated */
+	struct bitmap_counts {
+		spinlock_t lock;
+		struct bitmap_page *bp;
+		unsigned long pages;		/* total number of pages
+						 * in the bitmap */
+		unsigned long missing_pages;	/* number of pages
+						 * not yet allocated */
+		unsigned long chunkshift;	/* chunksize = 2^chunkshift
+						 * (for bitops) */
+		unsigned long chunks;		/* Total number of data
+						 * chunks for the array */
+	} counts;

 	struct mddev *mddev; /* the md device that the bitmap is for */

-	/* bitmap chunksize -- how much data does each bit represent? */
-	unsigned long chunkshift; /* chunksize = 2^(chunkshift+9) (for bitops) */
-	unsigned long chunks; /* total number of data chunks for the array */
-
 	__u64 events_cleared;
 	int need_sync;

-	/* bitmap spinlock */
-	spinlock_t lock;
-
-	struct file *file; /* backing disk file */
-	struct page *sb_page; /* cached copy of the bitmap file superblock */
-	struct page **filemap; /* list of cache pages for the file */
-	unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
-	unsigned long file_pages; /* number of pages in the file */
-	int last_page_size; /* bytes in the last page */
+	struct bitmap_storage {
+		struct file *file;		/* backing disk file */
+		struct page *sb_page;		/* cached copy of the bitmap
+						 * file superblock */
+		struct page **filemap;		/* list of cache pages for
+						 * the file */
+		unsigned long *filemap_attr;	/* attributes associated
+						 * w/ filemap pages */
+		unsigned long file_pages;	/* number of pages in the file*/
+		unsigned long bytes;		/* total bytes in the bitmap */
+	} storage;

 	unsigned long flags;
@@ -242,6 +257,9 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
 void bitmap_unplug(struct bitmap *bitmap);
 void bitmap_daemon_work(struct mddev *mddev);

+int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
+		  int chunksize, int init);
+
 #endif

 #endif
...
@@ -155,10 +155,7 @@ static void context_free(struct raid_set *rs)
 	for (i = 0; i < rs->md.raid_disks; i++) {
 		if (rs->dev[i].meta_dev)
 			dm_put_device(rs->ti, rs->dev[i].meta_dev);
-		if (rs->dev[i].rdev.sb_page)
-			put_page(rs->dev[i].rdev.sb_page);
-		rs->dev[i].rdev.sb_page = NULL;
-		rs->dev[i].rdev.sb_loaded = 0;
+		md_rdev_clear(&rs->dev[i].rdev);
 		if (rs->dev[i].data_dev)
 			dm_put_device(rs->ti, rs->dev[i].data_dev);
 	}
@@ -606,7 +603,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 	if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
 		DMERR("Failed to read superblock of device at position %d",
 		      rdev->raid_disk);
-		set_bit(Faulty, &rdev->flags);
+		md_error(rdev->mddev, rdev);
 		return -EINVAL;
 	}
@@ -617,16 +614,18 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
 static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
 {
-	struct md_rdev *r;
+	int i;
 	uint64_t failed_devices;
 	struct dm_raid_superblock *sb;
+	struct raid_set *rs = container_of(mddev, struct raid_set, md);

 	sb = page_address(rdev->sb_page);
 	failed_devices = le64_to_cpu(sb->failed_devices);

-	rdev_for_each(r, mddev)
-		if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
-			failed_devices |= (1ULL << r->raid_disk);
+	for (i = 0; i < mddev->raid_disks; i++)
+		if (!rs->dev[i].data_dev ||
+		    test_bit(Faulty, &(rs->dev[i].rdev.flags)))
+			failed_devices |= (1ULL << i);

 	memset(sb, 0, sizeof(*sb));
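A concrete example of the new loop (device states assumed for illustration): in a four-device raid_set where slot 1 never got a data_dev and slot 3 is Faulty, the loop computes failed_devices |= (1ULL << 1) | (1ULL << 3), i.e. the mask 0xa. The old rdev_for_each() form could only see devices that actually had an rdev with raid_disk >= 0, so a device that was missing outright was never recorded as failed - which is the gap this rewrite closes.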
@@ -1252,12 +1251,13 @@ static void raid_resume(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;

+	set_bit(MD_CHANGE_DEVS, &rs->md.flags);
 	if (!rs->bitmap_loaded) {
 		bitmap_load(&rs->md);
 		rs->bitmap_loaded = 1;
-	} else
-		md_wakeup_thread(rs->md.thread);
+	}

+	clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
 	mddev_resume(&rs->md);
 }
...
... (large diff collapsed, not shown)
@@ -55,6 +55,7 @@ struct md_rdev {
 	int sb_loaded;
 	__u64 sb_events;
 	sector_t data_offset;	/* start of data in array */
+	sector_t new_data_offset;/* only relevant while reshaping */
 	sector_t sb_start;	/* offset of the super block (in 512byte sectors) */
 	int sb_size;		/* bytes in the superblock */
 	int preferred_minor;	/* autorun support */
@@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
 	return 0;
 }
 extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
-			      int acknowledged);
-extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors);
+			      int is_new);
+extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+				int is_new);
 extern void md_ack_all_badblocks(struct badblocks *bb);

 struct mddev {
@@ -262,6 +264,7 @@ struct mddev {
 	sector_t reshape_position;
 	int delta_disks, new_level, new_layout;
 	int new_chunk_sectors;
+	int reshape_backwards;

 	atomic_t plug_cnt;	/* If device is expecting
 				 * more bios soon.
@@ -390,10 +393,13 @@ struct mddev {
 					 * For external metadata, offset
 					 * from start of device.
 					 */
+		unsigned long space; /* space available at this offset */
 		loff_t default_offset; /* this is the offset to use when
 					* hot-adding a bitmap.  It should
 					* eventually be settable by sysfs.
 					*/
+		unsigned long default_space; /* space available at
+					      * default offset */
 		struct mutex mutex;
 		unsigned long chunksize;
 		unsigned long daemon_sleep; /* how many jiffies between updates? */
@@ -591,6 +597,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_end(struct mddev *mddev);
 extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
 extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
+extern void md_finish_reshape(struct mddev *mddev);
 extern int mddev_congested(struct mddev *mddev, int bits);
 extern void md_flush_request(struct mddev *mddev, struct bio *bio);
@@ -615,6 +622,7 @@ extern int md_run(struct mddev *mddev);
 extern void md_stop(struct mddev *mddev);
 extern void md_stop_writes(struct mddev *mddev);
 extern int md_rdev_init(struct md_rdev *rdev);
+extern void md_rdev_clear(struct md_rdev *rdev);
 extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
...
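The new is_new flag threads reshape awareness into the bad-block bookkeeping: while a reshape is in flight a block address can be relative to either the old or the new data offset. A hedged sketch of what the flag selects inside md.c:

	/* Sketch only: the helpers in md.c translate roughly like this. */
	sector_t real = s + (is_new ? rdev->new_data_offset
				    : rdev->data_offset);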
@@ -1859,7 +1859,9 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
-			    test_bit(In_sync, &rdev->flags) &&
+			    (test_bit(In_sync, &rdev->flags) ||
+			     (!test_bit(Faulty, &rdev->flags) &&
+			      rdev->recovery_offset >= sect + s)) &&
 			    is_badblock(rdev, sect, s,
 					&first_bad, &bad_sectors) == 0 &&
 			    sync_page_io(rdev, sect, s<<9,
@@ -2024,7 +2026,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
 			continue;
 		if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
 		    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
-			rdev_clear_badblocks(rdev, r1_bio->sector, s);
+			rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
 		}
 		if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
 		    test_bit(R1BIO_WriteError, &r1_bio->state)) {
@@ -2044,7 +2046,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 			struct md_rdev *rdev = conf->mirrors[m].rdev;
 			rdev_clear_badblocks(rdev,
 					     r1_bio->sector,
-					     r1_bio->sectors);
+					     r1_bio->sectors, 0);
 			rdev_dec_pending(rdev, conf->mddev);
 		} else if (r1_bio->bios[m] != NULL) {
 			/* This drive got a write error.  We need to
@@ -2598,7 +2600,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 		if (!disk->rdev ||
 		    !test_bit(In_sync, &disk->rdev->flags)) {
 			disk->head_position = 0;
-			if (disk->rdev)
+			if (disk->rdev &&
+			    (disk->rdev->saved_raid_disk < 0))
 				conf->fullsync = 1;
 		} else if (conf->last_used < 0)
 			/*
@@ -2750,9 +2753,16 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
 	 * any io in the removed space completes, but it hardly seems
 	 * worth it.
 	 */
-	md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
-	if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
+	sector_t newsize = raid1_size(mddev, sectors, 0);
+	if (mddev->external_size &&
+	    mddev->array_sectors > newsize)
 		return -EINVAL;
+	if (mddev->bitmap) {
+		int ret = bitmap_resize(mddev->bitmap, newsize, 0, 0);
+		if (ret)
+			return ret;
+	}
+	md_set_array_sectors(mddev, newsize);
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	revalidate_disk(mddev->gendisk);
 	if (sectors > mddev->dev_sectors &&
...
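The widened condition in fix_read_error deserves a worked example: rdev->recovery_offset >= sect + s says the whole range [sect, sect + s) has already been rebuilt on that device, so it is a safe read source even though the device is not yet In_sync. With recovery_offset = 1,000,000 and s = 8, a read at sect = 999,992 qualifies (999,992 + 8 <= 1,000,000) while one at sect = 999,996 does not (1,000,004 > 1,000,000).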
... (large diff collapsed, not shown)
@@ -14,10 +14,12 @@ struct mirror_info {
 struct r10conf {
 	struct mddev		*mddev;
 	struct mirror_info	*mirrors;
-	int			raid_disks;
+	struct mirror_info	*mirrors_new, *mirrors_old;
 	spinlock_t		device_lock;

 	/* geometry */
+	struct geom {
+		int		raid_disks;
 		int		near_copies;  /* number of copies laid out
					       * raid0 style */
 		int		far_copies;   /* number of copies laid out
@@ -26,20 +28,24 @@ struct r10conf {
 		int		far_offset;   /* far_copies are offset by 1
					       * stripe instead of many
					       */
-	int			copies;	      /* near_copies * far_copies.
-					       * must be <= raid_disks
-					       */
 		sector_t	stride;	      /* distance between far copies.
					       * This is size / far_copies unless
					       * far_offset, in which case it is
					       * 1 stripe.
					       */
+		int		chunk_shift;  /* shift from chunks to sectors */
+		sector_t	chunk_mask;
+	} prev, geo;
+	int			copies;	      /* near_copies * far_copies.
+					       * must be <= raid_disks
+					       */

 	sector_t		dev_sectors;  /* temp copy of
 					       * mddev->dev_sectors */
+	sector_t		reshape_progress;
-	int			chunk_shift;  /* shift from chunks to sectors */
-	sector_t		chunk_mask;
+	sector_t		reshape_safe;
+	unsigned long		reshape_checkpoint;
+	sector_t		offset_diff;

 	struct list_head	retry_list;
 	/* queue pending writes and submit them on unplug */
@@ -136,6 +142,7 @@ enum r10bio_state {
 	R10BIO_Uptodate,
 	R10BIO_IsSync,
 	R10BIO_IsRecover,
+	R10BIO_IsReshape,
 	R10BIO_Degraded,
 	/* Set ReadError on bios that experience a read error
 	 * so that raid10d knows what to do with them.
@@ -146,5 +153,10 @@ enum r10bio_state {
 	 */
 	R10BIO_MadeGood,
 	R10BIO_WriteError,
+	/* During a reshape we might be performing IO on the
+	 * 'previous' part of the array, in which case this
+	 * flag is set
+	 */
+	R10BIO_Previous,
 };
 #endif
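The duplicated geometry is the core of the RAID10 reshape support: prev describes the layout data is migrating away from, geo the one it is migrating to, and blocks on either side of the copy frontier must be mapped with the right one. A simplified sketch of the choice (ours; the real raid10.c test also handles mddev->reshape_backwards and the reshape_safe window):

	/* Hedged sketch, not verbatim kernel code. */
	static struct geom *choose_geom(struct r10conf *conf, sector_t sector)
	{
		/* For a forward reshape, everything below reshape_progress
		 * has already been rewritten in the new layout. */
		if (conf->reshape_progress != MaxSector &&
		    sector >= conf->reshape_progress)
			return &conf->prev;
		return &conf->geo;
	}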
... (large diff collapsed, not shown)
@@ -285,6 +285,7 @@ enum r5dev_flags {
 	 */
 	R5_Wantdrain,	/* dev->towrite needs to be drained */
 	R5_WantFUA,	/* Write should be FUA */
+	R5_SyncIO,	/* The IO is sync */
 	R5_WriteError,	/* got a write error - need to record it */
 	R5_MadeGood,	/* A bad block has been fixed by writing to it */
 	R5_ReadRepl,	/* Will/did read from replacement rather than orig */
@@ -385,6 +386,12 @@ struct r5conf {
 	short			generation; /* increments with every reshape */
 	unsigned long		reshape_checkpoint; /* Time we last updated
 						     * metadata */
+	long long		min_offset_diff; /* minimum difference between
+						  * data_offset and
+						  * new_data_offset across all
+						  * devices.  May be negative,
+						  * but is closest to zero.
+						  */
 	struct list_head	handle_list; /* stripes needing handling */
 	struct list_head	hold_list; /* preread ready stripes */
...
@@ -233,7 +233,10 @@ struct mdp_superblock_1 {
 	__le32	delta_disks;	/* change in number of raid_disks */
 	__le32	new_layout;	/* new layout */
 	__le32	new_chunk;	/* new chunk size (512byte sectors) */
-	__u8	pad1[128-124];	/* set to 0 when written */
+	__le32  new_offset;	/* signed number to add to data_offset in new
+				 * layout.  0 == no-change.  This can be
+				 * different on each device in the array.
+				 */

 	/* constant this-device information - 64 bytes */
 	__le64	data_offset;	/* sector start of data, often 0 */
@@ -281,10 +284,18 @@ struct mdp_superblock_1 {
 					    * active device with same 'role'.
 					    * 'recovery_offset' is also set.
 					    */
+#define	MD_FEATURE_RESHAPE_BACKWARDS	32 /* Reshape doesn't change number
+					    * of devices, but is going
+					    * backwards anyway.
+					    */
+#define	MD_FEATURE_NEW_OFFSET		64 /* new_offset must be honoured */
 #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
 					|MD_FEATURE_BAD_BLOCKS		\
-					|MD_FEATURE_REPLACEMENT)
+					|MD_FEATURE_REPLACEMENT		\
+					|MD_FEATURE_RESHAPE_BACKWARDS	\
+					|MD_FEATURE_NEW_OFFSET		\
+					)

 #endif
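Quick arithmetic on the mask: the feature bits are the powers of two 1, 2, 4, 8, 16 and now 32 and 64, so MD_FEATURE_ALL grows from 0x1f to 0x7f. Any kernel that sees an unknown bit set refuses to assemble the array; since MD_FEATURE_NEW_OFFSET is set when a device carries a non-zero new_offset, older kernels are locked out exactly while the on-disk layout would be ambiguous to them.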
@@ -99,8 +99,20 @@ extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;

+struct raid6_recov_calls {
+	void (*data2)(int, size_t, int, int, void **);
+	void (*datap)(int, size_t, int, void **);
+	int  (*valid)(void);
+	const char *name;
+	int priority;
+};
+
+extern const struct raid6_recov_calls raid6_recov_intx1;
+extern const struct raid6_recov_calls raid6_recov_ssse3;
+
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
+extern const struct raid6_recov_calls *const raid6_recov_algos[];
 int raid6_select_algo(void);

 /* Return values from chk_syndrome */
@@ -111,14 +123,16 @@ int raid6_select_algo(void);

 /* Galois field tables */
 extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
+extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256)));
 extern const u8 raid6_gfexp[256] __attribute__((aligned(256)));
 extern const u8 raid6_gfinv[256] __attribute__((aligned(256)));
 extern const u8 raid6_gfexi[256] __attribute__((aligned(256)));

 /* Recovery routines */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
-		       void **ptrs);
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
+extern void (*raid6_2data_recov)(int disks, size_t bytes, int faila, int failb,
+		       void **ptrs);
+extern void (*raid6_datap_recov)(int disks, size_t bytes, int faila,
+		       void **ptrs);
 void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 		      void **ptrs);
...
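Turning raid6_2data_recov and raid6_datap_recov into function pointers lets boot-time selection swap in the SSSE3 versions without touching any caller. A hedged sketch of the selection pass (the real loop lives in lib/raid6/algos.c): walk the NULL-terminated raid6_recov_algos[] table and keep the highest-priority entry whose ->valid() check passes.

	/* Sketch of the recovery-algorithm selection, not verbatim. */
	const struct raid6_recov_calls *best = &raid6_recov_intx1;
	int i;

	for (i = 0; raid6_recov_algos[i]; i++) {
		const struct raid6_recov_calls *r = raid6_recov_algos[i];

		if (r->valid && !r->valid())
			continue;	/* e.g. CPU lacks SSSE3 */
		if (r->priority > best->priority)
			best = r;
	}
	raid6_2data_recov = best->data2;
	raid6_datap_recov = best->datap;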
 obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o

-raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
+raid6_pq-y	+= algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
 		   altivec8.o mmx.o sse1.o sse2.o
 hostprogs-y	+= mktables
...
... (large diff collapsed, not shown)
@@ -81,6 +81,31 @@ int main(int argc, char *argv[])
 	printf("EXPORT_SYMBOL(raid6_gfmul);\n");
 	printf("#endif\n");

+	/* Compute vector multiplication table */
+	printf("\nconst u8  __attribute__((aligned(256)))\n"
+		"raid6_vgfmul[256][32] =\n"
+		"{\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t{\n");
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, j + k),
+				       (k == 7) ? '\n' : ' ');
+		}
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, (j + k) << 4),
+				       (k == 7) ? '\n' : ' ');
+		}
+		printf("\t},\n");
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_vgfmul);\n");
+	printf("#endif\n");
+
 	/* Compute power-of-2 table (exponent) */
 	v = 1;
 	printf("\nconst u8 __attribute__((aligned(256)))\n"
...
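Why 32 entries per multiplier rather than the full 256-entry row of raid6_gfmul: multiplication in GF(2^8) distributes over XOR, so gfmul(i, x) = gfmul(i, x & 0x0f) ^ gfmul(i, x & 0xf0). The first 16 entries of raid6_vgfmul[i] are the products for every low-nibble value, the next 16 for every high-nibble value, and PSHUFB can do sixteen such table lookups in one instruction - which is what the new SSSE3 recovery code exploits. A scalar sketch of the lookup (illustrative only, not from the patch):

	/* Illustrative scalar equivalent of the PSHUFB-based multiply. */
	static inline u8 vgfmul_scalar(u8 mult, u8 x)
	{
		return raid6_vgfmul[mult][x & 0x0f] ^
		       raid6_vgfmul[mult][16 + (x >> 4)];
	}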
@@ -22,7 +22,7 @@
 #include <linux/raid/pq.h>

 /* Recover two failed data blocks. */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb,
 		       void **ptrs)
 {
 	u8 *p, *q, *dp, *dq;
@@ -64,10 +64,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 		p++; q++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_2data_recov);

 /* Recover failure of one data block plus the P block */
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
+void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs)
 {
 	u8 *p, *q, *dq;
 	const u8 *qmul;		/* Q multiplier table */
@@ -96,7 +95,15 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
 		q++; dq++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+const struct raid6_recov_calls raid6_recov_intx1 = {
+	.data2 = raid6_2data_recov_intx1,
+	.datap = raid6_datap_recov_intx1,
+	.valid = NULL,
+	.name = "intx1",
+	.priority = 0,
+};

 #ifndef __KERNEL__
 /* Testing only */
...
... (large diff collapsed, not shown)
@@ -23,7 +23,7 @@ RANLIB	 = ranlib

 all:	raid6.a raid6test

 raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
-	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
+	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
	 tables.o
	rm -f $@
	$(AR) cq $@ $^
...
... (two further diffs collapsed, not shown)