Commit d7bd398e authored by Song Liu's avatar Song Liu Committed by Shaohua Li

md/r5cache: handle alloc_page failure

RMW of r5c write back cache uses an extra page to store old data for
prexor. handle_stripe_dirtying() allocates this page by calling
alloc_page(). However, alloc_page() may fail.

To handle alloc_page() failures, this patch adds an extra page to
disk_info. When alloc_page fails, handle_stripe() tries to use these
pages. When these pages are in use by another stripe (R5C_EXTRA_PAGE_IN_USE),
the stripe is added to delayed_list.
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
parent 034e33f5
...@@ -2326,15 +2326,40 @@ int r5c_try_caching_write(struct r5conf *conf, ...@@ -2326,15 +2326,40 @@ int r5c_try_caching_write(struct r5conf *conf,
*/ */
void r5c_release_extra_page(struct stripe_head *sh) void r5c_release_extra_page(struct stripe_head *sh)
{ {
struct r5conf *conf = sh->raid_conf;
int i; int i;
bool using_disk_info_extra_page;
using_disk_info_extra_page =
sh->dev[0].orig_page == conf->disks[0].extra_page;
for (i = sh->disks; i--; ) for (i = sh->disks; i--; )
if (sh->dev[i].page != sh->dev[i].orig_page) { if (sh->dev[i].page != sh->dev[i].orig_page) {
struct page *p = sh->dev[i].orig_page; struct page *p = sh->dev[i].orig_page;
sh->dev[i].orig_page = sh->dev[i].page; sh->dev[i].orig_page = sh->dev[i].page;
put_page(p); if (!using_disk_info_extra_page)
put_page(p);
} }
if (using_disk_info_extra_page) {
clear_bit(R5C_EXTRA_PAGE_IN_USE, &conf->cache_state);
md_wakeup_thread(conf->mddev->thread);
}
}
void r5c_use_extra_page(struct stripe_head *sh)
{
struct r5conf *conf = sh->raid_conf;
int i;
struct r5dev *dev;
for (i = sh->disks; i--; ) {
dev = &sh->dev[i];
if (dev->orig_page != dev->page)
put_page(dev->orig_page);
dev->orig_page = conf->disks[i].extra_page;
}
} }
/* /*
......
...@@ -876,6 +876,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) ...@@ -876,6 +876,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) { if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) {
/* writing out phase */ /* writing out phase */
if (s->waiting_extra_page)
return;
if (r5l_write_stripe(conf->log, sh) == 0) if (r5l_write_stripe(conf->log, sh) == 0)
return; return;
} else { /* caching phase */ } else { /* caching phase */
...@@ -2007,6 +2009,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, ...@@ -2007,6 +2009,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->batch_list);
INIT_LIST_HEAD(&sh->lru); INIT_LIST_HEAD(&sh->lru);
INIT_LIST_HEAD(&sh->r5c); INIT_LIST_HEAD(&sh->r5c);
INIT_LIST_HEAD(&sh->log_list);
atomic_set(&sh->count, 1); atomic_set(&sh->count, 1);
sh->log_start = MaxSector; sh->log_start = MaxSector;
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
...@@ -2253,10 +2256,24 @@ static int resize_stripes(struct r5conf *conf, int newsize) ...@@ -2253,10 +2256,24 @@ static int resize_stripes(struct r5conf *conf, int newsize)
*/ */
ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
if (ndisks) { if (ndisks) {
for (i=0; i<conf->raid_disks; i++) for (i = 0; i < conf->pool_size; i++)
ndisks[i] = conf->disks[i]; ndisks[i] = conf->disks[i];
kfree(conf->disks);
conf->disks = ndisks; for (i = conf->pool_size; i < newsize; i++) {
ndisks[i].extra_page = alloc_page(GFP_NOIO);
if (!ndisks[i].extra_page)
err = -ENOMEM;
}
if (err) {
for (i = conf->pool_size; i < newsize; i++)
if (ndisks[i].extra_page)
put_page(ndisks[i].extra_page);
kfree(ndisks);
} else {
kfree(conf->disks);
conf->disks = ndisks;
}
} else } else
err = -ENOMEM; err = -ENOMEM;
...@@ -3580,10 +3597,10 @@ static void handle_stripe_clean_event(struct r5conf *conf, ...@@ -3580,10 +3597,10 @@ static void handle_stripe_clean_event(struct r5conf *conf,
break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
} }
static void handle_stripe_dirtying(struct r5conf *conf, static int handle_stripe_dirtying(struct r5conf *conf,
struct stripe_head *sh, struct stripe_head *sh,
struct stripe_head_state *s, struct stripe_head_state *s,
int disks) int disks)
{ {
int rmw = 0, rcw = 0, i; int rmw = 0, rcw = 0, i;
sector_t recovery_cp = conf->mddev->recovery_cp; sector_t recovery_cp = conf->mddev->recovery_cp;
...@@ -3649,12 +3666,32 @@ static void handle_stripe_dirtying(struct r5conf *conf, ...@@ -3649,12 +3666,32 @@ static void handle_stripe_dirtying(struct r5conf *conf,
dev->page == dev->orig_page && dev->page == dev->orig_page &&
!test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) { !test_bit(R5_LOCKED, &sh->dev[sh->pd_idx].flags)) {
/* alloc page for prexor */ /* alloc page for prexor */
dev->orig_page = alloc_page(GFP_NOIO); struct page *p = alloc_page(GFP_NOIO);
if (p) {
dev->orig_page = p;
continue;
}
/* will handle failure in a later patch*/ /*
BUG_ON(!dev->orig_page); * alloc_page() failed, try use
* disk_info->extra_page
*/
if (!test_and_set_bit(R5C_EXTRA_PAGE_IN_USE,
&conf->cache_state)) {
r5c_use_extra_page(sh);
break;
}
/* extra_page in use, add to delayed_list */
set_bit(STRIPE_DELAYED, &sh->state);
s->waiting_extra_page = 1;
return -EAGAIN;
} }
}
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || if ((dev->towrite ||
i == sh->pd_idx || i == sh->qd_idx || i == sh->pd_idx || i == sh->qd_idx ||
test_bit(R5_InJournal, &dev->flags)) && test_bit(R5_InJournal, &dev->flags)) &&
...@@ -3730,6 +3767,7 @@ static void handle_stripe_dirtying(struct r5conf *conf, ...@@ -3730,6 +3767,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
(s->locked == 0 && (rcw == 0 || rmw == 0) && (s->locked == 0 && (rcw == 0 || rmw == 0) &&
!test_bit(STRIPE_BIT_DELAY, &sh->state))) !test_bit(STRIPE_BIT_DELAY, &sh->state)))
schedule_reconstruction(sh, s, rcw == 0, 0); schedule_reconstruction(sh, s, rcw == 0, 0);
return 0;
} }
static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
...@@ -4545,8 +4583,12 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -4545,8 +4583,12 @@ static void handle_stripe(struct stripe_head *sh)
if (ret == -EAGAIN || if (ret == -EAGAIN ||
/* stripe under reclaim: !caching && injournal */ /* stripe under reclaim: !caching && injournal */
(!test_bit(STRIPE_R5C_CACHING, &sh->state) && (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
s.injournal > 0)) s.injournal > 0)) {
handle_stripe_dirtying(conf, sh, &s, disks); ret = handle_stripe_dirtying(conf, sh, &s,
disks);
if (ret == -EAGAIN)
goto finish;
}
} }
} }
...@@ -6458,6 +6500,8 @@ static void raid5_free_percpu(struct r5conf *conf) ...@@ -6458,6 +6500,8 @@ static void raid5_free_percpu(struct r5conf *conf)
static void free_conf(struct r5conf *conf) static void free_conf(struct r5conf *conf)
{ {
int i;
if (conf->log) if (conf->log)
r5l_exit_log(conf->log); r5l_exit_log(conf->log);
if (conf->shrinker.nr_deferred) if (conf->shrinker.nr_deferred)
...@@ -6466,6 +6510,9 @@ static void free_conf(struct r5conf *conf) ...@@ -6466,6 +6510,9 @@ static void free_conf(struct r5conf *conf)
free_thread_groups(conf); free_thread_groups(conf);
shrink_stripes(conf); shrink_stripes(conf);
raid5_free_percpu(conf); raid5_free_percpu(conf);
for (i = 0; i < conf->pool_size; i++)
if (conf->disks[i].extra_page)
put_page(conf->disks[i].extra_page);
kfree(conf->disks); kfree(conf->disks);
kfree(conf->stripe_hashtbl); kfree(conf->stripe_hashtbl);
kfree(conf); kfree(conf);
...@@ -6612,9 +6659,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) ...@@ -6612,9 +6659,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
conf->disks = kzalloc(max_disks * sizeof(struct disk_info), conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
GFP_KERNEL); GFP_KERNEL);
if (!conf->disks) if (!conf->disks)
goto abort; goto abort;
for (i = 0; i < max_disks; i++) {
conf->disks[i].extra_page = alloc_page(GFP_KERNEL);
if (!conf->disks[i].extra_page)
goto abort;
}
conf->mddev = mddev; conf->mddev = mddev;
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
......
...@@ -276,6 +276,7 @@ struct stripe_head_state { ...@@ -276,6 +276,7 @@ struct stripe_head_state {
struct md_rdev *blocked_rdev; struct md_rdev *blocked_rdev;
int handle_bad_blocks; int handle_bad_blocks;
int log_failed; int log_failed;
int waiting_extra_page;
}; };
/* Flags for struct r5dev.flags */ /* Flags for struct r5dev.flags */
...@@ -439,6 +440,7 @@ enum { ...@@ -439,6 +440,7 @@ enum {
struct disk_info { struct disk_info {
struct md_rdev *rdev, *replacement; struct md_rdev *rdev, *replacement;
struct page *extra_page; /* extra page to use in prexor */
}; };
/* /*
...@@ -559,6 +561,9 @@ enum r5_cache_state { ...@@ -559,6 +561,9 @@ enum r5_cache_state {
* only process stripes that are already * only process stripes that are already
* occupying the log * occupying the log
*/ */
R5C_EXTRA_PAGE_IN_USE, /* a stripe is using disk_info.extra_page
* for prexor
*/
}; };
struct r5conf { struct r5conf {
...@@ -765,6 +770,7 @@ extern void ...@@ -765,6 +770,7 @@ extern void
r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh, r5c_finish_stripe_write_out(struct r5conf *conf, struct stripe_head *sh,
struct stripe_head_state *s); struct stripe_head_state *s);
extern void r5c_release_extra_page(struct stripe_head *sh); extern void r5c_release_extra_page(struct stripe_head *sh);
extern void r5c_use_extra_page(struct stripe_head *sh);
extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space); extern void r5l_wake_reclaim(struct r5l_log *log, sector_t space);
extern void r5c_handle_cached_data_endio(struct r5conf *conf, extern void r5c_handle_cached_data_endio(struct r5conf *conf,
struct stripe_head *sh, int disks, struct bio_list *return_bi); struct stripe_head *sh, int disks, struct bio_list *return_bi);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment