Commit ce9f8d6b authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

Pull pnfs/ore fixes from Boaz Harrosh:
 "These are catastrophic fixes to the pnfs objects-layout that were just
  discovered.  They are also destined for @stable.

  I have found these and worked on them at around RC1 time but
  unfortunately went to the hospital for kidney stones and had a very
  slow recovery.  I refrained from sending them as is, before proper
  testing, and surly I have found a bug just yesterday.

  So now they are all well tested, and have my sign-off.  Other then
  fixing the problem at hand, and assuming there are no bugs at the new
  code, there is low risk to any surrounding code.  And in anyway they
  affect only these paths that are now broken.  That is RAID5 in pnfs
  objects-layout code.  It does also affect exofs (which was not broken)
  but I have tested exofs and it is lower priority then objects-layout
  because no one is using exofs, but objects-layout has lots of users."

* 'for-linus' of git://git.open-osd.org/linux-open-osd:
  pnfs-obj: Fix __r4w_get_page when offset is beyond i_size
  pnfs-obj: don't leak objio_state if ore_write/read fails
  ore: Unlock r4w pages in exact reverse order of locking
  ore: Remove support of partial IO request (NFS crash)
  ore: Fix NFS crash by supporting any unaligned RAID IO
parents 17934162 c999ff68
......@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
out:
ios->numdevs = devs_in_group;
ios->pages_consumed = cur_pg;
if (unlikely(ret)) {
if (length == ios->length)
return ret;
else
ios->length -= length;
}
return 0;
}
int ore_create(struct ore_io_state *ios)
......
......@@ -144,18 +144,15 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
{
unsigned data_devs = sp2d->data_devs;
unsigned group_width = data_devs + sp2d->parity;
unsigned p;
int p, c;
if (!sp2d->needed)
return;
for (p = 0; p < sp2d->pages_in_unit; p++) {
for (c = data_devs - 1; c >= 0; --c)
for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
if (_1ps->write_count < group_width) {
unsigned c;
for (c = 0; c < data_devs; c++)
if (_1ps->page_is_read[c]) {
struct page *page = _1ps->pages[c];
......@@ -164,6 +161,9 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
}
}
for (p = 0; p < sp2d->pages_in_unit; p++) {
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
_1ps->write_count = 0;
_1ps->tx = NULL;
......@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
* ios->sp2d[p][*], xor is calculated the same way. These pages are
* allocated/freed and don't go through cache
*/
static int _read_4_write(struct ore_io_state *ios)
static int _read_4_write_first_stripe(struct ore_io_state *ios)
{
struct ore_io_state *ios_read;
struct ore_striping_info read_si;
struct __stripe_pages_2d *sp2d = ios->sp2d;
u64 offset = ios->si.first_stripe_start;
u64 last_stripe_end;
unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
int ret;
unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
if (offset == ios->offset) /* Go to start collect $200 */
goto read_last_stripe;
......@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
min_p = _sp2d_min_pg(sp2d);
max_p = _sp2d_max_pg(sp2d);
ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
offset, ios->offset, min_p, max_p);
for (c = 0; ; c++) {
ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
read_si.obj_offset += min_p * PAGE_SIZE;
......@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
}
read_last_stripe:
return 0;
}
static int _read_4_write_last_stripe(struct ore_io_state *ios)
{
struct ore_striping_info read_si;
struct __stripe_pages_2d *sp2d = ios->sp2d;
u64 offset;
u64 last_stripe_end;
unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
offset = ios->offset + ios->length;
if (offset % PAGE_SIZE)
_add_to_r4w_last_page(ios, &offset);
......@@ -527,15 +538,15 @@ static int _read_4_write(struct ore_io_state *ios)
c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
/* unaligned IO must be within a single stripe */
if (min_p == sp2d->pages_in_unit) {
/* Didn't do it yet */
min_p = _sp2d_min_pg(sp2d);
max_p = _sp2d_max_pg(sp2d);
}
ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
offset, last_stripe_end, min_p, max_p);
while (offset < last_stripe_end) {
struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
......@@ -568,6 +579,15 @@ static int _read_4_write(struct ore_io_state *ios)
}
read_it:
return 0;
}
static int _read_4_write_execute(struct ore_io_state *ios)
{
struct ore_io_state *ios_read;
unsigned i;
int ret;
ios_read = ios->ios_read_4_write;
if (!ios_read)
return 0;
......@@ -591,6 +611,8 @@ static int _read_4_write(struct ore_io_state *ios)
}
_mark_read4write_pages_uptodate(ios_read, ret);
ore_put_io_state(ios_read);
ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
return 0;
}
......@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
/* If first stripe, Read in all read4write pages
* (if needed) before we calculate the first parity.
*/
_read_4_write(ios);
_read_4_write_first_stripe(ios);
}
if (!cur_len) /* If last stripe r4w pages of last stripe */
_read_4_write_last_stripe(ios);
_read_4_write_execute(ios);
for (i = 0; i < num_pages; i++) {
pages[i] = _raid_page_alloc();
......@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
{
struct ore_layout *layout = ios->layout;
if (ios->parity_pages) {
struct ore_layout *layout = ios->layout;
unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
unsigned stripe_size = ios->si.bytes_in_stripe;
u64 last_stripe, first_stripe;
if (_sp2d_alloc(pages_in_unit, layout->group_width,
layout->parity, &ios->sp2d)) {
return -ENOMEM;
}
/* Round io down to last full strip */
first_stripe = div_u64(ios->offset, stripe_size);
last_stripe = div_u64(ios->offset + ios->length, stripe_size);
/* If an IO spans more then a single stripe it must end at
* a stripe boundary. The reminder at the end is pushed into the
* next IO.
*/
if (last_stripe != first_stripe) {
ios->length = last_stripe * stripe_size - ios->offset;
BUG_ON(!ios->length);
ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
PAGE_SIZE;
ios->si.length = ios->length; /*make it consistent */
}
}
return 0;
}
......
......@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
objios->ios->done = _read_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
rdata->args.offset, rdata->args.count);
return ore_read(objios->ios);
ret = ore_read(objios->ios);
if (unlikely(ret))
objio_free_result(&objios->oir);
return ret;
}
/*
......@@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
struct nfs_write_data *wdata = objios->oir.rpcdata;
struct address_space *mapping = wdata->header->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
struct page *page = find_get_page(mapping, index);
struct page *page;
loff_t i_size = i_size_read(wdata->header->inode);
if (offset >= i_size) {
*uptodate = true;
dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
return ZERO_PAGE(0);
}
page = find_get_page(mapping, index);
if (!page) {
page = find_or_create_page(mapping, index, GFP_NOFS);
if (unlikely(!page)) {
......@@ -507,7 +518,9 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
static void __r4w_put_page(void *priv, struct page *page)
{
dprintk("%s: index=0x%lx\n", __func__, page->index);
dprintk("%s: index=0x%lx\n", __func__,
(page == ZERO_PAGE(0)) ? -1UL : page->index);
if (ZERO_PAGE(0) != page)
page_cache_release(page);
return;
}
......@@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
wdata->args.offset, wdata->args.count);
ret = ore_write(objios->ios);
if (unlikely(ret))
if (unlikely(ret)) {
objio_free_result(&objios->oir);
return ret;
}
if (objios->sync)
_write_done(objios->ios, objios);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment