Commit 8e23b549 authored by Stephen Lord

[XFS] Fix a use-after-free in the unwritten extent code. Also rework the
interface to the allocator to have its own flag set, and always
go through the same interface in all cases rather than having
unwritten extent requests take a different path from all others.

SGI Modid: 2.5.x-xfs:slinx:146678a
parent 6d70dcef
......@@ -50,8 +50,6 @@ linvfs_unwritten_done(
pagebuf_ioerror(pb, -EIO);
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
pagebuf_iodone(pb, 1, 1);
pb->pb_flags &= ~_PBF_LOCKABLE;
pagebuf_rele(pb);
}
end_buffer_async_write(bh, uptodate);
}
......@@ -61,28 +59,21 @@ linvfs_unwritten_done(
* to written extents.
*/
STATIC void
xfs_unwritten_conv(
xfs_buf_t *bp)
linvfs_unwritten_conv(
xfs_buf_t *bp)
{
bhv_desc_t *bdp = XFS_BUF_FSPRIVATE(bp, bhv_desc_t *);
xfs_mount_t *mp;
xfs_inode_t *ip;
vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
int error;
ip = XFS_BHVTOI(bdp);
mp = ip->i_mount;
if (XFS_TEST_ERROR(XFS_BUF_GETERROR(bp), mp,
XFS_ERRTAG_STRATCMPL_IOERR,
XFS_RANDOM_STRATCMPL_IOERR)) {
xfs_ioerror_alert(__FUNCTION__, mp, bp, XFS_BUF_ADDR(bp));
}
if (atomic_read(&bp->pb_hold) < 1)
BUG();
XFS_IOMAP_WRITE_UNWRITTEN(mp, &ip->i_iocore,
XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp));
VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
BMAP_UNWRITTEN, NULL, NULL, error);
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
XFS_BUF_UNDATAIO(bp);
xfs_biodone(bp);
pagebuf_iodone(bp, 0, 0);
}
STATIC int
......@@ -96,20 +87,20 @@ map_blocks(
vnode_t *vp = LINVFS_GET_VP(inode);
int error, nmaps = 1;
if (((flags & (PBF_DIRECT|PBF_SYNC)) == PBF_DIRECT) &&
if (((flags & (BMAP_DIRECT|BMAP_SYNC)) == BMAP_DIRECT) &&
(offset >= inode->i_size))
count = max_t(ssize_t, count, XFS_WRITE_IO_LOG);
retry:
VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error);
if (error == EAGAIN)
if ((error == EAGAIN) || (error == EIO))
return -error;
if (unlikely((flags & (PBF_WRITE|PBF_DIRECT)) ==
(PBF_WRITE|PBF_DIRECT) && nmaps &&
if (unlikely((flags & (BMAP_WRITE|BMAP_DIRECT)) ==
(BMAP_WRITE|BMAP_DIRECT) && nmaps &&
(pbmapp->pbm_flags & PBMF_DELAY))) {
flags = PBF_FILE_ALLOCATE;
flags = BMAP_ALLOCATE;
goto retry;
}
if (flags & (PBF_WRITE|PBF_FILE_ALLOCATE)) {
if (flags & (BMAP_WRITE|BMAP_ALLOCATE)) {
VMODIFY(vp);
}
return -error;
......@@ -371,7 +362,7 @@ map_unwritten(
offset += p_offset;
pb = pagebuf_lookup(mp->pbm_target,
mp->pbm_offset, mp->pbm_bsize, _PBF_LOCKABLE);
mp->pbm_offset, mp->pbm_bsize, 0);
if (!pb)
return -ENOMEM;
......@@ -390,7 +381,6 @@ map_unwritten(
tmp = match_offset_to_mapping(start_page, mp, p_offset);
if (!tmp)
break;
BUG_ON(!(tmp->pbm_flags & PBMF_UNWRITTEN));
map_buffer_at_offset(start_page, bh, p_offset, block_bits, mp);
set_buffer_unwritten_io(bh);
bh->b_private = pb;
......@@ -442,15 +432,14 @@ map_unwritten(
size <<= block_bits; /* convert fsb's to byte range */
XFS_BUF_DATAIO(pb);
XFS_BUF_ASYNC(pb);
XFS_BUF_SET_SIZE(pb, size);
XFS_BUF_SET_OFFSET(pb, offset);
XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)->v_fbhv);
XFS_BUF_SET_IODONE_FUNC(pb, xfs_unwritten_conv);
XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_conv);
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
pagebuf_iodone(pb, 1, 1);
pb->pb_flags &= ~_PBF_LOCKABLE;
pagebuf_rele(pb);
}
return 0;
......@@ -552,6 +541,7 @@ convert_page(
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
mark_buffer_dirty(bh);
}
} while (i++, (bh = bh->b_this_page) != head);
......@@ -617,7 +607,7 @@ page_state_convert(
unsigned long p_offset = 0, end_index;
loff_t offset, end_offset;
int len, err, i, cnt = 0, uptodate = 1;
int flags = startio ? 0 : PBF_TRYLOCK;
int flags = startio ? 0 : BMAP_TRYLOCK;
int page_dirty = 1;
......@@ -655,7 +645,7 @@ page_state_convert(
if (buffer_unwritten(bh)) {
if (!mp) {
err = map_blocks(inode, offset, len, &map,
PBF_FILE_UNWRITTEN);
BMAP_READ|BMAP_IGNSTATE);
if (err) {
goto error;
}
......@@ -677,6 +667,7 @@ page_state_convert(
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
mark_buffer_dirty(bh);
}
page_dirty = 0;
}
......@@ -687,7 +678,7 @@ page_state_convert(
} else if (buffer_delay(bh)) {
if (!mp) {
err = map_blocks(inode, offset, len, &map,
PBF_FILE_ALLOCATE | flags);
BMAP_ALLOCATE | flags);
if (err) {
goto error;
}
......@@ -702,6 +693,7 @@ page_state_convert(
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
mark_buffer_dirty(bh);
}
page_dirty = 0;
}
......@@ -720,8 +712,8 @@ page_state_convert(
size = probe_unmapped_cluster(
inode, page, bh, head);
err = map_blocks(inode, offset,
size, &map,
PBF_WRITE | PBF_DIRECT);
size, &map,
BMAP_WRITE | BMAP_MMAP);
if (err) {
goto error;
}
......@@ -737,6 +729,7 @@ page_state_convert(
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
mark_buffer_dirty(bh);
}
page_dirty = 0;
}
......@@ -760,13 +753,11 @@ page_state_convert(
if (uptodate)
SetPageUptodate(page);
if (startio) {
if (startio)
submit_page(page, bh_arr, cnt);
}
if (mp) {
if (mp)
cluster_write(inode, page->index + 1, mp, startio, unmapped);
}
return page_dirty;
......@@ -797,7 +788,7 @@ linvfs_get_block_core(
struct buffer_head *bh_result,
int create,
int direct,
page_buf_flags_t flags)
bmapi_flags_t flags)
{
vnode_t *vp = LINVFS_GET_VP(inode);
page_buf_bmap_t pbmap;
......@@ -817,7 +808,7 @@ linvfs_get_block_core(
size = 1 << inode->i_blkbits;
VOP_BMAP(vp, offset, size,
create ? flags : PBF_READ, &pbmap, &retpbbm, error);
create ? flags : BMAP_READ, &pbmap, &retpbbm, error);
if (error)
return -error;
......@@ -887,7 +878,7 @@ linvfs_get_block(
int create)
{
return linvfs_get_block_core(inode, iblock, 0, bh_result,
create, 0, PBF_WRITE);
create, 0, BMAP_WRITE);
}
STATIC int
......@@ -898,7 +889,7 @@ linvfs_get_block_sync(
int create)
{
return linvfs_get_block_core(inode, iblock, 0, bh_result,
create, 0, PBF_SYNC|PBF_WRITE);
create, 0, BMAP_SYNC|BMAP_WRITE);
}
STATIC int
......@@ -910,7 +901,7 @@ linvfs_get_blocks_direct(
int create)
{
return linvfs_get_block_core(inode, iblock, max_blocks, bh_result,
create, 1, PBF_WRITE|PBF_DIRECT);
create, 1, BMAP_WRITE|BMAP_DIRECT);
}
STATIC int
......
......@@ -97,7 +97,7 @@ xfs_iomap(
{
xfs_mount_t *mp = io->io_mount;
xfs_fileoff_t offset_fsb, end_fsb;
int error;
int error = 0;
int lockmode = 0;
xfs_bmbt_irec_t imap;
int nimaps = 1;
......@@ -107,32 +107,31 @@ xfs_iomap(
return XFS_ERROR(EIO);
switch (flags &
(PBF_READ|PBF_WRITE|PBF_FILE_ALLOCATE|PBF_FILE_UNWRITTEN)) {
case PBF_READ:
(BMAP_READ|BMAP_WRITE|BMAP_ALLOCATE|BMAP_UNWRITTEN)) {
case BMAP_READ:
lockmode = XFS_LCK_MAP_SHARED(mp, io);
bmap_flags = XFS_BMAPI_ENTIRE;
if (flags & BMAP_IGNSTATE)
bmap_flags |= XFS_BMAPI_IGSTATE;
break;
case PBF_WRITE:
lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
bmap_flags = 0;
XFS_ILOCK(mp, io, lockmode);
break;
case PBF_FILE_ALLOCATE:
case BMAP_ALLOCATE:
lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
bmap_flags = XFS_BMAPI_ENTIRE;
/* Attempt non-blocking lock */
if (flags & PBF_TRYLOCK) {
if (flags & BMAP_TRYLOCK) {
if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
return XFS_ERROR(EAGAIN);
} else {
XFS_ILOCK(mp, io, lockmode);
}
break;
case PBF_FILE_UNWRITTEN:
lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
bmap_flags = XFS_BMAPI_ENTIRE|XFS_BMAPI_IGSTATE;
XFS_ILOCK(mp, io, lockmode);
break;
case BMAP_UNWRITTEN:
goto phase2;
default:
BUG();
}
......@@ -148,13 +147,14 @@ xfs_iomap(
if (error)
goto out;
switch (flags & (PBF_WRITE|PBF_FILE_ALLOCATE)) {
case PBF_WRITE:
phase2:
switch (flags & (BMAP_WRITE|BMAP_ALLOCATE|BMAP_UNWRITTEN)) {
case BMAP_WRITE:
/* If we found an extent, return it */
if (nimaps && (imap.br_startblock != HOLESTARTBLOCK))
break;
if (flags & PBF_DIRECT) {
if (flags & (BMAP_DIRECT|BMAP_MMAP)) {
error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset,
count, flags, &imap, &nimaps, nimaps);
} else {
......@@ -162,7 +162,7 @@ xfs_iomap(
flags, &imap, &nimaps);
}
break;
case PBF_FILE_ALLOCATE:
case BMAP_ALLOCATE:
/* If we found an extent, return it */
XFS_IUNLOCK(mp, io, lockmode);
lockmode = 0;
......@@ -172,12 +172,17 @@ xfs_iomap(
error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, &imap, &nimaps);
break;
case BMAP_UNWRITTEN:
lockmode = 0;
error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count);
nimaps = 0;
break;
}
if (nimaps) {
*npbmaps = _xfs_imap_to_bmap(io, offset, &imap,
pbmapp, nimaps, *npbmaps);
} else {
} else if (npbmaps) {
*npbmaps = 0;
}
......@@ -203,13 +208,13 @@ xfs_flush_space(
xfs_ilock(ip, XFS_ILOCK_EXCL);
*fsynced = 1;
} else {
*ioflags |= PBF_SYNC;
*ioflags |= BMAP_SYNC;
*fsynced = 2;
}
return 0;
case 1:
*fsynced = 2;
*ioflags |= PBF_SYNC;
*ioflags |= BMAP_SYNC;
return 0;
case 2:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
......@@ -228,7 +233,7 @@ xfs_iomap_write_direct(
xfs_inode_t *ip,
loff_t offset,
size_t count,
int ioflag,
int flags,
xfs_bmbt_irec_t *ret_imap,
int *nmaps,
int found)
......@@ -342,7 +347,7 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
if (offset < ip->i_d.di_size || rt)
if (!(flags & BMAP_MMAP) && (offset < ip->i_d.di_size || rt))
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
......@@ -441,7 +446,7 @@ xfs_iomap_write_delay(
* We don't bother with this for sync writes, because we need
* to minimize the amount we write for good performance.
*/
if (!(ioflag & PBF_SYNC) && ((offset + count) > ip->i_d.di_size)) {
if (!(ioflag & BMAP_SYNC) && ((offset + count) > ip->i_d.di_size)) {
xfs_off_t aligned_offset;
unsigned int iosize;
xfs_fileoff_t ioalign;
......
......@@ -1575,6 +1575,13 @@ pagebuf_delwri_dequeue(
spin_unlock(&pbd_delwrite_lock);
}
/*
 * Flush the given workqueue by handing it to flush_workqueue().
 *
 * queue: the workqueue to flush.
 */
STATIC void
pagebuf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}
/* Defines for pagebuf daemon */
DECLARE_WAIT_QUEUE_HEAD(pbd_waitq);
STATIC int force_flush;
......@@ -1680,10 +1687,13 @@ pagebuf_delwri_flush(
page_buf_t *pb;
struct list_head *curr, *next, tmp;
int pincount = 0;
int flush_cnt = 0;
spin_lock(&pbd_delwrite_lock);
INIT_LIST_HEAD(&tmp);
pagebuf_runall_queues(pagebuf_dataio_workqueue);
list_for_each_safe(curr, next, &pbd_delwrite_queue) {
pb = list_entry(curr, page_buf_t, pb_list);
......@@ -1725,6 +1735,10 @@ pagebuf_delwri_flush(
pb->pb_flags |= PBF_WRITE;
__pagebuf_iorequest(pb);
if (++flush_cnt > 32) {
pagebuf_run_queues(NULL);
flush_cnt = 0;
}
spin_lock(&pbd_delwrite_lock);
}
......
......@@ -83,6 +83,20 @@ typedef enum { /* pbm_flags values */
/* but uninitialized file data */
} bmap_flags_t;
/*
 * Flags passed with a block-mapping request (the flags argument of
 * VOP_BMAP / xfs_iomap).
 *
 * The first four bits name the extent operation being requested; the
 * remaining bits are modifiers that adjust how it is carried out.
 */
typedef enum {
	/* Base extent operations. */
	BMAP_READ	= (1 << 0),	/* look up (read) extents */
	BMAP_WRITE	= (1 << 1),	/* create extents */
	BMAP_ALLOCATE	= (1 << 2),	/* turn delayed allocations into real extents */
	BMAP_UNWRITTEN	= (1 << 3),	/* turn unwritten extents into real extents */

	/* Modifiers. */
	BMAP_IGNSTATE	= (1 << 4),	/* on read, ignore the unwritten state */
	BMAP_DIRECT	= (1 << 5),	/* direct rather than buffered write */
	BMAP_MMAP	= (1 << 6),	/* allocation on behalf of an mmap write */
	BMAP_SYNC	= (1 << 7),	/* synchronous write */
	BMAP_TRYLOCK	= (1 << 8),	/* non-blocking request */
} bmapi_flags_t;
typedef enum page_buf_flags_e { /* pb_flags values */
PBF_READ = (1 << 0), /* buffer intended for reading from device */
PBF_WRITE = (1 << 1), /* buffer intended for writing to device */
......@@ -101,20 +115,18 @@ typedef enum page_buf_flags_e { /* pb_flags values */
/* flags used only as arguments to access routines */
PBF_LOCK = (1 << 13), /* lock requested */
PBF_TRYLOCK = (1 << 14), /* lock requested, but do not wait */
PBF_FILE_ALLOCATE = (1 << 15), /* allocate all file space */
PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
PBF_DIRECT = (1 << 17), /* direct I/O desired */
PBF_FILE_UNWRITTEN = (1 << 18), /* convert unwritten extent space */
PBF_DONT_BLOCK = (1 << 15), /* do not block in current thread */
/* flags used only internally */
_PBF_LOCKABLE = (1 << 19), /* page_buf_t may be locked */
_PBF_ALL_PAGES_MAPPED = (1 << 21), /* all pages in range mapped */
_PBF_ADDR_ALLOCATED = (1 << 22), /* pb_addr space was allocated */
_PBF_MEM_ALLOCATED = (1 << 23), /* pb_mem+underlying pages alloc'd */
PBF_FORCEIO = (1 << 24),
PBF_FLUSH = (1 << 25), /* flush disk write cache */
PBF_READ_AHEAD = (1 << 26),
_PBF_LOCKABLE = (1 << 16), /* page_buf_t may be locked */
_PBF_PRIVATE_BH = (1 << 17), /* do not use public buffer heads */
_PBF_ALL_PAGES_MAPPED = (1 << 18), /* all pages in range mapped */
_PBF_ADDR_ALLOCATED = (1 << 19), /* pb_addr space was allocated */
_PBF_MEM_ALLOCATED = (1 << 20), /* pb_mem+underlying pages alloc'd */
PBF_FORCEIO = (1 << 21),
PBF_FLUSH = (1 << 22), /* flush disk write cache */
PBF_READ_AHEAD = (1 << 23),
} page_buf_flags_t;
......
......@@ -1732,11 +1732,9 @@ static char *pb_flag_vals[] = {
/* 0 */ "READ", "WRITE", "MAPPED", "PARTIAL", "ASYNC",
/* 5 */ "NONE", "DELWRI", "FREED", "SYNC", "MAPPABLE",
/* 10 */ "STALE", "FS_MANAGED", "INVALID12", "LOCK", "TRYLOCK",
/* 15 */ "FILE_ALLOCATE", "DONT_BLOCK", "DIRECT", "INVALID18", "LOCKABLE",
/* 20 */ "PRIVATE_BH", "ALL_PAGES_MAPPED", "ADDR_ALLOCATED", "MEM_ALLOCATED",
"FORCEIO",
/* 25 */ "FLUSH", "READ_AHEAD", "INVALID27", "INVALID28", "INVALID29",
/* 30 */ "INVALID30", "INVALID31",
/* 15 */ "DONT_BLOCK", "LOCKABLE", "PRIVATE_BH", "ALL_PAGES_MAPPED",
"ADDR_ALLOCATED",
/* 20 */ "MEM_ALLOCATED", "FORCEIO", "FLUSH", "READ_AHEAD",
NULL };
static char *pbm_flag_vals[] = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment