Commit cd4816fe authored by Nathan Scott, committed by Stephen Lord

[XFS] Implement support for unwritten extents in XFS.

SGI Modid: 2.5.x-xfs:slinx:141508a
parent 1d1c803e
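Unwritten extents are regions of a file that have disk blocks allocated but are
flagged as containing no valid data: reads over them return zeros, and the first
write must convert the affected range to the normal written state. This change
wires that conversion into the I/O completion path. As a hedged illustration of
where such extents come from (not part of this patch), userspace typically
preallocates them on XFS via the space reservation ioctl; the helper below is
hypothetical and assumes the xfsprogs headers:

	/* Hypothetical helper: preallocate len bytes of an open file
	 * as unwritten extents using XFS_IOC_RESVSP64. Header names
	 * may vary between xfsprogs releases.
	 */
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <xfs/xfs_fs.h>

	int xfs_preallocate(int fd, long long len)
	{
		xfs_flock64_t fl = { 0 };

		fl.l_whence = 0;	/* l_start relative to start of file */
		fl.l_start = 0;
		fl.l_len = len;		/* number of bytes to reserve */
		if (ioctl(fd, XFS_IOC_RESVSP64, &fl) < 0) {
			perror("XFS_IOC_RESVSP64");
			return -1;
		}
		return 0;
	}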
@@ -32,10 +32,56 @@
#include <xfs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/mpage.h>
STATIC void convert_page(struct inode *, struct page *,
page_buf_bmap_t *, void *, int, int);
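/*
 * I/O completion handler for buffers over an unwritten extent: each
 * completing buffer drops one reference on the pagebuf tracking the
 * extent, and the final reference queues the extent conversion via
 * pagebuf_iodone.
 */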
void
linvfs_unwritten_done(
struct buffer_head *bh,
int uptodate)
{
page_buf_t *pb = (page_buf_t *)bh->b_private;
ASSERT(buffer_unwritten(bh));
bh->b_end_io = NULL;
clear_buffer_unwritten(bh);
if (!uptodate)
pagebuf_ioerror(pb, -EIO);
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1)
pagebuf_iodone(pb, 1, 1);
end_buffer_async_write(bh, uptodate);
}
/*
* Issue transactions to convert a buffer range from unwritten
* to written extents.
*/
STATIC void
xfs_unwritten_conv(
xfs_buf_t *bp)
{
bhv_desc_t *bdp = XFS_BUF_FSPRIVATE(bp, bhv_desc_t *);
xfs_mount_t *mp;
xfs_inode_t *ip;
ip = XFS_BHVTOI(bdp);
mp = ip->i_mount;
if (XFS_TEST_ERROR(XFS_BUF_GETERROR(bp), mp,
XFS_ERRTAG_STRATCMPL_IOERR,
XFS_RANDOM_STRATCMPL_IOERR)) {
xfs_ioerror_alert(__FUNCTION__, mp, bp, XFS_BUF_ADDR(bp));
}
XFS_IOMAP_WRITE_UNWRITTEN(mp, &ip->i_iocore,
XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp));
XFS_BUF_SET_FSPRIVATE(bp, NULL);
XFS_BUF_CLR_IODONE_FUNC(bp);
xfs_biodone(bp);
}
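/*
 * Note on xfs_unwritten_conv above: the XFS_BUF_OFFSET/XFS_BUF_SIZE it
 * consumes are the byte range that map_unwritten (in the address-space
 * code below) stored on the pagebuf, so the conversion transaction
 * covers exactly the buffers that were written.
 */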
STATIC int
map_blocks(
@@ -127,6 +173,58 @@ map_buffer_at_offset(
clear_buffer_delay(bh);
}
/*
* Look for a page at index which is unlocked and contains buffers
* flagged as unwritten extents at its head. Returns the page locked
* and with an extra reference count, plus the length of the unwritten
* extent component on this page that we can write, in units of
* filesystem blocks.
*/
STATIC struct page *
probe_unwritten_page(
struct address_space *mapping,
unsigned long index,
page_buf_bmap_t *mp,
page_buf_t *pb,
unsigned long max_offset,
unsigned long *fsbs)
{
struct page *page;
page = find_trylock_page(mapping, index);
if (!page)
return NULL;
if (PageWriteback(page))
goto out;
if (page->mapping && page_has_buffers(page)) {
struct buffer_head *bh, *head;
unsigned long p_offset = 0;
*fsbs = 0;
bh = head = page_buffers(page);
do {
if (!buffer_unwritten(bh))
break;
if (!match_offset_to_mapping(page, mp, p_offset))
break;
if (p_offset >= max_offset)
break;
set_buffer_unwritten_io(bh);
bh->b_private = pb;
p_offset += bh->b_size;
(*fsbs)++;
} while ((bh = bh->b_this_page) != head);
if (p_offset)
return page;
}
out:
unlock_page(page);
return NULL;
}
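/*
 * The do/while loop above is the standard walk over a page's circular
 * ring of buffer_heads, linked through b_this_page. A minimal
 * standalone sketch of the same traversal follows; the
 * count_unwritten() helper is hypothetical, for illustration only.
 */
STATIC int
count_unwritten(
	struct page		*page)
{
	struct buffer_head	*bh, *head;
	int			nr = 0;

	if (!page_has_buffers(page))
		return 0;
	bh = head = page_buffers(page);
	do {	/* b_this_page pointers form a ring */
		if (!buffer_unwritten(bh))
			break;
		nr++;
	} while ((bh = bh->b_this_page) != head);
	return nr;
}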
/*
* Look for a page at index which is unlocked and not mapped
* yet - clustering for mmap write case.
@@ -149,6 +247,7 @@ probe_unmapped_page(
if (page->mapping && PageDirty(page)) {
if (page_has_buffers(page)) {
struct buffer_head *bh, *head;
bh = head = page_buffers(page);
do {
if (buffer_mapped(bh) || !buffer_uptodate(bh))
@@ -206,11 +305,12 @@ probe_unmapped_cluster(
}
/*
* Probe for a given page (index) in the inode & test if it is delayed.
* Returns page locked and with an extra reference count.
* Probe for a given page (index) in the inode and test if it is delayed
* and without unwritten buffers. Returns page locked and with an extra
* reference count.
*/
STATIC struct page *
probe_page(
probe_delalloc_page(
struct inode *inode,
unsigned long index)
{
@@ -224,12 +324,20 @@ probe_page(
if (page->mapping && page_has_buffers(page)) {
struct buffer_head *bh, *head;
int acceptable = 0;
bh = head = page_buffers(page);
do {
if (buffer_delay(bh))
return page;
if (buffer_unwritten(bh)) {
acceptable = 0;
break;
} else if (buffer_delay(bh)) {
acceptable = 1;
}
} while ((bh = bh->b_this_page) != head);
if (acceptable)
return page;
}
out:
@@ -237,6 +345,102 @@ probe_page(
return NULL;
}
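/*
 * Build a pagebuf spanning an unwritten extent, map in the buffers it
 * covers on this page and on any following pages within the extent,
 * and arrange for xfs_unwritten_conv to run once all of the buffer
 * I/O has completed.
 */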
STATIC int
map_unwritten(
struct inode *inode,
struct page *start_page,
struct buffer_head *head,
struct buffer_head *curr,
unsigned long p_offset,
int block_bits,
page_buf_bmap_t *mp,
int all_bh)
{
struct buffer_head *bh = curr;
page_buf_bmap_t *tmp;
page_buf_t *pb;
loff_t offset, size;
unsigned long nblocks = 0;
offset = start_page->index;
offset <<= PAGE_CACHE_SHIFT;
offset += p_offset;
pb = pagebuf_lookup(mp->pbm_target,
mp->pbm_offset, mp->pbm_bsize, _PBF_LOCKABLE);
if (!pb)
return -ENOMEM;
/* Set the count to 1 initially; this will stop an I/O
* completion callout which happens before we have started
* all the I/O from calling pagebuf_iodone too early.
*/
atomic_set(&pb->pb_io_remaining, 1);
/* First map forwards in the page over consecutive buffers
* covering this unwritten extent.
*/
do {
if (!buffer_unwritten(bh))
break;
tmp = match_offset_to_mapping(start_page, mp, p_offset);
if (!tmp)
break;
BUG_ON(!(tmp->pbm_flags & PBMF_UNWRITTEN));
map_buffer_at_offset(start_page, bh, p_offset, block_bits, mp);
set_buffer_unwritten_io(bh);
bh->b_private = pb;
p_offset += bh->b_size;
nblocks++;
} while ((bh = bh->b_this_page) != head);
atomic_add(nblocks, &pb->pb_io_remaining);
/* If we reached the end of the page, map forwards in any
* following pages which are also covered by this extent.
*/
if (bh == head) {
struct address_space *mapping = inode->i_mapping;
unsigned long tindex, tlast, bs;
struct page *page;
tlast = inode->i_size >> PAGE_CACHE_SHIFT;
for (tindex = start_page->index + 1; tindex < tlast; tindex++) {
page = probe_unwritten_page(mapping, tindex, mp, pb,
PAGE_CACHE_SIZE, &bs);
if (!page)
break;
nblocks += bs;
atomic_add(bs, &pb->pb_io_remaining);
convert_page(inode, page, mp, pb, 1, all_bh);
}
if ((tindex == tlast) && (inode->i_size & ~PAGE_CACHE_MASK)) {
page = probe_unwritten_page(mapping, tindex, mp, pb,
inode->i_size & ~PAGE_CACHE_MASK, &bs);
if (page) {
nblocks += bs;
atomic_add(bs, &pb->pb_io_remaining);
convert_page(inode, page,
mp, pb, 1, all_bh);
}
}
}
size = nblocks; /* NB: using 64bit number here */
size <<= block_bits; /* convert fsb's to byte range */
XFS_BUF_SET_SIZE(pb, size);
XFS_BUF_SET_OFFSET(pb, offset);
XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)->v_fbhv);
XFS_BUF_SET_IODONE_FUNC(pb, xfs_unwritten_conv);
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1)
pagebuf_iodone(pb, 1, 1);
return 0;
}
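/*
 * Note the reference bias in map_unwritten above: pb_io_remaining
 * starts at 1 so that buffers completing while mapping is still in
 * progress cannot drop the count to zero; the final
 * atomic_dec_and_test releases the bias and completes the pagebuf here
 * only if all buffer I/O has already finished. For the size
 * computation, with 4KB filesystem blocks (block_bits == 12),
 * nblocks == 3 yields a byte range of 3 << 12 == 12288.
 */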
STATIC void
submit_page(
struct page *page,
@@ -255,6 +459,8 @@ submit_page(
for (i = 0; i < cnt; i++) {
bh = bh_arr[i];
mark_buffer_async_write(bh);
if (buffer_unwritten(bh))
set_buffer_unwritten_io(bh);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
}
@@ -268,14 +474,15 @@ submit_page(
/*
* Allocate & map buffers for page given the extent map. Write it out.
Except for the original page of a writepage, this is called on
* delalloc pages only, for the original page it is possible that
* the page has no mapping at all.
delalloc/unwritten pages only; for the original page it is possible
* that the page has no mapping at all.
*/
STATIC void
convert_page(
struct inode *inode,
struct page *page,
page_buf_bmap_t *maps,
void *private,
int startio,
int all_bh)
{
@@ -308,7 +515,23 @@ convert_page(
continue;
ASSERT(!(tmp->pbm_flags & PBMF_HOLE));
ASSERT(!(tmp->pbm_flags & PBMF_DELAY));
map_buffer_at_offset(page, bh, offset, bbits, tmp);
/* If this is a new unwritten extent buffer (i.e. one
* for which we haven't passed in private data), we must
* now map this buffer too.
*/
if (buffer_unwritten(bh) && !bh->b_end_io) {
ASSERT(tmp->pbm_flags & PBMF_UNWRITTEN);
map_unwritten(inode, page, head, bh,
offset, bbits, tmp, all_bh);
} else {
map_buffer_at_offset(page, bh, offset, bbits, tmp);
if (buffer_unwritten(bh)) {
set_buffer_unwritten_io(bh);
bh->b_private = private;
ASSERT(private);
}
}
if (startio && (offset < end)) {
bh_arr[index++] = bh;
} else {
@@ -341,10 +564,10 @@ cluster_write(
tlast = (mp->pbm_offset + mp->pbm_bsize) >> PAGE_CACHE_SHIFT;
for (; tindex < tlast; tindex++) {
page = probe_page(inode, tindex);
page = probe_delalloc_page(inode, tindex);
if (!page)
break;
convert_page(inode, page, mp, startio, all_bh);
convert_page(inode, page, mp, NULL, startio, all_bh);
}
}
@@ -368,7 +591,7 @@ cluster_write(
*/
STATIC int
delalloc_convert(
page_state_convert(
struct page *page,
int startio,
int unmapped) /* also implies page uptodate */
@@ -411,10 +634,42 @@ delalloc_convert(
}
/*
* First case, allocate space for delalloc buffer head
* we can return EAGAIN here in the release page case.
* First case, map an unwritten extent and prepare for
* extent state conversion transaction on completion.
*/
if (buffer_delay(bh)) {
if (buffer_unwritten(bh)) {
if (!mp) {
err = map_blocks(inode, offset, len, &map,
PBF_FILE_UNWRITTEN);
if (err) {
goto error;
}
mp = match_offset_to_mapping(page, &map,
p_offset);
}
if (mp) {
if (!bh->b_end_io) {
err = map_unwritten(inode, page,
head, bh, p_offset,
inode->i_blkbits,
mp, unmapped);
if (err) {
goto error;
}
}
if (startio) {
bh_arr[cnt++] = bh;
} else {
set_buffer_dirty(bh);
unlock_buffer(bh);
}
page_dirty = 0;
}
/*
* Second case, allocate space for a delalloc buffer.
* We can return EAGAIN here in the release page case.
*/
} else if (buffer_delay(bh)) {
if (!mp) {
err = map_blocks(inode, offset, len, &map,
PBF_FILE_ALLOCATE | flags);
@@ -574,6 +829,12 @@ linvfs_get_block_core(
bh_result->b_bdev = pbmap.pbm_target->pbr_bdev;
set_buffer_mapped(bh_result);
}
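/* Unwritten extents are reported as mapped only to writers (create),
 * and carry both the unwritten and delay bits so that the generic
 * path treats them like delalloc space awaiting conversion.
 */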
if (pbmap.pbm_flags & PBMF_UNWRITTEN) {
if (create)
set_buffer_mapped(bh_result);
set_buffer_unwritten(bh_result);
set_buffer_delay(bh_result);
}
}
/* If we previously allocated a block out beyond eof and
@@ -695,21 +956,23 @@ linvfs_readpages(
return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
}
STATIC void
count_page_state(
struct page *page,
int *delalloc,
int *unmapped)
int *unmapped,
int *unwritten)
{
struct buffer_head *bh, *head;
*delalloc = *unmapped = 0;
*delalloc = *unmapped = *unwritten = 0;
bh = head = page_buffers(page);
do {
if (buffer_uptodate(bh) && !buffer_mapped(bh))
(*unmapped) = 1;
else if (buffer_unwritten(bh))
(*unwritten) = 1;
else if (buffer_delay(bh))
(*delalloc) = 1;
} while ((bh = bh->b_this_page) != head);
@@ -736,6 +999,7 @@ count_page_state(
* is off, we need to fail the writepage and redirty the page.
* We also need to set PF_NOIO ourselves.
*/
STATIC int
linvfs_writepage(
struct page *page,
@@ -743,7 +1007,7 @@ linvfs_writepage(
{
int error;
int need_trans;
int delalloc, unmapped;
int delalloc, unmapped, unwritten;
struct inode *inode = page->mapping->host;
/*
@@ -751,15 +1015,16 @@ linvfs_writepage(
* 1. There are delalloc buffers on the page
* 2. The page is up to date and we have unmapped buffers
* 3. The page is up to date and we have no buffers
* 4. There are unwritten buffers on the page
*/
if (!page_has_buffers(page)) {
unmapped = 1;
need_trans = 1;
} else {
count_page_state(page, &delalloc, &unmapped);
count_page_state(page, &delalloc, &unmapped, &unwritten);
if (!PageUptodate(page))
unmapped = 0;
need_trans = delalloc + unmapped;
need_trans = delalloc + unmapped + unwritten;
}
/*
@@ -775,15 +1040,14 @@ linvfs_writepage(
* Delay hooking up buffer heads until we have
* made our go/no-go decision.
*/
if (!page_has_buffers(page)) {
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << inode->i_blkbits, 0);
}
/*
* Convert delalloc or unmapped space to real space and flush out
* to disk.
* Convert delayed allocate, unwritten or unmapped space
* to real space and flush out to disk.
*/
error = delalloc_convert(page, 1, unmapped);
error = page_state_convert(page, 1, unmapped);
if (error == -EAGAIN)
goto out_fail;
if (unlikely(error < 0))
@@ -824,10 +1088,10 @@ linvfs_release_page(
struct page *page,
int gfp_mask)
{
int delalloc, unmapped;
int delalloc, unmapped, unwritten;
count_page_state(page, &delalloc, &unmapped);
if (!delalloc)
count_page_state(page, &delalloc, &unmapped, &unwritten);
if (!delalloc && !unwritten)
goto free_buffers;
if (!(gfp_mask & __GFP_FS))
@@ -839,7 +1103,7 @@ linvfs_release_page(
* Never need to allocate space here - we will always
* come back to writepage in that case.
*/
if (delalloc_convert(page, 0, 0) == 0)
if (page_state_convert(page, 0, 0) == 0)
goto free_buffers;
return 0;
......
@@ -66,5 +66,6 @@ extern struct file_operations linvfs_dir_operations;
extern struct address_space_operations linvfs_aops;
extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void linvfs_unwritten_done(struct buffer_head *, int);
#endif /* __XFS_IOPS_H__ */
@@ -69,6 +69,20 @@
#define STATIC static
#endif
/*
* State flag for unwritten extent buffers.
*
* We need to be able to distinguish between these and delayed
* allocate buffers within XFS. The generic IO path code does
* not need to distinguish - we use the BH_Delay flag for both
* delalloc and these on-disk uninitialised buffers.
*/
BUFFER_FNS(PrivateStart, unwritten);
static inline void set_buffer_unwritten_io(struct buffer_head *bh)
{
bh->b_end_io = linvfs_unwritten_done;
}
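/*
 * For reference, BUFFER_FNS(bit, name) in linux/buffer_head.h
 * generates set_buffer_name/clear_buffer_name/buffer_name accessors
 * over BH_bit, so BUFFER_FNS(PrivateStart, unwritten) above expands
 * roughly as follows (a sketch; the real macro is authoritative):
 *
 *	static inline void set_buffer_unwritten(struct buffer_head *bh)
 *	{ set_bit(BH_PrivateStart, &bh->b_state); }
 *	static inline void clear_buffer_unwritten(struct buffer_head *bh)
 *	{ clear_bit(BH_PrivateStart, &bh->b_state); }
 *	static inline int buffer_unwritten(struct buffer_head *bh)
 *	{ return test_bit(BH_PrivateStart, &bh->b_state); }
 */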
#define restricted_chown xfs_params.restrict_chown
#define irix_sgid_inherit xfs_params.sgid_inherit
#define irix_symlink_mode xfs_params.symlink_mode
......
@@ -121,7 +121,8 @@ pb_trace_func(
STATIC kmem_cache_t *pagebuf_cache;
STATIC void pagebuf_daemon_wakeup(int);
STATIC void pagebuf_delwri_queue(page_buf_t *, int);
STATIC struct workqueue_struct *pagebuf_workqueue;
STATIC struct workqueue_struct *pagebuf_logio_workqueue;
STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
/*
* Pagebuf module configuration parameters, exported via
@@ -785,6 +786,25 @@ pagebuf_get( /* allocate a buffer */
return (pb);
}
/*
* Create a skeletal pagebuf (no pages associated with it).
*/
page_buf_t *
pagebuf_lookup(
struct pb_target *target,
loff_t ioff,
size_t isize,
page_buf_flags_t flags)
{
page_buf_t *pb;
pb = pagebuf_allocate(flags);
if (pb) {
_pagebuf_initialize(pb, target, ioff, isize, flags);
}
return pb;
}
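/* Typical use (see map_unwritten in the address-space code): create a
 * pageless pagebuf purely as an I/O completion tracking object:
 *
 *	pb = pagebuf_lookup(mp->pbm_target,
 *			mp->pbm_offset, mp->pbm_bsize, _PBF_LOCKABLE);
 *	if (!pb)
 *		return -ENOMEM;
 */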
/*
* If we are not low on memory then do the readahead in a deadlock
* safe manner.
@@ -1131,6 +1151,7 @@ pagebuf_iodone_work(
void
pagebuf_iodone(
page_buf_t *pb,
int dataio,
int schedule)
{
pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
@@ -1143,7 +1164,8 @@ pagebuf_iodone(
if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
if (schedule) {
INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
queue_work(pagebuf_workqueue, &pb->pb_iodone_work);
queue_work(dataio ? pagebuf_dataio_workqueue :
pagebuf_logio_workqueue, &pb->pb_iodone_work);
} else {
pagebuf_iodone_work(pb);
}
@@ -1268,7 +1290,7 @@ bio_end_io_pagebuf(
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
pb->pb_locked = 0;
pagebuf_iodone(pb, 1);
pagebuf_iodone(pb, 0, 1);
}
bio_put(bio);
@@ -1412,7 +1434,7 @@ pagebuf_iorequest( /* start real I/O */
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
pb->pb_locked = 0;
pagebuf_iodone(pb, 0);
pagebuf_iodone(pb, 0, 0);
}
return 0;
@@ -1734,13 +1756,21 @@ pagebuf_daemon_start(void)
{
int rval;
pagebuf_workqueue = create_workqueue("pagebuf");
if (!pagebuf_workqueue)
pagebuf_logio_workqueue = create_workqueue("xfslogd");
if (!pagebuf_logio_workqueue)
return -ENOMEM;
pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
if (!pagebuf_dataio_workqueue) {
destroy_workqueue(pagebuf_logio_workqueue);
return -ENOMEM;
}
rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
if (rval < 0)
destroy_workqueue(pagebuf_workqueue);
if (rval < 0) {
destroy_workqueue(pagebuf_logio_workqueue);
destroy_workqueue(pagebuf_dataio_workqueue);
}
return rval;
}
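/*
 * Two workqueues, presumably so that unwritten extent conversion
 * (data I/O completion, which may itself need to wait on the log)
 * cannot delay log I/O completion by being queued ahead of it.
 */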
@@ -1756,7 +1786,8 @@ pagebuf_daemon_stop(void)
pbd_active = 0;
wake_up_interruptible(&pbd_waitq);
wait_event_interruptible(pbd_waitq, pbd_active);
destroy_workqueue(pagebuf_workqueue);
destroy_workqueue(pagebuf_logio_workqueue);
destroy_workqueue(pagebuf_dataio_workqueue);
}
......
@@ -169,8 +169,8 @@ typedef page_buf_bmap_t pb_bmap_t;
* This buffer structure is used by the page cache buffer management routines
* to refer to an assembly of pages forming a logical buffer. The actual
* I/O is performed with buffer_head or bio structures, as required by drivers,
for drivers which do not understand this structure. The buffer structure is
used on a temporary basis only, and discarded when released.
*
* The real data storage is recorded in the page cache. Metadata is
* hashed to the inode for the block device on which the file system resides.
@@ -245,6 +245,13 @@ extern page_buf_t *pagebuf_get( /* allocate a buffer */
page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
/* PBF_ASYNC */
extern page_buf_t *pagebuf_lookup(
struct pb_target *,
loff_t, /* starting offset of range */
size_t, /* length of range */
page_buf_flags_t); /* PBF_READ, PBF_WRITE, */
/* PBF_FORCEIO, _PBF_LOCKABLE */
extern page_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
/* no memory or disk address */
struct pb_target *); /* mount point "fake" inode */
@@ -300,6 +307,7 @@ static inline int pagebuf_geterror(page_buf_t *pb)
extern void pagebuf_iodone( /* mark buffer I/O complete */
page_buf_t *, /* buffer to mark */
int, /* use data/log helper thread. */
int); /* run completion locally, or in
* a helper thread. */
......
@@ -163,15 +163,17 @@ extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset)
#define XFS_BUF_SET_PTR(bp, val, count) \
pagebuf_associate_memory(bp, val, count)
#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)
#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset >> 9)
#define XFS_BUF_SET_ADDR(bp, blk) \
((bp)->pb_bn = (page_buf_daddr_t)(blk))
#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)
#define XFS_BUF_SET_OFFSET(bp, off) \
((bp)->pb_file_offset = (off))
#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired)
#define XFS_BUF_SET_COUNT(bp, cnt) \
((bp)->pb_count_desired = cnt)
((bp)->pb_count_desired = (cnt))
#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length)
#define XFS_BUF_SET_SIZE(bp, cnt) \
((bp)->pb_buffer_length = cnt)
((bp)->pb_buffer_length = (cnt))
#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
#define XFS_BUF_SET_VTYPE(bp, type)
#define XFS_BUF_SET_REF(bp, ref)
@@ -242,7 +244,7 @@ static inline void xfs_buf_relse(page_buf_t *bp)
#define xfs_biodone(pb) \
pagebuf_iodone(pb, 0)
pagebuf_iodone(pb, 0, 0)
#define xfs_incore(buftarg,blkno,len,lockit) \
pagebuf_find(buftarg, blkno ,len, lockit)
......