Merge bk://ldm.bkbits.net/linux-2.5-driverfs

into home.transmeta.com:/home/torvalds/v2.5/linux

Merge bk://ldm.bkbits.net/linux-2.5-driverfs
into home.transmeta.com:/home/torvalds/v2.5/linux
c4265b8b · Linus Torvalds · a5a7521e · f951b5f5 · c4265b8b · c4265b8b
Commit c4265b8b authored Aug 05, 2002 by Linus Torvalds
21 changed files
--- a/Documentation/filesystems/jfs.txt
+++ b/Documentation/filesystems/jfs.txt
@@ -17,11 +17,16 @@ iocharset=name	Character set to use for converting from Unicode to
 		translations.  This requires CONFIG_NLS_UTF8 to be set
 		in the kernel .config file.
+resize=value	Resize the volume to <value> blocks.  JFS only supports
+		growing a volume, not shrinking it.  This option is only
+		valid during a remount, when the volume is mounted
+		read-write.  The resize keyword with no value will grow
+		the volume to the full size of the partition.
 JFS TODO list:
 Plans for our near term development items
-   - implement online resize for extending JFS volumes
   - enhance support for logfile on dedicated partition
   - get access control list functionality operational
   - get extended attributes functionality operational

--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -8,7 +8,7 @@ jfs-objs := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
 	    jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
 	    jfs_unicode.o jfs_dtree.o jfs_inode.o \
 	    jfs_extent.o symlink.o jfs_metapage.o \
-	    jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o
+	    jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o
 EXTRA_CFLAGS += -D_JFS_4K

--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -38,9 +38,7 @@ int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 		return rc;
-	IWRITE_LOCK(inode);
 	rc |= jfs_commit_inode(inode, 1);
-	IWRITE_UNLOCK(inode);
 	return rc ? -EIO : 0;
 }
@@ -64,10 +62,19 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
 	do {
 		tid = txBegin(ip->i_sb, 0);
+		/*
+		 * The commit_sem cannot be taken before txBegin.
+		 * txBegin may block and there is a chance the inode
+		 * could be marked dirty and need to be committed
+		 * before txBegin unblocks
+		 */
+		down(&JFS_IP(ip)->commit_sem);
 		newsize = xtTruncate(tid, ip, length,
 				     COMMIT_TRUNCATE | COMMIT_PWMAP);
 		if (newsize < 0) {
 			txEnd(tid);
+			up(&JFS_IP(ip)->commit_sem);
 			break;
 		}
@@ -76,6 +83,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
 		txCommit(tid, 1, &ip, 0);
 		txEnd(tid);
+		up(&JFS_IP(ip)->commit_sem);
 	} while (newsize > length);	/* Truncate isn't always atomic */
 }

--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -107,8 +107,10 @@ int jfs_commit_inode(struct inode *inode, int wait)
 	}
 	tid = txBegin(inode->i_sb, COMMIT_INODE);
+	down(&JFS_IP(inode)->commit_sem);
 	rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
 	txEnd(tid);
+	up(&JFS_IP(inode)->commit_sem);
 	return -rc;
 }
@@ -123,25 +125,19 @@ void jfs_write_inode(struct inode *inode, int wait)
 	    !test_cflag(COMMIT_Dirty, inode))
 		return;
-	IWRITE_LOCK(inode);
 	if (jfs_commit_inode(inode, wait)) {
 		jERROR(1, ("jfs_write_inode: jfs_commit_inode failed!\n"));
 	}
-	IWRITE_UNLOCK(inode);
 }
 void jfs_delete_inode(struct inode *inode)
 {
 	jFYI(1, ("In jfs_delete_inode, inode = 0x%p\n", inode));
-	IWRITE_LOCK(inode);
 	if (test_cflag(COMMIT_Freewmap, inode))
 		freeZeroLink(inode);
 	diFree(inode);
-	IWRITE_UNLOCK(inode);
 	clear_inode(inode);
 }
@@ -203,8 +199,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,
 	if ((no_size_check ||
 	     ((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) &&
-	    (xtLookup
+	    (xtLookup(ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
-	     (ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
 	     == 0) && xlen) {
 		if (xflag & XAD_NOTRECORDED) {
 			if (!create)
@@ -241,8 +236,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,
 	 * Allocate a new block
 	 */
 #ifdef _JFS_4K
-	if ((rc =
+	if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
-	     extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
 		goto unlock;
 	rc = extAlloc(ip, 1, lblock64, &xad, FALSE);
 	if (rc)

--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -330,7 +330,7 @@ int dbSync(struct inode *ipbmap)
 	filemap_fdatawait(ipbmap->i_mapping);
 	ipbmap->i_state |= I_DIRTY;
-	diWriteSpecial(ipbmap);
+	diWriteSpecial(ipbmap, 0);
 	return (0);
 }
@@ -3175,7 +3175,7 @@ static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
 			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
 						      >> wbitno);
-			word += 1;
+			word++;
 		} else {
 			/* one or more dmap words are fully contained
 			 * within the block range.  determine how many
@@ -3187,6 +3187,7 @@ static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
 			/* determine how many bits */
 			nb = nwords << L2DBWORD;
+			word += nwords;
 		}
 	}

--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -96,6 +96,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	/* This blocks if we are low on resources */
 	txBeginAnon(ip->i_sb);
+	/* Avoid race with jfs_commit_inode() */
+	down(&JFS_IP(ip)->commit_sem);
 	/* validate extent length */
 	if (xlen > MAXXLEN)
 		xlen = MAXXLEN;
@@ -138,8 +141,8 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	 * is smaller than the number of blocks per page.
 	 */
 	nxlen = xlen;
-	if ((rc =
+	if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
-	     extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
+		up(&JFS_IP(ip)->commit_sem);
 		return (rc);
 	}
@@ -160,6 +163,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	 */
 	if (rc) {
 		dbFree(ip, nxaddr, nxlen);
+		up(&JFS_IP(ip)->commit_sem);
 		return (rc);
 	}
@@ -174,6 +178,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	mark_inode_dirty(ip);
+	up(&JFS_IP(ip)->commit_sem);
 	/*
 	 * COMMIT_SyncList flags an anonymous tlock on page that is on
 	 * sync list.
@@ -217,6 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	/* This blocks if we are low on resources */
 	txBeginAnon(ip->i_sb);
+	down(&JFS_IP(ip)->commit_sem);
 	/* validate extent length */
 	if (nxlen > MAXXLEN)
 		nxlen = MAXXLEN;
@@ -235,7 +241,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
 		xp->flag = 0;
 		if ((rc = xtUpdate(0, ip, xp)))
-			return (rc);
+			goto exit;
 	}
 	/* try to allocate the requested number of blocks for the
@@ -247,7 +253,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	 * space as to satisfy the extend page.
 	 */
 	if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
-		return (rc);
+		goto exit;
 	delta = nxlen - xlen;
@@ -284,7 +290,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 		/* extend the extent */
 		if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
 			dbFree(ip, xaddr + xlen, delta);
-			return (rc);
+			goto exit;
 		}
 	} else {
 		/*
@@ -294,7 +300,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 		 */
 		if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
 			dbFree(ip, nxaddr, nxlen);
-			return (rc);
+			goto exit;
 		}
 	}
@@ -325,8 +331,9 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	xp->flag = xflag;
 	mark_inode_dirty(ip);
+exit:
-	return (0);
+	up(&JFS_IP(ip)->commit_sem);
+	return (rc);
 }
@@ -423,19 +430,13 @@ int extRecord(struct inode *ip, xad_t * xp)
 	txBeginAnon(ip->i_sb);
-	/* update the extent */
+	down(&JFS_IP(ip)->commit_sem);
-	if ((rc = xtUpdate(0, ip, xp)))
-		return (rc);
-#ifdef _STILL_TO_PORT
-	/* no longer abnr */
-	cp->cm_abnr = FALSE;
-	/* mark the cbuf as modified */
+	/* update the extent */
-	cp->cm_modified = TRUE;
+	rc = xtUpdate(0, ip, xp);
-#endif				/*  _STILL_TO_PORT */
-	return (0);
+	up(&JFS_IP(ip)->commit_sem);
+	return (rc);
 }

--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
 	filemap_fdatawrite(ipimap->i_mapping);
 	filemap_fdatawait(ipimap->i_mapping);
-	diWriteSpecial(ipimap);
+	diWriteSpecial(ipimap, 0);
 	return (0);
 }
@@ -450,12 +450,13 @@ int diRead(struct inode *ip)
 * PARAMETERS:
 *      sb - filesystem superblock
 *	inum - aggregate inode number
+ *	secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES:
 *      new inode	- success
 *      NULL		- i/o error.
 */
-struct inode *diReadSpecial(struct super_block *sb, ino_t inum)
+struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(sb);
 	uint address;
@@ -470,21 +471,16 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum)
 		return ip;
 	}
-	/*
+	if (secondary) {
-	 * If ip->i_number >= 32 (INOSPEREXT), then read from secondary
+		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
-	 * aggregate inode table.
-	 */
-	if (inum >= INOSPEREXT) {
-		address =
-		    addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
-		inum -= INOSPEREXT;
-		ASSERT(inum < INOSPEREXT);
 		JFS_IP(ip)->ipimap = sbi->ipaimap2;
 	} else {
 		address = AITBL_OFF >> L2PSIZE;
 		JFS_IP(ip)->ipimap = sbi->ipaimap;
 	}
+	ASSERT(inum < INOSPEREXT);
 	ip->i_ino = inum;
 	address += inum >> 3;	/* 8 inodes per 4K page */
@@ -538,11 +534,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum)
 *
 * PARAMETERS:
 *      ip - special inode
+ *	secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES: none
 */
-void diWriteSpecial(struct inode *ip)
+void diWriteSpecial(struct inode *ip, int secondary)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 	uint address;
@@ -550,24 +547,14 @@ void diWriteSpecial(struct inode *ip)
 	ino_t inum = ip->i_ino;
 	metapage_t *mp;
-	/*
-	 * If ip->i_number >= 32 (INOSPEREXT), then write to secondary
-	 * aggregate inode table.
-	 */
-	if (!(ip->i_state & I_DIRTY))
-		return;
 	ip->i_state &= ~I_DIRTY;
-	if (inum >= INOSPEREXT) {
+	if (secondary)
-		address =
+		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
-		    addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
+	else
-		inum -= INOSPEREXT;
-		ASSERT(inum < INOSPEREXT);
-	} else {
 		address = AITBL_OFF >> L2PSIZE;
-	}
+	ASSERT(inum < INOSPEREXT);
 	address += inum >> 3;	/* 8 inodes per 4K page */
@@ -2996,7 +2983,7 @@ duplicateIXtree(struct super_block *sb, s64 blkno, int xlen, s64 * xaddr)
 	/* if AIT2 ipmap2 is bad, do not try to update it */
 	if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)	/* s_flag */
 		return;
-	ip = diReadSpecial(sb, FILESYSTEM_I + INOSPEREXT);
+	ip = diReadSpecial(sb, FILESYSTEM_I, 1);
 	if (ip == 0) {
 		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
 		if ((rc = readSuper(sb, &mpsuper)))

--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -146,15 +146,12 @@ extern int diSync(struct inode *);
 /* external references */
 extern int diUpdatePMap(struct inode *ipimap, unsigned long inum,
 			boolean_t is_free, tblock_t * tblk);
-#ifdef _STILL_TO_PORT
+extern int diExtendFS(struct inode *ipimap, struct inode *ipbmap);
-extern int diExtendFS(inode_t * ipimap, inode_t * ipbmap);
-#endif				/* _STILL_TO_PORT */
 extern int diMount(struct inode *);
 extern int diUnmount(struct inode *, int);
 extern int diRead(struct inode *);
-extern struct inode *diReadSpecial(struct super_block *, ino_t);
+extern struct inode *diReadSpecial(struct super_block *, ino_t, int);
-extern void diWriteSpecial(struct inode *);
+extern void diWriteSpecial(struct inode *, int);
 extern void diFreeSpecial(struct inode *);
 extern int diWrite(tid_t tid, struct inode *);
 #endif				/* _H_JFS_IMAP */
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -19,6 +19,7 @@
 #ifndef _H_JFS_INCORE
 #define _H_JFS_INCORE
+#include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <asm/bitops.h>
 #include "jfs_types.h"
@@ -30,14 +31,6 @@
 */
 #define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */
-/*
- * Due to header ordering problems this can't be in jfs_lock.h
- */
-typedef struct	jfs_rwlock {
-	struct rw_semaphore rw_sem;
-	atomic_t in_use;	/* for hacked implementation of trylock */
-} jfs_rwlock_t;
 /*
 * JFS-private inode information
 */
@@ -62,7 +55,19 @@ struct jfs_inode_info {
 	lid_t	atltail;	/* anonymous tlock list tail	*/
 	struct list_head anon_inode_list; /* inodes having anonymous txns */
 	struct list_head mp_list; /* metapages in inode's address space */
-	jfs_rwlock_t rdwrlock;	/* read/write lock	*/
+	/*
+	 * rdwrlock serializes xtree between reads & writes and synchronizes
+	 * changes to special inodes.  Its use would be redundant on
+	 * directories since the i_sem taken in the VFS is sufficient.
+	 */
+	struct rw_semaphore rdwrlock;
+	/*
+	 * commit_sem serializes transaction processing on an inode.
+	 * It must be taken after beginning a transaction (txBegin), since
+	 * dirty inodes may be committed while a new transaction on the
+	 * inode is blocked in txBegin or txBeginAnon
+	 */
+	struct semaphore commit_sem;
 	lid_t	xtlid;		/* lid of xtree lock on directory */
 	union {
 		struct {
@@ -87,6 +92,12 @@ struct jfs_inode_info {
 #define i_dtroot u.dir._dtroot
 #define i_inline u.link._inline
+#define IREAD_LOCK(ip)		down_read(&JFS_IP(ip)->rdwrlock)
+#define IREAD_UNLOCK(ip)	up_read(&JFS_IP(ip)->rdwrlock)
+#define IWRITE_LOCK(ip)		down_write(&JFS_IP(ip)->rdwrlock)
+#define IWRITE_UNLOCK(ip)	up_write(&JFS_IP(ip)->rdwrlock)
 /*
 * cflag
 */
@@ -125,6 +136,7 @@ struct jfs_sb_info {
 	u32		logdev;		/* 2: external log device	*/
 	uint		aggregate;	/* volume identifier in log record */
 	pxd_t		logpxd;		/* 8: pxd describing log	*/
+	pxd_t		fsckpxd;	/* 8: pxd describing fsck wkspc */
 	pxd_t		ait2;		/* 8: pxd describing AIT copy	*/
 	char		uuid[16];	/* 16: 128-bit uuid for volume	*/
 	char		loguuid[16];	/* 16: 128-bit uuid for log	*/

--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -91,40 +91,3 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	return inode;
 }
-/*
- * NAME:	iwritelocklist()
- *
- * FUNCTION:	Lock multiple inodes in sorted order to avoid deadlock
- *
- */
-void iwritelocklist(int n, ...)
-{
-	va_list ilist;
-	struct inode *sort[4];
-	struct inode *ip;
-	int k, m;
-	va_start(ilist, n);
-	for (k = 0; k < n; k++)
-		sort[k] = va_arg(ilist, struct inode *);
-	va_end(ilist);
-	/* Bubble sort in descending order */
-	do {
-		m = 0;
-		for (k = 0; k < n; k++)
-			if ((k + 1) < n
-			    && sort[k + 1]->i_ino > sort[k]->i_ino) {
-				ip = sort[k];
-				sort[k] = sort[k + 1];
-				sort[k + 1] = ip;
-				m++;
-			}
-	} while (m);
-	/* Lock them */
-	for (k = 0; k < n; k++) {
-		IWRITE_LOCK(sort[k]);
-	}
-}
--- a/fs/jfs/jfs_lock.h
+++ b/fs/jfs/jfs_lock.h
@@ -24,63 +24,7 @@
 /*
 *	jfs_lock.h
- *
- * JFS lock definition for globally referenced locks
- */
-/* readers/writer lock: thread-thread */
-/*
- * RW semaphores do not currently have a trylock function.  Since the
- * implementation varies by platform, I have implemented a platform-independent
- * wrapper around the rw_semaphore routines.  If this turns out to be the best
- * way of avoiding our locking problems, I will push to get a trylock
- * implemented in the kernel, but I'd rather find a way to avoid having to
- * use it.
 */
-#define RDWRLOCK_T jfs_rwlock_t
-static inline void RDWRLOCK_INIT(jfs_rwlock_t * Lock)
-{
-	init_rwsem(&Lock->rw_sem);
-	atomic_set(&Lock->in_use, 0);
-}
-static inline void READ_LOCK(jfs_rwlock_t * Lock)
-{
-	atomic_inc(&Lock->in_use);
-	down_read(&Lock->rw_sem);
-}
-static inline void READ_UNLOCK(jfs_rwlock_t * Lock)
-{
-	up_read(&Lock->rw_sem);
-	atomic_dec(&Lock->in_use);
-}
-static inline void WRITE_LOCK(jfs_rwlock_t * Lock)
-{
-	atomic_inc(&Lock->in_use);
-	down_write(&Lock->rw_sem);
-}
-static inline int WRITE_TRYLOCK(jfs_rwlock_t * Lock)
-{
-	if (atomic_read(&Lock->in_use))
-		return 0;
-	WRITE_LOCK(Lock);
-	return 1;
-}
-static inline void WRITE_UNLOCK(jfs_rwlock_t * Lock)
-{
-	up_write(&Lock->rw_sem);
-	atomic_dec(&Lock->in_use);
-}
-#define IREAD_LOCK(ip)		READ_LOCK(&JFS_IP(ip)->rdwrlock)
-#define IREAD_UNLOCK(ip)	READ_UNLOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_LOCK(ip)		WRITE_LOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_TRYLOCK(ip)	WRITE_TRYLOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_UNLOCK(ip)	WRITE_UNLOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_LOCK_LIST	iwritelocklist
-extern void iwritelocklist(int, ...);
 /*
 * Conditional sleep where condition is protected by spinlock

--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -172,8 +172,6 @@ static int lmWriteRecord(log_t * log, tblock_t * tblk, lrd_t * lrd,
 static int lmNextPage(log_t * log);
 static int lmLogFileSystem(log_t * log, char *uuid, int activate);
-static int lmLogInit(log_t * log);
-static int lmLogShutdown(log_t * log);
 static int lbmLogInit(log_t * log);
 static void lbmLogShutdown(log_t * log);
@@ -1037,7 +1035,7 @@ int lmLogSync(log_t * log, int nosyncwait)
 	 * by setting syncbarrier flag.
 	 */
 	if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
-		log->syncbarrier = 1;
+		set_bit(log_SYNCBARRIER, &log->flag);
 		jFYI(1, ("log barrier on: lsn=0x%x syncpt=0x%x\n", lsn,
 			 log->syncpt));
 	}
@@ -1068,6 +1066,7 @@ int lmLogOpen(struct super_block *sb, log_t ** logptr)
 	if (!(log = kmalloc(sizeof(log_t), GFP_KERNEL)))
 		return ENOMEM;
 	memset(log, 0, sizeof(log_t));
+	init_waitqueue_head(&log->syncwait);
 	log->sb = sb;		/* This should be a list */
@@ -1080,7 +1079,7 @@ int lmLogOpen(struct super_block *sb, log_t ** logptr)
 	 * file system to log have 1-to-1 relationship;
 	 */
-	log->flag = JFS_INLINELOG;
+	set_bit(log_INLINELOG, &log->flag);
 	log->bdev = sb->s_bdev;
 	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
 	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
@@ -1175,7 +1174,7 @@ int lmLogOpen(struct super_block *sb, log_t ** logptr)
 *			
 * serialization: single first open thread
 */
-static int lmLogInit(log_t * log)
+int lmLogInit(log_t * log)
 {
 	int rc = 0;
 	lrd_t lrd;
@@ -1203,7 +1202,7 @@ static int lmLogInit(log_t * log)
 	 */
-	if (!(log->flag & JFS_INLINELOG))
+	if (!test_bit(log_INLINELOG, &log->flag))
 		log->l2bsize = 12;	/* XXX kludge alert XXX */
 	if ((rc = lbmRead(log, 1, &bpsuper)))
 		goto errout10;
@@ -1224,7 +1223,7 @@ static int lmLogInit(log_t * log)
 	}
 	/* initialize log inode from log superblock */
-	if (log->flag & JFS_INLINELOG) {
+	if (test_bit(log_INLINELOG,&log->flag)) {
 		if (log->size != le32_to_cpu(logsuper->size)) {
 			rc = EINVAL;
 			goto errout20;
@@ -1244,10 +1243,6 @@ static int lmLogInit(log_t * log)
 		      log, (unsigned long long) log->base, log->size));
 	}
-	log->flag |= JFS_GROUPCOMMIT;
-/*
-	log->flag |= JFS_LAZYCOMMIT;
-*/
 	log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
 	log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
@@ -1309,7 +1304,6 @@ static int lmLogInit(log_t * log)
 	log->syncpt = lsn;
 	log->sync = log->syncpt;
 	log->nextsync = LOGSYNC_DELTA(log->logsize);
-	init_waitqueue_head(&log->syncwait);
 	jFYI(1, ("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x\n",
 		 log->lsn, log->syncpt, log->sync));
@@ -1377,7 +1371,7 @@ int lmLogClose(struct super_block *sb, log_t * log)
 	jFYI(1, ("lmLogClose: log:0x%p\n", log));
-	if (!(log->flag & JFS_INLINELOG))
+	if (!test_bit(log_INLINELOG, &log->flag))
 		goto externalLog;
 	/*
@@ -1445,7 +1439,7 @@ void lmLogWait(log_t *log)
 *			
 * serialization: single last close thread
 */
-static int lmLogShutdown(log_t * log)
+int lmLogShutdown(log_t * log)
 {
 	int rc;
 	lrd_t lrd;
@@ -1524,8 +1518,6 @@ static int lmLogShutdown(log_t * log)
 *
 * RETURN:	0	- success
 *		errors returned by vms_iowait().
- *			
- * serialization: IWRITE_LOCK(log inode) held on entry/exit
 */
 static int lmLogFileSystem(log_t * log, char *uuid, int activate)
 {
@@ -1578,37 +1570,6 @@ static int lmLogFileSystem(log_t * log, char *uuid, int activate)
 	return rc;
 }
-/*
- *	lmLogQuiesce()
- */
-int lmLogQuiesce(log_t * log)
-{
-	int rc;
-	rc = lmLogShutdown(log);
-	return rc;
-}
-/*
- *	lmLogResume()
- */
-int lmLogResume(log_t * log, struct super_block *sb)
-{
-	struct jfs_sb_info *sbi = JFS_SBI(sb);
-	int rc;
-	log->base = addressPXD(&sbi->logpxd);
-	log->size =
-	    (lengthPXD(&sbi->logpxd) << sb->s_blocksize_bits) >> L2LOGPSIZE;
-	rc = lmLogInit(log);
-	return rc;
-}
 /*
 *		log buffer manager (lbm)
 *		------------------------
@@ -2192,42 +2153,40 @@ int jfsIOWait(void *arg)
 	return 0;
 }
-#ifdef _STILL_TO_PORT
 /*
 * NAME:	lmLogFormat()/jfs_logform()
 *
- * FUNCTION:	format file system log (ref. jfs_logform()).
+ * FUNCTION:	format file system log
 *
 * PARAMETERS:
- *	log	- log inode (with common mount inode base);
+ *      log	- volume log
- *	logAddress - start address of log space in FS block;
+ *	logAddress - start address of log space in FS block
 *	logSize	- length of log space in FS block;
 *
 * RETURN:	0	- success
- *		-1 -	i/o error
+ *		-EIO	- i/o error
+ *
+ * XXX: We're synchronously writing one page at a time.  This needs to
+ *	be improved by writing multiple pages at once.
 */
-int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
+int lmLogFormat(log_t *log, s64 logAddress, int logSize)
 {
-	int rc = 0;
+	int rc = -EIO;
-	cbuf_t *bp;
+	struct jfs_sb_info *sbi = JFS_SBI(log->sb);
 	logsuper_t *logsuper;
 	logpage_t *lp;
 	int lspn;		/* log sequence page number */
 	struct lrd *lrd_ptr;
-	int npbperpage, npages;
+	int npages = 0;
+	lbuf_t *bp;
 	jFYI(0, ("lmLogFormat: logAddress:%Ld logSize:%d\n",
-		 logAddress, logSize));
+		 (long long)logAddress, logSize));
-	/* allocate a JFS buffer */
-	bp = rawAllocate();
-	/* map the logical block address to physical block address */
+	/* allocate a log buffer */
-	bp->cm_blkno = logAddress << ipmnt->i_l2bfactor;
+	bp = lbmAllocate(log, 1);
-	npbperpage = LOGPSIZE >> ipmnt->i_l2pbsize;
+	npages = logSize >> sbi->l2nbperpage;
-	npages = logSize / (LOGPSIZE >> ipmnt->i_l2bsize);
 	/*
 	 *      log space:
@@ -2241,20 +2200,22 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
 	/*
 	 *      init log superblock: log page 1
 	 */
-	logsuper = (logsuper_t *) bp->cm_cdata;
+	logsuper = (logsuper_t *) bp->l_ldata;
 	logsuper->magic = cpu_to_le32(LOGMAGIC);
 	logsuper->version = cpu_to_le32(LOGVERSION);
 	logsuper->state = cpu_to_le32(LOGREDONE);
-	logsuper->flag = cpu_to_le32(ipmnt->i_mntflag);	/* ? */
+	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
 	logsuper->size = cpu_to_le32(npages);
-	logsuper->bsize = cpu_to_le32(ipmnt->i_bsize);
+	logsuper->bsize = cpu_to_le32(sbi->bsize);
-	logsuper->l2bsize = cpu_to_le32(ipmnt->i_l2bsize);
+	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
-	logsuper->end =
+	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
-	    cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
-	bp->cm_blkno += npbperpage;
+	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
-	rawWrite(ipmnt, bp, 0);
+	bp->l_blkno = logAddress + sbi->nbperpage;
+	lbmStartIO(bp);
+	if ((rc = lbmIOWait(bp, 0)))
+		goto exit;
 	/*
 	 *      init pages 2 to npages-1 as log data pages:
@@ -2270,7 +2231,6 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
 	 * a circular file for the log records;
 	 * lpsn grows by 1 monotonically as each log page is written
 	 * to the circular file of the log;
-	 * Since the AIX DUMMY log record is dropped for this XJFS,
 	 * and setLogpage() will not reset the page number even if
 	 * the eor is equal to LOGPHDRSIZE. In order for binary search
 	 * still work in find log end process, we have to simulate the
@@ -2279,8 +2239,7 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
 	 * the succeeding log pages will have ascending order of
 	 * the lspn starting from 0, ... (N-2)
 	 */
-	lp = (logpage_t *) bp->cm_cdata;
+	lp = (logpage_t *) bp->l_ldata;
 	/*
 	 * initialize 1st log page to be written: lpsn = N - 1,
 	 * a SYNCPT log record is written to this page
@@ -2295,8 +2254,11 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
 	lrd_ptr->length = 0;
 	lrd_ptr->log.syncpt.sync = 0;
-	bp->cm_blkno += npbperpage;
+	bp->l_blkno += sbi->nbperpage;
-	rawWrite(ipmnt, bp, 0);
+	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
+	lbmStartIO(bp);
+	if ((rc = lbmIOWait(bp, 0)))
+		goto exit;
 	/*
 	 *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
@@ -2305,20 +2267,23 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
 		lp->h.page = lp->t.page = cpu_to_le32(lspn);
 		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
-		bp->cm_blkno += npbperpage;
+		bp->l_blkno += sbi->nbperpage;
-		rawWrite(ipmnt, bp, 0);
+		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
+		lbmStartIO(bp);
+		if ((rc = lbmIOWait(bp, 0)))
+			goto exit;
 	}
+	rc = 0;
+exit:
 	/*
 	 *      finalize log
 	 */
 	/* release the buffer */
-	rawRelease(bp);
+	lbmFree(bp);
 	return rc;
 }
-#endif				/* _STILL_TO_PORT */
 #ifdef CONFIG_JFS_STATISTICS
 int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,

--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -379,8 +379,7 @@ typedef struct jfs_log {
 	int size;		/* 4: log size in log page (in page) */
 	int l2bsize;		/* 4: log2 of bsize */
-	uint flag;		/* 4: flag */
+	long flag;		/* 4: flag */
-	uint state;		/* 4: state */
 	struct lbuf *lbuf_free;	/* 4: free lbufs */
 	wait_queue_head_t free_wait;	/* 4: */
@@ -396,7 +395,6 @@ typedef struct jfs_log {
 	/* syncpt */
 	int nextsync;		/* 4: bytes to write before next syncpt */
 	int active;		/* 4: */
-	int syncbarrier;	/* 4: */
 	wait_queue_head_t syncwait;	/* 4: */
 	/* commit */
@@ -420,6 +418,13 @@ typedef struct jfs_log {
 	char uuid[16];		/* 16: 128-bit uuid of log device */
 } log_t;
+/*
+ * Log flag
+ */
+#define log_INLINELOG	1
+#define log_SYNCBARRIER	2
+#define log_QUIESCE	3
 /*
 * group commit flag
 */
@@ -499,8 +504,8 @@ extern int lmLogOpen(struct super_block *sb, log_t ** log);
 extern void lmLogWait(log_t * log);
 extern int lmLogClose(struct super_block *sb, log_t * log);
 extern int lmLogSync(log_t * log, int nosyncwait);
-extern int lmLogQuiesce(log_t * log);
+extern int lmLogShutdown(log_t * log);
-extern int lmLogResume(log_t * log, struct super_block *sb);
+extern int lmLogInit(log_t * log);
-extern int lmLogFormat(struct super_block *sb, s64 logAddress, int logSize);
+extern int lmLogFormat(log_t *log, s64 logAddress, int logSize);
 #endif				/* _H_JFS_LOGMGR */
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/buffer_head.h>
+#include <linux/mempool.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
@@ -27,11 +28,6 @@
 #include "jfs_debug.h"
 extern struct task_struct *jfsCommitTask;
-static unsigned int metapages = 1024;	/* ??? Need a better number */
-static unsigned int free_metapages;
-static metapage_t *metapage_buf;
-static unsigned long meta_order;
-static metapage_t *meta_free_list = NULL;
 static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
 static wait_queue_head_t meta_wait;
@@ -93,12 +89,51 @@ static inline void lock_metapage(struct metapage *mp)
 		__lock_metapage(mp);
 }
-int __init metapage_init(void)
+#define METAPOOL_MIN_PAGES 32
+static kmem_cache_t *metapage_cache;
+static mempool_t *metapage_mempool;
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
-	int i;
+	metapage_t *mp = (metapage_t *)foo;
-	metapage_t *last = NULL;
-	metapage_t *mp;
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		mp->lid = 0;
+		mp->lsn = 0;
+		mp->flag = 0;
+		mp->data = NULL;
+		mp->clsn = 0;
+		mp->log = NULL;
+		set_bit(META_free, &mp->flag);
+		init_waitqueue_head(&mp->wait);
+	}
+}
+static inline metapage_t *alloc_metapage(int no_wait)
+{
+	return mempool_alloc(metapage_mempool, no_wait ? GFP_ATOMIC : GFP_NOFS);
+}
+static inline void free_metapage(metapage_t *mp)
+{
+	mp->flag = 0;
+	set_bit(META_free, &mp->flag);
+	mempool_free(mp, metapage_mempool);
+}
+static void *mp_mempool_alloc(int gfp_mask, void *pool_data)
+{
+	return kmem_cache_alloc(metapage_cache, gfp_mask);
+}
+static void mp_mempool_free(void *element, void *pool_data)
+{
+	return kmem_cache_free(metapage_cache, element);
+}
+int __init metapage_init(void)
+{
 	/*
 	 * Initialize wait queue
 	 */
@@ -107,30 +142,18 @@ int __init metapage_init(void)
 	/*
 	 * Allocate the metapage structures
 	 */
-	for (meta_order = 0;
+	metapage_cache = kmem_cache_create("jfs_mp", sizeof(metapage_t), 0, 0,
-	     ((PAGE_SIZE << meta_order) / sizeof(metapage_t)) < metapages;
+					   init_once, NULL);
-	     meta_order++);
+	if (metapage_cache == NULL)
-	metapages = (PAGE_SIZE << meta_order) / sizeof(metapage_t);
+		return -ENOMEM;
-	jFYI(1, ("metapage_init: metapage size = %Zd, metapages = %d\n",
-		 sizeof(metapage_t), metapages));
-	metapage_buf =
+	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mp_mempool_alloc,
-	    (metapage_t *) __get_free_pages(GFP_KERNEL, meta_order);
+					  mp_mempool_free, NULL);
-	assert(metapage_buf);
-	memset(metapage_buf, 0, PAGE_SIZE << meta_order);
-	mp = metapage_buf;
+	if (metapage_mempool == NULL) {
-	for (i = 0; i < metapages; i++, mp++) {
+		kmem_cache_destroy(metapage_cache);
-		mp->flag = 0;
+		return -ENOMEM;
-		set_bit(META_free, &mp->flag);
-		init_waitqueue_head(&mp->wait);
-		mp->hash_next = last;
-		last = mp;
 	}
-	meta_free_list = last;
-	free_metapages = metapages;
 	/*
 	 * Now the hash list
 	 */
@@ -147,64 +170,8 @@ int __init metapage_init(void)
 void metapage_exit(void)
 {
-	free_pages((unsigned long) metapage_buf, meta_order);
+	mempool_destroy(metapage_mempool);
-	free_pages((unsigned long) hash_table, hash_order);
+	kmem_cache_destroy(metapage_cache);
-	metapage_buf = 0;	/* This is a signal to the jfsIOwait thread */
-}
-/*
- * Get metapage structure from freelist
- * 
- * Caller holds meta_lock
- */
-static metapage_t *alloc_metapage(int *dropped_lock)
-{
-	metapage_t *new;
-	*dropped_lock = FALSE;
-	/*
-	 * Reserve two metapages for the lazy commit thread.  Otherwise
-	 * we may deadlock with holders of metapages waiting for tlocks
-	 * that lazy thread should be freeing.
-	 */
-	if ((free_metapages < 3) && (current != jfsCommitTask)) {
-		INCREMENT(mpStat.allocwait);
-		*dropped_lock = TRUE;
-		__SLEEP_COND(meta_wait, (free_metapages > 2),
-			     spin_lock(&meta_lock), spin_unlock(&meta_lock));
-	}
-	assert(meta_free_list);
-	new = meta_free_list;
-	meta_free_list = new->hash_next;
-	free_metapages--;
-	return new;
-}
-/*
- * Put metapage on freelist (holding meta_lock)
- */
-static inline void __free_metapage(metapage_t * mp)
-{
-	mp->flag = 0;
-	set_bit(META_free, &mp->flag);
-	mp->hash_next = meta_free_list;
-	meta_free_list = mp;
-	free_metapages++;
-	wake_up(&meta_wait);
-}
-/*
- * Put metapage on freelist (not holding meta_lock)
- */
-static inline void free_metapage(metapage_t * mp)
-{
-	spin_lock(&meta_lock);
-	__free_metapage(mp);
-	spin_unlock(&meta_lock);
 }
 /*
@@ -295,19 +262,18 @@ static int direct_bmap(struct address_space *mapping, long block)
 }
 struct address_space_operations direct_aops = {
-	readpage:	direct_readpage,
+	.readpage	= direct_readpage,
-	writepage:	direct_writepage,
+	.writepage	= direct_writepage,
-	sync_page:	block_sync_page,
+	.sync_page	= block_sync_page,
-	prepare_write:	direct_prepare_write,
+	.prepare_write	= direct_prepare_write,
-	commit_write:	generic_commit_write,
+	.commit_write	= generic_commit_write,
-	bmap:		direct_bmap,
+	.bmap		= direct_bmap,
 };
 metapage_t *__get_metapage(struct inode *inode,
 			   unsigned long lblock, unsigned int size,
 			   int absolute, unsigned long new)
 {
-	int dropped_lock;
 	metapage_t **hash_ptr;
 	int l2BlocksPerPage;
 	int l2bsize;
@@ -354,16 +320,42 @@ metapage_t *__get_metapage(struct inode *inode,
 			return NULL;
 		}
-		mp = alloc_metapage(&dropped_lock);
+		/*
-		if (dropped_lock) {
+		 * Locks held on aggregate inode pages are usually
-			/* alloc_metapage blocked, we need to search the hash
+		 * not held long, and they are taken in critical code
-			 * again.  (The goto is ugly, maybe we'll clean this
+		 * paths (committing dirty inodes, txCommit thread) 
-			 * up in the future.)
+		 * 
+		 * Attempt to get metapage without blocking, tapping into
+		 * reserves if necessary.
+		 */
+		mp = NULL;
+		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
+			mp =  mempool_alloc(metapage_mempool, GFP_ATOMIC);
+			if (!mp) {
+				/*
+				 * mempool is supposed to protect us from
+				 * failing here.  We will try a blocking
+				 * call, but a deadlock is possible here
 				 */
+				printk(KERN_WARNING
+				       "__get_metapage: atomic call to mempool_alloc failed.\n");
+				printk(KERN_WARNING
+				       "Will attempt blocking call\n");
+			}
+		}
+		if (!mp) {
 			metapage_t *mp2;
+			spin_unlock(&meta_lock);
+			mp =  mempool_alloc(metapage_mempool, GFP_NOFS);
+			spin_lock(&meta_lock);
+			/* we dropped the meta_lock, we need to search the
+			 * hash again.
+			 */
 			mp2 = search_hash(hash_ptr, mapping, lblock);
 			if (mp2) {
-				__free_metapage(mp);
+				free_metapage(mp);
 				mp = mp2;
 				goto page_found;
 			}
@@ -416,7 +408,7 @@ metapage_t *__get_metapage(struct inode *inode,
 	remove_from_hash(mp, hash_ptr);
 	if (!absolute)
 		list_del(&mp->inode_list);
-	__free_metapage(mp);
+	free_metapage(mp);
 	spin_unlock(&meta_lock);
 	return NULL;
 }
@@ -631,12 +623,10 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
 	len += sprintf(buffer,
 		       "JFS Metapage statistics\n"
 		       "=======================\n"
-		       "metapages in use = %d\n"
 		       "page allocations = %d\n"
 		       "page frees = %d\n"
 		       "lock waits = %d\n"
 		       "allocation waits = %d\n",
-		       metapages - free_metapages,
 		       mpStat.pagealloc,
 		       mpStat.pagefree,
 		       mpStat.lockwait,

--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -95,7 +95,7 @@ int jfs_mount(struct super_block *sb)
 		goto errout20;
 	}
-	ipaimap = diReadSpecial(sb, AGGREGATE_I);
+	ipaimap = diReadSpecial(sb, AGGREGATE_I, 0);
 	if (ipaimap == NULL) {
 		jERROR(1, ("jfs_mount: Faild to read AGGREGATE_I\n"));
 		rc = EIO;
@@ -118,7 +118,7 @@ int jfs_mount(struct super_block *sb)
 	/*
 	 * open aggregate block allocation map
 	 */
-	ipbmap = diReadSpecial(sb, BMAP_I);
+	ipbmap = diReadSpecial(sb, BMAP_I, 0);
 	if (ipbmap == NULL) {
 		rc = EIO;
 		goto errout22;
@@ -148,7 +148,7 @@ int jfs_mount(struct super_block *sb)
 	 * table.
 	 */
 	if ((sbi->mntflag & JFS_BAD_SAIT) == 0) {
-		ipaimap2 = diReadSpecial(sb, AGGREGATE_I + INOSPEREXT);
+		ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1);
 		if (ipaimap2 == 0) {
 			jERROR(1,
 			       ("jfs_mount: Faild to read AGGREGATE_I\n"));
@@ -178,7 +178,7 @@ int jfs_mount(struct super_block *sb)
 	/*
 	 * open fileset inode allocation map (aka fileset inode)
 	 */
-	ipimap = diReadSpecial(sb, FILESYSTEM_I);
+	ipimap = diReadSpecial(sb, FILESYSTEM_I, 0);
 	if (ipimap == NULL) {
 		jERROR(1, ("jfs_mount: Failed to read FILESYSTEM_I\n"));
 		/* open fileset secondary inode allocation map */
@@ -410,6 +410,7 @@ static int chkSuper(struct super_block *sb)
 		memcpy(sbi->uuid, j_sb->s_uuid, sizeof(sbi->uuid));
 		memcpy(sbi->loguuid, j_sb->s_loguuid, sizeof(sbi->uuid));
 	}
+	sbi->fsckpxd = j_sb->s_fsckpxd;
 	sbi->ait2 = j_sb->s_ait2;
      out:

--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -318,11 +318,12 @@ tid_t txBegin(struct super_block *sb, int flag)
 	TXN_LOCK();
      retry:
-	if (flag != COMMIT_FORCE) {
+	if (!(flag & COMMIT_FORCE)) {
 		/*
 		 * synchronize with logsync barrier
 		 */
-		if (log->syncbarrier) {
+		if (test_bit(log_SYNCBARRIER, &log->flag) ||
+		    test_bit(log_QUIESCE, &log->flag)) {
 			TXN_SLEEP(&log->syncwait);
 			goto retry;
 		}
@@ -330,8 +331,8 @@ tid_t txBegin(struct super_block *sb, int flag)
 	if (flag == 0) {
 		/*
 		 * Don't begin transaction if we're getting starved for tlocks
-		 * unless COMMIT_FORCE (imap changes) or COMMIT_INODE (which
+		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
-		 * may ultimately free tlocks)
+		 * free tlocks)
 		 */
 		if (TlocksLow) {
 			TXN_SLEEP(&TxAnchor.lowlockwait);
@@ -411,7 +412,8 @@ void txBeginAnon(struct super_block *sb)
 	/*
 	 * synchronize with logsync barrier
 	 */
-	if (log->syncbarrier) {
+	if (test_bit(log_SYNCBARRIER, &log->flag) ||
+	    test_bit(log_QUIESCE, &log->flag)) {
 		TXN_SLEEP(&log->syncwait);
 		goto retry;
 	}
@@ -490,14 +492,14 @@ void txEnd(tid_t tid)
 	/*
 	 * synchronize with logsync barrier
 	 */
-	if (log->syncbarrier && log->active == 0) {
+	if (test_bit(log_SYNCBARRIER, &log->flag) && log->active == 0) {
 		/* forward log syncpt */
 		/* lmSync(log); */
 		jFYI(1, ("     log barrier off: 0x%x\n", log->lsn));
 		/* enable new transactions start */
-		log->syncbarrier = 0;
+		clear_bit(log_SYNCBARRIER, &log->flag);
 		/* wakeup all waitors for logsync barrier */
 		TXN_WAKEUP(&log->syncwait);
@@ -823,36 +825,21 @@ static void txRelease(tblock_t * tblk)
 *
 * FUNCTION:    Initiates pageout of pages modified by tid in journalled
 *              objects and frees their lockwords.
- *
- * PARAMETER:
- *              flag    -
- *
- * RETURN:      Errors from subroutines.
 */
-static void txUnlock(tblock_t * tblk, int flag)
+static void txUnlock(tblock_t * tblk)
 {
 	tlock_t *tlck;
 	linelock_t *linelock;
 	lid_t lid, next, llid, k;
 	metapage_t *mp;
 	log_t *log;
-	int force;
 	int difft, diffp;
 	jFYI(1, ("txUnlock: tblk = 0x%p\n", tblk));
 	log = (log_t *) JFS_SBI(tblk->sb)->log;
-	force = flag & COMMIT_FLUSH;
-	if (log->syncbarrier)
-		force |= COMMIT_FORCE;
 	/*
 	 * mark page under tlock homeok (its log has been written):
-	 * if caller has specified FORCE (e.g., iRecycle()), or
-	 * if syncwait for the log is set (i.e., the log sync point
-	 * has fallen behind), or
-	 * if syncpt is set for the page, or
-	 * if the page is new, initiate pageout;
-	 * otherwise, leave the page in memory.
 	 */
 	for (lid = tblk->next; lid; lid = next) {
 		tlck = lid_to_tlock(lid);
@@ -1268,7 +1255,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	txRelease(tblk);
 	if ((tblk->flag & tblkGC_LAZY) == 0)
-		txUnlock(tblk, flag);
+		txUnlock(tblk);
 	/*
@@ -2753,7 +2740,7 @@ void txLazyCommit(tblock_t * tblk)
 	spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 	if (tblk->flag & tblkGC_LAZY) {
-		txUnlock(tblk, 0);
+		txUnlock(tblk);
 		tblk->flag &= ~tblkGC_LAZY;
 		txEnd(tblk - TxBlock);	/* Convert back to tid */
 	}
@@ -2887,6 +2874,77 @@ static void LogSyncRelease(metapage_t * mp)
 	release_metapage(mp);
 }
+/*
+ *	txQuiesce
+ *
+ *	Block all new transactions and push anonymous transactions to
+ *	completion
+ *
+ *	This does almost the same thing as jfs_sync below.  We don't
+ *	worry about deadlocking when TlocksLow is set, since we would
+ *	expect jfs_sync to get us out of that jam.
+ *
+ *	PARAMETER:
+ *		sb	- superblock; its log is taken from JFS_SBI(sb)->log
+ *
+ *	RETURN:	none
+ *
+ *	The log_QUIESCE bit set here is tested by txBegin (when called
+ *	without COMMIT_FORCE) and by txBeginAnon, which sleep on
+ *	log->syncwait while it is set.  This function does not clear the
+ *	bit; txResume does that and wakes the sleepers.
+ *
+ *	TXN_LOCK is held while the anonymous lists are examined and is
+ *	dropped around each commit.
+ *
+ *	NOTE(review): the value txCommit returns is stored in rc but is
+ *	never examined, so a failed commit is not reported to the caller.
+ */
+void txQuiesce(struct super_block *sb)
+{
+	struct inode *ip;
+	struct jfs_inode_info *jfs_ip;
+	log_t *log = JFS_SBI(sb)->log;
+	int rc;		/* assigned from txCommit below, never read */
+	tid_t tid;
+	set_bit(log_QUIESCE, &log->flag);	/* from here on, new non-FORCE transactions wait */
+	TXN_LOCK();
+restart:
+	while (!list_empty(&TxAnchor.anon_list)) {
+		jfs_ip = list_entry(TxAnchor.anon_list.next,
+				    struct jfs_inode_info,
+				    anon_inode_list);
+		ip = &jfs_ip->vfs_inode;
+		/*
+		 * inode will be removed from anonymous list
+		 * when it is committed
+		 *
+		 * The loop always takes the current head of anon_list, so
+		 * it only terminates if each commit really does remove the
+		 * inode from the list.
+		 *
+		 * COMMIT_FORCE is required here: without it txBegin would
+		 * sleep on the log_QUIESCE bit this function has just set.
+		 * commit_sem is taken after txBegin and released after
+		 * txEnd.
+		 */
+		TXN_UNLOCK();
+		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
+		down(&jfs_ip->commit_sem);
+		rc = txCommit(tid, 1, &ip, 0);
+		txEnd(tid);
+		up(&jfs_ip->commit_sem);
+		/*
+		 * Just to be safe.  I don't know how
+		 * long we can run without blocking
+		 */
+		cond_resched();
+		TXN_LOCK();	/* re-take before re-testing anon_list */
+	}
+	/*
+	 * If jfs_sync is running in parallel, there could be some inodes
+	 * on anon_list2.  Let's check.
+	 *
+	 * Anything found there is moved back onto anon_list, anon_list2
+	 * is re-initialized as empty, and the commit loop is run again.
+	 */
+	if (!list_empty(&TxAnchor.anon_list2)) {
+		list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
+		INIT_LIST_HEAD(&TxAnchor.anon_list2);
+		goto restart;
+	}
+	TXN_UNLOCK();
+}
+/*
+ * txResume()
+ *
+ * Allows transactions to start again following txQuiesce
+ *
+ * PARAMETER:
+ *	sb	- superblock; its log is taken from JFS_SBI(sb)->log
+ *
+ * RETURN:	none
+ *
+ * Clears the log_QUIESCE bit first and only then wakes everything
+ * sleeping on log->syncwait, which is the queue txBegin and txBeginAnon
+ * sleep on while the bit is set.  Those callers re-test the log flags
+ * after waking, so a waiter goes back to sleep if log_SYNCBARRIER is
+ * still set; this function does not touch that bit.
+ */
+void txResume(struct super_block *sb)
+{
+	log_t *log = JFS_SBI(sb)->log;
+	clear_bit(log_QUIESCE, &log->flag);
+	TXN_WAKEUP(&log->syncwait);
+}
 /*
 *      jfs_sync(void)
 *
@@ -2898,6 +2956,8 @@ int jfs_sync(void)
 {
 	struct inode *ip;
 	struct jfs_inode_info *jfs_ip;
+	int rc;
+	tid_t tid;
 	lock_kernel();
@@ -2927,17 +2987,20 @@ int jfs_sync(void)
 			ip = &jfs_ip->vfs_inode;
 			/*
-			 * We must release the TXN_LOCK since our
+			 * down_trylock returns 0 on success.  This is
-			 * IWRITE_TRYLOCK implementation may still block
+			 * inconsistent with spin_trylock.
 			 */
-			TXN_UNLOCK();
+			if (! down_trylock(&jfs_ip->commit_sem)) {
-			if (IWRITE_TRYLOCK(ip)) {
 				/*
 				 * inode will be removed from anonymous list
 				 * when it is committed
 				 */
-				jfs_commit_inode(ip, 0);
+				TXN_UNLOCK();
-				IWRITE_UNLOCK(ip);
+				tid = txBegin(ip->i_sb,
+					      COMMIT_INODE | COMMIT_FORCE);
+				rc = txCommit(tid, 1, &ip, 0);
+				txEnd(tid);
+				up(&jfs_ip->commit_sem);
 				/*
 				 * Just to be safe.  I don't know how
 				 * long we can run without blocking
@@ -2945,21 +3008,12 @@ int jfs_sync(void)
 				cond_resched();
 				TXN_LOCK();
 			} else {
-				/* We can't get the write lock.  It may
+				/* We can't get the commit semaphore.  It may
 				 * be held by a thread waiting for tlock's
 				 * so let's not block here.  Save it to
 				 * put back on the anon_list.
 				 */
-				/*
-				 * We released TXN_LOCK, let's make sure
-				 * this inode is still there
-				 */
-				TXN_LOCK();
-				if (TxAnchor.anon_list.next !=
-				    &jfs_ip->anon_inode_list)
-					continue;
 				/* Take off anon_list */
 				list_del(&jfs_ip->anon_inode_list);

--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -310,4 +310,7 @@ extern void txFreelock(struct inode *ip);
 extern int lmLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck);
+extern void txQuiesce(struct super_block *sb);
+extern void txResume(struct super_block *sb);
 #endif				/* _H_JFS_TXNMGR */
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -2373,7 +2373,6 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
 }
-#ifdef _STILL_TO_PORT
 /*
 *      xtAppend()
 *
@@ -2392,7 +2391,7 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
 * return:
 */
 int xtAppend(tid_t tid,		/* transaction id */
-	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,	/* @GD1 */
+	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,	
 	     s32 * xlenp,	/* (in/out) */
 	     s64 * xaddrp,	/* (in/out) */
 	     int flag)
@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 	pxdlist.maxnpxd = pxdlist.npxd = 0;
 	pxd = &pxdlist.pxd[0];
 	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
-	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {	/* @GD1 */
+	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {	
 		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
 			PXDaddress(pxd, xaddr);
 			PXDlength(pxd, nblocks);
@@ -2475,7 +2474,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 		goto out;
 	}
-	xlen = min(xlen, maxblocks);	/* @GD1 */
+	xlen = min(xlen, maxblocks);	
 	/*
 	 * allocate data extent requested
@@ -2528,7 +2527,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
 	xtlck->lwm.offset =
-	    (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
+	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
 	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
 	    xtlck->lwm.offset;
@@ -2541,7 +2540,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 	return rc;
 }
+#ifdef _STILL_TO_PORT
 /* - TBD for defragmentaion/reorganization -
 *

--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -69,8 +69,6 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
 	jFYI(1, ("jfs_create: dip:0x%p name:%s\n", dip, dentry->d_name.name));
-	IWRITE_LOCK(dip);
 	/*
 	 * search parent directory for entry/freespace
 	 * (dtSearch() returns parent directory page pinned)
@@ -91,12 +89,12 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
 	tid = txBegin(dip->i_sb, 0);
+	down(&JFS_IP(dip)->commit_sem);
+	down(&JFS_IP(ip)->commit_sem);
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jERROR(1, ("jfs_create: dtSearch returned %d\n", rc));
-		ip->i_nlink = 0;
+		goto out3;
-		iput(ip);
-		txEnd(tid);
-		goto out2;
 	}
 	tblk = tid_to_tblock(tid);
@@ -118,16 +116,11 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
 	ino = ip->i_ino;
 	if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
 		jERROR(1, ("jfs_create: dtInsert returned %d\n", rc));
-		/* discard new inode */
-		ip->i_nlink = 0;
-		iput(ip);
 		if (rc == EIO)
 			txAbort(tid, 1);	/* Marks Filesystem dirty */
 		else
 			txAbort(tid, 0);	/* Filesystem full */
-		txEnd(tid);
+		goto out3;
-		goto out2;
 	}
 	ip->i_op = &jfs_file_inode_operations;
@@ -143,14 +136,21 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
 	mark_inode_dirty(dip);
 	rc = txCommit(tid, 2, &iplist[0], 0);
+      out3:
 	txEnd(tid);
+	up(&JFS_IP(dip)->commit_sem);
+	up(&JFS_IP(ip)->commit_sem);
+	if (rc) {
+		ip->i_nlink = 0;
+		iput(ip);
+	}
      out2:
 	free_UCSname(&dname);
      out1:
-	IWRITE_UNLOCK(dip);
 	jFYI(1, ("jfs_create: rc:%d\n", -rc));
 	return -rc;
 }
@@ -184,8 +184,6 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	jFYI(1, ("jfs_mkdir: dip:0x%p name:%s\n", dip, dentry->d_name.name));
-	IWRITE_LOCK(dip);
 	/* link count overflow on parent directory ? */
 	if (dip->i_nlink == JFS_LINK_MAX) {
 		rc = EMLINK;
@@ -212,12 +210,12 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	tid = txBegin(dip->i_sb, 0);
+	down(&JFS_IP(dip)->commit_sem);
+	down(&JFS_IP(ip)->commit_sem);
 	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
 		jERROR(1, ("jfs_mkdir: dtSearch returned %d\n", rc));
-		ip->i_nlink = 0;
+		goto out3;
-		iput(ip);
-		txEnd(tid);
-		goto out2;
 	}
 	tblk = tid_to_tblock(tid);
@@ -239,16 +237,12 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	ino = ip->i_ino;
 	if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
 		jERROR(1, ("jfs_mkdir: dtInsert returned %d\n", rc));
-		/* discard new directory inode */
-		ip->i_nlink = 0;
-		iput(ip);
 		if (rc == EIO)
 			txAbort(tid, 1);	/* Marks Filesystem dirty */
 		else
 			txAbort(tid, 0);	/* Filesystem full */
-		txEnd(tid);
+		goto out3;
-		goto out2;
 	}
 	ip->i_nlink = 2;	/* for '.' */
@@ -267,15 +261,21 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 	mark_inode_dirty(dip);
 	rc = txCommit(tid, 2, &iplist[0], 0);
+      out3:
 	txEnd(tid);
+	up(&JFS_IP(dip)->commit_sem);
+	up(&JFS_IP(ip)->commit_sem);
+	if (rc) {
+		ip->i_nlink = 0;
+		iput(ip);
+	}
      out2:
 	free_UCSname(&dname);
      out1:
-	IWRITE_UNLOCK(dip);
 	jFYI(1, ("jfs_mkdir: rc:%d\n", -rc));
 	return -rc;
 }
@@ -311,24 +311,21 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 	jFYI(1, ("jfs_rmdir: dip:0x%p name:%s\n", dip, dentry->d_name.name));
-	IWRITE_LOCK_LIST(2, dip, ip);
 	/* directory must be empty to be removed */
 	if (!dtEmpty(ip)) {
-		IWRITE_UNLOCK(ip);
-		IWRITE_UNLOCK(dip);
 		rc = ENOTEMPTY;
 		goto out;
 	}
 	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) {
-		IWRITE_UNLOCK(ip);
-		IWRITE_UNLOCK(dip);
 		goto out;
 	}
 	tid = txBegin(dip->i_sb, 0);
+	down(&JFS_IP(dip)->commit_sem);
+	down(&JFS_IP(ip)->commit_sem);
 	iplist[0] = dip;
 	iplist[1] = ip;
@@ -345,9 +342,8 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 		if (rc == EIO)
 			txAbort(tid, 1);
 		txEnd(tid);
+		up(&JFS_IP(dip)->commit_sem);
-		IWRITE_UNLOCK(ip);
+		up(&JFS_IP(ip)->commit_sem);
-		IWRITE_UNLOCK(dip);
 		goto out2;
 	}
@@ -384,7 +380,8 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 	txEnd(tid);
-	IWRITE_UNLOCK(ip);
+	up(&JFS_IP(dip)->commit_sem);
+	up(&JFS_IP(ip)->commit_sem);
 	/*
 	 * Truncating the directory index table is not guaranteed.  It
@@ -397,8 +394,6 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 		clear_cflag(COMMIT_Stale, dip);
 	}
-	IWRITE_UNLOCK(dip);
      out2:
 	free_UCSname(&dname);
@@ -444,10 +439,13 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
 	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
 		goto out;
-	IWRITE_LOCK_LIST(2, ip, dip);
+	IWRITE_LOCK(ip);
 	tid = txBegin(dip->i_sb, 0);
+	down(&JFS_IP(dip)->commit_sem);
+	down(&JFS_IP(ip)->commit_sem);
 	iplist[0] = dip;
 	iplist[1] = ip;
@@ -460,8 +458,9 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		if (rc == EIO)
 			txAbort(tid, 1);	/* Marks FS Dirty */
 		txEnd(tid);
+		up(&JFS_IP(dip)->commit_sem);
+		up(&JFS_IP(ip)->commit_sem);
 		IWRITE_UNLOCK(ip);
-		IWRITE_UNLOCK(dip);
 		goto out1;
 	}
@@ -483,8 +482,9 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		if ((new_size = commitZeroLink(tid, ip)) < 0) {
 			txAbort(tid, 1);	/* Marks FS Dirty */
 			txEnd(tid);
+			up(&JFS_IP(dip)->commit_sem);
+			up(&JFS_IP(ip)->commit_sem);
 			IWRITE_UNLOCK(ip);
-			IWRITE_UNLOCK(dip);
 			rc = -new_size;		/* We return -rc */
 			goto out1;
 		}
@@ -511,8 +511,13 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
 	txEnd(tid);
+	up(&JFS_IP(dip)->commit_sem);
+	up(&JFS_IP(ip)->commit_sem);
 	while (new_size && (rc == 0)) {
 		tid = txBegin(dip->i_sb, 0);
+		down(&JFS_IP(ip)->commit_sem);
 		new_size = xtTruncate_pmap(tid, ip, new_size);
 		if (new_size < 0) {
 			txAbort(tid, 1);	/* Marks FS Dirty */
@@ -520,6 +525,7 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		} else
 			rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC);
 		txEnd(tid);
+		up(&JFS_IP(ip)->commit_sem);
 	}
 	if (ip->i_nlink == 0)
@@ -539,8 +545,6 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		clear_cflag(COMMIT_Stale, dip);
 	}
-	IWRITE_UNLOCK(dip);
      out1:
 	free_UCSname(&dname);
      out:
@@ -764,10 +768,11 @@ int jfs_link(struct dentry *old_dentry,
 	     ("jfs_link: %s %s\n", old_dentry->d_name.name,
 	      dentry->d_name.name));
-	IWRITE_LOCK_LIST(2, dir, ip);
 	tid = txBegin(ip->i_sb, 0);
+	down(&JFS_IP(dir)->commit_sem);
+	down(&JFS_IP(ip)->commit_sem);
 	if (ip->i_nlink == JFS_LINK_MAX) {
 		rc = EMLINK;
 		goto out;
@@ -801,11 +806,11 @@ int jfs_link(struct dentry *old_dentry,
 	rc = txCommit(tid, 2, &iplist[0], 0);
      out:
-	IWRITE_UNLOCK(dir);
-	IWRITE_UNLOCK(ip);
 	txEnd(tid);
+	up(&JFS_IP(dir)->commit_sem);
+	up(&JFS_IP(ip)->commit_sem);
 	jFYI(1, ("jfs_link: rc:%d\n", rc));
 	return -rc;
 }
@@ -849,12 +854,8 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
 	jFYI(1, ("jfs_symlink: dip:0x%p name:%s\n", dip, name));
-	IWRITE_LOCK(dip);
 	ssize = strlen(name) + 1;
-	tid = txBegin(dip->i_sb, 0);
 	/*
 	 * search parent directory for entry/freespace
 	 * (dtSearch() returns parent directory page pinned)
@@ -863,23 +864,24 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
 	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
 		goto out1;
-	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE)))
-		goto out2;
 	/*
 	 * allocate on-disk/in-memory inode for symbolic link:
 	 * (iAlloc() returns new, locked inode)
 	 */
 	ip = ialloc(dip, S_IFLNK | 0777);
 	if (ip == NULL) {
-		BT_PUTSEARCH(&btstack);
 		rc = ENOSPC;
 		goto out2;
 	}
+	tid = txBegin(dip->i_sb, 0);
+	down(&JFS_IP(dip)->commit_sem);
+	down(&JFS_IP(ip)->commit_sem);
+	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE)))
+		goto out3;
 	tblk = tid_to_tblock(tid);
 	tblk->xflag |= COMMIT_CREATE;
 	tblk->ip = ip;
@@ -895,9 +897,7 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
 	if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
 		jERROR(1, ("jfs_symlink: dtInsert returned %d\n", rc));
 		/* discard ne inode */
-		ip->i_nlink = 0;
+		goto out3;
-		iput(ip);
-		goto out2;
 	}
@@ -955,10 +955,8 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
 				if (mp == NULL) {
 					dtDelete(tid, dip, &dname, &ino,
 						 JFS_REMOVE);
-					ip->i_nlink = 0;
-					iput(ip);
 					rc = EIO;
-					goto out2;
+					goto out3;
 				}
 				memcpy(mp->data, name, copy_size);
 				flush_metapage(mp);
@@ -977,10 +975,8 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
 			ip->i_blocks = LBLK2PBLK(sb, xlen);
 		} else {
 			dtDelete(tid, dip, &dname, &ino, JFS_REMOVE);
-			ip->i_nlink = 0;
-			iput(ip);
 			rc = ENOSPC;
-			goto out2;
+			goto out3;
 		}
 	}
@@ -1008,14 +1004,19 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
 	} else
 		rc = txCommit(tid, 1, &iplist[0], 0);
-      out2:
+      out3:
+	txEnd(tid);
+	up(&JFS_IP(dip)->commit_sem);
+	up(&JFS_IP(ip)->commit_sem);
+	if (rc) {
+		ip->i_nlink = 0;
+		iput(ip);
+	}
+      out2:
 	free_UCSname(&dname);
-      out1:
-	IWRITE_UNLOCK(dip);
-	txEnd(tid);
+      out1:
 	jFYI(1, ("jfs_symlink: rc:%d\n", -rc));
 	return -rc;
 }
@@ -1054,19 +1055,6 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	old_ip = old_dentry->d_inode;
 	new_ip = new_dentry->d_inode;
-	if (old_dir == new_dir) {
-		if (new_ip)
-			IWRITE_LOCK_LIST(3, old_dir, old_ip, new_ip);
-		else
-			IWRITE_LOCK_LIST(2, old_dir, old_ip);
-	} else {
-		if (new_ip)
-			IWRITE_LOCK_LIST(4, old_dir, new_dir, old_ip,
-					 new_ip);
-		else
-			IWRITE_LOCK_LIST(3, old_dir, new_dir, old_ip);
-	}
 	if ((rc = get_UCSname(&old_dname, old_dentry,
 			      JFS_SBI(old_dir->i_sb)->nls_tab)))
 		goto out1;
@@ -1112,14 +1100,21 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			rc = EMLINK;
 			goto out3;
 		}
-	}
+	} else if (new_ip)
+		IWRITE_LOCK(new_ip);
 	/*
 	 * The real work starts here
 	 */
 	tid = txBegin(new_dir->i_sb, 0);
+	down(&JFS_IP(new_dir)->commit_sem);
+	down(&JFS_IP(old_ip)->commit_sem);
+	if (old_dir != new_dir)
+		down(&JFS_IP(old_dir)->commit_sem);
 	if (new_ip) {
+		down(&JFS_IP(new_ip)->commit_sem);
 		/*
 		 * Change existing directory entry to new inode number
 		 */
@@ -1247,14 +1242,24 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	/*
 	 * Don't unlock new_ip if COMMIT_HOLDLOCK is set
 	 */
-	if (new_ip && test_cflag(COMMIT_Holdlock, new_ip))
+	if (new_ip && test_cflag(COMMIT_Holdlock, new_ip)) {
+		up(&JFS_IP(new_ip)->commit_sem);
 		new_ip = 0;
+	}
      out4:
 	txEnd(tid);
+	up(&JFS_IP(new_dir)->commit_sem);
+	up(&JFS_IP(old_ip)->commit_sem);
+	if (old_dir != new_dir)
+		up(&JFS_IP(old_dir)->commit_sem);
+	if (new_ip)
+		up(&JFS_IP(new_ip)->commit_sem);
 	while (new_size && (rc == 0)) {
 		tid = txBegin(new_ip->i_sb, 0);
+		down(&JFS_IP(new_ip)->commit_sem);
 		new_size = xtTruncate_pmap(tid, new_ip, new_size);
 		if (new_size < 0) {
 			txAbort(tid, 1);
@@ -1262,6 +1267,7 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		} else
 			rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC);
 		txEnd(tid);
+		up(&JFS_IP(new_ip)->commit_sem);
 	}
 	if (new_ip && (new_ip->i_nlink == 0))
 		set_cflag(COMMIT_Nolink, new_ip);
@@ -1270,12 +1276,8 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
      out2:
 	free_UCSname(&old_dname);
      out1:
-	IWRITE_UNLOCK(old_ip);
+	if (new_ip && !S_ISDIR(new_ip->i_mode))
-	if (old_dir != new_dir)
-		IWRITE_UNLOCK(new_dir);
-	if (new_ip)
 		IWRITE_UNLOCK(new_ip);
 	/*
 	 * Truncating the directory index table is not guaranteed.  It
 	 * may need to be done iteratively
@@ -1287,8 +1289,6 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		clear_cflag(COMMIT_Stale, old_dir);
 	}
-	IWRITE_UNLOCK(old_dir);
 	jFYI(1, ("jfs_rename: returning %d\n", rc));
 	return -rc;
 }
@@ -1315,8 +1315,6 @@ int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
 	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dir->i_sb)->nls_tab)))
 		goto out;
-	IWRITE_LOCK(dir);
 	ip = ialloc(dir, mode);
 	if (ip == NULL) {
 		rc = ENOSPC;
@@ -1325,24 +1323,19 @@ int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
 	tid = txBegin(dir->i_sb, 0);
-	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) {
+	down(&JFS_IP(dir)->commit_sem);
-		ip->i_nlink = 0;
+	down(&JFS_IP(ip)->commit_sem);
-		iput(ip);
-		txEnd(tid);
+	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
-		goto out1;
+		goto out3;
-	}
 	tblk = tid_to_tblock(tid);
 	tblk->xflag |= COMMIT_CREATE;
 	tblk->ip = ip;
 	ino = ip->i_ino;
-	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) {
+	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
-		ip->i_nlink = 0;
+		goto out3;
-		iput(ip);
-		txEnd(tid);
-		goto out1;
-	}
 	init_special_inode(ip, ip->i_mode, rdev);
@@ -1357,10 +1350,17 @@ int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
 	iplist[0] = dir;
 	iplist[1] = ip;
 	rc = txCommit(tid, 2, iplist, 0);
+      out3:
 	txEnd(tid);
+	up(&JFS_IP(ip)->commit_sem);
+	up(&JFS_IP(dir)->commit_sem);
+	if (rc) {
+		ip->i_nlink = 0;
+		iput(ip);
+	}
      out1:
-	IWRITE_UNLOCK(dir);
 	free_UCSname(&dname);
      out:
@@ -1389,9 +1389,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry)
 		if ((rc =
 		     get_UCSname(&key, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
 			return ERR_PTR(-rc);
-		IREAD_LOCK(dip);
 		rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP);
-		IREAD_UNLOCK(dip);
 		free_UCSname(&key);
 		if (rc == ENOENT) {
 			d_add(dentry, NULL);

--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
+/*
+ *   Copyright (c) International Business Machines  Corp., 2000-2002
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or 
+ *   (at your option) any later version.
+ * 
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software 
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_metapage.h"
+#include "jfs_dinode.h"
+#include "jfs_imap.h"
+#include "jfs_dmap.h"
+#include "jfs_superblock.h"
+#include "jfs_txnmgr.h"
+#include "jfs_debug.h"
+/* size helpers used by jfs_extendfs() */
+#define L2MEGABYTE      20
+#define MEGABYTE        (1 << L2MEGABYTE)
+#define MEGABYTE32      (MEGABYTE << 5)
+#define BITSPERPAGE     (PSIZE << 3)
+/* map a block number to the corresponding bmap file page number */
+#define BLKTODMAPN(b)\
+        (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
+/*
+ *      jfs_extendfs()
+ *
+ * function: extend (grow) a mounted file system in place;
+ *
+ *   |-------------------------------|----------|----------|
+ *   file system space               fsck       inline log
+ *                                   workspace  space
+ *
+ * input:
+ *      sb: super block of the mounted file system
+ *      newLVSize: new volume size, in LV blocks (required)
+ *      newLogSize: new inline log size, in megabytes; 0 selects the
+ *                  default (only used when the log is inline)
+ *
+ * new configuration:
+ * 1. set new LogSize as specified or default from new LVSize;
+ * 2. compute new FSCKSize from new LVSize;
+ * 3. set new FSSize as LVSize - (LogSize + FSCKSize) where
+ *    new FSSize >= old FSSize is required,
+ *    i.e., file system must not be shrunk;
+ *
+ * return:
+ *      0 on success, or when the volume has not grown (nothing to do);
+ *      non-zero on failure.  Errors detected directly by this function
+ *      are negative errno values, since the remount path hands the
+ *      result straight back to its caller; codes returned by helper
+ *      routines are passed through unchanged.
+ *
+ * Once the file system has been quiesced, every failure goes through
+ * error_out, which marks the superblock FM_DIRTY before transactions
+ * are resumed.
+ */
+int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
+{
+	int rc = 0;
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	struct inode *ipbmap = sbi->ipbmap;
+	struct inode *ipbmap2;
+	struct inode *ipimap = sbi->ipimap;
+	log_t *log = sbi->log;
+	bmap_t *bmp = sbi->bmap;
+	s64 newLogAddress, newFSCKAddress;
+	int newFSCKSize;
+	s64 newMapSize = 0, mapSize;
+	s64 XAddress, XSize, nblocks, xoff, xaddr, t64;
+	s64 oldLVSize;
+	s64 newFSSize;
+	s64 VolumeSize;
+	int newNpages = 0, nPages, newPage, xlen, t32;
+	int tid;
+	int log_formatted = 0;
+	struct inode *iplist[1];
+	struct jfs_superblock *j_sb, *j_sb2;
+	metapage_t *sbp, *sb2p;
+	uint old_agsize;
+	struct buffer_head *bh;
+	/* If the volume hasn't grown, get out now */
+	if (sbi->mntflag & JFS_INLINELOG)
+		oldLVSize = addressPXD(&sbi->logpxd) + lengthPXD(&sbi->logpxd);
+	else
+		oldLVSize = addressPXD(&sbi->fsckpxd) +
+		    lengthPXD(&sbi->fsckpxd);
+	if (oldLVSize >= newLVSize) {
+		printk(KERN_WARNING
+		       "jfs_extendfs: volume hasn't grown, returning\n");
+		goto out;
+	}
+	VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
+	if (VolumeSize) {
+		if (newLVSize > VolumeSize) {
+			printk(KERN_WARNING "jfs_extendfs: invalid size\n");
+			rc = -EINVAL;
+			goto out;
+		}
+	} else {
+		/*
+		 * device size unknown: check the device by reading the
+		 * last block of the requested size
+		 */
+		bh = sb_bread(sb, newLVSize - 1);
+		if (!bh) {
+			printk(KERN_WARNING "jfs_extendfs: invalid size\n");
+			rc = -EINVAL;
+			goto out;
+		}
+		bforget(bh);
+	}
+	/* Can't extend write-protected drive */
+	if (isReadOnly(ipbmap)) {
+		printk(KERN_WARNING "jfs_extendfs: read-only file system\n");
+		rc = -EROFS;
+		goto out;
+	}
+	/*
+	 *      reconfigure LV spaces
+	 *      ---------------------
+	 *
+	 * validate new size, or, if not specified, determine new size
+	 */
+	/*
+	 * reconfigure inline log space:
+	 */
+	if ((sbi->mntflag & JFS_INLINELOG)) {
+		if (newLogSize == 0) {
+			/*
+			 * no size specified: default to 1/256 of aggregate
+			 * size; rounded up to a megabyte boundary;
+			 */
+			newLogSize = newLVSize >> 8;
+			t32 = (1 << (20 - sbi->l2bsize)) - 1;
+			newLogSize = (newLogSize + t32) & ~t32;
+			newLogSize =
+			    min(newLogSize, MEGABYTE32 >> sbi->l2bsize);
+		} else {
+			/*
+			 * convert the newLogSize to fs blocks.
+			 *
+			 * Since this is given in megabytes, it will always be
+			 * an even number of pages.
+			 */
+			newLogSize = (newLogSize * MEGABYTE) >> sbi->l2bsize;
+		}
+	} else
+		newLogSize = 0;
+	newLogAddress = newLVSize - newLogSize;
+	/*
+	 * reconfigure fsck work space:
+	 *
+	 * configure it to the end of the logical volume regardless of
+	 * whether file system extends to the end of the aggregate;
+	 * Need enough 4k pages to cover:
+	 *  - 1 bit per block in aggregate rounded up to BPERDMAP boundary
+	 *  - 1 extra page to handle control page and intermediate level pages
+	 *  - 50 extra pages for the chkdsk service log
+	 */
+	t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP)
+	    << L2BPERDMAP;
+	t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50;
+	newFSCKSize = t32 << sbi->l2nbperpage;
+	newFSCKAddress = newLogAddress - newFSCKSize;
+	/*
+	 * compute new file system space;
+	 */
+	newFSSize = newLVSize - newLogSize - newFSCKSize;
+	/*
+	 * file system cannot be shrunk; report it with a negative errno
+	 * like the other parameter checks above
+	 */
+	if (newFSSize < bmp->db_mapsize) {
+		rc = -EINVAL;
+		goto out;
+	}
+	/*
+	 * If we're expanding enough that the inline log does not overlap
+	 * the old one, we can format the new log before we quiesce the
+	 * filesystem.
+	 */
+	if ((sbi->mntflag & JFS_INLINELOG) && (newLogAddress > oldLVSize)) {
+		if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
+			goto out;
+		log_formatted = 1;
+	}
+	/*
+	 *      quiesce file system
+	 *
+	 * (prepare to move the inline log and to prevent map update)
+	 *
+	 * block any new transactions and wait for completion of
+	 * all wip transactions and flush modified pages s.t.
+	 * on-disk file system is in consistent state and
+	 * log is not required for recovery.
+	 *
+	 * from here on, failures must leave through error_out/resume
+	 * so that txResume() is always called.
+	 */
+	txQuiesce(sb);
+	if (sbi->mntflag & JFS_INLINELOG) {
+		/*
+		 * deactivate old inline log
+		 */
+		lmLogShutdown(log);
+		/*
+		 * mark on-disk super block for fs in transition;
+		 *
+		 * update on-disk superblock for the new space configuration
+		 * of inline log space and fsck work space descriptors:
+		 * N.B. FS descriptor is NOT updated;
+		 *
+		 * crash recovery:
+		 * logredo(): if FM_EXTENDFS, return to fsck() for cleanup;
+		 * fsck(): if FM_EXTENDFS, reformat inline log and fsck
+		 * workspace from superblock inline log descriptor and fsck
+		 * workspace descriptor;
+		 */
+		/* read in superblock */
+		if ((rc = readSuper(sb, &sbp)))
+			goto error_out;
+		j_sb = (struct jfs_superblock *) (sbp->data);
+		/* mark extendfs() in progress */
+		j_sb->s_state |= cpu_to_le32(FM_EXTENDFS);
+		j_sb->s_xsize = cpu_to_le64(newFSSize);
+		PXDaddress(&j_sb->s_xfsckpxd, newFSCKAddress);
+		PXDlength(&j_sb->s_xfsckpxd, newFSCKSize);
+		PXDaddress(&j_sb->s_xlogpxd, newLogAddress);
+		PXDlength(&j_sb->s_xlogpxd, newLogSize);
+		/* synchronously update superblock */
+		flush_metapage(sbp);
+		/*
+		 * format new inline log synchronously;
+		 *
+		 * crash recovery: if log move in progress,
+		 * reformat log and exit success;
+		 */
+		if (!log_formatted)
+			if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
+				goto error_out;
+		/*
+		 * activate new log
+		 */
+		log->base = newLogAddress;
+		log->size = newLogSize >> (L2LOGPSIZE - sb->s_blocksize_bits);
+		if ((rc = lmLogInit(log)))
+			goto error_out;
+	}
+	/*
+	 *      extend block allocation map
+	 *      ---------------------------
+	 *
+	 * extendfs() for new extension, retry after crash recovery;
+	 *
+	 * note: both logredo() and fsck() rebuild map from
+	 * the bitmap and configuration parameter from superblock
+	 * (disregarding all other control information in the map);
+	 *
+	 * superblock:
+	 *  s_size: aggregate size in physical blocks;
+	 */
+	/*
+	 *      compute the new block allocation map configuration
+	 *
+	 * map dinode:
+	 *  di_size: map file size in byte;
+	 *  di_nblocks: number of blocks allocated for map file;
+	 *  di_mapsize: number of blocks in aggregate (covered by map);
+	 * map control page:
+	 *  db_mapsize: number of blocks in aggregate (covered by map);
+	 */
+	newMapSize = newFSSize;
+	/* number of data pages of new bmap file:
+	 * roundup new size to full dmap page boundary and
+	 * add 1 extra dmap page for next extendfs()
+	 */
+	t64 = (newMapSize - 1) + BPERDMAP;
+	newNpages = BLKTODMAPN(t64) + 1;
+	/*
+	 *      extend map from current map (WITHOUT growing mapfile)
+	 *
+	 * map new extension with unmapped part of the last partial
+	 * dmap page, if applicable, and extra page(s) allocated
+	 * at end of bmap by mkfs() or previous extendfs();
+	 */
+      extendBmap:
+	/* compute number of blocks requested to extend */
+	mapSize = bmp->db_mapsize;
+	XAddress = mapSize;	/* eXtension Address */
+	XSize = newMapSize - mapSize;	/* eXtension Size */
+	old_agsize = bmp->db_agsize;	/* We need to know if this changes */
+	/* compute number of blocks that can be extended by current mapfile */
+	t64 = dbMapFileSizeToMapSize(ipbmap);
+	if (mapSize > t64) {
+		printk(KERN_ERR
+		       "jfs_extendfs: mapSize (0x%llx) > t64 (0x%llx)\n",
+		       mapSize, t64);
+		/* negative errno, consistent with the checks above */
+		rc = -EIO;
+		goto error_out;
+	}
+	nblocks = min(t64 - mapSize, XSize);
+	/*
+	 * update map pages for new extension:
+	 *
+	 * update/init dmap and bubble up the control hierarchy
+	 * incrementally fold up dmaps into upper levels;
+	 * update bmap control page;
+	 */
+	if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
+		goto error_out;
+	/*
+	 * the map now has extended to cover additional nblocks:
+	 * dn_mapsize = oldMapsize + nblocks;
+	 */
+	/* ipbmap->i_mapsize += nblocks; */
+	XSize -= nblocks;
+	/*
+	 *      grow map file to cover remaining extension
+	 *      and/or one extra dmap page for next extendfs();
+	 *
+	 * allocate new map pages and its backing blocks, and
+	 * update map file xtree
+	 */
+	/* compute number of data pages of current bmap file */
+	nPages = ipbmap->i_size >> L2PSIZE;
+	/* need to grow map file ? */
+	if (nPages == newNpages)
+		goto updateImap;
+	/*
+	 * grow bmap file for the new map pages required:
+	 *
+	 * allocate growth at the start of newly extended region;
+	 * bmap file only grows sequentially, i.e., both data pages
+	 * and possibly xtree index pages may grow in append mode,
+	 * s.t. logredo() can reconstruct pre-extension state
+	 * by washing away bmap file of pages outside s_size boundary;
+	 */
+	/*
+	 * journal map file growth as if a regular file growth:
+	 * (note: bmap is created with di_mode = IFJOURNAL|IFREG);
+	 *
+	 * journaling of bmap file growth is not required since
+	 * logredo() do/can not use log records of bmap file growth
+	 * but it provides careful write semantics, pmap update, etc.;
+	 */
+	/* synchronous write of data pages: bmap data pages are
+	 * cached in meta-data cache, and not written out
+	 * by txCommit();
+	 */
+	filemap_fdatawait(ipbmap->i_mapping);
+	filemap_fdatawrite(ipbmap->i_mapping);
+	filemap_fdatawait(ipbmap->i_mapping);
+	diWriteSpecial(ipbmap, 0);
+	newPage = nPages;	/* first new page number */
+	xoff = newPage << sbi->l2nbperpage;
+	xlen = (newNpages - nPages) << sbi->l2nbperpage;
+	xlen = min(xlen, (int) nblocks) & ~(sbi->nbperpage - 1);
+	xaddr = XAddress;
+	tid = txBegin(sb, COMMIT_FORCE);
+	if ((rc = xtAppend(tid, ipbmap, 0, xoff, nblocks, &xlen, &xaddr, 0))) {
+		txEnd(tid);
+		goto error_out;
+	}
+	/* update bmap file size */
+	ipbmap->i_size += xlen << sbi->l2bsize;
+	ipbmap->i_blocks += LBLK2PBLK(sb, xlen);
+	iplist[0] = ipbmap;
+	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
+	txEnd(tid);
+	if (rc)
+		goto error_out;
+	/*
+	 * map file has been grown now to cover extension to further out;
+	 * di_size = new map file size;
+	 *
+	 * if huge extension, the previous extension based on previous
+	 * map file size may not have been sufficient to cover whole extension
+	 * (it could have been used up for new map pages),
+	 * but the newly grown map file now covers lot bigger new free space
+	 * available for further extension of map;
+	 */
+	/* any more blocks to extend ? */
+	if (XSize)
+		goto extendBmap;
+	/* finalize bmap */
+	dbFinalizeBmap(ipbmap);
+	/*
+	 *      update inode allocation map
+	 *      ---------------------------
+	 *
+	 * move iag lists from old to new iag;
+	 * agstart field is not updated for logredo() to reconstruct
+	 * iag lists if system crash occurs.
+	 * (computation of ag number from agstart based on agsize
+	 * will correctly identify the new ag);
+	 */
+      updateImap:
+	/* if new AG size the same as old AG size, done! */
+	if (bmp->db_agsize != old_agsize) {
+		if ((rc = diExtendFS(ipimap, ipbmap)))
+			goto error_out;
+		/* finalize imap */
+		if ((rc = diSync(ipimap)))
+			goto error_out;
+	}
+	/*
+	 *      finalize
+	 *      --------
+	 *
+	 * extension is committed when on-disk super block is
+	 * updated with new descriptors: logredo will recover
+	 * crash before it to pre-extension state;
+	 */
+	/* sync log to skip log replay of bmap file growth transaction; */
+	/* lmLogSync(log, 1); */
+	/*
+	 * synchronous write bmap global control page;
+	 * for crash before completion of write
+	 * logredo() will recover to pre-extendfs state;
+	 * for crash after completion of write,
+	 * logredo() will recover post-extendfs state;
+	 */
+	if ((rc = dbSync(ipbmap)))
+		goto error_out;
+	/*
+	 * copy primary bmap inode to secondary bmap inode
+	 */
+	ipbmap2 = diReadSpecial(sb, BMAP_I, 1);
+	if (ipbmap2 == NULL) {
+		printk(KERN_ERR "jfs_extendfs: diReadSpecial(bmap) failed\n");
+		/*
+		 * rc is still 0 from the successful dbSync() above; set an
+		 * error so this failure is not reported as success
+		 */
+		rc = -EIO;
+		goto error_out;
+	}
+	memcpy(&JFS_IP(ipbmap2)->i_xtroot, &JFS_IP(ipbmap)->i_xtroot, 288);
+	ipbmap2->i_size = ipbmap->i_size;
+	ipbmap2->i_blocks = ipbmap->i_blocks;
+	diWriteSpecial(ipbmap2, 1);
+	diFreeSpecial(ipbmap2);
+	/*
+	 *      update superblock
+	 */
+	if ((rc = readSuper(sb, &sbp)))
+		goto error_out;
+	j_sb = (struct jfs_superblock *) (sbp->data);
+	/* mark extendfs() completion */
+	j_sb->s_state &= cpu_to_le32(~FM_EXTENDFS);
+	/*
+	 * shift in CPU byte order first, then convert the result to
+	 * little-endian; shifting an already converted value would give
+	 * a wrong size on big-endian hosts
+	 */
+	j_sb->s_size = cpu_to_le64(bmp->db_mapsize <<
+				   le16_to_cpu(j_sb->s_l2bfactor));
+	j_sb->s_agsize = cpu_to_le32(bmp->db_agsize);
+	/* update inline log space descriptor */
+	if (sbi->mntflag & JFS_INLINELOG) {
+		PXDaddress(&(j_sb->s_logpxd), newLogAddress);
+		PXDlength(&(j_sb->s_logpxd), newLogSize);
+	}
+	/* record log's mount serial number */
+	j_sb->s_logserial = cpu_to_le32(log->serial);
+	/* update fsck work space descriptor */
+	PXDaddress(&(j_sb->s_fsckpxd), newFSCKAddress);
+	PXDlength(&(j_sb->s_fsckpxd), newFSCKSize);
+	j_sb->s_fscklog = 1;
+	/* sb->s_fsckloglen remains the same */
+	/* Update secondary superblock (skipped if it cannot be read) */
+	sb2p = read_metapage(sbi->direct_inode,
+			     SUPER2_OFF >> sb->s_blocksize_bits, PSIZE, 1);
+	if (sb2p) {
+		j_sb2 = (struct jfs_superblock *) (sb2p->data);
+		memcpy(j_sb2, j_sb, sizeof (struct jfs_superblock));
+		flush_metapage(sb2p);
+	}
+	/* write primary superblock */
+	flush_metapage(sbp);
+	goto resume;
+      error_out:
+	/* failure after quiesce: flag the file system as dirty */
+	updateSuper(sb, FM_DIRTY);
+      resume:
+	/*
+	 *      resume file system transactions
+	 */
+	txResume(sb);
+      out:
+	return rc;
+}
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -71,6 +71,7 @@ extern void jfs_delete_inode(struct inode *inode);
 extern void jfs_write_inode(struct inode *inode, int wait);
 extern struct dentry *jfs_get_parent(struct dentry *dentry);
+extern int jfs_extendfs(struct super_block *, s64, int);
 #ifdef PROC_FS_JFS		/* see jfs_debug.h */
 extern void jfs_proc_init(void);
@@ -119,7 +120,7 @@ static int jfs_statfs(struct super_block *sb, struct statfs *buf)
 	 */
 	maxinodes = min((s64) atomic_read(&imap->im_numinos) +
 			((sbi->bmap->db_nfree >> imap->im_l2nbperiext)
-			 << L2INOSPEREXT), (s64)0xffffffffLL);
+			 << L2INOSPEREXT), (s64) 0xffffffffLL);
 	buf->f_files = maxinodes;
 	buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
 				    atomic_read(&imap->im_numfree));
@@ -156,20 +157,23 @@ static void jfs_put_super(struct super_block *sb)
 	kfree(sbi);
 }
-static int parse_options (char * options, struct jfs_sb_info *sbi)
+static int parse_options(char *options, struct super_block *sb, s64 *newLVSize)
 {
 	void *nls_map = NULL;
-	char * this_char;
+	char *this_char;
-	char * value;
+	char *value;
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	*newLVSize = 0;
 	if (!options)
 		return 1;
-	while ((this_char = strsep (&options, ",")) != NULL) {
+	while ((this_char = strsep(&options, ",")) != NULL) {
 		if (!*this_char)
 			continue;
-		if ((value = strchr (this_char, '=')) != NULL)
+		if ((value = strchr(this_char, '=')) != NULL)
 			*value++ = 0;
-		if (!strcmp (this_char, "iocharset")) {
+		if (!strcmp(this_char, "iocharset")) {
 			if (!value || !*value)
 				goto needs_arg;
 			if (nls_map)	/* specified iocharset twice! */
@@ -179,14 +183,25 @@ static int parse_options (char * options, struct jfs_sb_info *sbi)
 				printk(KERN_ERR "JFS: charset not found\n");
 				goto cleanup;
 			}
+		} else if (!strcmp(this_char, "resize")) {
+			if (!value || !*value) {
+				*newLVSize = sb->s_bdev->bd_inode->i_size >>
+					sb->s_blocksize_bits;
+				if (*newLVSize == 0)
+					printk(KERN_ERR
+					 "JFS: Cannot determine volume size\n");
+			} else
+				*newLVSize = simple_strtoull(value, &value, 0);
 			/* Silently ignore the quota options */
-		} else if (!strcmp (this_char, "grpquota")
+		} else if (!strcmp(this_char, "grpquota")
-		         || !strcmp (this_char, "noquota")
+			   || !strcmp(this_char, "noquota")
-		         || !strcmp (this_char, "quota")
+			   || !strcmp(this_char, "quota")
-		         || !strcmp (this_char, "usrquota"))
+			   || !strcmp(this_char, "usrquota"))
 			/* Don't do anything ;-) */ ;
 		else {
-			printk ("jfs: Unrecognized mount option %s\n", this_char);
+			printk("jfs: Unrecognized mount option %s\n",
+			       this_char);
 			goto cleanup;
 		}
 	}
@@ -208,10 +223,22 @@ static int parse_options (char * options, struct jfs_sb_info *sbi)
 int jfs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	s64 newLVSize = 0;
+	int rc = 0;
-	if (!parse_options(data, sbi)) {
+	if (!parse_options(data, sb, &newLVSize)) {
 		return -EINVAL;
 	}
+	if (newLVSize) {
+		if (sb->s_flags & MS_RDONLY) {
+			printk(KERN_ERR
+		  "JFS: resize requires volume to be mounted read-write\n");
+			return -EROFS;
+		}
+		rc = jfs_extendfs(sb, newLVSize, 0);
+		if (rc)
+			return rc;
+	}
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
 		/*
@@ -232,20 +259,26 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	struct jfs_sb_info *sbi;
 	struct inode *inode;
 	int rc;
+	s64 newLVSize = 0;
 	jFYI(1, ("In jfs_read_super: s_flags=0x%lx\n", sb->s_flags));
-	sbi = kmalloc(sizeof(struct jfs_sb_info), GFP_KERNEL);
+	sbi = kmalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
 	if (!sbi)
 		return -ENOSPC;
-	memset(sbi, 0, sizeof(struct jfs_sb_info));
+	memset(sbi, 0, sizeof (struct jfs_sb_info));
 	sb->u.generic_sbp = sbi;
-	if (!parse_options((char *)data, sbi)) {
+	if (!parse_options((char *) data, sb, &newLVSize)) {
 		kfree(sbi);
 		return -EINVAL;
 	}
+	if (newLVSize) {
+		printk(KERN_ERR "resize option for remount only\n");
+		return -EINVAL;
+	}
 	/*
 	 * Initialize blocksize to 4K.
 	 */
@@ -276,8 +309,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (rc) {
 		if (!silent) {
 			jERROR(1,
-			       ("jfs_mount failed w/return code = %d\n",
+			       ("jfs_mount failed w/return code = %d\n", rc));
-				rc));
 		}
 		goto out_mount_failed;
 	}
@@ -314,7 +346,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	 * Page cache is indexed by long.
 	 * I would use MAX_LFS_FILESIZE, but it's only half as big
 	 */
-	sb->s_maxbytes = min(((u64)PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
+	sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
 #endif
 	return 0;
@@ -379,15 +411,17 @@ extern int txInit(void);
 extern void txExit(void);
 extern void metapage_exit(void);
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 {
 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
 		INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
 		INIT_LIST_HEAD(&jfs_ip->mp_list);
-		RDWRLOCK_INIT(&jfs_ip->rdwrlock);
+		init_rwsem(&jfs_ip->rdwrlock);
+		init_MUTEX(&jfs_ip->commit_sem);
+		jfs_ip->atlhead = 0;
 		inode_init_once(&jfs_ip->vfs_inode);
 	}
 }
@@ -397,9 +431,8 @@ static int __init init_jfs_fs(void)
 	int rc;
 	jfs_inode_cachep =
-	    kmem_cache_create("jfs_ip",
+	    kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, 0,
-			    sizeof(struct jfs_inode_info),
+			      init_once, NULL);
-			    0, 0, init_once, NULL);
 	if (jfs_inode_cachep == NULL)
 		return -ENOMEM;
@@ -425,37 +458,32 @@ static int __init init_jfs_fs(void)
 	 * I/O completion thread (endio)
 	 */
 	jfsIOthread = kernel_thread(jfsIOWait, 0,
-				    CLONE_FS | CLONE_FILES |
+				    CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-				    CLONE_SIGHAND);
 	if (jfsIOthread < 0) {
 		jERROR(1,
-		       ("init_jfs_fs: fork failed w/rc = %d\n",
+		       ("init_jfs_fs: fork failed w/rc = %d\n", jfsIOthread));
-			jfsIOthread));
 		goto end_txmngr;
 	}
-	wait_for_completion(&jfsIOwait);	/* Wait until IO thread starts */
+	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */
 	jfsCommitThread = kernel_thread(jfs_lazycommit, 0,
-					CLONE_FS | CLONE_FILES |
+					CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-					CLONE_SIGHAND);
 	if (jfsCommitThread < 0) {
 		jERROR(1,
 		       ("init_jfs_fs: fork failed w/rc = %d\n",
 			jfsCommitThread));
 		goto kill_iotask;
 	}
-	wait_for_completion(&jfsIOwait);	/* Wait until IO thread starts */
+	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */
 	jfsSyncThread = kernel_thread(jfs_sync, 0,
-				      CLONE_FS | CLONE_FILES |
+				      CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
-				      CLONE_SIGHAND);
 	if (jfsSyncThread < 0) {
 		jERROR(1,
-		       ("init_jfs_fs: fork failed w/rc = %d\n",
+		       ("init_jfs_fs: fork failed w/rc = %d\n", jfsSyncThread));
-			jfsSyncThread));
 		goto kill_committask;
 	}
-	wait_for_completion(&jfsIOwait);	/* Wait until IO thread starts */
+	wait_for_completion(&jfsIOwait);	/* Wait until thread starts */
 #ifdef PROC_FS_JFS
 	jfs_proc_init();
@@ -463,15 +491,14 @@ static int __init init_jfs_fs(void)
 	return register_filesystem(&jfs_fs_type);
 kill_committask:
 	jfs_stop_threads = 1;
 	wake_up(&jfs_commit_thread_wait);
-	wait_for_completion(&jfsIOwait);	/* Wait until Commit thread exits */
+	wait_for_completion(&jfsIOwait);	/* Wait for thread exit */
 kill_iotask:
 	jfs_stop_threads = 1;
 	wake_up(&jfs_IO_thread_wait);
-	wait_for_completion(&jfsIOwait);	/* Wait until IO thread exits */
+	wait_for_completion(&jfsIOwait);	/* Wait for thread exit */
 end_txmngr:
 	txExit();
 free_metapage: