Merge jfs@jfs.bkbits.net:linux-2.5

into shaggy.austin.ibm.com:/shaggy/bk/jfs-2.5

Merge jfs@jfs.bkbits.net:linux-2.5
into shaggy.austin.ibm.com:/shaggy/bk/jfs-2.5
821d7f2d · Dave Kleikamp · 9f96ca62 · 3d3dfabe · 821d7f2d · 821d7f2d
Commit 821d7f2d authored Dec 04, 2002 by Dave Kleikamp
13 changed files
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -27,6 +27,7 @@


 extern int jfs_commit_inode(struct inode *, int);
+extern void jfs_truncate(struct inode *);

 int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
@@ -43,59 +44,6 @@ int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	return rc ? -EIO : 0;
 }

-/*
- * Guts of jfs_truncate.  Called with locks already held.  Can be called
- * with directory for truncating directory index table.
- */
-void jfs_truncate_nolock(struct inode *ip, loff_t length)
-{
-	loff_t newsize;
-	tid_t tid;
-
-	ASSERT(length >= 0);
-
-	if (test_cflag(COMMIT_Nolink, ip)) {
-		xtTruncate(0, ip, length, COMMIT_WMAP);
-		return;
-	}
-
-	do {
-		tid = txBegin(ip->i_sb, 0);
-
-		/*
-		 * The commit_sem cannot be taken before txBegin.
-		 * txBegin may block and there is a chance the inode
-		 * could be marked dirty and need to be committed
-		 * before txBegin unblocks
-		 */
-		down(&JFS_IP(ip)->commit_sem);
-
-		newsize = xtTruncate(tid, ip, length,
-				     COMMIT_TRUNCATE | COMMIT_PWMAP);
-		if (newsize < 0) {
-			txEnd(tid);
-			up(&JFS_IP(ip)->commit_sem);
-			break;
-		}
-
-		ip->i_mtime = ip->i_ctime = CURRENT_TIME;
-		mark_inode_dirty(ip);
-
-		txCommit(tid, 1, &ip, 0);
-		txEnd(tid);
-		up(&JFS_IP(ip)->commit_sem);
-	} while (newsize > length);	/* Truncate isn't always atomic */
-}
-
-static void jfs_truncate(struct inode *ip)
-{
-	jFYI(1, ("jfs_truncate: size = 0x%lx\n", (ulong) ip->i_size));
-
-	IWRITE_LOCK(ip);
-	jfs_truncate_nolock(ip, ip->i_size);
-	IWRITE_UNLOCK(ip);
-}
-
 static int jfs_open(struct inode *inode, struct file *file)
 {
 	int rc;

--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -59,7 +59,7 @@ struct inode *jfs_iget(struct super_block *sb, ino_t ino)
 		inode->i_mapping->a_ops = &jfs_aops;
 		inode->i_mapping->gfp_mask = GFP_NOFS;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (inode->i_size > IDATASIZE) {
+		if (inode->i_size >= IDATASIZE) {
 			inode->i_op = &page_symlink_inode_operations;
 			inode->i_mapping->a_ops = &jfs_aops;
 		} else
@@ -330,3 +330,58 @@ struct address_space_operations jfs_aops = {
 	.bmap		= jfs_bmap,
 	.direct_IO	= jfs_direct_IO,
 };
+
+/*
+ * Guts of jfs_truncate.  Called with locks already held.  Can be called
+ * with directory for truncating directory index table.
+ */
+void jfs_truncate_nolock(struct inode *ip, loff_t length)
+{
+	loff_t newsize;
+	tid_t tid;
+
+	ASSERT(length >= 0);
+
+	if (test_cflag(COMMIT_Nolink, ip)) {
+		xtTruncate(0, ip, length, COMMIT_WMAP);
+		return;
+	}
+
+	do {
+		tid = txBegin(ip->i_sb, 0);
+
+		/*
+		 * The commit_sem cannot be taken before txBegin.
+		 * txBegin may block and there is a chance the inode
+		 * could be marked dirty and need to be committed
+		 * before txBegin unblocks
+		 */
+		down(&JFS_IP(ip)->commit_sem);
+
+		newsize = xtTruncate(tid, ip, length,
+				     COMMIT_TRUNCATE | COMMIT_PWMAP);
+		if (newsize < 0) {
+			txEnd(tid);
+			up(&JFS_IP(ip)->commit_sem);
+			break;
+		}
+
+		ip->i_mtime = ip->i_ctime = CURRENT_TIME;
+		mark_inode_dirty(ip);
+
+		txCommit(tid, 1, &ip, 0);
+		txEnd(tid);
+		up(&JFS_IP(ip)->commit_sem);
+	} while (newsize > length);	/* Truncate isn't always atomic */
+}
+
+void jfs_truncate(struct inode *ip)
+{
+	jFYI(1, ("jfs_truncate: size = 0x%lx\n", (ulong) ip->i_size));
+
+	block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
+
+	IWRITE_LOCK(ip);
+	jfs_truncate_nolock(ip, ip->i_size);
+	IWRITE_UNLOCK(ip);
+}
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -100,6 +100,7 @@ static int loglevel_write(struct file *file, const char *buffer,

 #ifdef CONFIG_JFS_STATISTICS
 extern read_proc_t jfs_lmstats_read;
+extern read_proc_t jfs_txstats_read;
 extern read_proc_t jfs_xtstat_read;
 extern read_proc_t jfs_mpstat_read;
 #endif
@@ -111,6 +112,7 @@ static struct {
 } Entries[] = {
 #ifdef CONFIG_JFS_STATISTICS
 	{ "lmstats",	jfs_lmstats_read, },
+	{ "txstats",	jfs_txstats_read, },
 	{ "xtstat",	jfs_xtstat_read, },
 	{ "mpstat",	jfs_mpstat_read, },
 #endif

--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -199,6 +199,28 @@ static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);

 #define ciToUpper(c)	UniStrupr((c)->name)

+/*
+ *	read_index_page()
+ *
+ *	Reads a page of a directory's index table.
+ *	Having metadata mapped into the directory inode's address space
+ *	presents a multitude of problems.  We avoid this by mapping to
+ *	the absolute address space outside of the *_metapage routines
+ */
+static struct metapage *read_index_page(struct inode *inode, s64 blkno)
+{
+	int rc;
+	s64 xaddr;
+	int xflag;
+	s32 xlen;
+
+	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
+	if (rc || (xlen == 0))
+		return NULL;
+
+	return read_metapage(inode, xaddr, PSIZE, 1);
+}
+
 /*
 *	find_index()
 *
@@ -208,7 +230,7 @@ static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);
 *	mp must be released by caller.
 */
 static struct dir_table_slot *find_index(struct inode *ip, u32 index,
-					 struct metapage ** mp)
+					 struct metapage ** mp, s64 *lblock)
 {
 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 	s64 blkno;
@@ -243,12 +265,14 @@ static struct dir_table_slot *find_index(struct inode *ip, u32 index,
 		blkno = ((offset + 1) >> L2PSIZE) <<
 		    JFS_SBI(ip->i_sb)->l2nbperpage;

-		if (*mp && ((*mp)->index != blkno)) {
+		if (*mp && (*lblock != blkno)) {
 			release_metapage(*mp);
 			*mp = 0;
 		}
-		if (*mp == 0)
-			*mp = read_metapage(ip, blkno, PSIZE, 0);
+		if (*mp == 0) {
+			*lblock = blkno;
+			*mp = read_index_page(ip, blkno);
+		}
 		if (*mp == 0) {
 			jERROR(1,
 			       ("free_index: error reading directory table\n"));
@@ -368,7 +392,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 		ip->i_size = PSIZE;
 		ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage);

-		if ((mp = get_metapage(ip, 0, ip->i_blksize, 0)) == 0) {
+		if ((mp = read_index_page(ip, 0)) == 0) {
 			jERROR(1, ("add_index: get_metapage failed!\n"));
 			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
 			return -1;
@@ -411,12 +435,12 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 		ip->i_size += PSIZE;
 		ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage);

-		if ((mp = get_metapage(ip, blkno, PSIZE, 0)))
+		if ((mp = read_index_page(ip, blkno)))
 			memset(mp->data, 0, PSIZE);	/* Just looks better */
 		else
 			xtTruncate(tid, ip, offset, COMMIT_PWMAP);
 	} else
-		mp = read_metapage(ip, blkno, PSIZE, 0);
+		mp = read_index_page(ip, blkno);

 	if (mp == 0) {
 		jERROR(1, ("add_index: get/read_metapage failed!\n"));
@@ -445,9 +469,10 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
 {
 	struct dir_table_slot *dirtab_slot;
+	s64 lblock;
 	struct metapage *mp = 0;

-	dirtab_slot = find_index(ip, index, &mp);
+	dirtab_slot = find_index(ip, index, &mp, &lblock);

 	if (dirtab_slot == 0)
 		return;
@@ -470,11 +495,11 @@ static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
 *	Changes an entry in the directory index table
 */
 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
-			 int slot, struct metapage ** mp)
+			 int slot, struct metapage ** mp, s64 *lblock)
 {
 	struct dir_table_slot *dirtab_slot;

-	dirtab_slot = find_index(ip, index, mp);
+	dirtab_slot = find_index(ip, index, mp, lblock);

 	if (dirtab_slot == 0)
 		return;
@@ -497,10 +522,11 @@ static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
 static int read_index(struct inode *ip, u32 index,
 		     struct dir_table_slot * dirtab_slot)
 {
+	s64 lblock;
 	struct metapage *mp = 0;
 	struct dir_table_slot *slot;

-	slot = find_index(ip, index, &mp);
+	slot = find_index(ip, index, &mp, &lblock);
 	if (slot == 0) {
 		return -EIO;
 	}
@@ -1491,12 +1517,14 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 	 * Update directory index table for entries now in right page
 	 */
 	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
+		s64 lblock;
+
 		mp = 0;
 		stbl = DT_GETSTBL(rp);
 		for (n = 0; n < rp->header.nextindex; n++) {
 			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
 			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
-				     rbn, n, &mp);
+				     rbn, n, &mp, &lblock);
 		}
 		if (mp)
 			release_metapage(mp);
@@ -1616,6 +1644,8 @@ static int dtExtendPage(tid_t tid,
 		 * Update directory index table to reflect new page address
 		 */
 		if (DO_INDEX(ip)) {
+			s64 lblock;
+
 			mp = 0;
 			stbl = DT_GETSTBL(sp);
 			for (n = 0; n < sp->header.nextindex; n++) {
@@ -1623,7 +1653,7 @@ static int dtExtendPage(tid_t tid,
 				    (struct ldtentry *) & sp->slot[stbl[n]];
 				modify_index(tid, ip,
 					     le32_to_cpu(ldtentry->index),
-					     xaddr, n, &mp);
+					     xaddr, n, &mp, &lblock);
 			}
 			if (mp)
 				release_metapage(mp);
@@ -1911,6 +1941,7 @@ static int dtSplitRoot(tid_t tid,
 	 * Update directory index table for entries now in right page
 	 */
 	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
+		s64 lblock;
 		struct metapage *mp = 0;
 		struct ldtentry *ldtentry;

@@ -1918,7 +1949,7 @@ static int dtSplitRoot(tid_t tid,
 		for (n = 0; n < rp->header.nextindex; n++) {
 			ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
 			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
-				     rbn, n, &mp);
+				     rbn, n, &mp, &lblock);
 		}
 		if (mp)
 			release_metapage(mp);
@@ -2120,6 +2151,8 @@ int dtDelete(tid_t tid,
 		 * Update directory index table for entries moved in stbl
 		 */
 		if (DO_INDEX(ip) && index < p->header.nextindex) {
+			s64 lblock;
+
 			imp = 0;
 			stbl = DT_GETSTBL(p);
 			for (i = index; i < p->header.nextindex; i++) {
@@ -2127,7 +2160,7 @@ int dtDelete(tid_t tid,
 				    (struct ldtentry *) & p->slot[stbl[i]];
 				modify_index(tid, ip,
 					     le32_to_cpu(ldtentry->index),
-					     bn, i, &imp);
+					     bn, i, &imp, &lblock);
 			}
 			if (imp)
 				release_metapage(imp);
@@ -2769,12 +2802,6 @@ void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
 			set_cflag(COMMIT_Stale, ip);

 			tblk->xflag = xflag_save;
-			/*
-			 * Tells jfs_metapage code that the metadata pages
-			 * for the index table are no longer useful, and
-			 * remove them from page cache.
-			 */
-			invalidate_inode_metapages(ip);
 		} else
 			ip->i_size = 1;

@@ -3919,6 +3946,8 @@ static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
 		memmove(stbl + index + 1, stbl + index, nextindex - index);

 		if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
+			s64 lblock;
+
 			/*
 			 * Need to update slot number for entries that moved
 			 * in the stbl
@@ -3928,7 +3957,7 @@ static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
 				lh = (struct ldtentry *) & (p->slot[stbl[n]]);
 				modify_index(data->leaf.tid, data->leaf.ip,
 					     le32_to_cpu(lh->index), bn, n,
-					     &mp);
+					     &mp, &lblock);
 			}
 			if (mp)
 				release_metapage(mp);

--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -1170,7 +1170,7 @@ int diFree(struct inode *ip)
 	 * invalidate any page of the inode extent freed from buffer cache;
 	 */
 	freepxd = iagp->inoext[extno];
-	invalidate_pxd_metapages(ip->i_sb->s_bdev->bd_inode, freepxd);
+	invalidate_pxd_metapages(ip, freepxd);

 	/*
 	 *      update iag list(s) (careful update step 2)

--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -54,7 +54,6 @@ struct jfs_inode_info {
 	lid_t	atlhead;	/* anonymous tlock list head	*/
 	lid_t	atltail;	/* anonymous tlock list tail	*/
 	struct list_head anon_inode_list; /* inodes having anonymous txns */
-	struct list_head mp_list; /* metapages in inode's address space */
 	/*
 	 * rdwrlock serializes xtree between reads & writes and synchronizes
 	 * changes to special inodes.  It's use would be redundant on

--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -197,6 +197,8 @@ struct lmStat {
 	uint commit;		/* # of commit */
 	uint pagedone;		/* # of page written */
 	uint submitted;		/* # of pages submitted */
+	uint full_page;		/* # of full pages submitted */
+	uint partial_page;	/* # of partial pages submitted */
 } lmStat;
 #endif

@@ -597,21 +599,21 @@ static int lmNextPage(struct jfs_log * log)
 		/* mark tblk for end-of-page */
 		tblk->flag |= tblkGC_EOP;

-		/* if page is not already on write queue,
-		 * just enqueue (no lbmWRITE to prevent redrive)
-		 * buffer to wqueue to ensure correct serial order
-		 * of the pages since log pages will be added
-		 * continuously (tblk bound with the page hasn't
-		 * got around to init write of the page, either
-		 * preempted or the page got filled by its COMMIT
-		 * record);
-		 * pages with COMMIT are paged out explicitly by
-		 * tblk in lmGroupCommit();
-		 */
-		if (bp->l_wqnext == NULL) {
-			/* bp->l_ceor = bp->l_eor; */
-			/* lp->h.eor = lp->t.eor = bp->l_ceor; */
-			lbmWrite(log, bp, 0, 0);
+		if (log->cflag & logGC_PAGEOUT) {
+			/* if page is not already on write queue,
+			 * just enqueue (no lbmWRITE to prevent redrive)
+			 * buffer to wqueue to ensure correct serial order
+			 * of the pages since log pages will be added
+			 * continuously
+			 */
+			if (bp->l_wqnext == NULL)
+				lbmWrite(log, bp, 0, 0);
+		} else {
+			/*
+			 * No current GC leader, initiate group commit
+			 */
+			log->cflag |= logGC_PAGEOUT;
+			lmGCwrite(log, 0);
 		}
 	}
 	/* page is not bound with outstanding tblk:
@@ -678,10 +680,17 @@ int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 		LOGGC_UNLOCK(log);
 		return rc;
 	}
-	jFYI(1,
-	     ("lmGroup Commit: tblk = 0x%p, gcrtc = %d\n", tblk,
-	      log->gcrtc));
+	jFYI(1, ("lmGroup Commit: tblk = 0x%p, gcrtc = %d\n", tblk,
+		 log->gcrtc));

+	if (tblk->xflag & COMMIT_LAZY) {
+		/*
+		 * Lazy transactions can leave now
+		 */
+		tblk->flag |= tblkGC_LAZY;
+		LOGGC_UNLOCK(log);
+		return 0;
+	}
 	/*
 	 * group commit pageout in progress
 	 */
@@ -709,12 +718,6 @@ int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
 	/* upcount transaction waiting for completion
 	 */
 	log->gcrtc++;
-
-	if (tblk->xflag & COMMIT_LAZY) {
-		tblk->flag |= tblkGC_LAZY;
-		LOGGC_UNLOCK(log);
-		return 0;
-	}
 	tblk->flag |= tblkGC_READY;

 	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
@@ -785,6 +788,7 @@ void lmGCwrite(struct jfs_log * log, int cant_write)
 			bp->l_ceor));
 		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
 			 cant_write);
+		INCREMENT(lmStat.full_page);
 	}
 	/* page is not yet full */
 	else {
@@ -794,6 +798,7 @@ void lmGCwrite(struct jfs_log * log, int cant_write)
 		       ("gc: tclsn:0x%x, bceor:0x%x\n", tblk->clsn,
 			bp->l_ceor));
 		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
+		INCREMENT(lmStat.partial_page);
 	}
 }

@@ -881,11 +886,15 @@ void lmPostGC(struct lbuf * bp)

 	/* are there any transactions who have entered lnGroupCommit()
 	 * (whose COMMITs are after that of the last log page written.
-	 * They are waiting for new group commit (above at (SLEEP 1)):
+	 * They are waiting for new group commit (above at (SLEEP 1))
+	 * or lazy transactions are on a full (queued) log page,
 	 * select the latest ready transaction as new group leader and
 	 * wake her up to lead her group.
 	 */
-	if ((log->gcrtc > 0) && log->cqueue.head)
+	if ((tblk = log->cqueue.head) &&
+	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
+	     test_bit(log_SYNCBARRIER, &log->flag) ||
+	     test_bit(log_QUIESCE, &log->flag)))
 		/*
 		 * Call lmGCwrite with new group leader
 		 */
@@ -1042,6 +1051,16 @@ int lmLogSync(struct jfs_log * log, int nosyncwait)
 			 log->syncpt));
 	}

+	/*
+	 * We may have to initiate group commit
+	 */
+	LOGGC_LOCK(log);
+	if (log->cqueue.head && !(log->cflag & logGC_PAGEOUT)) {
+		log->cflag |= logGC_PAGEOUT;
+		lmGCwrite(log, 0);
+	}
+	LOGGC_UNLOCK(log);
+
 	return lsn;
 }

@@ -1407,6 +1426,22 @@ void lmLogWait(struct jfs_log *log)

 	jFYI(1, ("lmLogWait: log:0x%p\n", log));

+	/*
+	 * This ensures that we will keep writing to the journal as long
+	 * as there are unwritten commit records
+	 */
+	set_bit(log_QUIESCE, &log->flag);
+
+	/*
+	 * Initiate I/O on outstanding transactions
+	 */
+	LOGGC_LOCK(log);
+	if (log->cqueue.head && !(log->cflag & logGC_PAGEOUT)) {
+		log->cflag |= logGC_PAGEOUT;
+		lmGCwrite(log, 0);
+	}
+	LOGGC_UNLOCK(log);
+
 	if (log->cqueue.head || !list_empty(&log->synclist)) {
 		/*
 		 * If there was very recent activity, we may need to wait
@@ -1423,6 +1458,8 @@ void lmLogWait(struct jfs_log *log)
 	}
 	assert(log->cqueue.head == NULL);
 	assert(list_empty(&log->synclist));
+
+	clear_bit(log_QUIESCE, &log->flag);	/* Probably not needed */
 }

 /*
@@ -2312,10 +2349,14 @@ int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
 		       "================\n"
 		       "commits = %d\n"
 		       "writes submitted = %d\n"
-		       "writes completed = %d\n",
+		       "writes completed = %d\n"
+		       "full pages submitted = %d\n"
+		       "partial pages submitted = %d\n",
 		       lmStat.commit,
 		       lmStat.submitted,
-		       lmStat.pagedone);
+		       lmStat.pagedone,
+		       lmStat.full_page,
+		       lmStat.partial_page);

 	begin = offset;
 	*start = buffer + begin;

--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -317,8 +317,6 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 		mp->page = 0;
 		mp->logical_size = size;
 		add_to_hash(mp, hash_ptr);
-		if (!absolute)
-			list_add(&mp->inode_list, &JFS_IP(inode)->mp_list);
 		spin_unlock(&meta_lock);

 		if (new) {
@@ -351,8 +349,6 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 freeit:
 	spin_lock(&meta_lock);
 	remove_from_hash(mp, hash_ptr);
-	if (!absolute)
-		list_del(&mp->inode_list);
 	free_metapage(mp);
 	spin_unlock(&meta_lock);
 	return NULL;
@@ -457,8 +453,6 @@ void release_metapage(struct metapage * mp)
 		spin_unlock(&meta_lock);
 	} else {
 		remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
-		if (!test_bit(META_absolute, &mp->flag))
-			list_del(&mp->inode_list);
 		spin_unlock(&meta_lock);

 		if (mp->page) {
@@ -505,7 +499,8 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 	struct metapage **hash_ptr;
 	unsigned long lblock;
 	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
-	struct address_space *mapping = ip->i_mapping;
+	/* All callers are interested in block device's mapping */
+	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
 	struct metapage *mp;
 	struct page *page;

@@ -535,26 +530,6 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 	}
 }

-void invalidate_inode_metapages(struct inode *inode)
-{
-	struct list_head *ptr;
-	struct metapage *mp;
-
-	spin_lock(&meta_lock);
-	list_for_each(ptr, &JFS_IP(inode)->mp_list) {
-		mp = list_entry(ptr, struct metapage, inode_list);
-		clear_bit(META_dirty, &mp->flag);
-		set_bit(META_discard, &mp->flag);
-		kunmap(mp->page);
-		page_cache_release(mp->page);
-		INCREMENT(mpStat.pagefree);
-		mp->data = 0;
-		mp->page = 0;
-	}
-	spin_unlock(&meta_lock);
-	truncate_inode_pages(inode->i_mapping, 0);
-}
-
 #ifdef CONFIG_JFS_STATISTICS
 int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
 		    int *eof, void *data)

--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -38,7 +38,6 @@ struct metapage {
 	struct metapage *hash_prev;
 	struct metapage *hash_next;	/* Also used for free list */

-	struct list_head inode_list;	/* per-inode metapage list */
 	/*
 	 * mapping & index become redundant, but we need these here to
 	 * add the metapage to the hash before we have the real page
@@ -109,9 +108,7 @@ extern void __invalidate_metapages(struct inode *, s64, int);
 	__invalidate_metapages((ip), addressPXD(&(pxd)), lengthPXD(&(pxd)))
 #define invalidate_dxd_metapages(ip, dxd) \
 	__invalidate_metapages((ip), addressDXD(&(dxd)), lengthDXD(&(dxd)))
+#define invalidate_xad_metapages(ip, xad) \
+	__invalidate_metapages((ip), addressXAD(&(xad)), lengthXAD(&(xad)))

-/*
- * This one uses mp_list to invalidate all pages for an inode
- */
-extern void invalidate_inode_metapages(struct inode *inode);
 #endif				/* _H_JFS_METAPAGE */
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -70,6 +70,7 @@ static struct {
 	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
 	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
 	int tlocksInUse;	/* Number of tlocks in use */
+	int TlocksLow;		/* Indicates low number of available tlocks */
 	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
 /*	struct tblock *sync_queue; * Transactions waiting for data sync */
 	struct tblock *unlock_queue;	/* Txns waiting to be released */
@@ -79,6 +80,20 @@ static struct {
 					   that couldn't be sync'ed */
 } TxAnchor;

+#ifdef CONFIG_JFS_STATISTICS
+struct {
+	uint txBegin;
+	uint txBegin_barrier;
+	uint txBegin_lockslow;
+	uint txBegin_freetid;
+	uint txBeginAnon;
+	uint txBeginAnon_barrier;
+	uint txBeginAnon_lockslow;
+	uint txLockAlloc;
+	uint txLockAlloc_freelock;
+} TxStat;
+#endif
+
 static int nTxBlock = 512;	/* number of transaction blocks */
 struct tblock *TxBlock;	        /* transaction block table */

@@ -86,7 +101,6 @@ static int nTxLock = 4096;	/* number of transaction locks */
 static int TxLockLWM = 4096*.4;	/* Low water mark for number of txLocks used */
 static int TxLockHWM = 4096*.8;	/* High water mark for number of txLocks used */
 struct tlock *TxLock;           /* transaction lock table */
-static int TlocksLow = 0;	/* Indicates low number of available tlocks */


 /*
@@ -143,7 +157,8 @@ struct {
 /*
 * external references
 */
-extern int lmGroupCommit(struct jfs_log * log, struct tblock * tblk);
+extern int lmGroupCommit(struct jfs_log *, struct tblock *);
+extern void lmGCwrite(struct jfs_log *, int);
 extern void lmSync(struct jfs_log *);
 extern int jfs_commit_inode(struct inode *, int);
 extern int jfs_stop_threads;
@@ -190,13 +205,18 @@ static lid_t txLockAlloc(void)
 {
 	lid_t lid;

+	INCREMENT(TxStat.txLockAlloc);
+	if (!TxAnchor.freelock) {
+		INCREMENT(TxStat.txLockAlloc_freelock);
+	}
+
 	while (!(lid = TxAnchor.freelock))
 		TXN_SLEEP(&TxAnchor.freelockwait);
 	TxAnchor.freelock = TxLock[lid].next;
 	HIGHWATERMARK(stattx.maxlid, lid);
-	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (TlocksLow == 0)) {
+	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (TxAnchor.TlocksLow == 0)) {
 		jEVENT(0,("txLockAlloc TlocksLow\n"));
-		TlocksLow = 1;
+		TxAnchor.TlocksLow = 1;
 		wake_up(&jfs_sync_thread_wait);
 	}

@@ -208,9 +228,9 @@ static void txLockFree(lid_t lid)
 	TxLock[lid].next = TxAnchor.freelock;
 	TxAnchor.freelock = lid;
 	TxAnchor.tlocksInUse--;
-	if (TlocksLow && (TxAnchor.tlocksInUse < TxLockLWM)) {
+	if (TxAnchor.TlocksLow && (TxAnchor.tlocksInUse < TxLockLWM)) {
 		jEVENT(0,("txLockFree TlocksLow no more\n"));
-		TlocksLow = 0;
+		TxAnchor.TlocksLow = 0;
 		TXN_WAKEUP(&TxAnchor.lowlockwait);
 	}
 	TXN_WAKEUP(&TxAnchor.freelockwait);
@@ -322,6 +342,8 @@ tid_t txBegin(struct super_block *sb, int flag)

 	TXN_LOCK();

+	INCREMENT(TxStat.txBegin);
+
      retry:
 	if (!(flag & COMMIT_FORCE)) {
 		/*
@@ -329,6 +351,7 @@ tid_t txBegin(struct super_block *sb, int flag)
 		 */
 		if (test_bit(log_SYNCBARRIER, &log->flag) ||
 		    test_bit(log_QUIESCE, &log->flag)) {
+			INCREMENT(TxStat.txBegin_barrier);
 			TXN_SLEEP(&log->syncwait);
 			goto retry;
 		}
@@ -339,7 +362,8 @@ tid_t txBegin(struct super_block *sb, int flag)
 		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
 		 * free tlocks)
 		 */
-		if (TlocksLow) {
+		if (TxAnchor.TlocksLow) {
+			INCREMENT(TxStat.txBegin_lockslow);
 			TXN_SLEEP(&TxAnchor.lowlockwait);
 			goto retry;
 		}
@@ -350,6 +374,7 @@ tid_t txBegin(struct super_block *sb, int flag)
 	 */
 	if ((t = TxAnchor.freetid) == 0) {
 		jFYI(1, ("txBegin: waiting for free tid\n"));
+		INCREMENT(TxStat.txBegin_freetid);
 		TXN_SLEEP(&TxAnchor.freewait);
 		goto retry;
 	}
@@ -359,6 +384,7 @@ tid_t txBegin(struct super_block *sb, int flag)
 	if ((tblk->next == 0) && (current != jfsCommitTask)) {
 		/* Save one tblk for jfsCommit thread */
 		jFYI(1, ("txBegin: waiting for free tid\n"));
+		INCREMENT(TxStat.txBegin_freetid);
 		TXN_SLEEP(&TxAnchor.freewait);
 		goto retry;
 	}
@@ -412,6 +438,7 @@ void txBeginAnon(struct super_block *sb)
 	log = JFS_SBI(sb)->log;

 	TXN_LOCK();
+	INCREMENT(TxStat.txBeginAnon);

      retry:
 	/*
@@ -419,6 +446,7 @@ void txBeginAnon(struct super_block *sb)
 	 */
 	if (test_bit(log_SYNCBARRIER, &log->flag) ||
 	    test_bit(log_QUIESCE, &log->flag)) {
+		INCREMENT(TxStat.txBeginAnon_barrier);
 		TXN_SLEEP(&log->syncwait);
 		goto retry;
 	}
@@ -426,7 +454,8 @@ void txBeginAnon(struct super_block *sb)
 	/*
 	 * Don't begin transaction if we're getting starved for tlocks
 	 */
-	if (TlocksLow) {
+	if (TxAnchor.TlocksLow) {
+		INCREMENT(TxStat.txBeginAnon_lockslow);
 		TXN_SLEEP(&TxAnchor.lowlockwait);
 		goto retry;
 	}
@@ -1485,10 +1514,6 @@ int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 {
 	struct metapage *mp;
 	pxd_t *pxd;
-	int rc;
-	s64 xaddr;
-	int xflag;
-	s32 xlen;

 	mp = tlck->mp;

@@ -1513,13 +1538,7 @@ int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		return 0;
 	}

-	rc = xtLookup(tlck->ip, mp->index, 1, &xflag, &xaddr, &xlen, 1);
-	if (rc || (xlen == 0)) {
-		jERROR(1, ("dataLog: can't find physical address\n"));
-		return 0;
-	}
-
-	PXDaddress(pxd, xaddr);
+	PXDaddress(pxd, mp->index);
 	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);

 	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
@@ -2752,7 +2771,7 @@ void txLazyCommit(struct tblock * tblk)

 	tblk->flag |= tblkGC_COMMITTED;

-	if ((tblk->flag & tblkGC_READY) || (tblk->flag & tblkGC_LAZY))
+	if (tblk->flag & tblkGC_READY)
 		log->gcrtc--;

 	if (tblk->flag & tblkGC_READY)
@@ -2958,6 +2977,16 @@ void txQuiesce(struct super_block *sb)
 		goto restart;
 	}
 	TXN_UNLOCK();
+
+	/*
+	 * We may need to kick off the group commit
+	 */
+	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
+	if (log->cqueue.head && !(log->cflag & logGC_PAGEOUT)) {
+		log->cflag |= logGC_PAGEOUT;
+		lmGCwrite(log, 0);
+	}
+	spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
 }

 /*
@@ -3006,7 +3035,7 @@ int jfs_sync(void *arg)
 		 * write each inode on the anonymous inode list
 		 */
 		TXN_LOCK();
-		while (TlocksLow && !list_empty(&TxAnchor.anon_list)) {
+		while (TxAnchor.TlocksLow && !list_empty(&TxAnchor.anon_list)) {
 			jfs_ip = list_entry(TxAnchor.anon_list.next,
 					    struct jfs_inode_info,
 					    anon_inode_list);
@@ -3097,6 +3126,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
 		       "freelockwait = %s\n"
 		       "lowlockwait = %s\n"
 		       "tlocksInUse = %d\n"
+		       "TlocksLow = %d\n"
 		       "unlock_queue = 0x%p\n"
 		       "unlock_tail = 0x%p\n",
 		       TxAnchor.freetid,
@@ -3105,6 +3135,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
 		       freelockwait,
 		       lowlockwait,
 		       TxAnchor.tlocksInUse,
+		       TxAnchor.TlocksLow,
 		       TxAnchor.unlock_queue,
 		       TxAnchor.unlock_tail);

@@ -3123,3 +3154,56 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
 	return len;
 }
 #endif
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
+/*
+ *	jfs_txstats_read()
+ *
+ *	read_proc handler that formats the TxStat counters into buffer.
+ *	Points *start at buffer + offset and returns the number of bytes
+ *	available from there, clamped to the range [0, length]; *eof is
+ *	set when the remaining text fits within length.
+ */
+int jfs_txstats_read(char *buffer, char **start, off_t offset, int length,
+		     int *eof, void *data)
+{
+	int len = 0;
+	off_t begin;
+
+	len += sprintf(buffer,
+		       "JFS TxStats\n"
+		       "===========\n"
+		       "calls to txBegin = %u\n"
+		       "txBegin blocked by sync barrier = %u\n"
+		       "txBegin blocked by tlocks low = %u\n"
+		       "txBegin blocked by no free tid = %u\n"
+		       "calls to txBeginAnon = %u\n"
+		       "txBeginAnon blocked by sync barrier = %u\n"
+		       "txBeginAnon blocked by tlocks low = %u\n"
+		       "calls to txLockAlloc = %u\n"
+		       "tLockAlloc blocked by no free lock = %u\n",
+		       TxStat.txBegin,
+		       TxStat.txBegin_barrier,
+		       TxStat.txBegin_lockslow,
+		       TxStat.txBegin_freetid,
+		       TxStat.txBeginAnon,
+		       TxStat.txBeginAnon_barrier,
+		       TxStat.txBeginAnon_lockslow,
+		       TxStat.txLockAlloc,
+		       TxStat.txLockAlloc_freelock);
+
+	begin = offset;
+	*start = buffer + begin;
+	len -= begin;
+
+	if (len > length)
+		len = length;
+	else
+		*eof = 1;
+
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+#endif
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3517,6 +3517,13 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 		xlen = lengthXAD(xad);
 		xaddr = addressXAD(xad);

+		/*
+		 * The "data" for a directory is indexed by the block
+		 * device's address space.  This metadata must be invalidated
+		 * here
+		 */
+		if (S_ISDIR(ip->i_mode) && (teof == 0))
+			invalidate_xad_metapages(ip, *xad);
 		/*
 		 * entry beyond eof: continue scan of current page
 		 *          xad

--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -602,7 +602,7 @@ s64 commitZeroLink(tid_t tid, struct inode *ip)
 		break;
 	case S_IFLNK:
 		/* fast symbolic link */
-		if (ip->i_size <= 256) {
+		if (ip->i_size < IDATASIZE) {
 			ip->i_size = 0;
 			return 0;
 		}
@@ -674,7 +674,7 @@ int freeZeroLink(struct inode *ip)
 		break;
 	case S_IFLNK:
 		/* if its contained in inode nothing to do */
-		if (ip->i_size <= 256)
+		if (ip->i_size < IDATASIZE)
 			return 0;
 		break;
 	default:

--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -412,7 +412,6 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
 		INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
-		INIT_LIST_HEAD(&jfs_ip->mp_list);
 		init_rwsem(&jfs_ip->rdwrlock);
 		init_MUTEX(&jfs_ip->commit_sem);
 		jfs_ip->atlhead = 0;