Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (31 commits) ocfs2: clean up bh null checks ocfs2: document access rules for blocked_lock_list configfs: file.c fix possible recursive locking configfs: dir.c fix possible recursive locking configfs: Remove EXPERIMENTAL ocfs2: bump version number ocfs2/dlm: Clear joining_node on hearbeat node down ocfs2: convert byte order of constant instead of variable ocfs2: Update default cluster timeouts ocfs2: printf fixes ocfs2: Use generic_file_llseek ocfs2: Safer read_inline_data() ocfs2: Silence false lockdep warnings [PATCH 2/2] ocfs2: cluster aware flock() [PATCH 1/2] ocfs2: add flock lock type ocfs2: Local alloc window size changeable via mount option ocfs2: Support commit= mount option ocfs2: Add missing permission checks [PATCH 2/2] ocfs2: Implement group add for online resize [PATCH 1/2] ocfs2: Add group extend for online resize ...

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (31 commits) ocfs2: clean up bh null checks ocfs2: document access rules for blocked_lock_list configfs: file.c fix possible recursive locking configfs: dir.c fix possible recursive locking configfs: Remove EXPERIMENTAL ocfs2: bump version number ocfs2/dlm: Clear joining_node on hearbeat node down ocfs2: convert byte order of constant instead of variable ocfs2: Update default cluster timeouts ocfs2: printf fixes ocfs2: Use generic_file_llseek ocfs2: Safer read_inline_data() ocfs2: Silence false lockdep warnings [PATCH 2/2] ocfs2: cluster aware flock() [PATCH 1/2] ocfs2: add flock lock type ocfs2: Local alloc window size changeable via mount option ocfs2: Support commit= mount option ocfs2: Add missing permission checks [PATCH 2/2] ocfs2: Implement group add for online resize [PATCH 1/2] ocfs2: Add group extend for online resize ...
29bd17af · Linus Torvalds · 2ba14a01 · 2fe5c1d7 · 29bd17af · 29bd17af
Commit 29bd17af authored Jan 25, 2008 by Linus Torvalds
53 changed files
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -35,7 +35,6 @@ Features which OCFS2 does not support yet:
 	- Directory change notification (F_NOTIFY)
 	- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
 	- POSIX ACLs
-	- readpages / writepages (not user visible)
 Mount options
 =============
@@ -62,3 +61,18 @@ data=writeback		Data ordering is not preserved, data may be written
 preferred_slot=0(*)	During mount, try to use this filesystem slot first. If
 			it is in use by another node, the first empty one found
 			will be chosen. Invalid values will be ignored.
+commit=nrsec	(*)	Ocfs2 can be told to sync all its data and metadata
+			every 'nrsec' seconds. The default value is 5 seconds.
+			This means that if you lose your power, you will lose
+			as much as the latest 5 seconds of work (your
+			filesystem will not be damaged though, thanks to the
+			journaling).  This default value (or any low value)
+			will hurt performance, but it's good for data-safety.
+			Setting it to 0 will have the same effect as leaving
+			it at the default (5 seconds).
+			Setting it to very large values will improve
+			performance.
+localalloc=8(*)		Allows custom localalloc size in MB. If the value is too
+			large, the fs will silently revert it to the default.
+			Localalloc is not enabled for local mounts.
+localflocks		This disables cluster aware flock.
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -138,6 +138,7 @@ Code	Seq#	Include File		Comments
 'm'	00-1F	net/irda/irmod.h	conflict!
 'n'	00-7F	linux/ncp_fs.h
 'n'	E0-FF	video/matrox.h          matroxfb
+'o'	00-1F	fs/ocfs2/ocfs2_fs.h	OCFS2
 'p'	00-0F	linux/phantom.h		conflict! (OpenHaptics needs this)
 'p'	00-3F	linux/mc146818rtc.h	conflict!
 'p'	40-7F	linux/nvram.h

--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -440,14 +440,8 @@ config OCFS2_FS
 	  Tools web page:      http://oss.oracle.com/projects/ocfs2-tools
 	  OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
-	  Note: Features which OCFS2 does not support yet:
+	  For more information on OCFS2, see the file
-	          - extended attributes
+	  <file:Documentation/filesystems/ocfs2.txt>.
-	          - quotas
-	          - cluster aware flock
-	          - Directory change notification (F_NOTIFY)
-	          - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
-	          - POSIX ACLs
-	          - readpages / writepages (not user visible)
 config OCFS2_DEBUG_MASKLOG
 	bool "OCFS2 logging support"
@@ -1028,8 +1022,8 @@ config HUGETLB_PAGE
 	def_bool HUGETLBFS
 config CONFIGFS_FS
-	tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
+	tristate "Userspace-driven configuration filesystem"
-	depends on SYSFS && EXPERIMENTAL
+	depends on SYSFS
 	help
 	  configfs is a ram-based filesystem that provides the converse
 	  of sysfs's functionality. Where sysfs is a filesystem-based

--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -546,7 +546,7 @@ static int populate_groups(struct config_group *group)
 		 * That said, taking our i_mutex is closer to mkdir
 		 * emulation, and shouldn't hurt.
 		 */
-		mutex_lock(&dentry->d_inode->i_mutex);
+		mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
 		for (i = 0; group->default_groups[i]; i++) {
 			new_group = group->default_groups[i];
@@ -1405,7 +1405,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 	sd = configfs_sb->s_root->d_fsdata;
 	link_group(to_config_group(sd->s_element), group);
-	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
+	mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
+			I_MUTEX_PARENT);
 	name.name = group->cg_item.ci_name;
 	name.len = strlen(name.name);

--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -320,7 +320,7 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att
 	umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
 	int error = 0;
-	mutex_lock(&dir->d_inode->i_mutex);
+	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
 	error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
 	mutex_unlock(&dir->d_inode->i_mutex);

--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -19,16 +19,17 @@ ocfs2-objs := \
 	ioctl.o 		\
 	journal.o 		\
 	localalloc.o 		\
+	locks.o			\
 	mmap.o 			\
 	namei.o 		\
+	resize.o		\
 	slot_map.o 		\
 	suballoc.o 		\
 	super.o 		\
 	symlink.o 		\
 	sysfile.o 		\
 	uptodate.o		\
-	ver.o 			\
+	ver.o
-	vote.o
 obj-$(CONFIG_OCFS2_FS) += cluster/
 obj-$(CONFIG_OCFS2_FS) += dlm/
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4731,7 +4731,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 	mutex_lock(&data_alloc_inode->i_mutex);
-	status = ocfs2_meta_lock(data_alloc_inode, &data_alloc_bh, 1);
+	status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_mutex;
@@ -4753,7 +4753,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 out_unlock:
 	brelse(data_alloc_bh);
-	ocfs2_meta_unlock(data_alloc_inode, 1);
+	ocfs2_inode_unlock(data_alloc_inode, 1);
 out_mutex:
 	mutex_unlock(&data_alloc_inode->i_mutex);
@@ -5077,7 +5077,7 @@ static int ocfs2_free_cached_items(struct ocfs2_super *osb,
 	mutex_lock(&inode->i_mutex);
-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_mutex;
@@ -5118,7 +5118,7 @@ static int ocfs2_free_cached_items(struct ocfs2_super *osb,
 	ocfs2_commit_trans(osb, handle);
 out_unlock:
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 	brelse(di_bh);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);

--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -26,6 +26,7 @@
 #include <asm/byteorder.h>
 #include <linux/swap.h>
 #include <linux/pipe_fs_i.h>
+#include <linux/mpage.h>
 #define MLOG_MASK_PREFIX ML_FILE_IO
 #include <cluster/masklog.h>
@@ -139,7 +140,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 {
 	int err = 0;
 	unsigned int ext_flags;
-	u64 p_blkno, past_eof;
+	u64 max_blocks = bh_result->b_size >> inode->i_blkbits;
+	u64 p_blkno, count, past_eof;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
@@ -155,7 +157,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
-	err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, NULL,
+	err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
 					  &ext_flags);
 	if (err) {
 		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
@@ -164,6 +166,9 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
+	if (max_blocks < count)
+		count = max_blocks;
 	/*
 	 * ocfs2 never allocates in this function - the only time we
 	 * need to use BH_New is when we're extending i_size on a file
@@ -178,6 +183,8 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
 	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
 		map_bh(bh_result, inode->i_sb, p_blkno);
+	bh_result->b_size = count << inode->i_blkbits;
 	if (!ocfs2_sparse_alloc(osb)) {
 		if (p_blkno == 0) {
 			err = -EIO;
@@ -210,7 +217,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 			   struct buffer_head *di_bh)
 {
 	void *kaddr;
-	unsigned int size;
+	loff_t size;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
 	if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
@@ -224,8 +231,9 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 	if (size > PAGE_CACHE_SIZE ||
 	    size > ocfs2_max_inline_data(inode->i_sb)) {
 		ocfs2_error(inode->i_sb,
-			    "Inode %llu has with inline data has bad size: %u",
+			    "Inode %llu has with inline data has bad size: %Lu",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno, size);
+			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
+			    (unsigned long long)size);
 		return -EROFS;
 	}
@@ -275,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 	mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));
-	ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page);
+	ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);
 	if (ret != 0) {
 		if (ret == AOP_TRUNCATED_PAGE)
 			unlock = 0;
@@ -285,7 +293,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
 		ret = AOP_TRUNCATED_PAGE;
-		goto out_meta_unlock;
+		goto out_inode_unlock;
 	}
 	/*
@@ -305,25 +313,16 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 		goto out_alloc;
 	}
-	ret = ocfs2_data_lock_with_page(inode, 0, page);
-	if (ret != 0) {
-		if (ret == AOP_TRUNCATED_PAGE)
-			unlock = 0;
-		mlog_errno(ret);
-		goto out_alloc;
-	}
 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
 		ret = ocfs2_readpage_inline(inode, page);
 	else
 		ret = block_read_full_page(page, ocfs2_get_block);
 	unlock = 0;
-	ocfs2_data_unlock(inode, 0);
 out_alloc:
 	up_read(&OCFS2_I(inode)->ip_alloc_sem);
-out_meta_unlock:
+out_inode_unlock:
-	ocfs2_meta_unlock(inode, 0);
+	ocfs2_inode_unlock(inode, 0);
 out:
 	if (unlock)
 		unlock_page(page);
@@ -331,6 +330,62 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 	return ret;
 }
+/*
+ * This is used only for read-ahead. Failures or difficult to handle
+ * situations are safe to ignore.
+ *
+ * Right now, we don't bother with BH_Boundary - in-inode extent lists
+ * are quite large (243 extents on 4k blocks), so most inodes don't
+ * grow out to a tree. If need be, detecting boundary extents could
+ * trivially be added in a future version of ocfs2_get_block().
+ */
+static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
+			   struct list_head *pages, unsigned nr_pages)
+{
+	int ret, err = -EIO;
+	struct inode *inode = mapping->host;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	loff_t start;
+	struct page *last;
+	/*
+	 * Use the nonblocking flag for the dlm code to avoid page
+	 * lock inversion, but don't bother with retrying.
+	 */
+	ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK);
+	if (ret)
+		return err;
+	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
+		ocfs2_inode_unlock(inode, 0);
+		return err;
+	}
+	/*
+	 * Don't bother with inline-data. There isn't anything
+	 * to read-ahead in that case anyway...
+	 */
+	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+		goto out_unlock;
+	/*
+	 * Check whether a remote node truncated this file - we just
+	 * drop out in that case as it's not worth handling here.
+	 */
+	last = list_entry(pages->prev, struct page, lru);
+	start = (loff_t)last->index << PAGE_CACHE_SHIFT;
+	if (start >= i_size_read(inode))
+		goto out_unlock;
+	err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block);
+out_unlock:
+	up_read(&oi->ip_alloc_sem);
+	ocfs2_inode_unlock(inode, 0);
+	return err;
+}
 /* Note: Because we don't support holes, our allocation has
 * already happened (allocation writes zeros to the file data)
 * so we don't have to worry about ordered writes in
@@ -452,7 +507,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 	 * accessed concurrently from multiple nodes.
 	 */
 	if (!INODE_JOURNAL(inode)) {
-		err = ocfs2_meta_lock(inode, NULL, 0);
+		err = ocfs2_inode_lock(inode, NULL, 0);
 		if (err) {
 			if (err != -ENOENT)
 				mlog_errno(err);
@@ -467,7 +522,7 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 	if (!INODE_JOURNAL(inode)) {
 		up_read(&OCFS2_I(inode)->ip_alloc_sem);
-		ocfs2_meta_unlock(inode, 0);
+		ocfs2_inode_unlock(inode, 0);
 	}
 	if (err) {
@@ -638,34 +693,12 @@ static ssize_t ocfs2_direct_IO(int rw,
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
 		return 0;
-	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
-		/*
-		 * We get PR data locks even for O_DIRECT.  This
-		 * allows concurrent O_DIRECT I/O but doesn't let
-		 * O_DIRECT with extending and buffered zeroing writes
-		 * race.  If they did race then the buffered zeroing
-		 * could be written back after the O_DIRECT I/O.  It's
-		 * one thing to tell people not to mix buffered and
-		 * O_DIRECT writes, but expecting them to understand
-		 * that file extension is also an implicit buffered
-		 * write is too much.  By getting the PR we force
-		 * writeback of the buffered zeroing before
-		 * proceeding.
-		 */
-		ret = ocfs2_data_lock(inode, 0);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
-		ocfs2_data_unlock(inode, 0);
-	}
 	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
 					    inode->i_sb->s_bdev, iov, offset,
 					    nr_segs, 
 					    ocfs2_direct_IO_get_blocks,
 					    ocfs2_dio_end_io);
-out:
 	mlog_exit(ret);
 	return ret;
 }
@@ -1754,7 +1787,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
 	struct buffer_head *di_bh = NULL;
 	struct inode *inode = mapping->host;
-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
 	if (ret) {
 		mlog_errno(ret);
 		return ret;
@@ -1769,30 +1802,22 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
 	 */
 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
-	ret = ocfs2_data_lock(inode, 1);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_fail;
-	}
 	ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
 				       fsdata, di_bh, NULL);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_fail_data;
+		goto out_fail;
 	}
 	brelse(di_bh);
 	return 0;
-out_fail_data:
-	ocfs2_data_unlock(inode, 1);
 out_fail:
 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
 	brelse(di_bh);
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 	return ret;
 }
@@ -1908,15 +1933,15 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
 	ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
-	ocfs2_data_unlock(inode, 1);
 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 	return ret;
 }
 const struct address_space_operations ocfs2_aops = {
 	.readpage	= ocfs2_readpage,
+	.readpages	= ocfs2_readpages,
 	.writepage	= ocfs2_writepage,
 	.write_begin	= ocfs2_write_begin,
 	.write_end	= ocfs2_write_end,

--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -79,7 +79,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 		 * information for this bh as it's not marked locally
 		 * uptodate. */
 		ret = -EIO;
-		brelse(bh);
+		put_bh(bh);
 	}
 	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
@@ -256,7 +256,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 				 * for this bh as it's not marked locally
 				 * uptodate. */
 				status = -EIO;
-				brelse(bh);
+				put_bh(bh);
 				bhs[i] = NULL;
 				continue;
 			}
@@ -280,3 +280,64 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	mlog_exit(status);
 	return status;
 }
+/* Check whether the blkno is the super block or one of the backups. */
+static void ocfs2_check_super_or_backup(struct super_block *sb,
+					sector_t blkno)
+{
+	int i;
+	u64 backup_blkno;
+	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
+		return;
+	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
+		backup_blkno = ocfs2_backup_super_blkno(sb, i);
+		if (backup_blkno == blkno)
+			return;
+	}
+	BUG();
+}
+/*
+ * Write super block and backups doesn't need to collaborate with journal,
+ * so we don't need to lock ip_io_mutex and inode doesn't need to be passed
+ * into this function.
+ */
+int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
+				struct buffer_head *bh)
+{
+	int ret = 0;
+	mlog_entry_void();
+	BUG_ON(buffer_jbd(bh));
+	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
+	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
+		ret = -EROFS;
+		goto out;
+	}
+	lock_buffer(bh);
+	set_buffer_uptodate(bh);
+	/* remove from dirty list before I/O. */
+	clear_buffer_dirty(bh);
+	get_bh(bh); /* for end_buffer_write_sync() */
+	bh->b_end_io = end_buffer_write_sync;
+	submit_bh(WRITE, bh);
+	wait_on_buffer(bh);
+	if (!buffer_uptodate(bh)) {
+		ret = -EIO;
+		put_bh(bh);
+	}
+out:
+	mlog_exit(ret);
+	return ret;
+}
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -47,6 +47,8 @@ int ocfs2_read_blocks(struct ocfs2_super          *osb,
 		      int                  flags,
 		      struct inode        *inode);
+int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
+				struct buffer_head *bh);
 #define OCFS2_BH_CACHED            1
 #define OCFS2_BH_READAHEAD         8

--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -35,7 +35,7 @@
 #define O2HB_LIVE_THRESHOLD	   2
 /* number of equal samples to be seen as dead */
 extern unsigned int o2hb_dead_threshold;
-#define O2HB_DEFAULT_DEAD_THRESHOLD	   7
+#define O2HB_DEFAULT_DEAD_THRESHOLD	   31
 /* Otherwise MAX_WRITE_TIMEOUT will be zero... */
 #define O2HB_MIN_DEAD_THRESHOLD	  2
 #define O2HB_MAX_WRITE_TIMEOUT_MS (O2HB_REGION_TIMEOUT_MS * (o2hb_dead_threshold - 1))

--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -60,8 +60,8 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
 /* same as hb delay, we're waiting for another node to recognize our hb */
 #define O2NET_RECONNECT_DELAY_MS_DEFAULT	2000
-#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT	5000
+#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT	2000
-#define O2NET_IDLE_TIMEOUT_MS_DEFAULT		10000
+#define O2NET_IDLE_TIMEOUT_MS_DEFAULT		30000
 /* TODO: figure this out.... */

--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,12 @@
 * locking semantics of the file system using the protocol.  It should 
 * be somewhere else, I'm sure, but right now it isn't.
 *
+ * New in version 10:
+ * 	- Meta/data locks combined
+ *
+ * New in version 9:
+ * 	- All votes removed
+ *
 * New in version 8:
 * 	- Replace delete inode votes with a cluster lock
 *
@@ -60,7 +66,7 @@
 * 	- full 64 bit i_size in the metadata lock lvbs
 * 	- introduction of "rw" lock and pushing meta/data locking down
 */
-#define O2NET_PROTOCOL_VERSION 8ULL
+#define O2NET_PROTOCOL_VERSION 10ULL
 struct o2net_handshake {
 	__be64	protocol_version;
 	__be64	connector_id;

--- a/fs/ocfs2/cluster/ver.c
+++ b/fs/ocfs2/cluster/ver.c
@@ -28,7 +28,7 @@
 #include "ver.h"
-#define CLUSTER_BUILD_VERSION "1.3.3"
+#define CLUSTER_BUILD_VERSION "1.5.0"
 #define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION

--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry,
 /*
 * Walk the inode alias list, and find a dentry which has a given
 * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
- * is looking for a dentry_lock reference. The vote thread is looking
+ * is looking for a dentry_lock reference. The downconvert thread is
- * to unhash aliases, so we allow it to skip any that already have
+ * looking to unhash aliases, so we allow it to skip any that already
- * that property.
+ * have that property.
 */
 struct dentry *ocfs2_find_local_alias(struct inode *inode,
 				      u64 parent_blkno,
@@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
 	dl->dl_count = 0;
 	/*
 	 * Does this have to happen below, for all attaches, in case
-	 * the struct inode gets blown away by votes?
+	 * the struct inode gets blown away by the downconvert thread?
 	 */
 	dl->dl_inode = igrab(inode);
 	dl->dl_parent_blkno = parent_blkno;

--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -846,14 +846,14 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	mlog_entry("dirino=%llu\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
-	error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
+	error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
 	if (lock_level && error >= 0) {
 		/* We release EX lock which used to update atime
 		 * and get PR lock again to reduce contention
 		 * on commonly accessed directories. */
-		ocfs2_meta_unlock(inode, 1);
+		ocfs2_inode_unlock(inode, 1);
 		lock_level = 0;
-		error = ocfs2_meta_lock(inode, NULL, 0);
+		error = ocfs2_inode_lock(inode, NULL, 0);
 	}
 	if (error < 0) {
 		if (error != -ENOENT)
@@ -865,7 +865,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
 				      dirent, filldir, NULL);
-	ocfs2_meta_unlock(inode, lock_level);
+	ocfs2_inode_unlock(inode, lock_level);
 bail_nolock:
 	mlog_exit(error);

--- a/fs/ocfs2/dlm/dlmfsver.c
+++ b/fs/ocfs2/dlm/dlmfsver.c
@@ -28,7 +28,7 @@
 #include "dlmfsver.h"
-#define DLM_BUILD_VERSION "1.3.3"
+#define DLM_BUILD_VERSION "1.5.0"
 #define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION

--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2270,6 +2270,12 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
 		}
 	}
+	/* Clean up join state on node death. */
+	if (dlm->joining_node == idx) {
+		mlog(0, "Clearing join state for node %u\n", idx);
+		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
+	}
 	/* check to see if the node is already considered dead */
 	if (!test_bit(idx, dlm->live_nodes_map)) {
 		mlog(0, "for domain %s, node %d is already dead. "
@@ -2288,12 +2294,6 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx)
 	clear_bit(idx, dlm->live_nodes_map);
-	/* Clean up join state on node death. */
-	if (dlm->joining_node == idx) {
-		mlog(0, "Clearing join state for node %u\n", idx);
-		__dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
-	}
 	/* make sure local cleanup occurs before the heartbeat events */
 	if (!test_bit(idx, dlm->recovery_map))
 		dlm_do_local_recovery_cleanup(dlm, idx);
@@ -2321,6 +2321,13 @@ void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data)
 	if (!dlm_grab(dlm))
 		return;
+	/*
+	 * This will notify any dlm users that a node in our domain
+	 * went away without notifying us first.
+	 */
+	if (test_bit(idx, dlm->domain_map))
+		dlm_fire_domain_eviction_callbacks(dlm, idx);
 	spin_lock(&dlm->spinlock);
 	__dlm_hb_node_down(dlm, idx);
 	spin_unlock(&dlm->spinlock);

--- a/fs/ocfs2/dlm/dlmver.c
+++ b/fs/ocfs2/dlm/dlmver.c
@@ -28,7 +28,7 @@
 #include "dlmver.h"
-#define DLM_BUILD_VERSION "1.3.3"
+#define DLM_BUILD_VERSION "1.5.0"
 #define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION

--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,12 +49,12 @@ struct ocfs2_meta_lvb {
 	__be32       lvb_reserved2;
 };
-/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
+/* ocfs2_inode_lock_full() 'arg_flags' flags */
 /* don't wait on recovery. */
 #define OCFS2_META_LOCK_RECOVERY	(0x01)
 /* Instruct the dlm not to queue ourselves on the other node. */
 #define OCFS2_META_LOCK_NOQUEUE		(0x02)
-/* don't block waiting for the vote thread, instead return -EAGAIN */
+/* don't block waiting for the downconvert thread, instead return -EAGAIN */
 #define OCFS2_LOCK_NONBLOCK		(0x04)
 int ocfs2_dlm_init(struct ocfs2_super *osb);
@@ -66,38 +66,32 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 			       struct inode *inode);
 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
 				u64 parent, struct inode *inode);
+struct ocfs2_file_private;
+void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
+			      struct ocfs2_file_private *fp);
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
 int ocfs2_drop_inode_locks(struct inode *inode);
-int ocfs2_data_lock_full(struct inode *inode,
-			 int write,
-			 int arg_flags);
-#define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0)
-int ocfs2_data_lock_with_page(struct inode *inode,
-			      int write,
-			      struct page *page);
-void ocfs2_data_unlock(struct inode *inode,
-		       int write);
 int ocfs2_rw_lock(struct inode *inode, int write);
 void ocfs2_rw_unlock(struct inode *inode, int write);
 int ocfs2_open_lock(struct inode *inode);
 int ocfs2_try_open_lock(struct inode *inode, int write);
 void ocfs2_open_unlock(struct inode *inode);
-int ocfs2_meta_lock_atime(struct inode *inode,
+int ocfs2_inode_lock_atime(struct inode *inode,
 			  struct vfsmount *vfsmnt,
 			  int *level);
-int ocfs2_meta_lock_full(struct inode *inode,
+int ocfs2_inode_lock_full(struct inode *inode,
 			 struct buffer_head **ret_bh,
 			 int ex,
 			 int arg_flags);
-int ocfs2_meta_lock_with_page(struct inode *inode,
+int ocfs2_inode_lock_with_page(struct inode *inode,
 			      struct buffer_head **ret_bh,
 			      int ex,
 			      struct page *page);
 /* 99% of the time we don't want to supply any additional flags --
 * those are for very specific cases only. */
-#define ocfs2_meta_lock(i, b, e) ocfs2_meta_lock_full(i, b, e, 0)
+#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0)
-void ocfs2_meta_unlock(struct inode *inode,
+void ocfs2_inode_unlock(struct inode *inode,
 		       int ex);
 int ocfs2_super_lock(struct ocfs2_super *osb,
 		     int ex);
@@ -107,14 +101,17 @@ int ocfs2_rename_lock(struct ocfs2_super *osb);
 void ocfs2_rename_unlock(struct ocfs2_super *osb);
 int ocfs2_dentry_lock(struct dentry *dentry, int ex);
 void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
+int ocfs2_file_lock(struct file *file, int ex, int trylock);
+void ocfs2_file_unlock(struct file *file);
 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
 void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 			       struct ocfs2_lock_res *lockres);
-/* for the vote thread */
+/* for the downconvert thread */
 void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 				struct ocfs2_lock_res *lockres);
+void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb);
 struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void);
 void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug);

--- a/fs/ocfs2/endian.h
+++ b/fs/ocfs2/endian.h
@@ -37,11 +37,6 @@ static inline void le64_add_cpu(__le64 *var, u64 val)
 	*var = cpu_to_le64(le64_to_cpu(*var) + val);
 }
-static inline void le32_and_cpu(__le32 *var, u32 val)
-{
-	*var = cpu_to_le32(le32_to_cpu(*var) & val);
-}
 static inline void be32_add_cpu(__be32 *var, u32 val)
 {
 	*var = cpu_to_be32(be32_to_cpu(*var) + val);

--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 		return ERR_PTR(-ESTALE);
 	}
-	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
+	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0);
 	if (IS_ERR(inode))
 		return (void *)inode;
@@ -95,7 +95,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 	mlog(0, "find parent of directory %llu\n",
 	     (unsigned long long)OCFS2_I(dir)->ip_blkno);
-	status = ocfs2_meta_lock(dir, NULL, 0);
+	status = ocfs2_inode_lock(dir, NULL, 0);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -109,7 +109,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 		goto bail_unlock;
 	}
-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0);
 	if (IS_ERR(inode)) {
 		mlog(ML_ERROR, "Unable to create inode %llu\n",
 		     (unsigned long long)blkno);
@@ -126,7 +126,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 	parent->d_op = &ocfs2_dentry_ops;
 bail_unlock:
-	ocfs2_meta_unlock(dir, 0);
+	ocfs2_inode_unlock(dir, 0);
 bail:
 	mlog_exit_ptr(parent);

--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -32,6 +32,12 @@ extern const struct inode_operations ocfs2_file_iops;
 extern const struct inode_operations ocfs2_special_file_iops;
 struct ocfs2_alloc_context;
+struct ocfs2_file_private {
+	struct file		*fp_file;
+	struct mutex		fp_mutex;
+	struct ocfs2_lock_res	fp_flock;
+};
 enum ocfs2_alloc_restarted {
 	RESTART_NONE = 0,
 	RESTART_TRANS,

--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -30,9 +30,6 @@
 #include <linux/highmem.h>
 #include <linux/kmod.h>
-#include <cluster/heartbeat.h>
-#include <cluster/nodemanager.h>
 #include <dlm/dlmapi.h>
 #define MLOG_MASK_PREFIX ML_SUPER
@@ -44,13 +41,9 @@
 #include "heartbeat.h"
 #include "inode.h"
 #include "journal.h"
-#include "vote.h"
 #include "buffer_head_io.h"
-#define OCFS2_HB_NODE_DOWN_PRI     (0x0000002)
-#define OCFS2_HB_NODE_UP_PRI	   OCFS2_HB_NODE_DOWN_PRI
 static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map,
 					    int bit);
 static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map,
@@ -64,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target,
 void ocfs2_init_node_maps(struct ocfs2_super *osb)
 {
 	spin_lock_init(&osb->node_map_lock);
-	ocfs2_node_map_init(&osb->mounted_map);
 	ocfs2_node_map_init(&osb->recovery_map);
-	ocfs2_node_map_init(&osb->umount_map);
 	ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs);
 }
@@ -87,24 +78,7 @@ static void ocfs2_do_node_down(int node_num,
 		return;
 	}
-	if (ocfs2_node_map_test_bit(osb, &osb->umount_map, node_num)) {
-		/* If a node is in the umount map, then we've been
-		 * expecting him to go down and we know ahead of time
-		 * that recovery is not necessary. */
-		ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
-		return;
-	}
 	ocfs2_recovery_thread(osb, node_num);
-	ocfs2_remove_node_from_vote_queues(osb, node_num);
-}
-static void ocfs2_hb_node_down_cb(struct o2nm_node *node,
-				  int node_num,
-				  void *data)
-{
-	ocfs2_do_node_down(node_num, (struct ocfs2_super *) data);
 }
 /* Called from the dlm when it's about to evict a node. We may also
@@ -121,27 +95,8 @@ static void ocfs2_dlm_eviction_cb(int node_num,
 	ocfs2_do_node_down(node_num, osb);
 }
-static void ocfs2_hb_node_up_cb(struct o2nm_node *node,
-				int node_num,
-				void *data)
-{
-	struct ocfs2_super *osb = data;
-	BUG_ON(osb->node_num == node_num);
-	mlog(0, "node up event for %d\n", node_num);
-	ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num);
-}
 void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
 {
-	o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB,
-			    ocfs2_hb_node_down_cb, osb,
-			    OCFS2_HB_NODE_DOWN_PRI);
-	o2hb_setup_callback(&osb->osb_hb_up, O2HB_NODE_UP_CB,
-			    ocfs2_hb_node_up_cb, osb, OCFS2_HB_NODE_UP_PRI);
 	/* Not exactly a heartbeat callback, but leads to essentially
 	 * the same path so we set it up here. */
 	dlm_setup_eviction_cb(&osb->osb_eviction_cb,
@@ -149,39 +104,6 @@ void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb)
 			      osb);
 }
-/* Most functions here are just stubs for now... */
-int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
-{
-	int status;
-	if (ocfs2_mount_local(osb))
-		return 0;
-	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
-	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
-	if (status < 0) {
-		mlog_errno(status);
-		o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
-	}
-bail:
-	return status;
-}
-void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
-{
-	if (ocfs2_mount_local(osb))
-		return;
-	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
-	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
-}
 void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
 {
 	int ret;
@@ -341,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb,
 	spin_lock(&osb->node_map_lock);
-	__ocfs2_node_map_clear_bit(&osb->mounted_map, num);
 	if (!test_bit(num, osb->recovery_map.map)) {
 	    __ocfs2_node_map_set_bit(&osb->recovery_map, num);
 	    set = 1;

--- a/fs/ocfs2/heartbeat.h
+++ b/fs/ocfs2/heartbeat.h
@@ -29,8 +29,6 @@
 void ocfs2_init_node_maps(struct ocfs2_super *osb);
 void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb);
-int ocfs2_register_hb_callbacks(struct ocfs2_super *osb);
-void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb);
 void ocfs2_stop_heartbeat(struct ocfs2_super *osb);
 /* node map functions - used to keep track of mounted and in-recovery

--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -34,8 +34,7 @@ struct ocfs2_inode_info
 	u64			ip_blkno;
 	struct ocfs2_lock_res		ip_rw_lockres;
-	struct ocfs2_lock_res		ip_meta_lockres;
+	struct ocfs2_lock_res		ip_inode_lockres;
-	struct ocfs2_lock_res		ip_data_lockres;
 	struct ocfs2_lock_res		ip_open_lockres;
 	/* protects allocation changes on this inode. */
@@ -121,9 +120,10 @@ void ocfs2_delete_inode(struct inode *inode);
 void ocfs2_drop_inode(struct inode *inode);
 /* Flags for ocfs2_iget() */
-#define OCFS2_FI_FLAG_SYSFILE		0x4
+#define OCFS2_FI_FLAG_SYSFILE		0x1
-#define OCFS2_FI_FLAG_ORPHAN_RECOVERY	0x8
+#define OCFS2_FI_FLAG_ORPHAN_RECOVERY	0x2
-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
+			 int sysfile_type);
 int ocfs2_inode_init_private(struct inode *inode);
 int ocfs2_inode_revalidate(struct dentry *dentry);
 int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,

--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -20,6 +20,7 @@
 #include "ocfs2_fs.h"
 #include "ioctl.h"
+#include "resize.h"
 #include <linux/ext2_fs.h>
@@ -27,14 +28,14 @@ static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
 {
 	int status;
-	status = ocfs2_meta_lock(inode, NULL, 0);
+	status = ocfs2_inode_lock(inode, NULL, 0);
 	if (status < 0) {
 		mlog_errno(status);
 		return status;
 	}
 	ocfs2_get_inode_flags(OCFS2_I(inode));
 	*flags = OCFS2_I(inode)->ip_attr;
-	ocfs2_meta_unlock(inode, 0);
+	ocfs2_inode_unlock(inode, 0);
 	mlog_exit(status);
 	return status;
@@ -52,7 +53,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 	mutex_lock(&inode->i_mutex);
-	status = ocfs2_meta_lock(inode, &bh, 1);
+	status = ocfs2_inode_lock(inode, &bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -100,7 +101,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
 	ocfs2_commit_trans(osb, handle);
 bail_unlock:
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 bail:
 	mutex_unlock(&inode->i_mutex);
@@ -115,8 +116,10 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
 	unsigned int cmd, unsigned long arg)
 {
 	unsigned int flags;
+	int new_clusters;
 	int status;
 	struct ocfs2_space_resv sr;
+	struct ocfs2_new_group_input input;
 	switch (cmd) {
 	case OCFS2_IOC_GETFLAGS:
@@ -140,6 +143,23 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
 			return -EFAULT;
 		return ocfs2_change_file_space(filp, cmd, &sr);
+	case OCFS2_IOC_GROUP_EXTEND:
+		if (!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+		if (get_user(new_clusters, (int __user *)arg))
+			return -EFAULT;
+		return ocfs2_group_extend(inode, new_clusters);
+	case OCFS2_IOC_GROUP_ADD:
+	case OCFS2_IOC_GROUP_ADD64:
+		if (!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+		if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
+			return -EFAULT;
+		return ocfs2_group_add(inode, &input);
 	default:
 		return -ENOTTY;
 	}
@@ -162,6 +182,9 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case OCFS2_IOC_RESVSP64:
 	case OCFS2_IOC_UNRESVSP:
 	case OCFS2_IOC_UNRESVSP64:
+	case OCFS2_IOC_GROUP_EXTEND:
+	case OCFS2_IOC_GROUP_ADD:
+	case OCFS2_IOC_GROUP_ADD64:
 		break;
 	default:
 		return -ENOIOCTLCMD;

--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -44,7 +44,6 @@
 #include "localalloc.h"
 #include "slot_map.h"
 #include "super.h"
-#include "vote.h"
 #include "sysfile.h"
 #include "buffer_head_io.h"
@@ -103,7 +102,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
 	mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n",
 	     journal->j_trans_id, flushed);
-	ocfs2_kick_vote_thread(osb);
+	ocfs2_wake_downconvert_thread(osb);
 	wake_up(&journal->j_checkpointed);
 finally:
 	mlog_exit(status);
@@ -314,14 +313,18 @@ int ocfs2_journal_dirty_data(handle_t *handle,
 	return err;
 }
-#define OCFS2_DEFAULT_COMMIT_INTERVAL 	(HZ * 5)
+#define OCFS2_DEFAULT_COMMIT_INTERVAL 	(HZ * JBD_DEFAULT_MAX_COMMIT_AGE)
 void ocfs2_set_journal_params(struct ocfs2_super *osb)
 {
 	journal_t *journal = osb->journal->j_journal;
+	unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
+	if (osb->osb_commit_interval)
+		commit_interval = osb->osb_commit_interval;
 	spin_lock(&journal->j_state_lock);
-	journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL;
+	journal->j_commit_interval = commit_interval;
 	if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
 		journal->j_flags |= JFS_BARRIER;
 	else
@@ -337,7 +340,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 	struct ocfs2_dinode *di = NULL;
 	struct buffer_head *bh = NULL;
 	struct ocfs2_super *osb;
-	int meta_lock = 0;
+	int inode_lock = 0;
 	mlog_entry_void();
@@ -367,14 +370,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 	/* Skip recovery waits here - journal inode metadata never
 	 * changes in a live cluster so it can be considered an
 	 * exception to the rule. */
-	status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
+	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
 	if (status < 0) {
 		if (status != -ERESTARTSYS)
 			mlog(ML_ERROR, "Could not get lock on journal!\n");
 		goto done;
 	}
-	meta_lock = 1;
+	inode_lock = 1;
 	di = (struct ocfs2_dinode *)bh->b_data;
 	if (inode->i_size <  OCFS2_MIN_JOURNAL_SIZE) {
@@ -414,8 +417,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
 	status = 0;
 done:
 	if (status < 0) {
-		if (meta_lock)
+		if (inode_lock)
-			ocfs2_meta_unlock(inode, 1);
+			ocfs2_inode_unlock(inode, 1);
 		if (bh != NULL)
 			brelse(bh);
 		if (inode) {
@@ -544,7 +547,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
 	OCFS2_I(inode)->ip_open_count--;
 	/* unlock our journal */
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 	brelse(journal->j_bh);
 	journal->j_bh = NULL;
@@ -883,8 +886,8 @@ static int __ocfs2_recovery_thread(void *arg)
 	ocfs2_super_unlock(osb, 1);
 	/* We always run recovery on our own orphan dir - the dead
-	 * node(s) may have voted "no" on an inode delete earlier. A
+	 * node(s) may have disallowed a previous inode delete. Re-processing
-	 * revote is therefore required. */
+	 * is therefore required. */
 	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
 					NULL);
@@ -973,9 +976,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 	}
 	SET_INODE_JOURNAL(inode);
-	status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
+	status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
 	if (status < 0) {
-		mlog(0, "status returned from ocfs2_meta_lock=%d\n", status);
+		mlog(0, "status returned from ocfs2_inode_lock=%d\n", status);
 		if (status != -ERESTARTSYS)
 			mlog(ML_ERROR, "Could not lock journal!\n");
 		goto done;
@@ -1047,7 +1050,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 done:
 	/* drop the lock on this nodes journal */
 	if (got_lock)
-		ocfs2_meta_unlock(inode, 1);
+		ocfs2_inode_unlock(inode, 1);
 	if (inode)
 		iput(inode);
@@ -1162,14 +1165,14 @@ static int ocfs2_trylock_journal(struct ocfs2_super *osb,
 	SET_INODE_JOURNAL(inode);
 	flags = OCFS2_META_LOCK_RECOVERY | OCFS2_META_LOCK_NOQUEUE;
-	status = ocfs2_meta_lock_full(inode, NULL, 1, flags);
+	status = ocfs2_inode_lock_full(inode, NULL, 1, flags);
 	if (status < 0) {
 		if (status != -EAGAIN)
 			mlog_errno(status);
 		goto bail;
 	}
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 bail:
 	if (inode)
 		iput(inode);
@@ -1241,7 +1244,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
 	/* Skip bad inodes so that recovery can continue */
 	iter = ocfs2_iget(p->osb, ino,
-			  OCFS2_FI_FLAG_ORPHAN_RECOVERY);
+			  OCFS2_FI_FLAG_ORPHAN_RECOVERY, 0);
 	if (IS_ERR(iter))
 		return 0;
@@ -1277,7 +1280,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 	}	
 	mutex_lock(&orphan_dir_inode->i_mutex);
-	status = ocfs2_meta_lock(orphan_dir_inode, NULL, 0);
+	status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out;
@@ -1293,7 +1296,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 	*head = priv.head;
 out_cluster:
-	ocfs2_meta_unlock(orphan_dir_inode, 0);
+	ocfs2_inode_unlock(orphan_dir_inode, 0);
 out:
 	mutex_unlock(&orphan_dir_inode->i_mutex);
 	iput(orphan_dir_inode);
@@ -1380,10 +1383,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 		iter = oi->ip_next_orphan;
 		spin_lock(&oi->ip_lock);
-		/* Delete voting may have set these on the assumption
+		/* The remote delete code may have set these on the
-		 * that the other node would wipe them successfully.
+		 * assumption that the other node would wipe them
-		 * If they are still in the node's orphan dir, we need
+		 * successfully.  If they are still in the node's
-		 * to reset that state. */
+		 * orphan dir, we need to reset that state. */
 		oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE);
 		/* Set the proper information to get us going into

--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -278,6 +278,12 @@ int                  ocfs2_journal_dirty_data(handle_t *handle,
 /* simple file updates like chmod, etc. */
 #define OCFS2_INODE_UPDATE_CREDITS 1
+/* group extend. inode update and last group update. */
+#define OCFS2_GROUP_EXTEND_CREDITS	(OCFS2_INODE_UPDATE_CREDITS + 1)
+/* group add. inode update and the new group update. */
+#define OCFS2_GROUP_ADD_CREDITS	(OCFS2_INODE_UPDATE_CREDITS + 1)
 /* get one bit out of a suballocator: dinode + group descriptor +
 * prev. group desc. if we relink. */
 #define OCFS2_SUBALLOC_ALLOC (3)

--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -75,18 +75,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 					  struct inode *local_alloc_inode);
-/*
- * Determine how large our local alloc window should be, in bits.
- *
- * These values (and the behavior in ocfs2_alloc_should_use_local) have
- * been chosen so that most allocations, including new block groups go
- * through local alloc.
- */
 static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb)
 {
-	BUG_ON(osb->s_clustersize_bits < 12);
+	BUG_ON(osb->s_clustersize_bits > 20);
-	return 2048 >> (osb->s_clustersize_bits - 12);
+	/* Size local alloc windows by the megabyte */
+	return osb->local_alloc_size << (20 - osb->s_clustersize_bits);
 }
 /*
@@ -96,18 +90,23 @@ static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb)
 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
 {
 	int la_bits = ocfs2_local_alloc_window_bits(osb);
+	int ret = 0;
 	if (osb->local_alloc_state != OCFS2_LA_ENABLED)
-		return 0;
+		goto bail;
 	/* la_bits should be at least twice the size (in clusters) of
 	 * a new block group. We want to be sure block group
 	 * allocations go through the local alloc, so allow an
 	 * allocation to take up to half the bitmap. */
 	if (bits > (la_bits / 2))
-		return 0;
+		goto bail;
-	return 1;
+	ret = 1;
+bail:
+	mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
+	     osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
+	return ret;
 }
 int ocfs2_load_local_alloc(struct ocfs2_super *osb)
@@ -121,6 +120,19 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 	mlog_entry_void();
+	if (ocfs2_mount_local(osb))
+		goto bail;
+	if (osb->local_alloc_size == 0)
+		goto bail;
+	if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) {
+		mlog(ML_NOTICE, "Requested local alloc window %d is larger "
+		     "than max possible %u. Using defaults.\n",
+		     ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1));
+		osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
+	}
 	/* read the alloc off disk */
 	inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
 					    osb->slot_num);
@@ -181,6 +193,9 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 	if (inode)
 		iput(inode);
+	mlog(0, "Local alloc window bits = %d\n",
+	     ocfs2_local_alloc_window_bits(osb));
 	mlog_exit(status);
 	return status;
 }
@@ -231,7 +246,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	mutex_lock(&main_bm_inode->i_mutex);
-	status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1);
+	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_mutex;
@@ -286,7 +301,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	if (main_bm_bh)
 		brelse(main_bm_bh);
-	ocfs2_meta_unlock(main_bm_inode, 1);
+	ocfs2_inode_unlock(main_bm_inode, 1);
 out_mutex:
 	mutex_unlock(&main_bm_inode->i_mutex);
@@ -399,7 +414,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 	mutex_lock(&main_bm_inode->i_mutex);
-	status = ocfs2_meta_lock(main_bm_inode, &main_bm_bh, 1);
+	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_mutex;
@@ -424,7 +439,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
 	ocfs2_commit_trans(osb, handle);
 out_unlock:
-	ocfs2_meta_unlock(main_bm_inode, 1);
+	ocfs2_inode_unlock(main_bm_inode, 1);
 out_mutex:
 	mutex_unlock(&main_bm_inode->i_mutex);
@@ -521,6 +536,9 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 		iput(local_alloc_inode);
 	}
+	mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num,
+	     status);
 	mlog_exit(status);
 	return status;
 }

--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * locks.c
+ *
+ * Userspace file locking support
+ *
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <linux/fs.h>
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+#include "ocfs2.h"
+#include "dlmglue.h"
+#include "file.h"
+#include "locks.h"
+static int ocfs2_do_flock(struct file *file, struct inode *inode,
+			  int cmd, struct file_lock *fl)
+{
+	int ret = 0, level = 0, trylock = 0;
+	struct ocfs2_file_private *fp = file->private_data;
+	struct ocfs2_lock_res *lockres = &fp->fp_flock;
+	if (fl->fl_type == F_WRLCK)
+		level = 1;
+	if (!IS_SETLKW(cmd))
+		trylock = 1;
+	mutex_lock(&fp->fp_mutex);
+	if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
+	    lockres->l_level > LKM_NLMODE) {
+		int old_level = 0;
+		if (lockres->l_level == LKM_EXMODE)
+			old_level = 1;
+		if (level == old_level)
+			goto out;
+		/*
+		 * Converting an existing lock is not guaranteed to be
+		 * atomic, so we can get away with simply unlocking
+		 * here and allowing the lock code to try at the new
+		 * level.
+		 */
+		flock_lock_file_wait(file,
+				     &(struct file_lock){.fl_type = F_UNLCK});
+		ocfs2_file_unlock(file);
+	}
+	ret = ocfs2_file_lock(file, level, trylock);
+	if (ret) {
+		if (ret == -EAGAIN && trylock)
+			ret = -EWOULDBLOCK;
+		else
+			mlog_errno(ret);
+		goto out;
+	}
+	ret = flock_lock_file_wait(file, fl);
+out:
+	mutex_unlock(&fp->fp_mutex);
+	return ret;
+}
+static int ocfs2_do_funlock(struct file *file, int cmd, struct file_lock *fl)
+{
+	int ret;
+	struct ocfs2_file_private *fp = file->private_data;
+	mutex_lock(&fp->fp_mutex);
+	ocfs2_file_unlock(file);
+	ret = flock_lock_file_wait(file, fl);
+	mutex_unlock(&fp->fp_mutex);
+	return ret;
+}
+/*
+ * Overall flow of ocfs2_flock() was influenced by gfs2_flock().
+ */
+int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	if (!(fl->fl_flags & FL_FLOCK))
+		return -ENOLCK;
+	if (__mandatory_lock(inode))
+		return -ENOLCK;
+	if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
+	    ocfs2_mount_local(osb))
+		return flock_lock_file_wait(file, fl);
+	if (fl->fl_type == F_UNLCK)
+		return ocfs2_do_funlock(file, cmd, fl);
+	else
+		return ocfs2_do_flock(file, inode, cmd, fl);
+}
--- a/fs/ocfs2/vote.h
+++ b/fs/ocfs2/vote.h
 /* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
- * vote.h
+ * locks.h
 *
- * description here
+ * Function prototypes for Userspace file locking support
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
@@ -23,26 +23,9 @@
 * Boston, MA 021110-1307, USA.
 */
+#ifndef OCFS2_LOCKS_H
+#define OCFS2_LOCKS_H
-#ifndef VOTE_H
+int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
-#define VOTE_H
-int ocfs2_vote_thread(void *arg);
+#endif /* OCFS2_LOCKS_H */
-static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
-{
-	spin_lock(&osb->vote_task_lock);
-	/* make sure the voting thread gets a swipe at whatever changes
-	 * the caller may have made to the voting state */
-	osb->vote_wake_sequence++;
-	spin_unlock(&osb->vote_task_lock);
-	wake_up(&osb->vote_event);
-}
-int ocfs2_request_mount_vote(struct ocfs2_super *osb);
-int ocfs2_request_umount_vote(struct ocfs2_super *osb);
-int ocfs2_register_net_handlers(struct ocfs2_super *osb);
-void ocfs2_unregister_net_handlers(struct ocfs2_super *osb);
-void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb,
-					int node_num);
-#endif
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -168,7 +168,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	 * node. Taking the data lock will also ensure that we don't
 	 * attempt page truncation as part of a downconvert.
 	 */
-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
+	ret = ocfs2_inode_lock(inode, &di_bh, 1);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -181,21 +181,12 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	 */
 	down_write(&OCFS2_I(inode)->ip_alloc_sem);
-	ret = ocfs2_data_lock(inode, 1);
-	if (ret < 0) {
-		mlog_errno(ret);
-		goto out_meta_unlock;
-	}
 	ret = __ocfs2_page_mkwrite(inode, di_bh, page);
-	ocfs2_data_unlock(inode, 1);
-out_meta_unlock:
 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
 	brelse(di_bh);
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 out:
 	ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
@@ -214,13 +205,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int ret = 0, lock_level = 0;
-	ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
+	ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode,
 				    file->f_vfsmnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
 	}
-	ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
+	ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level);
 out:
 	vma->vm_ops = &ocfs2_file_vm_ops;
 	vma->vm_flags |= VM_CAN_NONLINEAR;

--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -60,7 +60,6 @@
 #include "symlink.h"
 #include "sysfile.h"
 #include "uptodate.h"
-#include "vote.h"
 #include "buffer_head_io.h"
@@ -116,7 +115,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
-	status = ocfs2_meta_lock(dir, NULL, 0);
+	status = ocfs2_inode_lock(dir, NULL, 0);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -129,7 +128,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	if (status < 0)
 		goto bail_add;
-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0);
 	if (IS_ERR(inode)) {
 		ret = ERR_PTR(-EACCES);
 		goto bail_unlock;
@@ -176,8 +175,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	/* Don't drop the cluster lock until *after* the d_add --
 	 * unlink on another node will message us to remove that
 	 * dentry under this lock so otherwise we can race this with
-	 * the vote thread and have a stale dentry. */
+	 * the downconvert thread and have a stale dentry. */
-	ocfs2_meta_unlock(dir, 0);
+	ocfs2_inode_unlock(dir, 0);
 bail:
@@ -209,7 +208,7 @@ static int ocfs2_mknod(struct inode *dir,
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
-	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
+	status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -323,7 +322,7 @@ static int ocfs2_mknod(struct inode *dir,
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
-	ocfs2_meta_unlock(dir, 1);
+	ocfs2_inode_unlock(dir, 1);
 	if (status == -ENOSPC)
 		mlog(0, "Disk is full\n");
@@ -553,7 +552,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
-	err = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
+	err = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
@@ -578,7 +577,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out;
 	}
-	err = ocfs2_meta_lock(inode, &fe_bh, 1);
+	err = ocfs2_inode_lock(inode, &fe_bh, 1);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
@@ -643,10 +642,10 @@ static int ocfs2_link(struct dentry *old_dentry,
 out_commit:
 	ocfs2_commit_trans(osb, handle);
 out_unlock_inode:
-	ocfs2_meta_unlock(inode, 1);
+	ocfs2_inode_unlock(inode, 1);
 out:
-	ocfs2_meta_unlock(dir, 1);
+	ocfs2_inode_unlock(dir, 1);
 	if (de_bh)
 		brelse(de_bh);
@@ -720,7 +719,7 @@ static int ocfs2_unlink(struct inode *dir,
 		return -EPERM;
 	}
-	status = ocfs2_meta_lock(dir, &parent_node_bh, 1);
+	status = ocfs2_inode_lock(dir, &parent_node_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -745,7 +744,7 @@ static int ocfs2_unlink(struct inode *dir,
 		goto leave;
 	}
-	status = ocfs2_meta_lock(inode, &fe_bh, 1);
+	status = ocfs2_inode_lock(inode, &fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -765,7 +764,7 @@ static int ocfs2_unlink(struct inode *dir,
 	status = ocfs2_remote_dentry_delete(dentry);
 	if (status < 0) {
-		/* This vote should succeed under all normal
+		/* This remote delete should succeed under all normal
 		 * circumstances. */
 		mlog_errno(status);
 		goto leave;
@@ -841,13 +840,13 @@ static int ocfs2_unlink(struct inode *dir,
 		ocfs2_commit_trans(osb, handle);
 	if (child_locked)
-		ocfs2_meta_unlock(inode, 1);
+		ocfs2_inode_unlock(inode, 1);
-	ocfs2_meta_unlock(dir, 1);
+	ocfs2_inode_unlock(dir, 1);
 	if (orphan_dir) {
 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
-		ocfs2_meta_unlock(orphan_dir, 1);
+		ocfs2_inode_unlock(orphan_dir, 1);
 		mutex_unlock(&orphan_dir->i_mutex);
 		iput(orphan_dir);
 	}
@@ -908,7 +907,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 			inode1 = tmpinode;
 		}
 		/* lock id2 */
-		status = ocfs2_meta_lock(inode2, bh2, 1);
+		status = ocfs2_inode_lock(inode2, bh2, 1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -917,14 +916,14 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	}
 	/* lock id1 */
-	status = ocfs2_meta_lock(inode1, bh1, 1);
+	status = ocfs2_inode_lock(inode1, bh1, 1);
 	if (status < 0) {
 		/*
 		 * An error return must mean that no cluster locks
 		 * were held on function exit.
 		 */
 		if (oi1->ip_blkno != oi2->ip_blkno)
-			ocfs2_meta_unlock(inode2, 1);
+			ocfs2_inode_unlock(inode2, 1);
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -937,10 +936,10 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
 {
-	ocfs2_meta_unlock(inode1, 1);
+	ocfs2_inode_unlock(inode1, 1);
 	if (inode1 != inode2)
-		ocfs2_meta_unlock(inode2, 1);
+		ocfs2_inode_unlock(inode2, 1);
 }
 static int ocfs2_rename(struct inode *old_dir,
@@ -1031,10 +1030,11 @@ static int ocfs2_rename(struct inode *old_dir,
 	/*
 	 * Aside from allowing a meta data update, the locking here
-	 * also ensures that the vote thread on other nodes won't have
+	 * also ensures that the downconvert thread on other nodes
-	 * to concurrently downconvert the inode and the dentry locks.
+	 * won't have to concurrently downconvert the inode and the
+	 * dentry locks.
 	 */
-	status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1);
+	status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -1143,7 +1143,7 @@ static int ocfs2_rename(struct inode *old_dir,
 			goto bail;
 		}
-		status = ocfs2_meta_lock(new_inode, &newfe_bh, 1);
+		status = ocfs2_inode_lock(new_inode, &newfe_bh, 1);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -1355,14 +1355,14 @@ static int ocfs2_rename(struct inode *old_dir,
 		ocfs2_double_unlock(old_dir, new_dir);
 	if (old_child_locked)
-		ocfs2_meta_unlock(old_inode, 1);
+		ocfs2_inode_unlock(old_inode, 1);
 	if (new_child_locked)
-		ocfs2_meta_unlock(new_inode, 1);
+		ocfs2_inode_unlock(new_inode, 1);
 	if (orphan_dir) {
 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
-		ocfs2_meta_unlock(orphan_dir, 1);
+		ocfs2_inode_unlock(orphan_dir, 1);
 		mutex_unlock(&orphan_dir->i_mutex);
 		iput(orphan_dir);
 	}
@@ -1530,7 +1530,7 @@ static int ocfs2_symlink(struct inode *dir,
 	credits = ocfs2_calc_symlink_credits(sb);
 	/* lock the parent directory */
-	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
+	status = ocfs2_inode_lock(dir, &parent_fe_bh, 1);
 	if (status < 0) {
 		if (status != -ENOENT)
 			mlog_errno(status);
@@ -1657,7 +1657,7 @@ static int ocfs2_symlink(struct inode *dir,
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
-	ocfs2_meta_unlock(dir, 1);
+	ocfs2_inode_unlock(dir, 1);
 	if (new_fe_bh)
 		brelse(new_fe_bh);
@@ -1735,7 +1735,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 	mutex_lock(&orphan_dir_inode->i_mutex);
-	status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -1745,7 +1745,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 					      orphan_dir_bh, name,
 					      OCFS2_ORPHAN_NAMELEN, de_bh);
 	if (status < 0) {
-		ocfs2_meta_unlock(orphan_dir_inode, 1);
+		ocfs2_inode_unlock(orphan_dir_inode, 1);
 		mlog_errno(status);
 		goto leave;

--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -101,6 +101,7 @@ enum ocfs2_unlock_action {
 					       * about to be
 					       * dropped. */
 #define OCFS2_LOCK_QUEUED        (0x00000100) /* queued for downconvert */
+#define OCFS2_LOCK_NOCACHE       (0x00000200) /* don't use a holder count */
 struct ocfs2_lock_res_ops;
@@ -170,6 +171,7 @@ enum ocfs2_mount_options
 	OCFS2_MOUNT_NOINTR  = 1 << 2,   /* Don't catch signals */
 	OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
 	OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
+	OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
 };
 #define OCFS2_OSB_SOFT_RO	0x0001
@@ -189,9 +191,7 @@ struct ocfs2_super
 	struct ocfs2_slot_info *slot_info;
 	spinlock_t node_map_lock;
-	struct ocfs2_node_map mounted_map;
 	struct ocfs2_node_map recovery_map;
-	struct ocfs2_node_map umount_map;
 	u64 root_blkno;
 	u64 system_dir_blkno;
@@ -231,7 +231,9 @@ struct ocfs2_super
 	wait_queue_head_t checkpoint_event;
 	atomic_t needs_checkpoint;
 	struct ocfs2_journal *journal;
+	unsigned long osb_commit_interval;
+	int local_alloc_size;
 	enum ocfs2_local_alloc_state local_alloc_state;
 	struct buffer_head *local_alloc_bh;
 	u64 la_last_gd;
@@ -254,28 +256,21 @@ struct ocfs2_super
 	wait_queue_head_t recovery_event;
-	spinlock_t vote_task_lock;
+	spinlock_t dc_task_lock;
-	struct task_struct *vote_task;
+	struct task_struct *dc_task;
-	wait_queue_head_t vote_event;
+	wait_queue_head_t dc_event;
-	unsigned long vote_wake_sequence;
+	unsigned long dc_wake_sequence;
-	unsigned long vote_work_sequence;
+	unsigned long dc_work_sequence;
+	/*
+	 * Any thread can add locks to the list, but the downconvert
+	 * thread is the only one allowed to remove locks. Any change
+	 * to this rule requires updating
+	 * ocfs2_downconvert_thread_do_work().
+	 */
 	struct list_head blocked_lock_list;
 	unsigned long blocked_lock_count;
-	struct list_head vote_list;
-	int vote_count;
-	u32 net_key;
-	spinlock_t net_response_lock;
-	unsigned int net_response_ids;
-	struct list_head net_response_list;
-	struct o2hb_callback_func osb_hb_up;
-	struct o2hb_callback_func osb_hb_down;
-	struct list_head	osb_net_handlers;
 	wait_queue_head_t		osb_mount_event;
 	/* Truncate log info */

--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -231,6 +231,20 @@ struct ocfs2_space_resv {
 #define OCFS2_IOC_RESVSP64	_IOW ('X', 42, struct ocfs2_space_resv)
 #define OCFS2_IOC_UNRESVSP64	_IOW ('X', 43, struct ocfs2_space_resv)
+/* Used to pass group descriptor data when online resize is done */
+struct ocfs2_new_group_input {
+	__u64 group;		/* Group descriptor's blkno. */
+	__u32 clusters;		/* Total number of clusters in this group */
+	__u32 frees;		/* Total free clusters in this group */
+	__u16 chain;		/* Chain for this group */
+	__u16 reserved1;
+	__u32 reserved2;
+};
+#define OCFS2_IOC_GROUP_EXTEND	_IOW('o', 1, int)
+#define OCFS2_IOC_GROUP_ADD	_IOW('o', 2,struct ocfs2_new_group_input)
+#define OCFS2_IOC_GROUP_ADD64	_IOW('o', 3,struct ocfs2_new_group_input)
 /*
 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
 */
@@ -256,6 +270,14 @@ struct ocfs2_space_resv {
 /* Journal limits (in bytes) */
 #define OCFS2_MIN_JOURNAL_SIZE		(4 * 1024 * 1024)
+/*
+ * Default local alloc size (in megabytes)
+ *
+ * The value chosen should be such that most allocations, including new
+ * block groups, use local alloc.
+ */
+#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE	8
 struct ocfs2_system_inode_info {
 	char	*si_name;
 	int	si_iflags;

--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -45,6 +45,7 @@ enum ocfs2_lock_type {
 	OCFS2_LOCK_TYPE_RW,
 	OCFS2_LOCK_TYPE_DENTRY,
 	OCFS2_LOCK_TYPE_OPEN,
+	OCFS2_LOCK_TYPE_FLOCK,
 	OCFS2_NUM_LOCK_TYPES
 };
@@ -73,6 +74,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 		case OCFS2_LOCK_TYPE_OPEN:
 			c = 'O';
 			break;
+		case OCFS2_LOCK_TYPE_FLOCK:
+			c = 'F';
+			break;
 		default:
 			c = '\0';
 	}
@@ -90,6 +94,7 @@ static char *ocfs2_lock_type_strings[] = {
 	[OCFS2_LOCK_TYPE_RW] = "Write/Read",
 	[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
 	[OCFS2_LOCK_TYPE_OPEN] = "Open",
+	[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
 };
 static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)

--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
--- a/fs/ocfs2/resize.h
+++ b/fs/ocfs2/resize.h
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * resize.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#ifndef OCFS2_RESIZE_H
+#define OCFS2_RESIZE_H
+/*
+ * Online resize prototypes. struct ocfs2_new_group_input is declared in
+ * ocfs2_fs.h.
+ */
+int ocfs2_group_extend(struct inode *inode, int new_clusters);
+int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input);
+#endif /* OCFS2_RESIZE_H */
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -48,25 +48,6 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si,
 			      s16 slot_num,
 			      s16 node_num);
-/* Use the slot information we've collected to create a map of mounted
- * nodes. Should be holding an EX on super block. assumes slot info is
- * up to date. Note that we call this *after* we find a slot, so our
- * own node should be set in the map too... */
-void ocfs2_populate_mounted_map(struct ocfs2_super *osb)
-{
-	int i;
-	struct ocfs2_slot_info *si = osb->slot_info;
-	spin_lock(&si->si_lock);
-	for (i = 0; i < si->si_size; i++)
-		if (si->si_global_node_nums[i] != OCFS2_INVALID_SLOT)
-			ocfs2_node_map_set_bit(osb, &osb->mounted_map,
-					      si->si_global_node_nums[i]);
-	spin_unlock(&si->si_lock);
-}
 /* post the slot information on disk into our slot_info struct. */
 void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 {

--- a/fs/ocfs2/slot_map.h
+++ b/fs/ocfs2/slot_map.h
@@ -52,8 +52,6 @@ s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 void ocfs2_clear_slot(struct ocfs2_slot_info *si,
 		      s16 slot_num);
-void ocfs2_populate_mounted_map(struct ocfs2_super *osb);
 static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si,
 				      int slot_num)
 {

--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -101,8 +101,6 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
 						   u64 bg_blkno,
 						   u16 bg_bit_off);
-static inline u64 ocfs2_which_cluster_group(struct inode *inode,
-					    u32 cluster);
 static inline void ocfs2_block_to_cluster_group(struct inode *inode,
 						u64 data_blkno,
 						u64 *bg_blkno,
@@ -114,7 +112,7 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
 	if (inode) {
 		if (ac->ac_which != OCFS2_AC_USE_LOCAL)
-			ocfs2_meta_unlock(inode, 1);
+			ocfs2_inode_unlock(inode, 1);
 		mutex_unlock(&inode->i_mutex);
@@ -131,9 +129,9 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 }
 /* somewhat more expensive than our other checks, so use sparingly. */
-static int ocfs2_check_group_descriptor(struct super_block *sb,
+int ocfs2_check_group_descriptor(struct super_block *sb,
-					struct ocfs2_dinode *di,
+				 struct ocfs2_dinode *di,
-					struct ocfs2_group_desc *gd)
+				 struct ocfs2_group_desc *gd)
 {
 	unsigned int max_bits;
@@ -412,7 +410,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 	mutex_lock(&alloc_inode->i_mutex);
-	status = ocfs2_meta_lock(alloc_inode, &bh, 1);
+	status = ocfs2_inode_lock(alloc_inode, &bh, 1);
 	if (status < 0) {
 		mutex_unlock(&alloc_inode->i_mutex);
 		iput(alloc_inode);
@@ -1443,8 +1441,7 @@ static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
 /* given a cluster offset, calculate which block group it belongs to
 * and return that block offset. */
-static inline u64 ocfs2_which_cluster_group(struct inode *inode,
+u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
-					    u32 cluster)
 {
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	u32 group_no;
@@ -1519,8 +1516,9 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
 		if (min_clusters > (osb->bitmap_cpg - 1)) {
 			/* The only paths asking for contiguousness
 			 * should know about this already. */
-			mlog(ML_ERROR, "minimum allocation requested exceeds "
+			mlog(ML_ERROR, "minimum allocation requested %u exceeds "
-				       "group bitmap size!");
+			     "group bitmap size %u!\n", min_clusters,
+			     osb->bitmap_cpg);
 			status = -ENOSPC;
 			goto bail;
 		}

--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -147,4 +147,12 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
 				      struct ocfs2_alloc_context *ac);
+/* given a cluster offset, calculate which block group it belongs to
+ * and return that block offset. */
+u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
+/* somewhat more expensive than our other checks, so use sparingly. */
+int ocfs2_check_group_descriptor(struct super_block *sb,
+				 struct ocfs2_dinode *di,
+				 struct ocfs2_group_desc *gd);
 #endif /* _CHAINALLOC_H_ */
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -65,7 +65,6 @@
 #include "sysfile.h"
 #include "uptodate.h"
 #include "ver.h"
-#include "vote.h"
 #include "buffer_head_io.h"
@@ -84,9 +83,11 @@ MODULE_LICENSE("GPL");
 struct mount_options
 {
+	unsigned long	commit_interval;
 	unsigned long	mount_opt;
 	unsigned int	atime_quantum;
 	signed short	slot;
+	unsigned int	localalloc_opt;
 };
 static int ocfs2_parse_options(struct super_block *sb, char *options,
@@ -150,6 +151,9 @@ enum {
 	Opt_data_writeback,
 	Opt_atime_quantum,
 	Opt_slot,
+	Opt_commit,
+	Opt_localalloc,
+	Opt_localflocks,
 	Opt_err,
 };
@@ -165,6 +169,9 @@ static match_table_t tokens = {
 	{Opt_data_writeback, "data=writeback"},
 	{Opt_atime_quantum, "atime_quantum=%u"},
 	{Opt_slot, "preferred_slot=%u"},
+	{Opt_commit, "commit=%u"},
+	{Opt_localalloc, "localalloc=%d"},
+	{Opt_localflocks, "localflocks"},
 	{Opt_err, NULL}
 };
@@ -213,7 +220,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 	mlog_entry_void();
-	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
+	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
 	if (IS_ERR(new)) {
 		status = PTR_ERR(new);
 		mlog_errno(status);
@@ -221,7 +228,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 	}
 	osb->root_inode = new;
-	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
+	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
 	if (IS_ERR(new)) {
 		status = PTR_ERR(new);
 		mlog_errno(status);
@@ -443,6 +450,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
 		osb->s_mount_opt = parsed_options.mount_opt;
 		osb->s_atime_quantum = parsed_options.atime_quantum;
 		osb->preferred_slot = parsed_options.slot;
+		if (parsed_options.commit_interval)
+			osb->osb_commit_interval = parsed_options.commit_interval;
 		if (!ocfs2_is_hard_readonly(osb))
 			ocfs2_set_journal_params(osb);
@@ -597,6 +606,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
 	osb->s_mount_opt = parsed_options.mount_opt;
 	osb->s_atime_quantum = parsed_options.atime_quantum;
 	osb->preferred_slot = parsed_options.slot;
+	osb->osb_commit_interval = parsed_options.commit_interval;
+	osb->local_alloc_size = parsed_options.localalloc_opt;
 	sb->s_magic = OCFS2_SUPER_MAGIC;
@@ -747,9 +758,11 @@ static int ocfs2_parse_options(struct super_block *sb,
 	mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
 		   options ? options : "(none)");
+	mopt->commit_interval = 0;
 	mopt->mount_opt = 0;
 	mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
 	mopt->slot = OCFS2_INVALID_SLOT;
+	mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
 	if (!options) {
 		status = 1;
@@ -816,6 +829,41 @@ static int ocfs2_parse_options(struct super_block *sb,
 			if (option)
 				mopt->slot = (s16)option;
 			break;
+		case Opt_commit:
+			option = 0;
+			/* Unparsable and negative intervals fail the same way. */
+			if (match_int(&args[0], &option) || option < 0) {
+				status = 0;
+				goto bail;
+			}
+			/* commit=0 selects the JBD default commit age. */
+			if (option == 0)
+				option = JBD_DEFAULT_MAX_COMMIT_AGE;
+			mopt->commit_interval = HZ * option;
+			break;
+		case Opt_localalloc:
+			option = 0;
+			if (match_int(&args[0], &option)) {
+				status = 0;
+				goto bail;
+			}
+			if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8))
+				mopt->localalloc_opt = option;
+			break;
+		case Opt_localflocks:
+			/*
+			 * Changing this during remount could race
+			 * flock() requests, or "unbalance" existing
+			 * ones (e.g., a lock is taken in one mode but
+			 * dropped in the other). If users care enough
+			 * to flip locking modes during remount, we
+			 * could add a "local" flag to individual
+			 * flock structures for proper tracking of
+			 * state.
+			 */
+			if (!is_remount)
+				mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
+			break;
 		default:
 			mlog(ML_ERROR,
 			     "Unrecognized mount option \"%s\" "
@@ -864,6 +912,16 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
 	if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM)
 		seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
+	if (osb->osb_commit_interval)
+		seq_printf(s, ",commit=%u",
+			   (unsigned) (osb->osb_commit_interval / HZ));
+	if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
+		seq_printf(s, ",localalloc=%d", osb->local_alloc_size);
+	if (opts & OCFS2_MOUNT_LOCALFLOCKS)
+		seq_printf(s, ",localflocks");	/* options are comma-prefixed */
 	return 0;
 }
@@ -965,7 +1023,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 		goto bail;
 	}
-	status = ocfs2_meta_lock(inode, &bh, 0);
+	status = ocfs2_inode_lock(inode, &bh, 0);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -989,7 +1047,7 @@ static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 	brelse(bh);
-	ocfs2_meta_unlock(inode, 0);
+	ocfs2_inode_unlock(inode, 0);
 	status = 0;
 bail:
 	if (inode)
@@ -1020,8 +1078,7 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data)
 	oi->ip_clusters = 0;
 	ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
-	ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
+	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
-	ocfs2_lock_res_init_once(&oi->ip_data_lockres);
 	ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 	ocfs2_metadata_cache_init(&oi->vfs_inode);
@@ -1117,25 +1174,12 @@ static int ocfs2_mount_volume(struct super_block *sb)
 		goto leave;
 	}
-	status = ocfs2_register_hb_callbacks(osb);
-	if (status < 0) {
-		mlog_errno(status);
-		goto leave;
-	}
 	status = ocfs2_dlm_init(osb);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
-	/* requires vote_thread to be running. */
-	status = ocfs2_register_net_handlers(osb);
-	if (status < 0) {
-		mlog_errno(status);
-		goto leave;
-	}
 	status = ocfs2_super_lock(osb, 1);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1150,8 +1194,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
 		goto leave;
 	}
-	ocfs2_populate_mounted_map(osb);
 	/* load all node-local system inodes */
 	status = ocfs2_init_local_system_inodes(osb);
 	if (status < 0) {
@@ -1174,15 +1216,6 @@ static int ocfs2_mount_volume(struct super_block *sb)
 	if (ocfs2_mount_local(osb))
 		goto leave;
-	/* This should be sent *after* we recovered our journal as it
-	 * will cause other nodes to unmark us as needing
-	 * recovery. However, we need to send it *before* dropping the
-	 * super block lock as otherwise their recovery threads might
-	 * try to clean us up while we're live! */
-	status = ocfs2_request_mount_vote(osb);
-	if (status < 0)
-		mlog_errno(status);
 leave:
 	if (unlock_super)
 		ocfs2_super_unlock(osb, 1);
@@ -1240,10 +1273,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 			mlog_errno(tmp);
 			return;
 		}
-		tmp = ocfs2_request_umount_vote(osb);
-		if (tmp < 0)
-			mlog_errno(tmp);
 	}
 	if (osb->slot_num != OCFS2_INVALID_SLOT)
@@ -1254,13 +1283,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 	ocfs2_release_system_inodes(osb);
-	if (osb->dlm) {
+	if (osb->dlm)
-		ocfs2_unregister_net_handlers(osb);
 		ocfs2_dlm_shutdown(osb);
-	}
-	ocfs2_clear_hb_callbacks(osb);
 	debugfs_remove(osb->osb_debug_root);
@@ -1315,7 +1339,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	int i, cbits, bbits;
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 	struct inode *inode = NULL;
-	struct buffer_head *bitmap_bh = NULL;
 	struct ocfs2_journal *journal;
 	__le32 uuid_net_key;
 	struct ocfs2_super *osb;
@@ -1344,19 +1367,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->s_sectsize_bits = blksize_bits(sector_size);
 	BUG_ON(!osb->s_sectsize_bits);
-	osb->net_response_ids = 0;
-	spin_lock_init(&osb->net_response_lock);
-	INIT_LIST_HEAD(&osb->net_response_list);
-	INIT_LIST_HEAD(&osb->osb_net_handlers);
 	init_waitqueue_head(&osb->recovery_event);
-	spin_lock_init(&osb->vote_task_lock);
+	spin_lock_init(&osb->dc_task_lock);
-	init_waitqueue_head(&osb->vote_event);
+	init_waitqueue_head(&osb->dc_event);
-	osb->vote_work_sequence = 0;
+	osb->dc_work_sequence = 0;
-	osb->vote_wake_sequence = 0;
+	osb->dc_wake_sequence = 0;
 	INIT_LIST_HEAD(&osb->blocked_lock_list);
 	osb->blocked_lock_count = 0;
-	INIT_LIST_HEAD(&osb->vote_list);
 	spin_lock_init(&osb->osb_lock);
 	atomic_set(&osb->alloc_stats.moves, 0);
@@ -1496,7 +1513,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	}
 	memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key));
-	osb->net_key = le32_to_cpu(uuid_net_key);
 	strncpy(osb->vol_label, di->id2.i_super.s_label, 63);
 	osb->vol_label[63] = '\0';
@@ -1539,25 +1555,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	}
 	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
-	/* We don't have a cluster lock on the bitmap here because
-	 * we're only interested in static information and the extra
-	 * complexity at mount time isn't worht it. Don't pass the
-	 * inode in to the read function though as we don't want it to
-	 * be put in the cache. */
-	status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
-				  NULL);
 	iput(inode);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
-	di = (struct ocfs2_dinode *) bitmap_bh->b_data;
+	osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8;
-	osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
-	brelse(bitmap_bh);
-	mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
-	     (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
 	status = ocfs2_init_slot_info(osb);
 	if (status < 0) {

--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -112,7 +112,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
 		goto bail;
 	}
-	inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
+	inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE, type);
 	if (IS_ERR(inode)) {
 		mlog_errno(PTR_ERR(inode));
 		inode = NULL;

--- a/fs/ocfs2/ver.c
+++ b/fs/ocfs2/ver.c
@@ -29,7 +29,7 @@
 #include "ver.h"
-#define OCFS2_BUILD_VERSION "1.3.3"
+#define OCFS2_BUILD_VERSION "1.5.0"
 #define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION

--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -49,6 +49,7 @@ header-y += comstats.h
 header-y += const.h
 header-y += cgroupstats.h
 header-y += cycx_cfm.h
+header-y += dlmconstants.h
 header-y += dlm_device.h
 header-y += dlm_netlink.h
 header-y += dm-ioctl.h

--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -19,148 +19,12 @@
 * routines and structures to use DLM lockspaces
 */
-/*
+/* Lock levels and flags are here */
- * Lock Modes
+#include <linux/dlmconstants.h>
- */
-#define DLM_LOCK_IV		-1	/* invalid */
-#define DLM_LOCK_NL		0	/* null */
-#define DLM_LOCK_CR		1	/* concurrent read */
-#define DLM_LOCK_CW		2	/* concurrent write */
-#define DLM_LOCK_PR		3	/* protected read */
-#define DLM_LOCK_PW		4	/* protected write */
-#define DLM_LOCK_EX		5	/* exclusive */
-/*
- * Maximum size in bytes of a dlm_lock name
- */
 #define DLM_RESNAME_MAXLEN	64
-/*
- * Flags to dlm_lock
- *
- * DLM_LKF_NOQUEUE
- *
- * Do not queue the lock request on the wait queue if it cannot be granted
- * immediately.  If the lock cannot be granted because of this flag, DLM will
- * either return -EAGAIN from the dlm_lock call or will return 0 from
- * dlm_lock and -EAGAIN in the lock status block when the AST is executed.
- *
- * DLM_LKF_CANCEL
- *
- * Used to cancel a pending lock request or conversion.  A converting lock is
- * returned to its previously granted mode.
- *
- * DLM_LKF_CONVERT
- *
- * Indicates a lock conversion request.  For conversions the name and namelen
- * are ignored and the lock ID in the LKSB is used to identify the lock.
- *
- * DLM_LKF_VALBLK
- *
- * Requests DLM to return the current contents of the lock value block in the
- * lock status block.  When this flag is set in a lock conversion from PW or EX
- * modes, DLM assigns the value specified in the lock status block to the lock
- * value block of the lock resource.  The LVB is a DLM_LVB_LEN size array
- * containing application-specific information.
- *
- * DLM_LKF_QUECVT
- *
- * Force a conversion request to be queued, even if it is compatible with
- * the granted modes of other locks on the same resource.
- *
- * DLM_LKF_IVVALBLK
- *
- * Invalidate the lock value block.
- *
- * DLM_LKF_CONVDEADLK
- *
- * Allows the dlm to resolve conversion deadlocks internally by demoting the
- * granted mode of a converting lock to NL.  The DLM_SBF_DEMOTED flag is
- * returned for a conversion that's been effected by this.
- *
- * DLM_LKF_PERSISTENT
- *
- * Only relevant to locks originating in userspace.  A persistent lock will not
- * be removed if the process holding the lock exits.
- *
- * DLM_LKF_NODLCKWT
- *
- * Do not cancel the lock if it gets into conversion deadlock.
- * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN.
- *
- * DLM_LKF_NODLCKBLK
- *
- * net yet implemented
- *
- * DLM_LKF_EXPEDITE
- *
- * Used only with new requests for NL mode locks.  Tells the lock manager
- * to grant the lock, ignoring other locks in convert and wait queues.
- *
- * DLM_LKF_NOQUEUEBAST
- *
- * Send blocking AST's before returning -EAGAIN to the caller.  It is only
- * used along with the NOQUEUE flag.  Blocking AST's are not sent for failed
- * NOQUEUE requests otherwise.
- *
- * DLM_LKF_HEADQUE
- *
- * Add a lock to the head of the convert or wait queue rather than the tail.
- *
- * DLM_LKF_NOORDER
- *
- * Disregard the standard grant order rules and grant a lock as soon as it
- * is compatible with other granted locks.
- *
- * DLM_LKF_ORPHAN
- *
- * not yet implemented
- *
- * DLM_LKF_ALTPR
- *
- * If the requested mode cannot be granted immediately, try to grant the lock
- * in PR mode instead.  If this alternate mode is granted instead of the
- * requested mode, DLM_SBF_ALTMODE is returned in the lksb.
- *
- * DLM_LKF_ALTCW
- *
- * The same as ALTPR, but the alternate mode is CW.
- *
- * DLM_LKF_FORCEUNLOCK
- *
- * Unlock the lock even if it is converting or waiting or has sublocks.
- * Only really for use by the userland device.c code.
- *
- */
-#define DLM_LKF_NOQUEUE		0x00000001
-#define DLM_LKF_CANCEL		0x00000002
-#define DLM_LKF_CONVERT		0x00000004
-#define DLM_LKF_VALBLK		0x00000008
-#define DLM_LKF_QUECVT		0x00000010
-#define DLM_LKF_IVVALBLK	0x00000020
-#define DLM_LKF_CONVDEADLK	0x00000040
-#define DLM_LKF_PERSISTENT	0x00000080
-#define DLM_LKF_NODLCKWT	0x00000100
-#define DLM_LKF_NODLCKBLK	0x00000200
-#define DLM_LKF_EXPEDITE	0x00000400
-#define DLM_LKF_NOQUEUEBAST	0x00000800
-#define DLM_LKF_HEADQUE		0x00001000
-#define DLM_LKF_NOORDER		0x00002000
-#define DLM_LKF_ORPHAN		0x00004000
-#define DLM_LKF_ALTPR		0x00008000
-#define DLM_LKF_ALTCW		0x00010000
-#define DLM_LKF_FORCEUNLOCK	0x00020000
-#define DLM_LKF_TIMEOUT		0x00040000
-/*
- * Some return codes that are not in errno.h
- */
-#define DLM_ECANCEL		0x10001
-#define DLM_EUNLOCK		0x10002
 typedef void dlm_lockspace_t;

--- a/include/linux/dlmconstants.h
+++ b/include/linux/dlmconstants.h
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+#ifndef __DLMCONSTANTS_DOT_H__
+#define __DLMCONSTANTS_DOT_H__
+/*
+ * Constants used by DLM interface.
+ */
+/*
+ * Lock Modes
+ */
+#define DLM_LOCK_IV		(-1)	/* invalid */
+#define DLM_LOCK_NL		0	/* null */
+#define DLM_LOCK_CR		1	/* concurrent read */
+#define DLM_LOCK_CW		2	/* concurrent write */
+#define DLM_LOCK_PR		3	/* protected read */
+#define DLM_LOCK_PW		4	/* protected write */
+#define DLM_LOCK_EX		5	/* exclusive */
+/*
+ * Flags to dlm_lock
+ *
+ * DLM_LKF_NOQUEUE
+ *
+ * Do not queue the lock request on the wait queue if it cannot be granted
+ * immediately.  If the lock cannot be granted because of this flag, DLM will
+ * either return -EAGAIN from the dlm_lock call or will return 0 from
+ * dlm_lock and -EAGAIN in the lock status block when the AST is executed.
+ *
+ * DLM_LKF_CANCEL
+ *
+ * Used to cancel a pending lock request or conversion.  A converting lock is
+ * returned to its previously granted mode.
+ *
+ * DLM_LKF_CONVERT
+ *
+ * Indicates a lock conversion request.  For conversions the name and namelen
+ * are ignored and the lock ID in the LKSB is used to identify the lock.
+ *
+ * DLM_LKF_VALBLK
+ *
+ * Requests DLM to return the current contents of the lock value block in the
+ * lock status block.  When this flag is set in a lock conversion from PW or EX
+ * modes, DLM assigns the value specified in the lock status block to the lock
+ * value block of the lock resource.  The LVB is a DLM_LVB_LEN size array
+ * containing application-specific information.
+ *
+ * DLM_LKF_QUECVT
+ *
+ * Force a conversion request to be queued, even if it is compatible with
+ * the granted modes of other locks on the same resource.
+ *
+ * DLM_LKF_IVVALBLK
+ *
+ * Invalidate the lock value block.
+ *
+ * DLM_LKF_CONVDEADLK
+ *
+ * Allows the dlm to resolve conversion deadlocks internally by demoting the
+ * granted mode of a converting lock to NL.  The DLM_SBF_DEMOTED flag is
+ * returned for a conversion that's been affected by this.
+ *
+ * DLM_LKF_PERSISTENT
+ *
+ * Only relevant to locks originating in userspace.  A persistent lock will not
+ * be removed if the process holding the lock exits.
+ *
+ * DLM_LKF_NODLCKWT
+ *
+ * Do not cancel the lock if it gets into conversion deadlock.
+ * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN.
+ *
+ * DLM_LKF_NODLCKBLK
+ *
+ * not yet implemented
+ *
+ * DLM_LKF_EXPEDITE
+ *
+ * Used only with new requests for NL mode locks.  Tells the lock manager
+ * to grant the lock, ignoring other locks in convert and wait queues.
+ *
+ * DLM_LKF_NOQUEUEBAST
+ *
+ * Send blocking AST's before returning -EAGAIN to the caller.  It is only
+ * used along with the NOQUEUE flag.  Blocking AST's are not sent for failed
+ * NOQUEUE requests otherwise.
+ *
+ * DLM_LKF_HEADQUE
+ *
+ * Add a lock to the head of the convert or wait queue rather than the tail.
+ *
+ * DLM_LKF_NOORDER
+ *
+ * Disregard the standard grant order rules and grant a lock as soon as it
+ * is compatible with other granted locks.
+ *
+ * DLM_LKF_ORPHAN
+ *
+ * not yet implemented
+ *
+ * DLM_LKF_ALTPR
+ *
+ * If the requested mode cannot be granted immediately, try to grant the lock
+ * in PR mode instead.  If this alternate mode is granted instead of the
+ * requested mode, DLM_SBF_ALTMODE is returned in the lksb.
+ *
+ * DLM_LKF_ALTCW
+ *
+ * The same as ALTPR, but the alternate mode is CW.
+ *
+ * DLM_LKF_FORCEUNLOCK
+ *
+ * Unlock the lock even if it is converting or waiting or has sublocks.
+ * Only really for use by the userland device.c code.
+ *
+ */
+#define DLM_LKF_NOQUEUE		0x00000001
+#define DLM_LKF_CANCEL		0x00000002
+#define DLM_LKF_CONVERT		0x00000004
+#define DLM_LKF_VALBLK		0x00000008
+#define DLM_LKF_QUECVT		0x00000010
+#define DLM_LKF_IVVALBLK	0x00000020
+#define DLM_LKF_CONVDEADLK	0x00000040
+#define DLM_LKF_PERSISTENT	0x00000080
+#define DLM_LKF_NODLCKWT	0x00000100
+#define DLM_LKF_NODLCKBLK	0x00000200
+#define DLM_LKF_EXPEDITE	0x00000400
+#define DLM_LKF_NOQUEUEBAST	0x00000800
+#define DLM_LKF_HEADQUE		0x00001000
+#define DLM_LKF_NOORDER		0x00002000
+#define DLM_LKF_ORPHAN		0x00004000
+#define DLM_LKF_ALTPR		0x00008000
+#define DLM_LKF_ALTCW		0x00010000
+#define DLM_LKF_FORCEUNLOCK	0x00020000
+#define DLM_LKF_TIMEOUT		0x00040000
+/*
+ * Some return codes that are not in errno.h
+ */
+#define DLM_ECANCEL		0x10001
+#define DLM_EUNLOCK		0x10002
+#endif  /* __DLMCONSTANTS_DOT_H__ */