Merge cam.ac.uk:/rain/usr/src/bkntfs-tng-2.5 into cam.ac.uk:/usr/src/tng

a4583c49 · Anton Altaparmakov · f8c1af38 · 3c51ba14 · a4583c49 · a4583c49
Commit a4583c49 authored Mar 09, 2002 by Anton Altaparmakov
34 changed files
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
+NTFS Overview
+=============
+To mount an NTFS 1.2/3.x (Windows NT4/2000/XP) volume, use the filesystem
+type 'ntfs'. The driver currently works only in read-only mode, with no
+fault-tolerance supported.
+For ftdisk support, limited success was reported with volume sets on top of
+the md driver, although mirror and stripe sets should work as well - if the
+md driver can be talked into using the same layout as Windows NT. However,
+using the md driver will fail if any of your NTFS partitions have an odd
+number of sectors.
+Supported mount options
+=======================
+nls=name		Character set to use when returning file names.
+			Unlike VFAT, NTFS suppresses names that contain
+			unconvertible characters. Note that most character
+			sets contain insufficient characters to represent all
+			possible Unicode characters that can exist on NTFS. To
+			be sure you are not missing any files, you are advised
+			to use nls=utf8 which is capable of representing all
+			Unicode characters.
+uid=
+gid=
+umask=			Provide default owner, group, and access mode mask.
+			These options work as documented in mount(8). By
+			default, the files are owned by root and are not
+			readable by anyone else.
+fmask=
+dmask=			Instead of specifying umask which applies both to
+			files and directories, fmask applies only to files and
+			dmask only to directories.
+sloppy=<BOOL>		If sloppy is specified, ignore unknown mount options.
+			Otherwise the default behaviour is to abort mount if
+			any unknown options are found.
+errors=opt		What to do when critical file system errors are found.
+			Following values can be used for "opt":
+			  continue: DEFAULT, try to clean-up as much as
+				    possible, e.g. marking a corrupt inode as
+				    bad so it is no longer accessed.
+			  recover:  At present only supported is recovery of
+				    the boot sector from the backup copy. If a
+				    read-only mount, the recovery is done in
+				    memory only and not written to disk.
+show_inodes=opt		Allows choice of which types of inode names readdir()
+			returns, i.e. this affects what "ls" shows. Following
+			values can be used for "opt":
+			   system: show system files
+			   win32:  long file names (includes POSIX) [DEFAULT]
+			   long:   same as win32
+			   dos:    short file names only (excludes POSIX)
+			   short:  same as dos
+			   posix:  same as both win32 and dos
+			   all:    all file names
+			Note that the options are additive, i.e. specifying:
+			   show_inodes=system,show_inodes=win32,show_inodes=dos
+			is the same as specifying:
+			   show_inodes=all
+			Note that the "posix" and "all" options will show all
+			directory names, BUT the link count on each directory
+			inode entry is set to 1, due to Linux not supporting
+			directory hard links. This may well confuse some
+			userspace applications, since the directory names will
+			have the same inode numbers. Thus it is NOT advisable
+			to use the "posix" and "all" options. We provide them
+			only for completeness sake.
+			Further, note that the "system" option will not show
+			"$MFT" due to bugs/mis-features in glibc. Even though
+			it does not show, you can specifically "ls" it:
+				ls -l \$MFT
+			And of course you can stat it, too.
+			Further, note that irrespective of what show_inodes
+			option(s) you use, all files are accessible when you
+			specify the correct name, even though they may not be
+			shown in a normal "ls", i.e. you can always access the
+			system files and both the short and long file names of
+			files and directories.
+			Finally, note that win32 and dos file names are not
+			case sensitive and can be accessed using any
+			combination of lower and upper case, while POSIX file
+			names are case sensitive and they can only be accessed
+			given the correct case.
+mft_zone_multiplier=	Set the MFT zone multiplier for the volume (this
+			setting is not persistent across mounts and can be
+			changed from mount to mount but cannot be changed on
+			remount). Values of 1 to 4 are allowed, 1 being the
+			default. The MFT zone multiplier determines how much
+			space is reserved for the MFT on the volume. If all
+			other space is used up, then the MFT zone will be
+			shrunk dynamically, so this has no impact on the
+			amount of free space. However, it can have an impact
+			on performance by affecting fragmentation of the MFT.
+			In general use the default. If you have a lot of small
+			files then use a higher value. The values have the
+			following meaning:
+			      Value	     MFT zone size (% of volume size)
+				1		12.5%
+				2		25%
+				3		37.5%
+				4		50%
+			Note this option is irrelevant for read-only mounts.
+Features
+========
+- Implementation of NTFS read support functionally equivalent to the old ntfs
+  driver.
+Known bugs and (mis-)features
+=============================
+- None
+Please send bug reports/comments/feedback/abuse to the Linux-NTFS development
+list at sourceforge: linux-ntfs-dev@lists.sourceforge.net
+ChangeLog
+=========
+Note that a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
+TNG-0.0.8:
+	- Started ChangeLog.
--- a/fs/Config.help
+++ b/fs/Config.help
@@ -576,6 +576,37 @@ CONFIG_HPFS_FS
  say M here and read <file:Documentation/modules.txt>.  If unsure,
  say N.
+CONFIG_NTFS_FS
+  NTFS is the file system of Microsoft Windows NT/2000/XP. For more
+  information see <file:Documentation/filesystems/ntfs.txt>. Saying Y
+  here would allow you to read from NTFS partitions.
+  This file system is also available as a module ( = code which can be
+  inserted in and removed from the running kernel whenever you want).
+  The module will be called ntfs.o. If you want to compile it as a
+  module, say M here and read <file:Documentation/modules.txt>. If you
+  are not using Windows NT/2000/XP in addition to Linux on your computer
+  it is safe to say N.
+CONFIG_NTFS_DEBUG
+  If you are experiencing any problems with the NTFS file system, say
+  Y here. This will result in additional consistency checks to be
+  performed by the driver as well as additional debugging messages to
+  be written to the system log. Note that debugging messages are
+  disabled by default. To enable them, supply the option debug_msgs=1
+  at the kernel command line when booting the kernel or as an option
+  to insmod when loading the ntfs module. Once the driver is active,
+  you can enable debugging messages by doing (as root):
+    echo 1 > /proc/sys/fs/ntfs-debug
+  Replacing the "1" with "0" would disable debug messages.
+  If you leave debugging messages disabled, this results in little
+  overhead, but enabling debug messages results in very significant
+  slowdown of the system.
+  When reporting bugs, please try to have available a full dump of
+  debugging messages while the misbehaviour was occurring.
 CONFIG_SYSV_FS
  SCO, Xenix and Coherent are commercial Unix systems for Intel
  machines, and Version 7 was used on the DEC PDP-11. Saying Y

--- a/fs/Config.in
+++ b/fs/Config.in
@@ -62,6 +62,9 @@ tristate 'Minix fs support' CONFIG_MINIX_FS
 tristate 'FreeVxFS file system support (VERITAS VxFS(TM) compatible)' CONFIG_VXFS_FS
+tristate 'NTFS file system support (read only)' CONFIG_NTFS_FS
+dep_mbool '  NTFS debugging support' CONFIG_NTFS_DEBUG $CONFIG_NTFS_FS
 tristate 'OS/2 HPFS file system support' CONFIG_HPFS_FS
 bool '/proc file system support' CONFIG_PROC_FS

--- a/fs/Makefile
+++ b/fs/Makefile
@@ -52,6 +52,7 @@ subdir-$(CONFIG_SYSV_FS)	+= sysv
 subdir-$(CONFIG_SMB_FS)		+= smbfs
 subdir-$(CONFIG_NCP_FS)		+= ncpfs
 subdir-$(CONFIG_HPFS_FS)	+= hpfs
+subdir-$(CONFIG_NTFS_FS)	+= ntfs
 subdir-$(CONFIG_UFS_FS)		+= ufs
 subdir-$(CONFIG_EFS_FS)		+= efs
 subdir-$(CONFIG_JFFS_FS)	+= jffs

--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
+ToDo:
+	- Audit for allocated_size vs initialized_size vs data_size (i.e.
+	  i_size) in whole driver.
+	  Need to enforce limits and zeroes need to be written when overflow is
+	  detected. We CANNOT use block_read_full_page() at all anywhere! This
+	  is because initialized_size can lie within a block and ntfs_get_block
+	  has no way to tell block_read_full_page about it. So our readpage
+	  functions need to clone block_read_full_page and modify it to cope
+	  with the significance of the different attribute sizes.
+	  Still need to go through:
+		aops.c, attrib.c, compress.c, dir.c, mft.c
+	- Find and fix bugs.
+	- W.r.t. s_maxbytes still need to be careful on reading/truncating as
+	  there are dragons lurking in the details, e.g. read_inode() currently
+	  does no checks for file size wrt s_maxbytes. So what happens when a
+	  user open()s a file with i_size > s_maxbytes? Should read_inode()
+	  truncate the visible i_size? Will the user just get -E2BIG (or
+	  whatever) on open()? Or will (s)he be able to open() but lseek() and
+	  read() will fail when s_maxbytes is reached? -> Investigate this!
+	- Perhaps don't bother getting the run list in ntfs_read_inode() at
+	  all. But we do have to find the data/index root attribute to get the
+	  inode size so we might want to decompress the mapping pairs of the
+	  first extent in there anyway. -> Ponder this. Directory listings
+	  would have significant speedups but the first access to each file/dir
+	  would have a small speed penalty.
+	- Implement/allow non-resident index bitmaps in ntfs_readdir().
+	- vcn_to_lcn() should somehow return the correct pointer within the
+	  ->run_list so we can get at the lcns for the following vcns, this is
+	  strictly a speed optimization. Obviously need to keep the ->run_list
+	  locked or RACE. load_attribute_list() which we call without any locks
+	  held already performs such an optimization but that is no longer
+	  possible when using the spinlock on the run lists as this would sleep
+	  in between. Either need different type of optimization as above or
+	  need to change the read/write spinlock to a read/write semaphore.
+tng-0.0.8 - 08/03/2002 - BitKeeper ChangeSet 1.457
+	- Replace bdevname(sb->s_dev) with sb->s_id.
+	- Remove now superfluous new-line characters in all callers of
+	  ntfs_debug().
+	- Apply kludge in ntfs_read_inode(), setting i_nlink to 1 for
+	  directories. Without this the "find" utility gets very upset which is
+	  fair enough as Linux/Unix do not support directory hard links.
+	- Further run list merging work. (Richard Russon)
+	- Backwards compatibility for gcc-2.95. (Richard Russon)
+	- Update to kernel 2.5.5-pre1 and rediff the now tiny patch.
+	- Convert to new file system declaration using ->ntfs_get_sb() and
+	  replacing ntfs_read_super() with ntfs_fill_super().
+	- Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index
+	  overflow on 32-bit architectures.
+	- Cleanup upcase loading code to use ntfs_(un)map_page().
+	- Disable/reenable preemption in critical sections of compression engine.
+	- Replace device size determination in ntfs_fill_super() with
+	  sb->s_bdev->bd_inode->i_size (in bytes) and remove now superfluous
+	  function super.c::get_nr_blocks().
+	- Implement a mount time option (show_inodes) allowing choice of which
+	  types of inode names readdir() returns and modify ntfs_filldir()
+	  accordingly. There are several parameters to show_inodes:
+		system:	system files
+	  	win32:	long file names (including POSIX file names) [DEFAULT]
+		long:	same as win32
+	  	dos:	short file names only (excluding POSIX file names)
+		short:	same as dos
+		posix:	same as both win32 and dos
+	  	all:	all file names
+	  Note that the options are additive, i.e. specifying:
+		-o show_inodes=system,show_inodes=win32,show_inodes=dos
+	  is the same as specifying:
+		-o show_inodes=all
+	  Note that the "posix" and "all" options will show all directory
+	  names, BUT the link count on each directory inode entry is set to 1,
+	  due to Linux not supporting directory hard links. This may well
+	  confuse some userspace applications, since the directory names will
+	  have the same inode numbers. Thus it is NOT advisable to use the
+	  "posix" or "all" options. We provide them only for completeness sake.
+	- Add copies of allocated_size, initialized_size, and compressed_size to
+	  the ntfs inode structure and set them up in
+	  inode.c::ntfs_read_inode(). These reflect the unnamed data attribute
+	  for files and the index allocation attribute for directories.
+	- Add copies of allocated_size and initialized_size to ntfs inode for
+	  $BITMAP attribute of large directories and set them up in
+	  inode.c::ntfs_read_inode().
+	- Add copies of allocated_size and initialized_size to ntfs volume for
+	  $BITMAP attribute of $MFT and set them up in
+	  super.c::load_system_files().
+	- Parse deprecated ntfs driver options (iocharset, show_sys_files,
+	  posix, and utf8) and tell user what the new options to use are. Note
+	  we still do support them but they will be removed with kernel 2.7.x.
+	- Change all occurrences of integer long long printf formatting to hex
+	  as printk() will not support long long integer format if/when the
+	  div64 patch goes into the kernel.
+	- Make slab caches have stable names and change the names to what they
+	  were intended to be. These changes are required/made possible by the
+	  new slab cache name handling which removes the length limitation by
+	  requiring the caller of kmem_cache_create() to supply a stable name
+	  which is then referenced but not copied.
+	- Rename run_list structure to run_list_element and create a new
+	  run_list structure containing a pointer to a run_list_element
+	  structure and a read/write spinlock. Adapt all users of run lists
+	  to new scheme and take and release the lock as needed. This fixes a
+	  nasty race as the run_list changes even when inodes are locked for
+	  reading and even when the inode isn't locked at all, so we really
+	  needed the serialization.
+	- Cleanup read_inode() removing all code checking for lowest_vcn != 0.
+	  This can never happen due to the nature of lookup_attr() and how we
+	  support attribute lists. If it did happen it would imply the inode
+	  being corrupt.
+	- Check for lowest_vcn != 0 in ntfs_read_inode() and mark the inode as
+	  bad if found.
+	- Update to 2.5.6-pre2 changes in struct address_space.
+	- Import Sourceforge CVS repository into BitKeeper repository:
+		http://linux-ntfs.bkbits.net/ntfs-tng-2.5
+	- Update fs/Makefile, fs/Config.help, fs/Config.in, and
+	  Documentation/filesystems/ntfs.txt for NTFS TNG.
+	- Create kernel configuration option controlling whether debugging
+	  is enabled or not.
+	- Add the required export of end_buffer_io_sync() from the patches
+	  directory to the kernel code.
+	- Update inode.c::ntfs_show_options() with show_inodes mount option.
+	- Update errors mount option.
+tng-0.0.7 - 13/02/2002 - The driver is now feature complete for read-only!
+	- Cleanup mft.c and its debug/error output in particular. Fix a minor
+	  bug in mapping of extent inodes. Update all the comments to fit all
+	  the recent code changes.
+	- Modify vcn_to_lcn() to cope with entirely unmapped run lists.
+	- Cleanups in compress.c, mostly comments and folding help.
+	- Implement attrib.c::map_run_list() as a generic helper.
+	- Make compress.c::ntfs_file_read_compressed_block() use map_run_list()
+	  thus making code shorter and enabling attribute list support.
+	- Cleanup incorrect use of [su]64 with %L printf format specifier in
+	  all source files. Type casts to [unsigned] long long added to correct
+	  the mismatches (important for architectures which have long long not
+	  being 64 bits).
+	- Merge async io completion handlers for directory indexes and $MFT
+	  data into one by setting the index_block_size{_bits} of the ntfs
+	  inode for $MFT to the mft_record_size{_bits} of the ntfs_volume.
+	- Cleanup aops.c, update comments.
+	- Make ntfs_file_get_block() use map_run_list() so all files now
+	  support attribute lists.
+	- Make ntfs_dir_readpage() almost verbatim copy of
+	  block_read_full_page() by using ntfs_file_get_block() with only real
+	  difference being the use of our own async io completion handler
+	  rather than the default one, thus reducing the amount of code and
+	  automatically enabling attribute list support for directory indices.
+	- Fix bug in load_attribute_list() - forgot to call brelse in error
+	  code path.
+	- Change parameters to find_attr() and lookup_attr(). We no longer
+	  pass in the upcase table and its length. These can be gotten from
+	  ctx->ntfs_ino->vol->upcase{_len}. Update all callers.
+	- Cleanups in attrib.c. 
+	- Implement merging of run lists, attrib.c::merge_run_lists() and its
+	  helpers. (Richard Russon)
+	- Attribute lists part 2, attribute extents and multi part run lists:
+	  enable proper support for LCN_RL_NOT_MAPPED and automatic mapping of
+	  further run list parts via attrib.c::map_run_list().
+	- Tiny endianness bug fix in decompress_mapping_pairs().
+tng-0.0.6 - Encrypted directories, bug fixes, cleanups, debugging enhancements.
+	- Enable encrypted directories. (Their index root is marked encrypted
+	  to indicate that new files in that directory should be created
+	  encrypted.)
+	- Fix bug in NInoBmpNonResident() macro. (Cut and paste error.)
+	- Enable $Extend system directory. Most (if not all) extended system
+	  files do not have unnamed data attributes so ntfs_read_inode() had to
+	  special case them but that is ok, as the special casing recovery
+	  happens inside an error code path so there is zero slow down in the
+	  normal fast path. The special casing is done by introducing a new
+	  function inode.c::ntfs_is_extended_system_file() which checks if any
+	  of the hard links in the inode point to $Extend as being their parent
+	  directory and if they do we assume this is an extended system file.
+	- Create a sysctl/proc interface to allow {dis,en}abling of debug output
+	  when compiled with -DDEBUG. Default is debug messages to be disabled.
+	  To enable them, one writes a non-zero value to /proc/sys/fs/ntfs-debug
+	  (if /proc is enabled) or uses sysctl(2) to effect the same (if sysctl
+	  interface is enabled). Inspired by old ntfs driver.
+	- Add debug_msgs insmod/kernel boot parameter to set whether debug
+	  messages are {dis,en}abled. This is useful to enable debug messages
+	  during ntfs initialization and is the only way to activate debugging
+	  when the sysctl interface is not enabled.
+	- Cleanup debug output in various places.
+	- Remove all dollar signs ($) from the source (except comments) to
+	  enable compilation on architectures whose gcc compiler does not
+	  support dollar signs in the names of variables/constants. Attribute
+	  types now start with AT_ instead of $ and $I30 is now just I30.
+	- Cleanup ntfs_lookup() and add consistency check of sequence numbers.
+	- Load complete run list for $MFT/$BITMAP during mount and cleanup
+	  access functions. This means we now cope with $MFT/$BITMAP being
+	  spread across several mft records.
+	- Disable modification of mft_zone_multiplier on remount. We can always
+	  reenable this later on if we really want to, but we will need to make
+	  sure we readjust the mft_zone size / layout accordingly.
+tng-0.0.5 - Modernize for 2.5.x and further in line-ing with Al Viro's comments.
+	- Use sb_set_blocksize() instead of set_blocksize() and verify the
+	  return value.
+	- Use sb_bread() instead of bread() throughout.
+	- Add index_vcn_size{_bits} to ntfs_inode structure to store the size
+	  of a directory index block vcn. Apply resulting simplifications in
+	  dir.c everywhere.
+	- Fix a small bug somewhere (but forgot what it was).
+	- Change ntfs_{debug,error,warning} to enable gcc to do type checking
+	  on the printf-format parameter list and fix bugs reported by gcc
+	  as a result. (Richard Russon)
+	- Move inode allocation strategy to Al's new stuff but maintain the
+	  divorce of ntfs_inode from struct inode. To achieve this we have two
+	  separate slab caches, one for big ntfs inodes containing a struct
+	  inode and pure ntfs inodes and at the same time fix some faulty
+	  error code paths in ntfs_read_inode().
+	- Show mount options in proc (inode.c::ntfs_show_options()).
+tng-0.0.4 - Big changes, getting in line with Al Viro's comments.
+	- Modified (un)map_mft_record functions to be common for read and write
+	  case. To specify which is which, added extra parameter at front of
+	  parameter list. Pass either READ or WRITE to this, each has the
+	  obvious meaning.
+	- General cleanups to allow for easier folding in vi.
+	- attrib.c::decompress_mapping_pairs() now accepts the old run list
+	  argument, and invokes attrib.c::merge_run_lists() to merge the old
+	  and the new run lists.
+	- Removed attrib.c::find_first_attr().
+	- Implemented loading of attribute list and complete run list for $MFT.
+	  This means we now cope with $MFT being spread across several mft
+	  records.
+	- Adapt to 2.5.2-pre9 and the changed create_empty_buffers() syntax.
+	- Adapt major/minor/kdev_t/[bk]devname stuff to new 2.5.x kernels.
+	- Make ntfs_volume be allocated via kmalloc() instead of using a slab
+	  cache. There are too few ntfs_volume structures at any one time
+	  to justify a private slab cache.
+	- Fix bogus kmap() use in async io completion. Now use kmap_atomic().
+	  Use KM_BIO_IRQ on advice from IRC/kernel...
+	- Use ntfs_map_page() in map_mft_record() and create ->readpage method
+	  for reading $MFT (ntfs_mft_readpage). In the process create dedicated
+	  address space operations (ntfs_mft_aops) for $MFT inode mapping. Also
+	  removed the now superfluous exports from the kernel core patch.
+	- Fix a bug where kfree() was used instead of ntfs_free().
+	- Change map_mft_record() to take ntfs_inode as argument instead of
+	  vfs inode. Ditto for unmap_mft_record(). Adapt all callers.
+	- Add pointer to ntfs_volume to ntfs_inode.
+	- Add mft record number and sequence number to ntfs_inode. Stop using
+	  i_ino and i_generation for in-driver purposes.
+	- Implement attrib.c::merge_run_lists(). (Richard Russon)
+	- Remove use of proper inodes by extent inodes. Move i_ino and
+	  i_generation to ntfs_inode to do this. Apply simplifications that
+	  result and remove iget_no_wait(), etc.
+	- Pass ntfs_inode everywhere in the driver (used to be struct inode).
+	- Add reference counting in ntfs_inode for the ntfs inode itself and
+	  for the mapped mft record.
+	- Extend mft record mapping so we can (un)map extent mft records (new
+	  functions (un)map_extent_mft_record), and so mappings are reference
+	  counted and don't have to happen twice if already mapped - just ref
+	  count increases.
+	- Add -o iocharset as alias to -o nls for backwards compatibility.
+	- The latest core patch is now tiny. In fact just a single additional
+	  export is necessary over the base kernel.
+tng-0.0.3 - Cleanups, enhancements, bug fixes.
+	- Work on attrib.c::decompress_mapping_pairs() to detect base extents
+	  and setup the run list appropriately using knowledge provided by the
+	  sizes in the base attribute record.
+	- Balance the get_/put_attr_search_ctx() calls so we don't leak memory
+	  any more.
+	- Introduce ntfs_malloc_nofs() and ntfs_free() to allocate/free a single
+	  page or use vmalloc depending on the amount of memory requested.
+	- Cleanup error output. The __FUNCTION__ "(): " is now added
+	  automatically. Introduced a new header file debug.h to support this
+	  and also moved ntfs_debug() function into it.
+	- Make reading of compressed files more intelligent and especially get
+	  rid of the vmalloc_nofs() from readpage(). This now uses per CPU
+	  buffers (allocated at first mount with cluster size <= 4kiB and
+	  deallocated on last umount with cluster size <= 4kiB), and
+	  asynchronous io for the compressed data using a list of buffer heads.
+	  Er, we use synchronous io as async io only works on whole pages
+	  covered by buffers and not on individual buffer heads...
+	- Bug fix for reading compressed files with sparse compression blocks.
+tng-0.0.2 - Now handles larger/fragmented/compressed volumes/files/dirs.
+	- Fixed handling of directories when cluster size exceeds index block
+	  size.
+	- Hide DOS only name space directory entries from readdir() but allow
+	  them in lookup(). This should fix the problem that Linux doesn't
+	  support directory hard links, while still allowing access to entries
+	  via their short file name. This also has the benefit of mimicking
+	  what Windows users are used to, so it is the ideal solution.
+	- Implemented sync_page everywhere so no more hangs in D state when
+	  waiting for a page.
+	- Stop using bforget() in favour of brelse().
+	- Stop locking buffers unnecessarily.
+	- Implemented compressed files (inode->mapping contains uncompressed
+	  data, raw compressed data is currently bread() into a vmalloc()ed
+	  memory buffer).
+	- Enable compressed directories. (Their index root is marked compressed
+	  to indicate that new files in that directory should be created
+	  compressed.)
+	- Use vsnprintf rather than vsprintf in the ntfs_error and ntfs_warning
+	  functions. (Thanks to Will Dyson for pointing this out.)
+	- Moved the ntfs_inode and ntfs_volume (the former ntfs_inode_info and
+	  ntfs_sb_info) out of the common inode and super_block structures and
+	  started using the generic_ip and generic_sbp pointers instead. This
+	  makes ntfs entirely private with respect to the kernel tree.
+	- Detect compiler version and abort with error message if gcc less than
+	  2.96 is used.
+	- Fix bug in name comparison function in unistr.c.
+	- Implement attribute lists part 1, the infrastructure (search contexts
+	  and operations, find_external_attr(), lookup_attr()) and make the
+	  code use the infrastructure.
+	- Fix stupid buffer overflow bug that became apparent on larger run
+	  list containing attributes.
+	- Fix bugs in readdir() that became apparent on larger directories.
+	The driver is now really useful and survives the test
+		find . -type f -exec md5sum "{}" \;
+	without any error messages on an over 1GiB sized partition with >16k
+	files on it, including compressed files and directories and many files
+	and directories with attribute lists.
+tng-0.0.1 - The first useful version.
+	- Added ntfs_lookup().
+	- Added default upcase generation and handling.
+	- Added compile options to be shown on module init.
+	- Many bug fixes that were "hidden" before.
+	- Update to latest kernel.
+	- Added ntfs_readdir().
+	- Added file operations for mmap(), read(), open() and llseek(). We just
+	  use the generic ones. The whole point of going through implementing
+	  readpage() methods and where possible get_block() call backs is that
+	  this allows us to make use of the generic high level methods provided
+	  by the kernel.
+	The driver is now actually useful! Yey. (-: It undoubtedly has got bugs
+	though and it doesn't implement accessing compressed files yet. Also,
+	accessing files with attribute list attributes is not implemented yet
+	either. But for small or simple file systems it should work and allow
+	you to list directories, use stat on directory entries and the file
+	system, open, read, mmap and llseek around in files. A big mile stone
+	has been reached!
+tng-0.0.0 - Initial version tag.
+	Initial driver implementation. The driver can mount and umount simple
+	NTFS file systems (i.e. ones without attribute lists in the system
+	files). If the mount fails there might be problems in the error handling
+	code paths, so be warned. Otherwise it seems to be loading the system
+	files nicely and the mft record read mapping/unmapping seems to be
+	working nicely, too. Proof of inode metadata in the page cache and non-
+	resident file unnamed stream data in the page cache concepts is thus
+	complete.
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
+# Rules for making the NTFS TNG driver.
+O_TARGET := ntfs.o
+obj-y   := time.o unistr.o inode.o file.o mft.o super.o debug.o aops.o \
+		attrib.o dir.o namei.o mst.o upcase.o compress.o sysctl.o
+obj-m   := $(O_TARGET)
+EXTRA_CFLAGS = -DNTFS_VERSION=\"TNG-0.0.8\"
+ifeq ($(CONFIG_NTFS_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
+endif
+include $(TOPDIR)/Rules.make
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
+/*
+ * aops.c - NTFS kernel address space operations and page cache handling.
+ * 	    Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/locks.h>
+#include "ntfs.h"
+/**
+ * ntfs_file_get_block - read/create inode @vi block @blk into buffer head @bh
+ * @vi:		inode to read/create block from/onto
+ * @blk:	block number to read/create
+ * @bh:		buffer in which to return the read/created block
+ * @create:	if not zero, create the block if it doesn't exist already
+ * 
+ * ntfs_file_get_block() remaps the block number @blk of the inode @vi from
+ * file offset into disk block position and returns the result in the buffer
+ * head @bh. If the block doesn't exist and create is not zero,
+ * ntfs_file_get_block() creates the block before returning it. @blk is the
+ * file offset divided by the file system block size, as defined by the field
+ * s_blocksize in the super block reachable by @vi->i_sb.
+ *
+ * If the block doesn't exist, create is true, and the inode is marked
+ * for synchronous I/O, then we will wait for creation to complete before
+ * returning the created block (which will be zeroed). Otherwise we only
+ * schedule creation and return. - FIXME: Need to have a think whether this is
+ * really necessary. What would happen if we didn't actually write the block to
+ * disk at this stage? We would just save writing a block full of zeroes to the
+ * device. - We can always write it synchronously when the user actually writes
+ * some data into it. - But this might result in random data being returned
+ * should the computer crash. - Hmmm. - This requires more thought.
+ *
+ * Obviously the block is only created if the file system super block flag
+ * MS_RDONLY is not set and only if NTFS write support is compiled in.
+ */
+int ntfs_file_get_block(struct inode *vi, const sector_t blk,
+		struct buffer_head *bh, const int create)
+{
+	ntfs_inode *ni = NTFS_I(vi);
+	ntfs_volume *vol = ni->vol;
+	VCN byte_pos, vcn;
+	LCN lcn;
+	int ofs, err;
+	BOOL remapped = FALSE;
+	/* Begin with an unmapped buffer that belongs to the inode's device. */
+	bh->b_dev = vi->i_dev;
+	bh->b_blocknr = -1;
+	bh->b_state &= ~(1UL << BH_Mapped);
+	/*
+	 * Turn @blk into a byte position, then split that into the virtual
+	 * cluster number (vcn) and the byte offset inside that cluster.
+	 */
+	byte_pos = (VCN)blk << vol->sb->s_blocksize_bits;
+	vcn = byte_pos >> vol->cluster_size_bits;
+	ofs = byte_pos & vol->cluster_size_mask;
+	/* Blocks at or past the initialized size are returned unmapped. */
+	if ((vcn << vol->cluster_size_bits) + ofs >= ni->initialized_size)
+		return 0;
+	/*
+	 * TODO: We should also be checking i_size and, if it is zero or the
+	 * request is out of bounds, just do:
+	 * 	bh->b_blocknr = -1UL;
+	 * 	return 0;
+	 * TODO: Deal with attr->initialized_size.
+	 * TODO: Deal with the last block being requested while it is only
+	 * partially initialized. It then has to be read synchronously and the
+	 * remainder zeroed afterwards; zeroing first does not work because
+	 * loading the whole block from the device would overwrite the zeroes.
+	 * TODO: Lock the run list in the inode so nobody reads it while
+	 * someone else is writing it.
+	 */
+	for (;;) {
+		/* Look up the logical cluster number (lcn) backing the vcn. */
+		read_lock(&ni->run_list.lock);
+		lcn = vcn_to_lcn(ni->run_list.rl, vcn);
+		read_unlock(&ni->run_list.lock);
+		if (lcn >= 0) {
+			/* Real cluster: describe the disk block in @bh. */
+			bh->b_blocknr = ((lcn << vol->cluster_size_bits) + ofs) >>
+					vol->sb->s_blocksize_bits;
+			bh->b_state |= (1UL << BH_Mapped);
+			return 0;
+		}
+		if (lcn == LCN_HOLE) {
+			/* FIXME: When creating we should instantiate the hole. */
+			if (create)
+				return -EROFS;
+			/*
+			 * Sparse region. The block number is ignored for an
+			 * unmapped buffer; -1 is stored purely as a debugging
+			 * aid.
+			 */
+			bh->b_blocknr = -1UL;
+			bh->b_state &= ~(1UL << BH_Mapped);
+			return 0;
+		}
+		/* Only an unmapped run list on the first pass is retried. */
+		if (remapped || lcn != LCN_RL_NOT_MAPPED)
+			break;
+		err = map_run_list(ni, vcn);
+		if (err)
+			return err;
+		remapped = TRUE;
+	}
+	/* FIXME: When creating we might need to extend the attribute. */
+	return create ? -EROFS : -EIO;
+}
+/**
+ * ntfs_file_readpage - fill a @page of a @file with data from the device
+ * @file:	open file to which the page @page belongs or NULL
+ * @page:	page cache page to fill with data
+ *
+ * For non-resident attributes, ntfs_file_readpage() fills the @page of the open
+ * file @file by calling the generic block_read_full_page() function provided by
+ * the kernel which in turn invokes our ntfs_file_get_block() callback in order
+ * to create and read in the buffers associated with the page asynchronously.
+ * Compressed non-resident data is handed to
+ * ntfs_file_read_compressed_block() and encrypted data is refused with
+ * -EACCES.
+ *
+ * For resident attributes, OTOH, ntfs_file_readpage() fills @page by copying
+ * the data from the mft record (which at this stage is most likely in memory)
+ * and fills the remainder with zeroes. Thus, in this case I/O is synchronous,
+ * as even if the mft record is not cached at this point in time, we need to
+ * wait for it to be read in before we can do the copy.
+ *
+ * In the resident and the encrypted cases the page is unlocked before
+ * returning, on success as well as on error.
+ *
+ * Return zero on success or -errno on error.
+ */
+static int ntfs_file_readpage(struct file *file, struct page *page)
+{
+	s64 attr_pos;
+	struct inode *vi;
+	ntfs_inode *ni;
+	char *page_addr;
+	u32 attr_len;
+	int err = 0;
+	attr_search_context *ctx;
+	MFT_RECORD *mrec;
+	/* The page must be locked. */
+	if (!PageLocked(page))
+		PAGE_BUG(page);
+	/*
+	 * Get the VFS and ntfs inodes associated with the page. We go via the
+	 * page mapping rather than via file->f_dentry->d_inode because this
+	 * function is also used on inodes that do not represent open files,
+	 * in which case @file is NULL. So @file is never touched here; any
+	 * future use must check it is not NULL first.
+	 */
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
+	/* Is the unnamed $DATA attribute resident? */
+	if (test_bit(NI_NonResident, &ni->state)) {
+		/* Attribute is not resident. */
+		/* If the file is encrypted, we deny access, just like NT4. */
+		if (test_bit(NI_Encrypted, &ni->state)) {
+			err = -EACCES;
+			goto unl_err_out;
+		}
+		if (!test_bit(NI_Compressed, &ni->state))
+			/* Normal data stream, use generic functionality. */
+			return block_read_full_page(page, ntfs_file_get_block);
+		/* Compressed data stream. Handled in compress.c. */
+		return ntfs_file_read_compressed_block(page);
+	}
+	/* Attribute is resident, implying it is not compressed or encrypted. */
+	/*
+	 * No extra inode reference is taken here. - Shouldn't be needed as
+	 * the page is locked.
+	 */
+	/* Map, pin and lock the mft record for reading. */
+	mrec = map_mft_record(READ, ni);
+	if (IS_ERR(mrec)) {
+		err = PTR_ERR(mrec);
+		goto dec_unl_err_out;
+	}
+	err = get_attr_search_ctx(&ctx, ni, mrec);
+	if (err)
+		goto unm_dec_unl_err_out;
+	/* Find the data attribute in the mft record. */
+	if (!lookup_attr(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx)) {
+		err = -ENOENT;
+		goto put_unm_dec_unl_err_out;
+	}
+	/*
+	 * Starting position of the page within the attribute value. Widen the
+	 * index before shifting: shifting it as an unsigned long can wrap on
+	 * 32-bit architectures and make a far away page look in bounds.
+	 */
+	attr_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+	/* The total length of the attribute value. */
+	attr_len = le32_to_cpu(ctx->attr->_ARA(value_length));
+	/* Map the page so we can access it. */
+	page_addr = kmap(page);
+	/*
+	 * TODO: Find out whether we really need to zero the page. If it is
+	 * initialized to zero already we could skip this.
+	 */
+	/*
+	 * If we are asking for any in bounds data, copy it over, zeroing the
+	 * remainder of the page if necessary. Otherwise just zero the page.
+	 */
+	if (attr_pos < attr_len) {
+		u32 bytes = attr_len - attr_pos;
+		if (bytes > PAGE_CACHE_SIZE)
+			bytes = PAGE_CACHE_SIZE;
+		else if (bytes < PAGE_CACHE_SIZE)
+			memset(page_addr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+		/* Copy the data to the page. */
+		memcpy(page_addr, attr_pos + (char*)ctx->attr +
+				le16_to_cpu(ctx->attr->_ARA(value_offset)), bytes);
+	} else
+		memset(page_addr, 0, PAGE_CACHE_SIZE);
+	/*
+	 * The page was written through the kernel mapping, so flush the data
+	 * cache before anyone looks at it through another mapping, as the
+	 * other readpage functions in this file do after zeroing buffers.
+	 */
+	flush_dcache_page(page);
+	kunmap(page);
+	/* We are done. */
+	SetPageUptodate(page);
+put_unm_dec_unl_err_out:
+	put_attr_search_ctx(ctx);
+unm_dec_unl_err_out:
+	/* Unlock, unpin and release the mft record. */
+	unmap_mft_record(READ, ni);
+dec_unl_err_out:
+	/* No inode reference to drop, see above. */
+unl_err_out:
+	UnlockPage(page);
+	return err;
+}
+/*
+ * Map block @blk of the mft bitmap of volume @vol to its on-disk block and
+ * describe it in @bh. A cut down relative of ntfs_file_get_block(): it never
+ * creates blocks and never tries to map further run list fragments.
+ *
+ * Blocks at or beyond vol->mftbmp_initialized_size are left unmapped and zero
+ * is returned. Any negative lcn from the run list lookup yields -EIO.
+ */
+static int ntfs_mftbmp_get_block(ntfs_volume *vol, const sector_t blk,
+		struct buffer_head *bh)
+{
+	const unsigned char blk_bits = vol->sb->s_blocksize_bits;
+	VCN vcn;
+	LCN lcn;
+	int ofs;
+	/* Split the block number into a cluster and an offset inside it. */
+	vcn = (VCN)blk << blk_bits >> vol->cluster_size_bits;
+	ofs = (blk << blk_bits) & vol->cluster_size_mask;
+	ntfs_debug("Entering for blk = 0x%lx, vcn = 0x%Lx, ofs = 0x%x.",
+			blk, (long long)vcn, ofs);
+	/* Until proven otherwise the buffer is unmapped. */
+	bh->b_dev = vol->mft_ino->i_dev;
+	bh->b_state &= ~(1UL << BH_Mapped);
+	bh->b_blocknr = -1;
+	/* Nothing to map at or past the initialized size. */
+	if ((vcn << vol->cluster_size_bits) + ofs >=
+			vol->mftbmp_initialized_size) {
+		ntfs_debug("Done.");
+		return 0;
+	}
+	/* Look the cluster up in the mft bitmap run list. */
+	read_lock(&vol->mftbmp_rl.lock);
+	lcn = vcn_to_lcn(vol->mftbmp_rl.rl, vcn);
+	read_unlock(&vol->mftbmp_rl.lock);
+	ntfs_debug("lcn = 0x%Lx.", (long long)lcn);
+	if (lcn >= 0LL) {
+		/* Found it: point the buffer head at the disk block. */
+		bh->b_blocknr = ((lcn << vol->cluster_size_bits) + ofs) >>
+				blk_bits;
+		bh->b_state |= (1UL << BH_Mapped);
+		ntfs_debug("Done, bh->b_blocknr = 0x%lx.", bh->b_blocknr);
+		return 0;
+	}
+	ntfs_error(vol->sb, "Returning -EIO, lcn = 0x%Lx.", (long long)lcn);
+	return -EIO;
+}
+/* Upper bound on buffers per page: the smallest block size used is 512. */
+#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
+/*
+ * Specialized readpage for accessing mft bitmap. Adapted from
+ * block_read_full_page().
+ *
+ * @vol is the volume whose mft bitmap is read and @page the locked page cache
+ * page to fill. Buffers that cannot be mapped without error are zero filled
+ * and marked uptodate; the remaining ones are submitted for asynchronous
+ * read.
+ *
+ * Returns 0 if the page was completed synchronously or I/O was started,
+ * -EINVAL if the page has no buffers, and -EIO if no I/O could be started
+ * because mapping of at least one buffer failed. In the synchronous cases the
+ * page is unlocked before returning; once I/O has been submitted it is left
+ * to the buffer completion handler.
+ */
+static int ntfs_mftbmp_readpage(ntfs_volume *vol, struct page *page)
+{
+	sector_t iblock, lblock;
+	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	unsigned int blocksize, blocks;
+	int nr, nr_err, i;
+	unsigned char blocksize_bits;
+	ntfs_debug("Entering for index 0x%lx.", page->index);
+	if (!PageLocked(page))
+		PAGE_BUG(page);
+	blocksize = vol->sb->s_blocksize;
+	blocksize_bits = vol->sb->s_blocksize_bits;
+	if (!page->buffers)
+		create_empty_buffers(page, blocksize);
+	head = page->buffers;
+	if (!head) {
+		ntfs_error(vol->sb, "Creation of empty buffers failed, cannot "
+				"read page.");
+		/* Do not leave the page locked behind us on failure. */
+		UnlockPage(page);
+		return -EINVAL;
+	}
+	blocks = PAGE_CACHE_SIZE >> blocksize_bits;
+	/* First block of this page and first block beyond the bitmap. */
+	iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+	lblock = (((vol->_VMM(nr_mft_records) + 7) >> 3) + blocksize - 1) >>
+			blocksize_bits;
+	ntfs_debug("blocks = 0x%x, iblock = 0x%lx, lblock = 0x%lx.", blocks,
+			iblock, lblock);
+	bh = head;
+	nr = nr_err = i = 0;
+	/*
+	 * Stage one: work out which buffers need I/O. Note that continue in a
+	 * do/while jumps to the loop condition, so i, iblock and bh are still
+	 * advanced.
+	 */
+	do {
+		ntfs_debug("In do loop, i = 0x%x, iblock = 0x%lx.", i,
+				iblock);
+		if (buffer_uptodate(bh)) {
+			ntfs_debug("Buffer is already uptodate.");
+			continue;
+		}
+		if (!buffer_mapped(bh)) {
+			if (iblock < lblock) {
+				if (ntfs_mftbmp_get_block(vol, iblock, bh)) {
+					/* Remember that this buffer is bad. */
+					nr_err++;
+					continue;
+				}
+			}
+			if (!buffer_mapped(bh)) {
+				ntfs_debug("Buffer is not mapped, setting "
+						"uptodate.");
+				memset(kmap(page) + i*blocksize, 0, blocksize);
+				flush_dcache_page(page);
+				kunmap(page);
+				set_bit(BH_Uptodate, &bh->b_state);
+				continue;
+			}
+			/*
+			 * ntfs_mftbmp_get_block() might have updated the
+			 * buffer synchronously.
+			 */
+			if (buffer_uptodate(bh)) {
+				ntfs_debug("Buffer is now uptodate.");
+				continue;
+			}
+		}
+		arr[nr++] = bh;
+	} while (i++, iblock++, (bh = bh->b_this_page) != head);
+	ntfs_debug("After do loop, i = 0x%x, iblock = 0x%lx, nr = 0x%x.", i,
+			iblock, nr);
+	if (!nr) {
+		if (nr_err) {
+			/*
+			 * Nothing to read, but not because everything is
+			 * uptodate: some buffers could not be mapped. The page
+			 * contents are not valid so do not claim they are.
+			 */
+			ntfs_error(vol->sb, "Failed to map 0x%x buffer(s) of "
+					"page index 0x%lx, returning -EIO.",
+					nr_err, page->index);
+			UnlockPage(page);
+			return -EIO;
+		}
+		/* All buffers are uptodate - set the page uptodate as well. */
+		ntfs_debug("All buffers are uptodate, returning 0.");
+		SetPageUptodate(page);
+		UnlockPage(page);
+		return 0;
+	}
+	/*
+	 * NOTE(review): if nr_err is not zero here the page state is decided
+	 * by the completion handler installed by set_buffer_async_io(), which
+	 * is not visible from this file - confirm it copes with buffers that
+	 * were never read.
+	 */
+	/* Stage two: lock the buffers */
+	ntfs_debug("Locking buffers.");
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *tbh = arr[i];
+		lock_buffer(tbh);
+		set_buffer_async_io(tbh);
+	}
+	/* Stage 3: start the IO */
+	ntfs_debug("Starting IO on buffers.");
+	for (i = 0; i < nr; i++)
+		submit_bh(READ, arr[i]);
+	ntfs_debug("Done.");
+	return 0;
+}
+/**
+ * end_buffer_read_index_async - async io completion for reading index records
+ * @bh:		buffer head on which io is completed
+ * @uptodate:	whether @bh is now uptodate or not
+ *
+ * Asynchronous I/O completion handler for reading pages belonging to the
+ * index allocation attribute address space of directory inodes.
+ *
+ * Once all IO on the page has completed, perform the post read mst fixups on
+ * every record in the page and then either mark the page uptodate or set the
+ * error bit on the page. In both cases the page is unlocked by whichever
+ * buffer finishes last.
+ *
+ * Adapted from fs/buffer.c.
+ *
+ * NOTE: We use this function as async io completion handler for reading pages
+ * belonging to the mft data attribute address space, too as this saves
+ * duplicating an almost identical function. We do this by cheating a little
+ * bit in setting the index_block_size in the mft ntfs_inode to the mft record
+ * size of the volume (vol->mft_record_size), and index_block_size_bits to
+ * mft_record_size_bits, respectively.
+ */
+void end_buffer_read_index_async(struct buffer_head *bh, int uptodate)
+{
+	static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long flags;
+	struct buffer_head *tmp;
+	struct page *page;
+	mark_buffer_uptodate(bh, uptodate);
+	/* This is a temporary buffer used for page I/O. */
+	page = bh->b_page;
+	if (!uptodate)
+		SetPageError(page);
+	/*
+	 * Be _very_ careful from here on. Bad things can happen if
+	 * two buffer heads end IO at almost the same time and both
+	 * decide that the page is now completely done.
+	 *
+	 * Async buffer_heads are here only as labels for IO, and get
+	 * thrown away once the IO for this page is complete.  IO is
+	 * deemed complete once all buffers have been visited
+	 * (b_count==0) and are now unlocked. We must make sure that
+	 * only the _last_ buffer that decrements its count is the one
+	 * that unlocks the page.
+	 */
+	spin_lock_irqsave(&page_uptodate_lock, flags);
+	mark_buffer_async(bh, 0);
+	unlock_buffer(bh);
+	/* Any other buffer of the page that is still async and locked? */
+	tmp = bh->b_this_page;
+	while (tmp != bh) {
+		if (buffer_async(tmp) && buffer_locked(tmp))
+			goto still_busy;
+		tmp = tmp->b_this_page;
+	}
+	/* OK, the async IO on this page is complete. */
+	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	/*
+	 * If none of the buffers had errors then we can set the page uptodate,
+	 * but we first have to perform the post read mst fixups.
+	 */
+	if (!PageError(page)) {
+		char *addr;
+		unsigned int i, recs, nr_err = 0;
+		u32 rec_size;
+		ntfs_inode *ni = NTFS_I(page->mapping->host);
+		addr = kmap_atomic(page, KM_BIO_IRQ);
+		rec_size = ni->_IDM(index_block_size);
+		/* Zero if a record is larger than a page, handled below. */
+		recs = PAGE_CACHE_SIZE / rec_size;
+		for (i = 0; i < recs; i++) {
+			if (!post_read_mst_fixup((NTFS_RECORD*)(addr +
+					i * rec_size), rec_size))
+				continue;
+			nr_err++;
+			/*
+			 * Widen the page index before shifting so the record
+			 * number reported is right even when the byte offset
+			 * does not fit in an unsigned long.
+			 */
+			ntfs_error(ni->vol->sb, "post_read_mst_fixup() failed, "
+					"corrupt %s record 0x%Lx. Run chkdsk.",
+					ni->mft_no ? "index" : "mft",
+					(long long)(((s64)page->index <<
+					PAGE_CACHE_SHIFT >>
+					ni->_IDM(index_block_size_bits)) + i));
+		}
+		kunmap_atomic(addr, KM_BIO_IRQ);
+		if (!nr_err && recs)
+			SetPageUptodate(page);
+		else {
+			ntfs_error(ni->vol->sb, "Setting page error, index "
+					"0x%lx.", page->index);
+			SetPageError(page);
+		}
+	}
+	UnlockPage(page);
+	return;
+still_busy:
+	/* Another buffer will finish later and complete the page. */
+	spin_unlock_irqrestore(&page_uptodate_lock, flags);
+	return;
+}
+/**
+ * ntfs_dir_readpage - fill a @page of a directory with data from the device
+ * @dir:	open directory to which the page @page belongs (not used here)
+ * @page:	page cache page to fill with data
+ *
+ * Fill the page @page of the open directory @dir. We read each buffer
+ * asynchronously and when all buffers are read in our io completion
+ * handler end_buffer_read_index_async() automatically applies the mst
+ * fixups to the page before finally marking it uptodate and unlocking it.
+ *
+ * Contains an adapted version of fs/buffer.c::block_read_full_page(), a
+ * generic "read page" function for block devices that have the normal
+ * get_block functionality. This is most of the block device filesystems.
+ * Reads the page asynchronously --- the unlock_buffer() and
+ * mark_buffer_uptodate() functions propagate buffer state into the
+ * page struct once IO has completed.
+ *
+ * Return 0 if io was submitted on at least one buffer (the page is then
+ * unlocked by the completion handler) or if every buffer was uptodate on
+ * entry (the page is then marked uptodate and unlocked here). Otherwise set
+ * the page error flag, unlock the page and return -EIO.
+ */
+static int ntfs_dir_readpage(struct file *dir, struct page *page)
+{
+	struct inode *vi;
+	struct super_block *sb;
+	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	sector_t iblock, lblock;
+	unsigned int blocksize, blocks, nr_bu;
+	int nr, i;
+	unsigned char blocksize_bits;
+	/* The page must be locked. */
+	if (!PageLocked(page))
+		PAGE_BUG(page);
+	/*
+	 * Get the VFS inode and the super block associated with the page.
+	 * The inode is taken from the page mapping, @dir is not consulted.
+	 */
+	vi = page->mapping->host;
+	sb = vi->i_sb;
+	/*
+	 * We need to create buffers for the page so we can do low level io.
+	 * A page that already has buffers is reported but processed anyway.
+	 */
+	blocksize = sb->s_blocksize;
+	blocksize_bits = sb->s_blocksize_bits;
+	if (!page->buffers)
+		create_empty_buffers(page, blocksize);
+	else
+		ntfs_error(sb, "Page (index 0x%lx) already has buffers.",
+				page->index);
+	nr_bu = blocks = PAGE_CACHE_SIZE >> blocksize_bits; /* buffers not yet seen uptodate on entry */
+	iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits); /* first block of the page */
+	lblock = (vi->i_size + blocksize - 1) >> blocksize_bits; /* first block past i_size */
+	bh = head = page->buffers;
+	BUG_ON(!bh);
+	/*
+	 * Loop through all the buffers in the page. Each continue below jumps
+	 * to the loop condition, so i, iblock and bh are still advanced.
+	 */
+	i = nr = 0;
+	do {
+		if (buffer_uptodate(bh)) {
+			nr_bu--;
+			continue;
+		}
+		if (!buffer_mapped(bh)) {
+			/* Is the block within the allowed limits? */
+			if (iblock < lblock) {
+				/*
+				 * Remap the inode offset to its disk block. A
+				 * buffer for which this fails is skipped: it
+				 * is neither zeroed nor queued for io.
+				 */
+				if (ntfs_file_get_block(vi, iblock, bh, 0))
+					continue;
+			}
+			if (!buffer_mapped(bh)) {
+				/*
+				 * Error. Zero this portion of the page and set
+				 * the buffer uptodate. Note nr_bu is not
+				 * decremented for such a buffer.
+				 */
+				memset(kmap(page) + i * blocksize, 0,
+						blocksize);
+				flush_dcache_page(page);
+				kunmap(page);
+				set_bit(BH_Uptodate, &bh->b_state);
+				continue;
+			}
+			/* The buffer might have been updated synchronously. */
+			if (buffer_uptodate(bh))
+				continue;
+		}
+		arr[nr++] = bh;
+	} while (i++, iblock++, (bh = bh->b_this_page) != head);
+	/* Check we have at least one buffer ready for io. */
+	if (nr) {
+		/* Lock the buffers and install our mst fixup io handler. */
+		for (i = 0; i < nr; i++) {
+			struct buffer_head *tbh = arr[i];
+			lock_buffer(tbh);
+			tbh->b_end_io = end_buffer_read_index_async;
+			mark_buffer_async(tbh, 1);
+		}
+		/* Finally, start io on the buffers. */
+		for (i = 0; i < nr; i++)
+			submit_bh(READ, arr[i]);
+		/* We are done. The page stays locked until io completes. */
+		return 0;
+	}
+	if (!nr_bu) { /* every buffer was already uptodate when we looked */
+		ntfs_debug("All buffers in the page were already uptodate, "
+				"assuming mst fixups were already applied.");
+		SetPageUptodate(page);
+		UnlockPage(page);
+		return 0;
+	}
+	ntfs_error(sb, "No io was scheduled on any of the buffers in the page, "
+			"but buffers were not all uptodate to start with. "
+			"Setting page error flag and returning io error.");
+	SetPageError(page);
+	UnlockPage(page);
+	return -EIO;
+}
+/*
+ * Address space operations for accessing normal file data. Only the read
+ * side is provided; all write related operations are left unset.
+ */
+struct address_space_operations ntfs_file_aops = {
+	.writepage	= NULL,			/* Write dirty page to disk. */
+	.readpage	= ntfs_file_readpage,	/* Fill page with data. */
+	.sync_page	= block_sync_page,	/* Currently, just unplugs the
+						   disk request queue. */
+	.prepare_write	= NULL,			/* Not implemented. */
+	.commit_write	= NULL,			/* Not implemented. */
+	/* truncatepage is deliberately not initialized. */
+};
+/* Prototype of the readpage address space operation, used for the cast below. */
+typedef int readpage_t(struct file *, struct page *);
+/*
+ * FIXME: Kludge: Address space operations for accessing mftbmp.
+ * ntfs_mftbmp_readpage() takes an ntfs_volume pointer as its first argument
+ * instead of a struct file pointer, hence the cast to readpage_t.
+ */
+struct address_space_operations ntfs_mftbmp_aops = {
+	.writepage	= NULL,			/* Write dirty page to disk. */
+	.readpage	= (readpage_t*)ntfs_mftbmp_readpage, /* Fill page with
+								data. */
+	.sync_page	= block_sync_page,	/* Currently, just unplugs the
+						   disk request queue. */
+	.prepare_write	= NULL,			/* Not implemented. */
+	.commit_write	= NULL,			/* Not implemented. */
+	/* truncatepage is deliberately not initialized. */
+};
+/*
+ * Address space operations for accessing normal directory data, i.e. the
+ * index allocation attribute. The file operations cannot be reused because
+ * 1) the attribute is a different one and, more importantly, 2) index records
+ * are protected by multi sector transfer fixups which have to be removed
+ * after reading.
+ */
+struct address_space_operations ntfs_dir_aops = {
+	.writepage	= NULL,			/* Write dirty page to disk. */
+	.readpage	= ntfs_dir_readpage,	/* Fill page with data. */
+	.sync_page	= block_sync_page,	/* Currently, just unplugs the
+						   disk request queue. */
+	.prepare_write	= NULL,			/* Not implemented. */
+	.commit_write	= NULL,			/* Not implemented. */
+	/* truncatepage is deliberately not initialized. */
+};
--- a/fs/ntfs/attraops.c
+++ b/fs/ntfs/attraops.c
+#include "ntfs.h"
+/*
+ * We need to define the attribute object structure. FIXME: Move these to
+ * ntfs.h.
+ *
+ * NOTE(review): nothing in this file reads or writes the fields yet, so the
+ * per field notes below are inferred from names and types only and need to be
+ * confirmed once the attribute object code exists.
+ */
+typedef struct {
+	ntfs_inode *a_ni;		/* presumably the ntfs inode owning the attribute - TODO confirm */
+	ntfs_volume *a_vol;		/* presumably the volume the attribute lives on - TODO confirm */
+	atomic_t a_count;		/* presumably a reference count - TODO confirm */
+	s64 a_size;			/* presumably the attribute size in bytes - TODO confirm */
+	struct rw_semaphore a_sem;	/* read/write semaphore; what it protects is not shown here */
+	struct address_space a_mapping;	/* embedded address space, presumably the attribute's page cache */
+	unsigned long a_flags;		/* flag bits; no values are defined in this file */
+} attr_obj;
+/**
+ * ntfs_attr_readpage - fill a page @page of an attribute object @aobj with data
+ * @aobj:	attribute object to which the page @page belongs
+ * @page:	page cache page to fill with data
+ *
+ * Not implemented yet. @aobj is meant to become an attr_obj pointer but is
+ * declared as a struct file pointer for now so that the function fits the
+ * readpage slot of struct address_space_operations.
+ *
+ * The page is handed to us locked and has to be unlocked again even when
+ * failing, just like the other readpage functions of this driver do on their
+ * error paths, as otherwise anyone waiting on the page would wait forever.
+ *
+ * Always returns -EOPNOTSUPP.
+ */
+static int ntfs_attr_readpage(struct file *aobj, struct page *page)
+{
+	UnlockPage(page);
+	return -EOPNOTSUPP;
+}
+/*
+ * Address space operations for accessing attributes. These functions are
+ * meant to take an attribute object rather than an inode as their first
+ * parameter. The aim is a generic interface that is not tied to inodes, so
+ * that multiple named streams per file, multiple bitmaps per file and
+ * directory, etc. can be supported, i.e. access to any attribute within an
+ * mft record.
+ *
+ * We make use of a slab cache for attribute object allocations.
+ */
+struct address_space_operations ntfs_attr_aops = {
+	.writepage	= NULL,			/* Write dirty page to disk. */
+	.readpage	= ntfs_attr_readpage,	/* Fill page with data. */
+	.sync_page	= block_sync_page,	/* Currently, just unplugs the
+						   disk request queue. */
+	.prepare_write	= NULL,			/* Not implemented. */
+	.commit_write	= NULL,			/* Not implemented. */
+	/* truncatepage is deliberately not initialized. */
+};
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
+/**
+ * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "ntfs.h"
+/* Temporary helper functions -- might become macros */
+/**
+ * rl_mm - move run list elements within one run list array
+ * @base:  The run list array.
+ * @dst:   Index of the first destination element.
+ * @src:   Index of the first source element.
+ * @size:  Number of elements to move.
+ *
+ * The two ranges may overlap. Nothing is done if @size is not positive or if
+ * @dst equals @src.
+ *
+ * It is up to the caller to serialize access to the run list @base.
+ */
+static inline void rl_mm(run_list_element *base, int dst, int src, int size)
+{
+	if (size <= 0 || dst == src)
+		return;
+	memmove(&base[dst], &base[src], size * sizeof(*base));
+}
+/**
+ * rl_mc - copy run list elements from one run list array into another
+ * @dstbase:  The destination run list array.
+ * @dst:      Index of the first destination element.
+ * @srcbase:  The source run list array.
+ * @src:      Index of the first source element.
+ * @size:     Number of elements to copy.
+ *
+ * Nothing is done if @size is not positive.
+ *
+ * It is up to the caller to serialize access to the run lists @dstbase and
+ * @srcbase.
+ */
+static inline void rl_mc(run_list_element *dstbase, int dst,
+		run_list_element *srcbase, int src, int size)
+{
+	if (size <= 0)
+		return;
+	memcpy(&dstbase[dst], &srcbase[src], size * sizeof(*dstbase));
+}
+/**
+ * ntfs_rl_realloc - Reallocate memory for run_lists
+ * @orig:  The original memory allocation (may be NULL)
+ * @old:   The number of run_list elements in the original
+ * @new:   The number of run_list elements we need space for
+ *
+ * Run list arrays are sized in whole pages so that growing a run list does
+ * not cause lots of small allocations. Both element counts are rounded up to
+ * a page multiple in bytes and a new buffer is only allocated when the two
+ * rounded sizes differ, in which case the old contents (as far as they fit)
+ * are copied over and @orig is freed.
+ *
+ * It is up to the caller to serialize access to the run list @orig.
+ *
+ * N.B.  If the new allocation doesn't require a different number of pages in
+ *       memory, the function will return the original pointer.
+ *
+ * Return: Pointer  The newly allocated, or recycled, memory.
+ *
+ * Errors: -ENOMEM, Not enough memory to allocate run list array.
+ */
+static inline run_list_element *ntfs_rl_realloc(run_list_element *orig,
+		int old, int new)
+{
+	const int old_bytes = PAGE_ALIGN(old * sizeof(*orig));
+	const int new_bytes = PAGE_ALIGN(new * sizeof(*orig));
+	run_list_element *fresh;
+	if (old_bytes != new_bytes) {
+		fresh = ntfs_malloc_nofs(new_bytes);
+		if (!fresh)
+			return ERR_PTR(-ENOMEM);
+		if (orig) {
+			/* Carry over whatever fits, then drop the old array. */
+			memcpy(fresh, orig, min(old_bytes, new_bytes));
+			ntfs_free(orig);
+		}
+		orig = fresh;
+	}
+	return orig;
+}
+/**
+ * ntfs_rl_merge - Join together two run_list elements
+ * @one:  The first element and destination
+ * @two:  The second element
+ *
+ * Merge @two into @one if that is possible. This requires that both describe
+ * real clusters (neither lcn is negative) and that @two starts exactly where
+ * @one ends, both in terms of LCNs and in terms of VCNs. On success only the
+ * length of @one is changed; @two is left untouched.
+ *
+ * It is up to the caller to serialize access to the run lists @one and @two.
+ *
+ * Return: TRUE   Success, the run_lists were merged
+ *         FALSE  Failure, the run_lists were not merged
+ */
+static inline BOOL ntfs_rl_merge(run_list_element *one, run_list_element *two)
+{
+	BUG_ON(!one || !two);
+	if (one->lcn >= 0 && two->lcn >= 0 &&
+	    one->lcn + one->length == two->lcn &&
+	    one->vcn + one->length == two->vcn) {
+		one->length += two->length;
+		return TRUE;
+	}
+	return FALSE;
+}
+/**
+ * ntfs_rl_append - Append a run_list after the given element
+ * @orig:   The original run_list to be worked on.
+ * @osize:  The number of elements in @orig (including end marker).
+ * @new:    The run_list to be inserted.
+ * @nsize:  The number of elements in @new (excluding end marker).
+ * @loc:    Append the new run_list after this element in @orig.
+ *
+ * Place @new directly behind element @loc of @orig. If the last element of
+ * @new can be merged with the element following @loc, that element is
+ * absorbed. The length of element @loc is then adjusted so that it ends where
+ * @new begins.
+ *
+ * It is up to the caller to serialize access to the run lists @orig and @new.
+ *
+ * Return: Pointer, The new, combined, run_list
+ *
+ * Errors: -ENOMEM, Not enough memory to allocate run list array.
+ */
+static inline run_list_element *ntfs_rl_append(run_list_element *orig,
+		int osize, run_list_element *new, int nsize, int loc)
+{
+	const int first = loc + 1;	/* Index at which @new is placed. */
+	run_list_element *res, *tail;
+	BOOL merged;
+	BUG_ON(!orig || !new);
+	/* Try to fold the element behind @loc into the end of @new. */
+	merged = ntfs_rl_merge(&new[nsize - 1], &orig[first]);
+	/* One element less is needed if the merge succeeded. */
+	res = ntfs_rl_realloc(orig, osize, osize + nsize - merged);
+	if (IS_ERR(res))
+		return res;
+	/* Open a gap behind @loc and drop @new into it. */
+	rl_mm(res, first + nsize, first + merged, osize - first - merged);
+	rl_mc(res, first, new, 0, nsize);
+	/* Element @loc now has to end where @new starts. */
+	res[loc].length = res[first].vcn - res[loc].vcn;
+	/* The file length may have changed, so move the end marker along. */
+	tail = &res[first + nsize];
+	if (tail->lcn == LCN_ENOENT)
+		tail->vcn = tail[-1].vcn + tail[-1].length;
+	return res;
+}
+/**
+ * ntfs_rl_insert - Insert a run_list into another
+ * @orig:   The original run_list to be worked on.
+ * @osize:  The number of elements in @orig (including end marker).
+ * @new:    The run_list to be inserted.
+ * @nsize:  The number of elements in @new (excluding end marker).
+ * @loc:    Insert the new run_list before this element in @orig.
+ *
+ * Insert a run_list before element @loc in @orig.  Merge the left end of
+ * the new run_list, if necessary.  Adjust the size of the hole after the
+ * inserted run_list.
+ *
+ * If @new does not start where the element before @loc ends (or at vcn 0 when
+ * @loc is 0), the gap is covered either by lengthening a preceding hole or by
+ * adding one extra element marked LCN_RL_NOT_MAPPED.
+ *
+ * It is up to the caller to serialize access to the run lists @orig and @new.
+ *
+ * Return: Pointer, The new, combined, run_list
+ *
+ * Errors: -ENOMEM, Not enough memory to allocate run list array.
+ *
+ * NOTE: The merge attempt modifies orig[loc-1] before the reallocation is
+ * tried, so @orig may already have been changed when an error is returned.
+ */
+static inline run_list_element *ntfs_rl_insert(run_list_element *orig,
+		int osize, run_list_element *new, int nsize, int loc)
+{
+	run_list_element *res;
+	BOOL left = FALSE;	/* new[0] was merged into orig[loc-1] */
+	BOOL disc = FALSE;	/* Discontinuity */
+	BOOL hole = FALSE;	/* Following a hole */
+	BUG_ON (!orig || !new);
+	/* disc => Discontinuity between the end of Orig and the start of New.
+	 *         This means we might need to insert a hole.
+	 * hole => Orig ends with a hole or an unmapped region which we can
+	 *         extend to match the discontinuity.
+	 * The three flags are used as 0/1 values in the index arithmetic
+	 * below. Note that hole is only ever set when disc is set, and that
+	 * only LCN_HOLE (not LCN_RL_NOT_MAPPED) is tested for here. */
+	if (loc == 0) {
+		disc = (new[0].vcn > 0);
+	} else {
+		left = ntfs_rl_merge (orig + loc - 1, new);
+		disc = (new[0].vcn > (orig[loc-1].vcn + orig[loc-1].length));
+		if (disc)
+			hole = (orig[loc-1].lcn == LCN_HOLE);
+	}
+	/* Space required: Orig size + New size, less one if we merged,
+	 * plus one if there was a discontinuity, less one for a trailing hole */
+	res = ntfs_rl_realloc (orig, osize, osize + nsize - left + disc - hole);
+	if (IS_ERR (res))
+		return res;
+	/* Move the tail of Orig out of the way, then copy in New. When the
+	 * left merge succeeded new[0] is skipped as orig[loc-1] already
+	 * covers it. */
+	rl_mm (res, loc + nsize - left + disc - hole, loc, osize - loc);
+	rl_mc (res, loc + disc - hole, new, left, nsize - left);
+	/* Adjust the VCN of the last run ... (i.e. of the first element
+	 * following the inserted ones, if it is a hole or any other special
+	 * lcn value at or below LCN_HOLE). */
+	if (res[loc+nsize-left+disc-hole].lcn <= LCN_HOLE) {
+		res[loc+nsize-left+disc-hole].vcn =
+			res[loc+nsize-left+disc-hole-1].vcn +
+			res[loc+nsize-left+disc-hole-1].length;
+	}
+	/* ... and the length. */
+	if ((res[loc+nsize-left+disc-hole].lcn == LCN_HOLE) ||
+	    (res[loc+nsize-left+disc-hole].lcn == LCN_RL_NOT_MAPPED)) {
+		res[loc+nsize-left+disc-hole].length =
+			res[loc+nsize-left+disc-hole+1].vcn -
+			res[loc+nsize-left+disc-hole].vcn;
+	}
+	/* Writing beyond the end of the file and there's a discontinuity.
+	 * Either stretch the preceding hole up to the start of New or fill in
+	 * the extra element at @loc as a not mapped region covering the gap.
+	 * NOTE(review): the end marker fix-up at the bottom of this block
+	 * indexes with "+disc" but without "-hole", unlike the adjustments
+	 * above - confirm this is intended when hole is set. */
+	if (disc) {
+		if (hole) {
+			res[loc-1].length = res[loc].vcn - res[loc-1].vcn;
+		} else {
+			if (loc > 0) {
+				res[loc].vcn = res[loc-1].vcn +
+					res[loc-1].length;
+				res[loc].length = res[loc+1].vcn - res[loc].vcn;
+			} else {
+				res[loc].vcn = 0;
+				res[loc].length = res[loc+1].vcn;
+			}
+			res[loc].lcn = LCN_RL_NOT_MAPPED;
+		}
+		if (res[loc+nsize-left+disc].lcn == LCN_ENOENT)
+			res[loc+nsize-left+disc].vcn = res[loc+nsize-left+disc-1].vcn +
+				res[loc+nsize-left+disc-1].length;
+	}
+	return res;
+}
+/**
+ * ntfs_rl_replace - Overwrite a run_list element with another run_list
+ * @orig:   The original run_list to be worked on.
+ * @osize:  The number of elements in @orig (including end marker).
+ * @new:    The run_list to be inserted.
+ * @nsize:  The number of elements in @new (excluding end marker).
+ * @loc:    Index of run_list @orig to overwrite with @new.
+ *
+ * Element @loc of @orig is replaced by the elements of @new. Where possible
+ * the last element of @new is merged with the element following @loc and the
+ * first element of @new is merged into the element preceding @loc.
+ *
+ * It is up to the caller to serialize access to the run lists @orig and @new.
+ *
+ * Return: Pointer, The new, combined, run_list
+ *
+ * Errors: -ENOMEM, Not enough memory to allocate run list array.
+ */
+static inline run_list_element *ntfs_rl_replace(run_list_element *orig,
+		int osize, run_list_element *new, int nsize, int loc)
+{
+	run_list_element *res, *tail;
+	BOOL merged_left = FALSE;
+	BOOL merged_right;
+	int kept;	/* Elements of @new that are actually copied. */
+	BUG_ON(!orig || !new);
+	/* Right hand neighbour first, then the left hand one if there is one. */
+	merged_right = ntfs_rl_merge(&new[nsize - 1], &orig[loc + 1]);
+	if (loc > 0)
+		merged_left = ntfs_rl_merge(&orig[loc - 1], new);
+	/* Every successful merge saves one element. */
+	res = ntfs_rl_realloc(orig, osize,
+			osize + nsize - merged_left - merged_right);
+	if (IS_ERR(res))
+		return res;
+	kept = nsize - merged_left;
+	/* Shift what follows the replaced element, then copy in @new. */
+	rl_mm(res, loc + kept, loc + merged_right + 1,
+			osize - loc - merged_right - 1);
+	rl_mc(res, loc, new, merged_left, kept);
+	/* The file length may have changed, so move the end marker along. */
+	tail = &res[loc + kept];
+	if (tail->lcn == LCN_ENOENT)
+		tail->vcn = tail[-1].vcn + tail[-1].length;
+	return res;
+}
+/**
+ * ntfs_rl_split - Insert a run_list into the centre of a hole
+ * @orig:   The original run_list to be worked on.
+ * @osize:  The number of elements in @orig (including end marker).
+ * @new:    The run_list to be inserted.
+ * @nsize:  The number of elements in @new (excluding end marker).
+ * @loc:    Index of run_list in @orig to split with @new.
+ *
+ * Element @loc of @orig is duplicated, with @new placed between the two
+ * copies. The first copy is shortened to end where @new begins and the second
+ * one is changed to begin where @new ends. No merging is attempted.
+ *
+ * It is up to the caller to serialize access to the run lists @orig and @new.
+ *
+ * Return: Pointer, The new, combined, run_list
+ *
+ * Errors: -ENOMEM, Not enough memory to allocate run list array.
+ */
+static inline run_list_element *ntfs_rl_split(run_list_element *orig, int osize,
+		run_list_element *new, int nsize, int loc)
+{
+	run_list_element *res, *before, *after;
+	BUG_ON(!orig || !new);
+	/* Room for everything in @new plus the second half of the hole. */
+	res = ntfs_rl_realloc(orig, osize, osize + nsize + 1);
+	if (IS_ERR(res))
+		return res;
+	/*
+	 * Move everything from @loc onwards behind the gap (element @loc
+	 * itself also stays where it is) and fill the gap with @new.
+	 */
+	rl_mm(res, loc + 1 + nsize, loc, osize - loc);
+	rl_mc(res, loc + 1, new, 0, nsize);
+	/* Trim the hole in front of @new ... */
+	before = &res[loc];
+	before->length = before[1].vcn - before->vcn;
+	/* ... and make the hole behind it cover what is left. */
+	after = &res[loc + nsize + 1];
+	after->vcn = after[-1].vcn + after[-1].length;
+	after->length = after[1].vcn - after->vcn;
+	return res;
+}
+/**
+ * merge_run_lists - merge two run_lists into one
+ * @drl:  The original run_list.
+ * @srl:  The new run_list to be merged into @drl.
+ *
+ * First we sanity check the two run_lists to make sure that they are sensible
+ * and can be merged.  The @srl run_list must be either after the @drl run_list
+ * or completely within a hole in @drl.
+ *
+ * It is up to the caller to serialize access to the run lists @drl and @srl.
+ *
+ * Merging of run lists is necessary in two cases:
+ *   1. When attribute lists are used and a further extent is being mapped.
+ *   2. When new clusters are allocated to fill a hole or extend a file.
+ *
+ * There are four possible ways @srl can be merged.  It can be inserted at
+ * the beginning of a hole; split the hole in two; appended at the end of
+ * a hole; replace the whole hole.  It can also be appended to the end of
+ * the run_list, which is just a variant of the insert case.
+ *
+ * Special cases: if @srl is NULL, @drl is returned as is.  If @drl is NULL,
+ * @srl becomes the result, gaining a leading LCN_RL_NOT_MAPPED element when it
+ * does not start at vcn 0.
+ *
+ * N.B.  Either, or both, of the input pointers may be freed if the function
+ *       is successful.  Only the returned pointer may be used.
+ *
+ *       If the function fails, neither of the input run_lists may be safe.
+ *
+ * Return: Pointer, The resultant merged run_list.
+ *
+ * Errors: -ENOMEM, Not enough memory to allocate run list array.
+ *         -EINVAL, Invalid parameters were passed in.
+ *         -ERANGE, The run_lists overlap and cannot be merged.
+ */
+run_list_element *merge_run_lists(run_list_element *drl, run_list_element *srl)
+{
+	run_list_element *nrl;	/* New run list. */
+	int di, si;		/* Current index into @[ds]rl. */
+	int sstart;		/* First index with lcn > LCN_RL_NOT_MAPPED. */
+	int dins;		/* Index into @drl at which to insert @srl. */
+	int dend, send;		/* Last index into @[ds]rl. */
+	int dfinal, sfinal;	/* The last index into @[ds]rl with
+				   lcn >= LCN_HOLE. */
+	int marker = 0;		/* Index of an LCN_ENOENT terminator in @srl. */
+#if 1
+	ntfs_debug ("dst:");
+	ntfs_debug_dump_runlist (drl);
+	ntfs_debug ("src:");
+	ntfs_debug_dump_runlist (srl);
+#endif
+ 	/* Check for silly calling... */
+	if (unlikely (!srl))
+		return drl;
+	if (unlikely (IS_ERR (srl) || IS_ERR (drl)))
+		return ERR_PTR (-EINVAL);
+	/* Check for the case where the first mapping is being done now. */
+	if (unlikely (!drl)) {
+		nrl = srl;
+		/* Complete the source run list if necessary. */
+		if (unlikely (srl[0].vcn)) {
+			/* Scan to the end of the source run list. */
+			for (send = 0; likely (srl[send].length); send++)
+				;
+			/*
+			 * Turn the terminator index into an element count so
+			 * that the terminator is moved along with the rest.
+			 */
+			send++;
+			nrl = ntfs_rl_realloc (srl, send, send + 1);
+			/* Failure is signalled by an error pointer, not NULL. */
+			if (IS_ERR (nrl))
+				return nrl;
+			/*
+			 * The other callers in this file never free the
+			 * pointer they pass to ntfs_rl_realloc(), so it is
+			 * treated as consumed here as well. Forget the old
+			 * pointer so it is not freed at the finished label.
+			 */
+			srl = nrl;
+			rl_mm (nrl, 1, 0, send);
+			nrl[0].vcn = 0;			/* Add start element. */
+			nrl[0].lcn = LCN_RL_NOT_MAPPED;
+			nrl[0].length = nrl[1].vcn;
+		}
+		goto finished;
+	}
+	si = di = 0;
+	/* Skip the unmapped start element(s) in each run_list if present. */
+	while (srl[si].length && srl[si].lcn < (LCN)LCN_HOLE)
+		si++;
+	/* Can't have an entirely unmapped srl run_list. */
+	BUG_ON (!srl[si].length);
+	/* Record the starting points. */
+	sstart = si;
+	/*
+	 * Skip forward in @drl until we reach the position where @srl needs to
+	 * be inserted. If we reach the end of @drl, @srl just needs to be
+	 * appended to @drl.
+	 */
+	for (; drl[di].length; di++) {
+		if ((drl[di].vcn + drl[di].length) > srl[sstart].vcn)
+			break;
+	}
+	dins = di;
+	/* Sanity check for illegal overlaps. */
+	if ((drl[di].vcn == srl[si].vcn) &&
+	    (drl[di].lcn >= 0) &&
+	    (srl[si].lcn >= 0)) {
+		ntfs_error (NULL, "Run lists overlap. Cannot merge! Returning "
+				"ERANGE.");
+		nrl = ERR_PTR (-ERANGE);
+		goto exit;
+	}
+	/* Scan to the end of both run lists in order to know their sizes. */
+	for (send = si; srl[send].length; send++)
+		;
+	for (dend = di; drl[dend].length; dend++)
+		;
+	if (srl[send].lcn == LCN_ENOENT) {
+		marker = send;
+	}
+	/* Scan to the last element with lcn >= LCN_HOLE. */
+	for (sfinal = send; sfinal >= 0 && srl[sfinal].lcn < LCN_HOLE; sfinal--)
+		;
+	for (dfinal = dend; dfinal >= 0 && drl[dfinal].lcn < LCN_HOLE; dfinal--)
+		;
+	{
+	BOOL start;
+	BOOL finish;
+	int ds = dend   + 1;		/* Number of elements in drl & srl */
+	int ss = sfinal - sstart + 1;
+	start  = ((drl[dins].lcn <  LCN_RL_NOT_MAPPED) ||    /* End of file   */
+		  (drl[dins].vcn == srl[sstart].vcn));	     /* Start of hole */
+	finish = ((drl[dins].lcn >= LCN_RL_NOT_MAPPED) &&    /* End of file   */
+		 ((drl[dins].vcn + drl[dins].length) <=      /* End of hole   */
+		  (srl[send-1].vcn + srl[send-1].length)));
+		  //srl[send-1].vcn));
+	/* Or we'll lose an end marker */
+	if (start && finish && (drl[dins].length == 0))
+		ss++;
+	if (marker && (drl[dins].vcn + drl[dins].length > srl[send-1].vcn))
+		finish = FALSE;
+#if 0
+	ntfs_debug("dfinal = %i, dend = %i", dfinal, dend);
+	ntfs_debug("sstart = %i, sfinal = %i, send = %i", sstart, sfinal, send);
+	ntfs_debug("start = %i, finish = %i", start, finish);
+	ntfs_debug("ds = %i, ss = %i, dins = %i", ds, ss, dins);
+#endif
+	/* Pick the merge primitive matching how @srl sits within @drl. */
+	if (start) {
+		if (finish)
+			nrl = ntfs_rl_replace (drl, ds, srl + sstart, ss, dins);
+		else
+			nrl = ntfs_rl_insert  (drl, ds, srl + sstart, ss, dins);
+	} else {
+		if (finish)
+			nrl = ntfs_rl_append  (drl, ds, srl + sstart, ss, dins);
+		else
+			nrl = ntfs_rl_split   (drl, ds, srl + sstart, ss, dins);
+	}
+	/*
+	 * Re-add the LCN_ENOENT end marker of @srl. This must not be tried
+	 * when the merge above failed, as @nrl is then an error pointer and
+	 * cannot be dereferenced.
+	 *
+	 * NOTE(review): the scan below stops at the first element whose lcn
+	 * is 0 rather than at the element whose length is 0 - confirm that
+	 * this is really what is intended.
+	 */
+	if (marker && !IS_ERR (nrl)) {
+		for (ds = 0; nrl[ds].lcn; ds++) ;
+		nrl = ntfs_rl_insert (nrl, ds+1, srl + marker, 1, ds-1);
+	}
+	}
+	if (likely (!IS_ERR (nrl))) {
+		/* The merge was completed successfully. */
+finished:
+		if (nrl != srl)
+			ntfs_free (srl);
+		/*ntfs_debug ("Done.");*/
+		/*ntfs_debug ("Merged run list:");*/
+#if 1
+		ntfs_debug ("res:");
+		ntfs_debug_dump_runlist (nrl);
+#endif
+	} else {
+		ntfs_error (NULL, "Merge failed, returning error code %ld.",
+				-PTR_ERR (nrl));
+	}
+exit:
+	return nrl;
+}
+/**
+ * decompress_mapping_pairs - convert mapping pairs array to run list
+ * @vol:	ntfs volume on which the attribute resides
+ * @attr:	attribute record whose mapping pairs array to decompress
+ * @old_rl:	optional run list in which to insert @attr's run list
+ *
+ * Decompress the attribute @attr's mapping pairs array into a run_list and
+ * return the run list or -errno on error. If @old_rl is not NULL then
+ * the mapping pairs array of @attr is decompressed and the run list inserted
+ * into the appropriate place in @old_rl. If this is the case and the
+ * function returns success, the original pointer passed into @old_rl is no
+ * longer valid.
+ *
+ * It is up to the caller to serialize access to the run list @old_rl.
+ *
+ * Check the return value for error with IS_ERR(ret_val). If this is FALSE,
+ * the function was successful, the return value is the new run list, and if
+ * an existing run list pointer was passed in, this is no longer valid.
+ * If IS_ERR(ret_val) returns true, there was an error, the return value is not
+ * a run_list pointer and the existing run list pointer if one was passed in
+ * has not been touched. In this case use PTR_ERR(ret_val) to obtain the error
+ * code. Following error codes are defined:
+ * 	-ENOMEM		Not enough memory to allocate run list array.
+ * 	-EIO		Corrupt run list.
+ * 	-EINVAL		Invalid parameters were passed in.
+ * 	-ERANGE		The two run lists overlap.
+ *
+ * NOTE(review): merge_run_lists() documents that on failure neither of its
+ * input run lists may be safe, which is at odds with the "has not been
+ * touched" guarantee above when the failure happens during the merge - to be
+ * confirmed.
+ *
+ * FIXME: For now we take the conceptually simplest approach of creating the
+ * new run list disregarding the already existing one and then splicing the
+ * two into one if that is possible (we check for overlap and discard the new
+ * run list if overlap present and return error).
+ */
+run_list_element *decompress_mapping_pairs(const ntfs_volume *vol,
+		const ATTR_RECORD *attr, run_list_element *old_rl)
+{
+	VCN vcn;		/* Current vcn. */
+	LCN lcn; 		/* Current lcn. */
+	s64 deltaxcn;		/* Change in [vl]cn. */
+	run_list_element *rl = NULL;	/* The output run_list. */
+	run_list_element *rl2;	/* Temporary run_list. */
+	u8 *buf;		/* Current position in mapping pairs array. */
+	u8 *attr_end;		/* End of attribute. */
+	int rlsize;		/* Size of run_list buffer. */
+	int rlpos;		/* Current run_list position. */
+	u8 b;			/* Current byte offset in buf. */
+#ifdef DEBUG
+	/* Make sure attr exists and is non-resident. */
+	if (!attr || !attr->non_resident ||
+			sle64_to_cpu(attr->_ANR(lowest_vcn)) < (VCN)0) {
+		ntfs_error(vol->sb, "Invalid arguments.");
+		return ERR_PTR(-EINVAL);
+	}
+#endif
+	/* Start at vcn = lowest_vcn and lcn 0. */
+	vcn = sle64_to_cpu(attr->_ANR(lowest_vcn));
+	lcn = 0;
+	/* Get start of the mapping pairs array. */
+	buf = (u8*)attr + le16_to_cpu(attr->_ANR(mapping_pairs_offset));
+	attr_end = (u8*)attr + le32_to_cpu(attr->length);
+	if (unlikely(buf < (u8*)attr || buf > attr_end)) {
+		ntfs_error(vol->sb, "Corrupt attribute.");
+		return ERR_PTR(-EIO);
+	}
+	/* Current position in run_list array. */
+	rlpos = 0;
+	/* Allocate first page. */
+	rl = ntfs_malloc_nofs(PAGE_SIZE);
+	if (unlikely(!rl))
+		return ERR_PTR(-ENOMEM);
+	/* Current run_list buffer size in bytes. */
+	rlsize = PAGE_SIZE;
+	/* Insert unmapped starting element if necessary. */
+	if (vcn) {
+		rl->vcn = (VCN)0;
+		rl->lcn = (LCN)LCN_RL_NOT_MAPPED;
+		rl->length = vcn;
+		rlpos++;
+	}
+	while (buf < attr_end && *buf) {
+		/*
+		 * Allocate more memory if needed, including space for the
+		 * not-mapped and terminator elements. ntfs_malloc_nofs()
+		 * operates on whole pages only.
+		 */
+		if (((rlpos + 3) * sizeof(*old_rl)) > rlsize) {
+			rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE);
+			if (unlikely(!rl2)) {
+				ntfs_free(rl);
+				return ERR_PTR(-ENOMEM);
+			}
+			memmove(rl2, rl, rlsize);
+			ntfs_free(rl);
+			rl = rl2;
+			rlsize += PAGE_SIZE;
+		}
+		/* Enter the current vcn into the current run_list element. */
+		(rl + rlpos)->vcn = vcn;
+		/*
+		 * Get the change in vcn, i.e. the run length in clusters.
+		 * Doing it this way ensures that we signextend negative values.
+		 * A negative run length doesn't make any sense, but hey, I
+		 * didn't make up the NTFS specs and Windows NT4 treats the run
+		 * length as a signed value so that's how it is...
+		 */
+		b = *buf & 0xf;
+		if (b) {
+			/*
+			 * buf[b] is about to be read, so it has to lie inside
+			 * the attribute, i.e. strictly below attr_end.
+			 */
+			if (unlikely(buf + b >= attr_end))
+				goto io_error;
+			for (deltaxcn = (s8)buf[b--]; b; b--)
+				deltaxcn = (deltaxcn << 8) + buf[b];
+		} else { /* The length entry is compulsory. */
+			ntfs_error(vol->sb, "Missing length entry in mapping "
+					"pairs array.");
+			deltaxcn = (s64)-1;
+		}
+		/*
+		 * Assume a negative length to indicate data corruption and
+		 * hence clean-up and return -EIO.
+		 */
+		if (unlikely(deltaxcn < 0)) {
+			ntfs_error(vol->sb, "Invalid length in mapping pairs "
+					"array.");
+			goto err_out;
+		}
+		/*
+		 * Enter the current run length into the current run_list
+		 * element.
+		 */
+		(rl + rlpos)->length = deltaxcn;
+		/* Increment the current vcn by the current run length. */
+		vcn += deltaxcn;
+		/*
+		 * There might be no lcn change at all, as is the case for
+		 * sparse clusters on NTFS 3.0+, in which case we set the lcn
+		 * to LCN_HOLE.
+		 */
+		if (!(*buf & 0xf0))
+			(rl + rlpos)->lcn = (LCN)LCN_HOLE;
+		else {
+			/* Get the lcn change which really can be negative. */
+			u8 b2 = *buf & 0xf;
+			b = b2 + ((*buf >> 4) & 0xf);
+			/* As above: buf[b] must be inside the attribute. */
+			if (buf + b >= attr_end)
+				goto io_error;
+			for (deltaxcn = (s8)buf[b--]; b > b2; b--)
+				deltaxcn = (deltaxcn << 8) + buf[b];
+			/* Change the current lcn to its new value. */
+			lcn += deltaxcn;
+#ifdef DEBUG
+			/*
+			 * On NTFS 1.2-, apparently can have lcn == -1 to
+			 * indicate a hole. But we haven't verified ourselves
+			 * whether it is really the lcn or the deltaxcn that is
+			 * -1. So if either is found give us a message so we
+			 * can investigate it further!
+			 */
+			if (vol->major_ver < 3) {
+				if (unlikely(deltaxcn == (LCN)-1))
+					ntfs_error(vol->sb, "lcn delta == -1");
+				if (unlikely(lcn == (LCN)-1))
+					ntfs_error(vol->sb, "lcn == -1");
+			}
+#endif
+			/* Check lcn is not below -1. */
+			if (unlikely(lcn < (LCN)-1)) {
+				ntfs_error(vol->sb, "Invalid LCN < -1 in "
+						"mapping pairs array.");
+				goto err_out;
+			}
+			/* Enter the current lcn into the run_list element. */
+			(rl + rlpos)->lcn = lcn;
+		}
+		/* Get to the next run_list element. */
+		rlpos++;
+		/* Increment the buffer position to the next mapping pair. */
+		buf += (*buf & 0xf) + ((*buf >> 4) & 0xf) + 1;
+	}
+	/* The array must be terminated by a zero byte inside the attribute. */
+	if (unlikely(buf >= attr_end))
+		goto io_error;
+	/*
+	 * If there is a highest_vcn specified, it must be equal to the final
+	 * vcn in the run list - 1, or something has gone badly wrong.
+	 */
+	deltaxcn = sle64_to_cpu(attr->_ANR(highest_vcn));
+	if (unlikely(deltaxcn && vcn - 1 != deltaxcn)) {
+mpa_err:
+		ntfs_error(vol->sb, "Corrupt mapping pairs array in "
+				"non-resident attribute.");
+		goto err_out;
+	}
+	/* Setup not mapped run_list element if this is the base extent. */
+	if (!attr->_ANR(lowest_vcn)) {
+		VCN max_cluster;
+		max_cluster = (sle64_to_cpu(attr->_ANR(allocated_size)) +
+				vol->cluster_size - 1) >>
+				vol->cluster_size_bits;
+		/*
+		 * If there is a difference between the highest_vcn and the
+		 * highest cluster, the run list is either corrupt or, more
+		 * likely, there are more extents following this one.
+		 */
+		if (deltaxcn < --max_cluster) {
+			//RAR ntfs_debug("More extents to follow; deltaxcn = 0x%Lx, "
+					//RAR "max_cluster = 0x%Lx",
+					//RAR (long long)deltaxcn,
+					//RAR (long long)max_cluster);
+			(rl + rlpos)->vcn = vcn;
+			vcn += (rl + rlpos)->length = max_cluster - deltaxcn;
+			(rl + rlpos)->lcn = (LCN)LCN_RL_NOT_MAPPED;
+			rlpos++;
+		} else if (unlikely(deltaxcn > max_cluster)) {
+			ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = "
+					"0x%Lx, max_cluster = 0x%Lx",
+					(long long)deltaxcn,
+					(long long)max_cluster);
+			goto mpa_err;
+		}
+		(rl + rlpos)->lcn = (LCN)LCN_ENOENT;
+	} else /* Not the base extent. There may be more extents to follow. */
+		(rl + rlpos)->lcn = (LCN)LCN_RL_NOT_MAPPED;
+	/* Setup terminating run_list element. */
+	(rl + rlpos)->vcn = vcn;
+	(rl + rlpos)->length = (s64)0;
+	//RAR ntfs_debug("Mapping pairs array successfully decompressed.");
+	//RAR ntfs_debug_dump_runlist(rl);
+	/* If no existing run list was specified, we are done. */
+	if (!old_rl)
+		return rl;
+	/* Now combine the new and old run lists checking for overlaps. */
+	rl2 = merge_run_lists(old_rl, rl);
+	if (likely(!IS_ERR(rl2)))
+		return rl2;
+	/* The merge failed: discard the new run list, hand back the error. */
+	ntfs_free(rl);
+	ntfs_error(vol->sb, "Failed to merge run lists.");
+	return rl2;
+io_error:
+	ntfs_error(vol->sb, "Corrupt attribute.");
+err_out:
+	ntfs_free(rl);
+	return ERR_PTR(-EIO);
+}
+/**
+ * map_run_list - map (a part of) a run list of an ntfs inode
+ * @ni:		ntfs inode for which to map (part of) a run list
+ * @vcn:	map run list part containing this vcn
+ *
+ * Map the part of a run list containing the @vcn of the ntfs inode @ni.
+ *
+ * The attribute consulted is the $I30 index allocation for directories and
+ * the unnamed data attribute for everything else.
+ *
+ * Return 0 on success and -errno on error.
+ */
+int map_run_list(ntfs_inode *ni, VCN vcn)
+{
+	attr_search_context *ctx;
+	MFT_RECORD *mrec;
+	run_list_element *rl;
+	/* Defaults describe the unnamed data attribute. */
+	const uchar_t *name = NULL;
+	u32 name_len = 0;
+	ATTR_TYPES at = AT_DATA;
+	int err;
+	ntfs_debug("Mapping run list part containing vcn 0x%Lx.",
+			(long long)vcn);
+	/* Map, pin and lock the mft record for reading. */
+	mrec = map_mft_record(READ, ni);
+	if (IS_ERR(mrec))
+		return PTR_ERR(mrec);
+	err = get_attr_search_ctx(&ctx, ni, mrec);
+	if (err)
+		goto unm_err_out;
+	/* Directories keep their run list in the $I30 index allocation. */
+	if (S_ISDIR(VFS_I(ni)->i_mode)) {
+		at = AT_INDEX_ALLOCATION;
+		name = I30;
+		name_len = 4;
+	}
+	/* Find the attribute in the mft record. */
+	if (!lookup_attr(at, name, name_len, CASE_SENSITIVE, vcn, NULL, 0,
+			ctx)) {
+		put_attr_search_ctx(ctx);
+		err = -ENOENT;
+		goto unm_err_out;
+	}
+	/* Lock the run list. */
+	write_lock(&ni->run_list.lock);
+	/*
+	 * Only do the work if the vcn is still not mapped, i.e. nobody else
+	 * mapped it while we were waiting for the lock.
+	 */
+	if (likely(vcn_to_lcn(ni->run_list.rl, vcn) <= LCN_RL_NOT_MAPPED)) {
+		/* Decode the run list, merging it into the existing one. */
+		rl = decompress_mapping_pairs(ni->vol, ctx->attr,
+				ni->run_list.rl);
+		/* Install the result on success, record the error if not. */
+		if (likely(!IS_ERR(rl)))
+			ni->run_list.rl = rl;
+		else
+			err = PTR_ERR(rl);
+	}
+	/* Unlock the run list. */
+	write_unlock(&ni->run_list.lock);
+	put_attr_search_ctx(ctx);
+	/* Unlock, unpin and release the mft record. */
+	unmap_mft_record(READ, ni);
+	ntfs_debug("Done.");
+	/* This is zero unless decompressing the mapping pairs failed. */
+	return err;
+unm_err_out:
+	unmap_mft_record(READ, ni);
+	return err;
+}
+/**
+ * vcn_to_lcn - convert a vcn into a lcn given a run list
+ * @rl:		run list to use for conversion
+ * @vcn:	vcn to convert
+ *
+ * Translate the virtual cluster number @vcn of an attribute into the logical
+ * cluster number (lcn) on the device it corresponds to, looking the mapping
+ * up in the run list @rl.
+ *
+ * It is up to the caller to serialize access to the run list @rl.
+ *
+ * Since lcns must be >= 0, we use negative return values with special meaning:
+ *
+ * Return value			Meaning / Description
+ * ==================================================
+ *  -1 = LCN_HOLE		Hole / not allocated on disk.
+ *  -2 = LCN_RL_NOT_MAPPED	This is part of the run list which has not been
+ *				inserted into the run list yet.
+ *  -3 = LCN_ENOENT		There is no such vcn in the data attribute.
+ *  -4 = LCN_EINVAL		Input parameter error (if debug enabled).
+ */
+LCN vcn_to_lcn(const run_list_element *rl, const VCN vcn)
+{
+	const run_list_element *run;
+#ifdef DEBUG
+	if (vcn < (VCN)0)
+		return (LCN)LCN_EINVAL;
+#endif
+	/*
+	 * Without a run list nothing is mapped yet. Say so and leave it to
+	 * the caller to try to map it and to fail appropriately if needed.
+	 */
+	if (unlikely(!rl))
+		return (LCN)LCN_RL_NOT_MAPPED;
+	/* A vcn in front of the first run does not exist. */
+	if (unlikely(vcn < rl->vcn))
+		return (LCN)LCN_ENOENT;
+	/* Walk the runs until the one starting beyond @vcn is next. */
+	for (run = rl; likely(run->length); run++) {
+		if (likely(vcn >= run[1].vcn))
+			continue;
+		/* Special (negative) lcns are handed back unmodified. */
+		if (unlikely(run->lcn < (LCN)0))
+			return run->lcn;
+		/* Real run: add the offset of @vcn within the run. */
+		return run->lcn + (vcn - run->vcn);
+	}
+	/*
+	 * @run is now the terminator element, which is setup to the correct
+	 * value, i.e. one of LCN_HOLE, LCN_RL_NOT_MAPPED, or LCN_ENOENT.
+	 */
+	if (likely(run->lcn < (LCN)0))
+		return run->lcn;
+	/* Just in case... We could replace this with BUG() some day. */
+	return (LCN)LCN_ENOENT;
+}
+/**
+ * find_attr - find (next) attribute in mft record
+ * @type:	attribute type to find
+ * @name:	attribute name to find (optional, i.e. NULL means don't care)
+ * @name_len:	attribute name length (only needed if @name present)
+ * @ic:		IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
+ * @val:	attribute value to find (optional, resident attributes only)
+ * @val_len:	attribute value length
+ * @ctx:	search context with mft record and attribute to search from
+ *
+ * You shouldn't need to call this function directly. Use lookup_attr() instead.
+ *
+ * find_attr() takes a search context @ctx as parameter and searches the mft
+ * record specified by @ctx->mrec, beginning at @ctx->attr, for an attribute of
+ * @type, optionally @name and @val. If found, find_attr() returns TRUE and
+ * @ctx->attr will point to the found attribute. If not found, find_attr()
+ * returns FALSE and @ctx->attr is undefined (i.e. do not rely on it not
+ * changing).
+ *
+ * If @ctx->is_first is TRUE, the search begins with @ctx->attr itself (and
+ * @ctx->is_first is reset to FALSE). If it is FALSE, the search begins after
+ * @ctx->attr.
+ *
+ * If @ic is IGNORE_CASE, the @name comparison is not case sensitive and
+ * @ctx->ntfs_ino must be set to the ntfs inode to which the mft record
+ * @ctx->mrec belongs. This is so we can get at the ntfs volume and hence at
+ * the upcase table. If @ic is CASE_SENSITIVE, the comparison is case
+ * sensitive. When @name is present, @name_len is the @name length in Unicode
+ * characters.
+ *
+ * If @name is not present (NULL), we assume that the unnamed attribute is
+ * being searched for.
+ *
+ * Finally, the resident attribute value @val is looked for, if present. If @val
+ * is not present (NULL), @val_len is ignored.
+ *
+ * find_attr() only searches the specified mft record and it ignores the
+ * presence of an attribute list attribute (unless it is the one being searched
+ * for, obviously). If you need to take attribute lists into consideration, use
+ * lookup_attr() instead (see below). This also means that you cannot use
+ * find_attr() to search for extent records of non-resident attributes, as
+ * extents with lowest_vcn != 0 are usually described by the attribute list
+ * attribute only. - Note that it is possible that the first extent is only in
+ * the attribute list while the last extent is in the base mft record, so don't
+ * rely on being able to find the first extent in the base mft record.
+ *
+ * Warning: Never use @val when looking for attribute types which can be
+ *	    non-resident as this most likely will result in a crash!
+ */
+BOOL find_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len,
+		const IGNORE_CASE_BOOL ic, const u8 *val, const u32 val_len,
+		attr_search_context *ctx)
+{
+	ATTR_RECORD *a;
+	ntfs_volume *vol = NULL;
+	uchar_t *upcase = NULL;
+	u32 upcase_len = 0;
+	/* The upcase table is only fetched for case insensitive searches. */
+	if (ic == IGNORE_CASE) {
+		vol = ctx->ntfs_ino->vol;
+		upcase = vol->upcase;
+		upcase_len = vol->upcase_len;
+	}
+	/*
+	 * Begin with @ctx->attr itself on the first call for this context and
+	 * with the attribute following it on every subsequent call.
+	 */
+	a = ctx->attr;
+	if (ctx->is_first)
+		ctx->is_first = FALSE;
+	else
+		a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
+	for (;;	a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
+		int rc;
+		u32 avl;
+		/* Stop if the attribute lies outside of the mft record. */
+		if ((u8*)a < (u8*)ctx->mrec)
+			break;
+		if ((u8*)a > (u8*)ctx->mrec +
+				le32_to_cpu(ctx->mrec->bytes_allocated))
+			break;
+		ctx->attr = a;
+		/* We catch $END with this more general check, too... */
+		if (le32_to_cpu(a->type) > le32_to_cpu(type))
+			return FALSE;
+		/* A zero length attribute would make us loop forever. */
+		if (unlikely(!a->length))
+			break;
+		if (a->type != type)
+			continue;
+		if (name) {
+			/* A name was given, so the names have to match. */
+			uchar_t *a_name = (uchar_t*)((u8*)a +
+					le16_to_cpu(a->name_offset));
+			if (!ntfs_are_names_equal(name, name_len, a_name,
+					a->name_length, ic, upcase,
+					upcase_len)) {
+				/*
+				 * Collate ignoring case first. A result of -1
+				 * means @name sorts before the attribute name
+				 * and hence there is no matching attribute.
+				 * Any other difference continues the search.
+				 */
+				rc = ntfs_collate_names(name, name_len, a_name,
+						a->name_length, 1, IGNORE_CASE,
+						upcase, upcase_len);
+				if (rc == -1)
+					return FALSE;
+				if (rc)
+		 			continue;
+				/* Same again but this time case sensitive. */
+				rc = ntfs_collate_names(name, name_len, a_name,
+						a->name_length, 1,
+						CASE_SENSITIVE, upcase,
+						upcase_len);
+				if (rc == -1)
+					return FALSE;
+				if (rc)
+					continue;
+			}
+		} else if (a->name_length) {
+			/* Unnamed attribute wanted but this one is named. */
+			return FALSE;
+		}
+		/*
+		 * The names match or @name is not present and the attribute
+		 * is unnamed. Without @val that makes this the attribute we
+		 * are after.
+		 */
+		if (!val)
+			return TRUE;
+		/* @val is present; compare values over the common length. */
+		avl = le32_to_cpu(a->_ARA(value_length));
+		rc = memcmp(val, (u8*)a +le16_to_cpu(a->_ARA(value_offset)),
+			min(val_len, avl));
+		/*
+		 * If @val collates before the current attribute's value,
+		 * there is no matching attribute.
+		 */
+		if (rc < 0)
+			return FALSE;
+		/* @val collates after this value: try the next attribute. */
+		if (rc > 0)
+			continue;
+		/* Common prefix is equal, so the lengths decide. */
+		if (val_len == avl)
+			return TRUE;
+		if (val_len < avl)
+			return FALSE;
+	}
+	ntfs_error(NULL, "Inode is corrupt. Run chkdsk.");
+	return FALSE;
+}
+/**
+ * load_attribute_list - load an attribute list into memory
+ * @vol:	ntfs volume from which to read
+ * @rl:		run list of the attribute list
+ * @al:		destination buffer
+ * @size:	size of the destination buffer in bytes
+ *
+ * Walk the run list @rl and load all clusters from it copying them into the
+ * linear buffer @al. The maximum number of bytes copied to @al is @size bytes.
+ * Note, @size does not need to be a multiple of the cluster size.
+ *
+ * The data is read one device block at a time. The buffer is allowed to end
+ * anywhere inside the final cluster of the final run of @rl (a trailing
+ * LCN_RL_NOT_MAPPED element is tolerated). If the run list describes more
+ * data than that the read is treated as an overflow.
+ *
+ * It is up to the caller to serialize access to the run list @rl.
+ *
+ * Return 0 on success or -errno on error: -EIO if a block cannot be read, if
+ * the run list contains an element without a real lcn, or if the attribute
+ * list does not fit into @al; -EINVAL on invalid arguments (debug builds
+ * only).
+ */
+int load_attribute_list(ntfs_volume *vol, run_list_element *rl, u8 *al,
+		const s64 size)
+{
+	LCN lcn;
+	u8 *al_end;
+	struct buffer_head *bh;
+	struct super_block *sb;
+	unsigned long block_size;
+	unsigned long block, max_block;
+	unsigned char block_size_bits;
+	ntfs_debug("Entering.");
+#ifdef DEBUG
+	if (!vol || !rl || !al || size <= 0)
+		return -EINVAL;
+#endif
+	/* Only touch @vol and @al once the arguments have been validated. */
+	sb = vol->sb;
+	block_size = sb->s_blocksize;
+	block_size_bits = sb->s_blocksize_bits;
+	al_end = al + size;
+	/* Read all clusters specified by the run list one run at a time. */
+	while (rl->length) {
+		lcn = vcn_to_lcn(rl, rl->vcn);
+		ntfs_debug("Reading vcn = 0x%Lx, lcn = 0x%Lx.",
+				(long long)rl->vcn, (long long)lcn);
+		/* The attribute list cannot be sparse. */
+		if (lcn < 0) {
+			ntfs_error(sb, "vcn_to_lcn() failed. Cannot read "
+					"attribute list.");
+			return -EIO;
+		}
+		block = lcn << vol->cluster_size_bits >> block_size_bits;
+		/* Read the run from device in chunks of block_size bytes. */
+		max_block = block + (rl->length << vol->cluster_size_bits >>
+				block_size_bits);
+		ntfs_debug("max_block = 0x%lx.", max_block);
+		do {
+			ntfs_debug("Reading block = 0x%lx.", block);
+			bh = sb_bread(sb, block);
+			if (!bh)
+				goto bread_err;
+			/*
+			 * The block which fills the buffer up, be it exactly
+			 * or only partially, is the final one and is dealt
+			 * with separately. Otherwise a buffer ending on a
+			 * block boundary in the middle of a cluster would be
+			 * misreported as an overflow by the next iteration.
+			 */
+			if (al + block_size >= al_end)
+				goto do_partial;
+			memcpy(al, bh->b_data, block_size);
+			brelse(bh);
+			al += block_size;
+		} while (++block < max_block);
+		rl++;
+	}
+	return 0;
+do_partial:
+	if (al < al_end) {
+		/* Final (possibly partial) block. */
+		memcpy(al, bh->b_data, al_end - al);
+		brelse(bh);
+		/*
+		 * All is fine if this block belongs to the last cluster of
+		 * the current run and no further real runs follow.
+		 */
+		if (((s64)(block - (lcn << vol->cluster_size_bits >>
+				block_size_bits)) << block_size_bits >>
+				vol->cluster_size_bits) == rl->length - 1) {
+			if (!(rl + 1)->length)
+				return 0;
+			if ((rl + 1)->lcn == LCN_RL_NOT_MAPPED &&
+					!(rl + 2)->length)
+				return 0;
+		}
+	} else
+		brelse(bh);
+	/* Real overflow! */
+	ntfs_error(sb, "Attribute list buffer overflow. Read attribute list "
+			"is truncated.");
+err_out:
+	return -EIO;
+bread_err:
+	ntfs_error(sb, "sb_bread() failed. Cannot read attribute list.");
+	goto err_out;
+}
+/**
+ * find_external_attr - find an attribute in the attribute list of an ntfs inode
+ * @type:	attribute type to find
+ * @name:	attribute name to find (optional, i.e. NULL means don't care)
+ * @name_len:	attribute name length (only needed if @name present)
+ * @ic:		IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present)
+ * @lowest_vcn:	lowest vcn to find (optional, non-resident attributes only)
+ * @val:	attribute value to find (optional, resident attributes only)
+ * @val_len:	attribute value length
+ * @ctx:	search context with mft record and attribute to search from
+ *
+ * You shouldn't need to call this function directly. Use lookup_attr() instead.
+ *
+ * Find an attribute by searching the attribute list for the corresponding
+ * attribute list entry. Having found the entry, map the mft record for read
+ * if the attribute is in a different mft record/inode, find_attr the attribute
+ * in there and return it.
+ *
+ * On first search @ctx->ntfs_ino must be the base mft record and @ctx must
+ * have been obtained from a call to get_attr_search_ctx(). On subsequent calls
+ * @ctx->ntfs_ino can be any extent inode, too (@ctx->base_ntfs_ino is then the
+ * base inode).
+ *
+ * After finishing with the attribute/mft record you need to call
+ * release_attr_search_ctx() to cleanup the search context (unmapping any
+ * mapped inodes, etc).
+ *
+ * Return TRUE if the search was successful and FALSE if not. When TRUE,
+ * @ctx->attr is the found attribute and it is in mft record @ctx->mrec. When
+ * FALSE, @ctx->attr is the attribute which collates just after the attribute
+ * being searched for in the base ntfs inode, i.e. if one wants to add the
+ * attribute to the mft record this is the correct place to insert it into
+ * and if there is not enough space, the attribute should be placed in an
+ * extent mft record.
+ */
+static BOOL find_external_attr(const ATTR_TYPES type, const uchar_t *name,
+		const u32 name_len, const IGNORE_CASE_BOOL ic,
+		const VCN lowest_vcn, const u8 *val, const u32 val_len,
+		attr_search_context *ctx)
+{
+	ntfs_inode *base_ni, *ni;
+	ntfs_volume *vol;
+	ATTR_LIST_ENTRY *al_entry, *next_al_entry;
+	u8 *al_start, *al_end;
+	ATTR_RECORD *a;
+	uchar_t *al_name;
+	u32 al_name_len;
+	ni = ctx->ntfs_ino;
+	base_ni = ctx->base_ntfs_ino;
+	ntfs_debug("Entering for inode 0x%Lx, type 0x%x.",
+			(unsigned long long)ni->mft_no, type);
+	if (!base_ni) {
+		/* First call happens with the base mft record. */
+		base_ni = ctx->base_ntfs_ino = ctx->ntfs_ino;
+		ctx->base_mrec = ctx->mrec;
+	}
+	if (ni == base_ni)
+		ctx->base_attr = ctx->attr;
+	vol = base_ni->vol;
+	al_start = base_ni->attr_list;
+	al_end = al_start + base_ni->attr_list_size;
+	if (!ctx->al_entry)
+		ctx->al_entry = (ATTR_LIST_ENTRY*)al_start;
+	/*
+	 * Iterate over entries in attribute list starting at @ctx->al_entry,
+	 * or the entry following that, if @ctx->is_first is TRUE.
+	 */
+	if (ctx->is_first) {
+		al_entry = ctx->al_entry;
+		ctx->is_first = FALSE;
+	} else
+		al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
+				le16_to_cpu(ctx->al_entry->length));
+	for (;; al_entry = next_al_entry) {
+		/* Out of bounds check. */
+		if ((u8*)al_entry < base_ni->attr_list ||
+				(u8*)al_entry > al_end)
+			break;	/* Inode is corrupt. */
+		ctx->al_entry = al_entry;
+		/* Catch the end of the attribute list. */
+		if ((u8*)al_entry == al_end)
+			goto not_found;
+		if (!al_entry->length)
+			break;
+		if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
+				le16_to_cpu(al_entry->length) > al_end)
+			break;
+		next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
+				le16_to_cpu(al_entry->length));
+		if (le32_to_cpu(al_entry->type) > le32_to_cpu(type))
+			goto not_found;
+		if (type != al_entry->type)
+			continue;
+		/*
+		 * If @name is present, compare the two names. If @name is
+		 * missing, assume we want an unnamed attribute.
+		 */
+		al_name_len = al_entry->name_length;
+		al_name = (uchar_t*)((u8*)al_entry + al_entry->name_offset);
+		if (!name) {
+			if (al_name_len)
+				goto not_found;
+		} else if (!ntfs_are_names_equal(al_name, al_name_len, name,
+				name_len, ic, vol->upcase, vol->upcase_len)) {
+			register int rc;
+			rc = ntfs_collate_names(name, name_len, al_name,
+					al_name_len, 1, IGNORE_CASE,
+					vol->upcase, vol->upcase_len);
+			/*
+			 * If @name collates before al_name, there is no
+			 * matching attribute.
+			 */
+			if (rc == -1)
+				goto not_found;
+			/* If the strings are not equal, continue search. */
+			if (rc)
+				continue;
+			/*
+			 * FIXME: Reverse engineering showed 0, IGNORE_CASE but
+			 * that is inconsistent with find_attr(). The subsequent
+			 * rc checks were also different. Perhaps I made a
+			 * mistake in one of the two. Need to recheck which is
+			 * correct or at least see what is going on... (AIA)
+			 */
+			rc = ntfs_collate_names(name, name_len, al_name,
+					al_name_len, 1, CASE_SENSITIVE,
+					vol->upcase, vol->upcase_len);
+			if (rc == -1)
+				goto not_found;
+			if (rc)
+				continue;
+		}
+		/*
+		 * The names match or @name not present and attribute is
+		 * unnamed. Now check @lowest_vcn. Continue search if the
+		 * next attribute list entry still fits @lowest_vcn. Otherwise
+		 * we have reached the right one or the search has failed.
+		 */
+		if (lowest_vcn && (u8*)next_al_entry >= al_start	    &&
+				(u8*)next_al_entry + 6 < al_end		    &&
+				(u8*)next_al_entry + le16_to_cpu(
+					next_al_entry->length) <= al_end    &&	
+				sle64_to_cpu(next_al_entry->lowest_vcn) <=
+					sle64_to_cpu(lowest_vcn)	    &&
+				next_al_entry->type == al_entry->type	    &&
+				next_al_entry->name_length == al_name_len   &&
+				ntfs_are_names_equal((uchar_t*)((u8*)
+					next_al_entry +
+					next_al_entry->name_offset),
+					next_al_entry->name_length,
+					al_name, al_name_len, CASE_SENSITIVE,
+					vol->upcase, vol->upcase_len))
+			continue;
+		if (MREF_LE(al_entry->mft_reference) == ni->mft_no) {
+			if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) {
+				ntfs_error(vol->sb, "Found stale mft "
+						"reference in attribute list!");
+				break;
+			}
+		} else { /* Mft references do not match. */
+			/* If there is a mapped record unmap it first. */
+			if (ni != base_ni)
+				unmap_extent_mft_record(ni);
+			/* Do we want the base record back? */
+			if (MREF_LE(al_entry->mft_reference) ==
+					base_ni->mft_no) {
+				ni = ctx->ntfs_ino = base_ni;
+				ctx->mrec = ctx->base_mrec;
+			} else {
+				/* We want an extent record. */
+				ctx->mrec = map_extent_mft_record(base_ni,
+						al_entry->mft_reference, &ni);
+				ctx->ntfs_ino = ni;
+				if (IS_ERR(ctx->mrec)) {
+					ntfs_error(vol->sb, "Failed to map mft "
+							"record, error code "
+							"%ld.",
+							-PTR_ERR(ctx->mrec));
+					break;
+				}
+			}
+			ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
+					le16_to_cpu(ctx->mrec->attrs_offset));
+		}
+		/*
+		 * ctx->ntfs_ino, ctx->mrec, and ctx->attr now point to the
+		 * mft record containing the attribute represented by the
+		 * current al_entry.
+		 */
+		/*
+		 * We could call into find_attr() to find the right attribute
+		 * in this mft record but this would be less efficient and not
+		 * quite accurate as find_attr() ignores the attribute instance
+		 * numbers for example which become important when one plays
+		 * with attribute lists. Also, because a proper match has been
+		 * found in the attribute list entry above, the comparison can
+		 * now be optimized. So it is worth re-implementing a
+		 * simplified find_attr() here.
+		 */
+		a = ctx->attr;
+		/*
+		 * Use a manual loop so we can still use break and continue
+		 * with the same meanings as above.
+		 */
+do_next_attr_loop:
+		if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
+				le32_to_cpu(ctx->mrec->bytes_allocated))
+			break;
+		if (a->type == AT_END)
+			continue;
+		if (!a->length)
+			break;
+		if (al_entry->instance != a->instance)
+			goto do_next_attr;
+		if (al_entry->type != a->type)
+			continue;
+		if (name) {
+			if (a->name_length != al_name_len)
+				continue;
+			if (!ntfs_are_names_equal((uchar_t*)((u8*)a +
+					le16_to_cpu(a->name_offset)),
+					a->name_length, al_name, al_name_len,
+					CASE_SENSITIVE, vol->upcase,
+					vol->upcase_len))
+				continue;
+		}
+		ctx->attr = a;
+		/*
+		 * If no @val specified or @val specified and it matches, we
+		 * have found it!
+		 */
+		if (!val || (!a->non_resident && le32_to_cpu(a->_ARA(value_length))
+				== val_len && !memcmp((u8*)a +
+				le16_to_cpu(a->_ARA(value_offset)), val, val_len))) {
+			ntfs_debug("Done, found.");
+			return TRUE;
+		}
+do_next_attr:
+		/* Proceed to the next attribute in the current mft record. */
+		a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
+		goto do_next_attr_loop;
+	}
+	ntfs_error(base_ni->vol->sb, "Inode contains corrupt attribute list "
+			"attribute.\n");
+	if (ni != base_ni) {
+		unmap_extent_mft_record(ni);
+		ctx->ntfs_ino = base_ni;
+		ctx->mrec = ctx->base_mrec;
+		ctx->attr = ctx->base_attr;
+	}
+	/*
+	 * FIXME: We absolutely have to return ERROR status instead of just
+	 * false or we will blow up or even worse cause corruption when we add
+	 * write support and we reach this code path!
+	 */
+	printk(KERN_CRIT "NTFS: FIXME: Hit unfinished error code path!!!\n");
+	return FALSE;
+not_found:
+	/*
+	 * Seek to the end of the base mft record, i.e. when we return false,
+	 * ctx->mrec and ctx->attr indicate where the attribute should be
+	 * inserted into the attribute record.
+	 * And of course ctx->al_entry points to the end of the attribute
+	 * list inside NTFS_I(ctx->base_vfs_ino)->attr_list.
+	 *
+	 * FIXME: Do we really want to do this here? Think about it... (AIA)
+	 */
+	reinit_attr_search_ctx(ctx);
+	find_attr(type, name, name_len, ic, val, val_len, ctx);
+	ntfs_debug("Done, not found.");
+	return FALSE;
+}
+/**
+ * lookup_attr - look up an attribute in an ntfs inode
+ * @type:	type of the attribute to look for
+ * @name:	name of the attribute to look for (optional, may be NULL; on
+ *		the attribute list path a NULL @name matches only unnamed
+ *		attributes)
+ * @name_len:	length of @name (only meaningful when @name is given)
+ * @ic:		IGNORE_CASE or CASE_SENSITIVE (unused when @name is NULL)
+ * @lowest_vcn:	lowest vcn to look for (optional, non-resident attributes
+ *		only); only passed on when the inode has an attribute list
+ * @val:	attribute value to look for (optional, resident attributes
+ *		only)
+ * @val_len:	length of @val
+ * @ctx:	search context holding the mft record and attribute to start
+ *		or resume the search from
+ *
+ * Thin dispatcher: the base inode is taken from @ctx->base_ntfs_ino if that
+ * is set and from @ctx->ntfs_ino otherwise. If the base inode has no
+ * attribute list the search is delegated to find_attr(), otherwise to
+ * find_external_attr().
+ *
+ * For the first search @ctx->ntfs_ino must be the base mft record and @ctx
+ * must come from get_attr_search_ctx(). Calling again with an unmodified
+ * @ctx continues the search where the previous call stopped.
+ *
+ * Once done with the attribute/mft record, release the search context with
+ * put_attr_search_ctx(), which unmaps a still mapped extent mft record.
+ *
+ * Return TRUE if a matching attribute was found, in which case @ctx->attr is
+ * that attribute and @ctx->mrec the mft record containing it. Return FALSE
+ * otherwise, in which case @ctx->attr is the attribute which collates just
+ * after the one searched for, i.e. the position at which the attribute would
+ * have to be inserted into the mft record.
+ */
+BOOL lookup_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len,
+		const IGNORE_CASE_BOOL ic, const VCN lowest_vcn, const u8 *val,
+		const u32 val_len, attr_search_context *ctx)
+{
+	ntfs_inode *base_ni = ctx->base_ntfs_ino;
+	ntfs_debug("Entering.");
+	/* Without a recorded base inode, the context's inode is the base. */
+	if (!base_ni)
+		base_ni = ctx->ntfs_ino;
+	/* A context without any inode is a caller bug. */
+	BUG_ON(!base_ni);
+	if (NInoAttrList(base_ni))
+		return find_external_attr(type, name, name_len, ic,
+				lowest_vcn, val, val_len, ctx);
+	return find_attr(type, name, name_len, ic, val, val_len, ctx);
+}
+/**
+ * init_attr_search_ctx - set up an attribute search context
+ * @ctx:	attribute search context to set up
+ * @ni:		ntfs inode to store in the search context
+ * @mrec:	mft record to store in the search context
+ *
+ * Point @ctx at @ni and @mrec, position @ctx->attr at the offset given by
+ * @mrec->attrs_offset, mark the search as not yet started, and clear all
+ * attribute list related state.
+ */
+static inline void init_attr_search_ctx(attr_search_context *ctx,
+		ntfs_inode *ni, MFT_RECORD *mrec)
+{
+	/* No attribute list state yet. */
+	ctx->base_attr = NULL;
+	ctx->base_mrec = NULL;
+	ctx->base_ntfs_ino = NULL;
+	ctx->al_entry = NULL;
+	/* The inode and mft record the search operates on. */
+	ctx->ntfs_ino = ni;
+	ctx->mrec = mrec;
+	/* The next search starts with, not after, the first attribute. */
+	ctx->is_first = TRUE;
+	/* No validation of attrs_offset here, that is done elsewhere. */
+	ctx->attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset));
+}
+/**
+ * reinit_attr_search_ctx - rewind an attribute search context
+ * @ctx:	attribute search context to rewind
+ *
+ * Reset @ctx so that the next search starts from the beginning again. This
+ * is meant to be called before starting a search for a different attribute.
+ *
+ * If no base inode is recorded in @ctx (no attribute list in use), only
+ * @ctx->attr and @ctx->is_first are reset. Otherwise an extent mft record
+ * that is still associated with @ctx is unmapped and @ctx is initialized
+ * afresh from the recorded base inode and base mft record.
+ */
+void reinit_attr_search_ctx(attr_search_context *ctx)
+{
+	ntfs_inode *base_ni = ctx->base_ntfs_ino;
+	if (unlikely(base_ni)) {
+		/* Attribute list: drop the extent record, if any. */
+		if (ctx->ntfs_ino != base_ni)
+			unmap_mft_record(READ, ctx->ntfs_ino);
+		init_attr_search_ctx(ctx, base_ni, ctx->base_mrec);
+		return;
+	}
+	/* No attribute list. Sanity checks are performed elsewhere. */
+	ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec +
+			le16_to_cpu(ctx->mrec->attrs_offset));
+	ctx->is_first = TRUE;
+}
+/**
+ * get_attr_search_ctx - obtain a freshly initialized attribute search context
+ * @ctx:	where to store the pointer to the new search context
+ * @ni:		ntfs inode to initialize the search context with
+ * @mrec:	mft record to initialize the search context with
+ *
+ * Allocate a search context from ntfs_attr_ctx_cache, store it in *@ctx and
+ * initialize it with @ni and @mrec.
+ *
+ * Return 0 on success. If the allocation fails, *@ctx is set to NULL and
+ * -ENOMEM is returned.
+ */
+int get_attr_search_ctx(attr_search_context **ctx, ntfs_inode *ni,
+		MFT_RECORD *mrec)
+{
+	attr_search_context *new_ctx;
+	new_ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS);
+	/* Hand the result to the caller in either case. */
+	*ctx = new_ctx;
+	if (unlikely(!new_ctx))
+		return -ENOMEM;
+	init_attr_search_ctx(new_ctx, ni, mrec);
+	return 0;
+}
+/**
+ * put_attr_search_ctx - dispose of an attribute search context
+ * @ctx:	attribute search context to dispose of
+ *
+ * If @ctx records a base inode and currently refers to a different inode
+ * (an extent mft record), unmap that mft record. Then give @ctx back to
+ * ntfs_attr_ctx_cache. @ctx must not be used afterwards.
+ */
+void put_attr_search_ctx(attr_search_context *ctx)
+{
+	ntfs_inode *base_ni = ctx->base_ntfs_ino;
+	if (base_ni && base_ni != ctx->ntfs_ino)
+		unmap_mft_record(READ, ctx->ntfs_ino);
+	kmem_cache_free(ntfs_attr_ctx_cache, ctx);
+}
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
+/*
+ * attrib.h - Defines for attribute handling in NTFS Linux kernel driver.
+ *	      Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_ATTRIB_H
+#define _LINUX_NTFS_ATTRIB_H
+#include <linux/fs.h>
+#include "endian.h"
+#include "types.h"
+#include "layout.h"
+/*
+ * Special (negative) values which can appear in place of a real logical
+ * cluster number. Callers of vcn_to_lcn() test for lcn < 0 and then compare
+ * against these constants.
+ */
+typedef enum {
+	/* Sparse cluster; the compressed read path stops the cb here. */
+	LCN_HOLE		= -1,	/* Keep this as highest value or die! */
+	/* Run list not mapped; callers use map_run_list() and retry. */
+	LCN_RL_NOT_MAPPED	= -2,
+	/* NOTE(review): presumably "no such vcn" - confirm in vcn_to_lcn(). */
+	LCN_ENOENT		= -3,
+	/* NOTE(review): presumably "invalid input" - confirm in vcn_to_lcn(). */
+	LCN_EINVAL		= -4,
+} LCN_SPECIAL_VALUES;
+/**
+ * attr_search_context - used in attribute search functions
+ * @mrec:	buffer containing mft record to search
+ * @attr:	attribute record in @mrec where to begin/continue search
+ * @is_first:	if true lookup_attr() begins search with @attr, else after @attr
+ * @ntfs_ino:	ntfs inode the search currently operates on; set together with
+ *		@mrec when the context is initialized and switched to an extent
+ *		inode when an extent mft record is mapped during a search
+ * @al_entry:	attribute list entry the search is currently positioned at
+ *		(NULL after initialization)
+ * @base_ntfs_ino:	base ntfs inode when an attribute list is in use; NULL
+ *		is treated as "no attribute list" by reinit_attr_search_ctx()
+ * @base_mrec:	mft record of the base inode, used to switch @mrec back to the
+ *		base record (NULL after initialization)
+ * @base_attr:	attribute position in @base_mrec which @attr is reset to when
+ *		a search through the attribute list fails with an error (NULL
+ *		after initialization)
+ *
+ * Structure must be initialized to zero before the first call to one of the
+ * attribute search functions. Initialize @mrec to point to the mft record to
+ * search, and @attr to point to the first attribute within @mrec (not necessary
+ * if calling the _first() functions), and set @is_first to TRUE (not necessary
+ * if calling the _first() functions).
+ *
+ * NOTE(review): contexts obtained from get_attr_search_ctx() are already
+ * initialized this way by init_attr_search_ctx().
+ *
+ * If @is_first is TRUE, the search begins with @attr. If @is_first is FALSE,
+ * the search begins after @attr. This is so that, after the first call to one
+ * of the search attribute functions, we can call the function again, without
+ * any modification of the search context, to automagically get the next
+ * matching attribute.
+ */
+typedef struct {
+	MFT_RECORD *mrec;
+	ATTR_RECORD *attr;
+	BOOL is_first;
+	ntfs_inode *ntfs_ino;
+	ATTR_LIST_ENTRY *al_entry;
+	ntfs_inode *base_ntfs_ino;
+	MFT_RECORD *base_mrec;
+	ATTR_RECORD *base_attr;
+} attr_search_context;
+extern run_list_element *decompress_mapping_pairs(const ntfs_volume *vol,
+		const ATTR_RECORD *attr, run_list_element *old_rl);
+extern int map_run_list(ntfs_inode *ni, VCN vcn);
+extern LCN vcn_to_lcn(const run_list_element *rl, const VCN vcn);
+extern BOOL find_attr(const ATTR_TYPES type, const uchar_t *name,
+		const u32 name_len, const IGNORE_CASE_BOOL ic, const u8 *val,
+		const u32 val_len, attr_search_context *ctx);
+BOOL lookup_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len,
+		const IGNORE_CASE_BOOL ic, const VCN lowest_vcn, const u8 *val,
+		const u32 val_len, attr_search_context *ctx);
+extern int load_attribute_list(ntfs_volume *vol, run_list_element *rl, u8 *al,
+		const s64 size);
+/*
+ * Return the length of the value of attribute @a in cpu byte order: the
+ * data_size field for a non-resident attribute, the (32-bit) value_length
+ * field widened to s64 for a resident one.
+ */
+static inline s64 attribute_value_length(const ATTR_RECORD *a)
+{
+	if (a->non_resident)
+		return sle64_to_cpu(a->_ANR(data_size));
+	return (s64)le32_to_cpu(a->_ARA(value_length));
+}
+extern void reinit_attr_search_ctx(attr_search_context *ctx);
+extern int get_attr_search_ctx(attr_search_context **ctx, ntfs_inode *ni,
+		MFT_RECORD *mrec);
+extern void put_attr_search_ctx(attr_search_context *ctx);
+#endif /* _LINUX_NTFS_ATTRIB_H */
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
+/**
+ * compress.c - NTFS kernel compressed attributes handling.
+ *		Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/locks.h>
+#include <linux/fs.h>
+#include "ntfs.h"
+/**
+ * ntfs_compression_constants - enum of constants used in the compression code
+ */
+typedef enum {
+	/*
+	 * Token types and access mask. The lowest bit of the (shifted) tag
+	 * byte selects the type of the next token: a symbol token is a single
+	 * byte copied verbatim, anything else is a two byte phrase token
+	 * referring back to already decompressed data.
+	 */
+	NTFS_SYMBOL_TOKEN	=	0,
+	NTFS_PHRASE_TOKEN	=	1,
+	NTFS_TOKEN_MASK		=	1,
+	/*
+	 * Compression sub-block constants. The 16-bit sub-block header is
+	 * masked with NTFS_SB_SIZE_MASK to obtain the sub-block length (the
+	 * sub-block spans the masked value + 3 bytes including the header)
+	 * and tested against NTFS_SB_IS_COMPRESSED to tell compressed from
+	 * uncompressed sub-blocks. NTFS_SB_SIZE is the size in bytes of one
+	 * sub-block in the destination (uncompressed) data.
+	 */
+	NTFS_SB_SIZE_MASK	=	0x0fff,
+	NTFS_SB_SIZE		=	0x1000,
+	NTFS_SB_IS_COMPRESSED	=	0x8000,
+	/*
+	 * The maximum compression block size is by definition 16 * the cluster
+	 * size, with the maximum supported cluster size being 4kiB. Thus the
+	 * maximum compression buffer size is 64kiB, so we use this when
+	 * initializing the per-CPU buffers.
+	 */
+	NTFS_MAX_CB_SIZE	= 64 * 1024,
+} ntfs_compression_constants;
+/**
+ * ntfs_compression_buffers - per-CPU buffers for the decompression engine.
+ *
+ * Array of smp_num_cpus pointers, each to a buffer of NTFS_MAX_CB_SIZE bytes.
+ * Set up by allocate_compression_buffers() and reset to NULL by
+ * free_compression_buffers().
+ */
+static u8 **ntfs_compression_buffers = NULL;
+/**
+ * allocate_compression_buffers - allocate the per-CPU decompression buffers
+ *
+ * Allocate the array of per-CPU buffer pointers and one buffer of
+ * NTFS_MAX_CB_SIZE bytes for each of the smp_num_cpus CPUs for use by the
+ * decompression engine.
+ *
+ * The buffers must not be allocated already (this is a BUG()).
+ *
+ * Caller has to hold the ntfs_lock semaphore.
+ *
+ * Return 0 on success or -ENOMEM if the allocations failed. On failure
+ * everything allocated so far is freed again and ntfs_compression_buffers is
+ * left as NULL, so the allocation can be retried later.
+ */
+int allocate_compression_buffers(void)
+{
+	int i, j;
+	BUG_ON(ntfs_compression_buffers);
+	ntfs_compression_buffers = (u8**)kmalloc(smp_num_cpus * sizeof(u8 *),
+			GFP_KERNEL);
+	if (!ntfs_compression_buffers)
+		return -ENOMEM;
+	for (i = 0; i < smp_num_cpus; i++) {
+		ntfs_compression_buffers[i] = (u8*)vmalloc(NTFS_MAX_CB_SIZE);
+		if (!ntfs_compression_buffers[i])
+			break;
+	}
+	if (i == smp_num_cpus)
+		return 0;
+	/*
+	 * Allocation of buffer @i failed. Free the @i buffers obtained before
+	 * it, then the pointer array itself, and return error.
+	 */
+	for (j = 0; j < i; j++)
+		vfree(ntfs_compression_buffers[j]);
+	kfree(ntfs_compression_buffers);
+	/* Do not leave a dangling pointer behind for later callers. */
+	ntfs_compression_buffers = NULL;
+	return -ENOMEM;
+}
+/**
+ * free_compression_buffers - release the per-CPU decompression buffers
+ *
+ * Free each of the smp_num_cpus buffers used by the decompression engine as
+ * well as the array holding the pointers to them, and reset
+ * ntfs_compression_buffers to NULL.
+ *
+ * The buffers must have been allocated (anything else is a BUG()).
+ *
+ * Caller has to hold the ntfs_lock semaphore.
+ */
+void free_compression_buffers(void)
+{
+	int cpu = 0;
+	BUG_ON(!ntfs_compression_buffers);
+	while (cpu < smp_num_cpus) {
+		vfree(ntfs_compression_buffers[cpu]);
+		cpu++;
+	}
+	kfree(ntfs_compression_buffers);
+	ntfs_compression_buffers = NULL;
+}
+/**
+ * ntfs_decompress - decompress a compression block into an array of pages
+ * @dest_pages:		destination array of pages
+ * @dest_index:		current index into @dest_pages (IN/OUT)
+ * @dest_ofs:		current offset within @dest_pages[@dest_index] (IN/OUT)
+ * @dest_max_index:	maximum index into @dest_pages (IN)
+ * @dest_max_ofs:	maximum offset within @dest_pages[@dest_max_index] (IN)
+ * @xpage:		the target page (-1 if none) (IN)
+ * @xpage_done:		set to 1 if xpage was completed successfully (IN/OUT)
+ * @cb_start:		compression block to decompress (IN)
+ * @cb_size:		size of compression block @cb_start in bytes (IN)
+ *
+ * The caller must have disabled preemption. ntfs_decompress() reenables it when
+ * the critical section is finished.
+ *
+ * This decompresses the compression block @cb_start into the array of
+ * destination pages @dest_pages starting at index @dest_index into @dest_pages
+ * and at offset @dest_ofs into the page @dest_pages[@dest_index].
+ *
+ * Entries of @dest_pages may be NULL, in which case the sub-blocks which
+ * would go into that page are skipped rather than decompressed.
+ *
+ * Pages which are completely filled are finalized before returning (on
+ * success as well as on error): they are flushed, unmapped, marked uptodate
+ * and unlocked, and their entry in @dest_pages is set to NULL. The reference
+ * on all such pages except @dest_pages[@xpage] is released.
+ *
+ * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1.
+ * If xpage is -1 or @xpage has not been completed, @xpage_done is not modified.
+ *
+ * @cb_start is a pointer to the compression block which needs decompressing
+ * and @cb_size is the size of @cb_start in bytes (8-64kiB).
+ *
+ * Return 0 if success or -EOVERFLOW on error in the compressed stream.
+ * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
+ * completed during the decompression of the compression block (@cb_start).
+ *
+ * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up
+ * unpredictably! You have been warned!
+ *
+ * Note to hackers: This function may not sleep until it has finished accessing
+ * the compression block @cb_start as it is a per-CPU buffer.
+ */
+static int ntfs_decompress(struct page *dest_pages[], int *dest_index,
+		int *dest_ofs, const int dest_max_index, const int dest_max_ofs,
+		const int xpage, char *xpage_done, u8 *const cb_start,
+		const u32 cb_size)
+{
+	/*
+	 * Pointers into the compressed data, i.e. the compression block (cb),
+	 * and the therein contained sub-blocks (sb).
+	 */
+	u8 *cb_end = cb_start + cb_size; /* End of cb. */
+	u8 *cb = cb_start;	/* Current position in cb. */
+	u8 *cb_sb_start = cb;	/* Beginning of the current sb in the cb. */
+	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */
+	/* Variables for uncompressed data / destination. */
+	struct page *dp;	/* Current destination page being worked on. */
+	u8 *dp_addr;		/* Current pointer into dp. */
+	u8 *dp_sb_start;	/* Start of current sub-block in dp. */
+	u8 *dp_sb_end;		/* End of current sb in dp (dp_sb_start +
+				   NTFS_SB_SIZE). */
+	u16 do_sb_start;	/* @dest_ofs when starting this sub-block. */
+	u16 do_sb_end;		/* @dest_ofs of end of this sb (do_sb_start +
+				   NTFS_SB_SIZE). */
+	/* Variables for tag and token parsing. */
+	u8 tag;			/* Current tag. */
+	int token;		/* Loop counter for the eight tokens in tag. */
+	/*
+	 * Need this because we can't sleep, so need two stages: while
+	 * decompressing we only record the indices of the pages we complete
+	 * (at most one per page in *dest_index..dest_max_index), and finalize
+	 * them once preemption has been reenabled.
+	 */
+	int completed_pages[dest_max_index - *dest_index + 1];
+	int nr_completed_pages = 0;
+	/* Default error code. */
+	int err = -EOVERFLOW;
+	ntfs_debug("Entering, cb_size = 0x%x.", cb_size);
+do_next_sb:
+	ntfs_debug("Beginning sub-block at offset = 0x%x in the cb.",
+			cb - cb_start);
+	/*
+	 * Have we reached the end of the compression block? A sub-block
+	 * header of zero terminates the cb as well.
+	 */
+	if (cb == cb_end || !le16_to_cpup(cb)) {
+		int i;
+		ntfs_debug("Completed. Returning success (0).");
+		err = 0;
+		/*
+		 * The error path (return_overflow) jumps here with @err still
+		 * set to its default, so that the completed pages are
+		 * finalized in both cases.
+		 */
+return_error:
+		/* We can sleep from now on, so we reenable preemption. */
+		preempt_enable();
+		/* Second stage: finalize completed pages. */
+		for (i = 0; i < nr_completed_pages; i++) {
+			int di = completed_pages[i];
+			dp = dest_pages[di];
+			flush_dcache_page(dp);
+			kunmap(dp);
+			SetPageUptodate(dp);
+			UnlockPage(dp);
+			/* The reference on the target page is not dropped. */
+			if (di == xpage)
+				*xpage_done = 1;
+			else
+				page_cache_release(dp);
+			dest_pages[di] = NULL;
+		}
+		return err;
+	}
+	/* Setup offsets for the current sub-block destination. */
+	do_sb_start = *dest_ofs;
+	do_sb_end = do_sb_start + NTFS_SB_SIZE;
+	/* Check that we are still within allowed boundaries. */
+	if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs)
+		goto return_overflow;
+	/* Does the minimum size of a compressed sb overflow valid range? */
+	if (cb + 6 > cb_end)
+		goto return_overflow;
+	/*
+	 * Setup the current sub-block source pointers and validate range. The
+	 * sb spans (header & NTFS_SB_SIZE_MASK) + 3 bytes starting at its
+	 * header.
+	 */
+	cb_sb_start = cb;
+	cb_sb_end = cb_sb_start + (le16_to_cpup(cb) & NTFS_SB_SIZE_MASK) + 3;
+	if (cb_sb_end > cb_end)
+		goto return_overflow;
+	/* Get the current destination page. */
+	dp = dest_pages[*dest_index];
+	if (!dp) {
+		/* No page present. Skip decompression of this sub-block. */
+		cb = cb_sb_end;
+		/* Advance destination position to next sub-block. */
+		*dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK;
+		/* Offset wrapped to zero, i.e. we moved on to the next page. */
+		if (!*dest_ofs && (++*dest_index > dest_max_index))
+			goto return_overflow;
+		goto do_next_sb;
+	}
+	/* We have a valid destination page. Setup the destination pointers. */
+	dp_addr = (u8*)page_address(dp) + do_sb_start;
+	/* Now, we are ready to process the current sub-block (sb). */
+	if (!(le16_to_cpup(cb) & NTFS_SB_IS_COMPRESSED)) {
+		ntfs_debug("Found uncompressed sub-block.");
+		/* This sb is not compressed, just copy it into destination. */
+		/* Advance source position to first data byte. */
+		cb += 2;
+		/* An uncompressed sb must be full size. */
+		if (cb_sb_end - cb != NTFS_SB_SIZE)
+			goto return_overflow;
+		/* Copy the block and advance the source position. */
+		memcpy(dp_addr, cb, NTFS_SB_SIZE);
+		cb += NTFS_SB_SIZE;
+		/* Advance destination position to next sub-block. */
+		*dest_ofs += NTFS_SB_SIZE;
+		if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) {
+			/* Also the target of a goto from the compressed sb path. */
+finalize_page:
+			/*
+			 * First stage: add current page index to array of
+			 * completed pages.
+			 */
+			completed_pages[nr_completed_pages++] = *dest_index;
+			if (++*dest_index > dest_max_index)
+				goto return_overflow;
+		}
+		goto do_next_sb;
+	}
+	ntfs_debug("Found compressed sub-block.");
+	/* This sb is compressed, decompress it into destination. */
+	/* Setup destination pointers. */
+	dp_sb_start = dp_addr;
+	dp_sb_end = dp_sb_start + NTFS_SB_SIZE;
+	/* Forward to the first tag in the sub-block. */
+	cb += 2;
+do_next_tag:
+	if (cb == cb_sb_end) {
+		/* Check if the decompressed sub-block was not full-length. */
+		if (dp_addr < dp_sb_end) {
+			int nr_bytes = do_sb_end - *dest_ofs;
+			ntfs_debug("Filling incomplete sub-block with "
+					"zeroes.");
+			/* Zero remainder and update destination position. */
+			memset(dp_addr, 0, nr_bytes);
+			*dest_ofs += nr_bytes;
+		}
+		/* We have finished the current sub-block. */
+		if (!(*dest_ofs &= ~PAGE_CACHE_MASK))
+			goto finalize_page;
+		goto do_next_sb;
+	}
+	/* Check we are still in range. */
+	if (cb > cb_sb_end || dp_addr > dp_sb_end)
+		goto return_overflow;
+	/* Get the next tag and advance to first token. */
+	tag = *cb++;
+	//ntfs_debug("Found tag = 0x%x.", tag);
+	/*
+	 * Parse the eight tokens described by the tag, lowest bit first (the
+	 * tag is shifted right by one after each token).
+	 */
+	for (token = 0; token < 8; token++, tag >>= 1) {
+		u16 lg, pt, length, max_non_overlap;
+		register u16 i;
+		u8 *dp_back_addr;
+		/* Check if we are done / still in range. */
+		if (cb >= cb_sb_end || dp_addr > dp_sb_end)
+			break;
+		/* Determine token type and parse appropriately.*/
+		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
+			//ntfs_debug("Found symbol token = %c (0x%x).", *cb,
+			//		*cb);
+			/*
+			 * We have a symbol token, copy the symbol across, and
+			 * advance the source and destination positions.
+			 */
+			*dp_addr++ = *cb++;
+			++*dest_ofs;
+			/* Continue with the next token. */
+			continue;
+		}
+		//ntfs_debug("Found phrase token = 0x%x.", le16_to_cpup(cb));
+		/*
+		 * We have a phrase token. Make sure it is not the first token
+		 * in the sb (i.e. nothing has been output for this sb yet) as
+		 * this is illegal and would confuse the code below.
+		 */
+		if (dp_addr == dp_sb_start)
+			goto return_overflow;
+		/*
+		 * Determine the number of bytes to go back (p) and the number
+		 * of bytes to copy (l). We use an optimized algorithm in which
+		 * we first calculate log2(current destination position in sb),
+		 * which allows determination of l and p in O(1) rather than
+		 * O(n). We just need an arch-optimized log2() function now.
+		 */
+		lg = 0;
+		for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1)
+			lg++;
+		/* Get the phrase token into pt. */
+		pt = le16_to_cpup(cb);
+		/*
+		 * Calculate starting position of the byte sequence in
+		 * the destination using the fact that p = (pt >> (12 - lg)) + 1
+		 * and make sure we don't go too far back.
+		 */
+		dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1;
+		if (dp_back_addr < dp_sb_start)
+			goto return_overflow;
+		/* Now calculate the length of the byte sequence. */
+		length = (pt & (0xfff >> lg)) + 3;
+#if 0
+		ntfs_debug("starting position = 0x%x, back pointer = 0x%x, "
+				"length = 0x%x.", *dest_ofs - do_sb_start -
+				1, (pt >> (12 - lg)) + 1, length);
+#endif
+		/* Advance destination position and verify it is in range. */
+		*dest_ofs += length;
+		if (*dest_ofs > do_sb_end)
+			goto return_overflow;
+		/* The number of non-overlapping bytes. */
+		max_non_overlap = dp_addr - dp_back_addr;
+		if (length <= max_non_overlap) {
+			//ntfs_debug("Found non-overlapping byte sequence.");
+			/* The byte sequence doesn't overlap, just copy it. */
+			memcpy(dp_addr, dp_back_addr, length);
+			/* Advance destination pointer. */
+			dp_addr += length;
+		} else {
+			//ntfs_debug("Found overlapping byte sequence.");
+			/*
+			 * The byte sequence does overlap, copy non-overlapping
+			 * part and then do a slow byte by byte copy for the
+			 * overlapping part. Also, advance the destination
+			 * pointer.
+			 */
+			memcpy(dp_addr, dp_back_addr, max_non_overlap);
+			dp_addr += max_non_overlap;
+			dp_back_addr += max_non_overlap;
+			length -= max_non_overlap;
+			while (length--)
+				*dp_addr++ = *dp_back_addr++;
+		}
+		/* Advance source position and continue with the next token. */
+		cb += 2;
+	}
+	/* No tokens left in the current tag. Continue with the next tag. */
+	goto do_next_tag;
+return_overflow:
+	ntfs_error(NULL, "Failed. Returning -EOVERFLOW.\n");
+	/* err still holds -EOVERFLOW; finalize completed pages and return. */
+	goto return_error;
+}
+/**
+ * ntfs_file_read_compressed_block - read a compressed block into the page cache
+ * @page:	locked page in the compression block(s) we need to read
+ *
+ * When we are called the page has already been verified to be locked and the
+ * attribute is known to be non-resident, not encrypted, but compressed.
+ *
+ * 1. Determine which compression block(s) @page is in.
+ * 2. Get hold of all pages corresponding to this/these compression block(s).
+ * 3. Read the (first) compression block.
+ * 4. Decompress it into the corresponding pages.
+ * 5. Throw the compressed data away and proceed to 3. for the next compression
+ *    block or return success if no more compression blocks left.
+ *
+ * Warning: We have to be careful what we do about existing pages. They might
+ * have been written to so that we would lose data if we were to just overwrite
+ * them with the out-of-date uncompressed data.
+ *
+ * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at
+ * the end of the file I think. We need to detect this case and zero the out
+ * of bounds remainder of the page in question and mark it as handled. At the
+ * moment we would just return -EIO on such a page. This bug will only become
+ * apparent if pages are above 8kiB and the NTFS volume only uses 512 byte
+ * clusters so is probably not going to be seen by anyone. Still this should
+ * be fixed. (AIA)
+ *
+ * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in
+ * handling sparse and compressed cbs. (AIA)
+ */
+int ntfs_file_read_compressed_block(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	ntfs_inode *ni = NTFS_I(mapping->host);
+	ntfs_volume *vol = ni->vol;
+	kdev_t dev = vol->sb->s_dev;
+	unsigned long block_size = vol->sb->s_blocksize;
+	unsigned char block_size_bits = vol->sb->s_blocksize_bits;
+	u8 *cb, *cb_pos, *cb_end;
+	struct buffer_head **bhs;
+	unsigned long offset, index = page->index;
+	u32 cb_size = ni->_ICF(compression_block_size);
+	u64 cb_size_mask = cb_size - 1UL;
+	VCN vcn;
+	LCN lcn;
+	/* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */
+	VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
+			vol->cluster_size_bits;
+	/*
+	 * The first vcn after the last wanted vcn (minimum alignment is again
+	 * PAGE_CACHE_SIZE).
+	 */
+	VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
+			& ~cb_size_mask) >> vol->cluster_size_bits;
+	/* Number of compression blocks (cbs) in the wanted vcn range. */
+	unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits
+			>> ni->_ICF(compression_block_size_bits);
+	/*
+	 * Number of pages required to store the uncompressed data from all
+	 * compression blocks (cbs) overlapping @page. Due to alignment
+	 * guarantees of start_vcn and end_vcn, no need to round up here.
+	 */
+	unsigned int nr_pages = (end_vcn - start_vcn) <<
+			vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
+	unsigned int xpage, max_page, cur_page, cur_ofs, i;
+	unsigned int cb_clusters, cb_max_ofs;
+	int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0;
+	struct page **pages;
+	unsigned char xpage_done = 0;
+	ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = "
+			"%i.", index, cb_size, nr_pages);
+	/*
+	 * Uncommenting the below line results in the compressed data being
+	 * read without any decompression. Compression blocks are padded with
+	 * zeroes in order to give them in their proper alignments. I am
+	 * leaving this here as it is a handy debugging / studying tool for
+	 * compressed data.
+	 */
+#if 0
+	return block_read_full_page(page, ntfs_file_get_block);
+#endif
+	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_NOFS);
+	/* Allocate memory to store the buffer heads we need. */
+	bhs_size = cb_size / block_size * sizeof(struct buffer_head *);
+	bhs = kmalloc(bhs_size, GFP_NOFS);
+	if (unlikely(!pages || !bhs)) {
+		kfree(bhs);
+		kfree(pages);
+		SetPageError(page);
+		UnlockPage(page);
+		ntfs_error(vol->sb, "Failed to allocate internal buffers.");
+		return -ENOMEM;
+	}
+	/*
+	 * We have already been given one page, this is the one we must do.
+	 * Once again, the alignment guarantees keep it simple.
+	 */
+	offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
+	xpage = index - offset;
+	pages[xpage] = page;
+	/*
+	 * The remaining pages need to be allocated and inserted into the page
+	 * cache, alignment guarantees keep all the below much simpler. (-8
+	 */
+	max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >>
+			PAGE_CACHE_SHIFT) - offset;
+	if (nr_pages < max_page)
+		max_page = nr_pages;
+	for (i = 0; i < max_page; i++, offset++) {
+		if (i != xpage)
+			pages[i] = grab_cache_page_nowait(mapping, offset);
+		page = pages[i];
+		if (page) {
+			/*
+			 * We only (re)read the page if it isn't already read
+			 * in and/or dirty or we would be losing data or at
+			 * least wasting our time.
+			 */
+			if (!PageDirty(page) && (!Page_Uptodate(page) ||
+					PageError(page))) {
+				ClearPageError(page);
+				kmap(page);
+				continue;
+			}
+			UnlockPage(page);
+			page_cache_release(page);
+			pages[i] = NULL;
+		}
+	}
+	/*
+	 * We have the run list, and all the destination pages we need to fill.
+	 * Now read the first compression block.
+	 */
+	cur_page = 0;
+	cur_ofs = 0;
+	cb_clusters = ni->_ICF(compression_block_clusters);
+do_next_cb:
+	nr_cbs--;
+	nr_bhs = 0;
+	/* Read all cb buffer heads one cluster run at a time. */
+	for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn;
+			vcn++) {
+		BOOL is_retry = FALSE;
+retry_remap:
+		/* Make sure we are not overflowing the file limits. */
+		if (vcn << vol->cluster_size_bits >= ni->initialized_size) {
+			/* Overflow, just zero this region. */
+			// TODO: AIA
+		}
+		/* Find lcn of vcn and convert it into blocks. */
+		read_lock(&ni->run_list.lock);
+		lcn = vcn_to_lcn(ni->run_list.rl, vcn);
+		read_unlock(&ni->run_list.lock);
+		ntfs_debug("Reading vcn = 0x%Lx, lcn = 0x%Lx.",
+				(long long)vcn, (long long)lcn);
+		if (lcn < 0) {
+			/*
+			 * When we reach the first sparse cluster we have
+			 * finished with the cb.
+			 */
+			if (lcn == LCN_HOLE)
+				break;
+			if (is_retry || lcn != LCN_RL_NOT_MAPPED)
+				goto rl_err;
+			is_retry = TRUE;
+			/* Map run list of current extent and retry. */
+			if (!map_run_list(ni, vcn))
+				goto retry_remap;
+			goto map_rl_err;
+		}
+		block = lcn << vol->cluster_size_bits >> block_size_bits;
+		/* Read the lcn from device in chunks of block_size bytes. */
+		max_block = block + (vol->cluster_size >> block_size_bits);
+		do {
+			// TODO: Need overflow checks here, too! (AIA)
+			ntfs_debug("block = 0x%x.", block);
+			if (unlikely(!(bhs[nr_bhs] = getblk(dev, block,
+					block_size))))
+				goto getblk_err;
+			nr_bhs++;
+		} while (++block < max_block);
+	}
+	/* Setup and initiate io on all buffer heads. */
+	for (i = 0; i < nr_bhs; i++) {
+		struct buffer_head *tbh = bhs[i];
+		if (buffer_uptodate(tbh))
+			continue;
+		lock_buffer(tbh);
+		get_bh(tbh);
+		tbh->b_end_io = end_buffer_io_sync;
+		submit_bh(READ, tbh);
+	}
+	/* Wait for io completion on all buffer heads. */
+	for (i = 0; i < nr_bhs; i++) {
+		struct buffer_head *tbh = bhs[i];
+		if (buffer_uptodate(tbh))
+			continue;
+		wait_on_buffer(tbh);
+		if (!buffer_uptodate(tbh))
+			goto read_err;
+	}
+	/*
+	 * Get the compression buffer corresponding to the current CPU. We must
+	 * not sleep any more until we are finished with the compression buffer.
+	 * If on a preemptible kernel, now disable preemption.
+	 */
+	preempt_disable();
+	cb = ntfs_compression_buffers[smp_processor_id()];
+	BUG_ON(!cb);
+	cb_pos = cb;
+	cb_end = cb + cb_size;
+	/* Copy the buffer heads into the contiguous buffer. */
+	for (i = 0; i < nr_bhs; i++) {
+		memcpy(cb_pos, bhs[i]->b_data, block_size);
+		cb_pos += block_size;
+	}
+	/* Just a precaution. */
+	if (cb_pos + 2 <= cb + cb_size)
+		*(u16*)cb_pos = 0;
+	/* Reset cb_pos back to the beginning. */
+	cb_pos = cb;
+	/* We now have both source (if present) and destination. */
+	ntfs_debug("Successfully read the compression block.");
+	/* The last page and maximum offset within it for the current cb. */
+	cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size;
+	cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK;
+	cb_max_page >>= PAGE_CACHE_SHIFT;
+	/* Catch end of file inside a compression block. */
+	if (cb_max_page > max_page)
+		cb_max_page = max_page;
+	if (vcn == start_vcn - cb_clusters) {
+		/* Sparse cb, zero out page range overlapping the cb. */
+		ntfs_debug("Found sparse compression block.");
+		/* We can sleep from now on, so we reenable preemption. */
+		preempt_enable();
+		if (cb_max_ofs)
+			cb_max_page--;
+		for (; cur_page < cb_max_page; cur_page++) {
+			page = pages[cur_page];
+			if (page) {
+				/*
+				 * FIXME: Using clear_page() will become wrong
+				 * when we get PAGE_CACHE_SIZE != PAGE_SIZE but
+				 * for now there is no problem.
+				 */
+				if (likely(!cur_ofs))
+					clear_page(page_address(page));
+				else
+					memset(page_address(page) + cur_ofs, 0,
+							PAGE_CACHE_SIZE -
+							cur_ofs);
+				flush_dcache_page(page);
+				kunmap(page);
+				SetPageUptodate(page);
+				UnlockPage(page);
+				if (cur_page == xpage)
+					xpage_done = 1;
+				else
+					page_cache_release(page);
+				pages[cur_page] = NULL;
+			}
+			cb_pos += PAGE_CACHE_SIZE - cur_ofs;
+			cur_ofs = 0;
+			if (cb_pos >= cb_end)
+				break;
+		}
+		/* If we have a partial final page, deal with it now. */
+		if (cb_max_ofs && cb_pos < cb_end) {
+			page = pages[cur_page];
+			if (page)
+				memset(page_address(page) + cur_ofs, 0,
+						cb_max_ofs - cur_ofs);
+			cb_pos += cb_max_ofs - cur_ofs;
+			cur_ofs = cb_max_ofs;
+		}
+	} else if (vcn == start_vcn) {
+		/* We can't sleep so we need two stages. */
+		unsigned int cur2_page = cur_page;
+		unsigned int cur_ofs2 = cur_ofs;
+		u8 *cb_pos2 = cb_pos;
+		ntfs_debug("Found uncompressed compression block.");
+		/* Uncompressed cb, copy it to the destination pages. */
+		/*
+		 * TODO: As a big optimization, we could detect this case
+		 * before we read all the pages and use block_read_full_page()
+		 * on all full pages instead (we still have to treat partial
+		 * pages especially but at least we are getting rid of the
+		 * synchronous io for the majority of pages.
+		 * Or if we choose not to do the read-ahead/-behind stuff, we
+		 * could just return block_read_full_page(pages[xpage]) as long
+		 * as PAGE_CACHE_SIZE <= cb_size.
+		 */
+		if (cb_max_ofs)
+			cb_max_page--;
+		/* First stage: copy data into destination pages. */
+		for (; cur_page < cb_max_page; cur_page++) {
+			page = pages[cur_page];
+			if (page)
+				memcpy(page_address(page) + cur_ofs, cb_pos,
+						PAGE_CACHE_SIZE - cur_ofs);
+			cb_pos += PAGE_CACHE_SIZE - cur_ofs;
+			cur_ofs = 0;
+			if (cb_pos >= cb_end)
+				break;
+		}
+		/* If we have a partial final page, deal with it now. */
+		if (cb_max_ofs && cb_pos < cb_end) {
+			page = pages[cur_page];
+			if (page)
+				memcpy(page_address(page) + cur_ofs, cb_pos,
+						cb_max_ofs - cur_ofs);
+			cb_pos += cb_max_ofs - cur_ofs;
+			cur_ofs = cb_max_ofs;
+		}
+		/* We can sleep from now on, so we reenable preemption. */
+		preempt_enable();
+		/* Second stage: finalize pages. */
+		for (; cur2_page < cb_max_page; cur2_page++) {
+			page = pages[cur2_page];
+			if (page) {
+				flush_dcache_page(page);
+				kunmap(page);
+				SetPageUptodate(page);
+				UnlockPage(page);
+				if (cur2_page == xpage)
+					xpage_done = 1;
+				else
+					page_cache_release(page);
+				pages[cur2_page] = NULL;
+			}
+			cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2;
+			cur_ofs2 = 0;
+			if (cb_pos2 >= cb_end)
+				break;
+		}
+	} else {
+		/* Compressed cb, decompress it into the destination page(s). */
+		unsigned int prev_cur_page = cur_page;
+		ntfs_debug("Found compressed compression block.");
+		err = ntfs_decompress(pages, &cur_page, &cur_ofs,
+				cb_max_page, cb_max_ofs, xpage, &xpage_done,
+				cb_pos,	cb_size - (cb_pos - cb));
+		/*
+		 * We can sleep from now on, preemption already reenabled by
+		 * ntfs_decompess.
+		 */
+		if (err) {
+			ntfs_error(vol->sb, "ntfs_decompress() failed with "
+					"error code %i. Skipping this "
+					"compression block.\n", -err);
+			/* Release the unfinished pages. */
+			for (; prev_cur_page < cur_page; prev_cur_page++) {
+				page = pages[prev_cur_page];
+				if (page) {
+					if (prev_cur_page == xpage &&
+							!xpage_done)
+						SetPageError(page);
+					flush_dcache_page(page);
+					kunmap(page);
+					UnlockPage(page);
+					if (prev_cur_page != xpage)
+						page_cache_release(page);
+					pages[prev_cur_page] = NULL;
+				}
+			}
+		}
+	}
+	/* Release the buffer heads. */
+	for (i = 0; i < nr_bhs; i++)
+		brelse(bhs[i]);
+	/* Do we have more work to do? */
+	if (nr_cbs)
+		goto do_next_cb;
+	/* We no longer need the list of buffer heads. */
+	kfree(bhs);
+	/* Clean up if we have any pages left. Should never happen. */
+	for (cur_page = 0; cur_page < max_page; cur_page++) {
+		page = pages[cur_page];
+		if (page) {
+			ntfs_error(vol->sb, "Still have pages left! "
+					"Terminating them with extreme "
+					"prejudice.");
+			if (cur_page == xpage && !xpage_done)
+				SetPageError(page);
+			flush_dcache_page(page);
+			kunmap(page);
+			UnlockPage(page);
+			if (cur_page != xpage)
+				page_cache_release(page);
+			pages[cur_page] = NULL;
+		}
+	}
+	/* If we have completed the requested page, we return success. */
+	if (likely(xpage_done))
+		return 0;
+	ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
+			"EOVERFLOW" : (!err ? "EIO" : "unkown error"));
+	return err < 0 ? err : -EIO;
+read_err:
+	ntfs_error(vol->sb, "IO error while reading compressed data.");
+	/* Release the buffer heads. */
+	for (i = 0; i < nr_bhs; i++)
+		brelse(bhs[i]);
+	goto err_out;
+map_rl_err:
+	ntfs_error(vol->sb, "map_run_list() failed. Cannot read compression "
+			"block.");
+	goto err_out;
+rl_err:
+	ntfs_error(vol->sb, "vcn_to_lcn() failed. Cannot read compression "
+			"block.");
+	goto err_out;
+getblk_err:
+	ntfs_error(vol->sb, "getblk() failed. Cannot read compression block.");
+err_out:
+	kfree(bhs);
+	for (i = cur_page; i < max_page; i++) {
+		page = pages[i];
+		if (page) {
+			if (i == xpage && !xpage_done)
+				SetPageError(page);
+			flush_dcache_page(page);
+			kunmap(page);
+			UnlockPage(page);
+			if (i != xpage)
+				page_cache_release(page);
+		}
+	}
+	return -EIO;
+}
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
+/*
+ * debug.c - NTFS kernel debug support. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "debug.h"
+/*
+ * A static buffer to hold the error string being displayed and a spinlock
+ * to protect concurrent accesses to it.
+ *
+ * The message functions in this file take err_buf_lock before formatting
+ * into err_buf and only drop it after the printk() of the buffer has
+ * returned. Formatting is done with vsnprintf() bounded by sizeof(err_buf),
+ * so longer messages are truncated to fit the buffer.
+ */
+static char err_buf[1024];
+static spinlock_t err_buf_lock = SPIN_LOCK_UNLOCKED;
+/**
+ * __ntfs_warning - output a warning to the syslog
+ * @function:	name of function outputting the warning (may be NULL or empty)
+ * @sb:		super block of mounted ntfs filesystem (may be NULL)
+ * @fmt:	warning string containing format specifications
+ * @...:	a variable number of arguments specified in @fmt
+ *
+ * Outputs a warning to the syslog for the mounted ntfs filesystem described
+ * by @sb. If @sb is NULL, the device name is left out of the message.
+ *
+ * @fmt and the corresponding @... is printf style format string containing
+ * the warning string and the corresponding format arguments, respectively.
+ * The formatted text is truncated to the size of the shared message buffer.
+ *
+ * @function is the name of the function from which __ntfs_warning is being
+ * called. If it is NULL or empty, an empty name is printed instead.
+ *
+ * The message is logged at KERN_WARNING level so that it can be told apart
+ * from the KERN_ERR messages emitted by __ntfs_error().
+ *
+ * Note, you should be using debug.h::ntfs_warning(@sb, @fmt, @...) instead
+ * as this provides the @function parameter automatically.
+ */
+void __ntfs_warning(const char *function, const struct super_block *sb,
+		const char *fmt, ...)
+{
+	va_list args;
+	int flen = 0;
+	if (function)
+		flen = strlen(function);
+	/*
+	 * err_buf is shared between all callers, hence it is both filled and
+	 * printed with err_buf_lock held.
+	 */
+	spin_lock(&err_buf_lock);
+	va_start(args, fmt);
+	vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+	va_end(args);
+	if (sb)
+		printk(KERN_WARNING "NTFS-fs warning (device %s): %s(): %s\n",
+				sb->s_id, flen ? function : "", err_buf);
+	else
+		printk(KERN_WARNING "NTFS-fs warning: %s(): %s\n",
+				flen ? function : "", err_buf);
+	spin_unlock(&err_buf_lock);
+}
+/**
+ * __ntfs_error - output an error to the syslog
+ * @function:	name of the function reporting the error (may be NULL or empty)
+ * @sb:		super block of the mounted ntfs filesystem (may be NULL)
+ * @fmt:	printf style format string describing the error
+ * @...:	arguments consumed by the format specifications in @fmt
+ *
+ * Formats @fmt together with its arguments into the shared message buffer
+ * and writes the result to the syslog at KERN_ERR level. When @sb is given,
+ * the device name stored in it is made part of the message, otherwise the
+ * device is left out.
+ *
+ * When @function is NULL or an empty string, an empty function name is
+ * printed.
+ *
+ * Do not call this directly; debug.h::ntfs_error(@sb, @fmt, @...) supplies
+ * @function automatically.
+ */
+void __ntfs_error(const char *function, const struct super_block *sb,
+		const char *fmt, ...)
+{
+	const char *caller = "";
+	va_list ap;
+	/* Only use the supplied name if there is one and it is not empty. */
+	if (function && strlen(function))
+		caller = function;
+	/* The buffer is shared, so format and print it under the lock. */
+	spin_lock(&err_buf_lock);
+	va_start(ap, fmt);
+	vsnprintf(err_buf, sizeof(err_buf), fmt, ap);
+	va_end(ap);
+	if (!sb)
+		printk(KERN_ERR "NTFS-fs error: %s(): %s\n",
+				caller, err_buf);
+	else
+		printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n",
+				sb->s_id, caller, err_buf);
+	spin_unlock(&err_buf_lock);
+}
+#ifdef DEBUG
+/* Debug messages are only output while this is non-zero. */
+int debug_msgs = 0;
+/*
+ * __ntfs_debug - output a debug message to the syslog
+ *
+ * Does nothing unless debug_msgs is set. Otherwise @fmt and its arguments
+ * are formatted into the shared message buffer and printed at KERN_DEBUG
+ * level, prefixed with the source location given by @file and @line and
+ * with @function (an empty name is printed if @function is NULL or empty).
+ *
+ * Use the debug.h::ntfs_debug() macro rather than calling this directly.
+ */
+void __ntfs_debug (const char *file, int line, const char *function,
+		const char *fmt, ...)
+{
+	const char *caller = "";
+	va_list ap;
+	if (!debug_msgs)
+		return;
+	if (function && strlen(function))
+		caller = function;
+	/* The buffer is shared, so format and print it under the lock. */
+	spin_lock(&err_buf_lock);
+	va_start(ap, fmt);
+	vsnprintf(err_buf, sizeof(err_buf), fmt, ap);
+	va_end(ap);
+	printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s: %s\n",
+		file, line, caller, err_buf);
+	spin_unlock(&err_buf_lock);
+}
+/*
+ * ntfs_debug_dump_runlist - dump the contents of a run list to the syslog
+ * @rl:	first element of the run list to dump (may be NULL)
+ *
+ * Does nothing unless debug_msgs is set. Prints one line per run list
+ * element, stopping after the first element with a zero length, which is
+ * marked as the end of the run list. Negative lcn values are printed by
+ * name; negative values below LCN_EINVAL are printed as LCN_unknown.
+ *
+ * Caller has to provide synchronisation for @rl.
+ */
+void ntfs_debug_dump_runlist(const run_list_element *rl)
+{
+	int i;
+	const char *lcn_str[5] = { "LCN_HOLE         ", "LCN_RL_NOT_MAPPED",
+				   "LCN_ENOENT       ", "LCN_EINVAL       ",
+				   "LCN_unknown      " };
+	if (!debug_msgs)
+		return;
+	printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping run list (values "
+			"in hex):\n");
+	if (!rl) {
+		printk(KERN_DEBUG "Run list not present.\n");
+		return;
+	}
+	printk(KERN_DEBUG "VCN              LCN               Run length\n");
+	for (i = 0; ; i++) {
+		LCN lcn = (rl + i)->lcn;
+		if (lcn < (LCN)0) {
+			/*
+			 * Range check in the full width of LCN before
+			 * narrowing to int. Otherwise a large negative lcn
+			 * (corrupt run list) could be truncated to a negative
+			 * index and read outside of lcn_str[].
+			 */
+			int index = 4;
+			if (lcn >= (LCN)LCN_EINVAL)
+				index = (int)(-lcn - 1);
+			printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n",
+				(long long)(rl + i)->vcn, lcn_str[index],
+				(long long)(rl + i)->length,
+				(rl + i)->length ? "" : " (run list end)");
+		} else
+			printk(KERN_DEBUG "%-16Lx %-16Lx  %-16Lx%s\n",
+				(long long)(rl + i)->vcn,
+				(long long)(rl + i)->lcn,
+				(long long)(rl + i)->length,
+				(rl + i)->length ? "" : " (run list end)");
+		if (!(rl + i)->length)
+			break;
+	}
+}
+#endif
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
+/*
+ * debug.h - NTFS kernel debug support. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_DEBUG_H
+#define _LINUX_NTFS_DEBUG_H
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include "inode.h"
+#include "attrib.h"
+#ifdef DEBUG
+/* Run time switch: debug output is only produced while this is non-zero. */
+extern int debug_msgs;
+#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+/**
+ * ntfs_debug - write a debug level message to syslog
+ * @f:		a printf format string containing the message
+ * @...:	the variables to substitute into @f
+ *
+ * ntfs_debug() writes a DEBUG level message to the syslog but only if the
+ * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP.
+ */
+static void ntfs_debug(const char *f, ...);
+#endif
+/*
+ * Backend of ntfs_debug(). The macro below passes the source file, line and
+ * function name of the call site; the format attribute lets the compiler
+ * check the arguments against the format string (argument 4).
+ */
+extern void __ntfs_debug (const char *file, int line, const char *function,
+	const char *format, ...) __attribute__ ((format (printf, 4, 5)));
+#define ntfs_debug(f, a...)						\
+	__ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a)
+extern void ntfs_debug_dump_runlist(const run_list_element *rl);
+#else	/* !DEBUG */
+/* Without DEBUG both debug helpers expand to an empty statement. */
+#define ntfs_debug(f, a...)		do {} while (0)
+#define ntfs_debug_dump_runlist(rl)	do {} while (0)
+#endif	/* !DEBUG */
+/*
+ * Warning and error reporting is always available, independent of DEBUG.
+ * The ntfs_warning() and ntfs_error() macros pass the name of the calling
+ * function as the first argument; the format attribute lets the compiler
+ * check the arguments against the format string (argument 3).
+ */
+extern void __ntfs_warning(const char *function, const struct super_block *sb,
+		const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
+#define ntfs_warning(sb, f, a...)	__ntfs_warning(__FUNCTION__, sb, f, ##a)
+extern void __ntfs_error(const char *function, const struct super_block *sb,
+		const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
+#define ntfs_error(sb, f, a...)		__ntfs_error(__FUNCTION__, sb, f, ##a)
+#endif /* _LINUX_NTFS_DEBUG_H */
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
+/**
+ * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "ntfs.h"
+/**
+ * The little endian Unicode string $I30 as a global constant.
+ *
+ * The array holds the four characters '$', 'I', '3', '0' followed by a
+ * terminating zero character, each converted to little endian at compile
+ * time. Callers passing it as an attribute name use a length of 4, i.e.
+ * without the terminator.
+ */
+const uchar_t I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
+		const_cpu_to_le16('3'),	const_cpu_to_le16('0'),
+		const_cpu_to_le16(0) };
+/**
+ * ntfs_lookup_inode_by_name - find an inode in a directory given its name
+ * @dir_ni:	ntfs inode of the directory in which to search for the name
+ * @uname:	Unicode name for which to search in the directory
+ * @uname_len:	length of the name @uname in Unicode characters
+ *
+ * Look for an inode with name @uname in the directory with inode @dir_ni.
+ * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
+ * the Unicode name. The index root stored in the mft record is searched
+ * first and, if an index entry points to a child node, the search descends
+ * into the index allocation blocks, which are accessed through the page
+ * cache. If the name is found in the directory, the corresponding inode
+ * number (>= 0) is returned as a mft reference in cpu format, i.e. it is a
+ * 64-bit number containing the sequence number.
+ *
+ * On error, a negative value is returned corresponding to the error code. In
+ * particular if the inode is not found -ENOENT is returned. Failure to map
+ * the mft record or an index page returns the error code of the failed
+ * operation and a corrupt directory returns -EIO. Note that you can't just
+ * check the return value for being negative, you have to check the inode
+ * number for being negative which you can extract using MREC(return value).
+ *
+ * Note, @uname_len does not include the (optional) terminating NULL character.
+ */
+u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname,
+		const int uname_len)
+{
+	ntfs_volume *vol = dir_ni->vol;
+	struct super_block *sb = vol->sb;
+	MFT_RECORD *m;
+	INDEX_ROOT *ir;
+	INDEX_ENTRY *ie;
+	INDEX_ALLOCATION *ia;
+	u8 *index_end;
+	u64 mref;
+	attr_search_context *ctx;
+	int err, rc;
+	IGNORE_CASE_BOOL ic;
+	VCN vcn, old_vcn;
+	struct address_space *ia_mapping;
+	struct page *page;
+	u8 *kaddr;
+	/* Get hold of the mft record for the directory. */
+	m = map_mft_record(READ, dir_ni);
+	if (IS_ERR(m))
+		goto map_err_out;
+	err = get_attr_search_ctx(&ctx, dir_ni, m);
+	if (err)
+		goto unm_err_out;
+	/* Find the index root attribute in the mft record. */
+	if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
+			ctx)) {
+		ntfs_error(sb, "Index root attribute missing in directory "
+				"inode 0x%Lx.",
+				(unsigned long long)dir_ni->mft_no);
+		err = -EIO;
+		goto put_unm_err_out;
+	}
+	/* Get to the index root value (it's been verified in read_inode). */
+	ir = (INDEX_ROOT*)((u8*)ctx->attr +
+			le16_to_cpu(ctx->attr->_ARA(value_offset)));
+	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
+	/* The first index entry. */
+	ie = (INDEX_ENTRY*)((u8*)&ir->index +
+			le32_to_cpu(ir->index.entries_offset));
+	/*
+	 * Loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry.
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->_IEH(length)))) {
+		/* Bounds checks. */
+		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->_IEH(key_length)) >
+				index_end)
+			goto dir_err_out;
+		/*
+		 * The last entry cannot contain a name. It can however contain
+		 * a pointer to a child node in the B+tree so we just break out.
+		 */
+		if (ie->_IEH(flags) & INDEX_ENTRY_END)
+			break;
+		/*
+		 * If the current entry has a name type of POSIX, the name is
+		 * case sensitive and not otherwise. This has the effect of us
+		 * not being able to access any POSIX file names which collate
+		 * after the non-POSIX one when they only differ in case, but
+		 * anyone doing screwy stuff like that deserves to burn in
+		 * hell... Doing that kind of stuff on NT4 actually causes
+		 * corruption on the partition even when using SP6a and Linux
+		 * is not involved at all.
+		 */
+		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
+				CASE_SENSITIVE;
+		/*
+		 * If the names match perfectly, we are done and return the
+		 * mft reference of the inode (i.e. the inode number together
+		 * with the sequence number for consistency checking). We
+		 * convert it to cpu format before returning.
+		 */
+		if (ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, ic,
+				vol->upcase, vol->upcase_len)) {
+found_it:
+			mref = le64_to_cpu(ie->_IIF(indexed_file));
+			put_attr_search_ctx(ctx);
+			unmap_mft_record(READ, dir_ni);
+			return mref;
+		}
+		/*
+		 * Not a perfect match, need to do full blown collation so we
+		 * know which way in the B+tree we have to go.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				IGNORE_CASE, vol->upcase, vol->upcase_len);
+		/*
+		 * If uname collates before the name of the current entry, there
+		 * is definitely no such name in this index but we might need to
+		 * descend into the B+tree so we just break out of the loop.
+		 */
+		if (rc == -1)
+			break;
+		/* The names are not equal, continue the search. */
+		if (rc)
+			continue;
+		/*
+		 * Names match with case insensitive comparison, now try the
+		 * case sensitive comparison, which is required for proper
+		 * collation.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
+		if (rc == -1)
+			break;
+		if (rc)
+			continue;
+		/*
+		 * Perfect match, this will never happen as the
+		 * ntfs_are_names_equal() call will have gotten a match but we
+		 * still treat it correctly.
+		 */
+		goto found_it;
+	}
+	/*
+	 * We have finished with this index without success. Check for the
+	 * presence of a child node.
+	 */
+	if (!(ie->_IEH(flags) & INDEX_ENTRY_NODE)) {
+		/* No child node, return -ENOENT. */
+		err = -ENOENT;
+		goto put_unm_err_out;
+	} /* Child node present, descend into it. */
+	/* Consistency check: Verify that an index allocation exists. */
+	if (!NInoIndexAllocPresent(dir_ni)) {
+		ntfs_error(sb, "No index allocation attribute but index entry "
+				"requires one. Directory inode 0x%Lx is "
+				"corrupt or driver bug.",
+				(unsigned long long)dir_ni->mft_no);
+		err = -EIO;
+		goto put_unm_err_out;
+	}
+	/*
+	 * Get the starting vcn of the index_block holding the child node. It
+	 * is stored in the last 8 bytes of the index entry.
+	 */
+	vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->_IEH(length)) - 8);
+	ia_mapping = VFS_I(dir_ni)->i_mapping;
+descend_into_child_node:
+	/*
+	 * Convert vcn to index into the index allocation attribute in units
+	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+	 * disk if necessary.
+	 */
+	page = ntfs_map_page(ia_mapping, vcn << dir_ni->_IDM(index_vcn_size_bits)
+			>> PAGE_CACHE_SHIFT);
+	if (IS_ERR(page)) {
+		ntfs_error(sb, "Failed to map directory index page, error %ld.",
+				-PTR_ERR(page));
+		/*
+		 * Propagate the mapping error. Without this, err would still
+		 * be zero from the earlier successful calls and the caller
+		 * would not see a failure.
+		 */
+		err = PTR_ERR(page);
+		goto put_unm_err_out;
+	}
+	kaddr = (u8*)page_address(page);
+fast_descend_into_child_node:
+	/* Get to the index allocation block. */
+	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn << dir_ni->_IDM(index_vcn_size_bits)) &
+			~PAGE_CACHE_MASK));
+	/* Bounds checks. */
+	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
+				"inode 0x%Lx or driver bug.",
+				(unsigned long long)dir_ni->mft_no);
+		err = -EIO;
+		goto unm_unm_err_out;
+	}
+	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
+		ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is "
+				"different from expected VCN (0x%Lx). "
+				"Directory inode 0x%Lx is corrupt or driver "
+				"bug.",
+				(long long)sle64_to_cpu(ia->index_block_vcn),
+				(long long)vcn,
+				(unsigned long long)dir_ni->mft_no);
+		err = -EIO;
+		goto unm_unm_err_out;
+	}
+	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
+			dir_ni->_IDM(index_block_size)) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%Lx has a size (%u) differing from the "
+				"directory specified size (%u). Directory "
+				"inode is corrupt or driver bug.",
+				(long long)vcn,
+				(unsigned long long)dir_ni->mft_no,
+				le32_to_cpu(ia->index.allocated_size) + 0x18,
+				dir_ni->_IDM(index_block_size));
+		err = -EIO;
+		goto unm_unm_err_out;
+	}
+	index_end = (u8*)ia + dir_ni->_IDM(index_block_size);
+	if (index_end > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%Lx crosses page boundary. Impossible! "
+				"Cannot access! This is probably a bug in the "
+				"driver.", (long long)vcn,
+				(unsigned long long)dir_ni->mft_no);
+		err = -EIO;
+		goto unm_unm_err_out;
+	}
+	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
+	if (index_end > (u8*)ia + dir_ni->_IDM(index_block_size)) {
+		ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory "
+				"inode 0x%Lx exceeds maximum size.",
+				(long long)vcn,
+				(unsigned long long)dir_ni->mft_no);
+		err = -EIO;
+		goto unm_unm_err_out;
+	}
+	/* The first index entry. */
+	ie = (INDEX_ENTRY*)((u8*)&ia->index +
+			le32_to_cpu(ia->index.entries_offset));
+	/*
+	 * Iterate similar to above big loop but applied to index buffer, thus
+	 * loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry.
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->_IEH(length)))) {
+		/* Bounds check. */
+		if ((u8*)ie < (u8*)ia || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->_IEH(key_length)) >
+				index_end) {
+			ntfs_error(sb, "Index entry out of bounds in "
+					"directory inode 0x%Lx.",
+					(unsigned long long)dir_ni->mft_no);
+			err = -EIO;
+			goto unm_unm_err_out;
+		}
+		/*
+		 * The last entry cannot contain a name. It can however contain
+		 * a pointer to a child node in the B+tree so we just break out.
+		 */
+		if (ie->_IEH(flags) & INDEX_ENTRY_END)
+			break;
+		/*
+		 * If the current entry has a name type of POSIX, the name is
+		 * case sensitive and not otherwise. This has the effect of us
+		 * not being able to access any POSIX file names which collate
+		 * after the non-POSIX one when they only differ in case, but
+		 * anyone doing screwy stuff like that deserves to burn in
+		 * hell... Doing that kind of stuff on NT4 actually causes
+		 * corruption on the partition even when using SP6a and Linux
+		 * is not involved at all.
+		 */
+		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
+				CASE_SENSITIVE;
+		/*
+		 * If the names match perfectly, we are done and return the
+		 * mft reference of the inode (i.e. the inode number together
+		 * with the sequence number for consistency checking). We
+		 * convert it to cpu format before returning.
+		 */
+		if (ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, ic,
+				vol->upcase, vol->upcase_len)) {
+found_it2:
+			mref = le64_to_cpu(ie->_IIF(indexed_file));
+			ntfs_unmap_page(page);
+			put_attr_search_ctx(ctx);
+			unmap_mft_record(READ, dir_ni);
+			return mref;
+		}
+		/*
+		 * Not a perfect match, need to do full blown collation so we
+		 * know which way in the B+tree we have to go.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				IGNORE_CASE, vol->upcase, vol->upcase_len);
+		/*
+		 * If uname collates before the name of the current entry, there
+		 * is definitely no such name in this index but we might need to
+		 * descend into the B+tree so we just break out of the loop.
+		 */
+		if (rc == -1)
+			break;
+		/* The names are not equal, continue the search. */
+		if (rc)
+			continue;
+		/*
+		 * Names match with case insensitive comparison, now try the
+		 * case sensitive comparison, which is required for proper
+		 * collation.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
+		if (rc == -1)
+			break;
+		if (rc)
+			continue;
+		/*
+		 * Perfect match, this will never happen as the
+		 * ntfs_are_names_equal() call will have gotten a match but we
+		 * still treat it correctly.
+		 */
+		goto found_it2;
+	}
+	/*
+	 * We have finished with this index buffer without success. Check for
+	 * the presence of a child node.
+	 */
+	if (ie->_IEH(flags) & INDEX_ENTRY_NODE) {
+		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
+			ntfs_error(sb, "Index entry with child node found in "
+					"a leaf node in directory inode 0x%Lx.",
+					(unsigned long long)dir_ni->mft_no);
+			err = -EIO;
+			goto unm_unm_err_out;
+		}
+		/* Child node present, descend into it. */
+		old_vcn = vcn;
+		vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->_IEH(length)) - 8);
+		if (vcn >= 0) {
+			/*
+			 * If vcn is in the same page cache page as old_vcn we
+			 * recycle the mapped page. The currently mapped page
+			 * was looked up by converting old_vcn with the index
+			 * vcn size, so the very same conversion has to be
+			 * used to decide whether the child lives in it.
+			 */
+			if (old_vcn << dir_ni->_IDM(index_vcn_size_bits) >>
+					PAGE_CACHE_SHIFT == vcn <<
+					dir_ni->_IDM(index_vcn_size_bits) >>
+					PAGE_CACHE_SHIFT)
+				goto fast_descend_into_child_node;
+			ntfs_unmap_page(page);
+			goto descend_into_child_node;
+		}
+		ntfs_error(sb, "Negative child node vcn in directory inode "
+				"0x%Lx.", (unsigned long long)dir_ni->mft_no);
+		err = -EIO;
+		goto unm_unm_err_out;
+	}
+	/* No child node, return -ENOENT. */
+	ntfs_debug("Entry not found.");
+	err = -ENOENT;
+	/*
+	 * The labels below release, in reverse order of acquisition, the
+	 * mapped index page, the attribute search context and the mft record.
+	 */
+unm_unm_err_out:
+	ntfs_unmap_page(page);
+put_unm_err_out:
+	put_attr_search_ctx(ctx);
+unm_err_out:
+	unmap_mft_record(READ, dir_ni);
+	return ERR_MREF(err);
+map_err_out:
+	/* Nothing was acquired yet, so there is nothing to release here. */
+	ntfs_error(sb, "map_mft_record(READ) failed with error code %ld.",
+			-PTR_ERR(m));
+	return ERR_MREF(PTR_ERR(m));
+dir_err_out:
+	ntfs_error(sb, "Corrupt directory. Aborting lookup.");
+	err = -EIO;
+	goto put_unm_err_out;
+}
+/*
+ * index_union - the index structure an index entry is located in.
+ *
+ * Declared as a GCC transparent union so that a caller can pass either an
+ * INDEX_ROOT pointer or an INDEX_ALLOCATION pointer directly as the function
+ * argument, without wrapping it in the union first.
+ */
+typedef union {
+	INDEX_ROOT *ir;
+	INDEX_ALLOCATION *ia;
+} index_union __attribute__ ((__transparent_union__));
+/* Tells ntfs_filldir() which member of its index_union argument is valid. */
+typedef enum {
+	INDEX_TYPE_ROOT,	/* index root */
+	INDEX_TYPE_ALLOCATION,	/* index allocation */
+} INDEX_TYPE;
+/**
+ * ntfs_filldir - hand one index entry to the vfs filldir callback
+ * @vol:	ntfs volume the directory belongs to
+ * @filp:	open file of the directory being read
+ * @ndir:	ntfs inode of the directory being read
+ * @index_type:	tells whether @iu holds an index root or an index allocation
+ * @iu:		index root or index allocation that contains @ie
+ * @ie:		index entry to process
+ * @name:	scratch buffer receiving the converted name
+ * @dirent:	opaque context passed through to @filldir
+ * @filldir:	vfs filldir callback
+ *
+ * Set @filp->f_pos to the position of @ie. Then, unless the entry is hidden
+ * by the readdir options of @vol, refers to the root directory itself or has
+ * a name that cannot be converted, convert the Unicode name to the loaded NLS
+ * and pass it to @filldir.
+ *
+ * Return 0 if the entry was skipped, otherwise whatever @filldir returned.
+ */
+static inline int ntfs_filldir(ntfs_volume *vol, struct file *filp,
+		ntfs_inode *ndir, const INDEX_TYPE index_type,
+		index_union iu, INDEX_ENTRY *ie, u8 *name,
+		void *dirent, filldir_t filldir)
+{
+	READDIR_OPTIONS opts;
+	FILE_NAME_TYPE_FLAGS name_space;
+	unsigned d_type;
+	int len;
+	/*
+	 * Record the position of this entry first so that f_pos advances even
+	 * when the entry ends up being skipped below.
+	 */
+	if (index_type != INDEX_TYPE_ALLOCATION) {
+		/* INDEX_TYPE_ROOT: byte offset of the entry in the index root. */
+		filp->f_pos = (u8*)ie - (u8*)iu.ir;
+	} else {
+		/*
+		 * Byte offset of the entry in the index allocation, biased by
+		 * the mft record size.
+		 */
+		filp->f_pos = (u8*)ie - (u8*)iu.ia +
+				(sle64_to_cpu(iu.ia->index_block_vcn) <<
+				ndir->_IDM(index_vcn_size_bits)) +
+				vol->mft_record_size;
+	}
+	opts = vol->readdir_opts;
+	name_space = ie->key.file_name.file_name_type;
+	/* Name space filtering as requested by the readdir options. */
+	if (name_space == FILE_NAME_DOS && RHideDosNames(opts)) {
+		ntfs_debug("Skipping DOS name space entry.");
+		return 0;
+	}
+	if (RHideLongNames(opts) && (name_space == FILE_NAME_WIN32 ||
+			name_space == FILE_NAME_POSIX)) {
+		ntfs_debug("Skipping WIN32/POSIX name space entry.");
+		return 0;
+	}
+	/* The root directory contains an entry for itself, never show it. */
+	if (MREF_LE(ie->_IIF(indexed_file)) == FILE_root) {
+		ntfs_debug("Skipping root directory self reference entry.");
+		return 0;
+	}
+	if (MREF_LE(ie->_IIF(indexed_file)) < FILE_first_user &&
+			RHideSystemFiles(opts)) {
+		ntfs_debug("Skipping system file.");
+		return 0;
+	}
+	/* Convert the on-disk Unicode name into the caller supplied buffer. */
+	len = ntfs_ucstonls(vol, (uchar_t*)&ie->key.file_name.file_name,
+			ie->key.file_name.file_name_length, &name,
+			NTFS_MAX_NAME_LEN * 3 + 1);
+	if (len <= 0) {
+		ntfs_debug("Skipping unrepresentable file.");
+		return 0;
+	}
+	/* Entries flagged as having a file name index are directories. */
+	d_type = (ie->key.file_name.file_attributes &
+			FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT) ? DT_DIR : DT_REG;
+	ntfs_debug("Calling filldir for %s with len %i, f_pos 0x%Lx, inode "
+			"0x%Lx, DT_%s.", name, len, filp->f_pos,
+			(unsigned long long)MREF_LE(ie->_IIF(indexed_file)),
+			d_type == DT_DIR ? "DIR" : "REG");
+	return filldir(dirent, name, len, filp->f_pos,
+			(unsigned long)MREF_LE(ie->_IIF(indexed_file)), d_type);
+}
+/**
+ * ntfs_readdir - read the entries of an ntfs directory
+ * @filp:	open file of the directory; f_pos selects where to resume
+ * @dirent:	opaque context passed through to @filldir
+ * @filldir:	vfs filldir callback receiving each entry
+ *
+ * VFS calls readdir with BKL held so no possible RACE conditions.
+ * We use the same basic approach as the old NTFS driver, i.e. we parse the
+ * index root entries and then the index allocation entries that are marked
+ * as in use in the index bitmap.
+ * While this will return the names in random order this doesn't matter for
+ * readdir but OTOH results in faster readdir.
+ *
+ * f_pos encoding: 0 and 1 are the emulated "." and ".." entries, other values
+ * below vol->mft_record_size are byte offsets into the index root and values
+ * from vol->mft_record_size upwards are byte offsets into the index
+ * allocation biased by vol->mft_record_size. End of directory is signalled by
+ * f_pos being set to i_size + vol->mft_record_size.
+ *
+ * Return 0 on success, including when @filldir asked us to stop, and an error
+ * code on failure.
+ */
+static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	s64 ia_pos, ia_start, prev_ia_pos;
+	struct inode *vdir = filp->f_dentry->d_inode;
+	struct super_block *sb = vdir->i_sb;
+	ntfs_inode *ndir = NTFS_I(vdir);
+	ntfs_volume *vol = NTFS_SB(sb);
+	MFT_RECORD *m;
+	INDEX_ROOT *ir;
+	INDEX_ENTRY *ie;
+	INDEX_ALLOCATION *ia;
+	u8 *name;
+	int rc, err, ir_pos, bmp_pos;
+	struct address_space *ia_mapping;
+	struct page *page;
+	u8 *kaddr, *bmp, *index_end;
+	attr_search_context *ctx;
+	ntfs_debug("Entering for inode 0x%Lx, f_pos 0x%Lx.",
+			(unsigned long long)ndir->mft_no, filp->f_pos);
+	rc = err = 0;
+	/* Are we at end of dir yet? */
+	if (filp->f_pos >= vdir->i_size + vol->mft_record_size)
+		goto done;
+	/* Emulate . and .. for all directories. */
+	if (!filp->f_pos) {
+		ntfs_debug("Calling filldir for . with len 1, f_pos 0x0, "
+				"inode 0x%Lx, DT_DIR.",
+				(unsigned long long)ndir->mft_no);
+		rc = filldir(dirent, ".", 1, filp->f_pos, vdir->i_ino, DT_DIR);
+		if (rc)
+			goto done;
+		filp->f_pos++;
+	}
+	if (filp->f_pos == 1) {
+		ntfs_debug("Calling filldir for .. with len 2, f_pos 0x1, "
+				"inode 0x%Lx, DT_DIR.",
+				(unsigned long long)NTFS_I(
+				filp->f_dentry->d_parent->d_inode)->mft_no);
+		rc = filldir(dirent, "..", 2, filp->f_pos,
+				filp->f_dentry->d_parent->d_inode->i_ino,
+				DT_DIR);
+		if (rc)
+			goto done;
+		filp->f_pos++;
+	}
+	/* Get hold of the mft record for the directory. */
+	m = map_mft_record(READ, ndir);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		goto err_out;
+	}
+	err = get_attr_search_ctx(&ctx, ndir, m);
+	if (err)
+		goto unm_err_out;
+	/*
+	 * Allocate a buffer to store the current name being processed
+	 * converted to format determined by current NLS.
+	 */
+	name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * 3 + 1, GFP_NOFS);
+	if (!name) {
+		err = -ENOMEM;
+		goto put_unm_err_out;
+	}
+	/* Are we jumping straight into the index allocation attribute? */
+	if (filp->f_pos >= vol->mft_record_size)
+		goto skip_index_root;
+	/* Get the offset into the index root attribute. */
+	ir_pos = (s64)filp->f_pos;
+	/* Find the index root attribute in the mft record. */
+	if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
+			ctx)) {
+		ntfs_error(sb, "Index root attribute missing in directory "
+				"inode 0x%Lx.",
+				(unsigned long long)ndir->mft_no);
+		err = -EIO;
+		goto kf_unm_err_out;
+	}
+	/* Get to the index root value (it's been verified in read_inode). */
+	ir = (INDEX_ROOT*)((u8*)ctx->attr +
+			le16_to_cpu(ctx->attr->_ARA(value_offset)));
+	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
+	/* The first index entry. */
+	ie = (INDEX_ENTRY*)((u8*)&ir->index +
+			le32_to_cpu(ir->index.entries_offset));
+	/*
+	 * Loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry or until filldir tells us it has had enough
+	 * or signals an error (both covered by the rc test).
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->_IEH(length)))) {
+		/* Cast: a pointer difference does not match the %x format. */
+		ntfs_debug("In index root, offset 0x%x.",
+				(unsigned)((u8*)ie - (u8*)ir));
+		/* Bounds checks. */
+		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->_IEH(key_length)) >
+				index_end)
+			goto dir_err_out;
+		/* The last entry cannot contain a name. */
+		if (ie->_IEH(flags) & INDEX_ENTRY_END)
+			break;
+		/* Skip index root entry if continuing previous readdir. */
+		if (ir_pos > (u8*)ie - (u8*)ir)
+			continue;
+		/* Submit the name to the filldir callback. */
+		rc = ntfs_filldir(vol, filp, ndir, INDEX_TYPE_ROOT, ir, ie,
+				name, dirent, filldir);
+		if (rc)
+			goto abort;
+	}
+	/* If there is no index allocation attribute we are finished. */
+	if (!NInoIndexAllocPresent(ndir))
+		goto EOD;
+	/* Advance f_pos to the beginning of the index allocation. */
+	filp->f_pos = vol->mft_record_size;
+	/* Reinitialize the search context. */
+	reinit_attr_search_ctx(ctx);
+skip_index_root:
+	if (NInoBmpNonResident(ndir)) {
+		/*
+		 * Read the page of the bitmap that contains the current index
+		 * block.
+		 */
+		// TODO: FIXME: Implement this!
+		ntfs_error(sb, "Index bitmap is non-resident, which is not "
+				"supported yet. Pretending that end of "
+				"directory has been reached.\n");
+		goto EOD;
+	} else {
+		/* Find the index bitmap attribute in the mft record. */
+		if (!lookup_attr(AT_BITMAP, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
+				ctx)) {
+			ntfs_error(sb, "Index bitmap attribute missing in "
+					"directory inode 0x%Lx.",
+					(unsigned long long)ndir->mft_no);
+			err = -EIO;
+			goto kf_unm_err_out;
+		}
+		bmp = (u8*)ctx->attr + le16_to_cpu(ctx->attr->_ARA(value_offset));
+	}
+	/* Get the offset into the index allocation attribute. */
+	ia_pos = (s64)filp->f_pos - vol->mft_record_size;
+	ia_mapping = vdir->i_mapping;
+	/* If the index block is not in use find the next one that is. */
+	bmp_pos = ia_pos >> ndir->_IDM(index_block_size_bits);
+	page = NULL;
+	kaddr = NULL;
+	prev_ia_pos = -1LL;
+	if (bmp_pos >> 3 >= ndir->_IDM(bmp_size)) {
+		ntfs_error(sb, "Current index allocation position exceeds "
+				"index bitmap size.");
+		/* Without this the error exit would return the initial 0. */
+		err = -EIO;
+		goto kf_unm_err_out;
+	}
+	while (!(bmp[bmp_pos >> 3] & (1 << (bmp_pos & 7)))) {
+		/*
+		 * Also entered by goto once an index buffer has been fully
+		 * processed, in which case a page may still be mapped.
+		 */
+find_next_index_buffer:
+		bmp_pos++;
+		/* If we have reached the end of the bitmap, we are done. */
+		if (bmp_pos >> 3 >= ndir->_IDM(bmp_size))
+			goto unm_EOD;
+		ia_pos = (s64)bmp_pos << ndir->_IDM(index_block_size_bits);
+	}
+	ntfs_debug("Handling index buffer 0x%x.", bmp_pos);
+	/* If the current index buffer is in the same page we reuse the page. */
+	if ((prev_ia_pos & PAGE_CACHE_MASK) != (ia_pos & PAGE_CACHE_MASK)) {
+		prev_ia_pos = ia_pos;
+		if (page)
+			ntfs_unmap_page(page);
+		/*
+		 * Map the page cache page containing the current ia_pos,
+		 * reading it from disk if necessary.
+		 */
+		page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT);
+		if (IS_ERR(page))
+			goto map_page_err_out;
+		kaddr = (u8*)page_address(page);
+	}
+	/* Get the current index buffer. */
+	ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK &
+			~(s64)(ndir->_IDM(index_block_size) - 1)));
+	/* Bounds checks. */
+	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
+				"inode 0x%Lx or driver bug.",
+				(unsigned long long)ndir->mft_no);
+		err = -EIO;
+		goto unm_dir_err_out;
+	}
+	if (sle64_to_cpu(ia->index_block_vcn) != (ia_pos &
+			~(s64)(ndir->_IDM(index_block_size) - 1)) >>
+			ndir->_IDM(index_vcn_size_bits)) {
+		ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is "
+				"different from expected VCN (0x%Lx). "
+				"Directory inode 0x%Lx is corrupt or driver "
+				"bug. ",
+				(long long)sle64_to_cpu(ia->index_block_vcn),
+				(long long)ia_pos >> ndir->_IDM(index_vcn_size_bits),
+				(unsigned long long)ndir->mft_no);
+		err = -EIO;
+		goto unm_dir_err_out;
+	}
+	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
+			ndir->_IDM(index_block_size)) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%Lx has a size (%u) differing from the "
+				"directory specified size (%u). Directory "
+				"inode is corrupt or driver bug.",
+				(long long)ia_pos >> ndir->_IDM(index_vcn_size_bits),
+				(unsigned long long)ndir->mft_no,
+				le32_to_cpu(ia->index.allocated_size) + 0x18,
+				ndir->_IDM(index_block_size));
+		err = -EIO;
+		goto unm_dir_err_out;
+	}
+	index_end = (u8*)ia + ndir->_IDM(index_block_size);
+	if (index_end > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%Lx crosses page boundary. Impossible! "
+				"Cannot access! This is probably a bug in the "
+				"driver.", (long long)ia_pos >>
+				ndir->_IDM(index_vcn_size_bits),
+				(unsigned long long)ndir->mft_no);
+		err = -EIO;
+		goto unm_dir_err_out;
+	}
+	ia_start = ia_pos & ~(s64)(ndir->_IDM(index_block_size) - 1);
+	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
+	if (index_end > (u8*)ia + ndir->_IDM(index_block_size)) {
+		ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory "
+				"inode 0x%Lx exceeds maximum size.",
+				(long long)ia_pos >> ndir->_IDM(index_vcn_size_bits),
+				(unsigned long long)ndir->mft_no);
+		err = -EIO;
+		goto unm_dir_err_out;
+	}
+	/* The first index entry in this index buffer. */
+	ie = (INDEX_ENTRY*)((u8*)&ia->index +
+			le32_to_cpu(ia->index.entries_offset));
+	/*
+	 * Loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry or until filldir tells us it has had enough
+	 * or signals an error (both covered by the rc test).
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->_IEH(length)))) {
+		ntfs_debug("In index allocation, offset 0x%Lx.",
+				(long long)ia_start + ((u8*)ie - (u8*)ia));
+		/* Bounds checks. */
+		if ((u8*)ie < (u8*)ia || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->_IEH(key_length)) >
+				index_end)
+			goto unm_dir_err_out;
+		/* The last entry cannot contain a name. */
+		if (ie->_IEH(flags) & INDEX_ENTRY_END)
+			break;
+		/* Skip index block entry if continuing previous readdir. */
+		if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
+			continue;
+		/* Submit the name to the filldir callback. */
+		rc = ntfs_filldir(vol, filp, ndir, INDEX_TYPE_ALLOCATION, ia,
+				ie, name, dirent, filldir);
+		if (rc) {
+			ntfs_unmap_page(page);
+			goto abort;
+		}
+	}
+	goto find_next_index_buffer;
+unm_EOD:
+	/*
+	 * End of the index bitmap. Release the index allocation page that is
+	 * still mapped from the last index buffer processed, if any.
+	 */
+	if (page)
+		ntfs_unmap_page(page);
+EOD:
+	/* We are finished, set f_pos to EOD. */
+	filp->f_pos = vdir->i_size + vol->mft_record_size;
+abort:
+	put_attr_search_ctx(ctx);
+	unmap_mft_record(READ, ndir);
+	kfree(name);
+done:
+#ifdef DEBUG
+	if (!rc)
+		ntfs_debug("EOD, f_pos 0x%Lx, returning 0.", filp->f_pos);
+	else
+		ntfs_debug("filldir returned %i, f_pos 0x%Lx, returning 0.",
+				rc, filp->f_pos);
+#endif
+	return 0;
+map_page_err_out:
+	ntfs_error(sb, "Reading index allocation data failed.");
+	err = PTR_ERR(page);
+kf_unm_err_out:
+	kfree(name);
+put_unm_err_out:
+	put_attr_search_ctx(ctx);
+unm_err_out:
+	unmap_mft_record(READ, ndir);
+err_out:
+	ntfs_debug("Failed. Returning error code %i.", -err);
+	return err;
+unm_dir_err_out:
+	ntfs_unmap_page(page);
+dir_err_out:
+	ntfs_error(sb, "Corrupt directory. Aborting. You should run chkdsk.");
+	err = -EIO;
+	goto kf_unm_err_out;
+}
+/* File operations for directories: only read and readdir are supplied. */
+struct file_operations ntfs_dir_ops = {
+	.read		= generic_read_dir,	/* Return -EISDIR. */
+	.readdir	= ntfs_readdir,		/* Read directory. */
+};
+/*
+ * Reference only: the #if 0 keeps this listing of struct file_operations
+ * members out of the build. The read and readdir members, which ntfs_dir_ops
+ * sets, do not appear in the listing.
+ */
+#if 0
+/* NOTE: write, poll, fsync, readv, writev can be called without the big
+ * kernel lock held in all filesystems. */
+struct file_operations {
+	loff_t (*llseek) (struct file *, loff_t, int);
+	ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
+	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	int (*ioctl) (struct inode *, struct file *, unsigned int,
+			unsigned long);
+	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*open) (struct inode *, struct file *);
+	int (*flush) (struct file *);
+	int (*release) (struct inode *, struct file *);
+	int (*fsync) (struct file *, struct dentry *, int datasync);
+	int (*fasync) (int, struct file *, int);
+	int (*lock) (struct file *, int, struct file_lock *);
+	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
+			loff_t *);
+	ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
+			loff_t *);
+	ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
+			loff_t *, int);
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
+			unsigned long, unsigned long, unsigned long);
+};
+#endif
--- a/fs/ntfs/endian.h
+++ b/fs/ntfs/endian.h
+/*
+ * endian.h - Defines for endianness handling in NTFS Linux kernel driver.
+ *	      Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_ENDIAN_H
+#define _LINUX_NTFS_ENDIAN_H
+#include <asm/byteorder.h>
+/*
+ * Signed endianness conversion defines.
+ *
+ * Each macro casts its argument to the signed type of the given width,
+ * applies the matching __leXX_to_cpu() / __cpu_to_leXX() conversion and casts
+ * the result to that signed type again. The variants whose name ends in "p"
+ * take a pointer and convert the value it points to.
+ */
+/* Little endian to cpu byte order, by value. */
+#define sle16_to_cpu(x)		((s16)__le16_to_cpu((s16)(x)))
+#define sle32_to_cpu(x)		((s32)__le32_to_cpu((s32)(x)))
+#define sle64_to_cpu(x)		((s64)__le64_to_cpu((s64)(x)))
+/* Little endian to cpu byte order, through a pointer. */
+#define sle16_to_cpup(x)	((s16)__le16_to_cpu(*(s16*)(x)))
+#define sle32_to_cpup(x)	((s32)__le32_to_cpu(*(s32*)(x)))
+#define sle64_to_cpup(x)	((s64)__le64_to_cpu(*(s64*)(x)))
+/* Cpu byte order to little endian, by value. */
+#define cpu_to_sle16(x)		((s16)__cpu_to_le16((s16)(x)))
+#define cpu_to_sle32(x)		((s32)__cpu_to_le32((s32)(x)))
+#define cpu_to_sle64(x)		((s64)__cpu_to_le64((s64)(x)))
+/* Cpu byte order to little endian, through a pointer. */
+#define cpu_to_sle16p(x)	((s16)__cpu_to_le16(*(s16*)(x)))
+#define cpu_to_sle32p(x)	((s32)__cpu_to_le32(*(s32*)(x)))
+#define cpu_to_sle64p(x)	((s64)__cpu_to_le64(*(s64*)(x)))
+#endif /* _LINUX_NTFS_ENDIAN_H */
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
+/*
+ * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "ntfs.h"
+/*
+ * File operations table ntfs_file_ops. Only the generic seek, read, mmap and
+ * open helpers are hooked up; every other member is explicitly NULL.
+ */
+struct file_operations ntfs_file_ops = {
+	.llseek			= generic_file_llseek,	/* Seek inside file. */
+	.read			= generic_file_read,	/* Read from file. */
+	.write			= NULL,
+	.readdir		= NULL,
+	.poll			= NULL,
+	.ioctl			= NULL,
+	.mmap			= generic_file_mmap,	/* Mmap file. */
+	.open			= generic_file_open,	/* Open file. */
+	.flush			= NULL,
+	.release		= NULL,
+	.fsync			= NULL,
+	.fasync			= NULL,
+	.lock			= NULL,
+	.readv			= NULL,
+	.writev			= NULL,
+	.sendpage		= NULL,
+	.get_unmapped_area	= NULL,
+};
+/* Inode operations table ntfs_file_inode_ops. Every member is NULL. */
+struct inode_operations ntfs_file_inode_ops = {
+	.create		= NULL,
+	.lookup		= NULL,
+	.link		= NULL,
+	.unlink		= NULL,
+	.symlink	= NULL,
+	.mkdir		= NULL,
+	.rmdir		= NULL,
+	.mknod		= NULL,
+	.rename		= NULL,
+	.readlink	= NULL,
+	.follow_link	= NULL,
+	.truncate	= NULL,
+	.permission	= NULL,
+	.revalidate	= NULL,
+	.setattr	= NULL,
+	.getattr	= NULL,
+};
+/*
+ * Reference only: the #if 0 keeps these listings of the members of struct
+ * file_operations and struct inode_operations out of the build.
+ */
+#if 0
+/* NOTE: read, write, poll, fsync, readv, writev can be called without the big
+ * kernel lock held in all filesystems. */
+struct file_operations {
+	struct module *owner;
+	loff_t (*llseek) (struct file *, loff_t, int);
+	ssize_t (*read) (struct file *, char *, size_t, loff_t *);
+	ssize_t (*write) (struct file *, const char *, size_t, loff_t *);
+	int (*readdir) (struct file *, void *, filldir_t);
+	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	int (*ioctl) (struct inode *, struct file *, unsigned int,
+			unsigned long);
+	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*flush) (struct file *);
+	int (*release) (struct inode *, struct file *);
+	int (*fsync) (struct file *, struct dentry *, int datasync);
+	int (*fasync) (int, struct file *, int);
+	int (*lock) (struct file *, int, struct file_lock *);
+	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
+			loff_t *);
+	ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
+			loff_t *);
+	ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
+			loff_t *, int);
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
+			unsigned long, unsigned long, unsigned long);
+};
+struct inode_operations {
+	int (*create) (struct inode *,struct dentry *,int);
+	struct dentry * (*lookup) (struct inode *,struct dentry *);
+	int (*link) (struct dentry *,struct inode *,struct dentry *);
+	int (*unlink) (struct inode *,struct dentry *);
+	int (*symlink) (struct inode *,struct dentry *,const char *);
+	int (*mkdir) (struct inode *,struct dentry *,int);
+	int (*rmdir) (struct inode *,struct dentry *);
+	int (*mknod) (struct inode *,struct dentry *,int,int);
+	int (*rename) (struct inode *, struct dentry *,
+			struct inode *, struct dentry *);
+	int (*readlink) (struct dentry *, char *,int);
+	int (*follow_link) (struct dentry *, struct nameidata *);
+	void (*truncate) (struct inode *);
+	int (*permission) (struct inode *, int);
+	int (*revalidate) (struct dentry *);
+	int (*setattr) (struct dentry *, struct iattr *);
+	int (*getattr) (struct dentry *, struct iattr *);
+};
+#endif
+/* File operations table ntfs_empty_file_ops. Every member is NULL. */
+struct file_operations ntfs_empty_file_ops = {
+	.llseek			= NULL,
+	.read			= NULL,
+	.write			= NULL,
+	.readdir		= NULL,
+	.poll			= NULL,
+	.ioctl			= NULL,
+	.mmap			= NULL,
+	.open			= NULL,
+	.flush			= NULL,
+	.release		= NULL,
+	.fsync			= NULL,
+	.fasync			= NULL,
+	.lock			= NULL,
+	.readv			= NULL,
+	.writev			= NULL,
+	.sendpage		= NULL,
+	.get_unmapped_area	= NULL,
+};
+/* Inode operations table ntfs_empty_inode_ops. Every member is NULL. */
+struct inode_operations ntfs_empty_inode_ops = {
+	.create		= NULL,
+	.lookup		= NULL,
+	.link		= NULL,
+	.unlink		= NULL,
+	.symlink	= NULL,
+	.mkdir		= NULL,
+	.rmdir		= NULL,
+	.mknod		= NULL,
+	.rename		= NULL,
+	.readlink	= NULL,
+	.follow_link	= NULL,
+	.truncate	= NULL,
+	.permission	= NULL,
+	.revalidate	= NULL,
+	.setattr	= NULL,
+	.getattr	= NULL,
+};
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
+/**
+ * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/pagemap.h>
+#include "ntfs.h"
+/**
+ * ntfs_alloc_big_inode - allocate an ntfs big inode
+ * @sb:	super block, used here only for error reporting
+ *
+ * Allocate an ntfs_inode from ntfs_big_inode_cache using SLAB_NOFS.
+ *
+ * Return the result of VFS_I() on the new ntfs inode, or NULL if the
+ * allocation failed.
+ */
+struct inode *ntfs_alloc_big_inode(struct super_block *sb)
+{
+	ntfs_inode *big_ni;
+	ntfs_debug("Entering.");
+	big_ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache,
+			SLAB_NOFS);
+	if (big_ni)
+		return VFS_I(big_ni);
+	ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
+	return NULL;
+}
+/**
+ * ntfs_destroy_big_inode - free an ntfs big inode
+ * @inode:	vfs inode whose ntfs inode is to be freed
+ *
+ * BUG if the mft record count is non-zero or if dropping one reference does
+ * not bring the inode count to zero, then hand the ntfs inode back to
+ * ntfs_big_inode_cache.
+ */
+void ntfs_destroy_big_inode(struct inode *inode)
+{
+	ntfs_inode *big_ni;
+	big_ni = NTFS_I(inode);
+	ntfs_debug("Entering.");
+	/* The reference is only dropped if the first check passed. */
+	BUG_ON(atomic_read(&big_ni->mft_count));
+	BUG_ON(!atomic_dec_and_test(&big_ni->count));
+	kmem_cache_free(ntfs_big_inode_cache, big_ni);
+}
+/**
+ * ntfs_alloc_inode - allocate an ntfs inode
+ *
+ * Allocate an ntfs_inode from ntfs_inode_cache using SLAB_NOFS.
+ *
+ * Return the new ntfs inode, or NULL if the allocation failed.
+ */
+ntfs_inode *ntfs_alloc_inode(void)
+{
+	ntfs_inode *new_ni;
+	new_ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
+	ntfs_debug("Entering.");
+	if (unlikely(!new_ni)) {
+		ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
+		return NULL;
+	}
+	return new_ni;
+}
+/**
+ * ntfs_destroy_inode - free an ntfs inode
+ * @ni:	ntfs inode to free
+ *
+ * BUG if the mft record count is non-zero or if dropping one reference does
+ * not bring the inode count to zero, then hand @ni back to ntfs_inode_cache.
+ */
+void ntfs_destroy_inode(ntfs_inode *ni)
+{
+	ntfs_debug("Entering.");
+	/* The reference is only dropped if the first check passed. */
+	BUG_ON(atomic_read(&ni->mft_count));
+	BUG_ON(!atomic_dec_and_test(&ni->count));
+	kmem_cache_free(ntfs_inode_cache, ni);
+}
+/**
+ * __ntfs_init_inode - initialize ntfs specific part of an inode
+ * @sb:	super block of the volume the inode belongs to
+ * @ni:	ntfs inode to initialize
+ *
+ * Initialize the ntfs inode @ni to defaults: zero the whole structure, set
+ * the reference count to one and the mft record count to zero, initialize
+ * the run lists and locks and attach @ni to the ntfs volume of @sb.
+ *
+ * This function does not return a value and does not allocate anything.
+ */
+static void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
+{
+	ntfs_debug("Entering.");
+	memset(ni, 0, sizeof(ntfs_inode));
+	atomic_set(&ni->count, 1);
+	INIT_RUN_LIST(&ni->run_list);
+	init_rwsem(&ni->mrec_lock);
+	atomic_set(&ni->mft_count, 0);
+	ni->page = NULL;
+	ni->attr_list = NULL;
+	INIT_RUN_LIST(&ni->attr_list_rl);
+	INIT_RUN_LIST(&ni->_IDM(bmp_rl));
+	init_MUTEX(&ni->extent_lock);
+	ni->_INE(base_ntfs_ino) = NULL;
+	ni->vol = NTFS_SB(sb);
+}
+/*
+ * Initialize the ntfs inode belonging to the vfs inode @vi and record the
+ * inode number of @vi as its mft record number.
+ */
+static void ntfs_init_big_inode(struct inode *vi)
+{
+	ntfs_inode *big_ni;
+	big_ni = NTFS_I(vi);
+	ntfs_debug("Entering.");
+	__ntfs_init_inode(vi->i_sb, big_ni);
+	big_ni->mft_no = vi->i_ino;
+}
+/*
+ * Allocate an ntfs inode and initialize it for the volume of @sb.
+ * Return the new ntfs inode or NULL if the allocation failed.
+ */
+ntfs_inode *ntfs_new_inode(struct super_block *sb)
+{
+	ntfs_inode *new_ni;
+	new_ni = ntfs_alloc_inode();
+	ntfs_debug("Entering.");
+	if (!new_ni)
+		return NULL;
+	__ntfs_init_inode(sb, new_ni);
+	return new_ni;
+}
+/**
+ * ntfs_is_extended_system_file - check if a file is in the $Extend directory
+ * @ctx:	initialized attribute search context
+ *
+ * Search all file name attributes in the inode described by the attribute
+ * search context @ctx and check if any of the names are in the $Extend system
+ * directory.
+ *
+ * Every file name attribute visited is sanity checked before it is used. If
+ * no name is in $Extend, the number of file name attributes found is compared
+ * against the hard link count stored in the mft record.
+ *
+ * Return values:
+ *	   1: file is in $Extend directory
+ *	   0: file is not in $Extend directory
+ *	-EIO: file is corrupt
+ */
+static int ntfs_is_extended_system_file(attr_search_context *ctx)
+{
+	int nr_links;
+	/* Restart search. */
+	reinit_attr_search_ctx(ctx);
+	/* Get number of hard links. */
+	nr_links = le16_to_cpu(ctx->mrec->link_count);
+	/* Loop through all hard links. */
+	while (lookup_attr(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx)) {
+		FILE_NAME_ATTR *file_name_attr;
+		ATTR_RECORD *attr = ctx->attr;
+		u8 *p, *p2;
+		nr_links--;
+		/*
+		 * Maximum sanity checking as we are called on an inode that
+		 * we suspect might be corrupt.
+		 */
+		/* p is the end of the attribute record. */
+		p = (u8*)attr + le32_to_cpu(attr->length);
+		if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec +
+				le32_to_cpu(ctx->mrec->bytes_in_use)) {
+			/* Also the target of the value bounds check below. */
+err_corrupt_attr:
+			ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name "
+					"attribute. You should run chkdsk.");
+			return -EIO;
+		}
+		if (attr->non_resident) {
+			ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file "
+					"name. You should run chkdsk.");
+			return -EIO;
+		}
+		if (attr->flags) {
+			ntfs_error(ctx->ntfs_ino->vol->sb, "File name with "
+					"invalid flags. You should run "
+					"chkdsk.");
+			return -EIO;
+		}
+		if (!(attr->_ARA(resident_flags) & RESIDENT_ATTR_IS_INDEXED)) {
+			ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file "
+					"name. You should run chkdsk.");
+			return -EIO;
+		}
+		file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
+				le16_to_cpu(attr->_ARA(value_offset)));
+		/*
+		 * p2 is the end of the attribute value. It has to be measured
+		 * from the start of the value, not from the start of the
+		 * attribute record, or a value overrunning the record by up to
+		 * value_offset bytes would go unnoticed. The first test
+		 * catches pointer wrap around.
+		 */
+		p2 = (u8*)file_name_attr +
+				le32_to_cpu(attr->_ARA(value_length));
+		if (p2 < (u8*)file_name_attr || p2 > p)
+			goto err_corrupt_attr;
+		/* This attribute is ok, but is it in the $Extend directory? */
+		if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend)
+			return 1;	/* YES, it's an extended system file. */
+	}
+	if (nr_links) {
+		ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count "
+				"doesn't match number of name attributes. You "
+				"should run chkdsk.");
+		return -EIO;
+	}
+	return 0;	/* NO, it is not an extended system file. */
+}
+/**
+ * ntfs_read_inode - read an inode from its device
+ * @vi:		inode to read
+ *
+ * ntfs_read_inode() is called from the VFS iget() function to read the inode
+ * described by @vi into memory from the device.
+ *
+ * The only fields in @vi that we need to/can look at when the function is
+ * called are i_sb, pointing to the mounted device's super block, and i_ino,
+ * the number of the inode to load.
+ *
+ * ntfs_read_inode() maps, pins and locks the mft record number i_ino for
+ * reading and sets up the necessary @vi fields as well as initializing
+ * the ntfs inode.
+ *
+ * Q: What locks are held when the function is called?
+ * A: i_state has I_LOCK set, hence the inode is locked, also
+ *    i_count is set to 1, so it is not going to go away
+ *    i_flags is set to 0 and we have no business touching it. Only an ioctl()
+ *    is allowed to write to them. We should of course be honouring them but
+ *    we need to do that using the IS_* macros defined in include/linux/fs.h.
+ *    In any case ntfs_read_inode() has nothing to do with i_flags at all.
+ */
+void ntfs_read_inode(struct inode *vi)
+{
+	ntfs_volume *vol = NTFS_SB(vi->i_sb);
+	ntfs_inode *ni;
+	MFT_RECORD *m;
+	STANDARD_INFORMATION *si;
+	attr_search_context *ctx;
+	int err;
+	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
+	/* Setup the generic vfs inode parts now. */
+	/* This is the optimal IO size (for stat), not the fs block size. */
+	vi->i_blksize = PAGE_CACHE_SIZE;
+	/*
+	 * This is for checking whether an inode has changed w.r.t. a file so
+	 * that the file can be updated if necessary (compare with f_version).
+	 */
+	vi->i_version = ++event;
+	/* Set uid and gid from the mount options. */
+	vi->i_uid = vol->uid;
+	vi->i_gid = vol->gid;
+	/* Set to zero so we can use logical operations on it from here on. */
+	vi->i_mode = 0;
+	/*
+	 * Initialize the ntfs specific part of @vi special casing
+	 * FILE_MFT which we need to do at mount time.
+	 */
+	if (vi->i_ino != FILE_MFT)
+		ntfs_init_big_inode(vi);
+	ni = NTFS_I(vi);
+	/* Map, pin and lock the mft record for reading. */
+	m = map_mft_record(READ, ni);
+	if (IS_ERR(m)) {
+		err = PTR_ERR(m);
+		goto err_out;
+	}
+	/* Is the record in use? */
+	if (!(m->flags & MFT_RECORD_IN_USE)) {
+		ntfs_error(vi->i_sb, "Inode is not in use! You should "
+				"run chkdsk.");
+		goto unm_err_out;
+	}
+	/* Is this an extent mft record / inode? Treat same as if not in use. */
+	if (m->base_mft_record) {
+		ntfs_error(vi->i_sb, "Inode is an extent inode! iget() "
+				"not possible. You should run chkdsk.");
+		goto unm_err_out;
+	}
+	/* Transfer information from mft record into vfs and ntfs inodes. */
+	/* Cache the sequence number in the ntfs inode. */
+	ni->seq_no = le16_to_cpu(m->sequence_number);
+	/*
+	 * FIXME: Keep in mind that link_count is two for files which have both
+	 * a long file name and a short file name as separate entries, so if
+	 * we are hiding short file names this will be too high. Either we need
+	 * to account for the short file names by subtracting them or we need
+	 * to make sure we delete files even though i_nlink is not zero which
+	 * might be tricky due to vfs interactions. Need to think about this
+	 * some more when implementing the unlink command.
+	 */
+	vi->i_nlink = le16_to_cpu(m->link_count);
+	/*
+	 * FIXME: Reparse points can have the directory bit set even though
+	 * they would be S_IFLNK. Need to deal with this further below when we
+	 * implement reparse points / symbolic links but it will do for now.
+	 * Also if not a directory, it could be something else, rather than
+	 * a regular file. But again, will do for now.
+	 */
+	if (m->flags & MFT_RECORD_IS_DIRECTORY) {
+		vi->i_mode |= S_IFDIR;
+		/*
+		 * Linux/Unix do not support directory hard links and things
+		 * break without this kludge.
+		 */
+		if (vi->i_nlink > 1)
+			vi->i_nlink = 1;
+	} else
+		vi->i_mode |= S_IFREG;
+	err = get_attr_search_ctx(&ctx, ni, m);
+	if (err)
+		goto unm_err_out;
+	/*
+	 * Find the standard information attribute in the mft record. At this
+	 * stage we haven't setup the attribute list stuff yet, so this could
+	 * in fact fail if the standard information is in an extent record, but
+	 * I don't think this actually ever happens.
+	 */
+	if (!lookup_attr(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
+			ctx)) {
+		/*
+		 * TODO: We should be performing a hot fix here (if the recover
+		 * mount option is set) by creating a new attribute.
+		 */
+		ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute is "
+				"missing.");
+		goto put_unm_err_out;
+	}
+	/* Get the standard information attribute value. */
+	si = (STANDARD_INFORMATION*)((char*)ctx->attr +
+			le16_to_cpu(ctx->attr->_ARA(value_offset)));
+	/* Transfer information from the standard information into vfs_ino. */
+	/*
+	 * Note: The i_?times do not quite map perfectly onto the NTFS times,
+	 * but they are close enough, and in the end it doesn't really matter
+	 * that much...
+	 */
+	/*
+	 * mtime is the last change of the data within the file. Not changed
+	 * when only metadata is changed, e.g. a rename doesn't affect mtime.
+	 */
+	vi->i_mtime = ntfs2utc(si->last_data_change_time);
+	/*
+	 * ctime is the last change of the metadata of the file. This obviously
+	 * always changes, when mtime is changed. ctime can be changed on its
+	 * own, mtime is then not changed, e.g. when a file is renamed.
+	 */
+	vi->i_ctime = ntfs2utc(si->last_mft_change_time);
+	/*
+	 * Last access to the data within the file. Not changed during a rename
+	 * for example but changed whenever the file is written to.
+	 */
+	vi->i_atime = ntfs2utc(si->last_access_time);
+	/*
+	 * Find the attribute list attribute and set the corresponding bit in
+	 * ntfs_ino->state.
+	 */
+	reinit_attr_search_ctx(ctx);
+	if (lookup_attr(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx)) {
+		if (vi->i_ino == FILE_MFT)
+			goto skip_attr_list_load;
+		ntfs_debug("Attribute list found in inode %li (0x%lx).",
+				vi->i_ino, vi->i_ino);
+		ni->state |= 1 << NI_AttrList;
+		if (ctx->attr->flags & ATTR_IS_ENCRYPTED ||
+				ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+			ntfs_error(vi->i_sb, "Attribute list attribute is "
+					"compressed/encrypted. Not allowed. "
+					"Corrupt inode. You should run "
+					"chkdsk.");
+			goto put_unm_err_out;
+		}
+		/* Now allocate memory for the attribute list. */
+		ni->attr_list_size = (u32)attribute_value_length(ctx->attr);
+		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
+		if (!ni->attr_list) {
+			ntfs_error(vi->i_sb, "Not enough memory to allocate "
+					"buffer for attribute list.");
+			err = -ENOMEM;
+			goto ec_put_unm_err_out;
+		}
+		if (ctx->attr->non_resident) {
+			ni->state |= 1 << NI_AttrListNonResident;
+			if (ctx->attr->_ANR(lowest_vcn)) {
+				ntfs_error(vi->i_sb, "Attribute list has non "
+						"zero lowest_vcn. Inode is "
+						"corrupt. You should run "
+						"chkdsk.");
+				goto put_unm_err_out;
+			}
+			/*
+			 * Setup the run list. No need for locking as we have
+			 * exclusive access to the inode at this time.
+			 */
+			ni->attr_list_rl.rl = decompress_mapping_pairs(vol,
+					ctx->attr, NULL);
+			if (IS_ERR(ni->attr_list_rl.rl)) {
+				err = PTR_ERR(ni->attr_list_rl.rl);
+				ni->attr_list_rl.rl = NULL;
+				ntfs_error(vi->i_sb, "Mapping pairs "
+						"decompression failed with "
+						"error code %i. Corrupt "
+						"attribute list in inode.",
+						-err);
+				goto ec_put_unm_err_out;
+			}
+			/*
+			 * Now load the attribute list. Again no need for
+			 * locking as above.
+			 */
+			if ((err = load_attribute_list(vol, ni->attr_list_rl.rl,
+					ni->attr_list, ni->attr_list_size))) {
+				ntfs_error(vi->i_sb, "Failed to load "
+						"attribute list attribute.");
+				goto ec_put_unm_err_out;
+			}
+		} else /* if (!ctx.attr->non_resident) */ {
+			if ((u8*)ctx->attr + le16_to_cpu(
+					ctx->attr->_ARA(value_offset)) +
+					le32_to_cpu(
+					ctx->attr->_ARA(value_length)) >
+					(u8*)ctx->mrec + vol->mft_record_size) {
+				ntfs_error(vi->i_sb, "Corrupt attribute list "
+						"in inode.");
+				goto put_unm_err_out;
+			}
+			/* Now copy the attribute list. */
+			memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu(
+					ctx->attr->_ARA(value_offset)),
+					le32_to_cpu(
+					ctx->attr->_ARA(value_length)));
+		}
+	}
+skip_attr_list_load:
+	/*
+	 * If an attribute list is present we now have the attribute list value
+	 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
+	 */
+	if (S_ISDIR(vi->i_mode)) {
+		INDEX_ROOT *ir;
+		char *ir_end, *index_end;
+		/* It is a directory, find index root attribute. */
+		reinit_attr_search_ctx(ctx);
+		if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0,
+				NULL, 0, ctx)) {
+			// FIXME: File is corrupt! Hot-fix with empty index
+			// root attribute if recovery option is set.
+			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
+					"missing.");
+			goto put_unm_err_out;
+		}
+		/* Set up the state. */
+		if (ctx->attr->non_resident) {
+			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
+					"not resident. Not allowed.");
+			goto put_unm_err_out;
+		}
+		/*
+		 * Compressed/encrypted index root just means that the newly
+		 * created files in that directory should be created compressed/
+		 * encrypted. However index root cannot be both compressed and
+		 * encrypted.
+		 */
+		if (ctx->attr->flags & ATTR_COMPRESSION_MASK)
+			ni->state |= 1 << NI_Compressed;
+		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
+			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+				ntfs_error(vi->i_sb, "Found encrypted and "
+						"compressed attribute. Not "
+						"allowed.");
+				goto put_unm_err_out;
+			}
+			ni->state |= 1 << NI_Encrypted;
+		}
+		ir = (INDEX_ROOT*)((char*)ctx->attr +
+				le16_to_cpu(ctx->attr->_ARA(value_offset)));
+		ir_end = (char*)ir + le32_to_cpu(ctx->attr->_ARA(value_length));
+		if (ir_end > (char*)ctx->mrec + vol->mft_record_size) {
+			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
+					"corrupt.");
+			goto put_unm_err_out;
+		}
+		index_end = (char*)&ir->index +
+				le32_to_cpu(ir->index.index_length);
+		if (index_end > ir_end) {
+			ntfs_error(vi->i_sb, "Directory index is corrupt.");
+			goto put_unm_err_out;
+		}
+		if (ir->type != AT_FILE_NAME) {
+			ntfs_error(vi->i_sb, __FUNCTION__ "(): Indexed "
+					"attribute is not $FILE_NAME. Not "
+					"allowed.");
+			goto put_unm_err_out;
+		}
+		if (ir->collation_rule != COLLATION_FILE_NAME) {
+			ntfs_error(vi->i_sb, "Index collation rule is not "
+					"COLLATION_FILE_NAME. Not allowed.");
+			goto put_unm_err_out;
+		}
+		ni->_IDM(index_block_size) = le32_to_cpu(ir->index_block_size);
+		if (ni->_IDM(index_block_size) &
+				(ni->_IDM(index_block_size) - 1)) {
+			ntfs_error(vi->i_sb, "Index block size (%u) is not a "
+					"power of two.",
+					ni->_IDM(index_block_size));
+			goto put_unm_err_out;
+		}
+		if (ni->_IDM(index_block_size) > PAGE_CACHE_SIZE) {
+			ntfs_error(vi->i_sb, "Index block size (%u) > "
+					"PAGE_CACHE_SIZE (%ld) is not "
+					"supported. Sorry.",
+					ni->_IDM(index_block_size),
+					PAGE_CACHE_SIZE);
+			err = -EOPNOTSUPP;
+			goto ec_put_unm_err_out;
+		}
+		if (ni->_IDM(index_block_size) < NTFS_BLOCK_SIZE) {
+			ntfs_error(vi->i_sb, "Index block size (%u) < "
+					"NTFS_BLOCK_SIZE (%i) is not "
+					"supported. Sorry.",
+					ni->_IDM(index_block_size),
+					NTFS_BLOCK_SIZE);
+			err = -EOPNOTSUPP;
+			goto ec_put_unm_err_out;
+		}
+		ni->_IDM(index_block_size_bits) =
+				ffs(ni->_IDM(index_block_size)) - 1;
+		/* Determine the size of a vcn in the directory index. */
+		if (vol->cluster_size <= ni->_IDM(index_block_size)) {
+			ni->_IDM(index_vcn_size) = vol->cluster_size;
+			ni->_IDM(index_vcn_size_bits) = vol->cluster_size_bits;
+		} else {
+			ni->_IDM(index_vcn_size) = vol->sector_size;
+			ni->_IDM(index_vcn_size_bits) = vol->sector_size_bits;
+		}
+		if (!(ir->index.flags & LARGE_INDEX)) {
+			/* No index allocation. */
+			vi->i_size = ni->initialized_size = 0;
+			goto skip_large_dir_stuff;
+		} /* LARGE_INDEX: Index allocation present. Setup state. */
+		ni->state |= 1 << NI_NonResident;
+		/* Find index allocation attribute. */
+		reinit_attr_search_ctx(ctx);
+		if (!lookup_attr(AT_INDEX_ALLOCATION, I30, 4, CASE_SENSITIVE,
+				0, NULL, 0, ctx)) {
+			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
+					"is not present but $INDEX_ROOT "
+					"indicated it is.");
+			goto put_unm_err_out;
+		}
+		if (!ctx->attr->non_resident) {
+			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
+					"is resident.");
+			goto put_unm_err_out;
+		}
+		if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
+			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
+					"is encrypted.");
+			goto put_unm_err_out;
+		}
+		if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
+					"is compressed.");
+			goto put_unm_err_out;
+		}
+		if (ctx->attr->_ANR(lowest_vcn)) {
+			ntfs_error(vi->i_sb, "First extent of "
+					"$INDEX_ALLOCATION attribute has non "
+					"zero lowest_vcn. Inode is corrupt. "
+					"You should run chkdsk.");
+			goto put_unm_err_out;
+		}
+		vi->i_size = sle64_to_cpu(ctx->attr->_ANR(data_size));
+		ni->initialized_size = sle64_to_cpu(
+				ctx->attr->_ANR(initialized_size));
+		ni->allocated_size = sle64_to_cpu(
+				ctx->attr->_ANR(allocated_size));
+		/*
+		 * Setup the run list. No need for locking as we have exclusive
+		 * access to the inode at this time.
+		 */
+		ni->run_list.rl = decompress_mapping_pairs(vol, ctx->attr,
+				NULL);
+		if (IS_ERR(ni->run_list.rl)) {
+			err = PTR_ERR(ni->run_list.rl);
+			ni->run_list.rl = NULL;
+			ntfs_error(vi->i_sb, "Mapping pairs decompression "
+					"failed with error code %i.", -err);
+			goto ec_put_unm_err_out;
+		}
+		/* Find bitmap attribute. */
+		reinit_attr_search_ctx(ctx);
+		if (!lookup_attr(AT_BITMAP, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
+				ctx)) {
+			ntfs_error(vi->i_sb, "$BITMAP attribute is not "
+					"present but it must be.");
+			goto put_unm_err_out;
+		}
+		if (ctx->attr->flags & (ATTR_COMPRESSION_MASK |
+				ATTR_IS_ENCRYPTED)) {
+			ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
+					"and/or encrypted.");
+			goto put_unm_err_out;
+		}
+		if (ctx->attr->non_resident) {
+			ni->state |= 1 << NI_BmpNonResident;
+			if (ctx->attr->_ANR(lowest_vcn)) {
+				ntfs_error(vi->i_sb, "First extent of $BITMAP "
+						"attribute has non zero "
+						"lowest_vcn. Inode is corrupt. "
+						"You should run chkdsk.");
+				goto put_unm_err_out;
+			}
+			ni->_IDM(bmp_size) = sle64_to_cpu(
+					ctx->attr->_ANR(data_size));
+			ni->_IDM(bmp_initialized_size) = sle64_to_cpu(
+					ctx->attr->_ANR(initialized_size));
+			ni->_IDM(bmp_allocated_size) = sle64_to_cpu(
+					ctx->attr->_ANR(allocated_size));
+			/*
+			 * Setup the run list. No need for locking as we have
+			 * exclusive access to the inode at this time.
+			 */
+			ni->_IDM(bmp_rl).rl = decompress_mapping_pairs(vol,
+					ctx->attr, NULL);
+			if (IS_ERR(ni->_IDM(bmp_rl).rl)) {
+				err = PTR_ERR(ni->_IDM(bmp_rl).rl);
+				ni->_IDM(bmp_rl).rl = NULL;
+				ntfs_error(vi->i_sb, "Mapping pairs "
+						"decompression failed with "
+						"error code %i.", -err);
+				goto ec_put_unm_err_out;
+			}
+		} else
+			ni->_IDM(bmp_size) = ni->_IDM(bmp_initialized_size) =
+					ni->_IDM(bmp_allocated_size) =
+					le32_to_cpu(
+					ctx->attr->_ARA(value_length));
+		/* Consistency check bitmap size vs. index allocation size. */
+		if (ni->_IDM(bmp_size) << 3 < vi->i_size >>
+				ni->_IDM(index_block_size_bits)) {
+			ntfs_error(vi->i_sb, "$I30 bitmap too small (0x%Lx) "
+					"for index allocation (0x%Lx).",
+					(long long)ni->_IDM(bmp_size) << 3,
+					vi->i_size);
+			goto put_unm_err_out;
+		}
+skip_large_dir_stuff:
+		/* Everyone gets read and scan permissions. */
+		vi->i_mode |= S_IRUGO | S_IXUGO;
+		/* If not read-only, set write permissions. */
+		if (!IS_RDONLY(vi))
+			vi->i_mode |= S_IWUGO;
+		/*
+		 * Apply the directory permissions mask set in the mount
+		 * options.
+		 */
+		vi->i_mode &= ~vol->dmask;
+		/* Setup the operations for this inode. */
+		vi->i_op = &ntfs_dir_inode_ops;
+		vi->i_fop = &ntfs_dir_ops;
+		vi->i_mapping->a_ops = &ntfs_dir_aops;
+	} else {
+		/* It is a file: find first extent of unnamed data attribute. */
+		reinit_attr_search_ctx(ctx);
+		if (!lookup_attr(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx)) {
+			vi->i_size = ni->initialized_size =
+					ni->allocated_size = 0LL;
+			/*
+			 * FILE_Secure does not have an unnamed $DATA
+			 * attribute, so we special case it here.
+			 */
+			if (vi->i_ino == FILE_Secure)
+				goto no_data_attr_special_case;
+			/*
+			 * Most if not all the system files in the $Extend
+			 * system directory do not have unnamed data
+			 * attributes so we need to check if the parent
+			 * directory of the file is FILE_Extend and if it is
+			 * ignore this error. To do this we need to get the
+			 * name of this inode from the mft record as the name
+			 * contains the back reference to the parent directory.
+			 */
+			if (ntfs_is_extended_system_file(ctx) > 0)
+				goto no_data_attr_special_case;
+			// FIXME: File is corrupt! Hot-fix with empty data
+			// attribute if recovery option is set.
+			ntfs_error(vi->i_sb, "$DATA attribute is "
+					"missing.");
+			goto put_unm_err_out;
+		}
+		/* Setup the state. */
+		if (ctx->attr->non_resident) {
+			ni->state |= 1 << NI_NonResident;
+			if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+				ni->state |= 1 << NI_Compressed;
+				if (vol->cluster_size > 4096) {
+					ntfs_error(vi->i_sb, "Found "
+						"compressed data but "
+						"compression is disabled due "
+						"to cluster size (%i) > 4kiB.",
+						vol->cluster_size);
+					goto put_unm_err_out;
+				}
+				if ((ctx->attr->flags & ATTR_COMPRESSION_MASK)
+						!= ATTR_IS_COMPRESSED) {
+					ntfs_error(vi->i_sb, "Found "
+						"unknown compression method or "
+						"corrupt file.");
+					goto put_unm_err_out;
+				}
+				ni->_ICF(compression_block_clusters) = 1U <<
+					ctx->attr->_ANR(compression_unit);
+				if (ctx->attr->_ANR(compression_unit) != 4) {
+					ntfs_error(vi->i_sb, "Found "
+						"nonstandard compression unit "
+						"(%u instead of 4). Cannot "
+						"handle this. This might "
+						"indicate corruption so you "
+						"should run chkdsk.",
+					     ctx->attr->_ANR(compression_unit));
+					err = -EOPNOTSUPP;
+					goto ec_put_unm_err_out;
+				}
+				ni->_ICF(compression_block_size) = 1U << (
+					       ctx->attr->_ANR(compression_unit)
+						+ vol->cluster_size_bits);
+				ni->_ICF(compression_block_size_bits) = ffs(
+					ni->_ICF(compression_block_size)) - 1;
+			}
+			if (ctx->attr->flags & ATTR_IS_ENCRYPTED) {
+				if (ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+					ntfs_error(vi->i_sb, "Found encrypted "
+							"and compressed data.");
+					goto put_unm_err_out;
+				}
+				ni->state |= 1 << NI_Encrypted;
+			}
+			if (ctx->attr->_ANR(lowest_vcn)) {
+				ntfs_error(vi->i_sb, "First extent of $DATA "
+						"attribute has non zero "
+						"lowest_vcn. Inode is corrupt. "
+						"You should run chkdsk.");
+				goto put_unm_err_out;
+			}
+			/* $MFT is special as we have the run_list already. */
+			if (likely(vi->i_ino != FILE_MFT)) {
+				/*
+				 * Setup the run list. No need for locking as
+				 * we have exclusive access to the inode at
+				 * this time.
+				 */
+				ni->run_list.rl = decompress_mapping_pairs(vol,
+						ctx->attr, NULL);
+				if (IS_ERR(ni->run_list.rl)) {
+					err = PTR_ERR(ni->run_list.rl);
+					ni->run_list.rl = NULL;
+					ntfs_error(vi->i_sb, "Mapping pairs "
+							"decompression failed "
+							"with error code %i.",
+							-err);
+					goto ec_put_unm_err_out;
+				}
+			}
+			/* Setup all the sizes. */
+			vi->i_size = sle64_to_cpu(ctx->attr->_ANR(data_size));
+			ni->initialized_size = sle64_to_cpu(
+					ctx->attr->_ANR(initialized_size));
+			ni->allocated_size = sle64_to_cpu(
+					ctx->attr->_ANR(allocated_size));
+			if (NInoCompressed(ni))
+				ni->_ICF(compressed_size) = sle64_to_cpu(
+					ctx->attr->_ANR(compressed_size));
+		} else { /* Resident attribute. */
+			/*
+			 * Make all sizes equal for simplicity in read code
+			 * paths. FIXME: Need to keep this in mind when
+			 * converting to non-resident attribute in write code
+			 * path. (Probably only affects truncate().)
+			 */
+			vi->i_size = ni->initialized_size = ni->allocated_size =
+				le32_to_cpu(ctx->attr->_ARA(value_length));
+		}
+no_data_attr_special_case:
+		/* Everyone gets read permissions. */
+		vi->i_mode |= S_IRUGO;
+		/* If not read-only, set write permissions. */
+		if (!IS_RDONLY(vi))
+			vi->i_mode |= S_IWUGO;
+		/* Apply the file permissions mask set in the mount options. */
+		vi->i_mode &= ~vol->fmask;
+		// FIXME: Encrypted files should probably get their rw bits
+		// taken away here.
+		/* Setup the operations for this inode. */
+		vi->i_op = &ntfs_file_inode_ops;
+		vi->i_fop = &ntfs_file_ops;
+		vi->i_mapping->a_ops = &ntfs_file_aops;
+	}
+	/*
+	 * The number of 512-byte blocks used on disk (for stat). This is in so
+	 * far inaccurate as it doesn't account for any named streams or other
+	 * special non-resident attributes, but that is how Windows works, too,
+	 * so we are at least consistent with Windows, if not entirely
+	 * consistent with the Linux Way. Doing it the Linux Way would cause a
+	 * significant slowdown as it would involve iterating over all
+	 * attributes in the mft record and adding the allocated/compressed
+	 * sizes of all non-resident attributes present to give us the Linux
+	 * correct size that should go into i_blocks (after division by 512).
+	 */
+	if (!NInoCompressed(ni))
+		vi->i_blocks = ni->allocated_size >> 9;
+	else
+		vi->i_blocks = ni->_ICF(compressed_size) >> 9;
+	/* Done. */
+	put_attr_search_ctx(ctx);
+	unmap_mft_record(READ, ni);
+	ntfs_debug("Done.");
+	return;
+ec_put_unm_err_out:
+	put_attr_search_ctx(ctx);
+	goto ec_unm_err_out;
+put_unm_err_out:
+	put_attr_search_ctx(ctx);
+unm_err_out:
+	err = -EIO;
+ec_unm_err_out:
+	unmap_mft_record(READ, ni);
+err_out:
+	ntfs_error(vi->i_sb, "Failed with error code %i. Marking inode "
+			"%li (0x%lx) as bad.", -err, vi->i_ino, vi->i_ino);
+	make_bad_inode(vi);
+	return;
+}
+/**
+ * ntfs_read_inode_mount - special read_inode for mount time use only
+ * @vi:		inode to read
+ *
+ * Read inode FILE_MFT at mount time, only called with super_block lock
+ * held from within the read_super() code path.
+ *
+ * This function exists because when it is called the page cache for $MFT/$DATA
+ * is not initialized and hence we cannot get at the contents of mft records
+ * by calling map_mft_record*().
+ *
+ * Further it needs to cope with the circular references problem, i.e. can't
+ * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
+ * we don't know where the other extent mft records are yet and again, because
+ * we cannot call map_mft_record*() yet. Obviously this applies only when an
+ * attribute list is actually present in $MFT inode.
+ *
+ * We solve these problems by starting with the $DATA attribute before anything
+ * else and iterating using lookup_attr($DATA) over all extents. As each extent
+ * is found, we decompress_mapping_pairs() including the implied
+ * merge_run_lists(). Each step of the iteration necessarily provides
+ * sufficient information for the next step to complete.
+ *
+ * This should work, although there are two possible pitfalls (see inline
+ * comments below); only time will tell if they are real pits or just smoke...
+ */
+void ntfs_read_inode_mount(struct inode *vi)
+{
+	VCN next_vcn, last_vcn, highest_vcn;
+	s64 block;
+	struct super_block *sb = vi->i_sb;
+	ntfs_volume *vol = NTFS_SB(sb);
+	struct buffer_head *bh;
+	ntfs_inode *ni;
+	MFT_RECORD *m = NULL;
+	ATTR_RECORD *attr;
+	attr_search_context *ctx;
+	unsigned int i, nr_blocks;
+	int err;
+	ntfs_debug("Entering.");
+	/* Initialize the ntfs specific part of @vi. */
+	ntfs_init_big_inode(vi);
+	ni = NTFS_I(vi);
+	if (vi->i_ino != FILE_MFT) {
+		ntfs_error(sb, "Called for inode %ld but only inode %d "
+				"allowed.", vi->i_ino, FILE_MFT);
+		goto err_out;
+	}
+	/*
+	 * This sets up our little cheat allowing us to reuse the async io
+	 * completion handler for directories.
+	 */
+	ni->_IDM(index_block_size) = vol->mft_record_size;
+	ni->_IDM(index_block_size_bits) = vol->mft_record_size_bits;
+	/* Very important! Needed to be able to call map_mft_record*(). */
+	vol->mft_ino = vi;
+	/* Allocate enough memory to read the first mft record. */
+	if (vol->mft_record_size > 64 * 1024) {
+		ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
+				vol->mft_record_size);
+		goto err_out;
+	}
+	/*
+	 * The buffer has to hold at least one whole device block because the
+	 * read loop below always copies sb->s_blocksize bytes per block.
+	 */
+	i = vol->mft_record_size;
+	if (i < sb->s_blocksize)
+		i = sb->s_blocksize;
+	m = (MFT_RECORD*)ntfs_malloc_nofs(i);
+	if (!m) {
+		ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
+		goto err_out;
+	}
+	/* Determine the first block of the $MFT/$DATA attribute. */
+	block = vol->mft_lcn << vol->cluster_size_bits >>
+			sb->s_blocksize_bits;
+	nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits;
+	if (!nr_blocks)
+		nr_blocks = 1;
+	/* Load $MFT/$DATA's first mft record. */
+	for (i = 0; i < nr_blocks; i++) {
+		bh = sb_bread(sb, block++);
+		if (!bh) {
+			ntfs_error(sb, "Device read failed.");
+			goto err_out;
+		}
+		memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data,
+				sb->s_blocksize);
+		brelse(bh);
+	}
+	/* Apply the mst fixups. */
+	if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
+		/* FIXME: Try to use the $MFTMirr now. */
+		ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
+		goto err_out;
+	}
+	/* Need this to sanity check attribute list references to $MFT. */
+	ni->seq_no = le16_to_cpu(m->sequence_number);
+	/* Provides readpage() and sync_page() for map_mft_record(READ). */
+	vi->i_mapping->a_ops = &ntfs_mft_aops;
+	err = get_attr_search_ctx(&ctx, ni, m);
+	if (err)
+		goto err_out;
+	/* Find the attribute list attribute if present. */
+	if (lookup_attr(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx)) {
+		ATTR_LIST_ENTRY *al_entry, *next_al_entry;
+		u8 *al_end;
+		ntfs_debug("Attribute list attribute found in $MFT.");
+		ni->state |= 1 << NI_AttrList;
+		if (ctx->attr->flags & ATTR_IS_ENCRYPTED ||
+				ctx->attr->flags & ATTR_COMPRESSION_MASK) {
+			ntfs_error(sb, "Attribute list attribute is "
+					"compressed/encrypted. Not allowed. "
+					"$MFT is corrupt. You should run "
+					"chkdsk.");
+			goto put_err_out;
+		}
+		/* Now allocate memory for the attribute list. */
+		ni->attr_list_size = (u32)attribute_value_length(ctx->attr);
+		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
+		if (!ni->attr_list) {
+			ntfs_error(sb, "Not enough memory to allocate buffer "
+					"for attribute list.");
+			goto put_err_out;
+		}
+		if (ctx->attr->non_resident) {
+			ni->state |= 1 << NI_AttrListNonResident;
+			if (ctx->attr->_ANR(lowest_vcn)) {
+				ntfs_error(sb, "Attribute list has non zero "
+						"lowest_vcn. $MFT is corrupt. "
+						"You should run chkdsk.");
+				goto put_err_out;
+			}
+			/* Setup the run list. */
+			ni->attr_list_rl.rl = decompress_mapping_pairs(vol,
+					ctx->attr, NULL);
+			if (IS_ERR(ni->attr_list_rl.rl)) {
+				err = PTR_ERR(ni->attr_list_rl.rl);
+				ni->attr_list_rl.rl = NULL;
+				ntfs_error(sb, "Mapping pairs decompression "
+						"failed with error code %i.",
+						-err);
+				goto put_err_out;
+			}
+			/* Now load the attribute list. */
+			if ((err = load_attribute_list(vol, ni->attr_list_rl.rl,
+					ni->attr_list, ni->attr_list_size))) {
+				ntfs_error(sb, "Failed to load attribute list "
+						"attribute with error code %i.",
+						-err);
+				goto put_err_out;
+			}
+		} else /* if (!ctx.attr->non_resident) */ {
+			/* The resident value must lie inside the mft record. */
+			if ((u8*)ctx->attr + le16_to_cpu(
+					ctx->attr->_ARA(value_offset)) +
+					le32_to_cpu(
+					ctx->attr->_ARA(value_length)) >
+					(u8*)ctx->mrec + vol->mft_record_size) {
+				ntfs_error(sb, "Corrupt attribute list "
+						"attribute.");
+				goto put_err_out;
+			}
+			/* Now copy the attribute list. */
+			memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu(
+					ctx->attr->_ARA(value_offset)),
+					le32_to_cpu(
+					ctx->attr->_ARA(value_length)));
+		}
+		/* The attribute list is now setup in memory. */
+		/*
+		 * FIXME: I don't know if this case is actually possible.
+		 * According to logic it is not possible but I have seen too
+		 * many weird things in MS software to rely on logic... Thus we
+		 * perform a manual search and make sure the first $MFT/$DATA
+		 * extent is in the base inode. If it is not we abort with an
+		 * error and if we ever see a report of this error we will need
+		 * to do some magic in order to have the necessary mft record
+		 * loaded and in the right place in the page cache. But
+		 * hopefully logic will prevail and this never happens...
+		 */
+		al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
+		al_end = (u8*)al_entry + ni->attr_list_size;
+		for (;; al_entry = next_al_entry) {
+			/* Out of bounds check. */
+			if ((u8*)al_entry < ni->attr_list ||
+					(u8*)al_entry > al_end)
+				goto em_put_err_out;
+			/* Catch the end of the attribute list. */
+			if ((u8*)al_entry == al_end)
+				goto em_put_err_out;
+			/*
+			 * The fixed 6-byte entry prefix (presumably the type
+			 * and length fields) must lie inside the buffer
+			 * before ->length is read, otherwise a truncated
+			 * trailing entry makes us read past the end of the
+			 * attribute list.
+			 */
+			if ((u8*)al_entry + 6 > al_end)
+				goto em_put_err_out;
+			/* A zero length entry would make us loop forever. */
+			if (!al_entry->length)
+				goto em_put_err_out;
+			/* The whole entry must fit inside the buffer. */
+			if ((u8*)al_entry +
+					le16_to_cpu(al_entry->length) > al_end)
+				goto em_put_err_out;
+			next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
+					le16_to_cpu(al_entry->length));
+			/* Gone past where $DATA could be, so it is absent. */
+			if (le32_to_cpu(al_entry->type) >
+					const_le32_to_cpu(AT_DATA))
+				goto em_put_err_out;
+			if (AT_DATA != al_entry->type)
+				continue;
+			/* We want an unnamed attribute. */
+			if (al_entry->name_length)
+				goto em_put_err_out;
+			/* Want the first entry, i.e. lowest_vcn == 0. */
+			if (al_entry->lowest_vcn)
+				goto em_put_err_out;
+			/* First entry has to be in the base mft record. */
+			if (MREF_LE(al_entry->mft_reference) != ni->mft_no) {
+				/* MFT references do not match, logic fails. */
+				ntfs_error(sb, "BUG: The first $DATA extent "
+						"of $MFT is not in the base "
+						"mft record. Please report "
+						"you saw this message to "
+						"linux-ntfs-dev@lists.sf.net");
+				goto put_err_out;
+			} else {
+				/* Sequence numbers must match. */
+				if (MSEQNO_LE(al_entry->mft_reference) !=
+						ni->seq_no)
+					goto em_put_err_out;
+				/* Got it. All is ok. We can stop now. */
+				break;
+			}
+		}
+	}
+	reinit_attr_search_ctx(ctx);
+	/* Now load all attribute extents. */
+	attr = NULL;
+	next_vcn = last_vcn = highest_vcn = 0;
+	while (lookup_attr(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0, ctx)) {
+		run_list_element *nrl;
+		/* Cache the current attribute. */
+		attr = ctx->attr;
+		/* $MFT must be non-resident. */
+		if (!attr->non_resident) {
+			ntfs_error(sb, "$MFT must be non-resident but a "
+					"resident extent was found. $MFT is "
+					"corrupt. Run chkdsk.");
+			goto put_err_out;
+		}
+		/* $MFT must be uncompressed and unencrypted. */
+		if (attr->flags & ATTR_COMPRESSION_MASK ||
+				attr->flags & ATTR_IS_ENCRYPTED) {
+			ntfs_error(sb, "$MFT must be uncompressed and "
+					"unencrypted but a compressed/"
+					"encrypted extent was found. "
+					"$MFT is corrupt. Run chkdsk.");
+			goto put_err_out;
+		}
+		/*
+		 * Decompress the mapping pairs array of this extent and merge
+		 * the result into the existing run list. No need for locking
+		 * as we have exclusive access to the inode at this time and we
+		 * are a mount in progress task, too.
+		 */
+		nrl = decompress_mapping_pairs(vol, attr, ni->run_list.rl);
+		if (IS_ERR(nrl)) {
+			ntfs_error(sb, "decompress_mapping_pairs() failed with "
+					"error code %ld. $MFT is corrupt.",
+					PTR_ERR(nrl));
+			goto put_err_out;
+		}
+		ni->run_list.rl = nrl;
+		/* Are we in the first extent? */
+		if (!next_vcn) {
+			if (attr->_ANR(lowest_vcn)) {
+				ntfs_error(sb, "First extent of $DATA "
+						"attribute has non zero "
+						"lowest_vcn. $MFT is corrupt. "
+						"You should run chkdsk.");
+				goto put_err_out;
+			}
+			/* Get the last vcn in the $DATA attribute. */
+			last_vcn = sle64_to_cpu(attr->_ANR(allocated_size)) >>
+					vol->cluster_size_bits;
+			/* Fill in the inode size. */
+			vi->i_size = sle64_to_cpu(attr->_ANR(data_size));
+			ni->initialized_size = sle64_to_cpu(
+					attr->_ANR(initialized_size));
+			ni->allocated_size = sle64_to_cpu(
+					attr->_ANR(allocated_size));
+			/* Set the number of mft records. */
+			vol->_VMM(nr_mft_records) = vi->i_size >>
+					vol->mft_record_size_bits;
+			/*
+			 * We have got the first extent of the run_list for
+			 * $MFT which means it is now relatively safe to call
+			 * the normal ntfs_read_inode() function. Thus, take
+			 * us out of the calling chain. Also we need to do this
+			 * now because we need ntfs_read_inode() in place to
+			 * get at subsequent extents.
+			 */
+			sb->s_op = &ntfs_sops;
+			/*
+			 * Complete reading the inode, this will actually
+			 * re-read the mft record for $MFT, this time entering
+			 * it into the page cache with which we complete the
+			 * kick start of the volume. It should be safe to do
+			 * this now as the first extent of $MFT/$DATA is
+			 * already known and we would hope that we don't need
+			 * further extents in order to find the other
+			 * attributes belonging to $MFT. Only time will tell if
+			 * this is really the case. If not we will have to play
+			 * magic at this point, possibly duplicating a lot of
+			 * ntfs_read_inode() at this point. We will need to
+			 * ensure we do enough of its work to be able to call
+			 * ntfs_read_inode() on extents of $MFT/$DATA. But let's
+			 * hope this never happens...
+			 */
+			ntfs_read_inode(vi);
+			if (is_bad_inode(vi)) {
+				ntfs_error(sb, "ntfs_read_inode() of $MFT "
+						"failed. BUG or corrupt $MFT. "
+						"Run chkdsk and if no errors "
+						"are found, please report you "
+						"saw this message to "
+						"linux-ntfs-dev@lists.sf.net");
+				put_attr_search_ctx(ctx);
+				/* Revert to the safe super operations. */
+				sb->s_op = &ntfs_mount_sops;
+				/*
+				 * The inode is already marked bad, so skip
+				 * err_out and only release the buffer.
+				 */
+				goto out_now;
+			}
+			/*
+			 * Re-initialize some specifics about $MFT's inode as
+			 * ntfs_read_inode() will have set up the default ones.
+			 */
+			/* Set uid and gid to root. */
+			vi->i_uid = vi->i_gid = 0;
+			/* Regular file. No access for anyone. */
+			vi->i_mode = S_IFREG;
+			/* No VFS initiated operations allowed for $MFT. */
+			vi->i_op = &ntfs_empty_inode_ops;
+			vi->i_fop = &ntfs_empty_file_ops;
+			/* Put back our special address space operations. */
+			vi->i_mapping->a_ops = &ntfs_mft_aops;
+		}
+		/* Get the lowest vcn for the next extent. */
+		highest_vcn = sle64_to_cpu(attr->_ANR(highest_vcn));
+		next_vcn = highest_vcn + 1;
+		/* Only one extent or error, which we catch below. */
+		if (next_vcn <= 0)
+			break;
+		/* Avoid endless loops due to corruption. */
+		if (next_vcn < sle64_to_cpu(attr->_ANR(lowest_vcn))) {
+			ntfs_error(sb, "$MFT has corrupt attribute list "
+					"attribute. Run chkdsk.");
+			goto put_err_out;
+		}
+	}
+	/* @attr is only set once at least one $DATA extent was found. */
+	if (!attr) {
+		ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
+				"corrupt. Run chkdsk.");
+		goto put_err_out;
+	}
+	/* The last extent loaded must end where allocated_size says. */
+	if (highest_vcn && highest_vcn != last_vcn - 1) {
+		ntfs_error(sb, "Failed to load the complete run list "
+				"for $MFT/$DATA. Driver bug or "
+				"corrupt $MFT. Run chkdsk.");
+		ntfs_debug("highest_vcn = 0x%Lx, last_vcn - 1 = 0x%Lx",
+				(long long)highest_vcn, (long long)last_vcn - 1);
+		goto put_err_out;
+	}
+	put_attr_search_ctx(ctx);
+	ntfs_debug("Done.");
+out_now:
+	/* Common exit; @m is still NULL if we failed before allocating it. */
+	ntfs_free(m);
+	return;
+em_put_err_out:
+	ntfs_error(sb, "Couldn't find first extent of $DATA attribute in "
+			"attribute list. $MFT is corrupt. Run chkdsk.");
+put_err_out:
+	put_attr_search_ctx(ctx);
+err_out:
+	/* Make sure we revert to the safe super operations. */
+	sb->s_op = &ntfs_mount_sops;
+	ntfs_error(sb, "Failed. Marking inode as bad.");
+	make_bad_inode(vi);
+	goto out_now;
+}
+/**
+ * ntfs_dirty_inode - mark the inode's metadata dirty
+ * @vi:		inode to mark dirty
+ *
+ * This is called from fs/inode.c::__mark_inode_dirty(), when the inode itself
+ * is being marked dirty. An example is when UPDATE_ATIME() is invoked.
+ *
+ * We mark the inode dirty by setting both the page in which the mft record
+ * resides and the buffer heads in that page which correspond to the mft record
+ * dirty. This ensures that the changes will eventually be propagated to disk
+ * when the inode is set dirty.
+ *
+ * FIXME: Can we do that with the buffer heads? I am not too sure. Because if we
+ * do that we need to make sure that the kernel will not write out those buffer
+ * heads or we are screwed as it will write corrupt data to disk. The only way
+ * a mft record can be written correctly is by mst protecting it, writing it
+ * synchronously and fast mst deprotecting it. During this period, obviously,
+ * the mft record must be marked as not uptodate, be locked for writing or
+ * whatever, so that nobody attempts anything stupid.
+ *
+ * FIXME: Do we need to check that the fs is not mounted read only? And what
+ * about the inode? Anything else?
+ *
+ * FIXME: As we are only a read only driver it is safe to just return here for
+ * the moment.
+ */
+void ntfs_dirty_inode(struct inode *vi)
+{
+	ntfs_inode *ni = NTFS_I(vi);
+	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
+	/* At present only the dirty state of the ntfs inode is set here. */
+	NInoSetDirty(ni);
+}
+/**
+ * ntfs_commit_inode - write out a dirty inode
+ * @ni:		inode to write out
+ *
+ * At present nothing is actually written: the function logs its entry, clears
+ * the dirty state of @ni using NInoClearDirty() and reports success.
+ *
+ * Return 0 (always, in the current implementation).
+ */
+int ntfs_commit_inode(ntfs_inode *ni)
+{
+	ntfs_debug("Entering for inode 0x%Lx.",
+			(unsigned long long)ni->mft_no);
+	NInoClearDirty(ni);
+	return 0;
+}
+/**
+ * __ntfs_clear_inode - release everything attached to an ntfs inode
+ * @ni:		ntfs inode to clean up
+ *
+ * Commit @ni if it is dirty, destroy any attached extent inodes and free the
+ * run lists and the attribute list value belonging to @ni. The ntfs inode
+ * structure itself is not freed here.
+ */
+void __ntfs_clear_inode(ntfs_inode *ni)
+{
+	ntfs_debug("Entering for inode 0x%Lx.",
+			(unsigned long long)ni->mft_no);
+	/* Write the inode out first if it is still marked dirty. */
+	if (NInoDirty(ni) && ntfs_commit_inode(ni)) {
+		ntfs_error(ni->vol->sb,
+				"Failed to commit dirty inode synchronously.");
+		// FIXME: Do something!!!
+	}
+	/*
+	 * Synchronize with ntfs_commit_inode() by taking and immediately
+	 * dropping the mft record lock for writing.
+	 */
+	down_write(&ni->mrec_lock);
+	up_write(&ni->mrec_lock);
+	if (NInoDirty(ni)) {
+		ntfs_error(ni->vol->sb,
+				"Failed to commit dirty inode asynchronously.");
+		// FIXME: Do something!!!
+	}
+	/* No need to lock at this stage as no one else has a reference. */
+	if (ni->nr_extents > 0) {
+		ntfs_inode **extents = ni->_INE(extent_ntfs_inos);
+		int nr;
+		// FIXME: Handle dirty case for each extent inode!
+		for (nr = 0; nr < ni->nr_extents; nr++)
+			ntfs_destroy_inode(extents[nr]);
+		/* Finally release the array which held the extent inodes. */
+		kfree(extents);
+	}
+	/* Free all allocated memory. */
+	write_lock(&ni->run_list.lock);
+	ntfs_free(ni->run_list.rl);
+	ni->run_list.rl = NULL;
+	write_unlock(&ni->run_list.lock);
+	ntfs_free(ni->attr_list);
+	write_lock(&ni->attr_list_rl.lock);
+	ntfs_free(ni->attr_list_rl.rl);
+	ni->attr_list_rl.rl = NULL;
+	write_unlock(&ni->attr_list_rl.lock);
+}
+/**
+ * ntfs_clear_inode - clean up and destroy an ntfs inode
+ * @ni:		ntfs inode to clean up and destroy
+ *
+ * Release everything attached to @ni using __ntfs_clear_inode() and then hand
+ * @ni itself to ntfs_destroy_inode().
+ */
+void ntfs_clear_inode(ntfs_inode *ni)
+{
+	__ntfs_clear_inode(ni);
+	/* Bye, bye... */
+	ntfs_destroy_inode(ni);
+}
+/**
+ * ntfs_clear_big_inode - clean up the ntfs specific part of an inode
+ * @vi:		vfs inode pending annihilation
+ *
+ * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
+ * is called, which deallocates all memory belonging to the NTFS specific part
+ * of the inode and returns.
+ *
+ * If the MFT record is dirty, we commit it before doing anything else (this is
+ * done inside __ntfs_clear_inode()).
+ *
+ * For directories the run list of the $I30 bitmap is freed as well. Its
+ * pointer is reset to NULL afterwards, exactly as __ntfs_clear_inode() does
+ * for the other run lists, so that no stale pointer is left behind.
+ */
+void ntfs_clear_big_inode(struct inode *vi)
+{
+	ntfs_inode *ni = NTFS_I(vi);
+	__ntfs_clear_inode(ni);
+	if (S_ISDIR(vi->i_mode)) {
+		write_lock(&ni->_IDM(bmp_rl).lock);
+		ntfs_free(ni->_IDM(bmp_rl).rl);
+		ni->_IDM(bmp_rl).rl = NULL;
+		write_unlock(&ni->_IDM(bmp_rl).lock);
+	}
+}
+/*
+ * Table mapping the individual show_inodes flag values to the names printed by
+ * ntfs_show_options(). The entry with a zero value terminates the table.
+ */
+static const option_t si_readdir_opts_arr[] = {
+	{ SHOW_SYSTEM,	"system" },
+	{ SHOW_WIN32,	"win32" },
+	{ SHOW_DOS,	"dos" },
+	{ 0,		NULL }
+};
+/**
+ * ntfs_show_options - show mount options in /proc/mounts
+ * @sf:		seq_file in which to write our mount options
+ * @mnt:	vfs mount whose mount options to display
+ *
+ * Called by the VFS once for each mounted ntfs volume when someone reads
+ * /proc/mounts in order to display the NTFS specific mount options of each
+ * mount. The mount options of the vfs mount @mnt are written to the seq file
+ * @sf and success is returned.
+ *
+ * Every option is written with a leading comma. If fmask and dmask are equal
+ * a single umask option is shown instead of the two separate ones.
+ *
+ * Return 0 (always).
+ */
+int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
+{
+	ntfs_volume *vol = NTFS_SB(mnt->mnt_sb);
+	int i;
+	seq_printf(sf, ",uid=%i", vol->uid);
+	seq_printf(sf, ",gid=%i", vol->gid);
+	if (vol->fmask == vol->dmask)
+		seq_printf(sf, ",umask=0%o", vol->fmask);
+	else {
+		seq_printf(sf, ",fmask=0%o", vol->fmask);
+		seq_printf(sf, ",dmask=0%o", vol->dmask);
+	}
+	seq_printf(sf, ",nls=%s", vol->nls_map->charset);
+	switch (vol->readdir_opts) {
+	case SHOW_ALL:
+		seq_printf(sf, ",show_inodes=all");
+		break;
+	case SHOW_POSIX:
+		seq_printf(sf, ",show_inodes=posix");
+		break;
+	default:
+		/* Emit one show_inodes option for each flag that is set. */
+		for (i = 0; si_readdir_opts_arr[i].val; i++) {
+			if (si_readdir_opts_arr[i].val & vol->readdir_opts)
+				seq_printf(sf, ",show_inodes=%s",
+						si_readdir_opts_arr[i].str);
+		}
+		break;
+	}
+	/* Likewise, emit one errors option for each flag that is set. */
+	for (i = 0; on_errors_arr[i].val; i++) {
+		if (on_errors_arr[i].val & vol->on_errors)
+			seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
+	}
+	seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
+	return 0;
+}
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
+/*
+ * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of
+ *	     the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_INODE_H
+#define _LINUX_NTFS_INODE_H
+#include <linux/seq_file.h>
+#include "volume.h"
+typedef struct _ntfs_inode ntfs_inode;
+/*
+ * The NTFS in-memory inode structure. It is just used as an extension to the
+ * fields already provided in the VFS inode.
+ */
+struct _ntfs_inode {
+	s64 initialized_size;	/* Copy from $DATA/$INDEX_ALLOCATION. */
+	s64 allocated_size;	/* Copy from $DATA/$INDEX_ALLOCATION. */
+	unsigned long state;	/* NTFS specific flags describing this inode.
+				   See fs/ntfs/ntfs.h:ntfs_inode_state_bits. */
+	u64 mft_no;		/* Mft record number (inode number). */
+	u16 seq_no;		/* Sequence number of the mft record. */
+	atomic_t count;		/* Inode reference count for book keeping. */
+	ntfs_volume *vol;	/* Pointer to the ntfs volume of this inode. */
+	run_list run_list;	/* If state has the NI_NonResident bit set,
+				   the run list of the unnamed data attribute
+				   (if a file) or of the index allocation
+				   attribute (directory). If run_list.rl is
+				   NULL, the run list has not been read in or
+				   has been unmapped. If NI_NonResident is
+				   clear, the unnamed data attribute is
+				   resident (file) or there is no $I30 index
+				   allocation attribute (directory). In that
+				   case run_list.rl is always NULL. */
+	struct rw_semaphore mrec_lock;	/* Lock for serializing access to the
+				   mft record belonging to this inode. */
+	atomic_t mft_count;	/* Mapping reference count for book keeping. */
+	struct page *page;	/* The page containing the mft record of the
+				   inode. This should only be touched by the
+				   (un)map_mft_record*() functions. */
+	int page_ofs;		/* Offset into the page at which the mft record
+				   begins. This should only be touched by the
+				   (un)map_mft_record*() functions. */
+	/*
+	 * Attribute list support (only for use by the attribute lookup
+	 * functions). Setup during read_inode for all inodes with attribute
+	 * lists. Only valid if NI_AttrList is set in state, and attr_list_rl is
+	 * further only valid if NI_AttrListNonResident is set.
+	 */
+	u32 attr_list_size;	/* Length of attribute list value in bytes. */
+	u8 *attr_list;		/* Attribute list value itself. */
+	run_list attr_list_rl;	/* Run list for the attribute list value. */
+	union {
+		struct { /* It is a directory or $MFT. */
+			u32 index_block_size;	/* Size of an index block. */
+			u8 index_block_size_bits; /* Log2 of the above. */
+			u32 index_vcn_size;	/* Size of a vcn in this
+						   directory index. */
+			u8 index_vcn_size_bits;	/* Log2 of the above. */
+			s64 bmp_size;		/* Size of the $I30 bitmap. */
+			s64 bmp_initialized_size; /* Copy from $I30 bitmap. */
+			s64 bmp_allocated_size;	/* Copy from $I30 bitmap. */
+			run_list bmp_rl;	/* Run list for the $I30 bitmap
+						   if it is non-resident. */
+		} SN(idm);
+		struct { /* It is a compressed file. */
+			u32 compression_block_size;     /* Size of a compression
+							   block (cb). */
+			u8 compression_block_size_bits; /* Log2 of the size of
+							   a cb. */
+			u8 compression_block_clusters;  /* Number of clusters
+							   per compression
+							   block. */
+			s64 compressed_size;		/* Copy from $DATA. */
+		} SN(icf);
+	} SN(idc);
+	struct semaphore extent_lock;	/* Lock for accessing/modifying the
+					   below. */
+	s32 nr_extents;	/* For a base mft record, the number of attached extent
+			   inodes (0 if none), for extent records this is -1. */
+	union {		/* This union is only used if nr_extents != 0. */
+		ntfs_inode **extent_ntfs_inos;	/* For nr_extents > 0, array of
+						   the ntfs inodes of the extent
+						   mft records belonging to
+						   this base inode which have
+						   been loaded. */
+		ntfs_inode *base_ntfs_ino;	/* For nr_extents == -1, the
+						   ntfs inode of the base mft
+						   record. */
+	} SN(ine);
+};
+/*
+ * Shorthands for accessing the members of the nested structures/unions of the
+ * ntfs_inode, e.g. ni->_IDM(bmp_rl) or ni->_INE(extent_ntfs_inos).
+ */
+#define _IDM(X)  SC(idc.idm,X)
+#define _ICF(X)  SC(idc.icf,X)
+#define _INE(X)  SC(ine,X)
+/*
+ * An ntfs inode followed by the vfs inode it extends. NTFS_I() and VFS_I()
+ * convert between pointers to the two embedded structures. Both of them cast
+ * between ntfs_inode * and big_ntfs_inode *, so ntfs_inode has to remain the
+ * first member.
+ */
+typedef struct {
+	ntfs_inode ntfs_inode;
+	struct inode vfs_inode;		/* The vfs inode structure. */
+} big_ntfs_inode;
+/**
+ * NTFS_I - return the ntfs inode given a vfs inode
+ * @inode:	VFS inode
+ *
+ * NTFS_I() returns the ntfs inode associated with the VFS @inode, i.e. the
+ * ntfs_inode member of the big_ntfs_inode in which @inode is embedded.
+ */
+static inline ntfs_inode *NTFS_I(struct inode *inode)
+{
+	big_ntfs_inode *big = list_entry(inode, big_ntfs_inode, vfs_inode);
+	return &big->ntfs_inode;
+}
+/**
+ * VFS_I - return the vfs inode given an ntfs inode
+ * @ni:		ntfs inode
+ *
+ * VFS_I() returns the vfs inode embedded in the same big_ntfs_inode as @ni.
+ * @ni is treated as the start of a big_ntfs_inode.
+ */
+static inline struct inode *VFS_I(ntfs_inode *ni)
+{
+	big_ntfs_inode *big = (big_ntfs_inode *)ni;
+	return &big->vfs_inode;
+}
+extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
+extern void ntfs_destroy_big_inode(struct inode *inode);
+extern void ntfs_clear_big_inode(struct inode *vi);
+extern ntfs_inode *ntfs_new_inode(struct super_block *sb);
+extern void ntfs_clear_inode(ntfs_inode *ni);
+extern void ntfs_read_inode(struct inode *vi);
+extern void ntfs_read_inode_mount(struct inode *vi);
+extern void ntfs_dirty_inode(struct inode *vi);
+extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt);
+#endif /* _LINUX_NTFS_INODE_H */
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
+/*
+ * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS
+ *	      project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_LAYOUT_H
+#define _LINUX_NTFS_LAYOUT_H
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <asm/byteorder.h>
+#include "volume.h"
+/*
+ * Constant endianness conversion defines. These are thin wrappers around the
+ * __constant_*() conversions and are used below to initialise the magic
+ * values and the enum constants.
+ */
+#define const_le16_to_cpu(x)	__constant_le16_to_cpu(x)
+#define const_le32_to_cpu(x)	__constant_le32_to_cpu(x)
+#define const_le64_to_cpu(x)	__constant_le64_to_cpu(x)
+#define const_cpu_to_le16(x)	__constant_cpu_to_le16(x)
+#define const_cpu_to_le32(x)	__constant_cpu_to_le32(x)
+#define const_cpu_to_le64(x)	__constant_cpu_to_le64(x)
+/* The NTFS oem_id (compare with NTFS_BOOT_SECTOR.oem_id). */
+#define magicNTFS	const_cpu_to_le64(0x202020205346544e) /* "NTFS    " */
+/*
+ * Location of bootsector on partition:
+ * 	The standard NTFS_BOOT_SECTOR is on sector 0 of the partition.
+ * 	On NT4 and above there is one backup copy of the boot sector to
+ * 	be found on the last sector of the partition (not normally accessible
+ * 	from within Windows as the bootsector contained number of sectors
+ *	value is one less than the actual value!).
+ * 	On versions of NT 3.51 and earlier, the backup copy was located at 
+ * 	number of sectors/2 (integer divide), i.e. in the middle of the volume.
+ */
+/*
+ * BIOS parameter block (bpb) structure.
+ *
+ * The structure is packed; its members add up to 25 bytes.
+ */
+typedef struct {
+	u16 bytes_per_sector;		/* Size of a sector in bytes. */
+	u8  sectors_per_cluster;	/* Size of a cluster in sectors. */
+	u16 reserved_sectors;		/* zero */
+	u8  fats;			/* zero */
+	u16 root_entries;		/* zero */
+	u16 sectors;			/* zero */
+	u8  media_type;			/* 0xf8 = hard disk */
+	u16 sectors_per_fat;		/* zero */
+	u16 sectors_per_track;		/* irrelevant */
+	u16 heads;			/* irrelevant */
+	u32 hidden_sectors;		/* zero */
+	u32 large_sectors;		/* zero */
+} __attribute__ ((__packed__)) BIOS_PARAMETER_BLOCK;
+/*
+ * NTFS boot sector structure.
+ *
+ * The structure is packed; its members add up to 512 bytes, with the
+ * end_of_sector_marker occupying the last two bytes (offset 510).
+ */
+typedef struct {
+	u8  jump[3];			/* Irrelevant (jump to boot up code).*/
+	u64 oem_id;			/* Magic "NTFS    ". */
+	BIOS_PARAMETER_BLOCK bpb;	/* See BIOS_PARAMETER_BLOCK. */
+	u8  unused[4];			/* zero */
+	s64 number_of_sectors;		/* Number of sectors in volume. Gives
+					   maximum volume size of 2^63 sectors.
+					   Assuming standard sector size of 512
+					   bytes, the maximum byte size is
+					   approx. 4.7x10^21 bytes. (-; */
+	s64 mft_lcn;			/* Cluster location of mft data. */
+	s64 mftmirr_lcn;		/* Cluster location of copy of mft. */
+	s8  clusters_per_mft_record;	/* Mft record size in clusters. */
+	u8  reserved0[3];		/* zero */
+	s8  clusters_per_index_record;	/* Index block size in clusters. */
+	u8  reserved1[3];		/* zero */
+	u64 volume_serial_number;	/* Irrelevant (serial number). */
+	u32 checksum;			/* Boot sector checksum. */
+	u8  bootstrap[426];		/* Irrelevant (boot up code). */
+	u16 end_of_sector_marker;	/* End of bootsector magic. Always is
+					   0xaa55 in little endian. */
+} __attribute__ ((__packed__)) NTFS_BOOT_SECTOR;
+/*
+ * Magic identifiers present at the beginning of all ntfs record containing
+ * records (like mft records for example).
+ *
+ * Each value is the 32-bit little endian encoding of the four ASCII characters
+ * naming the constant, i.e. the least significant byte is the first character:
+ * 0x444b4843 is 'C' (0x43), 'H' (0x48), 'K' (0x4b), 'D' (0x44).
+ */
+typedef enum {
+	magic_BAAD = const_cpu_to_le32(0x44414142), /* BAAD == corrupt record */
+	magic_CHKD = const_cpu_to_le32(0x444b4843), /* CHKD == chkdsk ??? */
+	magic_FILE = const_cpu_to_le32(0x454c4946), /* FILE == mft entry */
+	magic_HOLE = const_cpu_to_le32(0x454c4f48), /* HOLE == ? (NTFS 3.0+?) */
+	magic_INDX = const_cpu_to_le32(0x58444e49), /* INDX == index buffer */
+} NTFS_RECORD_TYPES;
+/*
+ * Generic magic comparison macros. Finally found a use for the ## preprocessor
+ * operator! (-8
+ *
+ * is_magic() takes the magic value itself whilst is_magicp() takes a pointer
+ * to it. In both cases m is the suffix of one of the magic_* constants, e.g.
+ * is_magic(x, FILE) compares x against magic_FILE.
+ */
+#define is_magic(x, m)		(   (u32)(x) == magic_##m )
+#define is_magicp(p, m) 	( *(u32*)(p) == magic_##m )
+/*
+ * Specialised magic comparison macros. The ones ending in p take a pointer to
+ * the magic, the others take the magic value.
+ */
+#define is_baad_record(x)	( is_magic (x, BAAD) )
+#define is_baad_recordp(p)	( is_magicp(p, BAAD) )
+#define is_chkd_record(x)       ( is_magic (x, CHKD) )
+#define is_chkd_recordp(p)      ( is_magicp(p, CHKD) )
+#define is_file_record(x)	( is_magic (x, FILE) )
+#define is_file_recordp(p)	( is_magicp(p, FILE) )
+#define is_hole_record(x)       ( is_magic (x, HOLE) )
+#define is_hole_recordp(p)      ( is_magicp(p, HOLE) )
+#define is_indx_record(x)       ( is_magic (x, INDX) )
+#define is_indx_recordp(p)      ( is_magicp(p, INDX) )
+/* An mft record is identified by the FILE magic. */
+#define is_mft_record(x)	( is_file_record(x) )
+#define is_mft_recordp(p)	( is_file_recordp(p) )
+/*
+ * The Update Sequence Array (usa) is an array of the u16 values which belong
+ * to the end of each sector protected by the update sequence record in which
+ * this array is contained. Note that the first entry is the Update Sequence
+ * Number (usn), a cyclic counter of how many times the protected record has
+ * been written to disk. The values 0 and -1 (ie. 0xffff) are not used. All
+ * last u16's of each sector have to be equal to the usn (during reading) or
+ * are set to it (during writing). If they are not, an incomplete multi sector
+ * transfer has occurred when the data was written.
+ * The maximum size for the update sequence array is fixed to:
+ * 	maximum size = usa_ofs + (usa_count * 2) = 510 bytes
+ * The 510 bytes comes from the fact that the last u16 in the array has to
+ * (obviously) finish before the last u16 of the first 512-byte sector.
+ * This formula can be used as a consistency check in that usa_ofs +
+ * (usa_count * 2) has to be less than or equal to 510.
+ */
+/* The header is 8 bytes long: a four-byte magic followed by two u16 values. */
+typedef struct {
+	NTFS_RECORD_TYPES magic;	/* A four-byte magic identifying the
+					   record type and/or status. */
+	u16 usa_ofs;		/* Offset to the Update Sequence Array (usa)
+				   from the start of the ntfs record. */
+	u16 usa_count;		/* Number of u16 sized entries in the usa
+				   including the Update Sequence Number (usn),
+				   thus the number of fixups is the usa_count
+				   minus 1. */
+} __attribute__ ((__packed__)) NTFS_RECORD;
+/*
+ * System files mft record numbers. All these files are always marked as used
+ * in the bitmap attribute of the mft; presumably in order to avoid accidental
+ * allocation for random other mft records. Also, the sequence number for each
+ * of the system files is always equal to their mft record number and it is
+ * never modified.
+ */
+typedef enum {
+	/* Mft records 0 to 15 are system files or reserved for future use. */
+	FILE_MFT       = 0,	/* Master file table (mft). Data attribute
+				   contains the entries and bitmap attribute
+				   records which ones are in use (bit==1). */
+	FILE_MFTMirr   = 1,	/* Mft mirror (copy of first four mft records)
+				   in data attribute. */
+	FILE_LogFile   = 2,	/* Journalling log in data attribute. */
+	FILE_Volume    = 3,	/* Volume name attribute and volume information
+				   attribute (flags and ntfs version). Windows
+				   refers to this file as volume DASD (Direct
+				   Access Storage Device). */
+	FILE_AttrDef   = 4,	/* Array of attribute definitions in data
+				   attribute. */
+	FILE_root      = 5,	/* Root directory. */
+	FILE_Bitmap    = 6,	/* Allocation bitmap of all clusters (lcns) in
+				   data attribute. */
+	FILE_Boot      = 7,	/* Boot sector (always at cluster 0) in data
+				   attribute. */
+	FILE_BadClus   = 8,	/* Contains all bad clusters in the non-resident
+				   data attribute. */
+	FILE_Secure    = 9,	/* Shared security descriptors in data attribute
+				   and two indexes into the descriptors.
+				   Appeared in Windows 2000. Before that, this
+				   file was named $Quota but was unused. */
+	FILE_UpCase    = 10,	/* Uppercase equivalents of all 65536 Unicode
+				   characters in data attribute. */
+	FILE_Extend    = 11,	/* Directory containing other system files (eg.
+				   $ObjId, $Quota, $Reparse and $UsnJrnl). This
+				   is new to NTFS3.0. */
+	FILE_reserved12 = 12,	/* Reserved for future use (records 12-15). */
+	FILE_reserved13 = 13,
+	FILE_reserved14 = 14,
+	FILE_reserved15 = 15,
+	FILE_first_user = 16,	/* First user file, used as test limit for
+				   whether to allow opening a file or not. */
+} NTFS_SYSTEM_FILES;
+/*
+ * These are the so far known MFT_RECORD_* flags (16-bit) which contain
+ * information about the mft record in which they are present.
+ *
+ * The enum is packed and MFT_REC_SPACE_FILLER is only there to make the type
+ * 16 bits wide; it is not a flag found on disk.
+ */
+typedef enum {
+	MFT_RECORD_IN_USE	= const_cpu_to_le16(0x0001),
+	MFT_RECORD_IS_DIRECTORY	= const_cpu_to_le16(0x0002),
+	MFT_REC_SPACE_FILLER	= 0xffff	/* Just to make flags 16-bit. */
+} __attribute__ ((__packed__)) MFT_RECORD_FLAGS;
+/*
+ * mft references (aka file references or file record segment references) are
+ * used whenever a structure needs to refer to a record in the mft.
+ * 
+ * A reference consists of a 48-bit index into the mft and a 16-bit sequence
+ * number used to detect stale references.
+ *
+ * For error reporting purposes we treat the 48-bit index as a signed quantity.
+ *
+ * The sequence number is a circular counter (skipping 0) describing how many
+ * times the referenced mft record has been (re)used. This has to match the
+ * sequence number of the mft record being referenced, otherwise the reference
+ * is considered stale and removed (FIXME: only ntfsck or the driver itself?).
+ *
+ * If the sequence number is zero it is assumed that no sequence number
+ * consistency checking should be performed.
+ *
+ * FIXME: Since inodes are 32-bit as of now, the driver needs to always check
+ * for high_part being 0 and if not either BUG(), cause a panic() or handle
+ * the situation in some other way. This shouldn't be a problem as a volume has
+ * to become HUGE in order to need more than 32-bits worth of mft records.
+ * Assuming the standard mft record size of 1kb only the records (never mind
+ * the non-resident attributes, etc.) would require 4Tb of space on their own
+ * for the first 32 bits worth of records. This is only if some strange person
+ * doesn't decide to foul play and make the mft sparse which would be a really
+ * horrible thing to do as it would trash our current driver implementation. )-:
+ * Do I hear screams "we want 64-bit inodes!" ?!? (-;
+ *
+ * FIXME: The mft zone is defined as the first 12% of the volume. This space is
+ * reserved so that the mft can grow contiguously and hence doesn't become 
+ * fragmented. Volume free space includes the empty part of the mft zone and
+ * when the volume's free 88% are used up, the mft zone is shrunk by a factor
+ * of 2, thus making more space available for more files/data. This process is
+ * repeated everytime there is no more free space except for the mft zone until
+ * there really is no more free space.
+ */
+/*
+ * Typedef the MFT_REF as a 64-bit value for easier handling.
+ * Also define two unpacking macros to get to the reference (MREF) and
+ * sequence number (MSEQNO) respectively.
+ * The _LE versions are to be applied on little endian MFT_REFs.
+ * Note: The _LE versions will return a CPU endian formatted value!
+ *
+ * MREF() keeps the low 48 bits and MSEQNO() returns the high 16 bits.
+ *
+ * IS_ERR_MREF() evaluates to 1 if bit 47 of its argument is set, i.e. if the
+ * 48-bit index is negative when treated as a signed quantity, and to 0
+ * otherwise. ERR_MREF() converts its argument to u64 via s64 and MREF_ERR()
+ * converts its argument to int via s64 (presumably to store an error code in
+ * an MFT_REF and to get it back out again; confirm against the callers).
+ */
+typedef enum {
+	MFT_REF_MASK_CPU	= 0x0000ffffffffffffULL,
+	MFT_REF_MASK_LE		= const_cpu_to_le64(0x0000ffffffffffffULL),
+} MFT_REF_CONSTS;
+typedef u64 MFT_REF;
+#define MREF(x)		((u64)((x) & MFT_REF_MASK_CPU))
+#define MSEQNO(x)	((u16)(((x) >> 48) & 0xffff))
+#define MREF_LE(x)	((u64)(le64_to_cpu(x) & MFT_REF_MASK_CPU))
+#define MSEQNO_LE(x)	((u16)((le64_to_cpu(x) >> 48) & 0xffff))
+#define IS_ERR_MREF(x)	(((x) & 0x0000800000000000ULL) ? 1 : 0)
+#define ERR_MREF(x)	((u64)((s64)(x)))
+#define MREF_ERR(x)	((int)((s64)(x)))
+/*
+ * The mft record header present at the beginning of every record in the mft.
+ * This is followed by a sequence of variable length attribute records which
+ * is terminated by an attribute of type AT_END which is a truncated attribute
+ * in that it only consists of the attribute type code AT_END and none of the
+ * other members of the attribute structure are present.
+ */
+typedef struct {
+/*Ofs*/
+/*  0*/	NTFS_RECORD SN(mnr);	/* Usually the magic is "FILE". */
+/*  8*/	u64 lsn;		/* $LogFile sequence number for this record.
+				   Changed every time the record is modified. */
+/* 16*/	u16 sequence_number;	/* Number of times this mft record has been
+				   reused. (See description for MFT_REF
+				   above.) NOTE: The increment (skipping zero)
+				   is done when the file is deleted. NOTE: If
+				   this is zero it is left zero. */
+/* 18*/	u16 link_count; 	/* Number of hard links, i.e. the number of
+				   directory entries referencing this record.
+				   NOTE: Only used in mft base records.
+				   NOTE: When deleting a directory entry we
+				   check the link_count and if it is 1 we
+				   delete the file. Otherwise we delete the
+				   FILE_NAME_ATTR being referenced by the
+				   directory entry from the mft record and
+				   decrement the link_count.
+				   FIXME: Careful with Win32 + DOS names! */
+/* 20*/	u16 attrs_offset;	/* Byte offset to the first attribute in this
+				   mft record from the start of the mft record.
+				   NOTE: Must be aligned to 8-byte boundary. */
+/* 22*/	MFT_RECORD_FLAGS flags;	/* Bit array of MFT_RECORD_FLAGS. When a file
+				   is deleted, the MFT_RECORD_IN_USE flag is
+				   set to zero. */
+/* 24*/	u32 bytes_in_use;	/* Number of bytes used in this mft record.
+				   NOTE: Must be aligned to 8-byte boundary. */
+/* 28*/	u32 bytes_allocated;	/* Number of bytes allocated for this mft
+				   record. This should be equal to the mft
+				   record size. */
+/* 32*/	MFT_REF base_mft_record; /* This is zero for base mft records.
+				   When it is not zero it is an mft reference
+				   pointing to the base mft record to which
+				   this record belongs. This is then used to
+				   locate the attribute list attribute present
+				   in the base record which describes this
+				   extension record and hence might need
+				   modification when the extension record
+				   itself is modified. Locating the attribute
+				   list also means finding the other potential
+				   extents, belonging to the non-base mft
+				   record. */
+/* 40*/	u16 next_attr_instance;	/* The instance number that will be
+				   assigned to the next attribute added to this
+				   mft record. NOTE: Incremented each time
+				   after it is used. NOTE: Every time the mft
+				   record is reused this number is set to zero.
+				   NOTE: The first instance number is always 0.
+				 */
+/* sizeof() = 42 bytes */
+/* NTFS 3.1+ (Windows XP and above) introduce the following additions. */
+/* 42*/ //u16 reserved;		/* Reserved/alignment. */
+/* 44*/ //u32 mft_record_number;/* Number of this mft record. */
+/* sizeof() = 48 bytes */
+/*
+ * When (re)using the mft record, we place the update sequence array at this
+ * offset, i.e. before we start with the attributes. This also makes sense,
+ * otherwise we could run into problems with the update sequence array
+ * containing in itself the last two bytes of a sector which would mean that
+ * multi sector transfer protection wouldn't work. As you can't protect data
+ * by overwriting it since you then can't get it back...
+ * When reading we obviously use the data from the ntfs record header.
+ */
+} __attribute__ ((__packed__)) MFT_RECORD;
+/* Shorthand for accessing the members of the embedded NTFS_RECORD (mnr). */
+#define _MNR(X)  SC(mnr,X)
+/*
+ * System defined attributes (32-bit). Each attribute type has a corresponding
+ * attribute name (Unicode string of maximum 64 character length) as described
+ * by the attribute definitions present in the data attribute of the $AttrDef
+ * system file. On NTFS 3.0 volumes the names are just as the types are named
+ * in the below enum exchanging AT_ for the dollar sign ($). If that isn't a
+ * revealing choice of symbol... (-;
+ */
+typedef enum {
+	AT_UNUSED			= const_cpu_to_le32(         0),
+	AT_STANDARD_INFORMATION		= const_cpu_to_le32(      0x10),
+	AT_ATTRIBUTE_LIST		= const_cpu_to_le32(      0x20),
+	AT_FILE_NAME			= const_cpu_to_le32(      0x30),
+	AT_OBJECT_ID			= const_cpu_to_le32(      0x40),
+	AT_SECURITY_DESCRIPTOR		= const_cpu_to_le32(      0x50),
+	AT_VOLUME_NAME			= const_cpu_to_le32(      0x60),
+	AT_VOLUME_INFORMATION		= const_cpu_to_le32(      0x70),
+	AT_DATA				= const_cpu_to_le32(      0x80),
+	AT_INDEX_ROOT			= const_cpu_to_le32(      0x90),
+	AT_INDEX_ALLOCATION		= const_cpu_to_le32(      0xa0),
+	AT_BITMAP			= const_cpu_to_le32(      0xb0),
+	AT_REPARSE_POINT		= const_cpu_to_le32(      0xc0),
+	AT_EA_INFORMATION		= const_cpu_to_le32(      0xd0),
+	AT_EA				= const_cpu_to_le32(      0xe0),
+	AT_PROPERTY_SET			= const_cpu_to_le32(      0xf0),
+	AT_LOGGED_UTILITY_STREAM	= const_cpu_to_le32(     0x100),
+	AT_FIRST_USER_DEFINED_ATTRIBUTE	= const_cpu_to_le32(    0x1000),
+	/* Terminates the sequence of attribute records in an mft record. */
+	AT_END				= const_cpu_to_le32(0xffffffff),
+} ATTR_TYPES;
+/*
+ * The collation rules for sorting views/indexes/etc (32-bit).
+ *
+ * COLLATION_UNICODE_STRING - Collate Unicode strings by comparing their binary
+ *	Unicode values, except that when a character can be uppercased, the
+ *	upper case value collates before the lower case one.
+ * COLLATION_FILE_NAME - Collate file names as Unicode strings. The collation
+ *	is done very much like COLLATION_UNICODE_STRING. In fact I have no idea
+ *	what the difference is. Perhaps the difference is that file names
+ *	would treat some special characters in an odd way (see
+ *	unistr.c::ntfs_collate_names() and unistr.c::legal_ansi_char_array[]
+ *	for what I mean) but COLLATION_UNICODE_STRING would not give any special
+ *	treatment to any characters at all, but this is speculation.
+ * COLLATION_NTOFS_ULONG - Sorting is done according to ascending u32 key
+ * 	values. E.g. used for $SII index in FILE_Secure, which sorts by
+ * 	security_id (u32).
+ * COLLATION_NTOFS_SID - Sorting is done according to ascending SID values.
+ * 	E.g. used for $O index in FILE_Extend/$Quota.
+ * COLLATION_NTOFS_SECURITY_HASH - Sorting is done first by ascending hash
+ * 	values and second by ascending security_id values. E.g. used for $SDH
+ * 	index in FILE_Secure.
+ * COLLATION_NTOFS_ULONGS - Sorting is done according to a sequence of ascending
+ *	u32 key values. E.g. used for $O index in FILE_Extend/$ObjId, which
+ *	sorts by object_id (16-byte), by splitting up the object_id in four
+ *	u32 values and using them as individual keys. E.g. take the following
+ *	two object_ids, stored as follows on disk:
+ *		1st: a1 61 65 b7 65 7b d4 11 9e 3d 00 e0 81 10 42 59
+ *		2nd: 38 14 37 d2 d2 f3 d4 11 a5 21 c8 6b 79 b1 97 45
+ *	To compare them, they are split into four u32 values each, like so:
+ *		1st: 0xb76561a1 0x11d47b65 0xe0003d9e 0x59421081
+ *		2nd: 0xd2371438 0x11d4f3d2 0x6bc821a5 0x4597b179
+ *	Now, it is apparent why the 2nd object_id collates after the 1st: the
+ *	first u32 value of the 1st object_id is less than the first u32 of
+ *	the 2nd object_id. If the first u32 values of both object_ids were
+ *	equal then the second u32 values would be compared, etc.
+ */
+typedef enum {
+	COLLATION_BINARY	 = const_cpu_to_le32(0), /* Collate by binary
+					compare where the first byte is most
+					significant. */
+	COLLATION_FILE_NAME	 = const_cpu_to_le32(1), /* Collate file names
+					as Unicode strings. */
+	COLLATION_UNICODE_STRING = const_cpu_to_le32(2), /* Collate Unicode
+					strings by comparing their binary
+					Unicode values, except that when a
+					character can be uppercased, the upper
+					case value collates before the lower
+					case one. */
+	/* Ascending u32 key values. */
+	COLLATION_NTOFS_ULONG		= const_cpu_to_le32(16),
+	/* Ascending SID values. */
+	COLLATION_NTOFS_SID		= const_cpu_to_le32(17),
+	/* Ascending hash values first, ascending security_id values second. */
+	COLLATION_NTOFS_SECURITY_HASH	= const_cpu_to_le32(18),
+	/* A sequence of ascending u32 key values. */
+	COLLATION_NTOFS_ULONGS		= const_cpu_to_le32(19),
+} COLLATION_RULES;
+/*
+ * The flags (32-bit) describing attribute properties in the attribute
+ * definition structure (the flags field of ATTR_DEF). FIXME: This information
+ * is from Regis's information and, according to him, it is not certain and
+ * probably incomplete.
+ * The INDEXABLE flag is fairly certainly correct as only the file name
+ * attribute has this flag set and this is the only attribute indexed in NT4.
+ */
+typedef enum {
+	INDEXABLE	    = const_cpu_to_le32(0x02),	/* Attribute can be
+							   indexed. */
+	NEED_TO_REGENERATE  = const_cpu_to_le32(0x40),	/* Need to regenerate
+							   during regeneration
+							   phase. */
+	CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80),	/* Attribute can be
+							   non-resident. */
+} ATTR_DEF_FLAGS;
+/*
+ * The data attribute of FILE_AttrDef contains a sequence of attribute
+ * definitions for the NTFS volume. With this, it is supposed to be safe for an
+ * older NTFS driver to mount a volume containing a newer NTFS version without
+ * damaging it (that's the theory. In practice it's: not damaging it too much).
+ * Entries are sorted by attribute type. The flags describe whether the
+ * attribute can be resident/non-resident and possibly other things, but the
+ * actual bits are unknown.
+ */
+typedef struct {
+/*hex ofs (all offsets in this listing are hexadecimal)*/
+/*  0*/	uchar_t name[0x40];		/* Unicode name of the attribute. Zero
+					   terminated. Going by the offsets in
+					   this listing, the array spans 0x80
+					   bytes. */
+/* 80*/	ATTR_TYPES type;		/* Type of the attribute. */
+/* 84*/	u32 display_rule;		/* Default display rule.
+					   FIXME: What does it mean? (AIA) */
+/* 88*/ COLLATION_RULES collation_rule;	/* Default collation rule. */
+/* 8c*/	ATTR_DEF_FLAGS flags;		/* Flags describing the attribute. */
+/* 90*/	u64 min_size;			/* Optional minimum attribute size. */
+/* 98*/	u64 max_size;			/* Maximum size of attribute. */
+/* sizeof() = 0xa0 or 160 bytes */
+} __attribute__ ((__packed__)) ATTR_DEF;
+/*
+ * Attribute flags (16-bit). Used as the type of the flags field in the
+ * attribute record header (ATTR_RECORD).
+ */
+typedef enum {
+	ATTR_IS_COMPRESSED	= const_cpu_to_le16(0x0001),
+	ATTR_COMPRESSION_MASK	= const_cpu_to_le16(0x00ff),  /* Compression
+						method mask. Also, first
+						illegal value. */
+	ATTR_IS_ENCRYPTED	= const_cpu_to_le16(0x4000),
+	ATTR_IS_SPARSE		= const_cpu_to_le16(0x8000),
+} __attribute__ ((__packed__)) ATTR_FLAGS;
+/*
+ * Attribute compression.
+ *
+ * Only the data attribute is ever compressed in the current ntfs driver in
+ * Windows. Further, compression is only applied when the data attribute is
+ * non-resident. Finally, to use compression, the maximum allowed cluster size
+ * on a volume is 4kiB.
+ *
+ * The compression method is based on independently compressing blocks of X
+ * clusters, where X is determined from the compression_unit value found in the
+ * non-resident attribute record header (more precisely: X = 2^compression_unit
+ * clusters). On Windows NT/2k, X always is 16 clusters (compression_unit = 4).
+ *
+ * There are three different cases of how a compression block of X clusters
+ * can be stored:
+ *
+ *   1) The data in the block is all zero (a sparse block):
+ *	  This is stored as a sparse block in the run list, i.e. the run list
+ *	  entry has length = X and lcn = -1. The mapping pairs array actually
+ *	  uses a delta_lcn value length of 0, i.e. delta_lcn is not present at
+ *	  all, which is then interpreted by the driver as lcn = -1.
+ *	  NOTE: Even uncompressed files can be sparse on NTFS 3.0 volumes, then
+ *	  the same principles apply as above, except that the length is not
+ *	  restricted to being any particular value.
+ *
+ *   2) The data in the block is not compressed:
+ *	  This happens when compression doesn't reduce the size of the block
+ *	  in clusters. I.e. if compression has a small effect so that the
+ *	  compressed data still occupies X clusters, then the uncompressed data
+ *	  is stored in the block.
+ *	  This case is recognised by the fact that the run list entry has
+ *	  length = X and lcn >= 0. The mapping pairs array stores this as
+ *	  normal with a run length of X and some specific delta_lcn, i.e.
+ *	  delta_lcn has to be present.
+ *
+ *   3) The data in the block is compressed:
+ *	  The common case. This case is recognised by the fact that the run
+ *	  list entry has length L < X and lcn >= 0. The mapping pairs array
+ *	  stores this as normal with a run length of L and some specific
+ *	  delta_lcn, i.e. delta_lcn has to be present. This run list entry is
+ *	  immediately followed by a sparse entry with length = X - L and
+ *	  lcn = -1. The latter entry is to make up the vcn counting to the
+ *	  full compression block size X.
+ *
+ * In fact, life is more complicated because adjacent entries of the same type
+ * can be coalesced. This means that one has to keep track of the number of
+ * clusters handled and work on a basis of X clusters at a time being one
+ * block. An example: if length L > X this means that this particular run list
+ * entry contains a block of length X and part of one or more blocks of length
+ * L - X. Another example: if length L < X, this does not necessarily mean that
+ * the block is compressed as it might be that the lcn changes inside the block
+ * and hence the following run list entry describes the continuation of the
+ * potentially compressed block. The block would be compressed if the
+ * following run list entry describes at least X - L sparse clusters, thus
+ * making up the compression block length as described in point 3 above. (Of
+ * course, there can be several run list entries with small lengths so that the
+ * sparse entry does not follow the first data containing entry with
+ * length < X.)
+ *
+ * NOTE: At the end of the compressed attribute value, there most likely is not
+ * just the right amount of data to make up a compression block, thus this data
+ * is not even attempted to be compressed. It is just stored as is, unless
+ * the number of clusters it occupies is reduced when compressed in which case
+ * it is stored as a compressed compression block, complete with sparse
+ * clusters at the end.
+ */
+/*
+ * Flags of resident attributes (8-bit). Used as the type of the
+ * resident_flags field in the resident part of ATTR_RECORD.
+ */
+typedef enum {
+	RESIDENT_ATTR_IS_INDEXED = 0x01, /* Attribute is referenced in an index
+					    (has implications for deleting and
+					    modifying the attribute). */
+} __attribute__ ((__packed__)) RESIDENT_ATTR_FLAGS;
+/*
+ * Attribute record header. Always aligned to 8-byte boundary.
+ */
+typedef struct {
+/*Ofs (decimal byte offsets)*/
+/*  0*/	ATTR_TYPES type;	/* The (32-bit) type of the attribute. */
+/*  4*/	u32 length;		/* Byte size of the resident part of the
+				   attribute (aligned to 8-byte boundary).
+				   Used to get to the next attribute. */
+/*  8*/	u8 non_resident;	/* If 0, attribute is resident.
+				   If 1, attribute is non-resident. */
+/*  9*/	u8 name_length;		/* Unicode character size of name of attribute.
+				   0 if unnamed. */
+/* 10*/	u16 name_offset;	/* If name_length != 0, the byte offset to the
+				   beginning of the name from the attribute
+				   record. Note that the name is stored as a
+				   Unicode string. When creating, place offset
+				   just at the end of the record header. Then,
+				   follow with attribute value or mapping pairs
+				   array, resident and non-resident attributes
+				   respectively, aligning to an 8-byte
+				   boundary. */
+/* 12*/	ATTR_FLAGS flags;	/* Flags describing the attribute. */
+/* 14*/	u16 instance;		/* The instance of this attribute record. This
+				   number is unique within this mft record (see
+				   MFT_RECORD/next_attribute_instance notes in
+				   mft.h for more details). */
+/* 16*/	union {	/* Which member applies follows from non_resident above. */
+		/* Resident attributes. */
+		struct {
+/* 16 */		u32 value_length; /* Byte size of attribute value. */
+/* 20 */		u16 value_offset; /* Byte offset of the attribute
+					     value from the start of the
+					     attribute record. When creating,
+					     align to 8-byte boundary if we
+					     have a name present as this might
+					     not have a length of a multiple
+					     of 8-bytes. */
+/* 22 */		RESIDENT_ATTR_FLAGS resident_flags; /* See above. */
+/* 23 */		s8 reservedR;	  /* Reserved/alignment to 8-byte
+					     boundary. */
+		} SN(ara) __attribute__ ((__packed__));
+		/* Non-resident attributes. */
+		struct {
+/* 16*/			VCN lowest_vcn;	/* Lowest valid virtual cluster number
+				for this portion of the attribute value or
+				0 if this is the only extent (usually the
+				case). - Only when an attribute list is used
+				does lowest_vcn != 0 ever occur. */
+/* 24*/			VCN highest_vcn; /* Highest valid vcn of this extent of
+				the attribute value. - Usually there is only one
+				portion, so this usually equals the attribute
+				value size in clusters minus 1. Can be -1 for
+				zero length files. Can be 0 for "single extent"
+				attributes. */
+/* 32*/			u16 mapping_pairs_offset; /* Byte offset from the
+				beginning of the structure to the mapping pairs
+				array which contains the mappings between the
+				vcns and the logical cluster numbers (lcns).
+				When creating, place this at the end of this
+				record header aligned to 8-byte boundary. */
+/* 34*/			u8 compression_unit; /* The compression unit expressed
+				as the log to the base 2 of the number of
+				clusters in a compression unit. 0 means not
+				compressed. (This effectively limits the
+				compression unit size to be a power of two
+				clusters.) WinNT4 only uses a value of 4. */
+/* 35*/			u8 reserved1[5];	/* Align to 8-byte boundary. */
+/* The sizes below are only used when lowest_vcn is zero, as otherwise it would
+   be difficult to keep them up-to-date.*/
+/* 40*/			s64 allocated_size;	/* Byte size of disk space
+				allocated to hold the attribute value. Always
+				is a multiple of the cluster size. When a file
+				is compressed, this field is a multiple of the
+				compression block size (2^compression_unit) and
+				it represents the logically allocated space
+				rather than the actual on disk usage. For this
+				use the compressed_size (see below). */
+/* 48*/			s64 data_size;	/* Byte size of the attribute
+				value. Can be larger than allocated_size if
+				attribute value is compressed or sparse. */
+/* 56*/			s64 initialized_size;	/* Byte size of initialized
+				portion of the attribute value. Usually equals
+				data_size. */
+/* sizeof(uncompressed attr) = 64*/
+/* 64*/			s64 compressed_size;	/* Byte size of the attribute
+				value after compression. Only present when
+				compressed. Always is a multiple of the
+				cluster size. Represents the actual amount of
+				disk space being used on the disk. */
+/* sizeof(compressed attr) = 72*/
+		} SN(anr) __attribute__ ((__packed__));
+	} SN(aua) __attribute__ ((__packed__));
+} __attribute__ ((__packed__)) ATTR_RECORD;
+#define _ARA(X)  SC(aua.ara,X)	/* Presumably names member X of the resident
+				   part; SC() is defined elsewhere - confirm. */
+#define _ANR(X)  SC(aua.anr,X)	/* Presumably names member X of the
+				   non-resident part; SC() is defined
+				   elsewhere - confirm. */
+typedef ATTR_RECORD ATTR_REC;	/* Short alias for ATTR_RECORD. */
+/*
+ * File attribute flags (32-bit).
+ */
+typedef enum {
+	/*
+	 * These flags are only present in the STANDARD_INFORMATION attribute
+	 * (in the field file_attributes).
+	 */
+	FILE_ATTR_READONLY		= const_cpu_to_le32(0x00000001),
+	FILE_ATTR_HIDDEN		= const_cpu_to_le32(0x00000002),
+	FILE_ATTR_SYSTEM		= const_cpu_to_le32(0x00000004),
+	/* Old DOS volid. Unused in NT.	= cpu_to_le32(0x00000008), */
+	FILE_ATTR_DIRECTORY		= const_cpu_to_le32(0x00000010),
+	/* FILE_ATTR_DIRECTORY is not considered valid in NT. It is reserved
+	   for the DOS SUBDIRECTORY flag. */
+	FILE_ATTR_ARCHIVE		= const_cpu_to_le32(0x00000020),
+	FILE_ATTR_DEVICE		= const_cpu_to_le32(0x00000040),
+	FILE_ATTR_NORMAL		= const_cpu_to_le32(0x00000080),
+	FILE_ATTR_TEMPORARY		= const_cpu_to_le32(0x00000100),
+	FILE_ATTR_SPARSE_FILE		= const_cpu_to_le32(0x00000200),
+	FILE_ATTR_REPARSE_POINT		= const_cpu_to_le32(0x00000400),
+	FILE_ATTR_COMPRESSED		= const_cpu_to_le32(0x00000800),
+	FILE_ATTR_OFFLINE		= const_cpu_to_le32(0x00001000),
+	FILE_ATTR_NOT_CONTENT_INDEXED	= const_cpu_to_le32(0x00002000),
+	FILE_ATTR_ENCRYPTED		= const_cpu_to_le32(0x00004000),
+	FILE_ATTR_VALID_FLAGS		= const_cpu_to_le32(0x00007fb7),
+	/* FILE_ATTR_VALID_FLAGS masks out the old DOS VolId (0x08) and the
+	   FILE_ATTR_DEVICE (0x40) and preserves everything else. This mask
+	   is used to obtain all flags that are valid for reading. */
+	FILE_ATTR_VALID_SET_FLAGS	= const_cpu_to_le32(0x000031a7),
+	/* FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
+	   F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
+	   F_A_COMPRESSED and F_A_ENCRYPTED and preserves the rest. This mask
+	   is used to obtain all flags that are valid for setting. */
+	/*
+	 * These flags are only present in the FILE_NAME attribute (in the
+	 * field file_attributes).
+	 */
+	FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT	= const_cpu_to_le32(0x10000000),
+	/* This is a copy of the corresponding bit from the mft record, telling
+	   us whether this is a directory or not, i.e. whether it has an
+	   index root attribute or not. */
+	FILE_ATTR_DUP_VIEW_INDEX_PRESENT	= const_cpu_to_le32(0x20000000),
+	/* This is a copy of the corresponding bit from the mft record, telling
+	   us whether this file has a view index present (eg. object id index,
+	   quota index, one of the security indexes or the encrypting file
+	   system related indexes). */
+} FILE_ATTR_FLAGS;
+/*
+ * NOTE on times in NTFS: All times are in MS standard time format, i.e. they
+ * are the number of 100-nanosecond intervals since 1st January 1601, 00:00:00
+ * universal coordinated time (UTC). (In Linux time starts 1st January 1970,
+ * 00:00:00 UTC and is stored as the number of 1-second intervals since then.)
+ */
+/*
+ * Attribute: Standard information (0x10).
+ *
+ * NOTE: Always resident.
+ * NOTE: Present in all base file records on a volume.
+ * NOTE: There is conflicting information about the meaning of each of the time
+ * 	 fields but the meaning as defined below has been verified to be
+ * 	 correct by practical experimentation on Windows NT4 SP6a and is hence
+ * 	 assumed to be the one and only correct interpretation.
+ */
+typedef struct {
+/*Ofs (decimal byte offsets)*/
+/*  0*/	s64 creation_time;		/* Time file was created. Updated when
+					   a filename is changed(?). */
+/*  8*/	s64 last_data_change_time;	/* Time the data attribute was last
+					   modified. */
+/* 16*/	s64 last_mft_change_time;	/* Time this mft record was last
+					   modified. */
+/* 24*/	s64 last_access_time;		/* Approximate time when the file was
+					   last accessed (obviously this is not
+					   updated on read-only volumes). In
+					   Windows this is only updated when
+					   accessed if some time delta has
+					   passed since the last update. Also,
+					   last access times updates can be
+					   disabled altogether for speed. */
+/* 32*/	FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */
+/* 36*/	union {
+		/* NTFS 1.2 (and previous, presumably) */
+/* 36 */	u8 reserved12[12];	/* Reserved/alignment to 8-byte
+					   boundary. */
+/* sizeof() = 48 bytes */
+		/* NTFS 3.0 */
+		struct {
+/*
+ * If a volume has been upgraded from a previous NTFS version, then these
+ * fields are present only if the file has been accessed since the upgrade.
+ * Recognize the difference by comparing the length of the resident attribute
+ * value. If it is 48, then the following fields are missing. If it is 72 then
+ * the fields are present. Maybe just check like this:
+ * 	if (resident.ValueLength < sizeof(STANDARD_INFORMATION)) {
+ * 		Assume NTFS 1.2- format.
+ * 		If (volume version is 3.0+)
+ * 			Upgrade attribute to NTFS 3.0 format.
+ * 		else
+ * 			Use NTFS 1.2- format for access.
+ * 	} else
+ * 		Use NTFS 3.0 format for access.
+ * Only problem is that it might be legal to set the length of the value to
+ * arbitrarily large values thus spoiling this check. - But chkdsk probably
+ * views that as a corruption, assuming that it behaves like this for all
+ * attributes.
+ */
+		/* 36*/	u32 maximum_versions;	/* Maximum allowed versions for
+				file. Zero if version numbering is disabled. */
+		/* 40*/	u32 version_number;	/* This file's version (if any).
+				Set to zero if maximum_versions is zero. */
+		/* 44*/	u32 class_id;		/* Class id from bidirectional
+				class id index (?). */
+		/* 48*/	u32 owner_id;		/* Owner_id of the user owning
+				the file. Translate via $Q index in FILE_Extend
+				/$Quota to the quota control entry for the user
+				owning the file. Zero if quotas are disabled. */
+		/* 52*/	u32 security_id;	/* Security_id for the file.
+				Translate via $SII index and $SDS data stream
+				in FILE_Secure to the security descriptor. */
+		/* 56*/	u64 quota_charged;	/* Byte size of the charge to
+				the quota for all streams of the file. Note: Is
+				zero if quotas are disabled. */
+		/* 64*/	u64 usn;		/* Last update sequence number
+				of the file. This is a direct index into the
+				change (aka usn) journal file. It is zero if
+				the usn journal is disabled.
+				NOTE: To disable the journal need to delete
+				the journal file itself and to then walk the
+				whole mft and set all Usn entries in all mft
+				records to zero! (This can take a while!)
+				The journal is FILE_Extend/$UsnJrnl. Win2k
+				will recreate the journal and initiate
+				logging if necessary when mounting the
+				partition. This, in contrast to disabling the
+				journal is a very fast process, so the user
+				won't even notice it. */
+		/*
+		 * The nested struct and union must be packed themselves: the
+		 * packed attribute on the enclosing struct does not change
+		 * the internal layout of a nested aggregate. quota_charged
+		 * sits 20 bytes into this struct (offset 56 overall), so
+		 * where u64 is 8-byte aligned an unpacked struct would insert
+		 * 4 bytes of padding and no longer match the offsets and the
+		 * 72-byte size documented here.
+		 */
+		} SN(svs) __attribute__ ((__packed__));
+	} SN(sei) __attribute__ ((__packed__));
+/* sizeof() = 72 bytes (NTFS 3.0) */
+} __attribute__ ((__packed__)) STANDARD_INFORMATION;
+#define _SVS(X)  SC(sei.svs,X)
+/*
+ * Attribute: Attribute list (0x20).
+ *
+ * - Can be either resident or non-resident.
+ * - Value consists of a sequence of variable length, 8-byte aligned,
+ * ATTR_LIST_ENTRY records.
+ * - The list is not terminated by anything at all! The only way to know when
+ * the end is reached is to keep track of the current offset and compare it to
+ * the attribute value size.
+ * - The attribute list attribute contains one entry for each attribute of
+ * the file in which the list is located, except for the list attribute
+ * itself. The list is sorted: first by attribute type, second by attribute
+ * name (if present), third by instance number. The extents of one
+ * non-resident attribute (if present) immediately follow after the initial
+ * extent. They are ordered by lowest_vcn and have their instance set to zero.
+ * It is not allowed to have two attributes with all sorting keys equal.
+ * - Further restrictions: 
+ * 	- If not resident, the vcn to lcn mapping array has to fit inside the
+ * 	  base mft record.
+ * 	- The attribute list attribute value has a maximum size of 256kb. This
+ * 	  is imposed by the Windows cache manager.
+ * - Attribute lists are only used when the attributes of an mft record do not
+ * fit inside the mft record despite all attributes (that can be made
+ * non-resident) having been made non-resident. This can happen e.g. when:
+ *  	- File has a large number of hard links (lots of file name
+ *  	  attributes present).
+ *  	- The mapping pairs array of some non-resident attribute becomes so
+ *	  large due to fragmentation that it overflows the mft record.
+ *  	- The security descriptor is very complex (not applicable to
+ *  	  NTFS 3.0 volumes).
+ *  	- There are many named streams.
+ */
+typedef struct {
+/*Ofs (decimal byte offsets)*/
+/*  0*/	ATTR_TYPES type;	/* Type of referenced attribute. */
+/*  4*/	u16 length;		/* Byte size of this entry (8-byte aligned). */
+/*  6*/	u8 name_length;		/* Size in Unicode chars of the name of the
+				   attribute or 0 if unnamed. */
+/*  7*/	u8 name_offset;		/* Byte offset to beginning of attribute name
+				   (always set this to where the name would
+				   start even if unnamed). */
+/*  8*/	VCN lowest_vcn;		/* Lowest virtual cluster number of this portion
+				   of the attribute value. This is usually 0. It
+				   is non-zero for the case where one attribute
+				   does not fit into one mft record and thus
+				   several mft records are allocated to hold
+				   this attribute. In the latter case, each mft
+				   record holds one extent of the attribute and
+				   there is one attribute list entry for each
+				   extent. NOTE: This is DEFINITELY a signed
+				   value! The windows driver uses cmp, followed
+				   by jg when comparing this, thus it treats it
+				   as signed. */
+/* 16*/	MFT_REF mft_reference;	/* The reference of the mft record holding
+				   the ATTR_RECORD for this portion of the
+				   attribute value. */
+/* 24*/	u16 instance;		/* If lowest_vcn = 0, the instance of the
+				   attribute being referenced; otherwise 0. */
+/* 26*/	uchar_t name[0];	/* Use when creating only. When reading use
+				   name_offset to determine the location of the
+				   name. */
+/* sizeof() = 26 + (name_length * 2) bytes */
+} __attribute__ ((__packed__)) ATTR_LIST_ENTRY;
+/*
+ * The maximum allowed length for a file name. Presumably counted in Unicode
+ * characters, as FILE_NAME_ATTR stores file_name_length as an 8-bit character
+ * count - confirm against the users of this constant.
+ */
+#define MAXIMUM_FILE_NAME_LENGTH	255
+/*
+ * Possible namespaces for filenames in ntfs (8-bit).
+ */
+typedef enum {
+	FILE_NAME_POSIX			= 0x00,
+		/* This is the largest namespace. It is case sensitive and
+		   allows all Unicode characters except for: '\0' and '/'.
+		   Beware that in WinNT/2k files which eg have the same name
+		   except for their case will not be distinguished by the
+		   standard utilities and thus a "del filename" will delete
+		   both "filename" and "fileName" without warning. */
+	FILE_NAME_WIN32			= 0x01,
+		/* The standard WinNT/2k NTFS long filenames. Case insensitive.
+		   All Unicode chars except: '\0', '"', '*', '/', ':', '<',
+		   '>', '?', '\' and '|'. Further, names cannot end with a '.'
+		   or a space. */
+	FILE_NAME_DOS			= 0x02,
+		/* The standard DOS filenames (8.3 format). Uppercase only.
+		   All 8-bit characters greater than space, except: '"', '*',
+		   '+', ',', '/', ':', ';', '<', '=', '>', '?' and '\'. */
+	FILE_NAME_WIN32_AND_DOS		= 0x03,
+		/* 3 means that both the Win32 and the DOS filenames are
+		   identical and hence have been saved in this single filename
+		   record. */
+} __attribute__ ((__packed__)) FILE_NAME_TYPE_FLAGS;
+/*
+ * Attribute: Filename (0x30).
+ *
+ * NOTE: Always resident.
+ * NOTE: All fields, except the parent_directory, are only updated when the
+ *	 filename is changed. Until then, they just become out of sync with
+ *	 reality and the more up to date values are present in the standard
+ *	 information attribute.
+ * NOTE: There is conflicting information about the meaning of each of the time
+ * 	 fields but the meaning as defined below has been verified to be
+ * 	 correct by practical experimentation on Windows NT4 SP6a and is hence
+ * 	 assumed to be the one and only correct interpretation.
+ */
+typedef struct {
+/*hex ofs (all offsets in this listing are hexadecimal)*/
+/*  0*/	MFT_REF parent_directory;	/* Directory this filename is
+					   referenced from. */
+/*  8*/	s64 creation_time;		/* Time file was created. */
+/* 10*/	s64 last_data_change_time;	/* Time the data attribute was last
+					   modified. */
+/* 18*/	s64 last_mft_change_time;	/* Time this mft record was last
+					   modified. */
+/* 20*/	s64 last_access_time;		/* Last time this mft record was
+					   accessed. */
+/* 28*/	s64 allocated_size;		/* Byte size of allocated space for the
+					   data attribute. NOTE: Is a multiple
+					   of the cluster size. */
+/* 30*/	s64 data_size;			/* Byte size of actual data in data
+					   attribute. NOTE: Only present when
+					   lowest_vcn is 0. */
+/* 38*/	FILE_ATTR_FLAGS file_attributes;	/* Flags describing the file. */
+/* 3c*/	union {
+	/* 3c*/	struct {
+		/* 3c*/	u16 packed_ea_size;	/* Size of the buffer needed to
+						   pack the extended attributes
+						   (EAs), if such are present.*/
+		/* 3e*/	u16 reserved;		/* Reserved for alignment. */
+		} SN(fea) __attribute__ ((__packed__));
+	/* 3c*/	u32 reparse_point_tag;		/* Type of reparse point,
+						   present only in reparse
+						   points and only if there are
+						   no EAs. */
+	} SN(fer) __attribute__ ((__packed__));
+/* 40*/	u8 file_name_length;			/* Length of file name in
+						   (Unicode) characters. */
+/* 41*/	FILE_NAME_TYPE_FLAGS file_name_type;	/* Namespace of the file name.*/
+/* 42*/	uchar_t file_name[0];			/* File name in Unicode;
+						   file_name_length characters
+						   long. */
+} __attribute__ ((__packed__)) FILE_NAME_ATTR;
+#define _FEA(X)  SC(fer.fea,X)	/* Presumably names member X of the EA
+				   sub-struct; SC() is defined elsewhere -
+				   confirm. */
+#define _FER(X)  SC(fer,X)	/* Presumably names member X of the fer union;
+				   SC() is defined elsewhere - confirm. */
+/*
+ * GUID structures store globally unique identifiers (GUID). A GUID is a 
+ * 128-bit value consisting of one group of eight hexadecimal digits, followed
+ * by three groups of four hexadecimal digits each, followed by one group of
+ * twelve hexadecimal digits. GUIDs are Microsoft's implementation of the
+ * distributed computing environment (DCE) universally unique identifier (UUID).
+ * Example of a GUID:
+ * 	6B29FC40-CA47-1067-B31D-00DD010662DA
+ */
+typedef struct {
+	u32 data1;	/* The first eight hexadecimal digits of the GUID. */
+	u16 data2;	/* The first group of four hexadecimal digits. */
+	u16 data3;	/* The second group of four hexadecimal digits. */
+	u8 data4[8];	/* The first two bytes are the third group of four
+			   hexadecimal digits. The remaining six bytes are the
+			   final 12 hexadecimal digits. The whole structure is
+			   16 bytes (4 + 2 + 2 + 8). */
+} __attribute__ ((__packed__)) GUID;
+/*
+ * FILE_Extend/$ObjId contains an index named $O. This index contains all
+ * object_ids present on the volume as the index keys and the corresponding
+ * mft_record numbers as the index entry data parts. The data part (defined
+ * below) also contains three other object_ids:
+ *	birth_volume_id - object_id of FILE_Volume on which the file was first
+ *			  created. Optional (i.e. can be zero).
+ *	birth_object_id - object_id of file when it was first created. Usually
+ *			  equals the object_id. Optional (i.e. can be zero).
+ *	domain_id	- Reserved (always zero).
+ */
+typedef struct {
+	MFT_REF mft_reference;	/* Mft record containing the object_id in
+				   the index entry key. */
+	union {
+		struct {
+			GUID birth_volume_id;	/* Object_id of FILE_Volume on
+						   which the file was first
+						   created. Optional (can be
+						   zero). */
+			GUID birth_object_id;	/* Object_id of the file when
+						   it was first created.
+						   Optional (can be zero). */
+			GUID domain_id;		/* Reserved (always zero). */
+		} SN(obv) __attribute__ ((__packed__));
+		u8 extended_info[48];	/* The same 48 bytes viewed as raw
+					   bytes. */
+	} SN(oei) __attribute__ ((__packed__));
+} __attribute__ ((__packed__)) OBJ_ID_INDEX_DATA;
+/*
+ * Attribute: Object id (NTFS 3.0+) (0x40).
+ *
+ * NOTE: Always resident.
+ */
+typedef struct {
+	GUID object_id;				/* Unique id assigned to the
+						   file.*/
+	/* The following fields are optional. The attribute value size is 16
+	   bytes, i.e. sizeof(GUID), if these are not present at all. Note,
+	   the entries can be present but one or more (or all) can be zero,
+	   meaning that the corresponding value is not defined. */
+	union {
+		struct {
+			GUID birth_volume_id;	/* Unique id of volume on which
+						   the file was first created.*/
+			GUID birth_object_id;	/* Unique id of file when it was
+						   first created. */
+			GUID domain_id;		/* Reserved, zero. */
+		} SN(obv) __attribute__ ((__packed__));
+		u8 extended_info[48];		/* The same 48 bytes viewed as
+						   raw bytes. */
+	} SN(oei) __attribute__ ((__packed__));
+} __attribute__ ((__packed__)) OBJECT_ID_ATTR;
+#define _OBV(X)  SC(oei.obv,X)	/* Presumably names member X of the obv
+				   sub-struct; SC() is defined elsewhere -
+				   confirm. */
+/*
+ * The pre-defined IDENTIFIER_AUTHORITIES used as SID_IDENTIFIER_AUTHORITY in
+ * the SID structure (see below).
+ */
+//typedef enum {					/* SID string prefix. */
+//	SECURITY_NULL_SID_AUTHORITY	= {0, 0, 0, 0, 0, 0},	/* S-1-0 */
+//	SECURITY_WORLD_SID_AUTHORITY	= {0, 0, 0, 0, 0, 1},	/* S-1-1 */
+//	SECURITY_LOCAL_SID_AUTHORITY 	= {0, 0, 0, 0, 0, 2},	/* S-1-2 */
+//	SECURITY_CREATOR_SID_AUTHORITY	= {0, 0, 0, 0, 0, 3},	/* S-1-3 */
+//	SECURITY_NON_UNIQUE_AUTHORITY	= {0, 0, 0, 0, 0, 4},	/* S-1-4 */
+//	SECURITY_NT_SID_AUTHORITY	= {0, 0, 0, 0, 0, 5},	/* S-1-5 */
+//} IDENTIFIER_AUTHORITIES;
+/*
+ * These relative identifiers (RIDs) are used with the above identifier
+ * authorities to make up universal well-known SIDs.
+ * 	
+ * Note: The relative identifier (RID) refers to the portion of a SID, which
+ * identifies a user or group in relation to the authority that issued the SID.
+ * For example, the universal well-known SID Creator Owner ID (S-1-3-0) is
+ * made up of the identifier authority SECURITY_CREATOR_SID_AUTHORITY (3) and
+ * the relative identifier SECURITY_CREATOR_OWNER_RID (0).
+ */
+typedef enum {					/* Identifier authority. */
+	SECURITY_NULL_RID		  = 0,	/* S-1-0 */
+	SECURITY_WORLD_RID		  = 0,	/* S-1-1 */
+	SECURITY_LOCAL_RID		  = 0,	/* S-1-2 */
+	SECURITY_CREATOR_OWNER_RID	  = 0,	/* S-1-3 */
+	SECURITY_CREATOR_GROUP_RID	  = 1,	/* S-1-3 */
+	SECURITY_CREATOR_OWNER_SERVER_RID = 2,	/* S-1-3 */
+	SECURITY_CREATOR_GROUP_SERVER_RID = 3,	/* S-1-3 */
+	SECURITY_DIALUP_RID		  = 1,	/* S-1-5 */
+	SECURITY_NETWORK_RID		  = 2,	/* S-1-5 */
+	SECURITY_BATCH_RID		  = 3,	/* S-1-5 */
+	SECURITY_INTERACTIVE_RID	  = 4,	/* S-1-5 */
+	SECURITY_SERVICE_RID		  = 6,	/* S-1-5 */
+	SECURITY_ANONYMOUS_LOGON_RID	  = 7,	/* S-1-5 */
+	SECURITY_PROXY_RID		  = 8,	/* S-1-5 */
+	SECURITY_ENTERPRISE_CONTROLLERS_RID=9,
+	SECURITY_SERVER_LOGON_RID	  = 9,	/* S-1-5 */
+	SECURITY_PRINCIPAL_SELF_RID	  = 0xa,	/* S-1-5 */
+	SECURITY_AUTHENTICATED_USER_RID	  = 0xb,	/* S-1-5 */
+	SECURITY_RESTRICTED_CODE_RID	  = 0xc,	/* S-1-5 */
+	SECURITY_TERMINAL_SERVER_RID	  = 0xd,	/* S-1-5 */
+	SECURITY_LOGON_IDS_RID		  = 5,	/* S-1-5 (logon IDs) */
+	SECURITY_LOGON_IDS_RID_COUNT	  = 3,
+	SECURITY_LOCAL_SYSTEM_RID	  = 0x12,
+	SECURITY_NT_NON_UNIQUE		  = 0x15,	/* S-1-5 (NT non-unique IDs) */
+	SECURITY_BUILTIN_DOMAIN_RID	  = 0x20,	/* S-1-5 (built-in domain) */
+	/*
+	 * Well-known domain relative sub-authority values (RIDs).
+	 */
+	/* Users. */
+	DOMAIN_USER_RID_ADMIN		  = 0x1f4,
+	DOMAIN_USER_RID_GUEST		  = 0x1f5,
+	DOMAIN_USER_RID_KRBTGT		  = 0x1f6,
+	/* Groups. */
+	DOMAIN_GROUP_RID_ADMINS		  = 0x200,
+	DOMAIN_GROUP_RID_USERS		  = 0x201,
+	DOMAIN_GROUP_RID_GUESTS		  = 0x202,
+	DOMAIN_GROUP_RID_COMPUTERS	  = 0x203,
+	DOMAIN_GROUP_RID_CONTROLLERS	  = 0x204,
+	DOMAIN_GROUP_RID_CERT_ADMINS	  = 0x205,
+	DOMAIN_GROUP_RID_SCHEMA_ADMINS	  = 0x206,
+	DOMAIN_GROUP_RID_ENTERPRISE_ADMINS= 0x207,
+	DOMAIN_GROUP_RID_POLICY_ADMINS	  = 0x208,
+	/* Aliases. */
+	DOMAIN_ALIAS_RID_ADMINS		  = 0x220,	/* 544, as in the
+							   S-1-5-32-544 example
+							   given for SID. */
+	DOMAIN_ALIAS_RID_USERS		  = 0x221,
+	DOMAIN_ALIAS_RID_GUESTS		  = 0x222,
+	DOMAIN_ALIAS_RID_POWER_USERS	  = 0x223,
+	DOMAIN_ALIAS_RID_ACCOUNT_OPS	  = 0x224,
+	DOMAIN_ALIAS_RID_SYSTEM_OPS	  = 0x225,
+	DOMAIN_ALIAS_RID_PRINT_OPS	  = 0x226,
+	DOMAIN_ALIAS_RID_BACKUP_OPS	  = 0x227,
+	DOMAIN_ALIAS_RID_REPLICATOR	  = 0x228,
+	DOMAIN_ALIAS_RID_RAS_SERVERS	  = 0x229,
+	DOMAIN_ALIAS_RID_PREW2KCOMPACCESS = 0x22a,
+} RELATIVE_IDENTIFIERS;
+/*
+ * The universal well-known SIDs:
+ *
+ * 	NULL_SID			S-1-0-0
+ * 	WORLD_SID			S-1-1-0
+ * 	LOCAL_SID			S-1-2-0
+ * 	CREATOR_OWNER_SID		S-1-3-0
+ * 	CREATOR_GROUP_SID		S-1-3-1
+ * 	CREATOR_OWNER_SERVER_SID	S-1-3-2
+ * 	CREATOR_GROUP_SERVER_SID	S-1-3-3
+ *
+ * 	(Non-unique IDs)		S-1-4
+ *
+ * NT well-known SIDs:
+ * 
+ * 	NT_AUTHORITY_SID	S-1-5
+ * 	DIALUP_SID		S-1-5-1
+ *
+ * 	NETWORK_SID		S-1-5-2
+ * 	BATCH_SID		S-1-5-3
+ * 	INTERACTIVE_SID		S-1-5-4
+ * 	SERVICE_SID		S-1-5-6
+ * 	ANONYMOUS_LOGON_SID	S-1-5-7		(aka null logon session)
+ * 	PROXY_SID		S-1-5-8
+ * 	SERVER_LOGON_SID	S-1-5-9		(aka domain controller account)
+ * 	SELF_SID		S-1-5-10	(self RID)
+ * 	AUTHENTICATED_USER_SID	S-1-5-11
+ * 	RESTRICTED_CODE_SID	S-1-5-12	(running restricted code)
+ * 	TERMINAL_SERVER_SID	S-1-5-13	(running on terminal server)
+ *
+ * 	(Logon IDs)		S-1-5-5-X-Y
+ *
+ * 	(NT non-unique IDs)	S-1-5-0x15-...
+ *
+ * 	(Built-in domain)	S-1-5-0x20
+ */
+/*
+ * The SID_IDENTIFIER_AUTHORITY is a 48-bit value used in the SID structure.
+ *
+ * NOTE(review): The SID example in this file writes the authority as the
+ * bytes {0,0,0,0,0,5}, i.e. with the significant byte last. With the member
+ * order below, low_part overlays value[0..3] and high_part overlays
+ * value[4..5], so the "low"/"high" labels may not match that byte order -
+ * confirm before relying on low_part/high_part.
+ */
+typedef union {
+	struct {
+		u32 low_part;         /* Low 32-bits. */
+		u16 high_part;        /* High 16-bits. */
+	} SN(sia) __attribute__ ((__packed__));
+	u8 value[6];			/* Value as individual bytes. */
+} __attribute__ ((__packed__)) SID_IDENTIFIER_AUTHORITY;
+#define _SIA(X)  SC(sia,X)	/* Presumably names member X of the sia
+				   sub-struct; SC() is defined elsewhere -
+				   confirm. */
+/*
+ * The SID structure is a variable-length structure used to uniquely identify
+ * users or groups. SID stands for security identifier.
+ * 
+ * The standard textual representation of the SID is of the form:
+ * 	S-R-I-S-S...
+ * Where:
+ *    - The first "S" is the literal character 'S' identifying the following
+ * 	digits as a SID.
+ *    - R is the revision level of the SID expressed as a sequence of digits
+ *	either in decimal or hexadecimal (if the latter, prefixed by "0x").
+ *    - I is the 48-bit identifier_authority, expressed as digits as R above.
+ *    - S... is one or more sub_authority values, expressed as digits as above.
+ *    
+ * Example SID; the domain-relative SID of the local Administrators group on
+ * Windows NT/2k:
+ * 	S-1-5-32-544
+ * This translates to a SID with:
+ * 	revision = 1,
+ * 	sub_authority_count = 2,
+ * 	identifier_authority = {0,0,0,0,0,5},	// SECURITY_NT_AUTHORITY
+ * 	sub_authority[0] = 32,			// SECURITY_BUILTIN_DOMAIN_RID
+ * 	sub_authority[1] = 544			// DOMAIN_ALIAS_RID_ADMINS
+ */
+typedef struct {	/* Fixed 8-byte prefix followed by the sub_authority array. */
+	u8 revision;			/* Revision level ("R" in the textual
+					   form); SID_REVISION is the current
+					   value. */
+	u8 sub_authority_count;		/* Number of sub_authority values (the
+					   S-1-5-32-544 example has two). */
+	SID_IDENTIFIER_AUTHORITY identifier_authority;	/* 48-bit authority
+					   ("I" in the textual form). */
+	u32 sub_authority[1];		/* At least one sub_authority. Declared
+					   with one element, but the structure
+					   is variable-length, so further
+					   values may follow in memory. */
+} __attribute__ ((__packed__)) SID;
+/*
+ * Current constants for SIDs.
+ */
+typedef enum {	/* Plain integers (not wrapped in const_cpu_to_le*()). */
+	SID_REVISION			=  1,	/* Current revision level. */
+	SID_MAX_SUB_AUTHORITIES		= 15,	/* Maximum number of
+						   sub_authority values. */
+	SID_RECOMMENDED_SUB_AUTHORITIES	=  1,	/* Will change to around 6 in
+						   a future revision. */
+} SID_CONSTANTS;
+/*
+ * The predefined ACE types (8-bit, see below).
+ */
+typedef enum {	/* The *_MIN_* / *_MAX_* names are range markers that alias a real type. */
+	ACCESS_MIN_MS_ACE_TYPE		= 0, /* Same value as ACCESS_ALLOWED_ACE_TYPE. */
+	ACCESS_ALLOWED_ACE_TYPE		= 0,
+	ACCESS_DENIED_ACE_TYPE		= 1,
+	SYSTEM_AUDIT_ACE_TYPE		= 2,
+	SYSTEM_ALARM_ACE_TYPE		= 3, /* Not implemented as of Win2k. */
+	ACCESS_MAX_MS_V2_ACE_TYPE	= 3, /* Same value as SYSTEM_ALARM_ACE_TYPE. */
+	ACCESS_ALLOWED_COMPOUND_ACE_TYPE= 4,
+	ACCESS_MAX_MS_V3_ACE_TYPE	= 4, /* Same value as ACCESS_ALLOWED_COMPOUND_ACE_TYPE. */
+	/* The following are Win2k only. */
+	ACCESS_MIN_MS_OBJECT_ACE_TYPE	= 5, /* First of the object ACE types (5 to 8). */
+	ACCESS_ALLOWED_OBJECT_ACE_TYPE	= 5,
+	ACCESS_DENIED_OBJECT_ACE_TYPE	= 6,
+	SYSTEM_AUDIT_OBJECT_ACE_TYPE	= 7,
+	SYSTEM_ALARM_OBJECT_ACE_TYPE	= 8,
+	ACCESS_MAX_MS_OBJECT_ACE_TYPE	= 8, /* Last of the object ACE types. */
+	ACCESS_MAX_MS_V4_ACE_TYPE	= 8,
+	/* This one is for WinNT&2k. Highest value defined in this enum. */
+	ACCESS_MAX_MS_ACE_TYPE		= 8,
+} __attribute__ ((__packed__)) ACE_TYPES;
+/*
+ * The ACE flags (8-bit) for audit and inheritance (see below).
+ *
+ * SUCCESSFUL_ACCESS_ACE_FLAG is only used with system audit and alarm ACE
+ * types to indicate that a message is generated (in Windows!) for successful
+ * accesses.
+ *
+ * FAILED_ACCESS_ACE_FLAG is only used with system audit and alarm ACE types
+ * to indicate that a message is generated (in Windows!) for failed accesses.
+ */
+typedef enum {	/* Bit flags; bit 0x20 is not assigned in this enum. */
+	/* The inheritance flags. */
+	OBJECT_INHERIT_ACE		= 0x01,
+	CONTAINER_INHERIT_ACE		= 0x02,
+	NO_PROPAGATE_INHERIT_ACE	= 0x04,
+	INHERIT_ONLY_ACE		= 0x08,
+	INHERITED_ACE			= 0x10,	/* Win2k only. */
+	VALID_INHERIT_FLAGS		= 0x1f,	/* OR of the five inheritance
+						   flags above. */
+	/* The audit flags. */
+	SUCCESSFUL_ACCESS_ACE_FLAG	= 0x40,
+	FAILED_ACCESS_ACE_FLAG		= 0x80,
+} __attribute__ ((__packed__)) ACE_FLAGS;
+/*
+ * An ACE is an access-control entry in an access-control list (ACL).
+ * An ACE defines access to an object for a specific user or group or defines
+ * the types of access that generate system-administration messages or alarms
+ * for a specific user or group. The user or group is identified by a security
+ * identifier (SID).
+ *
+ * Each ACE starts with an ACE_HEADER structure (aligned on 4-byte boundary),
+ * which specifies the type and size of the ACE. The format of the subsequent
+ * data depends on the ACE type.
+ */
+typedef struct {	/* Common prefix of every ACE structure defined in this file. */
+	ACE_TYPES type;		/* Type of the ACE. One of the ACE_TYPES values;
+				   selects the layout of the data that follows
+				   this header. */
+	ACE_FLAGS flags;	/* Flags describing the ACE. Combination of the
+				   ACE_FLAGS inheritance and audit bits. */
+	u16 size;		/* Size in bytes of the ACE. */
+} __attribute__ ((__packed__)) ACE_HEADER;
+/*
+ * The access mask (32-bit). Defines the access rights.
+ */
+typedef enum {	/* All values are stored little endian via const_cpu_to_le32(). */
+	/*
+	 * The specific rights (bits 0 to 15). Depend on the type of the
+	 * object being secured by the ACE.
+	 */
+	/*
+	 * Specific rights for files and directories are as follows. Note that
+	 * several bits have two names: one for use with files (FILE) and one
+	 * for use with directories (DIRECTORY).
+	 */
+	/* Right to read data from the file. (FILE) */
+	FILE_READ_DATA			= const_cpu_to_le32(0x00000001),
+	/* Right to list contents of a directory. (DIRECTORY) */
+	FILE_LIST_DIRECTORY		= const_cpu_to_le32(0x00000001),
+	/* Right to write data to the file. (FILE) */
+	FILE_WRITE_DATA			= const_cpu_to_le32(0x00000002),
+	/* Right to create a file in the directory. (DIRECTORY) */
+	FILE_ADD_FILE			= const_cpu_to_le32(0x00000002),
+	/* Right to append data to the file. (FILE) */
+	FILE_APPEND_DATA		= const_cpu_to_le32(0x00000004),
+	/* Right to create a subdirectory. (DIRECTORY) */
+	FILE_ADD_SUBDIRECTORY		= const_cpu_to_le32(0x00000004),
+	/* Right to read extended attributes. (FILE/DIRECTORY) */
+	FILE_READ_EA			= const_cpu_to_le32(0x00000008),
+	/* Right to write extended attributes. (FILE/DIRECTORY) */
+	FILE_WRITE_EA			= const_cpu_to_le32(0x00000010),
+	/* Right to execute a file. (FILE) */
+	FILE_EXECUTE			= const_cpu_to_le32(0x00000020),
+	/* Right to traverse the directory. (DIRECTORY) */
+	FILE_TRAVERSE			= const_cpu_to_le32(0x00000020),
+	/*
+	 * Right to delete a directory and all the files it contains (its
+	 * children), even if the files are read-only. (DIRECTORY)
+	 */
+	FILE_DELETE_CHILD		= const_cpu_to_le32(0x00000040),
+	/* Right to read file attributes. (FILE/DIRECTORY) */
+	FILE_READ_ATTRIBUTES		= const_cpu_to_le32(0x00000080),
+	/* Right to change file attributes. (FILE/DIRECTORY) */
+	FILE_WRITE_ATTRIBUTES		= const_cpu_to_le32(0x00000100),
+	/*
+	 * The standard rights (bits 16 to 23). Are independent of the type of
+	 * object being secured. Only bits 16 to 20 are given names here.
+	 */
+	/* Right to delete the object. */
+	DELETE				= const_cpu_to_le32(0x00010000),
+	/*
+	 * Right to read the information in the object's security descriptor,
+	 * not including the information in the SACL. I.e. right to read the
+	 * security descriptor and owner.
+	 */
+	READ_CONTROL			= const_cpu_to_le32(0x00020000),
+	/* Right to modify the DACL in the object's security descriptor. */
+	WRITE_DAC			= const_cpu_to_le32(0x00040000),
+	/* Right to change the owner in the object's security descriptor. */
+	WRITE_OWNER			= const_cpu_to_le32(0x00080000),
+	/*
+	 * Right to use the object for synchronization. Enables a process to
+	 * wait until the object is in the signalled state. Some object types
+	 * do not support this access right.
+	 */
+	SYNCHRONIZE			= const_cpu_to_le32(0x00100000),
+	/*
+	 * The following STANDARD_RIGHTS_* are combinations of the above for
+	 * convenience and are defined by the Win32 API.
+	 */
+	/* These are currently defined to READ_CONTROL. */
+	STANDARD_RIGHTS_READ		= const_cpu_to_le32(0x00020000),
+	STANDARD_RIGHTS_WRITE		= const_cpu_to_le32(0x00020000),
+	STANDARD_RIGHTS_EXECUTE		= const_cpu_to_le32(0x00020000),
+	/* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */
+	STANDARD_RIGHTS_REQUIRED	= const_cpu_to_le32(0x000f0000),
+	/*
+	 * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and
+	 * SYNCHRONIZE access.
+	 */
+	STANDARD_RIGHTS_ALL		= const_cpu_to_le32(0x001f0000),
+	/*
+	 * The access system ACL and maximum allowed access types (bits 24 to
+	 * 25, bits 26 to 27 are reserved).
+	 */
+	ACCESS_SYSTEM_SECURITY		= const_cpu_to_le32(0x01000000),
+	MAXIMUM_ALLOWED			= const_cpu_to_le32(0x02000000),
+	/*
+	 * The generic rights (bits 28 to 31). These map onto the standard and
+	 * specific rights.
+	 */
+	/* Read, write, and execute access. */
+	GENERIC_ALL			= const_cpu_to_le32(0x10000000),
+	/* Execute access. */
+	GENERIC_EXECUTE			= const_cpu_to_le32(0x20000000),
+	/*
+	 * Write access. For files, this maps onto:
+	 *	FILE_APPEND_DATA | FILE_WRITE_ATTRIBUTES | FILE_WRITE_DATA |
+	 *	FILE_WRITE_EA | STANDARD_RIGHTS_WRITE | SYNCHRONIZE
+	 * For directories, the mapping has the same numerical value. See
+	 * above for the descriptions of the rights granted.
+	 */
+	GENERIC_WRITE			= const_cpu_to_le32(0x40000000),
+	/*
+	 * Read access. For files, this maps onto:
+	 *	FILE_READ_ATTRIBUTES | FILE_READ_DATA | FILE_READ_EA |
+	 *	STANDARD_RIGHTS_READ | SYNCHRONIZE
+	 * For directories, the mapping has the same numerical value. See
+	 * above for the descriptions of the rights granted.
+	 */
+	GENERIC_READ			= const_cpu_to_le32(0x80000000),
+} ACCESS_MASK;
+/*
+ * The generic mapping array. Used to denote the mapping of each generic
+ * access right to a specific access mask.
+ * 
+ * FIXME: What exactly is this and what is it for? (AIA)
+ */
+typedef struct {	/* One ACCESS_MASK per generic right. */
+	ACCESS_MASK generic_read;	/* Mask that GENERIC_READ maps to. */
+	ACCESS_MASK generic_write;	/* Mask that GENERIC_WRITE maps to. */
+	ACCESS_MASK generic_execute;	/* Mask that GENERIC_EXECUTE maps to. */
+	ACCESS_MASK generic_all;	/* Mask that GENERIC_ALL maps to. */
+} __attribute__ ((__packed__)) GENERIC_MAPPING;
+/*
+ * The predefined ACE type structures are as defined below.
+ */
+/*
+ * ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE
+ */
+typedef struct {	/* One layout shared by the four ACE typedef names below. */
+	ACE_HEADER SN(aah);		/* The ACE header. Its type member tells
+					   the four kinds of ACE apart. */
+	ACCESS_MASK mask;	/* Access mask associated with the ACE. */
+	SID sid;		/* The SID associated with the ACE. Placed last
+				   because a SID is variable-length. */
+} __attribute__ ((__packed__)) ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE,
+			       SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE;
+#define _AAH(X)  SC(aah,X)
+/*
+ * The object ACE flags (32-bit).
+ */
+typedef enum {	/* Stored little endian via const_cpu_to_le32(). */
+	ACE_OBJECT_TYPE_PRESENT			= const_cpu_to_le32(1), /* Presumably: the object_type GUID of the object ACE is in use - TODO confirm. */
+	ACE_INHERITED_OBJECT_TYPE_PRESENT	= const_cpu_to_le32(2), /* Presumably: the inherited_object_type GUID is in use - TODO confirm. */
+} OBJECT_ACE_FLAGS;
+typedef struct {	/* Object ACE layout, shared by the four typedef names below. */
+	ACE_HEADER SN(aah);	/* The ACE_HEADER. */
+	ACCESS_MASK mask;	/* Access mask associated with the ACE. */
+	OBJECT_ACE_FLAGS flags;	/* Flags describing the object ACE.
+				   NOTE(review): ACE_HEADER also has a member
+				   named flags; if SN(aah) can expand to an
+				   unnamed member the two names would collide -
+				   confirm against the definition of SN(). */
+	GUID object_type;
+	GUID inherited_object_type;
+	SID sid;		/* The SID associated with the ACE. Placed last
+				   because a SID is variable-length. */
+} __attribute__ ((__packed__)) ACCESS_ALLOWED_OBJECT_ACE,
+			       ACCESS_DENIED_OBJECT_ACE,
+			       SYSTEM_AUDIT_OBJECT_ACE,
+			       SYSTEM_ALARM_OBJECT_ACE;
+/*
+ * An ACL is an access-control list (ACL).
+ * An ACL starts with an ACL header structure, which specifies the size of
+ * the ACL and the number of ACEs it contains. The ACL header is followed by
+ * zero or more access control entries (ACEs). The ACL as well as each ACE
+ * are aligned on 4-byte boundaries.
+ */
+typedef struct {	/* ACL header; the ACEs themselves follow this structure. */
+	u8 revision;	/* Revision of this ACL. See ACL_CONSTANTS. */
+	u8 alignment1;	/* Padding so that size starts at byte offset 2. */
+	u16 size;	/* Allocated space in bytes for ACL. Includes this
+			   header, the ACEs and the remaining free space. */
+	u16 ace_count;	/* Number of ACEs in the ACL. */
+	u16 alignment2;	/* Padding; brings the header up to 8 bytes. */
+/* sizeof() = 8 bytes */
+} __attribute__ ((__packed__)) ACL;
+/*
+ * Current constants for ACLs.
+ */
+typedef enum {	/* Plain integers (not wrapped in const_cpu_to_le*()). */
+	/* Current revision. */
+	ACL_REVISION		= 2,	/* Same value as ACL_REVISION2. */
+	ACL_REVISION_DS		= 4,	/* Same value as ACL_REVISION4. */
+	/* History of revisions. */
+	ACL_REVISION1		= 1,
+	MIN_ACL_REVISION	= 2,	/* Lower bound; equals ACL_REVISION2, so
+					   ACL_REVISION1 lies below it. */
+	ACL_REVISION2		= 2,
+	ACL_REVISION3		= 3,
+	ACL_REVISION4		= 4,
+	MAX_ACL_REVISION	= 4,	/* Upper bound; equals ACL_REVISION4. */
+} ACL_CONSTANTS;
+/*
+ * The security descriptor control flags (16-bit).
+ *
+ * SE_OWNER_DEFAULTED - This boolean flag, when set, indicates that the
+ *          SID pointed to by the Owner field was provided by a
+ *          defaulting mechanism rather than explicitly provided by the
+ *          original provider of the security descriptor.  This may
+ *          affect the treatment of the SID with respect to inheritance
+ *          of an owner.
+ *
+ * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the
+ *          SID in the Group field was provided by a defaulting mechanism
+ *          rather than explicitly provided by the original provider of
+ *          the security descriptor.  This may affect the treatment of
+ *          the SID with respect to inheritance of a primary group.
+ *
+ * SE_DACL_PRESENT - This boolean flag, when set, indicates that the
+ *          security descriptor contains a discretionary ACL.  If this
+ *          flag is set and the Dacl field of the SECURITY_DESCRIPTOR is
+ *          null, then a null ACL is explicitly being specified.
+ *
+ * SE_DACL_DEFAULTED - This boolean flag, when set, indicates that the
+ *          ACL pointed to by the Dacl field was provided by a defaulting
+ *          mechanism rather than explicitly provided by the original
+ *          provider of the security descriptor.  This may affect the
+ *          treatment of the ACL with respect to inheritance of an ACL.
+ *          This flag is ignored if the DaclPresent flag is not set.
+ *
+ * SE_SACL_PRESENT - This boolean flag, when set,  indicates that the
+ *          security descriptor contains a system ACL pointed to by the
+ *          Sacl field.  If this flag is set and the Sacl field of the
+ *          SECURITY_DESCRIPTOR is null, then an empty (but present)
+ *          ACL is being specified.
+ *
+ * SE_SACL_DEFAULTED - This boolean flag, when set, indicates that the
+ *          ACL pointed to by the Sacl field was provided by a defaulting
+ *          mechanism rather than explicitly provided by the original
+ *          provider of the security descriptor.  This may affect the
+ *          treatment of the ACL with respect to inheritance of an ACL.
+ *          This flag is ignored if the SaclPresent flag is not set.
+ *
+ * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the
+ *          security descriptor is in self-relative form.  In this form,
+ *          all fields of the security descriptor are contiguous in memory
+ *          and all pointer fields are expressed as offsets from the
+ *          beginning of the security descriptor.
+ */
+typedef enum {	/* Stored little endian via const_cpu_to_le16(). Bits 0x0040 and 0x0080 are not assigned here. */
+	SE_OWNER_DEFAULTED		= const_cpu_to_le16(0x0001),
+	SE_GROUP_DEFAULTED		= const_cpu_to_le16(0x0002),
+	SE_DACL_PRESENT			= const_cpu_to_le16(0x0004),
+	SE_DACL_DEFAULTED		= const_cpu_to_le16(0x0008),
+	SE_SACL_PRESENT			= const_cpu_to_le16(0x0010),
+	SE_SACL_DEFAULTED		= const_cpu_to_le16(0x0020),
+	/* The flags from here up to and including SE_RM_CONTROL_VALID are not
+	   described in the comment preceding this enum. */
+	SE_DACL_AUTO_INHERIT_REQ	= const_cpu_to_le16(0x0100),
+	SE_SACL_AUTO_INHERIT_REQ	= const_cpu_to_le16(0x0200),
+	SE_DACL_AUTO_INHERITED		= const_cpu_to_le16(0x0400),
+	SE_SACL_AUTO_INHERITED		= const_cpu_to_le16(0x0800),
+	SE_DACL_PROTECTED		= const_cpu_to_le16(0x1000),
+	SE_SACL_PROTECTED		= const_cpu_to_le16(0x2000),
+	SE_RM_CONTROL_VALID		= const_cpu_to_le16(0x4000),
+	SE_SELF_RELATIVE		= const_cpu_to_le16(0x8000),
+} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_CONTROL;
+/*
+ * Self-relative security descriptor. Contains the owner and group SIDs as well
+ * as the sacl and dacl ACLs inside the security descriptor itself.
+ */
+typedef struct {	/* Header only; the SIDs and ACLs are located via the offsets below. */
+	u8 revision;	/* Revision level of the security descriptor. */
+	u8 alignment;	/* Padding so that control starts at byte offset 2. */
+	SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of
+			   the descriptor as well as the following fields. */
+	u32 owner;	/* Byte offset to a SID representing an object's
+			   owner. If this is zero, no owner SID is present in
+			   the descriptor. */
+	u32 group;	/* Byte offset to a SID representing an object's
+			   primary group. If this is zero, no primary group
+			   SID is present in the descriptor. */
+	u32 sacl;	/* Byte offset to a system ACL. Only valid, if
+			   SE_SACL_PRESENT is set in the control field. If
+			   SE_SACL_PRESENT is set but sacl is zero, a NULL ACL
+			   is specified. */
+	u32 dacl;	/* Byte offset to a discretionary ACL. Only valid, if 
+			   SE_DACL_PRESENT is set in the control field. If
+			   SE_DACL_PRESENT is set but dacl is zero, a NULL ACL
+			   (unconditionally granting access) is specified. */
+/* sizeof() = 0x14 bytes */
+/* All four offsets are counted from the beginning of the security descriptor
+   (see the description of SE_SELF_RELATIVE). */
+} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_RELATIVE;
+/*
+ * Absolute security descriptor. Does not contain the owner and group SIDs, nor
+ * the sacl and dacl ACLs inside the security descriptor. Instead, it contains
+ * pointers to these structures in memory. Obviously, absolute security
+ * descriptors are only useful for in memory representations of security
+ * descriptors. On disk, a self-relative security descriptor is used.
+ */
+typedef struct {	/* In-memory form: same leading fields as the self-relative form, pointers instead of offsets. */
+	u8 revision;	/* Revision level of the security descriptor. */
+	u8 alignment;	/* Padding so that control starts at byte offset 2. */
+	SECURITY_DESCRIPTOR_CONTROL control;	/* Flags qualifying the type of 
+			   the descriptor as well as the following fields. */
+	SID *owner;	/* Points to a SID representing an object's owner. If
+			   this is NULL, no owner SID is present in the
+			   descriptor. */
+	SID *group;	/* Points to a SID representing an object's primary
+			   group. If this is NULL, no primary group SID is
+			   present in the descriptor. */
+	ACL *sacl;	/* Points to a system ACL. Only valid, if
+			   SE_SACL_PRESENT is set in the control field. If
+			   SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL
+			   is specified. */
+	ACL *dacl;	/* Points to a discretionary ACL. Only valid, if 
+			   SE_DACL_PRESENT is set in the control field. If
+			   SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL
+			   (unconditionally granting access) is specified. */
+/* The size of this structure depends on the pointer width of the target
+   because it holds four pointers. */
+} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR;
+/*
+ * Current constants for security descriptors.
+ */
+typedef enum {
+	/* Current revision. */
+	SECURITY_DESCRIPTOR_REVISION	= 1,
+	SECURITY_DESCRIPTOR_REVISION1	= 1,
+	/* The sizes of the absolute and the relative security descriptors are
+	   the same when pointers are 32-bit, as they are on the ia32
+	   architecture.
+	   NOTE(review): this takes sizeof() of the absolute form, which holds
+	   four pointers; with wider pointers it no longer equals the 0x14
+	   bytes of SECURITY_DESCRIPTOR_RELATIVE - confirm which size the
+	   users of this constant expect. */
+	SECURITY_DESCRIPTOR_MIN_LENGTH	= sizeof(SECURITY_DESCRIPTOR),
+} SECURITY_DESCRIPTOR_CONSTANTS;
+/*
+ * Attribute: Security descriptor (0x50). A standard self-relative security
+ * descriptor.
+ *
+ * NOTE: Can be resident or non-resident.
+ * NOTE: Not used in NTFS 3.0+, as security descriptors are stored centrally
+ * in FILE_Secure and the correct descriptor is found using the security_id
+ * from the standard information attribute.
+ */
+typedef SECURITY_DESCRIPTOR_RELATIVE SECURITY_DESCRIPTOR_ATTR; /* Alias only: the attribute value is a self-relative security descriptor. */
+/*
+ * On NTFS 3.0+, all security descriptors are stored in FILE_Secure. Only one
+ * referenced instance of each unique security descriptor is stored.
+ * 
+ * FILE_Secure contains no unnamed data attribute, i.e. it has zero length. It
+ * does, however, contain two indexes ($SDH and $SII) as well as a named data
+ * stream ($SDS).
+ * 
+ * Every unique security descriptor is assigned a unique security identifier
+ * (security_id, not to be confused with a SID). The security_id is unique for
+ * the NTFS volume and is used as an index into the $SII index, which maps
+ * security_ids to the security descriptor's storage location within the $SDS
+ * data attribute. The $SII index is sorted by ascending security_id.
+ *
+ * A simple hash is computed from each security descriptor. This hash is used
+ * as an index into the $SDH index, which maps security descriptor hashes to
+ * the security descriptor's storage location within the $SDS data attribute.
+ * The $SDH index is sorted by security descriptor hash and is stored in a B+
+ * tree. When searching $SDH (with the intent of determining whether or not a
+ * new security descriptor is already present in the $SDS data stream), if a
+ * matching hash is found, but the security descriptors do not match, the
+ * search in the $SDH index is continued, searching for a next matching hash.
+ * 
+ * When a precise match is found, the security_id corresponding to the security
+ * descriptor in the $SDS attribute is read from the found $SDH index entry and
+ * is stored in the $STANDARD_INFORMATION attribute of the file/directory to
+ * which the security descriptor is being applied. The $STANDARD_INFORMATION
+ * attribute is present in all base mft records (i.e. in all files and 
+ * directories).
+ *
+ * If a match is not found, the security descriptor is assigned a new unique
+ * security_id and is added to the $SDS data attribute. Then, entries
+ * referencing this security descriptor in the $SDS data attribute are
+ * added to the $SDH and $SII indexes.
+ *
+ * Note: Entries are never deleted from FILE_Secure, even if nothing
+ * references an entry any more.
+ */
+/*
+ * This header precedes each security descriptor in the $SDS data stream.
+ * This is also the index entry data part of both the $SII and $SDH indexes.
+ */
+typedef struct {	/* Packed: 4 + 4 + 8 + 4 = 20 bytes. */
+	u32 hash;	   /* Hash of the security descriptor. Key of the $SDH
+			      index. */
+	u32 security_id;   /* The security_id assigned to the descriptor. Key
+			      of the $SII index. */
+	u64 offset;	   /* Byte offset of this entry in the $SDS stream. */
+	u32 length;	   /* Size in bytes of this entry in $SDS stream. */
+} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_HEADER;
+/*
+ * The $SDS data stream contains the security descriptors, aligned on 16-byte
+ * boundaries, sorted by security_id in a B+ tree. Security descriptors cannot
+ * cross 256kib boundaries (this restriction is imposed by the Windows cache
+ * manager). Each security descriptor is contained in a SDS_ENTRY structure.
+ * Also, each security descriptor is stored twice in the $SDS stream with a
+ * fixed offset of 0x40000 bytes (256kib, the Windows cache manager's max size)
+ * between them; i.e. if a SDS_ENTRY specifies an offset of 0x51d0, then the
+ * first copy of the security descriptor will be at offset 0x51d0 in the
+ * $SDS data stream and the second copy will be at offset 0x451d0.
+ */
+typedef struct {	/* One record of the $SDS stream: header plus descriptor. */
+	SECURITY_DESCRIPTOR_HEADER SN(sdh);	  /* The security descriptor header. */
+	SECURITY_DESCRIPTOR_RELATIVE sid; /* The self-relative security
+					     descriptor. Note that, despite the
+					     member name, this is a security
+					     descriptor and not a SID. */
+} __attribute__ ((__packed__)) SDS_ENTRY;
+#define _SDH(X)  SC(sdh,X)
+/*
+ * The index entry key used in the $SII index. The collation type is
+ * COLLATION_NTOFS_ULONG. 
+ */
+typedef struct {	/* Single 32-bit key. */
+	u32 security_id; /* The security_id assigned to the descriptor. The
+			    $SII index is sorted by ascending security_id. */
+} __attribute__ ((__packed__)) SII_INDEX_KEY;
+/*
+ * The index entry key used in the $SDH index. The keys are sorted first by
+ * hash and then by security_id. The collation rule is
+ * COLLATION_NTOFS_SECURITY_HASH.
+ */
+typedef struct {	/* Same two leading members as SECURITY_DESCRIPTOR_HEADER. */
+	u32 hash;	   /* Hash of the security descriptor. Primary sort
+			      key. */
+	u32 security_id;   /* The security_id assigned to the descriptor.
+			      Secondary sort key. */
+} __attribute__ ((__packed__)) SDH_INDEX_KEY;
+/*
+ * Attribute: Volume name (0x60).
+ * 
+ * NOTE: Always resident.
+ * NOTE: Present only in FILE_Volume.
+ */
+typedef struct {	/* Has no fixed-size members. */
+	uchar_t name[0];		/* The name of the volume in Unicode.
+					   Zero-length array: the length of the
+					   name is not stored in this
+					   structure. */
+} __attribute__ ((__packed__)) VOLUME_NAME;
+/*
+ * Possible flags for the volume (16-bit).
+ */
+typedef enum {	/* Stored little endian via const_cpu_to_le16(). */
+	VOLUME_IS_DIRTY			= const_cpu_to_le16(0x0001),
+	VOLUME_RESIZE_LOG_FILE		= const_cpu_to_le16(0x0002),
+	VOLUME_UPGRADE_ON_MOUNT 	= const_cpu_to_le16(0x0004),
+	VOLUME_MOUNTED_ON_NT4		= const_cpu_to_le16(0x0008),
+	VOLUME_DELETE_USN_UNDERWAY	= const_cpu_to_le16(0x0010),
+	VOLUME_REPAIR_OBJECT_ID		= const_cpu_to_le16(0x0020),
+	VOLUME_MODIFIED_BY_CHKDSK	= const_cpu_to_le16(0x8000),
+	VOLUME_FLAGS_MASK		= const_cpu_to_le16(0x803f), /* OR of all the flags above. */
+} __attribute__ ((__packed__)) VOLUME_FLAGS;
+/*
+ * Attribute: Volume information (0x70).
+ *
+ * NOTE: Always resident.
+ * NOTE: Present only in FILE_Volume.
+ * NOTE: Windows 2000 uses NTFS 3.0 while Windows NT4 service pack 6a uses
+ * 	 NTFS 1.2. I haven't personally seen other values yet.
+ */
+typedef struct {	/* Version and state of the volume. */
+	u64 reserved;		/* Not used (yet?). */
+	u8 major_ver;		/* Major version of the ntfs format, e.g. 3
+				   for NTFS 3.0 or 1 for NTFS 1.2. */
+	u8 minor_ver;		/* Minor version of the ntfs format, e.g. 0
+				   for NTFS 3.0 or 2 for NTFS 1.2. */
+	VOLUME_FLAGS flags;	/* Bit array of VOLUME_* flags. */
+} __attribute__ ((__packed__)) VOLUME_INFORMATION;
+/*
+ * Attribute: Data attribute (0x80).
+ *
+ * NOTE: Can be resident or non-resident.
+ *
+ * Data contents of a file (i.e. the unnamed stream) or of a named stream.
+ */
+typedef struct {	/* Has no fixed-size members. */
+	u8 data[0];		/* The file's data contents. Zero-length array:
+				   the amount of data is not stored in this
+				   structure. */
+} __attribute__ ((__packed__)) DATA_ATTR;
+/*
+ * Index header flags (8-bit).
+ */
+typedef enum {	/* The same bit carries two pairs of names, depending on where the index header lives. */
+	/* When index header is in an index root attribute: */
+	SMALL_INDEX	= 0, /* The index is small enough to fit inside the
+				index root attribute and there is no index
+				allocation attribute present. */
+	LARGE_INDEX	= 1, /* The index is too large to fit in the index
+				root attribute and/or an index allocation
+				attribute is present. */
+	/*
+	 * When index header is in an index block, i.e. is part of index
+	 * allocation attribute:
+	 */
+	LEAF_NODE	= 0, /* This is a leaf node, i.e. there are no more
+			        nodes branching off it. */
+	INDEX_NODE	= 1, /* This node indexes other nodes, i.e. is not a
+				leaf node. */
+	NODE_MASK	= 1, /* Mask for accessing the *_NODE bits. Only bit 0
+				is assigned a meaning by this enum. */
+} __attribute__ ((__packed__)) INDEX_HEADER_FLAGS;
+/*
+ * This is the header for indexes, describing the INDEX_ENTRY records, which
+ * follow the INDEX_HEADER. Together the index header and the index entries
+ * make up a complete index.
+ *
+ * IMPORTANT NOTE: The offset, length and size structure members are counted
+ * relative to the start of the index header structure and not relative to the
+ * start of the index root or index allocation structures themselves.
+ */
+typedef struct {	/* Packed: 4 + 4 + 4 + 1 + 3 = 16 bytes. */
+	u32 entries_offset;		/* Byte offset to first INDEX_ENTRY
+					   aligned to 8-byte boundary. */
+	u32 index_length;		/* Data size of the index in bytes,
+					   i.e. bytes used from allocated
+					   size, aligned to 8-byte boundary. */
+	u32 allocated_size;		/* Byte size of this index (block),
+					   multiple of 8 bytes. */
+	/* NOTE: For the index root attribute, the above two numbers (i.e.
+	   index_length and allocated_size) are always
+	   equal, as the attribute is resident and it is resized as needed. In
+	   the case of the index allocation attribute the attribute is not
+	   resident and hence the allocated_size is a fixed value and must
+	   equal the index_block_size specified by the INDEX_ROOT attribute
+	   corresponding to the INDEX_ALLOCATION attribute this INDEX_BLOCK
+	   belongs to. */
+	INDEX_HEADER_FLAGS flags;	/* Bit field of INDEX_HEADER_FLAGS. */
+	u8 reserved[3];			/* Reserved/align to 8-byte boundary. */
+} __attribute__ ((__packed__)) INDEX_HEADER;
+/*
+ * Attribute: Index root (0x90).
+ *
+ * NOTE: Always resident.
+ *
+ * This is followed by a sequence of index entries (INDEX_ENTRY structures)
+ * as described by the index header.
+ *
+ * When a directory is small enough to fit inside the index root then this
+ * is the only attribute describing the directory. When the directory is too
+ * large to fit in the index root, on the other hand, two additional attributes
+ * are present: an index allocation attribute, containing sub-nodes of the B+
+ * directory tree (see below), and a bitmap attribute, describing which virtual
+ * cluster numbers (vcns) in the index allocation attribute are in use by an
+ * index block.
+ *
+ * NOTE: The root directory (FILE_root) contains an entry for itself. Other
+ * directories do not contain entries for themselves, though.
+ */
+typedef struct {	/* Fixed part of the index root; the index entries follow it. */
+	ATTR_TYPES type;		/* Type of the indexed attribute. Is
+					   $FILE_NAME for directories, zero
+					   for view indexes. No other values
+					   allowed. */
+	COLLATION_RULES collation_rule;	/* Collation rule used to sort the
+					   index entries. If type is $FILE_NAME,
+					   this must be COLLATION_FILE_NAME. */
+	u32 index_block_size;		/* Size of each index block in bytes (in
+					   the index allocation attribute). */
+	u8 clusters_per_index_block;	/* Size of each index block in clusters
+					   (in the index allocation attribute),
+					   when an index block is at least as
+					   large as a cluster; otherwise this
+					   will be the log of the size (like
+					   how the encoding of
+					   the mft record size and the index
+					   record size found in the boot sector
+					   work). Has to be a power of 2. */
+	u8 reserved[3];			/* Reserved/align to 8-byte boundary. */
+	INDEX_HEADER index;		/* Index header describing the
+					   following index entries. Its offset
+					   and size members are counted from
+					   the start of this member and not
+					   from the start of INDEX_ROOT. */
+} __attribute__ ((__packed__)) INDEX_ROOT;
+/*
+ * Attribute: Index allocation (0xa0).
+ *
+ * NOTE: Always non-resident (doesn't make sense to be resident anyway!).
+ * 
+ * This is an array of index blocks. Each index block starts with an
+ * INDEX_BLOCK structure containing an index header, followed by a sequence of
+ * index entries (INDEX_ENTRY structures), as described by the INDEX_HEADER.
+ */
+typedef struct {
+/*  0*/	NTFS_RECORD SN(inr);	/* Magic is "INDX". */
+/*  8*/	s64 lsn;		/* $LogFile sequence number of the last
+				   modification of this index block. */
+/* 16*/	VCN index_block_vcn;	/* Virtual cluster number of the index block.
+				   If the cluster_size on the volume is <= the
+				   index_block_size of the directory,
+				   index_block_vcn counts in units of clusters,
+				   and in units of sectors otherwise. */
+/* 24*/	INDEX_HEADER index;	/* Describes the following index entries. Its
+				   offset and size members are counted from the
+				   start of this member (byte 24) and not from
+				   the start of the INDEX_BLOCK. */
+/* sizeof()= 40 (0x28) bytes */
+/*
+ * When creating the index block, we place the update sequence array at this
+ * offset (i.e. at byte 40, directly after the fixed part of the structure),
+ * i.e. before we start with the index entries. This also makes sense,
+ * otherwise we could run into problems with the update sequence array
+ * containing in itself the last two bytes of a sector which would mean that
+ * multi sector transfer protection wouldn't work. As you can't protect data
+ * by overwriting it since you then can't get it back...
+ * When reading use the data from the ntfs record header.
+ */
+} __attribute__ ((__packed__)) INDEX_BLOCK;
+#define _INR(X)  SC(inr,X)
+typedef INDEX_BLOCK INDEX_ALLOCATION; /* Alias only: the index allocation attribute is an array of index blocks. */
+/*
+ * The system file FILE_Extend/$Reparse contains an index named $R listing
+ * all reparse points on the volume. The index entry keys are as defined
+ * below. Note, that there is no index data associated with the index entries.
+ *
+ * The index entries are sorted by the index key file_id. The collation rule is
+ * COLLATION_NTOFS_ULONGS. FIXME: Verify whether the reparse_tag is not the
+ * primary key / is not a key at all. (AIA)
+ */
+typedef struct {	/* Key only; the $R index entries carry no index data. */
+	u32 reparse_tag;	/* Reparse point type (inc. flags). */
+	MFT_REF file_id;	/* Mft record of the file containing the
+				   reparse point attribute. The index entries
+				   are sorted by this member. */
+} __attribute__ ((__packed__)) REPARSE_INDEX_KEY;
+/*
+ * Quota flags (32-bit).
+ */
+typedef enum {	/* Stored little endian via const_cpu_to_le32(). Bit 0x00000008 is not assigned here. */
+	/* The user quota flags. Names explain meaning. */
+	QUOTA_FLAG_DEFAULT_LIMITS	= const_cpu_to_le32(0x00000001),
+	QUOTA_FLAG_LIMIT_REACHED	= const_cpu_to_le32(0x00000002),
+	QUOTA_FLAG_ID_DELETED		= const_cpu_to_le32(0x00000004),
+	QUOTA_FLAG_USER_MASK		= const_cpu_to_le32(0x00000007),
+		/* Bit mask for user quota flags. It is the OR of the three
+		   user quota flags above. */
+	/* These flags are only present in the quota defaults index entry,
+	   i.e. in the entry where owner_id = QUOTA_DEFAULTS_ID. */
+	QUOTA_FLAG_TRACKING_ENABLED	= const_cpu_to_le32(0x00000010),
+	QUOTA_FLAG_ENFORCEMENT_ENABLED	= const_cpu_to_le32(0x00000020),
+	QUOTA_FLAG_TRACKING_REQUESTED	= const_cpu_to_le32(0x00000040),
+	QUOTA_FLAG_LOG_THRESHOLD	= const_cpu_to_le32(0x00000080),
+	QUOTA_FLAG_LOG_LIMIT		= const_cpu_to_le32(0x00000100),
+	QUOTA_FLAG_OUT_OF_DATE		= const_cpu_to_le32(0x00000200),
+	QUOTA_FLAG_CORRUPT		= const_cpu_to_le32(0x00000400),
+	QUOTA_FLAG_PENDING_DELETES	= const_cpu_to_le32(0x00000800),
+} QUOTA_FLAGS;
+/*
+ * The system file FILE_Extend/$Quota contains two indexes $O and $Q. Quotas
+ * are on a per volume and per user basis.
+ *
+ * The $Q index contains one entry for each existing user_id on the volume. The
+ * index key is the user_id of the user/group owning this quota control entry,
+ * i.e. the key is the owner_id. The user_id of the owner of a file, i.e. the 
+ * owner_id, is found in the standard information attribute. The collation rule
+ * for $Q is COLLATION_NTOFS_ULONG.
+ *
+ * The $O index contains one entry for each user/group who has been assigned
+ * a quota on that volume. The index key holds the SID of the user_id the
+ * entry belongs to, i.e. the owner_id. The collation rule for $O is
+ * COLLATION_NTOFS_SID.
+ *
+ * The $O index entry data is the user_id of the user corresponding to the SID.
+ * This user_id is used as an index into $Q to find the quota control entry
+ * associated with the SID.
+ *
+ * The $Q index entry data is the quota control entry and is defined below.
+ */
+typedef struct {	/* Index entry data of the $Q index. */
+	u32 version;		/* Currently equals 2. */
+	QUOTA_FLAGS flags;	/* Flags describing this quota entry. */
+	u64 bytes_used;		/* How many bytes of the quota are in use. */
+	s64 change_time;	/* Last time this quota entry was changed. */
+	s64 threshold;		/* Soft quota (-1 if not limited). */
+	s64 limit;		/* Hard quota (-1 if not limited). */
+	s64 exceeded_time;	/* How long the soft quota has been exceeded. */
+	SID sid;		/* The SID of the user/object associated with
+				   this quota entry. Equals zero for the quota
+				   defaults entry. Placed last because a SID is
+				   variable-length. */
+} __attribute__ ((__packed__)) QUOTA_CONTROL_ENTRY;
+/*
+ * Predefined owner_id values (32-bit).
+ *
+ * All values are wrapped in const_cpu_to_le32(). QUOTA_DEFAULTS_ID is the
+ * owner_id of the quota defaults entry in the $Q index of FILE_Extend/$Quota.
+ * NOTE(review): QUOTA_FIRST_USER_ID is presumably the lowest owner_id handed
+ * out to a real user - confirm against the quota code.
+ */
+typedef enum {
+	QUOTA_INVALID_ID	= const_cpu_to_le32(0x00000000),
+	QUOTA_DEFAULTS_ID	= const_cpu_to_le32(0x00000001),
+	QUOTA_FIRST_USER_ID	= const_cpu_to_le32(0x00000100),
+} PREDEFINED_OWNER_IDS;
+/*
+ * Index entry flags (16-bit).
+ *
+ * The real flags are wrapped in const_cpu_to_le16(). The enum is declared
+ * packed and INDEX_ENTRY_SPACE_FILLER (0xffff, which reads the same in either
+ * byte order) exists only to make the enum 16 bits wide; it is not a flag.
+ */
+typedef enum {
+	INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a sub-node,
+					      i.e. a reference to an index
+					      block in form of a virtual
+					      cluster number (see below). */
+	INDEX_ENTRY_END  = const_cpu_to_le16(2), /* This signifies the last entry in
+					      an index block. The index entry
+					      does not represent a file but it
+					      can point to a sub-node. */
+	INDEX_ENTRY_SPACE_FILLER = 0xffff, /* Just to force 16-bit width. */
+} __attribute__ ((__packed__)) INDEX_ENTRY_FLAGS;
+/*
+ * This is the index entry header (see below).
+ *
+ * The first eight bytes are a union: directory indexes use indexed_file while
+ * view indexes use data_offset, data_length and reservedV. The numbers in the
+ * left margin are the byte offsets of the members.
+ *
+ * The _IIF() and _IEV() macros following the structure wrap SC() (defined
+ * elsewhere) with the member paths ieh.iif and iif.iev respectively.
+ */
+typedef struct {
+/*  0*/	union {		/* Only valid when INDEX_ENTRY_END is not set. */
+		MFT_REF indexed_file;		/* The mft reference of the file
+						   described by this index
+						   entry. Used for directory
+						   indexes. */
+		struct { /* Used for views/indexes to find the entry's data. */
+			u16 data_offset;	/* Data byte offset from this
+						   INDEX_ENTRY. Follows the
+						   index key. */
+			u16 data_length;	/* Data length in bytes. */
+			u32 reservedV;		/* Reserved (zero). */
+		} SN(iev) __attribute__ ((__packed__));
+	} SN(iif) __attribute__ ((__packed__));
+/*  8*/	u16 length;		 /* Byte size of this index entry, multiple of
+				    8-bytes. */
+/* 10*/	u16 key_length;		 /* Byte size of the key value, which is in the
+				    index entry. It follows field reserved. Not
+				    multiple of 8-bytes. */
+/* 12*/	INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */
+/* 14*/	u16 reserved;		 /* Reserved/align to 8-byte boundary. */
+/* sizeof() = 16 bytes */
+} __attribute__ ((__packed__)) INDEX_ENTRY_HEADER;
+#define _IIF(X)  SC(ieh.iif,X)
+#define _IEV(X)  SC(iif.iev,X)
+/*
+ * This is an index entry. A sequence of such entries follows each INDEX_HEADER
+ * structure. Together they make up a complete index. The index follows either
+ * an index root attribute or an index allocation attribute.
+ *
+ * NOTE: Before NTFS 3.0 only filename attributes were indexed.
+ *
+ * The entry consists of the 16-byte INDEX_ENTRY_HEADER followed by a union of
+ * all supported key types. The optional index data and the optional sub-node
+ * vcn are not members of the structure; they are located using the header
+ * fields as described in the comments inside the structure.
+ *
+ * The _IEH() and _IRI() macros following the structure wrap SC() (defined
+ * elsewhere) with the member paths ieh and key.iri respectively.
+ */
+typedef struct {
+/*  0*/ INDEX_ENTRY_HEADER SN(ieh);	/* The index entry header (see above). */
+/* 16*/	union {		/* The key of the indexed attribute. NOTE: Only present
+			   if INDEX_ENTRY_END bit in flags is not set. NOTE: On
+			   NTFS versions before 3.0 the only valid key is the
+			   FILE_NAME_ATTR. On NTFS 3.0+ the following
+			   additional index keys are defined: */
+		FILE_NAME_ATTR file_name;/* $I30 index in directories. */
+		SII_INDEX_KEY sii;	/* $SII index in $Secure. */
+		SDH_INDEX_KEY sdh;	/* $SDH index in $Secure. */
+		GUID object_id;		/* $O index in FILE_Extend/$ObjId: The
+					   object_id of the mft record found in
+					   the data part of the index. */
+		REPARSE_INDEX_KEY SN(iri);	/* $R index in FILE_Extend/$Reparse. */
+		SID sid;		/* $O index in FILE_Extend/$Quota:
+					   SID of the owner of the user_id. */
+		u32 owner_id;		/* $Q index in FILE_Extend/$Quota:
+					   user_id of the owner of the quota
+					   control entry in the data part of
+					   the index. */
+	} __attribute__ ((__packed__)) key;
+	/* The (optional) index data is inserted here when creating. */
+	// VCN vcn;	/* If INDEX_ENTRY_NODE bit in flags is set, the last
+	//		   eight bytes of this index entry contain the virtual
+	//		   cluster number of the index block that holds the
+	//		   entries immediately preceding the current entry (the
+	//		   vcn references the corresponding cluster in the data
+	//		   of the non-resident index allocation attribute). If
+	//		   the key_length is zero, then the vcn immediately
+	//		   follows the INDEX_ENTRY_HEADER. Regardless of
+	//		   key_length, the address of the 8-byte boundary
+	//		   aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by
+	//		   (char*)ie + le16_to_cpu(ie->length) - sizeof(VCN),
+	//		   where sizeof(VCN) can be hardcoded as 8 if wanted. */
+} __attribute__ ((__packed__)) INDEX_ENTRY;
+#define _IEH(X)  SC(ieh,X)
+#define _IRI(X)  SC(key.iri,X)
+/*
+ * Attribute: Bitmap (0xb0).
+ *
+ * Contains an array of bits (aka a bitfield).
+ *
+ * When used in conjunction with the index allocation attribute, each bit
+ * corresponds to one index block within the index allocation attribute. Thus
+ * the number of bits in the bitmap * index block size / cluster size is the
+ * number of clusters in the index allocation attribute.
+ *
+ * The only member is a zero length array, i.e. the structure merely gives a
+ * name and type to the start of the bitmap bytes.
+ */
+typedef struct {
+	u8 bitmap[0];			/* Array of bits. */
+} __attribute__ ((__packed__)) BITMAP_ATTR;
+/*
+ * The reparse point tag defines the type of the reparse point. It also
+ * includes several flags, which further describe the reparse point.
+ *
+ * The reparse point tag is an unsigned 32-bit value divided in three parts:
+ *
+ * 1. The least significant 16 bits (i.e. bits 0 to 15) specify the type of
+ *    the reparse point.
+ * 2. The 13 bits after this (i.e. bits 16 to 28) are reserved for future use.
+ * 3. The most significant three bits are flags describing the reparse point.
+ *    They are defined as follows:
+ *	bit 29: Name surrogate bit. If set, the filename is an alias for
+ *		another object in the system.
+ *	bit 30: High-latency bit. If set, accessing the first byte of data will
+ *		be slow. (E.g. the data is stored on a tape drive.)
+ *	bit 31: Microsoft bit. If set, the tag is owned by Microsoft. User
+ *		defined tags have to use zero here.
+ *
+ * In the enum below, IO_REPARSE_TAG_IS_ALIAS, IO_REPARSE_TAG_IS_HIGH_LATENCY
+ * and IO_REPARSE_TAG_IS_MICROSOFT are the masks for bits 29, 30 and 31.
+ * IO_REPARSE_TAG_VALID_VALUES (0xe000ffff) is the mask of the three flag bits
+ * together with the 16 type bits, i.e. everything except the reserved bits.
+ * IO_REPARSE_TAG_RESERVED_ONE and IO_REPARSE_TAG_RESERVED_RANGE share the
+ * value 1. All values are wrapped in const_cpu_to_le32().
+ */
+typedef enum {
+	IO_REPARSE_TAG_IS_ALIAS		= const_cpu_to_le32(0x20000000),
+	IO_REPARSE_TAG_IS_HIGH_LATENCY	= const_cpu_to_le32(0x40000000),
+	IO_REPARSE_TAG_IS_MICROSOFT	= const_cpu_to_le32(0x80000000),
+	IO_REPARSE_TAG_RESERVED_ZERO	= const_cpu_to_le32(0x00000000),
+	IO_REPARSE_TAG_RESERVED_ONE	= const_cpu_to_le32(0x00000001),
+	IO_REPARSE_TAG_RESERVED_RANGE	= const_cpu_to_le32(0x00000001),
+	IO_REPARSE_TAG_NSS		= const_cpu_to_le32(0x68000005),
+	IO_REPARSE_TAG_NSS_RECOVER	= const_cpu_to_le32(0x68000006),
+	IO_REPARSE_TAG_SIS		= const_cpu_to_le32(0x68000007),
+	IO_REPARSE_TAG_DFS		= const_cpu_to_le32(0x68000008),
+	IO_REPARSE_TAG_MOUNT_POINT	= const_cpu_to_le32(0x88000003),
+	IO_REPARSE_TAG_HSM		= const_cpu_to_le32(0xa8000004),
+	IO_REPARSE_TAG_SYMBOLIC_LINK	= const_cpu_to_le32(0xe8000000),
+	IO_REPARSE_TAG_VALID_VALUES	= const_cpu_to_le32(0xe000ffff),
+} PREDEFINED_REPARSE_TAGS;
+/*
+ * Attribute: Reparse point (0xc0).
+ *
+ * NOTE: Can be resident or non-resident.
+ *
+ * The fixed part is eight bytes (tag, data length, reserved) and is followed
+ * by reparse_data_length bytes of tag specific data. See the description of
+ * the reparse point tag above PREDEFINED_REPARSE_TAGS for the tag layout.
+ */
+typedef struct {
+	u32 reparse_tag;		/* Reparse point type (inc. flags). */
+	u16 reparse_data_length;	/* Byte size of reparse data. */
+	u16 reserved;			/* Align to 8-byte boundary. */
+	u8 reparse_data[0];		/* Meaning depends on reparse_tag. */
+} __attribute__ ((__packed__)) REPARSE_POINT;
+/*
+ * Attribute: Extended attribute (EA) information (0xd0).
+ *
+ * NOTE: Always resident. (Is this true???)
+ *
+ * Summary information (eight bytes, packed) about the extended attributes
+ * stored in the EA attribute (0xe0) described further below.
+ */
+typedef struct {
+	u16 ea_length;		/* Byte size of the packed extended
+				   attributes. */
+	u16 need_ea_count;	/* The number of extended attributes which have
+				   the NEED_EA bit set. */
+	u32 ea_query_length;	/* Byte size of the buffer required to query
+				   the extended attributes when calling
+				   ZwQueryEaFile() in Windows NT/2k. I.e. the
+				   byte size of the unpacked extended
+				   attributes. */
+} __attribute__ ((__packed__)) EA_INFORMATION;
+/*
+ * Extended attribute flags (8-bit).
+ *
+ * Used as the type of the flags member of EA_ATTR. The number of extended
+ * attributes having NEED_EA set is recorded in EA_INFORMATION.need_ea_count.
+ */
+typedef enum {
+	NEED_EA	= 0x80,
+} __attribute__ ((__packed__)) EA_FLAGS;
+/*
+ * Attribute: Extended attribute (EA) (0xe0).
+ *
+ * NOTE: Always non-resident. (Is this true?)
+ *
+ * Like the attribute list and the index buffer list, the EA attribute value is
+ * a sequence of EA_ATTR variable length records.
+ *
+ * FIXME: It appears weird that the EA name is not unicode. Is it true?
+ *
+ * NOTE: ea_name and ea_value are both zero length arrays and therefore start
+ * at the same offset within the structure. ea_value is only a placeholder; the
+ * value has to be located after the name by the code using this structure.
+ * NOTE(review): Whether a terminator or padding separates name and value is
+ * not documented here - confirm before relying on ea_name_length alone.
+ */
+typedef struct {
+	u32 next_entry_offset;	/* Offset to the next EA_ATTR. */
+	EA_FLAGS flags;		/* Flags describing the EA. */
+	u8 ea_name_length;	/* Length of the name of the EA in bytes. */
+	u16 ea_value_length;	/* Byte size of the EA's value. */
+	u8 ea_name[0];		/* Name of the EA. */
+	u8 ea_value[0];		/* The value of the EA. Immediately follows
+				   the name. */
+} __attribute__ ((__packed__)) EA_ATTR;
+/*
+ * Attribute: Property set (0xf0).
+ *
+ * Intended to support Native Structure Storage (NSS) - a feature removed from
+ * NTFS 3.0 during beta testing.
+ *
+ * The structure is intentionally left without members and only serves as a
+ * named placeholder for the attribute type.
+ */
+typedef struct {
+	/* Irrelevant as feature unused. */
+} __attribute__ ((__packed__)) PROPERTY_SET;
+/*
+ * Attribute: Logged utility stream (0x100).
+ *
+ * NOTE: Can be resident or non-resident.
+ *
+ * Operations on this attribute are logged to the journal ($LogFile) like
+ * normal metadata changes.
+ *
+ * Used by the Encrypting File System (EFS). All encrypted files have this
+ * attribute with the name $EFS.
+ *
+ * The structure has no members yet. It is made available under two type
+ * names, LOGGED_UTILITY_STREAM and EFS_ATTR.
+ */
+typedef struct {
+	/* Can be anything the creator chooses. */
+	/* EFS uses it as follows: */
+	// FIXME: Type this info, verifying it along the way. (AIA)
+} __attribute__ ((__packed__)) LOGGED_UTILITY_STREAM, EFS_ATTR;
+#endif /* _LINUX_NTFS_LAYOUT_H */
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
+/*
+ * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_MALLOC_H
+#define _LINUX_NTFS_MALLOC_H
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+/**
+ * vmalloc_nofs - vmalloc() flavour which never calls into the fs layer
+ * @size:	number of bytes to allocate
+ *
+ * Requests spanning num_physpages pages or more are refused outright. All
+ * other requests are passed to __vmalloc() using GFP_NOFS | __GFP_HIGHMEM and
+ * PAGE_KERNEL protection.
+ *
+ * Return the allocated memory or NULL if the request could not be satisfied.
+ */
+static inline void *vmalloc_nofs(unsigned long size)
+{
+	/* Refuse requests that can never succeed. */
+	if (unlikely(size >> PAGE_SHIFT >= num_physpages))
+		return NULL;
+	return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
+}
+/**
+ * ntfs_malloc_nofs - allocate memory in multiples of pages
+ * @size:	number of bytes to allocate
+ *
+ * Requests of up to PAGE_SIZE bytes are served by kmalloc() and always get a
+ * full PAGE_SIZE bytes. Larger requests are served by vmalloc_nofs(). Neither
+ * path is allowed to call into the fs layer (GFP_NOFS).
+ *
+ * It is a BUG() to ask for zero bytes.
+ *
+ * Return a pointer to the allocated memory or NULL if there was insufficient
+ * memory to complete the request. Release the memory with ntfs_free().
+ */
+static inline void *ntfs_malloc_nofs(unsigned long size)
+{
+	/*
+	 * kmalloc() has per-CPU caches so is faster for now than
+	 * __get_free_page(GFP_NOFS | __GFP_HIGHMEM) would be.
+	 */
+	if (likely(size && size <= PAGE_SIZE))
+		return kmalloc(PAGE_SIZE, GFP_NOFS);
+	if (unlikely(!size))
+		BUG();
+	/* More than one page: use the vmalloc area. */
+	return vmalloc_nofs(size);
+}
+/**
+ * ntfs_free - free memory allocated with ntfs_malloc_nofs()
+ * @addr:	address of the memory to free
+ *
+ * ntfs_malloc_nofs() hands out either kmalloc()ed or __vmalloc()ed memory, so
+ * the matching release function is selected from the address: anything lying
+ * outside the vmalloc address window [VMALLOC_START, VMALLOC_END) is given to
+ * kfree(), everything inside it to vfree().
+ *
+ * Both bounds are checked because kmalloc()ed memory is not guaranteed to lie
+ * below VMALLOC_START on every memory layout.
+ */
+static inline void ntfs_free(void *addr)
+{
+	if (likely((unsigned long)addr < VMALLOC_START ||
+			(unsigned long)addr >= VMALLOC_END)) {
+		/* kfree() returns void, so call it and return separately. */
+		kfree(addr);
+		/* free_page((unsigned long)addr); */
+		return;
+	}
+	vfree(addr);
+}
+#endif /* _LINUX_NTFS_MALLOC_H */
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
+/**
+ * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/locks.h>
+#include <linux/swap.h>
+#include "ntfs.h"
+#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) /* Buffers per page at 512 bytes each; sizes arr[] in ntfs_mft_readpage(). */
+/**
+ * __format_mft_record - initialize an empty mft record
+ * @m:		mapped, pinned and locked for writing mft record
+ * @size:	size of the mft record
+ * @rec_no:	mft record number / inode number (currently not used)
+ *
+ * Private worker which zeroes @size bytes at @m and then writes a minimal
+ * record: the FILE magic, the update sequence array location and count, an
+ * update sequence number of 1, a sequence number of 1 and a single AT_END
+ * attribute of length zero at attrs_offset.
+ *
+ * Use format_mft_record() instead of calling this directly.
+ */
+static void __format_mft_record(MFT_RECORD *m, const int size,
+		const unsigned long rec_no)
+{
+	/* Update sequence array position, aligned to 2-byte boundary. */
+	const size_t usa_pos = (sizeof(MFT_RECORD) + 1) & ~1;
+	/* Number of entries in the update sequence array. */
+	const u16 usa_count = size / NTFS_BLOCK_SIZE + 1;
+	/* First attribute follows the array, aligned to 8-byte boundary. */
+	const u16 attrs_ofs = ((u16)usa_pos + (usa_count << 1) + 7) & ~7;
+	ATTR_RECORD *end_attr;
+	memset(m, 0, size);
+	m->_MNR(magic) = magic_FILE;
+	m->_MNR(usa_ofs) = cpu_to_le16(usa_pos);
+	m->_MNR(usa_count) = cpu_to_le16(usa_count);
+	/* The update sequence number starts out as 1. */
+	*(u16*)((char*)m + usa_pos) = cpu_to_le16(1);
+	m->lsn = cpu_to_le64(0LL);
+	m->sequence_number = cpu_to_le16(1);
+	m->link_count = cpu_to_le16(0);
+	m->attrs_offset = cpu_to_le16(attrs_ofs);
+	m->flags = cpu_to_le16(0);
+	/*
+	 * The record is in use up to the end of the eight byte termination
+	 * attribute, rounded up to an 8-byte boundary.
+	 */
+	m->bytes_in_use = cpu_to_le32((attrs_ofs + 8 + 7) & ~7);
+	m->bytes_allocated = cpu_to_le32(size);
+	m->base_mft_record = cpu_to_le64((MFT_REF)0);
+	m->next_attr_instance = cpu_to_le16(0);
+	/* Terminate the (empty) attribute list. */
+	end_attr = (ATTR_RECORD*)((char*)m + attrs_ofs);
+	end_attr->type = AT_END;
+	end_attr->length = cpu_to_le32(0);
+}
+/**
+ * format_mft_record2 - initialize an empty mft record
+ * @vfs_sb:	vfs super block of volume
+ * @inum:	mft record number / inode number to format
+ * @mft_rec:	mapped, pinned and locked mft record (optional)
+ *
+ * Initialize an empty mft record. This is used when extending the MFT.
+ *
+ * If @mft_rec is NULL, we call iget_map_mft_record() to obtain the record and
+ * we unmap it again when finished.
+ *
+ * We return 0 on success or -errno on error.
+ *
+ * NOTE: This function is compiled out (#if 0) because the function it relies
+ * on, iget_map_mft_record(), is not available. Use format_mft_record().
+ */
+#if 0
+// Can't do this as iget_map_mft_record no longer exists...
+int format_mft_record2(struct super_block *vfs_sb, const unsigned long inum,
+		MFT_RECORD *mft_rec)
+{
+	MFT_RECORD *m;
+	ntfs_inode *ni;
+	if (mft_rec)
+		m = mft_rec;
+	else {
+		m = iget_map_mft_record(WRITE, vfs_sb, inum, &ni);
+		if (IS_ERR(m))
+			return PTR_ERR(m);
+	}
+	__format_mft_record(m, NTFS_SB(vfs_sb)->mft_record_size, inum);
+	if (!mft_rec) {
+		// TODO: dirty mft record
+		unmap_mft_record(WRITE, ni);
+		// TODO: Do stuff to get rid of the ntfs_inode
+	}
+	return 0;
+}
+#endif
+/**
+ * format_mft_record - initialize an empty mft record
+ * @ni:		ntfs inode of mft record
+ * @mft_rec:	mapped, pinned and locked mft record (optional)
+ *
+ * Write a fresh, empty mft record for @ni. This is used when extending the
+ * MFT.
+ *
+ * The caller may pass in the already mapped record as @mft_rec. If @mft_rec
+ * is NULL, the record is mapped for writing with map_mft_record() for the
+ * duration of the call and unmapped again before returning.
+ *
+ * Return 0 on success or -errno if mapping the record failed.
+ */
+int format_mft_record(ntfs_inode *ni, MFT_RECORD *mft_rec)
+{
+	MFT_RECORD *m = mft_rec;
+	/* No record supplied by the caller: map it ourselves. */
+	if (!m) {
+		m = map_mft_record(WRITE, ni);
+		if (IS_ERR(m))
+			return PTR_ERR(m);
+	}
+	__format_mft_record(m, ni->vol->mft_record_size, ni->mft_no);
+	/* Only drop the mapping if it was us who created it. */
+	if (!mft_rec)
+		unmap_mft_record(WRITE, ni);
+	return 0;
+}
+/**
+ * ntfs_mft_readpage - read a page of the data attribute of $MFT
+ * @file:	open file to which the page @page belongs or NULL
+ * @page:	page cache page to fill with data
+ *
+ * Readpage method for the VFS address space operations.
+ *
+ * ntfs_mft_readpage() reads the page specified by @page and returns 0 on
+ * success or -EIO on error.
+ *
+ * Note, we only setup asynchronous I/O on the page and return. I/O completion
+ * is signalled via our asynchronous I/O completion handler
+ * end_buffer_read_index_async().
+ *
+ * Each buffer of the page is treated individually. A buffer whose block lies
+ * below the end of the allocated size and whose vcn translates to an
+ * lcn >= 0 is mapped to that block and queued for reading. Any other buffer is
+ * left unmapped, the part of the page it covers is zeroed and the buffer is
+ * marked uptodate without doing any I/O.
+ *
+ * If not a single buffer was queued for reading, the page is flagged with
+ * SetPageError(), unlocked and -EIO is returned. @file is not used.
+ */
+static int ntfs_mft_readpage(struct file *file, struct page *page)
+{
+	VCN vcn;
+	LCN lcn;
+	struct inode *vi;
+	ntfs_inode *ni;
+	struct super_block *sb;
+	ntfs_volume *vol;
+	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	sector_t iblock, lblock;
+	unsigned int blocksize, blocks,vcn_ofs;
+	int i, nr;
+	unsigned char blocksize_bits;
+	/* The page must be locked. */
+	if (!PageLocked(page))
+		PAGE_BUG(page);
+	/* Get the VFS and ntfs inodes as well as the super blocks for page. */
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
+	sb = vi->i_sb;
+	vol = NTFS_SB(sb);
+	blocksize = sb->s_blocksize;
+	blocksize_bits = sb->s_blocksize_bits;
+	if (!page->buffers)
+		create_empty_buffers(page, blocksize);
+	blocks = PAGE_CACHE_SIZE >> blocksize_bits; /* NOTE(review): Not read again below. */
+	iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits); /* First block of page. */
+	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; /* First block past the allocated size. */
+	bh = head = page->buffers;
+	BUG_ON(!bh);
+#ifdef DEBUG
+	if (!ni->run_list.rl)
+		panic("NTFS: $MFT/$DATA run list has been unmapped! This is a "
+				"very serious bug! Cannot continue...");
+#endif
+	nr = i = 0; /* nr counts buffers queued in arr[], i indexes buffers in page. */
+	/* Loop through all the buffers in the page. */
+	do {
+		if (buffer_mapped(bh))
+			BUG();
+		/* Is the block within the allowed limits? */
+		if (iblock < lblock) {
+			/* Convert iblock into corresponding vcn and offset. */
+			vcn = (VCN)iblock << blocksize_bits >>
+					vol->cluster_size_bits;
+			vcn_ofs = ((VCN)iblock << blocksize_bits) &
+					vol->cluster_size_mask;
+			/* Convert the vcn to the corresponding lcn. */
+			read_lock(&ni->run_list.lock);
+			lcn = vcn_to_lcn(ni->run_list.rl, vcn);
+			read_unlock(&ni->run_list.lock);
+			if (lcn >= 0) {
+				/* Setup buffer head to correct block. */
+				bh->b_dev = vi->i_dev;
+				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
+						+ vcn_ofs) >> blocksize_bits;
+				bh->b_state |= (1UL << BH_Mapped);
+				arr[nr++] = bh;
+				continue; /* Jumps to the loop condition, so i and iblock still advance. */
+			}
+			ntfs_error(sb, "vcn_to_lcn(vcn = 0x%Lx) failed with "
+					"error code 0x%Lx.", (long long)vcn,
+					(long long)-lcn);
+			// FIXME: Depending on vol->on_errors, do something.
+		}
+		/*
+		 * Either iblock was outside lblock limits or vcn_to_lcn()
+		 * returned error. Just zero that portion of the page and set
+		 * the buffer uptodate.
+		 */
+		bh->b_dev = vi->i_dev;
+		bh->b_blocknr = -1UL;
+		bh->b_state &= ~(1UL << BH_Mapped);
+		memset(kmap(page) + i * blocksize, 0, blocksize);
+		flush_dcache_page(page);
+		kunmap(page);
+		set_bit(BH_Uptodate, &bh->b_state);
+	} while (i++, iblock++, (bh = bh->b_this_page) != head);
+	/* Check we have at least one buffer ready for io. */
+	if (nr) {
+		/* Lock the buffers. */
+		for (i = 0; i < nr; i++) {
+			struct buffer_head *tbh = arr[i];
+			lock_buffer(tbh);
+			tbh->b_end_io = end_buffer_read_index_async;
+			mark_buffer_async(tbh, 1);
+		}
+		/* And start io on the buffers. */
+		for (i = 0; i < nr; i++)
+			submit_bh(READ, arr[i]);
+		return 0;
+	}
+	/* We didn't schedule any io on any of the buffers. */
+	ntfs_error(sb, "No I/O was scheduled on any buffers. Page I/O error.");
+	SetPageError(page);
+	UnlockPage(page);
+	return -EIO;
+}
+/**
+ * ntfs_mft_aops - address space operations for access to $MFT
+ *
+ * Address space operations for access to $MFT. This allows us to simply use
+ * read_cache_page() in map_mft_record().
+ *
+ * Only reading is implemented: readpage fills a page via ntfs_mft_readpage()
+ * and sync_page uses block_sync_page(). All other operations are explicitly
+ * set to NULL.
+ */
+struct address_space_operations ntfs_mft_aops = {
+	/* Write dirty page to disk. */
+	.writepage	= NULL,
+	/* Fill page with data. */
+	.readpage	= ntfs_mft_readpage,
+	/* Currently, just unplugs the disk request queue. */
+	.sync_page	= block_sync_page,
+	.prepare_write	= NULL,
+	.commit_write	= NULL,
+	/* Needed for FIBMAP. Don't use it. */
+	.bmap		= NULL,
+	.flushpage	= NULL,
+	.releasepage	= NULL,
+#ifdef KERNEL_HAS_O_DIRECT
+	.direct_IO	= NULL,
+#endif
+};
+/**
+ * map_mft_record_page - map the page in which a specific mft record resides
+ * @ni:		ntfs inode whose mft record page to map
+ *
+ * Locate the page cache page of $MFT which holds the mft record of the ntfs
+ * inode @ni, map and pin it using ntfs_map_page(), and return a pointer to
+ * the mft record inside the mapped page.
+ *
+ * On success the mapping count @ni->mft_count is incremented and the page and
+ * the offset of the record within it are stored in @ni->page and
+ * @ni->page_ofs. On failure both are reset.
+ *
+ * It is a bug to call this while @ni->mft_count is non-zero or @ni->page is
+ * set.
+ *
+ * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR()
+ * contains the negative error code returned. -ENOENT means that the record
+ * lies beyond the end of $MFT.
+ */
+static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
+{
+	ntfs_volume *vol = ni->vol;
+	struct inode *mft_vi = vol->mft_ino;
+	struct page *page;
+	unsigned long index, ofs, end_index;
+	BUG_ON(atomic_read(&ni->mft_count) || ni->page);
+	/*
+	 * Page cache index of, and byte offset within that page to, the
+	 * wanted mft record. FIXME: We need to check for overflowing the
+	 * unsigned long, but I don't think we would ever get here if the
+	 * volume was that big...
+	 */
+	index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
+	ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+	/* The maximum valid index into the page cache for $MFT's data. */
+	end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT;
+	/*
+	 * The record does not exist if its page lies past the last page, or
+	 * if it is in the last page but does not fit in full into the part
+	 * of that page which is covered by i_size.
+	 */
+	if (index > end_index || (index == end_index &&
+			(mft_vi->i_size & ~PAGE_CACHE_MASK) <
+			ofs + vol->mft_record_size)) {
+		page = ERR_PTR(-ENOENT);
+		goto err_out;
+	}
+	/* Read, map, and pin the page. */
+	page = ntfs_map_page(mft_vi->i_mapping, index);
+	if (IS_ERR(page))
+		goto err_out;
+	/* Pin the mft record mapping in the ntfs_inode. */
+	atomic_inc(&ni->mft_count);
+	/* Remember where the record lives. */
+	ni->page = page;
+	ni->page_ofs = ofs;
+	return page_address(page) + ofs;
+err_out:
+	/* Just in case... */
+	ni->page = NULL;
+	ni->page_ofs = 0;
+	ntfs_error(vol->sb, "Failed with error code %lu.", -PTR_ERR(page));
+	return (void*)page;
+}
+/**
+ * unmap_mft_record_page - unmap the page in which a specific mft record resides
+ * @ni:		ntfs inode whose mft record page to unmap
+ *
+ * Release the page holding the mft record of the ntfs inode @ni and clear the
+ * page reference and page offset cached in @ni. This is a NOOP if highmem is
+ * not configured.
+ *
+ * The release goes through ntfs_unmap_page() which in turn decrements the use
+ * count on the page thus releasing it from the pinned state. The page itself
+ * is not removed from memory here; that is left to the page cache code when
+ * memory pressure increases or whatever.
+ *
+ * It is a bug to call this while @ni->mft_count is non-zero or when no page
+ * is attached to @ni.
+ */
+static inline void unmap_mft_record_page(ntfs_inode *ni)
+{
+	struct page *mapped = ni->page;
+	BUG_ON(atomic_read(&ni->mft_count) || !mapped);
+	// TODO: If dirty, blah...
+	ntfs_unmap_page(mapped);
+	/* Forget the now released page. */
+	ni->page_ofs = 0;
+	ni->page = NULL;
+}
+/**
+ * map_mft_record - map, pin and lock an mft record
+ * @rw:		map for read (rw = READ) or write (rw = WRITE)
+ * @ni:		ntfs inode whose MFT record to map
+ *
+ * First, take a reference on @ni and then take the mrec_lock semaphore for
+ * reading or writing, depending on the value of @rw. We might sleep here while
+ * waiting for the semaphore if it is already held by someone else.
+ *
+ * If the record is already mapped, just increment the mapping count and
+ * return the record. Otherwise the page of the record is mapped using
+ * map_mft_record_page().
+ *
+ * That in turn uses ntfs_map_page() to get the page containing the wanted mft
+ * record (it in turn calls read_cache_page() which reads it in from disk if
+ * necessary, increments the use count on the page so that it cannot disappear
+ * under us and returns a reference to the page cache page).
+ *
+ * If read_cache_page() invokes ntfs_mft_readpage() to load the page from disk,
+ * it sets PG_locked and clears PG_uptodate on the page. Once I/O has
+ * completed and the post-read mst fixups on each mft record in the page have
+ * been performed, the page gets PG_uptodate set and PG_locked cleared (this is
+ * done in the asynchronous I/O completion handler installed by
+ * ntfs_mft_readpage(), i.e. end_buffer_read_index_async()). ntfs_map_page()
+ * waits for PG_locked to become clear and checks if PG_uptodate is set and
+ * returns an error code if not. This provides sufficient protection against
+ * races when reading/using the page.
+ *
+ * However there is the write mapping to think about. Doing the above described
+ * checking here will be fine, because when initiating the write we will set
+ * PG_locked and clear PG_uptodate making sure nobody is touching the page
+ * contents. Doing the locking this way means that the commit to disk code in
+ * the page cache code paths is automatically sufficiently locked with us as
+ * we will not touch a page that has been locked or is not uptodate. The only
+ * locking problem then is them locking the page while we are accessing it.
+ *
+ * So that code will end up having to own the mrec_lock of all mft
+ * records/inodes present in the page before I/O can proceed. Grr. In that
+ * case we wouldn't need to bother with PG_locked and PG_uptodate as nobody
+ * will be accessing anything without owning the mrec_lock semaphore. But we
+ * do need to use them because of the read_cache_page() invocation and the
+ * code becomes so much simpler this way that it is well worth it.
+ *
+ * On success the mft record is ours and a pointer to it is returned. On
+ * failure the semaphore and the reference on @ni are released again. You need
+ * to check the returned pointer with IS_ERR() and if that is true, PTR_ERR()
+ * will return the error code. The following error codes are defined:
+ *	TODO: Fill in the possible error codes.
+ *
+ * NOTE: Caller is responsible for setting the mft record dirty before calling
+ * unmap_mft_record(). This is obviously only necessary if the caller really
+ * modified the mft record...
+ * Q: Do we want to recycle one of the VFS inode state bits instead?
+ * A: No, the inode ones mean we want to change the mft record, not we want to
+ * write it out.
+ */
+MFT_RECORD *map_mft_record(const int rw, ntfs_inode *ni)
+{
+	MFT_RECORD *m;
+	ntfs_debug("Entering for i_ino 0x%Lx, mapping for %s.",
+			(unsigned long long)ni->mft_no,
+			rw == READ ? "READ" : "WRITE");
+	/* Hold a reference so the ntfs inode cannot go away under us. */
+	atomic_inc(&ni->count);
+	/* Serialize access to this mft record in the requested mode. */
+	if (rw != READ)
+		down_write(&ni->mrec_lock);
+	else
+		down_read(&ni->mrec_lock);
+	/* Fast path: already mapped, so only the count needs bumping. */
+	if (atomic_read(&ni->mft_count)) {
+		BUG_ON(!ni->page);
+		atomic_inc(&ni->mft_count);
+		return page_address(ni->page) + ni->page_ofs;
+	}
+	/* Slow path: map the page containing the record. */
+	m = map_mft_record_page(ni);
+	if (IS_ERR(m)) {
+		/* Mapping failed. Undo the locking and the reference. */
+		if (rw != READ)
+			up_write(&ni->mrec_lock);
+		else
+			up_read(&ni->mrec_lock);
+		ntfs_error(ni->vol->sb, "Failed with error code %lu.",
+				-PTR_ERR(m));
+		atomic_dec(&ni->count);
+	}
+	return m;
+}
+/**
+ * iget_map_mft_record - iget, map, pin, lock an mft record
+ * @rw:		map for read (rw = READ) or write (rw = WRITE)
+ * @vfs_sb:	vfs super block of mounted volume
+ * @inum:	inode number / MFT record number whose mft record to map
+ * @vfs_ino:	output parameter which we set to the inode on successful return
+ *
+ * Does the same as map_mft_record(), except that it starts out only with the
+ * knowledge of the super block (@vfs_sb) and the mft record number which is of
+ * course the same as the inode number (@inum).
+ *
+ * On success, *@vfs_ino will contain a pointer to the inode structure of the
+ * mft record on return. On error return, *@vfs_ino is undefined.
+ *
+ * See map_mft_record() description for details and for a description of how
+ * errors are returned and what error codes are defined. In addition, -EIO is
+ * returned if iget() fails or yields a bad inode.
+ *
+ * IMPORTANT: The caller is responsible for calling iput(@vfs_ino) when
+ * finished with the inode, i.e. after unmap_mft_record() has been called. If
+ * that is omitted you will get busy inodes upon umount...
+ *
+ * NOTE: This function is compiled out (#if 0), see the comments below for the
+ * reasons. The disabled body still passes a struct inode to map_mft_record()
+ * which takes an ntfs_inode, so it would need updating before being revived.
+ */
+#if 0
+// this is no longer possible. iget() cannot be called as we may be loading
+// an ntfs inode which will never have a corresponding vfs inode counter part.
+// this is not going to be pretty. )-:
+// we need our own hash for ntfs inodes now, ugh. )-:
+// not having vfs inodes associated with all ntfs inodes is a bad mistake I am
+// getting the impression. this will in the end turn out uglier than just
+// having iget_no_wait().
+// my only hope is that we can get away without this functionality in the driver
+// altogether. we are ok for extent inodes already because we only handle them
+// via map_extent_mft_record().
+// if we really need it, we could have a list or hash of "pure ntfs inodes"
+// to cope with this situation, so the lookup would be:
+// look for the inode and if not present look for pure ntfs inode and if not
+// present add a new pure ntfs inode. under this scheme extent inodes have to
+// also be added to the list/hash of pure inodes.
+MFT_RECORD *iget_map_mft_record(const int rw, struct super_block *vfs_sb,
+		const unsigned long inum, struct inode **vfs_ino)
+{
+	struct inode *inode;
+	MFT_RECORD *mrec;
+	/*
+	 * The corresponding iput() happens when clear_inode() is called on the
+	 * base mft record of this extent mft record.
+	 * When used on base mft records, caller has to perform the iput().
+	 */
+	inode = iget(vfs_sb, inum);
+	if (inode && !is_bad_inode(inode)) {
+		mrec = map_mft_record(rw, inode);
+		if (!IS_ERR(mrec)) {
+			ntfs_debug("Success for i_ino 0x%lx.", inum);
+			*vfs_ino = inode;
+			return mrec;
+		}
+	} else
+		mrec = ERR_PTR(-EIO);
+	if (inode)
+		iput(inode);
+	ntfs_debug("Failed for i_ino 0x%lx.", inum);
+	return mrec;
+}
+#endif
+/**
+ * unmap_mft_record - release a mapped mft record
+ * @rw:		unmap from read (@rw = READ) or write (@rw = WRITE)
+ * @ni:		ntfs inode whose MFT record to unmap
+ *
+ * Undo a successful map_mft_record() in three steps:
+ *
+ * First, decrement the mapping count and when it reaches zero unmap the page
+ * of the mft record using unmap_mft_record_page().
+ *
+ * Second, release the mrec_lock semaphore in the mode given by @rw. The mft
+ * record is now released for others to get hold of.
+ *
+ * Finally, release the ntfs inode by decreasing the ntfs inode reference
+ * count.
+ *
+ * It is a bug to call this on an ntfs inode whose mft record is not mapped.
+ *
+ * NOTE: If caller had the mft record mapped for write and has modified it, it
+ * is imperative to set the mft record dirty BEFORE calling unmap_mft_record().
+ *
+ * NOTE: This has to be done both for 'normal' mft records, and for extent mft
+ * records.
+ */
+void unmap_mft_record(const int rw, ntfs_inode *ni)
+{
+	BUG_ON(!atomic_read(&ni->mft_count) || !ni->page);
+	ntfs_debug("Entering for mft_no 0x%Lx, unmapping from %s.",
+			(unsigned long long)ni->mft_no,
+			rw == READ ? "READ" : "WRITE");
+	/* The page mapping itself only goes away with the last user. */
+	if (atomic_dec_and_test(&ni->mft_count))
+		unmap_mft_record_page(ni);
+	/* Drop the semaphore in the mode it was taken in. */
+	if (rw != READ)
+		up_write(&ni->mrec_lock);
+	else
+		up_read(&ni->mrec_lock);
+	/*
+	 * Release the ntfs inode. If it is a pure ntfs_inode, i.e. no vfs
+	 * inode is attached, the actual tear down of all structures and the
+	 * freeing of all allocated memory is left to ntfs_clear_inode() in
+	 * the extent inode case, and to the caller in the non-extent, yet
+	 * pure ntfs inode case.
+	 */
+	atomic_dec(&ni->count);
+}
+/**
+ * map_extent_mft_record - load an extent inode and attach it to its base
+ * @base_ni:	base ntfs inode
+ * @mref:	mft reference of the extent inode to load (in little endian)
+ * @ntfs_ino:	on successful return, pointer to the ntfs_inode structure
+ *
+ * Load the extent mft record @mref and attach it to its base inode @base_ni.
+ * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise
+ * PTR_ERR(result) gives the negative error code.
+ *
+ * On successful return, @ntfs_ino contains a pointer to the ntfs_inode
+ * structure of the mapped extent inode.
+ *
+ * The array of attached extent inodes grows in steps of four pointers: a new,
+ * larger array is allocated whenever the current number of extents is a
+ * multiple of four, i.e. exactly when the existing array is full.
+ *
+ * Note, we always map for READ. We consider this lock as irrelevant because
+ * the base inode will be write locked in all cases when we want to write to
+ * an extent inode which already guarantees that there is no-one else accessing
+ * the extent inode.
+ */
+MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
+		ntfs_inode **ntfs_ino)
+{
+	MFT_RECORD *m;
+	ntfs_inode *ni = NULL;
+	ntfs_inode **extent_nis = NULL;
+	int i;
+	u64 mft_no = MREF_LE(mref);
+	u16 seq_no = MSEQNO_LE(mref);
+	BOOL destroy_ni = FALSE;
+	ntfs_debug("Mapping extent mft record 0x%Lx (base mft record 0x%Lx).",
+			(unsigned long long)mft_no,
+			(unsigned long long)base_ni->mft_no);
+	/* Make sure the base ntfs inode doesn't go away. */
+	atomic_inc(&base_ni->count);
+	/*
+	 * Check if this extent inode has already been added to the base inode,
+	 * in which case just return it. If not found, add it to the base
+	 * inode before returning it.
+	 */
+	down(&base_ni->extent_lock);
+	if (base_ni->nr_extents > 0) {
+		extent_nis = base_ni->_INE(extent_ntfs_inos);
+		for (i = 0; i < base_ni->nr_extents; i++) {
+			if (mft_no != extent_nis[i]->mft_no)
+				continue;
+			ni = extent_nis[i];
+			/* Make sure the ntfs inode doesn't go away. */
+			atomic_inc(&ni->count);
+			break;
+		}
+	}
+	if (ni) {
+		up(&base_ni->extent_lock);
+		atomic_dec(&base_ni->count);
+		/* We found the record; just have to map and return it. */
+		m = map_mft_record(READ, ni);
+		/* Map mft record increments this on success. */
+		atomic_dec(&ni->count);
+		if (IS_ERR(m))
+			goto map_err_out;
+		/* Verify the sequence number. */
+		if (le16_to_cpu(m->sequence_number) == seq_no) {
+			ntfs_debug("Done 1.");
+			*ntfs_ino = ni;
+			return m;
+		}
+		unmap_mft_record(READ, ni);
+		ntfs_error(base_ni->vol->sb, "Found stale extent mft "
+				"reference! Corrupt file system. "
+				"Run chkdsk.");
+		return ERR_PTR(-EIO);
+	}
+	/* Record wasn't there. Get a new ntfs inode and initialize it. */
+	ni = ntfs_new_inode(base_ni->vol->sb);
+	if (!ni) {
+		up(&base_ni->extent_lock);
+		atomic_dec(&base_ni->count);
+		return ERR_PTR(-ENOMEM);
+	}
+	ni->vol = base_ni->vol;
+	ni->mft_no = mft_no;
+	ni->seq_no = seq_no;
+	/* A negative extent count marks this as an extent inode. */
+	ni->nr_extents = -1;
+	ni->_INE(base_ntfs_ino) = base_ni;
+	/* Now map the record. */
+	m = map_mft_record(READ, ni);
+	if (IS_ERR(m)) {
+		up(&base_ni->extent_lock);
+		atomic_dec(&base_ni->count);
+		ntfs_clear_inode(ni);
+		goto map_err_out;
+	}
+	/* Verify the sequence number. */
+	if (le16_to_cpu(m->sequence_number) != seq_no) {
+		ntfs_error(base_ni->vol->sb, "Found stale extent mft "
+				"reference! Corrupt file system. Run chkdsk.");
+		destroy_ni = TRUE;
+		m = ERR_PTR(-EIO);
+		goto unm_err_out;
+	}
+	/*
+	 * Attach extent inode to base inode, reallocating memory if needed.
+	 * The array holds a multiple of four pointers, so it is full (or not
+	 * yet allocated) exactly when nr_extents is a multiple of four. Testing
+	 * against ~3 instead would stop growing the array after the fourth
+	 * extent and then write beyond the end of the allocation.
+	 */
+	if (!(base_ni->nr_extents & 3)) {
+		ntfs_inode **tmp;
+		int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
+		tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS);
+		if (!tmp) {
+			ntfs_error(base_ni->vol->sb, "Failed to allocate "
+					"internal buffer.");
+			destroy_ni = TRUE;
+			m = ERR_PTR(-ENOMEM);
+			goto unm_err_out;
+		}
+		if (base_ni->_INE(extent_ntfs_inos)) {
+			/* Carry over the nr_extents existing pointers. */
+			memcpy(tmp, base_ni->_INE(extent_ntfs_inos), new_size -
+					4 * sizeof(ntfs_inode *));
+			kfree(base_ni->_INE(extent_ntfs_inos));
+		}
+		base_ni->_INE(extent_ntfs_inos) = tmp;
+	}
+	base_ni->_INE(extent_ntfs_inos)[base_ni->nr_extents++] = ni;
+	up(&base_ni->extent_lock);
+	atomic_dec(&base_ni->count);
+	ntfs_debug("Done 2.");
+	*ntfs_ino = ni;
+	return m;
+unm_err_out:
+	unmap_mft_record(READ, ni);
+	up(&base_ni->extent_lock);
+	atomic_dec(&base_ni->count);
+	/*
+	 * If the extent inode was not attached to the base inode we need to
+	 * release it or we will leak memory.
+	 */
+	if (destroy_ni)
+		ntfs_clear_inode(ni);
+	return m;
+map_err_out:
+	/* Mapping failed; @m holds the encoded error to hand back. */
+	ntfs_error(base_ni->vol->sb, "Failed to map extent "
+			"mft record, error code %ld.", -PTR_ERR(m));
+	return m;
+}
--- a/fs/ntfs/mft.h
+++ b/fs/ntfs/mft.h
+/*
+ * mft.h - Defines for mft record handling in NTFS Linux kernel driver.
+ *	   Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_MFT_H
+#define _LINUX_NTFS_MFT_H
+#include <linux/fs.h>
+#include "inode.h"
+extern int format_mft_record(ntfs_inode *ni, MFT_RECORD *m);
+//extern int format_mft_record2(struct super_block *vfs_sb,
+//		const unsigned long inum, MFT_RECORD *m);
+extern MFT_RECORD *map_mft_record(const int rw, ntfs_inode *ni);
+extern void unmap_mft_record(const int rw, ntfs_inode *ni);
+extern MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
+		ntfs_inode **ntfs_ino);
+/**
+ * unmap_extent_mft_record - release a mapped extent mft record
+ * @ni:		ntfs inode of the extent mft record to unmap
+ *
+ * Thin wrapper which calls unmap_mft_record() with READ for @ni.
+ */
+static inline void unmap_extent_mft_record(ntfs_inode *ni)
+{
+	unmap_mft_record(READ, ni);
+}
+#endif /* _LINUX_NTFS_MFT_H */
--- a/fs/ntfs/mst.c
+++ b/fs/ntfs/mst.c
+/*
+ * mst.c - NTFS multi sector transfer protection handling code. Part of the
+ * 	   Linux-NTFS project.
+ *
+ * Copyright (c) 2001 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "ntfs.h"
+/**
+ * __post_read_mst_fixup - fast deprotect multi sector transfer protected data
+ * @b:		pointer to the data to deprotect
+ * @size:	size in bytes of @b (not used by this unchecked variant)
+ *
+ * Restore the last u16 of every NTFS_BLOCK_SIZE sized block of @b from the
+ * update sequence array stored in the record, without checking for any
+ * errors. The number of blocks fixed up is the usa_count field of @b minus
+ * one (the first array element is the update sequence number itself).
+ */
+inline void __post_read_mst_fixup(NTFS_RECORD *b, const u32 size)
+{
+	/* Distance, in u16 units, between two consecutive fixup positions. */
+	const int stride = NTFS_BLOCK_SIZE / sizeof(u16);
+	u16 usa_ofs = le16_to_cpu(b->usa_ofs);
+	u16 nr_fixups = le16_to_cpu(b->usa_count) - 1;
+	/* Points at the usn; the saved original words follow it. */
+	u16 *usa = (u16*)b + usa_ofs / sizeof(u16);
+	/* Last u16 of the first block, i.e. the first word to restore. */
+	u16 *word = (u16*)b + stride - 1;
+	/* Copy each saved word back over the usn at the end of its block. */
+	for (; nr_fixups; nr_fixups--, word += stride)
+		*word = *++usa;
+}
+/**
+ * post_read_mst_fixup - deprotect multi sector transfer protected data
+ * @b:		pointer to the data to deprotect
+ * @size:	size in bytes of @b
+ *
+ * Perform the post read multi sector transfer fixup on @b. Before anything is
+ * modified, every protected block is checked for an incomplete multi sector
+ * transfer: the last u16 of each NTFS_BLOCK_SIZE sized block must equal the
+ * update sequence number. If one does not, the magic of the record header is
+ * overwritten with "BAAD" (in memory only!) and processing is aborted.
+ *
+ * Return 0 on success and -EINVAL on error ("BAAD" magic will be present).
+ *
+ * NOTE: An absent or invalid update sequence array is taken to mean that the
+ * structure is not protected at all and hence does not need fixing up, so 0
+ * (success) is returned in that case. This is in contrast to
+ * pre_write_mst_fixup().
+ */
+int post_read_mst_fixup(NTFS_RECORD *b, const u32 size)
+{
+	/* Distance, in u16 units, between two consecutive fixup positions. */
+	const int stride = NTFS_BLOCK_SIZE / sizeof(u16);
+	u16 usa_ofs = le16_to_cpu(b->usa_ofs);
+	/* One less than usa_count: the first element is the usn itself. */
+	u16 nr_fixups = le16_to_cpu(b->usa_count) - 1;
+	u16 *usa, *word;
+	u16 usn, i;
+	/* Size and alignment checks; failing any means "not protected". */
+	if (size & (NTFS_BLOCK_SIZE - 1))
+		return 0;
+	if (usa_ofs & 1)
+		return 0;
+	if (usa_ofs + (nr_fixups * 2) > size)
+		return 0;
+	if ((size >> NTFS_BLOCK_SIZE_BITS) != nr_fixups)
+		return 0;
+	/* Position of the usn in the update sequence array. */
+	usa = (u16*)b + usa_ofs / sizeof(u16);
+	/*
+	 * The usn is compared and copied in its on disk form, so endianness
+	 * does not matter here: both sides are consistently in the same byte
+	 * order.
+	 */
+	usn = *usa;
+	/* Pass 1: look for incomplete multi sector transfers. */
+	word = (u16*)b + stride - 1;
+	for (i = 0; i < nr_fixups; i++, word += stride) {
+		if (*word != usn) {
+			/* magic_BAAD is already converted to le32. */
+			b->magic = magic_BAAD;
+			return -EINVAL;
+		}
+	}
+	/* Pass 2: restore the original data from the usa into each block. */
+	word = (u16*)b + stride - 1;
+	for (i = 0; i < nr_fixups; i++, word += stride)
+		*word = *++usa;
+	return 0;
+}
+/**
+ * pre_write_mst_fixup - apply multi sector transfer protection
+ * @b:		pointer to the data to protect
+ * @size:	size in bytes of @b
+ *
+ * Perform the pre write multi sector transfer fixup on the data pointed to by
+ * @b of @size: bump the update sequence number, save the last u16 of every
+ * NTFS_BLOCK_SIZE sized block into the update sequence array and overwrite it
+ * with the new update sequence number.
+ *
+ * Return 0 if the fixup was applied (success) or -EINVAL if no fixup was
+ * performed (assumed not needed). This is in contrast to
+ * post_read_mst_fixup().
+ *
+ * NOTE: An absent or invalid update sequence array is taken to mean that the
+ * structure is not subject to protection. The caller therefore has to create
+ * a valid update sequence array header in the ntfs record (position of the
+ * array and number of elements in it) before calling this function, otherwise
+ * it will fail. The update sequence number must be initialised as well,
+ * otherwise whatever word happens to be at that position is used.
+ */
+int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size)
+{
+	/* Distance, in u16 units, between two consecutive fixup positions. */
+	const int stride = NTFS_BLOCK_SIZE / sizeof(u16);
+	u16 usa_ofs, nr_fixups, usn;
+	u16 *usa, *word;
+	/* Sanity check + only fixup if it makes sense. */
+	if (!b)
+		return -EINVAL;
+	if (is_baad_record(b->magic) || is_hole_record(b->magic))
+		return -EINVAL;
+	usa_ofs = le16_to_cpu(b->usa_ofs);
+	/* One less than usa_count: the first element is the usn itself. */
+	nr_fixups = le16_to_cpu(b->usa_count) - 1;
+	/* Size and alignment checks. */
+	if (size & (NTFS_BLOCK_SIZE - 1))
+		return -EINVAL;
+	if (usa_ofs & 1)
+		return -EINVAL;
+	if (usa_ofs + (nr_fixups * 2) > size)
+		return -EINVAL;
+	if ((size >> NTFS_BLOCK_SIZE_BITS) != nr_fixups)
+		return -EINVAL;
+	/* Position of the usn in the update sequence array. */
+	usa = (u16*)((u8*)b + usa_ofs);
+	/* Cyclically increment the usn, skipping 0 and 0xffff (-1). */
+	usn = le16_to_cpup(usa) + 1;
+	if (!usn || usn == 0xffff)
+		usn = 1;
+	usn = cpu_to_le16(usn);
+	*usa = usn;
+	/* Walk the last u16 of every block, starting with the first. */
+	for (word = (u16*)b + stride - 1; nr_fixups;
+			nr_fixups--, word += stride) {
+		/* Save the original data into the next usa slot... */
+		*++usa = *word;
+		/* ...and put the usn in its place. */
+		*word = usn;
+	}
+	return 0;
+}
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
+/*
+ * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS
+ * 	     project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "ntfs.h"
+/**
+ * ntfs_lookup - find the inode represented by a dentry in a directory inode
+ * @dir_ino:	directory inode in which to look for the inode
+ * @dent:	dentry representing the inode to look for
+ *
+ * In short, ntfs_lookup() looks for the inode represented by the dentry @dent
+ * in the directory inode @dir_ino and if found attaches the inode to the
+ * dentry @dent.
+ *
+ * In more detail, the dentry @dent specifies which inode to look for by
+ * supplying the name of the inode in @dent->d_name.name. ntfs_lookup()
+ * converts the name to Unicode and walks the contents of the directory inode
+ * @dir_ino looking for the converted Unicode name. If the name is found in the
+ * directory, the corresponding inode is loaded by calling iget() on its inode
+ * number and the inode is associated with the dentry @dent via a call to
+ * d_add().
+ *
+ * If the name is not found in the directory, a NULL inode is inserted into the
+ * dentry @dent. The dentry is then termed a negative dentry.
+ *
+ * Return NULL in both of the above cases. Only if an actual error occurs, do
+ * we return an error via ERR_PTR(): the error from the name conversion or
+ * from the directory lookup, or -EACCES if iget() fails or the sequence number
+ * of the found inode does not match the one in the directory entry.
+ */
+static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
+{
+	ntfs_volume *vol = NTFS_SB(dir_ino->i_sb);
+	struct inode *dent_inode;
+	u64 mref;
+	unsigned long dent_ino;
+	uchar_t *uname;
+	int uname_len;
+	ntfs_debug("Looking up %s in directory inode 0x%lx.",
+			dent->d_name.name, dir_ino->i_ino);
+	/* Convert the name of the dentry to Unicode. */
+	uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len,
+			&uname);
+	if (uname_len < 0) {
+		ntfs_error(vol->sb, "Failed to convert name to Unicode.");
+		return ERR_PTR(uname_len);
+	}
+	mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len);
+	/* The converted name is no longer needed once the lookup is done. */
+	kmem_cache_free(ntfs_name_cache, uname);
+	if (!IS_ERR_MREF(mref)) {
+		dent_ino = (unsigned long)MREF(mref);
+		ntfs_debug("Found inode 0x%lx. Calling iget.", dent_ino);
+		dent_inode = iget(vol->sb, dent_ino);
+		if (dent_inode) {
+			/*
+			 * Consistency check: the sequence number in the
+			 * directory entry has to match the one of the inode.
+			 * The check is waived for FILE_MFT.
+			 */
+			if (MSEQNO(mref) == NTFS_I(dent_inode)->seq_no ||
+					dent_ino == FILE_MFT) {
+				d_add(dent, dent_inode);
+				ntfs_debug("Done.");
+				return NULL;
+			}
+			ntfs_error(vol->sb, "Found stale reference to inode "
+					"0x%Lx (reference sequence number = "
+					"0x%x, inode sequence number = 0x%x), "
+					"returning -EACCES. Run chkdsk.",
+					(unsigned long long)MREF(mref),
+					MSEQNO(mref),
+					NTFS_I(dent_inode)->seq_no);
+			/* Drop the reference obtained by iget() above. */
+			iput(dent_inode);
+		} else
+			ntfs_error(vol->sb, "iget(0x%Lx) failed, returning "
+					"-EACCES.",
+					(unsigned long long)MREF(mref));
+		return ERR_PTR(-EACCES);
+	}
+	if (MREF_ERR(mref) == -ENOENT) {
+		ntfs_debug("Entry was not found, adding negative dentry.");
+		/* The dcache will handle negative entries. */
+		d_add(dent, NULL);
+		ntfs_debug("Done.");
+		return NULL;
+	}
+	ntfs_error(vol->sb, "ntfs_lookup_inode_by_name() failed with error "
+			"code %i.", -MREF_ERR(mref));
+	return ERR_PTR(MREF_ERR(mref));
+}
+/*
+ * Inode operations for NTFS directories. Only lookup is provided; all other
+ * methods are left out of the initializer and are therefore NULL.
+ */
+struct inode_operations ntfs_dir_inode_ops = {
+	lookup:		ntfs_lookup,	/* lookup directory. */
+};
+#if 0	/* Disabled copy of the inode_operations layout; never compiled. */
+struct inode_operations {
+	int (*create) (struct inode *,struct dentry *,int);
+	struct dentry * (*lookup) (struct inode *,struct dentry *);
+	int (*link) (struct dentry *,struct inode *,struct dentry *);
+	int (*unlink) (struct inode *,struct dentry *);
+	int (*symlink) (struct inode *,struct dentry *,const char *);
+	int (*mkdir) (struct inode *,struct dentry *,int);
+	int (*rmdir) (struct inode *,struct dentry *);
+	int (*mknod) (struct inode *,struct dentry *,int,int);
+	int (*rename) (struct inode *, struct dentry *,
+			struct inode *, struct dentry *);
+	int (*readlink) (struct dentry *, char *,int);
+	int (*follow_link) (struct dentry *, struct nameidata *);
+	void (*truncate) (struct inode *);
+	int (*permission) (struct inode *, int);
+	int (*revalidate) (struct dentry *);
+	int (*setattr) (struct dentry *, struct iattr *);
+	int (*getattr) (struct dentry *, struct iattr *);
+};
+#endif	/* 0 - presumably kept only as a reference; TODO confirm */
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
+/*
+ * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS
+ *	    project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_H
+#define _LINUX_NTFS_H
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,5)
+#	error The NTFS driver requires at least kernel 2.5.5.
+#endif
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+#include <linux/pagemap.h>
+#include <linux/smp.h>
+#include <asm/atomic.h>
+#include "types.h"
+#include "debug.h"
+#include "malloc.h"
+#include "endian.h"
+#include "volume.h"
+#include "inode.h"
+#include "layout.h"
+#include "attrib.h"
+#include "mft.h"
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)	/* NOTE(review): looks unreachable, this header #errors out below 2.5.5. */
+typedef long sector_t;
+#endif
+/* Constants used throughout the driver. */
+typedef enum {
+	/* log2 of NTFS_BLOCK_SIZE. */
+	NTFS_BLOCK_SIZE_BITS	= 9,
+	/* Block size in bytes (512). */
+	NTFS_BLOCK_SIZE		= 1 << NTFS_BLOCK_SIZE_BITS,
+	NTFS_MAX_NAME_LEN	= 255,
+	NTFS_SB_MAGIC		= 0x5346544e,	/* 'NTFS' */
+} NTFS_CONSTANTS;
+/*
+ * Bit numbers for the state field in the ntfs_inode structure. A set bit has
+ * the meaning given below; (f) marks the meaning for files and (d) the
+ * meaning for directories where the two differ.
+ */
+typedef enum {
+	/* Mft record needs to be written to disk. */
+	NI_Dirty		= 0,
+	/* Mft record contains an attribute list. */
+	NI_AttrList		= 1,
+	/* Attribute list is non-resident. Implies NI_AttrList is set. */
+	NI_AttrListNonResident	= 2,
+	/*
+	 * (f) Unnamed data attr is non-resident.
+	 * (d) $I30 index alloc attr is present.
+	 */
+	NI_NonResident		= 3,
+	/*
+	 * (f) Unnamed data attr is compressed.
+	 * (d) Create compressed files by default.
+	 */
+	NI_Compressed		= 4,
+	/*
+	 * (f) Unnamed data attr is encrypted.
+	 * (d) Create encrypted files by default.
+	 */
+	NI_Encrypted		= 5,
+	/* (d) $I30 bitmap attr is non resident. */
+	NI_BmpNonResident	= 6,
+} ntfs_inode_state_bits;
+/*
+ * Accessors for the bits in the state field of an ntfs inode. Each macro
+ * tests, sets or clears a single bit of (n_ino)->state. Note that
+ * NInoNonResident() and NInoIndexAllocPresent() both test NI_NonResident.
+ *
+ * NOTE: We should be adding dirty mft records to a list somewhere and they
+ * should be independent of the (ntfs/vfs) inode structure so that an inode can
+ * be removed but the record can be left dirty for syncing later.
+ */
+#define NInoDirty(n_ino)	  test_bit(NI_Dirty, &(n_ino)->state)
+#define NInoSetDirty(n_ino)	  set_bit(NI_Dirty, &(n_ino)->state)
+#define NInoClearDirty(n_ino)	  clear_bit(NI_Dirty, &(n_ino)->state)
+#define NInoAttrList(n_ino)	  test_bit(NI_AttrList, &(n_ino)->state)
+#define NInoNonResident(n_ino)	  test_bit(NI_NonResident, &(n_ino)->state)
+#define NInoIndexAllocPresent(n_ino) test_bit(NI_NonResident, &(n_ino)->state)
+#define NInoCompressed(n_ino)	  test_bit(NI_Compressed, &(n_ino)->state)
+#define NInoEncrypted(n_ino)	  test_bit(NI_Encrypted, &(n_ino)->state)
+#define NInoBmpNonResident(n_ino) test_bit(NI_BmpNonResident, &(n_ino)->state)
+/* Global variables. */
+/* Slab caches (from super.c). */
+extern kmem_cache_t *ntfs_name_cache;
+extern kmem_cache_t *ntfs_inode_cache;
+extern kmem_cache_t *ntfs_big_inode_cache;
+extern kmem_cache_t *ntfs_attr_ctx_cache;
+/* The little endian Unicode string $I30 as a global constant. */
+extern const uchar_t I30[5];
+/* The various operations structs defined throughout the driver files. */
+extern struct super_operations ntfs_mount_sops;
+extern struct super_operations ntfs_sops;
+extern struct file_operations ntfs_file_ops;
+extern struct inode_operations ntfs_file_inode_ops;
+extern struct address_space_operations ntfs_file_aops;
+extern struct file_operations ntfs_dir_ops;
+extern struct inode_operations ntfs_dir_inode_ops;
+extern struct address_space_operations ntfs_dir_aops;
+extern struct file_operations ntfs_empty_file_ops;
+extern struct inode_operations ntfs_empty_inode_ops;
+extern struct address_space_operations ntfs_mft_aops;
+extern struct address_space_operations ntfs_mftbmp_aops;
+/*
+ * Generic macro to convert pointers to values for comparison purposes: casts
+ * the pointer @p to a ptrdiff_t integer. Only defined here if no p2n macro
+ * exists yet.
+ */
+#ifndef p2n
+#define p2n(p)          ((ptrdiff_t)((ptrdiff_t*)(p)))
+#endif
+/**
+ * NTFS_SB - get the ntfs volume belonging to a vfs super block
+ * @sb:		VFS super block
+ *
+ * Return the ntfs volume stored in the generic_sbp field of the VFS super
+ * block @sb.
+ */
+static inline ntfs_volume *NTFS_SB(struct super_block *sb)
+{
+	ntfs_volume *vol = sb->u.generic_sbp;
+	return vol;
+}
+/**
+ * ntfs_unmap_page - release a page that was mapped using ntfs_map_page()
+ * @page:	the page to release
+ *
+ * Unpin, unmap and release a page that was obtained from ntfs_map_page():
+ * the page is first unmapped with kunmap() and then the page cache reference
+ * is dropped with page_cache_release().
+ */
+static inline void ntfs_unmap_page(struct page *page)
+{
+	kunmap(page);
+	page_cache_release(page);
+}
+/**
+ * ntfs_map_page - map a page into accessible memory, reading it if necessary
+ * @mapping:	address space for which to obtain the page
+ * @index:	index into the page cache for @mapping of the page to map
+ *
+ * Obtain the page at position @index (in units of PAGE_CACHE_SIZE, not in
+ * bytes) from the page cache of the address space @mapping.
+ *
+ * If the page is not in memory yet, it is read from disk using the readpage
+ * method from the address space operations of @mapping and added to the page
+ * cache of @mapping in the process.
+ *
+ * Once the read has completed the page is kmap()ped, so a page living in high
+ * memory becomes directly addressable by the kernel. The page count is
+ * incremented as well, which pins the page into place. As a result
+ * page_address(page) can be used on every page obtained from ntfs_map_page()
+ * to get the kernel virtual address of the page.
+ *
+ * When finished with the page, the caller has to call ntfs_unmap_page() to
+ * unpin, unmap and release it.
+ *
+ * Note this does not grant exclusive access. If such is desired, the caller
+ * must provide it independently of the ntfs_{un}map_page() calls by using
+ * a {rw_}semaphore or other means of serialization. A spin lock cannot be
+ * used as ntfs_map_page() can block.
+ *
+ * Return the unlocked and uptodate page on success or an encoded error on
+ * failure. The caller has to test the return value with IS_ERR(); if that is
+ * true, PTR_ERR() on the return value gives the negative error code. A page
+ * which is not uptodate or has its error flag set after the read results in
+ * -EIO.
+ */
+static inline struct page *ntfs_map_page(struct address_space *mapping,
+		unsigned long index)
+{
+	struct page *pg;
+	pg = read_cache_page(mapping, index,
+			(filler_t*)mapping->a_ops->readpage, NULL);
+	/* Nothing to undo if we did not even get a page. */
+	if (IS_ERR(pg))
+		return pg;
+	wait_on_page(pg);
+	kmap(pg);
+	if (!Page_Uptodate(pg) || PageError(pg)) {
+		/* The read failed: undo the kmap and drop our reference. */
+		ntfs_unmap_page(pg);
+		return ERR_PTR(-EIO);
+	}
+	return pg;
+}
+/* Declarations of functions and global variables. */
+/* From fs/ntfs/aops.c */
+extern int ntfs_file_get_block(struct inode *vi, const sector_t blk,
+		struct buffer_head *bh, const int create);
+extern void end_buffer_read_index_async(struct buffer_head *bh, int uptodate);
+/* From fs/ntfs/compress.c */
+extern int ntfs_file_read_compressed_block(struct page *page);
+/* From fs/ntfs/super.c */
+#define default_upcase_len 0x10000
+extern wchar_t *default_upcase;
+extern unsigned long ntfs_nr_upcase_users;
+extern unsigned long ntfs_nr_mounts;
+extern struct semaphore ntfs_lock;
+typedef struct {	/* One entry of a mount option value table. */
+	int val;	/* Value selected by the string @str. */
+	char *str;	/* Option value as text; NULL in the terminating entry. */
+} option_t;
+extern const option_t on_errors_arr[];
+/* From fs/ntfs/compress.c */
+extern int allocate_compression_buffers(void);
+extern void free_compression_buffers(void);
+/* From fs/ntfs/mst.c */
+extern inline void __post_read_mst_fixup(NTFS_RECORD *b, const u32 size);
+extern int post_read_mst_fixup(NTFS_RECORD *b, const u32 size);
+extern int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size);
+/* From fs/ntfs/time.c */
+extern inline s64 utc2ntfs(const time_t time);
+extern inline s64 get_current_ntfs_time(void);
+extern inline time_t ntfs2utc(const s64 time);
+/* From fs/ntfs/dir.c */
+extern u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname,
+		const int uname_len);
+/* From fs/ntfs/unistr.c */
+extern BOOL ntfs_are_names_equal(const uchar_t *s1, size_t s1_len,
+		const uchar_t *s2, size_t s2_len,
+		const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_size);
+extern int ntfs_collate_names(const uchar_t *name1, const u32 name1_len,
+		const uchar_t *name2, const u32 name2_len,
+		const int err_val, const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_len);
+extern int ntfs_ucsncmp(const uchar_t *s1, const uchar_t *s2, size_t n);
+extern int ntfs_ucsncasecmp(const uchar_t *s1, const uchar_t *s2, size_t n,
+		const uchar_t *upcase, const u32 upcase_size);
+extern void ntfs_upcase_name(uchar_t *name, u32 name_len,
+		const uchar_t *upcase, const u32 upcase_len);
+extern void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr,
+		const uchar_t *upcase, const u32 upcase_len);
+extern int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
+		FILE_NAME_ATTR *file_name_attr2,
+		const int err_val, const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_len);
+extern int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
+		const int ins_len, uchar_t **outs);
+extern int ntfs_ucstonls(const ntfs_volume *vol, const uchar_t *ins,
+		const int ins_len, unsigned char **outs, int outs_len);
+/* From fs/ntfs/upcase.c */
+extern uchar_t *generate_default_upcase(void);
+#endif /* _LINUX_NTFS_H */
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
+/*
+ * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2001,2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/locks.h>
+#include <linux/spinlock.h>
+#include <linux/genhd.h>	/* For gendisk stuff. */
+#include <linux/blkdev.h>	/* For get_hardsect_size. */
+#include "ntfs.h"
+#include "sysctl.h"
+/*
+ * Number of mounted file systems which have compression enabled. Starts out
+ * as zero, like every object with static storage duration.
+ */
+static unsigned long ntfs_nr_compression_users;
+/*
+ * Error constants/strings used in inode.c::ntfs_show_options(). The values
+ * are distinct bits so that several of them can be combined in one variable.
+ */
+typedef enum {
+	/* One of these must be present, default is ON_ERRORS_CONTINUE. */
+	ON_ERRORS_PANIC			= 0x01,
+	ON_ERRORS_REMOUNT_RO		= 0x02,
+	ON_ERRORS_CONTINUE		= 0x04,
+	/* Optional, can be combined with any of the above. */
+	ON_ERRORS_RECOVER		= 0x10,
+} ON_ERRORS_ACTIONS;
+/*
+ * Table translating the textual on-error actions to their ON_ERRORS_*
+ * values. The entry with a NULL string terminates the table.
+ */
+const option_t on_errors_arr[] = {
+	{ ON_ERRORS_PANIC,	"panic" },
+	{ ON_ERRORS_REMOUNT_RO,	"remount-ro" },
+	{ ON_ERRORS_CONTINUE,	"continue" },
+	{ ON_ERRORS_RECOVER,	"recover" },
+	{ 0,			NULL }
+};
+static const option_t readdir_opts_arr[] = {	/* Text to SHOW_* value table. */
+	{ SHOW_SYSTEM,	"system" },
+	{ SHOW_WIN32,	"win32" },
+	{ SHOW_WIN32,	"long" },	/* Same value as "win32". */
+	{ SHOW_DOS,	"dos" },
+	{ SHOW_DOS,	"short" },	/* Same value as "dos". */
+	{ SHOW_POSIX,	"posix" },
+	{ SHOW_ALL,	"all" },
+	{ 0,		NULL }	/* Terminating entry. */
+};
+/**
+ * simple_getbool - parse the textual value of a boolean option
+ * @s:		value string to parse, may be NULL
+ * @setval:	where to store the parsed boolean
+ *
+ * A NULL @s, "1", "yes" and "true" set *@setval to TRUE; "0", "no" and
+ * "false" set it to FALSE. Any other string leaves *@setval untouched.
+ *
+ * Return 1 if *@setval was set and 0 if @s was not recognized.
+ *
+ * Copied from old ntfs driver (which copied from vfat driver).
+ */
+static int simple_getbool(char *s, BOOL *setval)
+{
+	/* No value given at all means the option is simply switched on. */
+	if (!s) {
+		*setval = TRUE;
+		return 1;
+	}
+	if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true")) {
+		*setval = TRUE;
+		return 1;
+	}
+	if (!strcmp(s, "0") || !strcmp(s, "no") || !strcmp(s, "false")) {
+		*setval = FALSE;
+		return 1;
+	}
+	return 0;
+}
+/**
+ * parse_options - parse the (re)mount options
+ * @vol:	ntfs volume
+ * @opt:	string containing the (re)mount options
+ *
+ * Parse the recognized options in @opt for the ntfs volume described by @vol.
+ * @opt is a comma separated list of "name" or "name=value" items; it is split
+ * up in place, i.e. the string is modified. A NULL or empty @opt is valid and
+ * simply selects the defaults.
+ *
+ * The option values are gathered in local variables first and are copied to
+ * @vol once the whole string has been parsed.
+ *
+ * Return TRUE on success and FALSE on error. On error, an NLS table that was
+ * loaded for the nls= option but not yet handed over to @vol is unloaded
+ * again so that it does not leak.
+ */
+static BOOL parse_options(ntfs_volume *vol, char *opt)
+{
+	char *p, *v, *ov;
+	static char *utf8 = "utf8";
+	int errors = 0, sloppy = 0;
+	uid_t uid = (uid_t)-1;
+	gid_t gid = (gid_t)-1;
+	mode_t fmask = (mode_t)-1, dmask = (mode_t)-1;
+	int mft_zone_multiplier = -1, on_errors = -1, readdir_opts = -1;
+	struct nls_table *nls_map = NULL, *old_nls;
+	/* I am lazy... (-8 */
+	/* Numeric option whose argument may be omitted (default_value then). */
+#define NTFS_GETOPT_WITH_DEFAULT(option, variable, default_value)	\
+	if (!strcmp(p, option)) {					\
+		if (!v || !*v)						\
+			variable = default_value;			\
+		else {							\
+			variable = simple_strtoul(ov = v, &v, 0);	\
+			if (*v)						\
+				goto needs_val;				\
+		}							\
+	}
+	/* Numeric option with a mandatory argument. */
+#define NTFS_GETOPT(option, variable)					\
+	if (!strcmp(p, option)) {					\
+		if (!v || !*v)						\
+			goto needs_arg;					\
+		variable = simple_strtoul(ov = v, &v, 0);		\
+		if (*v)							\
+			goto needs_val;					\
+	}
+	/* Option whose argument is looked up in opt_array and or-ed in. */
+#define NTFS_GETOPT_OPTIONS_ARRAY(option, variable, opt_array)		\
+	if (!strcmp(p, option)) {					\
+		int _i;							\
+		if (!v || !*v)						\
+			goto needs_arg;					\
+		ov = v;							\
+		if (variable == -1)					\
+			variable = 0;					\
+		for (_i = 0; opt_array[_i].str && *opt_array[_i].str; _i++) \
+			if (!strcmp(opt_array[_i].str, v)) {		\
+				variable |= opt_array[_i].val;		\
+				break;					\
+			}						\
+		if (!opt_array[_i].str || !*opt_array[_i].str)		\
+			goto needs_val;					\
+	}
+	if (!opt || !*opt)
+		goto no_mount_options;
+	while ((p = strsep(&opt, ","))) {
+		/* Split "name=value" into name (p) and value (v). */
+		if ((v = strchr(p, '=')))
+			*v++ = '\0';
+		NTFS_GETOPT("uid", uid)
+		else NTFS_GETOPT("gid", gid)
+		else NTFS_GETOPT("umask", fmask = dmask)
+		else NTFS_GETOPT("fmask", fmask)
+		else NTFS_GETOPT("dmask", dmask)
+		else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE)
+		else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier)
+		else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors,
+				on_errors_arr)
+		else NTFS_GETOPT_OPTIONS_ARRAY("show_inodes", readdir_opts,
+				readdir_opts_arr)
+		else if (!strcmp(p, "show_system_files")) {
+			BOOL val = FALSE;
+			ntfs_warning(vol->sb, "Option show_system_files is "
+				   "deprecated. Please use option "
+				   "show_inodes=system in the future.");
+			if (!v || !*v)
+				val = TRUE;
+			else if (!simple_getbool(v, &val))
+				goto needs_bool;
+			if (val) {
+				if (readdir_opts == -1)
+					readdir_opts = 0;
+				readdir_opts |= SHOW_SYSTEM;
+			}
+		} else if (!strcmp(p, "posix")) {
+			BOOL val = FALSE;
+			ntfs_warning(vol->sb, "Option posix is deprecated. "
+				   "Please use option show_inodes=posix "
+				   "instead. Be aware that some userspace "
+				   "applications may be confused by this, "
+				   "since the short and long names of "
+				   "directory inodes will have the same inode "
+				   "numbers, yet each will only have a link "
+				   "count of 1 due to Linux not supporting "
+				   "directory hard links.");
+			if (!v || !*v)
+				goto needs_arg;
+			else if (!simple_getbool(v, &val))
+				goto needs_bool;
+			if (val) {
+				if (readdir_opts == -1)
+					readdir_opts = 0;
+				readdir_opts |= SHOW_POSIX;
+			}
+		} else if (!strcmp(p, "nls") || !strcmp(p, "iocharset")) {
+			if (!strcmp(p, "iocharset"))
+				ntfs_warning(vol->sb, "Option iocharset is "
+						"deprecated. Please use "
+						"option nls=<charsetname> in "
+						"the future.");
+			if (!v || !*v)
+				goto needs_arg;
+use_utf8:
+			/*
+			 * Also entered from the utf8 option below with @v
+			 * pointing at the string "utf8".
+			 */
+			old_nls = nls_map;
+			nls_map = load_nls(v);
+			if (!nls_map) {
+				if (!old_nls) {
+					ntfs_error(vol->sb, "NLS character set "
+							"%s not found.", v);
+					goto err_out;
+				}
+				ntfs_error(vol->sb, "NLS character set %s not "
+						"found. Using previous one %s.",
+						v, old_nls->charset);
+				nls_map = old_nls;
+			} else /* nls_map */ {
+				if (old_nls)
+					unload_nls(old_nls);
+			}
+		} else if (!strcmp(p, "utf8")) {
+			BOOL val = FALSE;
+			ntfs_warning(vol->sb, "Option utf8 is no longer "
+				   "supported, using option nls=utf8. Please "
+				   "use option nls=utf8 in the future and "
+				   "make sure utf8 is compiled either as a "
+				   "module or into the kernel.");
+			if (!v || !*v)
+				val = TRUE;
+			else if (!simple_getbool(v, &val))
+				goto needs_bool;
+			if (val) {
+				v = utf8;
+				goto use_utf8;
+			}
+		} else {
+			ntfs_error(vol->sb, "Unrecognized mount option %s.", p);
+			if (errors < INT_MAX)
+				errors++;
+		}
+#undef NTFS_GETOPT_OPTIONS_ARRAY
+#undef NTFS_GETOPT
+#undef NTFS_GETOPT_WITH_DEFAULT
+	}
+no_mount_options:
+	if (errors && !sloppy)
+		goto err_out;
+	if (sloppy)
+		ntfs_warning(vol->sb, "Sloppy option given. Ignoring "
+				"unrecognized mount option(s) and continuing.");
+	/* Keep this first! */
+	if (on_errors != -1) {
+		if (!on_errors) {
+			ntfs_error(vol->sb, "Invalid errors option argument "
+					"or bug in options parser.");
+			goto err_out;
+		}
+	}
+	if (nls_map) {
+		if (vol->nls_map) {
+			ntfs_error(vol->sb, "Cannot change NLS character set "
+					"on remount.");
+			goto err_out;
+		} /* else (!vol->nls_map) */
+		ntfs_debug("Using NLS character set %s.", nls_map->charset);
+		vol->nls_map = nls_map;
+		/* The volume owns the table now, do not unload it on error. */
+		nls_map = NULL;
+	} else /* (!nls_map) */ {
+		if (!vol->nls_map) {
+			vol->nls_map = load_nls_default();
+			if (!vol->nls_map) {
+				ntfs_error(vol->sb, "Failed to load default "
+						"NLS character set.");
+				goto err_out;
+			}
+			ntfs_debug("Using default NLS character set (%s).",
+					vol->nls_map->charset);
+		}
+	}
+	if (mft_zone_multiplier != -1) {
+		if (vol->mft_zone_multiplier && vol->mft_zone_multiplier !=
+				mft_zone_multiplier) {
+			ntfs_error(vol->sb, "Cannot change mft_zone_multiplier "
+					"on remount.");
+			goto err_out;
+		}
+		if (mft_zone_multiplier < 1 || mft_zone_multiplier > 4) {
+			ntfs_error(vol->sb, "Invalid mft_zone_multiplier. "
+					"Using default value, i.e. 1.");
+			mft_zone_multiplier = 1;
+		}
+		vol->mft_zone_multiplier = mft_zone_multiplier;
+	}
+	if (!vol->mft_zone_multiplier)
+		/* Not specified and it is the first mount, so set default. */
+		vol->mft_zone_multiplier = 1;
+	if (on_errors != -1)
+		vol->on_errors = on_errors;
+	/* Make sure one of the mandatory error actions is always set. */
+	if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER)
+		vol->on_errors |= ON_ERRORS_CONTINUE;
+	if (uid != (uid_t)-1)
+		vol->uid = uid;
+	if (gid != (gid_t)-1)
+		vol->gid = gid;
+	if (fmask != (mode_t)-1)
+		vol->fmask = fmask;
+	if (dmask != (mode_t)-1)
+		vol->dmask = dmask;
+	if (readdir_opts != -1)
+		vol->readdir_opts = readdir_opts;
+	return TRUE;
+needs_arg:
+	ntfs_error(vol->sb, "The %s option requires an argument.", p);
+	goto err_out;
+needs_bool:
+	ntfs_error(vol->sb, "The %s option requires a boolean argument.", p);
+	goto err_out;
+needs_val:
+	ntfs_error(vol->sb, "Invalid %s option argument: %s", p, ov);
+err_out:
+	/* Drop an NLS table which was loaded but never given to @vol. */
+	if (nls_map)
+		unload_nls(nls_map);
+	return FALSE;
+}
+/**
+ * ntfs_remount - change the mount options of a mounted ntfs filesystem
+ * @sb:		superblock of mounted ntfs filesystem
+ * @flags:	remount flags
+ * @opt:	remount options string
+ *
+ * Parse @opt and apply the result to the already mounted ntfs volume that
+ * belongs to @sb. @flags is not looked at yet.
+ *
+ * Return 0 on success and -EINVAL if parse_options() rejects @opt.
+ *
+ * NOTE: The VFS sets the @sb->s_flags remount flags to @flags after
+ * ntfs_remount() returns successfully (i.e. returns 0). Otherwise,
+ * @sb->s_flags are not changed.
+ */
+static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
+{
+	ntfs_volume *volume = NTFS_SB(sb);
+	ntfs_debug("Entering.");
+	/*
+	 * FIXME/TODO: If left like this we will have problems with rw->ro and
+	 * ro->rw, as well as with sync->async and vice versa remounts.
+	 * Note: The VFS already checks that there are no pending deletes and
+	 * no open files for writing. So we only need to worry about dirty
+	 * inode pages and dirty system files (which include dirty inodes).
+	 * Either handle by flushing the whole volume NOW or by having the
+	 * write routines work on MS_RDONLY fs and guarantee we don't mark
+	 * anything as dirty if MS_RDONLY is set. That way the dirty data
+	 * would get flushed but no new dirty data would appear. This is
+	 * probably best but we need to be careful not to mark anything dirty
+	 * or the MS_RDONLY will be leaking writes.
+	 * TODO: Deal with *flags.
+	 */
+	if (parse_options(volume, opt))
+		return 0;
+	return -EINVAL;
+}
+/**
+ * is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector
+ * @sb:		Super block of the device to which @b belongs.
+ * @b:		Boot sector of device @sb to check.
+ * @silent:	If TRUE, all output will be silenced.
+ *
+ * Run a series of sanity checks over the boot sector @b: the optional
+ * checksum, the OEM id, the sector and cluster sizes, the reserved fields and
+ * the mft/index record size encodings. An invalid end of sector marker only
+ * causes a warning.
+ *
+ * Return TRUE if @b passed all checks and FALSE as soon as one of them fails.
+ *
+ * @sb is only needed for warning/error output, i.e. it can be NULL when silent
+ * is TRUE.
+ */
+static BOOL is_boot_sector_ntfs(const struct super_block *sb,
+		const NTFS_BOOT_SECTOR *b, const BOOL silent)
+{
+	u16 sector_bytes;
+	/*
+	 * A non-zero checksum must equal the sum of all the u32 values that
+	 * precede the checksum field. A zero checksum means "do not check".
+	 */
+	if ((void*)b < (void*)&b->checksum && b->checksum) {
+		u32 *cur = (u32*)b;
+		u32 sum = 0;
+		while (cur < (u32*)(&b->checksum)) {
+			sum += le32_to_cpup(cur);
+			++cur;
+		}
+		if (le32_to_cpu(b->checksum) != sum)
+			return FALSE;
+	}
+	/* The OEM identifier has to read "NTFS    ". */
+	if (b->oem_id != magicNTFS)
+		return FALSE;
+	/* Only 256 to 4096 bytes per sector are acceptable. */
+	sector_bytes = le16_to_cpu(b->bpb.bytes_per_sector);
+	if (sector_bytes < 0x100 || sector_bytes > 0x1000)
+		return FALSE;
+	/* Sectors per cluster must be a power of two from 1 to 128. */
+	switch (b->bpb.sectors_per_cluster) {
+	case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128:
+		break;
+	default:
+		return FALSE;
+	}
+	/* A cluster may not be bigger than 65536 bytes. */
+	if ((u32)sector_bytes * b->bpb.sectors_per_cluster > 0x10000)
+		return FALSE;
+	/* All the reserved/unused fields have to be zero. */
+	if (le16_to_cpu(b->bpb.reserved_sectors) ||
+			le16_to_cpu(b->bpb.root_entries) ||
+			le16_to_cpu(b->bpb.sectors) ||
+			le16_to_cpu(b->bpb.sectors_per_fat) ||
+			le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats)
+		return FALSE;
+	/*
+	 * Clusters per mft record: byte values 0xe1 to 0xf7 are accepted as
+	 * they are, anything else must be a power of two from 1 to 64.
+	 */
+	if ((u8)b->clusters_per_mft_record < 0xe1 ||
+			(u8)b->clusters_per_mft_record > 0xf7) {
+		switch (b->clusters_per_mft_record) {
+		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
+			break;
+		default:
+			return FALSE;
+		}
+	}
+	/* Clusters per index block: same rules as for the mft record. */
+	if ((u8)b->clusters_per_index_record < 0xe1 ||
+			(u8)b->clusters_per_index_record > 0xf7) {
+		switch (b->clusters_per_index_record) {
+		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
+			break;
+		default:
+			return FALSE;
+		}
+	}
+	/*
+	 * We work fine without a valid end of sector marker, but many BIOSes
+	 * refuse to boot from a bootsector with incorrect magic, so warn.
+	 */
+	if (b->end_of_sector_marker != cpu_to_le16(0xaa55) && !silent)
+		ntfs_warning(sb, "Invalid end of sector marker.");
+	return TRUE;
+}
+/**
+ * read_ntfs_boot_sector - read the NTFS boot sector of a device
+ * @sb:		super block of device to read the boot sector from
+ * @silent:	if true, suppress all output
+ *
+ * Reads the boot sector from the device and validates it. If that fails and
+ * the errors=recover mount option is in effect, tries to read the backup boot
+ * sector, first from the end of the device a-la NT4 and later and then from
+ * the middle of the device a-la NT3.51 and before. Without errors=recover no
+ * recovery is attempted and NULL is returned.
+ *
+ * If a valid boot sector is found but it is not the primary boot sector, we
+ * repair the primary boot sector silently (unless the device is read-only or
+ * the primary boot sector is not accessible).
+ *
+ * NOTE: To call this function, @sb must have the fields s_dev, the ntfs super
+ * block (u.ntfs_sb), nr_blocks and the device flags (s_flags) initialized
+ * to their respective values.
+ *
+ * Return the unlocked buffer head containing the boot sector or NULL on error.
+ */
+static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb,
+		const int silent)
+{
+	const char *read_err_str = "Unable to read %s boot sector.";
+	struct buffer_head *bh_primary, *bh_backup;
+	long nr_blocks = NTFS_SB(sb)->nr_blocks;
+	/* Try to read primary boot sector. */
+	if ((bh_primary = sb_bread(sb, 0))) {
+		if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*)
+				bh_primary->b_data, silent))
+			return bh_primary;
+		if (!silent)
+			ntfs_error(sb, "Primary boot sector is invalid.");
+	} else if (!silent)
+		ntfs_error(sb, read_err_str, "primary");
+	/*
+	 * Only go looking for a backup boot sector if errors=recover was
+	 * given. One of the other ON_ERRORS_* bits is always set as well, so
+	 * the recover bit itself has to be tested here.
+	 */
+	if (!(NTFS_SB(sb)->on_errors & ON_ERRORS_RECOVER)) {
+		if (bh_primary)
+			brelse(bh_primary);
+		if (!silent)
+			ntfs_error(sb, "Mount option errors=recover not used. "
+					"Aborting without trying to recover.");
+		return NULL;
+	}
+	/* Try to read NT4+ backup boot sector. */
+	if ((bh_backup = sb_bread(sb, nr_blocks - 1))) {
+		if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*)
+				bh_backup->b_data, silent))
+			goto hotfix_primary_boot_sector;
+		brelse(bh_backup);
+	} else if (!silent)
+		ntfs_error(sb, read_err_str, "backup");
+	/* Try to read NT3.51- backup boot sector. */
+	if ((bh_backup = sb_bread(sb, nr_blocks >> 1))) {
+		if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*)
+				bh_backup->b_data, silent))
+			goto hotfix_primary_boot_sector;
+		if (!silent)
+			ntfs_error(sb, "Could not find a valid backup boot "
+					"sector.");
+		brelse(bh_backup);
+	} else if (!silent)
+		ntfs_error(sb, read_err_str, "backup");
+	/* We failed. Cleanup and return. */
+	if (bh_primary)
+		brelse(bh_primary);
+	return NULL;
+hotfix_primary_boot_sector:
+	if (bh_primary) {
+		/*
+		 * If we managed to read sector zero and the volume is not
+		 * read-only, copy the found, valid backup boot sector to the
+		 * primary boot sector.
+		 */
+		if (!(sb->s_flags & MS_RDONLY)) {
+			ntfs_warning(sb, "Hot-fix: Recovering invalid primary "
+					"boot sector from backup copy.");
+			memcpy(bh_primary->b_data, bh_backup->b_data,
+					sb->s_blocksize);
+			mark_buffer_dirty(bh_primary);
+			ll_rw_block(WRITE, 1, &bh_primary);
+			wait_on_buffer(bh_primary);
+			/* Write succeeded: the primary is usable again. */
+			if (buffer_uptodate(bh_primary)) {
+				brelse(bh_backup);
+				return bh_primary;
+			}
+			ntfs_error(sb, "Hot-fix: Device write error while "
+					"recovering primary boot sector.");
+		} else {
+			ntfs_warning(sb, "Hot-fix: Recovery of primary boot "
+					"sector failed: Read-only mount.");
+		}
+		brelse(bh_primary);
+	}
+	ntfs_warning(sb, "Using backup boot sector.");
+	return bh_backup;
+}
+/**
+ * parse_ntfs_boot_sector - parse the boot sector and store the data in @vol
+ * @vol:	volume structure to initialise with data from boot sector
+ * @b:		boot sector to parse
+ *
+ * Parse the ntfs boot sector @b and store all important information therein in
+ * the ntfs super block @vol: the sector, cluster, mft record and index record
+ * sizes (each with its mask and bit count where applicable), the number of
+ * clusters, the locations of $MFT and $MFTMirr, the volume serial number and
+ * the mft zone. A still unset @vol->on_errors is given a default here as well.
+ *
+ * Return TRUE on success and FALSE on error.
+ */
+static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
+{
+	unsigned int sectors_per_cluster_bits, nr_hidden_sects;
+	int clusters_per_mft_record, clusters_per_index_record;
+	s64 ll;
+	vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector);
+	vol->sector_size_bits = ffs(vol->sector_size) - 1;
+	ntfs_debug("vol->sector_size = %i (0x%x)", vol->sector_size,
+			vol->sector_size);
+	ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits,
+			vol->sector_size_bits);
+	if (vol->sector_size != vol->sb->s_blocksize)
+		ntfs_warning(vol->sb, "The boot sector indicates a sector size "
+				"different from the device sector size.");
+	ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster);
+	sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1;
+	ntfs_debug("sectors_per_cluster_bits = 0x%x",
+			sectors_per_cluster_bits);
+	nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors);
+	ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects);
+	vol->cluster_size = vol->sector_size << sectors_per_cluster_bits;
+	vol->cluster_size_mask = vol->cluster_size - 1;
+	vol->cluster_size_bits = ffs(vol->cluster_size) - 1;
+	ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size,
+			vol->cluster_size);
+	ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask);
+	ntfs_debug("vol->cluster_size_bits = %i (0x%x)",
+			vol->cluster_size_bits, vol->cluster_size_bits);
+	if (vol->sector_size > vol->cluster_size) {
+		ntfs_error(vol->sb, "Sector sizes above the cluster size are "
+				"not supported. Sorry.");
+		return FALSE;
+	}
+	if (vol->sb->s_blocksize > vol->cluster_size) {
+		ntfs_error(vol->sb, "Cluster sizes smaller than the device "
+				"sector size are not supported. Sorry.");
+		return FALSE;
+	}
+	clusters_per_mft_record = b->clusters_per_mft_record;
+	ntfs_debug("clusters_per_mft_record = %i (0x%x)",
+			clusters_per_mft_record, clusters_per_mft_record);
+	if (clusters_per_mft_record > 0)
+		vol->mft_record_size = vol->cluster_size <<
+				(ffs(clusters_per_mft_record) - 1);
+	else
+		/*
+		 * When mft_record_size < cluster_size, clusters_per_mft_record
+		 * = -log2(mft_record_size) bytes. mft_record_size normally is
+		 * 1024 bytes, which is encoded as 0xF6 (-10 in decimal).
+		 */
+		vol->mft_record_size = 1 << -clusters_per_mft_record;
+	vol->mft_record_size_mask = vol->mft_record_size - 1;
+	vol->mft_record_size_bits = ffs(vol->mft_record_size) - 1;
+	ntfs_debug("vol->mft_record_size = %i (0x%x)", vol->mft_record_size,
+			vol->mft_record_size);
+	ntfs_debug("vol->mft_record_size_mask = 0x%x",
+			vol->mft_record_size_mask);
+	ntfs_debug("vol->mft_record_size_bits = %i (0x%x)",
+			vol->mft_record_size_bits, vol->mft_record_size_bits);
+	clusters_per_index_record = b->clusters_per_index_record;
+	ntfs_debug("clusters_per_index_record = %i (0x%x)",
+			clusters_per_index_record, clusters_per_index_record);
+	if (clusters_per_index_record > 0)
+		vol->index_record_size = vol->cluster_size <<
+				(ffs(clusters_per_index_record) - 1);
+	else
+		/*
+		 * When index_record_size < cluster_size,
+		 * clusters_per_index_record = -log2(index_record_size) bytes.
+		 * index_record_size normally equals 4096 bytes, which is
+		 * encoded as 0xF4 (-12 in decimal).
+		 */
+		vol->index_record_size = 1 << -clusters_per_index_record;
+	vol->index_record_size_mask = vol->index_record_size - 1;
+	vol->index_record_size_bits = ffs(vol->index_record_size) - 1;
+	ntfs_debug("vol->index_record_size = %i (0x%x)",
+			vol->index_record_size, vol->index_record_size);
+	ntfs_debug("vol->index_record_size_mask = 0x%x",
+			vol->index_record_size_mask);
+	ntfs_debug("vol->index_record_size_bits = %i (0x%x)",
+			vol->index_record_size_bits,
+			vol->index_record_size_bits);
+	/*
+	 * Get the size of the volume in clusters and check for 64-bit-ness.
+	 * Windows currently only uses 32 bits to save the clusters so we do
+	 * the same as it is much faster on 32-bit CPUs.
+	 */
+	ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits;
+	/*
+	 * Compare against ULONG_MAX instead of shifting 1ULL by the width of
+	 * unsigned long: where unsigned long has 64 bits that would be a
+	 * shift by 64, which is undefined behaviour in C.
+	 */
+	if ((u64)ll > (u64)ULONG_MAX) {
+		ntfs_error(vol->sb, "Cannot handle %i-bit clusters. Sorry.",
+				(int)(sizeof(ll) * 8));
+		return FALSE;
+	}
+	vol->_VCL(nr_clusters) = ll;
+	ntfs_debug("vol->nr_clusters = 0x%Lx", (long long)vol->_VCL(nr_clusters));
+	ll = sle64_to_cpu(b->mft_lcn);
+	if (ll >= vol->_VCL(nr_clusters)) {
+		ntfs_error(vol->sb, "MFT LCN is beyond end of volume. Weird.");
+		return FALSE;
+	}
+	vol->mft_lcn = ll;
+	ntfs_debug("vol->mft_lcn = 0x%Lx", (long long)vol->mft_lcn);
+	ll = sle64_to_cpu(b->mftmirr_lcn);
+	if (ll >= vol->_VCL(nr_clusters)) {
+		ntfs_error(vol->sb, "MFTMirr LCN is beyond end of volume. "
+				"Weird.");
+		return FALSE;
+	}
+	vol->mftmirr_lcn = ll;
+	ntfs_debug("vol->mftmirr_lcn = 0x%Lx", (long long)vol->mftmirr_lcn);
+	vol->serial_no = le64_to_cpu(b->volume_serial_number);
+	ntfs_debug("vol->serial_no = 0x%Lx",
+			(unsigned long long)vol->serial_no);
+	/*
+	 * Determine MFT zone size. This is not strictly the right place to do
+	 * this, but I am too lazy to create a function especially for it...
+	 */
+	vol->mft_zone_end = vol->_VCL(nr_clusters);
+	switch (vol->mft_zone_multiplier) {  /* % of volume size in clusters */
+	case 4:
+		vol->mft_zone_end = vol->mft_zone_end >> 1;	/* 50%   */
+		break;
+	case 3:
+		vol->mft_zone_end = (vol->mft_zone_end +
+				(vol->mft_zone_end >> 1)) >> 2;	/* 37.5% */
+		break;
+	case 2:
+		vol->mft_zone_end = vol->mft_zone_end >> 2;	/* 25%   */
+		break;
+	default:
+		vol->mft_zone_multiplier = 1;
+		/* Fall through into case 1. */
+	case 1:
+		vol->mft_zone_end = vol->mft_zone_end >> 3;	/* 12.5% */
+		break;
+	}
+	ntfs_debug("vol->mft_zone_multiplier = 0x%x",
+			vol->mft_zone_multiplier);
+	/* The zone starts at the mft itself and extends for the above size. */
+	vol->mft_zone_start = vol->mft_lcn;
+	vol->mft_zone_end += vol->mft_lcn;
+	ntfs_debug("vol->mft_zone_start = 0x%Lx",
+			(long long)vol->mft_zone_start);
+	ntfs_debug("vol->mft_zone_end = 0x%Lx", (long long)vol->mft_zone_end);
+	/* And another misplaced defaults setting. */
+	if (!vol->on_errors)
+		vol->on_errors = ON_ERRORS_PANIC;
+	return TRUE;
+}
+/**
+ * load_and_init_upcase - load the upcase table for an ntfs volume
+ * @vol:	ntfs super block describing device whose upcase to load
+ *
+ * Read $UpCase from the volume into a freshly allocated linear buffer and set
+ * up @vol->upcase and @vol->upcase_len. If a default upcase table is present
+ * and the volume's table matches it, the private copy is freed and the
+ * default table is used instead. If loading from the volume fails, the
+ * default table is used when there is one.
+ *
+ * Return TRUE on success or FALSE on error.
+ */
+static BOOL load_and_init_upcase(ntfs_volume *vol)
+{
+	struct super_block *sb = vol->sb;
+	struct inode *ino;
+	struct page *page;
+	unsigned long index, max_index;
+	unsigned int size;
+	int i, max;
+	ntfs_debug("Entering.");
+	/* Read upcase table and setup vol->upcase and vol->upcase_len. */
+	ino = iget(sb, FILE_UpCase);
+	if (!ino)
+		goto upcase_failed;
+	if (is_bad_inode(ino)) {
+		iput(ino);
+		goto upcase_failed;
+	}
+	/*
+	 * The upcase size must not be above 64k Unicode characters, must not
+	 * be zero and must be a multiple of sizeof(uchar_t).
+	 */
+	if (!ino->i_size || ino->i_size & (sizeof(uchar_t) - 1) ||
+			ino->i_size > 64ULL * 1024 * sizeof(uchar_t))
+		goto iput_upcase_failed;
+	vol->upcase = (uchar_t*)ntfs_malloc_nofs(ino->i_size);
+	if (!vol->upcase)
+		goto iput_upcase_failed;
+	/* First copy all the complete pages into the linear buffer... */
+	max_index = ino->i_size >> PAGE_CACHE_SHIFT;
+	for (index = 0; index < max_index; index++) {
+		page = ntfs_map_page(ino->i_mapping, index);
+		if (IS_ERR(page))
+			goto iput_upcase_failed;
+		memcpy((char*)vol->upcase + (index << PAGE_CACHE_SHIFT),
+				page_address(page), PAGE_CACHE_SIZE);
+		ntfs_unmap_page(page);
+	}
+	/* ...and then the partial page at the end, if there is one. */
+	size = ino->i_size & ~PAGE_CACHE_MASK;
+	if (size) {
+		page = ntfs_map_page(ino->i_mapping, index);
+		if (IS_ERR(page))
+			goto iput_upcase_failed;
+		memcpy((char*)vol->upcase + (index << PAGE_CACHE_SHIFT),
+				page_address(page), size);
+		ntfs_unmap_page(page);
+	}
+	vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS;
+	ntfs_debug("Read %Lu bytes from $UpCase (expected %u bytes).",
+			ino->i_size, 64 * 1024 * sizeof(uchar_t));
+	iput(ino);
+	down(&ntfs_lock);
+	if (!default_upcase) {
+		ntfs_debug("Using volume specified $UpCase since default is "
+				"not present.");
+		up(&ntfs_lock);
+		return TRUE;
+	}
+	/* Compare over the length of the shorter of the two tables. */
+	max = default_upcase_len;
+	if (max > vol->upcase_len)
+		max = vol->upcase_len;
+	i = 0;
+	while (i < max && vol->upcase[i] == default_upcase[i])
+		i++;
+	if (i != max) {
+		up(&ntfs_lock);
+		ntfs_debug("Using volume specified $UpCase since it does not "
+				"match the default.");
+		return TRUE;
+	}
+	/* Identical: share the default table and drop the private copy. */
+	ntfs_free(vol->upcase);
+	vol->upcase = default_upcase;
+	vol->upcase_len = max;
+	ntfs_nr_upcase_users++;
+	up(&ntfs_lock);
+	ntfs_debug("Volume specified $UpCase matches default. Using "
+			"default.");
+	return TRUE;
+iput_upcase_failed:
+	iput(ino);
+	ntfs_free(vol->upcase);
+	vol->upcase = NULL;
+upcase_failed:
+	/* Fall back to the default upcase table if there is one. */
+	down(&ntfs_lock);
+	if (!default_upcase) {
+		up(&ntfs_lock);
+		ntfs_error(sb, "Failed to initialized upcase table.");
+		return FALSE;
+	}
+	vol->upcase = default_upcase;
+	vol->upcase_len = default_upcase_len;
+	ntfs_nr_upcase_users++;
+	up(&ntfs_lock);
+	ntfs_error(sb, "Failed to load $UpCase from the volume. Using "
+			"default.");
+	return TRUE;
+}
+/**
+ * load_system_files - open the system files using normal functions
+ * @vol:	ntfs super block describing device whose system files to load
+ *
+ * Open the system files with normal access functions and complete setting up
+ * the ntfs super block @vol. In order, this sets up access to $MFT/$BITMAP,
+ * gets the $MFTMirr inode, loads the upcase table, gets the $Bitmap and
+ * $Volume inodes (caching the volume flags and version), checks that $LogFile
+ * and $AttrDef can be loaded and gets the root directory inode. On NTFS 3.0+
+ * volumes it additionally gets $Secure and checks that $Extend can be loaded.
+ *
+ * On failure, the inodes which were stored in @vol by this function are
+ * released again with iput().
+ *
+ * Return TRUE on success or FALSE on error.
+ */
+static BOOL load_system_files(ntfs_volume *vol)
+{
+	VCN next_vcn, last_vcn, highest_vcn;
+	struct super_block *sb = vol->sb;
+	struct inode *tmp_ino;
+	MFT_RECORD *m;
+	ATTR_RECORD *attr;
+	VOLUME_INFORMATION *vi;
+	attr_search_context *ctx;
+	run_list_element *rl;
+	ntfs_debug("Entering.");
+	/*
+	 * We have $MFT already (vol->mft_ino) but we need to setup access to
+	 * the $MFT/$BITMAP attribute.
+	 */
+	m = map_mft_record(READ, NTFS_I(vol->mft_ino));
+	if (IS_ERR(m)) {
+		ntfs_error(sb, "Failed to map $MFT.");
+		return FALSE;
+	}
+	if (get_attr_search_ctx(&ctx, NTFS_I(vol->mft_ino), m)) {
+		ntfs_error(sb, "Failed to get attribute search context.");
+		goto unmap_err_out;
+	}
+	/* Load all attribute extents. */
+	attr = NULL;
+	rl = NULL;
+	next_vcn = last_vcn = highest_vcn = 0;
+	while (lookup_attr(AT_BITMAP, NULL, 0, 0, next_vcn, NULL, 0, ctx)) {
+		run_list_element *nrl;
+		/* Cache the current attribute extent. */
+		attr = ctx->attr;
+		/* $MFT/$BITMAP must be non-resident. */
+		if (!attr->non_resident) {
+			ntfs_error(sb, "$MFT/$BITMAP must be non-resident but "
+					"a resident extent was found. $MFT is "
+					"corrupt. Run chkdsk.");
+			goto put_err_out;
+		}
+		/* $MFT/$BITMAP must be uncompressed and unencrypted. */
+		if (attr->flags & ATTR_COMPRESSION_MASK ||
+				attr->flags & ATTR_IS_ENCRYPTED) {
+			ntfs_error(sb, "$MFT/$BITMAP must be uncompressed and "
+					"unencrypted but a compressed/"
+					"encrypted extent was found. $MFT is "
+					"corrupt. Run chkdsk.");
+			goto put_err_out;
+		}
+		/*
+		 * Decompress the mapping pairs array of this extent
+		 * and merge the result into the existing run list. Note we
+		 * don't need any locking at this stage as we are already
+		 * running exclusively as we are mount in progress task.
+		 */
+		nrl = decompress_mapping_pairs(vol, attr, rl);
+		if (IS_ERR(nrl)) {
+			ntfs_error(sb, "decompress_mapping_pairs() failed with "
+					"error code %ld. $MFT is corrupt.",
+					PTR_ERR(nrl));
+			goto put_err_out;
+		}
+		rl = nrl;
+		/* Are we in the first extent? */
+		if (!next_vcn) {
+			/* Get the last vcn in the $BITMAP attribute. */
+			last_vcn = sle64_to_cpu(attr->_ANR(allocated_size)) >>
+					vol->cluster_size_bits;
+			vol->mftbmp_size = sle64_to_cpu(attr->_ANR(data_size));
+			vol->mftbmp_initialized_size =
+					sle64_to_cpu(attr->_ANR(initialized_size));
+			vol->mftbmp_allocated_size =
+					sle64_to_cpu(attr->_ANR(allocated_size));
+			/* Consistency check. */
+			if (vol->mftbmp_size < (vol->_VMM(nr_mft_records) + 7) >> 3) {
+				ntfs_error(sb, "$MFT/$BITMAP is too short to "
+						"contain a complete mft "
+						"bitmap: impossible. $MFT is "
+						"corrupt. Run chkdsk.");
+				goto put_err_out;
+			}
+		}
+		/* Get the lowest vcn for the next extent. */
+		highest_vcn = sle64_to_cpu(attr->_ANR(highest_vcn));
+		next_vcn = highest_vcn + 1;
+		/* Only one extent or error, which we catch below. */
+		if (next_vcn <= 0)
+			break;
+		/* Avoid endless loops due to corruption. */
+		if (next_vcn < sle64_to_cpu(attr->_ANR(lowest_vcn))) {
+			ntfs_error(sb, "$MFT/$BITMAP has corrupt attribute "
+					"list attribute. Run chkdsk.");
+			goto put_err_out;
+		}
+	}
+	if (!attr) {
+		ntfs_error(sb, "Missing or invalid $BITMAP attribute in file "
+				"$MFT. $MFT is corrupt. Run chkdsk.");
+put_err_out:
+		put_attr_search_ctx(ctx);
+unmap_err_out:
+		unmap_mft_record(READ, NTFS_I(vol->mft_ino));
+		return FALSE;
+	}
+	/* We are finished with $MFT/$BITMAP. */
+	put_attr_search_ctx(ctx);
+	unmap_mft_record(READ, NTFS_I(vol->mft_ino));
+	/* Catch errors. */
+	if (highest_vcn && highest_vcn != last_vcn - 1) {
+		ntfs_error(sb, "Failed to load the complete run list for "
+				"$MFT/$BITMAP. Driver bug or corrupt $MFT. "
+				"Run chkdsk.");
+		ntfs_debug("highest_vcn = 0x%Lx, last_vcn - 1 = 0x%Lx",
+				(long long)highest_vcn,
+				(long long)last_vcn - 1);
+		return FALSE;
+	}
+	/* Setup the run list and the address space in the volume structure. */
+	vol->mftbmp_rl.rl = rl;
+	vol->mftbmp_mapping.a_ops = &ntfs_mftbmp_aops;
+	/* Not inode data, set to NULL. Our mft bitmap access kludge... */
+	vol->mftbmp_mapping.host = NULL;
+	// FIXME: If mounting read-only, it would be ok to ignore errors when
+	// loading the mftbmp but we then need to make sure nobody remounts the
+	// volume read-write...
+	/* Get mft mirror inode. */
+	vol->mftmirr_ino = iget(sb, FILE_MFTMirr);
+	if (!vol->mftmirr_ino || is_bad_inode(vol->mftmirr_ino)) {
+		/*
+		 * iget() may have returned NULL, in which case there is no
+		 * inode to test or release. A non-NULL inode got here because
+		 * it is bad and must be released. The same pattern is used
+		 * for all the iget() calls below.
+		 */
+		if (vol->mftmirr_ino)
+			iput(vol->mftmirr_ino);
+		ntfs_error(sb, "Failed to load $MFTMirr.");
+		return FALSE;
+	}
+	// FIXME: Compare mftmirr with mft and repair if appropriate and not
+	// a read-only mount.
+	/* Read upcase table and setup vol->upcase and vol->upcase_len. */
+	if (!load_and_init_upcase(vol))
+		goto iput_mirr_err_out;
+	/*
+	 * Get the cluster allocation bitmap inode and verify the size, no
+	 * need for any locking at this stage as we are already running
+	 * exclusively as we are mount in progress task.
+	 */
+	vol->lcnbmp_ino = iget(sb, FILE_Bitmap);
+	if (!vol->lcnbmp_ino || is_bad_inode(vol->lcnbmp_ino)) {
+		if (vol->lcnbmp_ino)
+			iput(vol->lcnbmp_ino);
+		goto bitmap_failed;
+	}
+	if ((vol->_VCL(nr_lcn_bits) + 7) >> 3 > vol->lcnbmp_ino->i_size) {
+		iput(vol->lcnbmp_ino);
+bitmap_failed:
+		ntfs_error(sb, "Failed to load $Bitmap.");
+		goto iput_mirr_err_out;
+	}
+	/*
+	 * Get the volume inode and setup our cache of the volume flags and
+	 * version.
+	 */
+	vol->vol_ino = iget(sb, FILE_Volume);
+	if (!vol->vol_ino || is_bad_inode(vol->vol_ino)) {
+		if (vol->vol_ino)
+			iput(vol->vol_ino);
+volume_failed:
+		ntfs_error(sb, "Failed to load $Volume.");
+		goto iput_bmp_mirr_err_out;
+	}
+	m = map_mft_record(READ, NTFS_I(vol->vol_ino));
+	if (IS_ERR(m)) {
+iput_volume_failed:
+		iput(vol->vol_ino);
+		goto volume_failed;
+	}
+	if (get_attr_search_ctx(&ctx, NTFS_I(vol->vol_ino), m)) {
+		ntfs_error(sb, "Failed to get attribute search context.");
+		goto get_ctx_vol_failed;
+	}
+	if (!lookup_attr(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0, ctx) ||
+			ctx->attr->non_resident || ctx->attr->flags) {
+err_put_vol:
+		put_attr_search_ctx(ctx);
+get_ctx_vol_failed:
+		unmap_mft_record(READ, NTFS_I(vol->vol_ino));
+		goto iput_volume_failed;
+	}
+	vi = (VOLUME_INFORMATION*)((char*)ctx->attr +
+			le16_to_cpu(ctx->attr->_ARA(value_offset)));
+	/* Some bounds checks. */
+	if ((u8*)vi < (u8*)ctx->attr || (u8*)vi +
+			le32_to_cpu(ctx->attr->_ARA(value_length)) > (u8*)ctx->attr +
+			le32_to_cpu(ctx->attr->length))
+		goto err_put_vol;
+	/* Setup volume flags and version. */
+	vol->vol_flags = vi->flags;
+	vol->major_ver = vi->major_ver;
+	vol->minor_ver = vi->minor_ver;
+	put_attr_search_ctx(ctx);
+	unmap_mft_record(READ, NTFS_I(vol->vol_ino));
+	printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
+			vol->minor_ver);
+	/*
+	 * Get the inode for the logfile and empty it if this is a read-write
+	 * mount.
+	 */
+	tmp_ino = iget(sb, FILE_LogFile);
+	if (!tmp_ino || is_bad_inode(tmp_ino)) {
+		if (tmp_ino)
+			iput(tmp_ino);
+		ntfs_error(sb, "Failed to load $LogFile.");
+		// FIXME: We only want to empty the thing so pointless bailing
+		// out. Can recover/ignore.
+		goto iput_vol_bmp_mirr_err_out;
+	}
+	// FIXME: Empty the logfile, but only if not read-only.
+	// FIXME: What happens if someone remounts rw? We need to empty the file
+	// then. We need a flag to tell us whether we have done it already.
+	iput(tmp_ino);
+	/*
+	 * Get the inode for the attribute definitions file and parse the
+	 * attribute definitions.
+	 */
+	tmp_ino = iget(sb, FILE_AttrDef);
+	if (!tmp_ino || is_bad_inode(tmp_ino)) {
+		if (tmp_ino)
+			iput(tmp_ino);
+		ntfs_error(sb, "Failed to load $AttrDef.");
+		goto iput_vol_bmp_mirr_err_out;
+	}
+	// FIXME: Parse the attribute definitions.
+	iput(tmp_ino);
+	/* Get the root directory inode. */
+	vol->root_ino = iget(sb, FILE_root);
+	if (!vol->root_ino || is_bad_inode(vol->root_ino)) {
+		if (vol->root_ino)
+			iput(vol->root_ino);
+		ntfs_error(sb, "Failed to load root directory.");
+		goto iput_vol_bmp_mirr_err_out;
+	}
+	/* If on NTFS versions before 3.0, we are done. */
+	if (vol->major_ver < 3)
+		return TRUE;
+	/* NTFS 3.0+ specific initialization. */
+	/* Get the security descriptors inode. */
+	vol->secure_ino = iget(sb, FILE_Secure);
+	if (!vol->secure_ino || is_bad_inode(vol->secure_ino)) {
+		if (vol->secure_ino)
+			iput(vol->secure_ino);
+		ntfs_error(sb, "Failed to load $Secure.");
+		goto iput_root_vol_bmp_mirr_err_out;
+	}
+	// FIXME: Initialize security.
+	/* Get the extended system files' directory inode. */
+	tmp_ino = iget(sb, FILE_Extend);
+	if (!tmp_ino || is_bad_inode(tmp_ino)) {
+		if (tmp_ino)
+			iput(tmp_ino);
+		ntfs_error(sb, "Failed to load $Extend.");
+		goto iput_sec_root_vol_bmp_mirr_err_out;
+	}
+	// FIXME: Do something. E.g. want to delete the $UsnJrnl if exists.
+	// Note we might be doing this at the wrong level; we might want to
+	// d_alloc_root() and then do a "normal" open(2) of $Extend\$UsnJrnl
+	// rather than using iget here, as we don't know the inode number for
+	// the files in $Extend directory.
+	iput(tmp_ino);
+	return TRUE;
+	/* Error unwinding: release the inodes in reverse order of loading. */
+iput_sec_root_vol_bmp_mirr_err_out:
+	iput(vol->secure_ino);
+iput_root_vol_bmp_mirr_err_out:
+	iput(vol->root_ino);
+iput_vol_bmp_mirr_err_out:
+	iput(vol->vol_ino);
+iput_bmp_mirr_err_out:
+	iput(vol->lcnbmp_ino);
+iput_mirr_err_out:
+	iput(vol->mftmirr_ino);
+	return FALSE;
+}
+/**
+ * ntfs_put_super - called by the vfs to unmount a volume
+ * @vfs_sb:	vfs superblock of volume to unmount
+ *
+ * ntfs_put_super() is called by the VFS (from fs/super.c::do_umount()) when
+ * the volume is being unmounted (umount system call has been invoked) and it
+ * releases all inodes and memory belonging to the NTFS specific part of the
+ * super block.
+ *
+ * The system inodes held in the ntfs_volume are released with iput(), the mft
+ * bitmap address space and run list are torn down, the driver wide mount,
+ * upcase and compression user counts are dropped under ntfs_lock, and finally
+ * the ntfs_volume itself is freed and detached from @vfs_sb.
+ */
+void ntfs_put_super(struct super_block *vfs_sb)
+{
+	ntfs_volume *vol = NTFS_SB(vfs_sb);
+	ntfs_debug("Entering.");
+	iput(vol->vol_ino);
+	vol->vol_ino = NULL;
+	/* NTFS 3.0+ specific clean up: $Secure is only held on 3.0+ volumes. */
+	if (vol->major_ver >= 3) {
+		if (vol->secure_ino) {
+			iput(vol->secure_ino);
+			vol->secure_ino = NULL;
+		}
+	}
+	iput(vol->root_ino);
+	vol->root_ino = NULL;
+	down_write(&vol->lcnbmp_lock);	/* Exclude cluster bitmap readers. */
+	iput(vol->lcnbmp_ino);
+	vol->lcnbmp_ino = NULL;
+	up_write(&vol->lcnbmp_lock);
+	iput(vol->mftmirr_ino);
+	vol->mftmirr_ino = NULL;
+	iput(vol->mft_ino);
+	vol->mft_ino = NULL;
+	down_write(&vol->mftbmp_lock);	/* Exclude mft bitmap readers. */
+	/*
+	 * Clean up mft bitmap address space. Ignore the _inode_ bit in the
+	 * name of the function... FIXME: What does this do with dirty pages?
+	 * (ask Al Viro)
+	 */
+	truncate_inode_pages(&vol->mftbmp_mapping, 0);
+	vol->mftbmp_mapping.a_ops = NULL;
+	vol->mftbmp_mapping.host = NULL;
+	up_write(&vol->mftbmp_lock);
+	/* Free the run list of the mft bitmap under its own lock. */
+	write_lock(&vol->mftbmp_rl.lock);
+	ntfs_free(vol->mftbmp_rl.rl);
+	vol->mftbmp_rl.rl = NULL;
+	write_unlock(&vol->mftbmp_rl.lock);
+	vol->upcase_len = 0;
+	/*
+	 * Decrease the number of mounts and destroy the global default upcase
+	 * table if necessary. Also decrease the number of upcase users if we
+	 * are a user, i.e. if this volume points at the shared default table,
+	 * in which case the pointer is cleared so it is not freed again below.
+	 */
+	down(&ntfs_lock);
+	ntfs_nr_mounts--;
+	if (vol->upcase == default_upcase) {
+		ntfs_nr_upcase_users--;
+		vol->upcase = NULL;
+	}
+	if (!ntfs_nr_upcase_users && default_upcase) {
+		ntfs_free(default_upcase);
+		default_upcase = NULL;
+	}
+	/* Same condition as used at mount time to count compression users. */
+	if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
+		free_compression_buffers();
+	up(&ntfs_lock);
+	/* Still set here only if the volume had its own upcase table. */
+	if (vol->upcase) {
+		ntfs_free(vol->upcase);
+		vol->upcase = NULL;
+	}
+	if (vol->nls_map) {
+		unload_nls(vol->nls_map);
+		vol->nls_map = NULL;
+	}
+	/* Detach the ntfs_volume from the super block before freeing it. */
+	vfs_sb->u.generic_sbp = NULL;
+	kfree(vol);
+	return;
+}
+/**
+ * get_nr_free_clusters - return the number of free clusters on a volume
+ * @vol:	ntfs volume for which to obtain free cluster count
+ *
+ * Calculate the number of free clusters on the mounted NTFS volume @vol by
+ * counting the zero bits in the cluster bitmap, which is read page by page
+ * through the page cache mapping of @vol->lcnbmp_ino.
+ *
+ * All whole pages of the bitmap are scanned first, followed by the final
+ * partial page (if any), of which only the 32-bit words containing bitmap
+ * bytes are examined.
+ *
+ * NOTE(review): zero bits beyond the last cluster in the final 32-bit word are
+ * not compensated for here; confirm whether those bits are always set on disk.
+ *
+ * Errors are ignored and we just return the number of free clusters we have
+ * found. This means we return an underestimate on error.
+ */
+s64 get_nr_free_clusters(ntfs_volume *vol)
+{
+	struct address_space *mapping = vol->lcnbmp_ino->i_mapping;
+	filler_t *readpage = (filler_t*)mapping->a_ops->readpage;
+	struct page *page;
+	unsigned long index, max_index;
+	unsigned int max_size, i;
+	s64 nr_free = 0LL;
+	u32 *b;
+	ntfs_debug("Entering.");
+	/* Serialize accesses to the cluster bitmap. */
+	down_read(&vol->lcnbmp_lock);
+	/*
+	 * Convert the number of bits into bytes rounded up, then convert into
+	 * multiples of PAGE_CACHE_SIZE.
+	 */
+	max_index = (vol->_VCL(nr_clusters) + 7) >> (3 + PAGE_CACHE_SHIFT);
+	/* Use multiples of 4 bytes. */
+	max_size = PAGE_CACHE_SIZE >> 2;
+	ntfs_debug("Reading $BITMAP, max_index = 0x%lx, max_size = 0x%x.",
+			max_index, max_size);
+	for (index = 0UL; index < max_index;) {
+handle_partial_page:
+		/*
+		 * Read the page from page cache, getting it from backing store
+		 * if necessary, and increment the use count.
+		 */
+		page = read_cache_page(mapping, index++, (filler_t*)readpage,
+				NULL);
+		/* Ignore pages which errored synchronously. */
+		if (IS_ERR(page)) {
+			ntfs_debug("Sync read_cache_page() error. Skipping "
+					"page (index 0x%lx).", index - 1);
+			continue;
+		}
+		wait_on_page(page);
+		if (!Page_Uptodate(page)) {
+			ntfs_debug("Async read_cache_page() error. Skipping "
+					"page (index 0x%lx).", index - 1);
+			/* Ignore pages which errored asynchronously. */
+			page_cache_release(page);
+			continue;
+		}
+		b = (u32*)kmap(page);
+		/* For each 4 bytes, add up the number of zero bits. */
+		for (i = 0; i < max_size; i++)
+			nr_free += (s64)(32 - hweight32(b[i]));
+		kunmap(page);
+		page_cache_release(page);
+	}
+	/*
+	 * The final partial page is still outstanding if and only if index is
+	 * still max_index (index is incremented past max_index once the
+	 * partial page has been attempted). Do not test max_size for this:
+	 * the partial page can itself need all PAGE_CACHE_SIZE >> 2 words
+	 * (when the bitmap ends within the last three bytes of a page), in
+	 * which case a test on max_size would send us back here forever.
+	 */
+	if (index == max_index) {
+		/*
+		 * Get the multiples of 4 bytes in use in the final partial
+		 * page.
+		 */
+		max_size = ((((vol->_VCL(nr_clusters) + 7) >> 3) & ~PAGE_CACHE_MASK)
+				+ 3) >> 2;
+		/* If there is a partial page go back and do it. */
+		if (max_size) {
+			ntfs_debug("Handling partial page, max_size = 0x%x.",
+					max_size);
+			goto handle_partial_page;
+		}
+	}
+	ntfs_debug("Finished reading $BITMAP, last index = 0x%lx", index - 1);
+	up_read(&vol->lcnbmp_lock);
+	ntfs_debug("Exiting.");
+	return nr_free;
+}
+/**
+ * get_nr_free_mft_records - return the number of free inodes on a volume
+ * @vol:	ntfs volume for which to obtain free inode count
+ *
+ * Calculate the number of free mft records (inodes) on the mounted NTFS
+ * volume @vol by counting the zero bits in the mft bitmap, which is read page
+ * by page through @vol->mftbmp_mapping.
+ *
+ * All whole pages are scanned first, followed by the final partial page (if
+ * any), of which only the 32-bit words containing bitmap bytes are examined.
+ *
+ * Errors are ignored and we just return the number of free inodes we have
+ * found. This means we return an underestimate on error.
+ */
+s64 get_nr_free_mft_records(ntfs_volume *vol)
+{
+	struct address_space *mapping;
+	filler_t *readpage;
+	struct page *page;
+	unsigned long index, max_index;
+	unsigned int max_size, i;
+	s64 nr_free = 0LL;
+	u32 *b;
+	ntfs_debug("Entering.");
+	/* Serialize accesses to the inode bitmap. */
+	down_read(&vol->mftbmp_lock);
+	mapping = &vol->mftbmp_mapping;
+	readpage = (filler_t*)mapping->a_ops->readpage;
+	/*
+	 * Convert the number of bits into bytes rounded up, then convert into
+	 * multiples of PAGE_CACHE_SIZE. This is the number of whole pages.
+	 */
+	max_index = (vol->_VMM(nr_mft_records) + 7) >> (3 + PAGE_CACHE_SHIFT);
+	/* Use multiples of 4 bytes. */
+	max_size = PAGE_CACHE_SIZE >> 2;
+	ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
+			"0x%x.", max_index, max_size);
+	for (index = 0UL; index < max_index;) {
+handle_partial_page:
+		/*
+		 * Read the page from page cache, getting it from backing store
+		 * if necessary, and increment the use count. Note this label
+		 * is also entered from below the loop for the partial page.
+		 */
+		page = read_cache_page(mapping, index++, (filler_t*)readpage,
+				vol);
+		/* Ignore pages which errored synchronously. */
+		if (IS_ERR(page)) {
+			ntfs_debug("Sync read_cache_page() error. Skipping "
+					"page (index 0x%lx).", index - 1);
+			continue;
+		}
+		wait_on_page(page);
+		if (!Page_Uptodate(page)) {
+			ntfs_debug("Async read_cache_page() error. Skipping "
+					"page (index 0x%lx).", index - 1);
+			/* Ignore pages which errored asynchronously. */
+			page_cache_release(page);
+			continue;
+		}
+		b = (u32*)kmap(page);
+		/* For each 4 bytes, add up the number of zero bits. */
+	  	for (i = 0; i < max_size; i++)
+			nr_free += (s64)(32 - hweight32(b[i]));
+		kunmap(page);
+		page_cache_release(page);
+	}
+	/*
+	 * index equals max_index only when the partial page has not been
+	 * attempted yet; after the jump below index is max_index + 1 so this
+	 * is not entered a second time.
+	 */
+	if (index == max_index) {
+		/*
+		 * Get the multiples of 4 bytes in use in the final partial
+		 * page.
+		 */
+		max_size = ((((vol->_VMM(nr_mft_records) + 7) >> 3) &
+				~PAGE_CACHE_MASK) + 3) >> 2;
+		/* If there is a partial page go back and do it. */
+		if (max_size) {
+			/*
+			 * Compensate for out of bounds zero bits: the bits in
+			 * the last 32-bit word beyond nr_mft_records are
+			 * subtracted up front, before that word is counted.
+			 */
+			if ((i = vol->_VMM(nr_mft_records) & 31))
+				nr_free -= (s64)(32 - i);
+			ntfs_debug("Handling partial page, max_size = 0x%x",
+					max_size);
+			goto handle_partial_page;
+		}
+	}
+	ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx",
+			index - 1);
+	up_read(&vol->mftbmp_lock);
+	ntfs_debug("Exiting.");
+	return nr_free;
+}
+/**
+ * ntfs_statfs - return information about mounted NTFS volume
+ * @sb:		super block of mounted volume
+ * @sfs:	statfs structure in which to return the information
+ *
+ * Fill in the statfs structure @sfs (zeroed by the caller before we are
+ * invoked) with a snapshot of the state of the mounted NTFS volume @sb. All
+ * values are only correct for the moment in time at which we are called; not
+ * only the free counts but also the totals can change, e.g. the total number
+ * of file nodes grows when we run out of them, for as long as there is space
+ * left on the volume.
+ *
+ * Called from vfs_statfs which is used to handle the statfs, fstatfs, and
+ * ustat system calls.
+ *
+ * Return 0 on success or -errno on error.
+ */
+int ntfs_statfs(struct super_block *sb, struct statfs *sfs)
+{
+	ntfs_volume *vol = NTFS_SB(sb);
+	s64 nr_free;
+	ntfs_debug("Entering.");
+	/* File system type and optimal transfer block size. */
+	sfs->f_type = NTFS_SB_MAGIC;
+	sfs->f_bsize = PAGE_CACHE_SIZE;
+	/*
+	 * Total number of f_bsize sized blocks. As $MFT is a file, inodes
+	 * live in data blocks too, hence this is simply all clusters
+	 * converted to units of f_bsize.
+	 */
+	sfs->f_blocks = vol->_VCL(nr_clusters) << vol->cluster_size_bits >>
+			PAGE_CACHE_SHIFT;
+	/* Free blocks in units of f_bsize, never reported as negative. */
+	nr_free = get_nr_free_clusters(vol) << vol->cluster_size_bits >>
+			PAGE_CACHE_SHIFT;
+	if (nr_free < 0LL)
+		nr_free = 0LL;
+	sfs->f_bfree = nr_free;
+	/* NTFS reserves nothing for the superuser so f_bavail is the same. */
+	sfs->f_bavail = sfs->f_bfree;
+	/* Total number of file nodes at this moment in time. */
+	sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits;
+	/* Free file nodes relative to the current total, clamped at zero. */
+	nr_free = get_nr_free_mft_records(vol);
+	if (nr_free < 0LL)
+		nr_free = 0LL;
+	sfs->f_ffree = nr_free;
+	/*
+	 * File system id. What goes here differs between *nix flavours and
+	 * even between Linux file systems. We take it to identify the mounted
+	 * volume rather than the driver (f_type already covers the latter),
+	 * so we report the 64-bit volume serial number: the low 32 bits in
+	 * f_fsid[0] and the high 32 bits in f_fsid[1].
+	 */
+	sfs->f_fsid.val[0] = vol->serial_no & 0xffffffff;
+	sfs->f_fsid.val[1] = (vol->serial_no >> 32) & 0xffffffff;
+	/* Maximum length of filenames. */
+	sfs->f_namelen = NTFS_MAX_NAME_LEN;
+	return 0;
+}
+/*
+ * Super operations for mount time when we don't have enough setup to use the
+ * proper functions.
+ *
+ * ntfs_fill_super() installs this table in sb->s_op just before it calls
+ * iget() for $MFT, so that the read_inode method used for that first inode is
+ * ntfs_read_inode_mount(). Only the inode allocation, destruction, read and
+ * clear methods are provided; all other methods are left unset.
+ */
+struct super_operations ntfs_mount_sops = {
+	alloc_inode:	ntfs_alloc_big_inode,	/* VFS: Allocate a new inode. */
+	destroy_inode:	ntfs_destroy_big_inode,	/* VFS: Deallocate an inode. */
+	read_inode:	ntfs_read_inode_mount,	/* VFS: Load inode from disk,
+						   called from iget(). */
+	clear_inode:	ntfs_clear_big_inode,	/* VFS: Called when an inode is
+						   removed from memory. */
+};
+/*
+ * The complete super operations, used once the volume is sufficiently set up.
+ *
+ * Methods that are explicitly NULL are not implemented by the driver; they
+ * are listed anyway to document the full set of available hooks.
+ */
+struct super_operations ntfs_sops = {
+	alloc_inode:	ntfs_alloc_big_inode,	/* VFS: Allocate a new inode. */
+	destroy_inode:	ntfs_destroy_big_inode,	/* VFS: Deallocate an inode. */
+	read_inode:	ntfs_read_inode,	/* VFS: Load inode from disk,
+						   called from iget(). */
+	dirty_inode:	ntfs_dirty_inode,	/* VFS: Called from
+						   __mark_inode_dirty(). */
+	write_inode:	NULL,		/* VFS: Write dirty inode to disk. */
+	put_inode:	NULL,		/* VFS: Called whenever the reference
+					   count (i_count) of the inode is
+					   going to be decreased but before the
+					   actual decrease. */
+	delete_inode:	NULL,		/* VFS: Delete inode from disk. Called
+					   when i_count becomes 0 and i_nlink is
+					   also 0. */
+	put_super:	ntfs_put_super,	/* Syscall: umount. */
+	write_super:	NULL,		/* Flush dirty super block to disk. */
+	write_super_lockfs:	NULL,	/* ? */
+	unlockfs:	NULL,		/* ? */
+	statfs:		ntfs_statfs,	/* Syscall: statfs */
+	remount_fs:	ntfs_remount,	/* Syscall: mount -o remount. */
+	clear_inode:	ntfs_clear_big_inode,	/* VFS: Called when an inode is
+						   removed from memory. */
+	umount_begin:	NULL,		/* Forced umount. */
+	/*
+	 * These are NFSd support functions but NTFS is a standard fs so
+	 * shouldn't need to implement these manually. At least we can try
+	 * without and if it doesn't work in some way we can always implement
+	 * something here.
+	 */
+	fh_to_dentry:	NULL,		/* Get dentry for given file handle. */
+	dentry_to_fh:	NULL,		/* Get file handle for given dentry. */
+	show_options:	ntfs_show_options, /* Show mount options in proc. */
+};
+/**
+ * ntfs_fill_super - mount an ntfs file system
+ * @sb:		super block of ntfs file system to mount
+ * @opt:	string containing the mount options
+ * @silent:	silence error output
+ *
+ * ntfs_fill_super() is called by the VFS to mount the device described by @sb
+ * with the mount options in @opt with the NTFS file system.
+ *
+ * If @silent is true, remain silent even if errors are detected. This is used
+ * during bootup, when the kernel tries to mount the root file system with all
+ * registered file systems one after the other until one succeeds. This implies
+ * that all file systems except the correct one will quite correctly and
+ * expectedly return an error, but nobody wants to see error messages when in
+ * fact this is what is supposed to happen.
+ *
+ * NOTE: @sb->s_flags contains the mount options flags.
+ *
+ * Return 0 on success, -ENOMEM if the ntfs_volume could not be allocated and
+ * -EINVAL for all other failures. On failure everything set up here is torn
+ * down again and @sb->u.generic_sbp is reset to NULL.
+ */
+static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
+{
+	extern int *blksize_size[];
+	ntfs_volume *vol;
+	struct buffer_head *bh;
+	struct inode *tmp_ino;
+	int old_blocksize, result;
+	kdev_t dev = sb->s_dev;
+	ntfs_debug("Entering.");
+	/* Allocate a new ntfs_volume and place it in sb->u.generic_sbp. */
+	sb->u.generic_sbp = kmalloc(sizeof(ntfs_volume), GFP_NOFS);
+	vol = NTFS_SB(sb);
+	if (!vol) {
+		if (!silent)
+			ntfs_error(sb, "Allocation of NTFS volume structure "
+					"failed. Aborting mount...");
+		return -ENOMEM;
+	}
+	/* Initialize ntfs_volume structure. */
+	memset(vol, 0, sizeof(ntfs_volume));
+	vol->sb = sb;
+	vol->upcase = NULL;
+	vol->mft_ino = NULL;
+	init_rwsem(&vol->mftbmp_lock);
+	INIT_LIST_HEAD(&vol->mftbmp_mapping.clean_pages);
+	INIT_LIST_HEAD(&vol->mftbmp_mapping.dirty_pages);
+	INIT_LIST_HEAD(&vol->mftbmp_mapping.locked_pages);
+	vol->mftbmp_mapping.a_ops = NULL;
+	vol->mftbmp_mapping.host = NULL;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,6)
+	vol->mftbmp_mapping.i_mmap = NULL;
+	vol->mftbmp_mapping.i_mmap_shared = NULL;
+#else
+	INIT_LIST_HEAD(&vol->mftbmp_mapping.i_mmap);
+	INIT_LIST_HEAD(&vol->mftbmp_mapping.i_mmap_shared);
+#endif
+	spin_lock_init(&vol->mftbmp_mapping.i_shared_lock);
+	INIT_RUN_LIST(&vol->mftbmp_rl);
+	vol->mftmirr_ino = NULL;
+	vol->lcnbmp_ino = NULL;
+	init_rwsem(&vol->lcnbmp_lock);
+	vol->vol_ino = NULL;
+	vol->root_ino = NULL;
+	vol->secure_ino = NULL;
+	vol->uid = vol->gid = 0;
+	vol->on_errors = 0;
+	vol->mft_zone_multiplier = 0;
+	vol->nls_map = NULL;
+	/*
+	 * Default is group and other don't have write/execute access to files
+	 * and write access to directories.
+	 */
+	vol->fmask = 0033;
+	vol->dmask = 0022;
+	/*
+	 * Default is to show long file names (including POSIX file names), and
+	 * not to show system files and short file names.
+	 */
+	vol->readdir_opts = SHOW_WIN32;
+	/* Important to get the mount options dealt with now. */
+	if (!parse_options(vol, (char*)opt))
+		goto err_out_now;
+	/* We are just a read-only fs at the moment. */
+	sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
+	/*
+	 * TODO: Fail safety check. In the future we should really be able to
+	 * cope with this being the case, but for now just bail out.
+	 */
+	if (get_hardsect_size(dev) > NTFS_BLOCK_SIZE) {
+		if (!silent)
+			ntfs_error(sb, "Device has unsupported hardsect_size.");
+		goto err_out_now;
+	}
+	/*
+	 * Setup the device access block size to NTFS_BLOCK_SIZE, remembering
+	 * the previous block size so it can be restored on failure.
+	 */
+	if (!blksize_size[major(dev)])
+		old_blocksize = BLOCK_SIZE;
+	else
+		old_blocksize = blksize_size[major(dev)][minor(dev)];
+	if (sb_set_blocksize(sb, NTFS_BLOCK_SIZE) != NTFS_BLOCK_SIZE) {
+		if (!silent)
+			ntfs_error(sb, "Unable to set block size.");
+		goto set_blk_size_err_out_now;
+	}
+	/* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */
+	vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS;
+	/* Read the boot sector and return unlocked buffer head to it. */
+	if (!(bh = read_ntfs_boot_sector(sb, silent))) {
+		if (!silent)
+			ntfs_error(sb, "Not an NTFS volume.");
+		goto set_blk_size_err_out_now;
+	}
+	/*
+	 * Extract the data from the boot sector and setup the ntfs super block
+	 * using it.
+	 */
+	result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data);
+	brelse(bh);
+	if (!result) {
+		if (!silent)
+			ntfs_error(sb, "Unsupported NTFS filesystem.");
+		goto set_blk_size_err_out_now;
+	}
+	/*
+	 * TODO: When we start coping with sector sizes different from
+	 * NTFS_BLOCK_SIZE, we now probably need to set the blocksize of the
+	 * device (probably to NTFS_BLOCK_SIZE).
+	 */
+	/* Setup remaining fields in the super block. */
+	sb->s_magic = NTFS_SB_MAGIC;
+	/*
+	 * Ntfs allows 63 bits for the file size, i.e. correct would be:
+	 * 	sb->s_maxbytes = ~0ULL >> 1;
+	 * But the kernel uses a long as the page cache page index which on
+	 * 32-bit architectures is only 32-bits. MAX_LFS_FILESIZE is kernel
+	 * defined to the maximum the page cache page index can cope with
+	 * without overflowing the index or to 2^63 - 1, whichever is smaller.
+	 */
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	/*
+	 * Now load the metadata required for the page cache and our address
+	 * space operations to function. We do this by setting up a specialised
+	 * read_inode method and then just calling iget() to obtain the inode
+	 * for $MFT which is sufficient to allow our normal inode operations
+	 * and associated address space operations to function.
+	 */
+	/*
+	 * Poison vol->mft_ino so we know whether iget() called into our
+	 * ntfs_read_inode_mount() method.
+	 */
+#define OGIN	((struct inode*)le32_to_cpu(0x4e49474f))	/* OGIN */
+	vol->mft_ino = OGIN;
+	sb->s_op = &ntfs_mount_sops;
+	tmp_ino = iget(vol->sb, FILE_MFT);
+	if (!tmp_ino || tmp_ino != vol->mft_ino || is_bad_inode(tmp_ino)) {
+		if (!silent)
+			ntfs_error(sb, "Failed to load essential metadata.");
+		if (tmp_ino && vol->mft_ino == OGIN)
+			ntfs_error(sb, "BUG: iget() did not call "
+					"ntfs_read_inode_mount() method!\n");
+		if (!tmp_ino)
+			goto cond_iput_mft_ino_err_out_now;
+		goto iput_tmp_ino_err_out_now;
+	}
+	/*
+	 * Note: sb->s_op has already been set to &ntfs_sops by our specialized
+	 * ntfs_read_inode_mount() method when it was invoked by iget().
+	 */
+	down(&ntfs_lock);
+	/*
+	 * The current mount is a compression user if the cluster size is
+	 * less than or equal 4kiB.
+	 */
+	if (vol->cluster_size <= 4096 && !ntfs_nr_compression_users++) {
+		result = allocate_compression_buffers();
+		if (result) {
+			ntfs_error(NULL, "Failed to allocate per CPU buffers "
+					"for compression engine.");
+			ntfs_nr_compression_users--;
+			up(&ntfs_lock);
+			goto iput_tmp_ino_err_out_now;
+		}
+	}
+	/*
+	 * Increment the number of mounts and generate the global default
+	 * upcase table if necessary. Also temporarily increment the number of
+	 * upcase users to avoid race conditions with concurrent (u)mounts.
+	 */
+	if (!ntfs_nr_mounts++)
+		default_upcase = generate_default_upcase();
+	ntfs_nr_upcase_users++;
+	up(&ntfs_lock);
+	/*
+	 * From now on, ignore @silent parameter. If we fail below this line,
+	 * it will be due to a corrupt fs or a system error, so we report it.
+	 */
+	/*
+	 * Open the system files with normal access functions and complete
+	 * setting up the ntfs super block.
+	 */
+	if (!load_system_files(vol)) {
+		ntfs_error(sb, "Failed to load system files.");
+		goto unl_upcase_iput_tmp_ino_err_out_now;
+	}
+	if ((sb->s_root = d_alloc_root(vol->root_ino))) {
+		/* We increment i_count simulating an iget(). */
+		atomic_inc(&vol->root_ino->i_count);
+		ntfs_debug("Exiting, status successful.");
+		/*
+		 * Drop the temporary upcase user taken above and release the
+		 * default upcase if it has no users.
+		 */
+		down(&ntfs_lock);
+		if (!--ntfs_nr_upcase_users && default_upcase) {
+			ntfs_free(default_upcase);
+			default_upcase = NULL;
+		}
+		up(&ntfs_lock);
+		return 0;
+	}
+	ntfs_error(sb, "Failed to allocate root directory.");
+	/* Clean up after the successful load_system_files() call from above. */
+	iput(vol->vol_ino);
+	vol->vol_ino = NULL;
+	/* NTFS 3.0+ specific clean up. */
+	if (vol->major_ver >= 3) {
+		iput(vol->secure_ino);
+		vol->secure_ino = NULL;
+	}
+	iput(vol->root_ino);
+	vol->root_ino = NULL;
+	iput(vol->lcnbmp_ino);
+	vol->lcnbmp_ino = NULL;
+	iput(vol->mftmirr_ino);
+	vol->mftmirr_ino = NULL;
+	truncate_inode_pages(&vol->mftbmp_mapping, 0);
+	vol->mftbmp_mapping.a_ops = NULL;
+	vol->mftbmp_mapping.host = NULL;
+	ntfs_free(vol->mftbmp_rl.rl);
+	vol->mftbmp_rl.rl = NULL;
+	vol->upcase_len = 0;
+	/*
+	 * A volume that ended up sharing the global default upcase table is
+	 * counted in ntfs_nr_upcase_users (ntfs_put_super() drops that count
+	 * at umount time), so drop it here as well or the default table would
+	 * never be freed. default_upcase may only be examined under
+	 * ntfs_lock. The temporary user taken above is dropped further down.
+	 */
+	down(&ntfs_lock);
+	if (vol->upcase && vol->upcase == default_upcase) {
+		ntfs_nr_upcase_users--;
+		vol->upcase = NULL;
+	}
+	up(&ntfs_lock);
+	/* Anything still attached is a table private to this volume. */
+	if (vol->upcase) {
+		ntfs_free(vol->upcase);
+		vol->upcase = NULL;
+	}
+	if (vol->nls_map) {
+		unload_nls(vol->nls_map);
+		vol->nls_map = NULL;
+	}
+	/* Error exit code path. */
+unl_upcase_iput_tmp_ino_err_out_now:
+	/*
+	 * Decrease the number of mounts and destroy the global default upcase
+	 * table if necessary.
+	 */
+	down(&ntfs_lock);
+	ntfs_nr_mounts--;
+	if (!--ntfs_nr_upcase_users && default_upcase) {
+		ntfs_free(default_upcase);
+		default_upcase = NULL;
+	}
+	if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users)
+		free_compression_buffers();
+	up(&ntfs_lock);
+iput_tmp_ino_err_out_now:
+	iput(tmp_ino);
+cond_iput_mft_ino_err_out_now:
+	/* Only put vol->mft_ino if it is a real inode not already put above. */
+	if (vol->mft_ino && vol->mft_ino != OGIN && vol->mft_ino != tmp_ino) {
+		iput(vol->mft_ino);
+		vol->mft_ino = NULL;
+	}
+#undef OGIN
+	/*
+	 * This is needed to get ntfs_clear_inode() called for each inode we
+	 * have ever called iget()/iput() on, otherwise we A) leak resources
+	 * and B) a subsequent mount fails automatically due to iget() never
+	 * calling down into our ntfs_read_inode{_mount}() methods again...
+	 */
+	if (invalidate_inodes(sb)) {
+		ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
+				"driver bug.");
+		/* Copied from fs/super.c. I just love this message. (-; */
+		printk("VFS: Busy inodes after umount. Self-destruct in 5 "
+				"seconds.  Have a nice day...\n");
+	}
+set_blk_size_err_out_now:
+	/* Errors at this stage are irrelevant. */
+	sb_set_blocksize(sb, old_blocksize);
+err_out_now:
+	sb->u.generic_sbp = NULL;
+	kfree(vol);
+	ntfs_debug("Failed, returning -EINVAL.");
+	return -EINVAL;
+}
+/*
+ * This is a slab cache to optimize allocations and deallocations of Unicode
+ * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN
+ * (255) Unicode characters + a terminating NULL Unicode character.
+ */
+kmem_cache_t *ntfs_name_cache;
+/* Slab caches for efficient allocation/deallocation of inodes. */
+kmem_cache_t *ntfs_inode_cache;
+kmem_cache_t *ntfs_big_inode_cache;
+/*
+ * Slab constructor for the big inode cache: perform the one time VFS inode
+ * initialisation on the VFS inode belonging to a newly constructed object.
+ */
+static void ntfs_big_inode_init_once(void *foo, kmem_cache_t *cachep,
+		unsigned long flags)
+{
+	const unsigned long ctor_mask = SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR;
+	ntfs_inode *ni;
+	/* Do nothing unless this is a construction without the verify flag. */
+	if ((flags & ctor_mask) != SLAB_CTOR_CONSTRUCTOR)
+		return;
+	ni = (ntfs_inode *)foo;
+	inode_init_once(VFS_I(ni));
+}
+/*
+ * Slab cache to optimize allocations and deallocations of attribute search
+ * contexts.
+ */
+kmem_cache_t *ntfs_attr_ctx_cache;
+/*
+ * A global default upcase table and a corresponding reference count. Both are
+ * only modified with ntfs_lock held.
+ */
+wchar_t *default_upcase = NULL;
+unsigned long ntfs_nr_upcase_users = 0;
+/* The number of mounted filesystems. Modified with ntfs_lock held. */
+unsigned long ntfs_nr_mounts = 0;
+/*
+ * Driver wide semaphore. The mount and umount paths take it around updates of
+ * the mount, upcase user and compression user counts.
+ */
+DECLARE_MUTEX(ntfs_lock);
+/*
+ * get_sb method of the ntfs file system type: hand all arguments on to
+ * get_sb_bdev() with ntfs_fill_super() as the super block fill in callback.
+ */
+static struct super_block *ntfs_get_sb(struct file_system_type *fs_type,
+	int flags, char *dev_name, void *data)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
+}
+/*
+ * The file system type registered and unregistered by init_ntfs_fs() and
+ * exit_ntfs_fs(); the mount type name is "ntfs".
+ */
+static struct file_system_type ntfs_fs_type = {
+	owner:		THIS_MODULE,
+	name:		"ntfs",
+	get_sb:		ntfs_get_sb,
+	fs_flags:	FS_REQUIRES_DEV,
+};
+/*
+ * Stable names for the slab caches. They are passed to kmem_cache_create() and
+ * also used in the error messages printed at driver init and exit time.
+ */
+static const char *ntfs_attr_ctx_cache_name = "ntfs_attr_ctx_cache";
+static const char *ntfs_name_cache_name = "ntfs_name_cache";
+static const char *ntfs_inode_cache_name = "ntfs_inode_cache";
+static const char *ntfs_big_inode_cache_name = "ntfs_big_inode_cache";
+/*
+ * init_ntfs_fs - initialize the NTFS driver
+ *
+ * Create the four slab caches used by the driver, register the ntfs sysctls
+ * and finally register the ntfs file system type with the VFS. If any step
+ * fails, everything set up by the earlier steps is undone again.
+ *
+ * Return 0 on success, -ENOMEM if a slab cache could not be created, or the
+ * error returned by ntfs_sysctl() or register_filesystem().
+ */
+static int __init init_ntfs_fs(void)
+{
+	int err = 0;
+	/* This may be ugly but it results in pretty output so who cares. (-8 */
+	printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/"
+#ifdef CONFIG_NTFS_RW
+			"W"
+#else
+			"O"
+#endif
+#ifdef DEBUG
+			" DEBUG"
+#endif
+#ifdef MODULE
+			" MODULE"
+#endif
+			"]. Copyright (c) 2001,2002 Anton Altaparmakov.\n");
+	ntfs_debug("Debug messages are enabled.");
+	ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
+			sizeof(attr_search_context), 0 /* offset */,
+			SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */);
+	if (!ntfs_attr_ctx_cache) {
+		printk(KERN_CRIT "NTFS: Failed to create %s!\n",
+				ntfs_attr_ctx_cache_name);
+		goto ctx_err_out;
+	}
+	ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name,
+			(NTFS_MAX_NAME_LEN+1) * sizeof(uchar_t), 0,
+			SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!ntfs_name_cache) {
+		printk(KERN_CRIT "NTFS: Failed to create %s!\n",
+				ntfs_name_cache_name);
+		goto name_err_out;
+	}
+	ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name,
+			sizeof(ntfs_inode), 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!ntfs_inode_cache) {
+		printk(KERN_CRIT "NTFS: Failed to create %s!\n",
+				ntfs_inode_cache_name);
+		goto inode_err_out;
+	}
+	ntfs_big_inode_cache = kmem_cache_create(ntfs_big_inode_cache_name,
+			sizeof(big_ntfs_inode), 0, SLAB_HWCACHE_ALIGN,
+			ntfs_big_inode_init_once, NULL);
+	if (!ntfs_big_inode_cache) {
+		printk(KERN_CRIT "NTFS: Failed to create %s!\n",
+				ntfs_big_inode_cache_name);
+		goto big_inode_err_out;
+	}
+	/* Register the ntfs sysctls. */
+	err = ntfs_sysctl(1);
+	if (err) {
+		printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n");
+		goto sysctl_err_out;
+	}
+	err = register_filesystem(&ntfs_fs_type);
+	if (!err) {
+		ntfs_debug("NTFS driver registered successfully.");
+		return 0; /* Success! */
+	}
+	printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n");
+	/*
+	 * The sysctls were registered successfully above so they must be
+	 * unregistered again, otherwise the sysctl table stays registered
+	 * after the driver failed to initialize.
+	 */
+	ntfs_sysctl(0);
+sysctl_err_out:
+	kmem_cache_destroy(ntfs_big_inode_cache);
+big_inode_err_out:
+	kmem_cache_destroy(ntfs_inode_cache);
+inode_err_out:
+	kmem_cache_destroy(ntfs_name_cache);
+name_err_out:
+	kmem_cache_destroy(ntfs_attr_ctx_cache);
+ctx_err_out:
+	/* err is still zero only if a slab cache creation failed. */
+	if (!err) {
+		printk(KERN_CRIT "NTFS: Aborting NTFS file system driver "
+				"registration...\n");
+		err = -ENOMEM;
+	}
+	return err;
+}
+/*
+ * exit_ntfs_fs - shut down the NTFS driver
+ *
+ * Unregister the file system type, destroy the slab caches in the reverse
+ * order of their creation, complaining about each one that could not be
+ * destroyed, and finally unregister the ntfs sysctls.
+ */
+static void __exit exit_ntfs_fs(void)
+{
+	int leaked = 0;
+	ntfs_debug("Unregistering NTFS driver.");
+	unregister_filesystem(&ntfs_fs_type);
+	if (kmem_cache_destroy(ntfs_big_inode_cache)) {
+		leaked = 1;
+		printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
+				ntfs_big_inode_cache_name);
+	}
+	if (kmem_cache_destroy(ntfs_inode_cache)) {
+		leaked = 1;
+		printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
+				ntfs_inode_cache_name);
+	}
+	if (kmem_cache_destroy(ntfs_name_cache)) {
+		leaked = 1;
+		printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
+				ntfs_name_cache_name);
+	}
+	if (kmem_cache_destroy(ntfs_attr_ctx_cache)) {
+		leaked = 1;
+		printk(KERN_CRIT "NTFS: Failed to destory %s.\n",
+				ntfs_attr_ctx_cache_name);
+	}
+	/* One summary message however many caches failed to go away. */
+	if (leaked)
+		printk(KERN_CRIT "NTFS: This causes memory to leak! There is "
+				"probably a BUG in the driver! Please report "
+				"you saw this message to "
+				"linux-ntfs-dev@lists.sf.net\n");
+	/* Unregister the ntfs sysctls. */
+	ntfs_sysctl(0);
+}
+EXPORT_NO_SYMBOLS;
+MODULE_AUTHOR("Anton Altaparmakov <aia21@cam.ac.uk>");
+MODULE_DESCRIPTION("NTFS 1.2/3.x driver");
+MODULE_LICENSE("GPL");
+#ifdef DEBUG
+MODULE_PARM(debug_msgs, "i");	/* Integer module parameter. */
+MODULE_PARM_DESC(debug_msgs, "Enable debug messages.");
+#endif
+module_init(init_ntfs_fs)	/* Driver entry point. */
+module_exit(exit_ntfs_fs)	/* Driver exit point. */
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
+/*
+ * sysctl.c - Code for sysctl handling in NTFS Linux kernel driver. Part of
+ *	      the Linux-NTFS project. Adapted from the old NTFS driver,
+ *	      Copyright (C) 1997 Martin von Löwis, Régis Duchesne.
+ *
+ * Copyright (c) 2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifdef DEBUG
+#include <linux/module.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+#include "sysctl.h"
+#include "debug.h"
+#define FS_NTFS	1	/* Binary id of the ntfs-debug sysctl entry. */
+/*
+ * Definition of the ntfs sysctl: a single integer entry named "ntfs-debug"
+ * which is backed by the debug_msgs variable and handled by proc_dointvec.
+ */
+static ctl_table ntfs_sysctls[] = {
+	{ FS_NTFS, "ntfs-debug", 		/* Binary and text IDs. */
+	  &debug_msgs,sizeof(debug_msgs),	/* Data pointer and size. */
+	  0644,	NULL, &proc_dointvec },		/* Mode, child, proc handler. */
+	{ 0 }					/* Table terminator. */
+};
+/* Define the parent directory /proc/sys/fs, with the above as its child. */
+static ctl_table sysctls_root[] = {
+	{ CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls },
+	{ 0 }
+};
+/*
+ * Storage for the sysctls header. Non-NULL if and only if the sysctls are
+ * currently registered.
+ */
+static struct ctl_table_header *sysctls_root_table = NULL;
+/**
+ * ntfs_sysctl - add or remove the debug sysctl
+ * @add:	add (1) or remove (0) the sysctl
+ *
+ * Add or remove the debug sysctl. Adding when already added, or removing when
+ * not added, is a bug and is caught by BUG_ON().
+ *
+ * Return 0 on success or -errno on error (-ENOMEM if the sysctl table could
+ * not be registered; removal always succeeds).
+ */
+int ntfs_sysctl(int add)
+{
+	if (add) {
+		BUG_ON(sysctls_root_table);
+		sysctls_root_table = register_sysctl_table(sysctls_root, 0);
+		if (!sysctls_root_table)
+			return -ENOMEM;
+#ifdef CONFIG_PROC_FS
+		/*
+		 * If the proc file system is in use and we are a module, need
+		 * to set the owner of our proc entry to our module. In the
+		 * non-modular case, THIS_MODULE is NULL, so this is ok.
+		 *
+		 * The proc entry pointer is only set if the entry could
+		 * actually be created, so do not dereference it blindly.
+		 */
+		if (ntfs_sysctls[0].de)
+			ntfs_sysctls[0].de->owner = THIS_MODULE;
+#endif
+	} else {
+		BUG_ON(!sysctls_root_table);
+		unregister_sysctl_table(sysctls_root_table);
+		sysctls_root_table = NULL;
+	}
+	return 0;
+}
+#endif /* CONFIG_SYSCTL */
+#endif /* DEBUG */
--- a/fs/ntfs/sysctl.h
+++ b/fs/ntfs/sysctl.h
+/*
+ * sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of
+ *	      the Linux-NTFS project. Adapted from the old NTFS driver,
+ *	      Copyright (C) 1997 Martin von Löwis, Régis Duchesne.
+ * 
+ * Copyright (c) 2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_SYSCTL_H
+#define _LINUX_NTFS_SYSCTL_H
+#include <linux/config.h>
+/*
+ * The real ntfs_sysctl() is only built when both DEBUG and CONFIG_SYSCTL are
+ * defined (sysctl.c guards its definition with #ifdef on each of them), so
+ * test for definedness here as well. Testing the macro values instead breaks
+ * when DEBUG is defined without a value and disagrees with sysctl.c when it
+ * is defined to 0.
+ */
+#if defined(DEBUG) && defined(CONFIG_SYSCTL)
+extern int ntfs_sysctl(int add);
+#else
+/* No debug sysctl in this configuration: just return success. */
+static inline int ntfs_sysctl(int add)
+{
+	return 0;
+}
+#endif /* DEBUG && CONFIG_SYSCTL */
+#endif /* _LINUX_NTFS_SYSCTL_H */
--- a/fs/ntfs/time.c
+++ b/fs/ntfs/time.c
+/*
+ * time.c - NTFS time conversion functions. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/sched.h>	/* For CURRENT_TIME. */
+#include <asm/div64.h>		/* For do_div(). */
+#include "ntfs.h"
+/*
+ * Offset between the NTFS epoch (1st January 1601) and the Linux epoch
+ * (1st January 1970) in 100ns units: 369 years of 365 days plus 89 extra
+ * days, times 24 hours, times 3600 seconds, times 10000000 100ns intervals
+ * per second. The s64 cast keeps the multiplication in 64 bits.
+ */
+#define NTFS_TIME_OFFSET ((s64)(369 * 365 + 89) * 24 * 3600 * 10000000)
+/**
+ * utc2ntfs - convert Linux time to NTFS time
+ * @time:		Linux time to convert to NTFS
+ *
+ * Translate the Linux time @time into the equivalent NTFS time and return the
+ * result in little endian format.
+ *
+ * Linux time is currently held in a long and counts 1-second intervals since
+ * 1st January 1970, 00:00:00 UTC.
+ *
+ * NTFS time is Microsoft's standard time format: an s64 counting 100
+ * nano-second intervals since 1st January 1601, 00:00:00 UTC.
+ */
+inline s64 utc2ntfs(const time_t time)
+{
+	/* Scale seconds to 100ns units, then rebase from 1970 to 1601. */
+	const s64 ntfs_time = (s64)time * 10000000 + NTFS_TIME_OFFSET;
+	return cpu_to_sle64(ntfs_time);
+}
+/**
+ * get_current_ntfs_time - get the current time in little endian NTFS format
+ *
+ * Read the current time from the Linux kernel (CURRENT_TIME), convert it with
+ * utc2ntfs() and return the resulting little endian NTFS time.
+ */
+inline s64 get_current_ntfs_time(void)
+{
+	const time_t now = CURRENT_TIME;
+	return utc2ntfs(now);
+}
+/**
+ * ntfs2utc - convert NTFS time to Linux time
+ * @time:		NTFS time (little endian) to convert to Linux
+ *
+ * Convert the little endian NTFS time @time to its corresponding Linux time
+ * and return that in cpu format.
+ *
+ * Linux stores time in a long at present and measures it as the number of
+ * 1-second intervals since 1st January 1970, 00:00:00 UTC.
+ *
+ * NTFS uses Microsoft's standard time format which is stored in a s64 and is
+ * measured as the number of 100 nano-second intervals since 1st January 1601,
+ * 00:00:00 UTC.
+ *
+ * NTFS times before 1st January 1970 yield a negative Linux time; the
+ * conversion to seconds rounds towards zero.
+ */
+inline time_t ntfs2utc(const s64 time)
+{
+	/* Subtract the NTFS time offset to rebase from 1601 to 1970. */
+	s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET;
+	/*
+	 * do_div() operates on an unsigned 64-bit dividend, so a negative @t
+	 * must not be handed to it directly. Divide the magnitude and restore
+	 * the sign afterwards.
+	 */
+	u64 secs = t < 0 ? -(u64)t : (u64)t;
+	do_div(secs, 10000000);
+	return t < 0 ? -(time_t)secs : (time_t)secs;
+}
--- a/fs/ntfs/types.h
+++ b/fs/ntfs/types.h
+/*
+ * types.h - Defines for NTFS Linux kernel driver specific types.
+ *	     Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_TYPES_H
+#define _LINUX_NTFS_TYPES_H
+/*
+ * Helpers for unions embedded in structures.
+ *
+ * With GCC older than 2.96 the embedded union is given a name: SN(X) expands
+ * to X and SC(P,N) expands to the prefixed member access P.N. With newer
+ * compilers SN() expands to nothing and SC(P,N) to just N, i.e. the union is
+ * left unnamed and its members are accessed directly.
+ * NOTE(review): presumably older compilers lack unnamed union support -
+ * confirm.
+ */
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
+#define SN(X)   X	/* Struct Name */
+#define SC(P,N) P.N	/* ShortCut: Prefix, Name */
+#else
+#define SN(X)
+#define SC(P,N) N
+#endif
+/* 2-byte Unicode character type. */
+typedef u16 uchar_t;
+/* NOTE(review): presumably log2(sizeof(uchar_t)), i.e. a shift count - confirm. */
+#define UCHAR_T_SIZE_BITS 1
+/*
+ * Clusters are signed 64-bit values on NTFS volumes. We define two types, LCN
+ * (logical cluster number) and VCN (virtual cluster number), for better code
+ * readability. Note that both are plain typedefs of s64, so the compiler
+ * treats them as interchangeable.
+ */
+typedef s64 VCN;
+typedef s64 LCN;
+/**
+ * run_list_element - in memory vcn to lcn mapping array element
+ * @vcn:	starting vcn of the current array element
+ * @lcn:	starting lcn of the current array element
+ * @length:	length in clusters of the current array element
+ *
+ * The last vcn (in fact the last vcn + 1) is reached when length == 0.
+ *
+ * When lcn == -1 this means that the @length vcns starting at @vcn are not
+ * physically allocated (i.e. this is a hole / data is sparse).
+ */
+typedef struct {	/* In memory vcn to lcn mapping structure element. */
+	VCN vcn;	/* vcn = Starting virtual cluster number. */
+	LCN lcn;	/* lcn = Starting logical cluster number. */
+	s64 length;	/* Run length in clusters. */
+} run_list_element;
+/**
+ * run_list - in memory vcn to lcn mapping array including a read/write lock
+ * @rl:		pointer to an array of run list elements
+ * @lock:	read/write spinlock for serializing access to @rl
+ *
+ * An empty run list has @rl == NULL and @lock unlocked, which is the state
+ * produced by RUN_LIST_INIT, RUN_LIST() and INIT_RUN_LIST() below.
+ */
+typedef struct {
+	run_list_element *rl;
+	rwlock_t lock;
+} run_list;
+/* Static initializer for an empty, unlocked run list. */
+#define RUN_LIST_INIT		{ NULL, RW_LOCK_UNLOCKED }
+/* Define a run list variable called @name, initialized to empty. */
+#define RUN_LIST(name)		run_list name = RUN_LIST_INIT
+/*
+ * Run time initialization of the run list pointed to by @runlist. The
+ * argument is evaluated exactly once (into ___rl).
+ */
+#define INIT_RUN_LIST(runlist)	do {					\
+					run_list *___rl = runlist;	\
+					___rl->rl = NULL;		\
+					___rl->lock = RW_LOCK_UNLOCKED; \
+				} while (0)
+/* Boolean type used throughout the driver. */
+typedef enum {
+	FALSE = 0,
+	TRUE = 1
+} BOOL;
+/*
+ * Selects case sensitive or case insensitive name comparison. CASE_SENSITIVE
+ * is zero, so a value of this type can be tested directly as "ignore case?".
+ */
+typedef enum {
+	CASE_SENSITIVE = 0,
+	IGNORE_CASE = 1,
+} IGNORE_CASE_BOOL;
+#endif /* _LINUX_NTFS_TYPES_H */
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
+/*
+ * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001 Anton Altaparmakov.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "ntfs.h"
+/*
+ * IMPORTANT
+ * =========
+ *
+ * All these routines assume that the Unicode characters are in little endian
+ * encoding inside the strings!!!
+ */
+/*
+ * This is used by the name collation functions to quickly determine what
+ * characters are (in)valid.
+ *
+ * The table is indexed by character code and covers codes 0x00 - 0x3f only.
+ * ntfs_collate_names() treats a character as invalid when bit 3 (value 8) is
+ * set in its entry; that is the case for '"' (0x22), '*' (0x2a), '<' (0x3c),
+ * '>' (0x3e) and '?' (0x3f).
+ * NOTE(review): the meaning of the remaining bits is not used in this file -
+ * confirm against the origin of the table before relying on them.
+ */
+static const u8 legal_ansi_char_array[0x40] = {
+	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
+	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
+	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
+	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
+};
+/**
+ * ntfs_are_names_equal - compare two Unicode names for equality
+ * @s1:			name to compare to @s2
+ * @s1_len:		length in Unicode characters of @s1
+ * @s2:			name to compare to @s1
+ * @s2_len:		length in Unicode characters of @s2
+ * @ic:			ignore case bool
+ * @upcase:		upcase table (only if @ic == IGNORE_CASE)
+ * @upcase_size:	length in Unicode characters of @upcase (if present)
+ *
+ * Return TRUE (1) if the names @s1 and @s2 are identical and FALSE (0) if
+ * they are not. Names of different length are never equal. If @ic is
+ * IGNORE_CASE, the @upcase table is used to perform a case insensitive
+ * comparison.
+ */
+BOOL ntfs_are_names_equal(const uchar_t *s1, size_t s1_len,
+		     const uchar_t *s2, size_t s2_len,
+		     const IGNORE_CASE_BOOL ic,
+		     const uchar_t *upcase, const u32 upcase_size)
+{
+	if (s1_len == s2_len) {
+		if (ic != CASE_SENSITIVE)
+			return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase,
+					upcase_size);
+		return !ntfs_ucsncmp(s1, s2, s1_len);
+	}
+	return FALSE;
+}
+/**
+ * ntfs_collate_names - collate two Unicode names
+ * @name1:	first Unicode name to compare
+ * @name1_len:	length in Unicode characters of @name1
+ * @name2:	second Unicode name to compare
+ * @name2_len:	length in Unicode characters of @name2
+ * @err_val:	if @name1 contains an invalid character return this value
+ * @ic:		either CASE_SENSITIVE or IGNORE_CASE
+ * @upcase:	upcase table (ignored if @ic is CASE_SENSITIVE)
+ * @upcase_len:	upcase table size (ignored if @ic is CASE_SENSITIVE)
+ *
+ * Collate the two little endian Unicode names and return:
+ *
+ *  -1 if the first name collates before the second one,
+ *   0 if the names match,
+ *   1 if the second name collates before the first one, or
+ * @err_val if an invalid character is found in @name1 during the comparison.
+ *
+ * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
+ *
+ * Only the characters of @name1 that are actually looked at are validated:
+ * those up to and including the first difference and, if @name1 is the
+ * longer name, the first character beyond the common length.
+ */
+int ntfs_collate_names(const uchar_t *name1, const u32 name1_len,
+		const uchar_t *name2, const u32 name2_len,
+		const int err_val, const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_len)
+{
+	const u32 common_len = name1_len < name2_len ? name1_len : name2_len;
+	uchar_t ch1, ch2;
+	u32 pos;
+	/* Walk the part both names have in common. */
+	for (pos = 0; pos < common_len; pos++) {
+		ch1 = le16_to_cpu(name1[pos]);
+		ch2 = le16_to_cpu(name2[pos]);
+		if (ic) {
+			/* Characters beyond the table are left as they are. */
+			if (ch1 < upcase_len)
+				ch1 = le16_to_cpu(upcase[ch1]);
+			if (ch2 < upcase_len)
+				ch2 = le16_to_cpu(upcase[ch2]);
+		}
+		if (ch1 < 64 && legal_ansi_char_array[ch1] & 8)
+			return err_val;
+		if (ch1 != ch2)
+			return ch1 < ch2 ? -1 : 1;
+	}
+	/* Common part is identical, so the lengths decide. */
+	if (name1_len == name2_len)
+		return 0;
+	if (name1_len < name2_len)
+		return -1;
+	/* @name1 is longer: validate its next character before deciding. */
+	ch1 = le16_to_cpu(name1[common_len]);
+	if (ch1 < 64 && legal_ansi_char_array[ch1] & 8)
+		return err_val;
+	return 1;
+}
+/**
+ * ntfs_ucsncmp - compare two little endian Unicode strings
+ * @s1:		first string
+ * @s2:		second string
+ * @n:		maximum unicode characters to compare
+ *
+ * Compare at most the first @n characters of the Unicode strings @s1 and @s2.
+ * The strings are in little endian format and each character is converted
+ * with le16_to_cpu() before it is compared. The comparison also stops at the
+ * first Unicode NULL character found in both strings.
+ *
+ * Return -1, 0 or 1 if @s1 (or the first @n Unicode characters thereof) is
+ * found, respectively, to be less than, to match, or be greater than @s2.
+ */
+int ntfs_ucsncmp(const uchar_t *s1, const uchar_t *s2, size_t n)
+{
+	size_t left;
+	for (left = n; left; left--, s1++, s2++) {
+		const uchar_t a = le16_to_cpu(*s1);
+		const uchar_t b = le16_to_cpu(*s2);
+		if (a != b)
+			return a < b ? -1 : 1;
+		/* Both strings end here. */
+		if (!a)
+			return 0;
+	}
+	return 0;
+}
+/**
+ * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
+ * @s1:			first string
+ * @s2:			second string
+ * @n:			maximum unicode characters to compare
+ * @upcase:		upcase table
+ * @upcase_size:	upcase table size in Unicode characters
+ *
+ * Compare at most the first @n characters of the Unicode strings @s1 and @s2,
+ * ignoring case. The strings are in little endian format and each character
+ * is converted with le16_to_cpu() before use.
+ *
+ * Every character that lies within the @upcase table is replaced by its table
+ * entry before the comparison; characters beyond the table are compared
+ * unchanged. The comparison also stops at the first (upcased) Unicode NULL
+ * character found in both strings.
+ *
+ * Return -1, 0 or 1 if @s1 (or the first @n Unicode characters thereof) is
+ * found, respectively, to be less than, to match, or be greater than @s2.
+ */
+int ntfs_ucsncasecmp(const uchar_t *s1, const uchar_t *s2, size_t n,
+		     const uchar_t *upcase, const u32 upcase_size)
+{
+	size_t left;
+	for (left = n; left; left--, s1++, s2++) {
+		uchar_t a = le16_to_cpu(*s1);
+		uchar_t b = le16_to_cpu(*s2);
+		if (a < upcase_size)
+			a = le16_to_cpu(upcase[a]);
+		if (b < upcase_size)
+			b = le16_to_cpu(upcase[b]);
+		if (a != b)
+			return a < b ? -1 : 1;
+		/* Both strings end here. */
+		if (!a)
+			return 0;
+	}
+	return 0;
+}
+/**
+ * ntfs_upcase_name - uppercase a little endian Unicode name in place
+ * @name:	name to convert (modified in place)
+ * @name_len:	length in Unicode characters of @name
+ * @upcase:	upcase table
+ * @upcase_len:	upcase table size in Unicode characters
+ *
+ * Replace every character of @name that lies within the @upcase table by its
+ * table entry. The entry is stored as is, without byte order conversion.
+ * Characters beyond the table are left unchanged.
+ */
+void ntfs_upcase_name(uchar_t *name, u32 name_len, const uchar_t *upcase,
+		const u32 upcase_len)
+{
+	uchar_t *const end = name + name_len;
+	uchar_t *p;
+	for (p = name; p < end; p++) {
+		const uchar_t ch = le16_to_cpu(*p);
+		if (ch < upcase_len)
+			*p = upcase[ch];
+	}
+}
+/**
+ * ntfs_file_upcase_value - uppercase the name held in a file name attribute
+ * @file_name_attr:	file name attribute whose name is converted in place
+ * @upcase:		upcase table
+ * @upcase_len:		upcase table size in Unicode characters
+ *
+ * Thin wrapper around ntfs_upcase_name() operating on the file_name and
+ * file_name_length fields of @file_name_attr.
+ */
+void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr,
+		const uchar_t *upcase, const u32 upcase_len)
+{
+	uchar_t *name = (uchar_t*)&file_name_attr->file_name;
+	const u32 name_len = file_name_attr->file_name_length;
+	ntfs_upcase_name(name, name_len, upcase, upcase_len);
+}
+/**
+ * ntfs_file_compare_values - collate the names of two file name attributes
+ * @file_name_attr1:	first file name attribute
+ * @file_name_attr2:	second file name attribute
+ * @err_val:		value to return if the first name has an invalid char
+ * @ic:			either CASE_SENSITIVE or IGNORE_CASE
+ * @upcase:		upcase table (ignored if @ic is CASE_SENSITIVE)
+ * @upcase_len:		upcase table size (ignored if @ic is CASE_SENSITIVE)
+ *
+ * Thin wrapper around ntfs_collate_names() operating on the file_name and
+ * file_name_length fields of the two attributes; the return value is that of
+ * ntfs_collate_names().
+ */
+int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
+		FILE_NAME_ATTR *file_name_attr2,
+		const int err_val, const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_len)
+{
+	const uchar_t *first = (uchar_t*)&file_name_attr1->file_name;
+	const uchar_t *second = (uchar_t*)&file_name_attr2->file_name;
+	const u32 first_len = file_name_attr1->file_name_length;
+	const u32 second_len = file_name_attr2->file_name_length;
+	return ntfs_collate_names(first, first_len, second, second_len,
+			err_val, ic, upcase, upcase_len);
+}
+/**
+ * ntfs_nlstoucs - convert NLS string to little endian Unicode string
+ * @vol:	ntfs volume which we are working with
+ * @ins:	input NLS string buffer
+ * @ins_len:	length of input string in bytes
+ * @outs:	on return contains the allocated output Unicode string buffer
+ *
+ * Convert the input string @ins, which is in whatever format the loaded NLS
+ * map dictates, into a little endian, 2-byte Unicode string.
+ *
+ * This function allocates the string and the caller is responsible for
+ * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it.
+ *
+ * On success the function returns the number of Unicode characters written to
+ * the output string *@outs (>= 0), not counting the terminating Unicode NULL
+ * character. *@outs is set to the allocated output string buffer.
+ *
+ * On error, a negative number corresponding to the error code is returned
+ * (-EINVAL if @ins is NULL, -ENOMEM if the allocation failed, -EILSEQ if a
+ * character could not be converted). In that case the output string is not
+ * allocated and *@outs is undefined.
+ *
+ * This might look a bit odd due to fast path optimization...
+ */
+int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
+		const int ins_len, uchar_t **outs)
+{
+	struct nls_table *nls = vol->nls_map;
+	uchar_t *ucs;
+	wchar_t wc;
+	int i, o, wc_len;
+	/* We don't trust outside sources. */
+	if (ins) {
+		ucs = (uchar_t*)kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
+		if (ucs) {
+			/*
+			 * i indexes the input bytes, o the output characters.
+			 * NOTE(review): o is not checked against the size of
+			 * the ntfs_name_cache objects (not visible here), so
+			 * this presumably relies on callers bounding @ins_len
+			 * - confirm.
+			 */
+			for (i = o = 0; i < ins_len; i += wc_len) {
+				wc_len = nls->char2uni(ins + i, ins_len - i,
+						&wc);
+				if (wc_len >= 0) {
+					if (wc) {
+						ucs[o++] = cpu_to_le16(wc);
+						continue;
+					} /* else (!wc) */
+					/* NULL character ends the input. */
+					break;
+				} /* else (wc_len < 0) */
+				goto conversion_err;
+			}
+			ucs[o] = cpu_to_le16('\0');
+			*outs = ucs;
+			return o;
+		} /* else (!ucs) */
+		ntfs_error(vol->sb, "Failed to allocate name from "
+				"ntfs_name_cache!");
+		return -ENOMEM;
+	} /* else (!ins) */
+	ntfs_error(NULL, "Received NULL pointer.");
+	return -EINVAL;
+conversion_err:
+	/* Only reached from inside the loop, so ucs is allocated here. */
+	ntfs_error(vol->sb, "Name using character set %s contains characters "
+			"that cannot be converted to Unicode.", nls->charset);
+	kmem_cache_free(ntfs_name_cache, ucs);
+	return -EILSEQ;
+}
+/**
+ * ntfs_ucstonls - convert little endian Unicode string to NLS string
+ * @vol:	ntfs volume which we are working with
+ * @ins:	input Unicode string buffer
+ * @ins_len:	length of input string in Unicode characters
+ * @outs:	on return contains the (allocated) output NLS string buffer
+ * @outs_len:	length of output string buffer in bytes
+ *
+ * Convert the input little endian, 2-byte Unicode string @ins, of length
+ * @ins_len into the string format dictated by the loaded NLS.
+ *
+ * If *@outs is NULL, this function allocates the string and the caller is
+ * responsible for calling kfree(*@outs); when finished with it. In that case
+ * @outs_len is ignored and the buffer is grown as needed.
+ *
+ * If *@outs is not NULL it is used as the output buffer and @outs_len gives
+ * the number of bytes available for converted characters. A caller supplied
+ * buffer is never reallocated.
+ *
+ * On success the function returns the number of bytes written to the output
+ * string *@outs (>= 0), not counting the terminating NULL byte. If the output
+ * string buffer was allocated, *@outs is set to it.
+ *
+ * On error, a negative number corresponding to the error code is returned
+ * (-EINVAL if @ins is NULL, -ENOMEM if the initial allocation failed,
+ * -ENAMETOOLONG if the output does not fit, -EILSEQ if a character could not
+ * be converted). In that case the output string is not allocated. The
+ * contents of *@outs are then undefined.
+ *
+ * This might look a bit odd due to fast path optimization...
+ */
+int ntfs_ucstonls(const ntfs_volume *vol, const uchar_t *ins,
+		const int ins_len, unsigned char **outs, int outs_len)
+{
+	struct nls_table *nls = vol->nls_map;
+	unsigned char *ns;
+	int i, o, ns_len, wc;
+	/* We don't trust outside sources. */
+	if (ins) {
+		ns = *outs;
+		ns_len = outs_len;
+		/*
+		 * Only a caller supplied buffer can be too short up front;
+		 * when we allocate ourselves @outs_len is irrelevant.
+		 */
+		if (ns && !ns_len) {
+			wc = -ENAMETOOLONG;
+			goto conversion_err;
+		}
+		if (!ns) {
+			ns_len = ins_len * 3;
+			/* One extra byte for the terminating NULL. */
+			ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS);
+			if (!ns)
+				goto mem_err_out;
+		}
+		/* i indexes the input characters, o the output bytes. */
+		for (i = o = 0; i < ins_len; i++) {
+retry:			wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
+					ns_len - o);
+			if (wc > 0) {
+				o += wc;
+				continue;
+			} else if (!wc)
+				break;
+			else if (wc == -ENAMETOOLONG && ns != *outs) {
+				unsigned char *tc;
+				/*
+				 * Grow by 64 bytes. (Chosen at random.) As
+				 * above, keep one extra byte for the
+				 * terminating NULL.
+				 */
+				tc = (unsigned char*)kmalloc(ns_len + 64 + 1,
+						GFP_NOFS);
+				if (tc) {
+					/* Only the first o bytes are in use. */
+					memcpy(tc, ns, o);
+					ns_len += 64;
+					kfree(ns);
+					ns = tc;
+					goto retry;
+				} /* No memory so goto conversion_error; */
+			} /* wc < 0, real error. */
+			goto conversion_err;
+		}
+		/*
+		 * NOTE(review): for a caller supplied buffer this requires
+		 * room for one byte beyond @outs_len when the output fills
+		 * the buffer completely - confirm that callers size their
+		 * buffers accordingly.
+		 */
+		ns[o] = '\0';
+		*outs = ns;
+		return o;
+	} /* else (!ins) */
+	ntfs_error(NULL, "Received NULL pointer.");
+	return -EINVAL;
+conversion_err:
+	ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
+			"converted to character set %s.", nls->charset);
+	/* Free only what we allocated, never the caller's buffer. */
+	if (ns != *outs)
+		kfree(ns);
+	if (wc != -ENAMETOOLONG)
+		wc = -EILSEQ;
+	return wc;
+mem_err_out:
+	ntfs_error(vol->sb, "Failed to allocate name!");
+	return -ENOMEM;
+}
--- a/fs/ntfs/upcase.c
+++ b/fs/ntfs/upcase.c
+/*
+ * upcase.c - Generate the full NTFS Unicode upcase table in little endian.
+ *	      Part of the Linux-NTFS project.
+ *
+ * Copyright (C) 2001 Richard Russon <ntfs@flatcap.org>
+ * Copyright (c) 2001 Anton Altaparmakov <aia21@cam.ac.uk>
+ *
+ * Modified for mkntfs inclusion 9 June 2001 by Anton Altaparmakov.
+ * Modified for kernel inclusion 10 September 2001 by Anton Altaparmakov.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS source
+ * in the file COPYING); if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "ntfs.h"
+/**
+ * generate_default_upcase - build the default NTFS Unicode upcase table
+ *
+ * Allocate a table of default_upcase_len little endian Unicode characters
+ * with ntfs_malloc_nofs() and fill it in: every entry starts out as the
+ * identity mapping and is then adjusted according to the three static tables
+ * below.
+ *
+ * Return the allocated table or NULL if the allocation failed. Releasing the
+ * table is the responsibility of the caller.
+ */
+uchar_t *generate_default_upcase(void)
+{
+	/*
+	 * The tables are constant, so keep them in static storage rather than
+	 * rebuilding close to 1kiB of data on the kernel stack on every call.
+	 */
+	/* Ranges [Start, End) whose entries have Add added to them. */
+	static const int uc_run_table[][3] = { /* Start, End, Add */
+	{0x0061, 0x007B,  -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72,  74},
+	{0x00E0, 0x00F7,  -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76,  86},
+	{0x00F8, 0x00FF,  -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
+	{0x0256, 0x0258, -205}, {0x1F00, 0x1F08,   8}, {0x1F78, 0x1F7A, 128},
+	{0x028A, 0x028C, -217}, {0x1F10, 0x1F16,   8}, {0x1F7A, 0x1F7C, 112},
+	{0x03AC, 0x03AD,  -38}, {0x1F20, 0x1F28,   8}, {0x1F7C, 0x1F7E, 126},
+	{0x03AD, 0x03B0,  -37}, {0x1F30, 0x1F38,   8}, {0x1FB0, 0x1FB2,   8},
+	{0x03B1, 0x03C2,  -32}, {0x1F40, 0x1F46,   8}, {0x1FD0, 0x1FD2,   8},
+	{0x03C2, 0x03C3,  -31}, {0x1F51, 0x1F52,   8}, {0x1FE0, 0x1FE2,   8},
+	{0x03C3, 0x03CC,  -32}, {0x1F53, 0x1F54,   8}, {0x1FE5, 0x1FE6,   7},
+	{0x03CC, 0x03CD,  -64}, {0x1F55, 0x1F56,   8}, {0x2170, 0x2180, -16},
+	{0x03CD, 0x03CF,  -63}, {0x1F57, 0x1F58,   8}, {0x24D0, 0x24EA, -26},
+	{0x0430, 0x0450,  -32}, {0x1F60, 0x1F68,   8}, {0xFF41, 0xFF5B, -32},
+	{0}
+	};
+	/*
+	 * Ranges [Start, End) in which every second entry, starting with
+	 * Start + 1, is decremented by one.
+	 */
+	static const int uc_dup_table[][2] = { /* Start, End */
+	{0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
+	{0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
+	{0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
+	{0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
+	{0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
+	{0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
+	{0}
+	};
+	/* Individual entries that are set to an explicit value. */
+	static const int uc_word_table[][2] = { /* Offset, Value */
+	{0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
+	{0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
+	{0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
+	{0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
+	{0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
+	{0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
+	{0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
+	{0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
+	{0}
+	};
+	int i, r;
+	uchar_t *uc;
+	uc = ntfs_malloc_nofs(default_upcase_len * sizeof(uchar_t));
+	if (!uc)
+		return uc;
+	/*
+	 * Start with the identity mapping. This writes every entry, so there
+	 * is no need to clear the table beforehand.
+	 */
+	for (i = 0; i < default_upcase_len; i++)
+		uc[i] = cpu_to_le16(i);
+	/* Each table is terminated by an entry whose first field is zero. */
+	for (r = 0; uc_run_table[r][0]; r++)
+		for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
+			uc[i] = cpu_to_le16((le16_to_cpu(uc[i]) +
+					uc_run_table[r][2]));
+	for (r = 0; uc_dup_table[r][0]; r++)
+		for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
+			uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1);
+	for (r = 0; uc_word_table[r][0]; r++)
+		uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]);
+	return uc;
+}
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
+/*
+ * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
+ *	      of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (C) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be 
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS 
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef _LINUX_NTFS_VOLUME_H
+#define _LINUX_NTFS_VOLUME_H
+#include "types.h"
+/*
+ * These are used to determine which inode names are returned by readdir().
+ * The values are bit flags: SHOW_POSIX is the combination of SHOW_WIN32 and
+ * SHOW_DOS, and SHOW_ALL additionally includes SHOW_SYSTEM.
+ */
+typedef enum {
+	SHOW_SYSTEM	= 1,
+	SHOW_WIN32	= 2,
+	SHOW_DOS	= 4,
+	SHOW_POSIX	= SHOW_WIN32 | SHOW_DOS,
+	SHOW_ALL	= SHOW_SYSTEM | SHOW_POSIX,
+} READDIR_OPTIONS;
+/* Each of these is true when the corresponding SHOW_* flag is NOT set in x. */
+#define RHideSystemFiles(x)	(!((x) & SHOW_SYSTEM))
+#define RHideLongNames(x)	(!((x) & SHOW_WIN32))
+#define RHideDosNames(x)	(!((x) & SHOW_DOS))
+/*
+ * The NTFS in memory super block structure.
+ *
+ * The two unions are declared with SN() and their members are meant to be
+ * accessed through the _VCL() and _VMM() macros defined after the structure,
+ * so that the code works whether or not the unions are named.
+ */
+typedef struct {
+	/*
+	 * FIXME: Reorder to have commonly used together element within the
+	 * same cache line, aiming at a cache line size of 32 bytes. Aim for
+	 * 64 bytes for less commonly used together elements. Put most commonly
+	 * used elements to front of structure. Obviously do this only when the
+	 * structure has stabilized... (AIA)
+	 */
+	/* Device specifics. */
+	struct super_block *sb;		/* Pointer back to the super_block,
+					   so we don't have to get the offset
+					   every time. */
+	LCN nr_blocks;			/* Number of NTFS_BLOCK_SIZE bytes
+					   sized blocks on the device. */
+	/* Configuration provided by user at mount time. */
+	uid_t uid;			/* uid that files will be mounted as. */
+	gid_t gid;			/* gid that files will be mounted as. */
+	mode_t fmask;			/* The mask for file permissions. */
+	mode_t dmask;			/* The mask for directory
+					   permissions. */
+	READDIR_OPTIONS readdir_opts;	/* Namespace of inode names to show. */
+	u8 mft_zone_multiplier;	/* Initial mft zone multiplier. */
+	u8 on_errors;			/* What to do on file system errors. */
+	/* NTFS bootsector provided information. */
+	u16 sector_size;		/* in bytes */
+	u8 sector_size_bits;		/* log2(sector_size) */
+	u32 cluster_size;		/* in bytes */
+	u32 cluster_size_mask;		/* cluster_size - 1 */
+	u8 cluster_size_bits;		/* log2(cluster_size) */
+	u32 mft_record_size;		/* in bytes */
+	u32 mft_record_size_mask;	/* mft_record_size - 1 */
+	u8 mft_record_size_bits;	/* log2(mft_record_size) */
+	u32 index_record_size;		/* in bytes */
+	u32 index_record_size_mask;	/* index_record_size - 1 */
+	u8 index_record_size_bits;	/* log2(index_record_size) */
+	/* Two names for the same value; access via _VCL(). */
+	union {
+		LCN nr_clusters;	/* Volume size in clusters. */
+		LCN nr_lcn_bits;	/* Number of bits in lcn bitmap. */
+	} SN(vcl);
+	LCN mft_lcn;			/* Cluster location of mft data. */
+	LCN mftmirr_lcn;		/* Cluster location of copy of mft. */
+	u64 serial_no;			/* The volume serial number. */
+	/* Mount specific NTFS information. */
+	u32 upcase_len;			/* Number of entries in upcase[]. */
+	uchar_t *upcase;		/* The upcase table. */
+	LCN mft_zone_start;		/* First cluster of the mft zone. */
+	LCN mft_zone_end;		/* First cluster beyond the mft zone. */
+	struct inode *mft_ino;		/* The VFS inode of $MFT. */
+	struct rw_semaphore mftbmp_lock; /* Lock for serializing accesses to the
+					    mft record bitmap ($MFT/$BITMAP). */
+	/* Two names for the same value; access via _VMM(). */
+	union {
+		s64 nr_mft_records;	/* Number of records in the mft. */
+		s64 nr_mft_bits;	/* Number of bits in mft bitmap. */
+	} SN(vmm);
+	struct address_space mftbmp_mapping; /* Page cache for $MFT/$BITMAP. */
+	run_list mftbmp_rl;		/* Run list for $MFT/$BITMAP. */
+	s64 mftbmp_size;		/* Data size of $MFT/$BITMAP. */
+	s64 mftbmp_initialized_size;	/* Initialized size of $MFT/$BITMAP. */
+	s64 mftbmp_allocated_size;	/* Allocated size of $MFT/$BITMAP. */
+	struct inode *mftmirr_ino;	/* The VFS inode of $MFTMirr. */
+	struct inode *lcnbmp_ino;	/* The VFS inode of $Bitmap. */
+	struct rw_semaphore lcnbmp_lock; /* Lock for serializing accesses to the
+					    cluster bitmap ($Bitmap/$DATA). */
+	struct inode *vol_ino;		/* The VFS inode of $Volume. */
+	unsigned long vol_flags;	/* Volume flags (VOLUME_*). */
+	u8 major_ver;			/* Ntfs major version of volume. */
+	u8 minor_ver;			/* Ntfs minor version of volume. */
+	struct inode *root_ino;		/* The VFS inode of the root
+					   directory. */
+	struct inode *secure_ino;	/* The VFS inode of $Secure (NTFS3.0+
+					   only, otherwise NULL). */
+	struct nls_table *nls_map;	/* NLS table used to convert names
+					   to and from Unicode. */
+} ntfs_volume;
+/* Shortcuts for accessing the members of the vcl and vmm unions above. */
+#define _VCL(X)  SC(vcl,X)
+#define _VMM(X)  SC(vmm,X)
+#endif /* _LINUX_NTFS_VOLUME_H */
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -165,7 +165,8 @@ EXPORT_SYMBOL(d_alloc);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(__d_path);
 EXPORT_SYMBOL(mark_buffer_dirty);
-EXPORT_SYMBOL(set_buffer_async_io); /* for reiserfs_writepage */
+EXPORT_SYMBOL(end_buffer_io_sync);
+EXPORT_SYMBOL(set_buffer_async_io);
 EXPORT_SYMBOL(__mark_buffer_dirty);
 EXPORT_SYMBOL(__mark_inode_dirty);
 EXPORT_SYMBOL(get_empty_filp);