Commit 8e27b910 authored by Anton Altaparmakov

NTFS: 2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures).

- Massive internal locking changes to mft record locking. Fixes lock
  recursion and replaces the mrec_lock read/write semaphore with a
  mutex. Also removes the now superfluous mft_count. This fixes several
  race conditions and deadlocks, especially in the future write code.
- Fix ntfs over loopback for compressed files by adding an
  optimization barrier. (gcc was screwing up otherwise?)
- Miscellaneous cleanups all over the code and a fix or two in error
  handling code paths.
Thanks go to Christoph Hellwig for pointing out the following two:
- Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs().
- Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too.
parent 35aa61ec
......@@ -247,6 +247,14 @@ ChangeLog
Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
2.0.23:
- Massive internal locking changes to mft record locking. Fixes
various race conditions and deadlocks.
- Fix ntfs over loopback for compressed files by adding an
optimization barrier. (gcc was screwing up otherwise?)
Thanks go to Christoph Hellwig for pointing these two out:
- Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs().
- Fix ntfs_free() for ia64 and parisc.
2.0.22:
- Small internal cleanups.
2.0.21:
......
......@@ -2,6 +2,20 @@ ToDo:
- Find and fix bugs.
- Enable NFS exporting of NTFS.
2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures).
- Massive internal locking changes to mft record locking. Fixes lock
recursion and replaces the mrec_lock read/write semaphore with a
mutex. Also removes the now superfluous mft_count. This fixes several
race conditions and deadlocks, especially in the future write code.
- Fix ntfs over loopback for compressed files by adding an
optimization barrier. (gcc was screwing up otherwise?)
- Miscellaneous cleanups all over the code and a fix or two in error
handling code paths.
Thanks go to Christoph Hellwig for pointing out the following two:
- Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs().
- Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too.
2.0.22 - Cleanups, mainly to ntfs_readdir(), and use C99 initializers.
- Change fs/ntfs/dir.c::ntfs_readdir() to only read/write ->f_pos once
......
......@@ -5,7 +5,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs.o
ntfs-objs := aops.o attrib.o compress.o debug.o dir.o file.o inode.o mft.o \
mst.o namei.o super.o sysctl.o time.o unistr.o upcase.o
EXTRA_CFLAGS = -DNTFS_VERSION=\"2.0.22\"
EXTRA_CFLAGS = -DNTFS_VERSION=\"2.0.23\"
ifeq ($(CONFIG_NTFS_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
......
......@@ -106,8 +106,6 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
if (!NInoMstProtected(ni)) {
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
unlock_page(page);
return;
} else {
char *addr;
unsigned int i, recs, nr_err;
......@@ -332,6 +330,8 @@ static int ntfs_read_block(struct page *page)
* for it to be read in before we can do the copy.
*
* Return 0 on success and -errno on error.
*
* WARNING: Do not make this function static! It is used by mft.c!
*/
int ntfs_readpage(struct file *file, struct page *page)
{
......@@ -372,8 +372,8 @@ int ntfs_readpage(struct file *file, struct page *page)
else
base_ni = ni->_INE(base_ntfs_ino);
/* Map, pin and lock the mft record for reading. */
mrec = map_mft_record(READ, base_ni);
/* Map, pin and lock the mft record. */
mrec = map_mft_record(base_ni);
if (unlikely(IS_ERR(mrec))) {
err = PTR_ERR(mrec);
goto err_out;
......@@ -416,7 +416,7 @@ int ntfs_readpage(struct file *file, struct page *page)
put_unm_err_out:
put_attr_search_ctx(ctx);
unm_err_out:
unmap_mft_record(READ, base_ni);
unmap_mft_record(base_ni);
err_out:
unlock_page(page);
return err;
......
......@@ -948,7 +948,7 @@ int map_run_list(ntfs_inode *ni, VCN vcn)
else
base_ni = ni->_INE(base_ntfs_ino);
mrec = map_mft_record(READ, base_ni);
mrec = map_mft_record(base_ni);
if (IS_ERR(mrec))
return PTR_ERR(mrec);
ctx = get_attr_search_ctx(base_ni, mrec);
......@@ -979,7 +979,7 @@ int map_run_list(ntfs_inode *ni, VCN vcn)
put_attr_search_ctx(ctx);
err_out:
unmap_mft_record(READ, base_ni);
unmap_mft_record(base_ni);
return err;
}
......@@ -1671,7 +1671,7 @@ void reinit_attr_search_ctx(attr_search_context *ctx)
return;
} /* Attribute list. */
if (ctx->ntfs_ino != ctx->base_ntfs_ino)
unmap_mft_record(READ, ctx->ntfs_ino);
unmap_mft_record(ctx->ntfs_ino);
init_attr_search_ctx(ctx, ctx->base_ntfs_ino, ctx->base_mrec);
return;
}
......@@ -1704,7 +1704,7 @@ attr_search_context *get_attr_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec)
void put_attr_search_ctx(attr_search_context *ctx)
{
if (ctx->base_ntfs_ino && ctx->ntfs_ino != ctx->base_ntfs_ino)
unmap_mft_record(READ, ctx->ntfs_ino);
unmap_mft_record(ctx->ntfs_ino);
kmem_cache_free(ntfs_attr_ctx_cache, ctx);
return;
}
......
......@@ -608,8 +608,27 @@ int ntfs_read_compressed_block(struct page *page)
if (buffer_uptodate(tbh))
continue;
wait_on_buffer(tbh);
/*
* We need an optimization barrier here, otherwise we start
* hitting the below fixup code when accessing a loopback
* mounted ntfs partition. This indicates either there is a
* race condition in the loop driver or, more likely, gcc
* overoptimises the code without the barrier and it doesn't
* do the Right Thing(TM).
*/
barrier();
if (unlikely(!buffer_uptodate(tbh))) {
ntfs_warning(vol->sb, "Buffer is unlocked but not "
"uptodate! Unplugging the disk queue "
"and rescheduling.");
get_bh(tbh);
blk_run_queues();
schedule();
put_bh(tbh);
if (unlikely(!buffer_uptodate(tbh)))
goto read_err;
ntfs_warning(vol->sb, "Buffer is now uptodate. Good.");
}
}
/*
......
This diff is collapsed.
......@@ -278,7 +278,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
ntfs_inode *ni = NTFS_I(inode);
ntfs_debug("Entering.");
BUG_ON(atomic_read(&ni->mft_count) || !atomic_dec_and_test(&ni->count));
BUG_ON(ni->page || !atomic_dec_and_test(&ni->count));
kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
}
......@@ -299,7 +299,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void)
void ntfs_destroy_extent_inode(ntfs_inode *ni)
{
ntfs_debug("Entering.");
BUG_ON(atomic_read(&ni->mft_count) || !atomic_dec_and_test(&ni->count));
BUG_ON(ni->page || !atomic_dec_and_test(&ni->count));
kmem_cache_free(ntfs_inode_cache, ni);
}
......@@ -323,8 +323,7 @@ static void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
atomic_set(&ni->count, 1);
ni->vol = NTFS_SB(sb);
init_run_list(&ni->run_list);
init_rwsem(&ni->mrec_lock);
atomic_set(&ni->mft_count, 0);
init_MUTEX(&ni->mrec_lock);
ni->page = NULL;
ni->page_ofs = 0;
ni->attr_list_size = 0;
......@@ -504,7 +503,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
ntfs_init_big_inode(vi);
ni = NTFS_I(vi);
m = map_mft_record(READ, ni);
m = map_mft_record(ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
goto err_out;
......@@ -790,6 +789,11 @@ static int ntfs_read_locked_inode(struct inode *vi)
/* No index allocation. */
vi->i_size = ni->initialized_size =
ni->allocated_size = 0;
/* We are done with the mft record, so we release it. */
put_attr_search_ctx(ctx);
unmap_mft_record(ni);
m = NULL;
ctx = NULL;
goto skip_large_dir_stuff;
} /* LARGE_INDEX: Index allocation present. Setup state. */
NInoSetIndexAllocPresent(ni);
......@@ -834,7 +838,14 @@ static int ntfs_read_locked_inode(struct inode *vi)
ctx->attr->_ANR(initialized_size));
ni->allocated_size = sle64_to_cpu(
ctx->attr->_ANR(allocated_size));
/*
* We are done with the mft record, so we release it. Otherwise
* we would deadlock in ntfs_attr_iget().
*/
put_attr_search_ctx(ctx);
unmap_mft_record(ni);
m = NULL;
ctx = NULL;
/* Get the index bitmap attribute inode. */
bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4);
if (unlikely(IS_ERR(bvi))) {
......@@ -858,7 +869,6 @@ static int ntfs_read_locked_inode(struct inode *vi)
bvi->i_size << 3, vi->i_size);
goto unm_err_out;
}
skip_large_dir_stuff:
/* Everyone gets read and scan permissions. */
vi->i_mode |= S_IRUGO | S_IXUGO;
......@@ -998,6 +1008,11 @@ static int ntfs_read_locked_inode(struct inode *vi)
le32_to_cpu(ctx->attr->_ARA(value_length));
}
no_data_attr_special_case:
/* We are done with the mft record, so we release it. */
put_attr_search_ctx(ctx);
unmap_mft_record(ni);
m = NULL;
ctx = NULL;
/* Everyone gets all permissions. */
vi->i_mode |= S_IRWXUGO;
/* If read-only, no one gets write permissions. */
......@@ -1026,9 +1041,6 @@ static int ntfs_read_locked_inode(struct inode *vi)
else
vi->i_blocks = ni->_ICF(compressed_size) >> 9;
put_attr_search_ctx(ctx);
unmap_mft_record(READ, ni);
ntfs_debug("Done.");
return 0;
......@@ -1037,7 +1049,8 @@ static int ntfs_read_locked_inode(struct inode *vi)
err = -EIO;
if (ctx)
put_attr_search_ctx(ctx);
unmap_mft_record(READ, ni);
if (m)
unmap_mft_record(ni);
err_out:
ntfs_error(vi->i_sb, "Failed with error code %i. Marking inode 0x%lx "
"as bad.", -err, vi->i_ino);
......@@ -1091,7 +1104,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
/* Set inode type to zero but preserve permissions. */
vi->i_mode = base_vi->i_mode & ~S_IFMT;
m = map_mft_record(READ, base_ni);
m = map_mft_record(base_ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
goto err_out;
......@@ -1265,7 +1278,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
ni->nr_extents = -1;
put_attr_search_ctx(ctx);
unmap_mft_record(READ, base_ni);
unmap_mft_record(base_ni);
ntfs_debug("Done.");
return 0;
......@@ -1275,7 +1288,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
err = -EIO;
if (ctx)
put_attr_search_ctx(ctx);
unmap_mft_record(READ, base_ni);
unmap_mft_record(base_ni);
err_out:
ntfs_error(vi->i_sb, "Failed with error code %i while reading "
"attribute inode (mft_no 0x%lx, type 0x%x, name_len "
......@@ -1398,7 +1411,7 @@ void ntfs_read_inode_mount(struct inode *vi)
/* Need this to sanity check attribute list references to $MFT. */
ni->seq_no = le16_to_cpu(m->sequence_number);
/* Provides readpage() and sync_page() for map_mft_record(READ). */
/* Provides readpage() and sync_page() for map_mft_record(). */
vi->i_mapping->a_ops = &ntfs_mft_aops;
ctx = get_attr_search_ctx(ni, m);
......@@ -1795,8 +1808,8 @@ void __ntfs_clear_inode(ntfs_inode *ni)
}
}
/* Synchronize with ntfs_commit_inode(). */
down_write(&ni->mrec_lock);
up_write(&ni->mrec_lock);
down(&ni->mrec_lock);
up(&ni->mrec_lock);
if (NInoDirty(ni)) {
ntfs_error(ni->vol->sb, "Failed to commit dirty inode "
"asynchronously.");
......
......@@ -72,9 +72,8 @@ struct _ntfs_inode {
* The following fields are only valid for real inodes and extent
* inodes.
*/
struct rw_semaphore mrec_lock; /* Lock for serializing access to the
struct semaphore mrec_lock; /* Lock for serializing access to the
mft record belonging to this inode. */
atomic_t mft_count; /* Mapping reference count for book keeping. */
struct page *page; /* The page containing the mft record of the
inode. This should only be touched by the
(un)map_mft_record*() functions. */
......
......@@ -25,20 +25,6 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
/**
* vmalloc_nofs - allocate any pages but don't allow calls into fs layer
* @size: number of bytes to allocate
*
* Allocate any pages but don't allow calls into fs layer. Return allocated
* memory or NULL if insufficient memory.
*/
static inline void *vmalloc_nofs(unsigned long size)
{
if (likely(size >> PAGE_SHIFT < num_physpages))
return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
return NULL;
}
/**
* ntfs_malloc_nofs - allocate memory in multiples of pages
* @size number of bytes to allocate
......@@ -66,7 +52,8 @@ static inline void *ntfs_malloc_nofs(unsigned long size)
static inline void ntfs_free(void *addr)
{
if (likely((unsigned long)addr < VMALLOC_START)) {
if (likely(((unsigned long)addr < VMALLOC_START) ||
((unsigned long)addr >= VMALLOC_END ))) {
return kfree(addr);
/* return free_page((unsigned long)addr); */
}
......
This diff is collapsed.
......@@ -31,15 +31,15 @@ extern int format_mft_record(ntfs_inode *ni, MFT_RECORD *m);
//extern int format_mft_record2(struct super_block *vfs_sb,
// const unsigned long inum, MFT_RECORD *m);
extern MFT_RECORD *map_mft_record(const int rw, ntfs_inode *ni);
extern void unmap_mft_record(const int rw, ntfs_inode *ni);
extern MFT_RECORD *map_mft_record(ntfs_inode *ni);
extern void unmap_mft_record(ntfs_inode *ni);
extern MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
ntfs_inode **ntfs_ino);
static inline void unmap_extent_mft_record(ntfs_inode *ni)
{
unmap_mft_record(READ, ni);
unmap_mft_record(ni);
return;
}
......
......@@ -162,6 +162,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
handle_name:
{
struct dentry *real_dent;
MFT_RECORD *m;
attr_search_context *ctx;
ntfs_inode *ni = NTFS_I(dent_inode);
int err;
......@@ -175,22 +176,23 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
name->len * 3 + 1);
kfree(name);
} else /* if (name->type == FILE_NAME_DOS) */ { /* Case 3. */
MFT_RECORD *m;
FILE_NAME_ATTR *fn;
kfree(name);
/* Find the WIN32 name corresponding to the matched DOS name. */
ni = NTFS_I(dent_inode);
m = map_mft_record(READ, ni);
m = map_mft_record(ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
goto name_err_out;
m = NULL;
ctx = NULL;
goto err_out;
}
ctx = get_attr_search_ctx(ni, m);
if (!ctx) {
err = -ENOMEM;
goto unm_err_out;
goto err_out;
}
do {
ATTR_RECORD *a;
......@@ -202,21 +204,21 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
"namespace counterpart to DOS "
"file name. Run chkdsk.");
err = -EIO;
goto put_unm_err_out;
goto err_out;
}
/* Consistency checks. */
a = ctx->attr;
if (a->non_resident || a->flags)
goto eio_put_unm_err_out;
goto eio_err_out;
val_len = le32_to_cpu(a->_ARA(value_length));
if (le16_to_cpu(a->_ARA(value_offset)) + val_len >
le32_to_cpu(a->length))
goto eio_put_unm_err_out;
goto eio_err_out;
fn = (FILE_NAME_ATTR*)((u8*)ctx->attr + le16_to_cpu(
ctx->attr->_ARA(value_offset)));
if ((u32)(fn->file_name_length * sizeof(uchar_t) +
sizeof(FILE_NAME_ATTR)) > val_len)
goto eio_put_unm_err_out;
goto eio_err_out;
} while (fn->file_name_type != FILE_NAME_WIN32);
/* Convert the found WIN32 name to current NLS code page. */
......@@ -226,13 +228,15 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
fn->file_name_length * 3 + 1);
put_attr_search_ctx(ctx);
unmap_mft_record(READ, ni);
unmap_mft_record(ni);
}
m = NULL;
ctx = NULL;
/* Check if a conversion error occurred. */
if ((signed)nls_name.len < 0) {
err = (signed)nls_name.len;
goto name_err_out;
goto err_out;
}
nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
......@@ -248,7 +252,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
kfree(nls_name.name);
if (!real_dent) {
err = -ENOMEM;
goto name_err_out;
goto err_out;
}
d_add(real_dent, dent_inode);
return real_dent;
......@@ -269,14 +273,14 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent)
d_instantiate(real_dent, dent_inode);
return real_dent;
eio_put_unm_err_out:
eio_err_out:
ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
err = -EIO;
put_unm_err_out:
err_out:
if (ctx)
put_attr_search_ctx(ctx);
unm_err_out:
unmap_mft_record(READ, ni);
name_err_out:
if (m)
unmap_mft_record(ni);
iput(dent_inode);
return ERR_PTR(err);
}
......
......@@ -852,7 +852,7 @@ static BOOL load_system_files(ntfs_volume *vol)
ntfs_error(sb, "Failed to load $Volume.");
goto iput_lcnbmp_err_out;
}
m = map_mft_record(READ, NTFS_I(vol->vol_ino));
m = map_mft_record(NTFS_I(vol->vol_ino));
if (IS_ERR(m)) {
iput_volume_failed:
iput(vol->vol_ino);
......@@ -867,7 +867,7 @@ static BOOL load_system_files(ntfs_volume *vol)
err_put_vol:
put_attr_search_ctx(ctx);
get_ctx_vol_failed:
unmap_mft_record(READ, NTFS_I(vol->vol_ino));
unmap_mft_record(NTFS_I(vol->vol_ino));
goto iput_volume_failed;
}
vi = (VOLUME_INFORMATION*)((char*)ctx->attr +
......@@ -882,7 +882,7 @@ static BOOL load_system_files(ntfs_volume *vol)
vol->major_ver = vi->major_ver;
vol->minor_ver = vi->minor_ver;
put_attr_search_ctx(ctx);
unmap_mft_record(READ, NTFS_I(vol->vol_ino));
unmap_mft_record(NTFS_I(vol->vol_ino));
printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
vol->minor_ver);
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment