Commit 7d9071a0 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs updates from Al Viro:
 "In this one:

   - d_move fixes (Eric Biederman)

   - UFS fixes (me; locking is mostly sane now, a bunch of bugs in error
     handling ought to be fixed)

   - switch of sb_writers to percpu rwsem (Oleg Nesterov)

   - superblock scalability (Josef Bacik and Dave Chinner)

   - swapon(2) race fix (Hugh Dickins)"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (65 commits)
  vfs: Test for and handle paths that are unreachable from their mnt_root
  dcache: Reduce the scope of i_lock in d_splice_alias
  dcache: Handle escaped paths in prepend_path
  mm: fix potential data race in SyS_swapon
  inode: don't softlockup when evicting inodes
  inode: rename i_wb_list to i_io_list
  sync: serialise per-superblock sync operations
  inode: convert inode_sb_list_lock to per-sb
  inode: add hlist_fake to avoid the inode hash lock in evict
  writeback: plug writeback at a high level
  change sb_writers to use percpu_rw_semaphore
  shift percpu_counter_destroy() into destroy_super_work()
  percpu-rwsem: kill CONFIG_PERCPU_RWSEM
  percpu-rwsem: introduce percpu_rwsem_release() and percpu_rwsem_acquire()
  percpu-rwsem: introduce percpu_down_read_trylock()
  document rwsem_release() in sb_wait_write()
  fix the broken lockdep logic in __sb_start_write()
  introduce __sb_writers_{acquired,release}() helpers
  ufs_inode_get{frag,block}(): get rid of 'phys' argument
  ufs_getfrag_block(): tidy up a bit
  ...
parents bd779669 397d425d
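For orientation before the diff: the centerpiece of the sb_writers rework is that each of the three freeze levels becomes a percpu_rw_semaphore, taken for read on the write path and for write by the freezer. A minimal standalone sketch of that pattern (the guard structure and helper names here are illustrative, not the kernel's):

    #include <linux/percpu-rwsem.h>

    /* Illustrative miniature of the per-level freeze protection. */
    struct freeze_guard {
        struct percpu_rw_semaphore rw_sem;
    };

    static void guard_start_write(struct freeze_guard *fg)
    {
        percpu_down_read(&fg->rw_sem);  /* cheap, per-CPU fast path */
    }

    static void guard_end_write(struct freeze_guard *fg)
    {
        percpu_up_read(&fg->rw_sem);
    }

    static void guard_freeze(struct freeze_guard *fg)
    {
        percpu_down_write(&fg->rw_sem); /* waits out all in-flight writers */
    }

    static void guard_thaw(struct freeze_guard *fg)
    {
        percpu_up_write(&fg->rw_sem);
    }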
@@ -93,7 +93,6 @@ config KPROBES_ON_FTRACE
 config UPROBES
     def_bool n
-    select PERCPU_RWSEM
     help
       Uprobes is the user-space counterpart to kprobes: they
       enable instrumentation applications (such as 'perf probe')
...
@@ -1769,7 +1769,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 {
     struct inode *inode, *old_inode = NULL;
-    spin_lock(&inode_sb_list_lock);
+    spin_lock(&blockdev_superblock->s_inode_list_lock);
     list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
         struct address_space *mapping = inode->i_mapping;
@@ -1781,13 +1781,13 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
         }
         __iget(inode);
         spin_unlock(&inode->i_lock);
-        spin_unlock(&inode_sb_list_lock);
+        spin_unlock(&blockdev_superblock->s_inode_list_lock);
         /*
          * We hold a reference to 'inode' so it couldn't have been
          * removed from s_inodes list while we dropped the
-         * inode_sb_list_lock. We cannot iput the inode now as we can
+         * s_inode_list_lock  We cannot iput the inode now as we can
          * be holding the last reference and we cannot iput it under
-         * inode_sb_list_lock. So we keep the reference and iput it
+         * s_inode_list_lock. So we keep the reference and iput it
          * later.
          */
         iput(old_inode);
@@ -1795,8 +1795,8 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
         func(I_BDEV(inode), arg);
-        spin_lock(&inode_sb_list_lock);
+        spin_lock(&blockdev_superblock->s_inode_list_lock);
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&blockdev_superblock->s_inode_list_lock);
     iput(old_inode);
 }
@@ -1640,9 +1640,7 @@ static void do_async_commit(struct work_struct *work)
      * Tell lockdep about it.
      */
     if (ac->newtrans->type & __TRANS_FREEZABLE)
-        rwsem_acquire_read(
-            &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-            0, 1, _THIS_IP_);
+        __sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);
     current->journal_info = ac->newtrans;
@@ -1681,9 +1679,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
      * async commit thread will be the one to unlock it.
      */
     if (ac->newtrans->type & __TRANS_FREEZABLE)
-        rwsem_release(
-            &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
-            1, _THIS_IP_);
+        __sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);
     schedule_work(&ac->work);
...
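The two btrfs hunks above replace open-coded lockdep-map manipulation with the new __sb_writers_{acquired,release}() helpers from this series ("introduce __sb_writers_{acquired,release}() helpers" in the shortlog). Judging only from the call sites, they re-tag lockdep ownership of the freeze protection when an async commit hands its transaction to a worker thread. A hedged sketch of plausible bodies, assuming they simply wrap the percpu_rwsem_release()/percpu_rwsem_acquire() primitives this series introduces (the real definitions are not shown in this diff):

    /* Sketch only: assumed bodies for the helpers used above. */
    void __sb_writers_release(struct super_block *sb, int level)
    {
        /* the committing task disowns the held freeze protection */
        percpu_rwsem_release(sb->s_writers.rw_sem + level - 1, 1, _THIS_IP_);
    }

    void __sb_writers_acquired(struct super_block *sb, int level)
    {
        /* the async worker claims it before eventually unlocking */
        percpu_rwsem_acquire(sb->s_writers.rw_sem + level - 1, 1, _THIS_IP_);
    }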
@@ -2718,7 +2718,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * This helper attempts to cope with remotely renamed directories
  *
  * It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
+ * dentry->d_parent->d_inode->i_mutex, and rename_lock
  *
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
@@ -2744,7 +2744,6 @@ static int __d_unalias(struct inode *inode,
     __d_move(alias, dentry, false);
     ret = 0;
 out_err:
-    spin_unlock(&inode->i_lock);
     if (m2)
         mutex_unlock(m2);
     if (m1)
@@ -2790,10 +2789,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
     if (S_ISDIR(inode->i_mode)) {
         struct dentry *new = __d_find_any_alias(inode);
         if (unlikely(new)) {
+            /* The reference to new ensures it remains an alias */
+            spin_unlock(&inode->i_lock);
             write_seqlock(&rename_lock);
             if (unlikely(d_ancestor(new, dentry))) {
                 write_sequnlock(&rename_lock);
-                spin_unlock(&inode->i_lock);
                 dput(new);
                 new = ERR_PTR(-ELOOP);
                 pr_warn_ratelimited(
@@ -2812,7 +2812,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
             } else {
                 __d_move(new, dentry, false);
                 write_sequnlock(&rename_lock);
-                spin_unlock(&inode->i_lock);
                 security_d_instantiate(new, inode);
             }
             iput(inode);
@@ -2926,6 +2925,13 @@ static int prepend_path(const struct path *path,
         if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
             struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
+            /* Escaped? */
+            if (dentry != vfsmnt->mnt_root) {
+                bptr = *buffer;
+                blen = *buflen;
+                error = 3;
+                break;
+            }
             /* Global root? */
             if (mnt != parent) {
                 dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
...
@@ -17,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
     struct inode *inode, *toput_inode = NULL;
-    spin_lock(&inode_sb_list_lock);
+    spin_lock(&sb->s_inode_list_lock);
     list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
         spin_lock(&inode->i_lock);
         if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -27,13 +27,15 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
         }
         __iget(inode);
         spin_unlock(&inode->i_lock);
-        spin_unlock(&inode_sb_list_lock);
+        spin_unlock(&sb->s_inode_list_lock);
         invalidate_mapping_pages(inode->i_mapping, 0, -1);
         iput(toput_inode);
         toput_inode = inode;
-        spin_lock(&inode_sb_list_lock);
+        spin_lock(&sb->s_inode_list_lock);
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&sb->s_inode_list_lock);
     iput(toput_inode);
 }
...
@@ -88,7 +88,7 @@ unsigned int dirtytime_expire_interval = 12 * 60 * 60;
 static inline struct inode *wb_inode(struct list_head *head)
 {
-    return list_entry(head, struct inode, i_wb_list);
+    return list_entry(head, struct inode, i_io_list);
 }
 /*
@@ -125,22 +125,22 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb)
 }
 /**
- * inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list
+ * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
  * @inode: inode to be moved
  * @wb: target bdi_writeback
  * @head: one of @wb->b_{dirty|io|more_io}
  *
- * Move @inode->i_wb_list to @list of @wb and set %WB_has_dirty_io.
+ * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
  * Returns %true if @inode is the first occupant of the !dirty_time IO
  * lists; otherwise, %false.
  */
-static bool inode_wb_list_move_locked(struct inode *inode,
+static bool inode_io_list_move_locked(struct inode *inode,
                       struct bdi_writeback *wb,
                       struct list_head *head)
 {
     assert_spin_locked(&wb->list_lock);
-    list_move(&inode->i_wb_list, head);
+    list_move(&inode->i_io_list, head);
     /* dirty_time doesn't count as dirty_io until expiration */
     if (head != &wb->b_dirty_time)
@@ -151,19 +151,19 @@ static bool inode_wb_list_move_locked(struct inode *inode,
 }
 /**
- * inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list
+ * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
  * @inode: inode to be removed
  * @wb: bdi_writeback @inode is being removed from
  *
  * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
  * clear %WB_has_dirty_io if all are empty afterwards.
  */
-static void inode_wb_list_del_locked(struct inode *inode,
+static void inode_io_list_del_locked(struct inode *inode,
                      struct bdi_writeback *wb)
 {
     assert_spin_locked(&wb->list_lock);
-    list_del_init(&inode->i_wb_list);
+    list_del_init(&inode->i_io_list);
     wb_io_lists_depopulated(wb);
 }
@@ -351,7 +351,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
     /*
      * Once I_FREEING is visible under i_lock, the eviction path owns
-     * the inode and we shouldn't modify ->i_wb_list.
+     * the inode and we shouldn't modify ->i_io_list.
      */
     if (unlikely(inode->i_state & I_FREEING))
         goto skip_switch;
@@ -390,16 +390,16 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
      * is always correct including from ->b_dirty_time. The transfer
      * preserves @inode->dirtied_when ordering.
      */
-    if (!list_empty(&inode->i_wb_list)) {
+    if (!list_empty(&inode->i_io_list)) {
         struct inode *pos;
-        inode_wb_list_del_locked(inode, old_wb);
+        inode_io_list_del_locked(inode, old_wb);
         inode->i_wb = new_wb;
-        list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list)
+        list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
             if (time_after_eq(inode->dirtied_when,
                       pos->dirtied_when))
                 break;
-        inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev);
+        inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
     } else {
         inode->i_wb = new_wb;
     }
@@ -961,12 +961,12 @@ void wb_start_background_writeback(struct bdi_writeback *wb)
 /*
  * Remove the inode from the writeback list it is on.
  */
-void inode_wb_list_del(struct inode *inode)
+void inode_io_list_del(struct inode *inode)
 {
     struct bdi_writeback *wb;
     wb = inode_to_wb_and_lock_list(inode);
-    inode_wb_list_del_locked(inode, wb);
+    inode_io_list_del_locked(inode, wb);
     spin_unlock(&wb->list_lock);
 }
@@ -988,7 +988,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
         if (time_before(inode->dirtied_when, tail->dirtied_when))
             inode->dirtied_when = jiffies;
     }
-    inode_wb_list_move_locked(inode, wb, &wb->b_dirty);
+    inode_io_list_move_locked(inode, wb, &wb->b_dirty);
 }
 /*
@@ -996,7 +996,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
  */
 static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 {
-    inode_wb_list_move_locked(inode, wb, &wb->b_more_io);
+    inode_io_list_move_locked(inode, wb, &wb->b_more_io);
 }
 static void inode_sync_complete(struct inode *inode)
@@ -1055,7 +1055,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
         if (older_than_this &&
             inode_dirtied_after(inode, *older_than_this))
             break;
-        list_move(&inode->i_wb_list, &tmp);
+        list_move(&inode->i_io_list, &tmp);
         moved++;
         if (flags & EXPIRE_DIRTY_ATIME)
             set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
@@ -1078,7 +1078,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
         list_for_each_prev_safe(pos, node, &tmp) {
             inode = wb_inode(pos);
             if (inode->i_sb == sb)
-                list_move(&inode->i_wb_list, dispatch_queue);
+                list_move(&inode->i_io_list, dispatch_queue);
         }
     }
 out:
@@ -1232,10 +1232,10 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
             redirty_tail(inode, wb);
     } else if (inode->i_state & I_DIRTY_TIME) {
         inode->dirtied_when = jiffies;
-        inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time);
+        inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
     } else {
         /* The inode is clean. Remove from writeback lists. */
-        inode_wb_list_del_locked(inode, wb);
+        inode_io_list_del_locked(inode, wb);
     }
 }
@@ -1378,7 +1378,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
      * touch it. See comment above for explanation.
      */
     if (!(inode->i_state & I_DIRTY_ALL))
-        inode_wb_list_del_locked(inode, wb);
+        inode_io_list_del_locked(inode, wb);
     spin_unlock(&wb->list_lock);
     inode_sync_complete(inode);
 out:
@@ -1439,7 +1439,9 @@ static long writeback_sb_inodes(struct super_block *sb,
     unsigned long start_time = jiffies;
     long write_chunk;
     long wrote = 0;  /* count both pages and inodes */
+    struct blk_plug plug;
+    blk_start_plug(&plug);
     while (!list_empty(&wb->b_io)) {
         struct inode *inode = wb_inode(wb->b_io.prev);
@@ -1537,6 +1539,7 @@ static long writeback_sb_inodes(struct super_block *sb,
             break;
         }
     }
+    blk_finish_plug(&plug);
     return wrote;
 }
@@ -2088,7 +2091,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
             else
                 dirty_list = &wb->b_dirty_time;
-            wakeup_bdi = inode_wb_list_move_locked(inode, wb,
+            wakeup_bdi = inode_io_list_move_locked(inode, wb,
                                    dirty_list);
             spin_unlock(&wb->list_lock);
@@ -2111,6 +2114,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
+/*
+ * The @s_sync_lock is used to serialise concurrent sync operations
+ * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
+ * Concurrent callers will block on the s_sync_lock rather than doing contending
+ * walks. The queueing maintains sync(2) required behaviour as all the IO that
+ * has been issued up to the time this function is enter is guaranteed to be
+ * completed by the time we have gained the lock and waited for all IO that is
+ * in progress regardless of the order callers are granted the lock.
+ */
 static void wait_sb_inodes(struct super_block *sb)
 {
     struct inode *inode, *old_inode = NULL;
@@ -2121,7 +2133,8 @@ static void wait_sb_inodes(struct super_block *sb)
      */
     WARN_ON(!rwsem_is_locked(&sb->s_umount));
-    spin_lock(&inode_sb_list_lock);
+    mutex_lock(&sb->s_sync_lock);
+    spin_lock(&sb->s_inode_list_lock);
     /*
      * Data integrity sync. Must wait for all pages under writeback,
@@ -2141,14 +2154,14 @@ static void wait_sb_inodes(struct super_block *sb)
         }
         __iget(inode);
         spin_unlock(&inode->i_lock);
-        spin_unlock(&inode_sb_list_lock);
+        spin_unlock(&sb->s_inode_list_lock);
         /*
          * We hold a reference to 'inode' so it couldn't have been
          * removed from s_inodes list while we dropped the
-         * inode_sb_list_lock. We cannot iput the inode now as we can
+         * s_inode_list_lock. We cannot iput the inode now as we can
          * be holding the last reference and we cannot iput it under
-         * inode_sb_list_lock. So we keep the reference and iput it
+         * s_inode_list_lock. So we keep the reference and iput it
          * later.
          */
         iput(old_inode);
@@ -2158,10 +2171,11 @@ static void wait_sb_inodes(struct super_block *sb)
         cond_resched();
-        spin_lock(&inode_sb_list_lock);
+        spin_lock(&sb->s_inode_list_lock);
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&sb->s_inode_list_lock);
     iput(old_inode);
+    mutex_unlock(&sb->s_sync_lock);
 }
 static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
...
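Two patterns from the fs-writeback.c hunks above are worth pulling out. First, block plugging: writeback_sb_inodes() now brackets its whole submission loop with blk_start_plug()/blk_finish_plug(), so requests queue on a per-task list and reach the device as one batch. A minimal sketch of that shape, with submit_one() as a hypothetical stand-in for the per-inode writeback work:

    #include <linux/blkdev.h>
    #include <linux/fs.h>

    static void submit_one(struct inode *inode);    /* hypothetical */

    static void submit_batch(struct inode **inodes, int nr)
    {
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);      /* start batching this task's I/O */
        for (i = 0; i < nr; i++)
            submit_one(inodes[i]);
        blk_finish_plug(&plug);     /* flush the accumulated batch */
    }

Second, wait_sb_inodes() is now serialised on s_sync_lock, so concurrent sync(2) callers queue behind a single walker instead of all contending on the inode list at once.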
@@ -28,16 +28,16 @@
  *   inode->i_state, inode->i_hash, __iget()
  * Inode LRU list locks protect:
  *   inode->i_sb->s_inode_lru, inode->i_lru
- * inode_sb_list_lock protects:
- *   sb->s_inodes, inode->i_sb_list
+ * inode->i_sb->s_inode_list_lock protects:
+ *   inode->i_sb->s_inodes, inode->i_sb_list
  * bdi->wb.list_lock protects:
- *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
+ *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
  * inode_hash_lock protects:
  *   inode_hashtable, inode->i_hash
  *
  * Lock ordering:
  *
- * inode_sb_list_lock
+ * inode->i_sb->s_inode_list_lock
  *   inode->i_lock
  *     Inode LRU list locks
  *
@@ -45,7 +45,7 @@
  *   inode->i_lock
  *
  * inode_hash_lock
- *   inode_sb_list_lock
+ *   inode->i_sb->s_inode_list_lock
  *   inode->i_lock
  *
  * iunique_lock
@@ -57,8 +57,6 @@ static unsigned int i_hash_shift __read_mostly;
 static struct hlist_head *inode_hashtable __read_mostly;
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
 /*
  * Empty aops. Can be used for the cases where the user does not
  * define any of the address_space operations.
@@ -359,7 +357,7 @@ void inode_init_once(struct inode *inode)
     memset(inode, 0, sizeof(*inode));
     INIT_HLIST_NODE(&inode->i_hash);
     INIT_LIST_HEAD(&inode->i_devices);
-    INIT_LIST_HEAD(&inode->i_wb_list);
+    INIT_LIST_HEAD(&inode->i_io_list);
     INIT_LIST_HEAD(&inode->i_lru);
     address_space_init_once(&inode->i_data);
     i_size_ordered_init(inode);
@@ -426,18 +424,18 @@ static void inode_lru_list_del(struct inode *inode)
  */
 void inode_sb_list_add(struct inode *inode)
 {
-    spin_lock(&inode_sb_list_lock);
+    spin_lock(&inode->i_sb->s_inode_list_lock);
     list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&inode->i_sb->s_inode_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 static inline void inode_sb_list_del(struct inode *inode)
 {
     if (!list_empty(&inode->i_sb_list)) {
-        spin_lock(&inode_sb_list_lock);
+        spin_lock(&inode->i_sb->s_inode_list_lock);
         list_del_init(&inode->i_sb_list);
-        spin_unlock(&inode_sb_list_lock);
+        spin_unlock(&inode->i_sb->s_inode_list_lock);
     }
 }
@@ -527,8 +525,8 @@ static void evict(struct inode *inode)
     BUG_ON(!(inode->i_state & I_FREEING));
     BUG_ON(!list_empty(&inode->i_lru));
-    if (!list_empty(&inode->i_wb_list))
-        inode_wb_list_del(inode);
+    if (!list_empty(&inode->i_io_list))
+        inode_io_list_del(inode);
     inode_sb_list_del(inode);
@@ -577,6 +575,7 @@ static void dispose_list(struct list_head *head)
         list_del_init(&inode->i_lru);
         evict(inode);
+        cond_resched();
     }
 }
@@ -594,7 +593,8 @@ void evict_inodes(struct super_block *sb)
     struct inode *inode, *next;
     LIST_HEAD(dispose);
-    spin_lock(&inode_sb_list_lock);
+again:
+    spin_lock(&sb->s_inode_list_lock);
     list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
         if (atomic_read(&inode->i_count))
             continue;
@@ -609,8 +609,20 @@ void evict_inodes(struct super_block *sb)
         inode_lru_list_del(inode);
         spin_unlock(&inode->i_lock);
         list_add(&inode->i_lru, &dispose);
+        /*
+         * We can have a ton of inodes to evict at unmount time given
+         * enough memory, check to see if we need to go to sleep for a
+         * bit so we don't livelock.
+         */
+        if (need_resched()) {
+            spin_unlock(&sb->s_inode_list_lock);
+            cond_resched();
+            dispose_list(&dispose);
+            goto again;
+        }
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&sb->s_inode_list_lock);
     dispose_list(&dispose);
 }
@@ -631,7 +643,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
     struct inode *inode, *next;
     LIST_HEAD(dispose);
-    spin_lock(&inode_sb_list_lock);
+    spin_lock(&sb->s_inode_list_lock);
     list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
         spin_lock(&inode->i_lock);
         if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -654,7 +666,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
         spin_unlock(&inode->i_lock);
         list_add(&inode->i_lru, &dispose);
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&sb->s_inode_list_lock);
     dispose_list(&dispose);
@@ -890,7 +902,7 @@ struct inode *new_inode(struct super_block *sb)
 {
     struct inode *inode;
-    spin_lock_prefetch(&inode_sb_list_lock);
+    spin_lock_prefetch(&sb->s_inode_list_lock);
     inode = new_inode_pseudo(sb);
     if (inode)
...
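Every s_inodes walk converted in this series (iterate_bdevs, drop_pagecache_sb, wait_sb_inodes, add_dquot_ref) repeats one idiom, now under the per-sb lock. Distilled into a sketch, with do_something() as a hypothetical stand-in for the per-inode work:

    static void do_something(struct inode *inode);  /* hypothetical */

    static void walk_sb_inodes(struct super_block *sb)
    {
        struct inode *inode, *old_inode = NULL;

        spin_lock(&sb->s_inode_list_lock);
        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
            spin_lock(&inode->i_lock);
            if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
                spin_unlock(&inode->i_lock);
                continue;
            }
            __iget(inode);          /* pin the inode */
            spin_unlock(&inode->i_lock);
            spin_unlock(&sb->s_inode_list_lock);

            do_something(inode);    /* may sleep: list lock is dropped */

            /* iput() can drop the final reference, so it must never run
             * under s_inode_list_lock; defer it by one iteration. */
            iput(old_inode);
            old_inode = inode;
            spin_lock(&sb->s_inode_list_lock);
        }
        spin_unlock(&sb->s_inode_list_lock);
        iput(old_inode);
    }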
@@ -112,14 +112,13 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
 /*
  * inode.c
  */
-extern spinlock_t inode_sb_list_lock;
 extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
 extern void inode_add_lru(struct inode *inode);
 /*
  * fs-writeback.c
  */
-extern void inode_wb_list_del(struct inode *inode);
+extern void inode_io_list_del(struct inode *inode);
 extern long get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
...
@@ -560,6 +560,24 @@ static int __nd_alloc_stack(struct nameidata *nd)
     return 0;
 }
+/**
+ * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
+ * @path: nameidate to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct path *path)
+{
+    struct vfsmount *mnt = path->mnt;
+    /* Only bind mounts can have disconnected paths */
+    if (mnt->mnt_root == mnt->mnt_sb->s_root)
+        return true;
+    return is_subdir(path->dentry, mnt->mnt_root);
+}
 static inline int nd_alloc_stack(struct nameidata *nd)
 {
     if (likely(nd->depth != EMBEDDED_LEVELS))
@@ -1296,6 +1314,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
                 return -ECHILD;
             nd->path.dentry = parent;
             nd->seq = seq;
+            if (unlikely(!path_connected(&nd->path)))
+                return -ENOENT;
             break;
         } else {
             struct mount *mnt = real_mount(nd->path.mnt);
@@ -1396,7 +1416,7 @@ static void follow_mount(struct path *path)
     }
 }
-static void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot(struct nameidata *nd)
 {
     if (!nd->root.mnt)
         set_root(nd);
@@ -1412,6 +1432,8 @@ static void follow_dotdot(struct nameidata *nd)
             /* rare case of legitimate dget_parent()... */
             nd->path.dentry = dget_parent(nd->path.dentry);
             dput(old);
+            if (unlikely(!path_connected(&nd->path)))
+                return -ENOENT;
             break;
         }
         if (!follow_up(&nd->path))
@@ -1419,6 +1441,7 @@ static void follow_dotdot(struct nameidata *nd)
     }
     follow_mount(&nd->path);
     nd->inode = nd->path.dentry->d_inode;
+    return 0;
 }
 /*
@@ -1634,7 +1657,7 @@ static inline int handle_dots(struct nameidata *nd, int type)
         if (nd->flags & LOOKUP_RCU) {
             return follow_dotdot_rcu(nd);
         } else
-            follow_dotdot(nd);
+            return follow_dotdot(nd);
     }
     return 0;
 }
...
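The namei.c hunks above are the unreachable-path fix: a concurrent rename on the underlying filesystem can move a directory outside a bind mount, after which walking ".." would climb out of the mount entirely. Condensed from the follow_dotdot() hunk, the validated ".." step now looks like this (the helper name here is ours, not the kernel's):

    static int step_dotdot(struct nameidata *nd)
    {
        struct dentry *old = nd->path.dentry;

        nd->path.dentry = dget_parent(old);
        dput(old);
        /* a rename may have carried us outside the bind mount */
        if (unlikely(!path_connected(&nd->path)))
            return -ENOENT;
        return 0;
    }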
@@ -143,17 +143,17 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 /**
  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
- * @list: list of inodes being unmounted (sb->s_inodes)
+ * @sb: superblock being unmounted.
  *
  * Called during unmount with no locks held, so needs to be safe against
- * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
+ * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
  */
-void fsnotify_unmount_inodes(struct list_head *list)
+void fsnotify_unmount_inodes(struct super_block *sb)
 {
     struct inode *inode, *next_i, *need_iput = NULL;
-    spin_lock(&inode_sb_list_lock);
-    list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+    spin_lock(&sb->s_inode_list_lock);
+    list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) {
         struct inode *need_iput_tmp;
         /*
@@ -189,7 +189,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
         spin_unlock(&inode->i_lock);
         /* In case the dropping of a reference would nuke next_i. */
-        while (&next_i->i_sb_list != list) {
+        while (&next_i->i_sb_list != &sb->s_inodes) {
             spin_lock(&next_i->i_lock);
             if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
                         atomic_read(&next_i->i_count)) {
@@ -204,12 +204,12 @@ void fsnotify_unmount_inodes(struct list_head *list)
         }
         /*
-         * We can safely drop inode_sb_list_lock here because either
+         * We can safely drop s_inode_list_lock here because either
          * we actually hold references on both inode and next_i or
          * end of list.  Also no new inodes will be added since the
          * umount has begun.
          */
-        spin_unlock(&inode_sb_list_lock);
+        spin_unlock(&sb->s_inode_list_lock);
         if (need_iput_tmp)
             iput(need_iput_tmp);
@@ -221,7 +221,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
         iput(inode);
-        spin_lock(&inode_sb_list_lock);
+        spin_lock(&sb->s_inode_list_lock);
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&sb->s_inode_list_lock);
 }
@@ -928,7 +928,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
     int reserved = 0;
 #endif
-    spin_lock(&inode_sb_list_lock);
+    spin_lock(&sb->s_inode_list_lock);
     list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
         spin_lock(&inode->i_lock);
         if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -939,7 +939,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
         }
         __iget(inode);
         spin_unlock(&inode->i_lock);
-        spin_unlock(&inode_sb_list_lock);
+        spin_unlock(&sb->s_inode_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
         if (unlikely(inode_get_rsv_space(inode) > 0))
@@ -951,15 +951,15 @@ static void add_dquot_ref(struct super_block *sb, int type)
         /*
          * We hold a reference to 'inode' so it couldn't have been
          * removed from s_inodes list while we dropped the
-         * inode_sb_list_lock We cannot iput the inode now as we can be
+         * s_inode_list_lock. We cannot iput the inode now as we can be
          * holding the last reference and we cannot iput it under
-         * inode_sb_list_lock. So we keep the reference and iput it
+         * s_inode_list_lock. So we keep the reference and iput it
          * later.
          */
         old_inode = inode;
-        spin_lock(&inode_sb_list_lock);
+        spin_lock(&sb->s_inode_list_lock);
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&sb->s_inode_list_lock);
     iput(old_inode);
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1028,7 +1028,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
     struct inode *inode;
     int reserved = 0;
-    spin_lock(&inode_sb_list_lock);
+    spin_lock(&sb->s_inode_list_lock);
     list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
         /*
          * We have to scan also I_NEW inodes because they can already
@@ -1044,7 +1044,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
         }
         spin_unlock(&dq_data_lock);
     }
-    spin_unlock(&inode_sb_list_lock);
+    spin_unlock(&sb->s_inode_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
     if (reserved) {
         printk(KERN_WARNING "VFS (%s): Writes happened after quota"
...
@@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
     return total_objects;
 }
+static void destroy_super_work(struct work_struct *work)
+{
+    struct super_block *s = container_of(work, struct super_block,
+                            destroy_work);
+    int i;
+
+    for (i = 0; i < SB_FREEZE_LEVELS; i++)
+        percpu_free_rwsem(&s->s_writers.rw_sem[i]);
+    kfree(s);
+}
+
+static void destroy_super_rcu(struct rcu_head *head)
+{
+    struct super_block *s = container_of(head, struct super_block, rcu);
+    INIT_WORK(&s->destroy_work, destroy_super_work);
+    schedule_work(&s->destroy_work);
+}
 /**
  *  destroy_super   -   frees a superblock
  *  @s: superblock to free
@@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink,
  */
 static void destroy_super(struct super_block *s)
 {
-    int i;
     list_lru_destroy(&s->s_dentry_lru);
     list_lru_destroy(&s->s_inode_lru);
-    for (i = 0; i < SB_FREEZE_LEVELS; i++)
-        percpu_counter_destroy(&s->s_writers.counter[i]);
     security_sb_free(s);
     WARN_ON(!list_empty(&s->s_mounts));
     kfree(s->s_subtype);
     kfree(s->s_options);
-    kfree_rcu(s, rcu);
+    call_rcu(&s->rcu, destroy_super_rcu);
 }
 /**
@@ -178,19 +193,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
         goto fail;
     for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-        if (percpu_counter_init(&s->s_writers.counter[i], 0,
-                    GFP_KERNEL) < 0)
+        if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
+                    sb_writers_name[i],
+                    &type->s_writers_key[i]))
             goto fail;
-        lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-                 &type->s_writers_key[i], 0);
     }
-    init_waitqueue_head(&s->s_writers.wait);
     init_waitqueue_head(&s->s_writers.wait_unfrozen);
     s->s_bdi = &noop_backing_dev_info;
     s->s_flags = flags;
     INIT_HLIST_NODE(&s->s_instances);
     INIT_HLIST_BL_HEAD(&s->s_anon);
+    mutex_init(&s->s_sync_lock);
     INIT_LIST_HEAD(&s->s_inodes);
+    spin_lock_init(&s->s_inode_list_lock);
     if (list_lru_init_memcg(&s->s_dentry_lru))
         goto fail;
@@ -399,7 +414,7 @@ void generic_shutdown_super(struct super_block *sb)
         sync_filesystem(sb);
         sb->s_flags &= ~MS_ACTIVE;
-        fsnotify_unmount_inodes(&sb->s_inodes);
+        fsnotify_unmount_inodes(sb);
         evict_inodes(sb);
@@ -1146,72 +1161,46 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
  */
 void __sb_end_write(struct super_block *sb, int level)
 {
-    percpu_counter_dec(&sb->s_writers.counter[level-1]);
-    /*
-     * Make sure s_writers are updated before we wake up waiters in
-     * freeze_super().
-     */
-    smp_mb();
-    if (waitqueue_active(&sb->s_writers.wait))
-        wake_up(&sb->s_writers.wait);
-    rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+    percpu_up_read(sb->s_writers.rw_sem + level-1);
 }
 EXPORT_SYMBOL(__sb_end_write);
-#ifdef CONFIG_LOCKDEP
-/*
- * We want lockdep to tell us about possible deadlocks with freezing but
- * it's it bit tricky to properly instrument it. Getting a freeze protection
- * works as getting a read lock but there are subtle problems. XFS for example
- * gets freeze protection on internal level twice in some cases, which is OK
- * only because we already hold a freeze protection also on higher level. Due
- * to these cases we have to tell lockdep we are doing trylock when we
- * already hold a freeze protection for a higher freeze level.
- */
-static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
-                unsigned long ip)
-{
-    int i;
-
-    if (!trylock) {
-        for (i = 0; i < level - 1; i++)
-            if (lock_is_held(&sb->s_writers.lock_map[i])) {
-                trylock = true;
-                break;
-            }
-    }
-    rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
-}
-#endif
 /*
  * This is an internal function, please use sb_start_{write,pagefault,intwrite}
  * instead.
  */
 int __sb_start_write(struct super_block *sb, int level, bool wait)
 {
-retry:
-    if (unlikely(sb->s_writers.frozen >= level)) {
-        if (!wait)
-            return 0;
-        wait_event(sb->s_writers.wait_unfrozen,
-               sb->s_writers.frozen < level);
-    }
+    bool force_trylock = false;
+    int ret = 1;
 #ifdef CONFIG_LOCKDEP
-    acquire_freeze_lock(sb, level, !wait, _RET_IP_);
-#endif
-    percpu_counter_inc(&sb->s_writers.counter[level-1]);
     /*
-     * Make sure counter is updated before we check for frozen.
-     * freeze_super() first sets frozen and then checks the counter.
+     * We want lockdep to tell us about possible deadlocks with freezing
+     * but it's it bit tricky to properly instrument it. Getting a freeze
+     * protection works as getting a read lock but there are subtle
+     * problems. XFS for example gets freeze protection on internal level
+     * twice in some cases, which is OK only because we already hold a
+     * freeze protection also on higher level. Due to these cases we have
+     * to use wait == F (trylock mode) which must not fail.
      */
-    smp_mb();
-    if (unlikely(sb->s_writers.frozen >= level)) {
-        __sb_end_write(sb, level);
-        goto retry;
-    }
-    return 1;
+    if (wait) {
+        int i;
+
+        for (i = 0; i < level - 1; i++)
+            if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
+                force_trylock = true;
+                break;
+            }
+    }
+#endif
+    if (wait && !force_trylock)
+        percpu_down_read(sb->s_writers.rw_sem + level-1);
+    else
+        ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
+    WARN_ON(force_trylock & !ret);
+    return ret;
 }
 EXPORT_SYMBOL(__sb_start_write);
@@ -1221,37 +1210,33 @@ EXPORT_SYMBOL(__sb_start_write);
  * @level: type of writers we wait for (normal vs page fault)
  *
  * This function waits until there are no writers of given type to given file
- * system. Caller of this function should make sure there can be no new writers
- * of type @level before calling this function. Otherwise this function can
- * livelock.
+ * system.
  */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-    s64 writers;
+    percpu_down_write(sb->s_writers.rw_sem + level-1);
     /*
-     * We just cycle-through lockdep here so that it does not complain
-     * about returning with lock to userspace
-     *
-     * FIXME: we should do this before return from freeze_super() after we
-     * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
-     * should re-acquire these locks before s_op->unfreeze_fs(sb). However
-     * this leads to lockdep false-positives, so currently we do the early
-     * release right after acquire.
+     * We are going to return to userspace and forget about this lock, the
+     * ownership goes to the caller of thaw_super() which does unlock.
      */
-    rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
-    rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
-    do {
-        DEFINE_WAIT(wait);
-        /*
-         * We use a barrier in prepare_to_wait() to separate setting
-         * of frozen and checking of the counter
-         */
-        prepare_to_wait(&sb->s_writers.wait, &wait,
-                TASK_UNINTERRUPTIBLE);
-        writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
-        if (writers)
-            schedule();
-        finish_wait(&sb->s_writers.wait, &wait);
-    } while (writers);
+    percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
+}
+
+static void sb_freeze_unlock(struct super_block *sb)
+{
+    int level;
+
+    for (level = 0; level < SB_FREEZE_LEVELS; ++level)
+        percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+
+    for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+        percpu_up_write(sb->s_writers.rw_sem + level);
 }
 /**
@@ -1310,20 +1295,14 @@ int freeze_super(struct super_block *sb)
         return 0;
     }
-    /* From now on, no new normal writers can start */
     sb->s_writers.frozen = SB_FREEZE_WRITE;
-    smp_wmb();
     /* Release s_umount to preserve sb_start_write -> s_umount ordering */
     up_write(&sb->s_umount);
     sb_wait_write(sb, SB_FREEZE_WRITE);
+    down_write(&sb->s_umount);
     /* Now we go and block page faults... */
-    down_write(&sb->s_umount);
     sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
-    smp_wmb();
     sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
     /* All writers are done so after syncing there won't be dirty data */
@@ -1331,7 +1310,6 @@ int freeze_super(struct super_block *sb)
     /* Now wait for internal filesystem counter */
     sb->s_writers.frozen = SB_FREEZE_FS;
-    smp_wmb();
     sb_wait_write(sb, SB_FREEZE_FS);
     if (sb->s_op->freeze_fs) {
@@ -1340,7 +1318,7 @@ int freeze_super(struct super_block *sb)
             printk(KERN_ERR
                 "VFS:Filesystem freeze failed\n");
             sb->s_writers.frozen = SB_UNFROZEN;
-            smp_wmb();
+            sb_freeze_unlock(sb);
             wake_up(&sb->s_writers.wait_unfrozen);
             deactivate_locked_super(sb);
             return ret;
@@ -1372,8 +1350,10 @@ int thaw_super(struct super_block *sb)
         return -EINVAL;
     }
-    if (sb->s_flags & MS_RDONLY)
+    if (sb->s_flags & MS_RDONLY) {
+        sb->s_writers.frozen = SB_UNFROZEN;
         goto out;
+    }
     if (sb->s_op->unfreeze_fs) {
         error = sb->s_op->unfreeze_fs(sb);
@@ -1385,12 +1365,11 @@ int thaw_super(struct super_block *sb)
         }
     }
-out:
     sb->s_writers.frozen = SB_UNFROZEN;
-    smp_wmb();
+    sb_freeze_unlock(sb);
+out:
     wake_up(&sb->s_writers.wait_unfrozen);
     deactivate_locked_super(sb);
     return 0;
 }
 EXPORT_SYMBOL(thaw_super);
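Condensing the freeze/thaw hunks above: freeze_super() now ends with all three rw_sems write-locked while the freezing task returns to userspace, so sb_wait_write() disowns them for lockdep via percpu_rwsem_release(), and thaw_super() reclaims and drops them through sb_freeze_unlock(). The lock life cycle, as a sketch with helper names of our own choosing:

    static void freeze_one_level(struct super_block *sb, int level)
    {
        sb->s_writers.frozen = level;
        percpu_down_write(sb->s_writers.rw_sem + level - 1);
        /* held across the syscall return; hand off lockdep ownership */
        percpu_rwsem_release(sb->s_writers.rw_sem + level - 1, 0, _THIS_IP_);
    }

    static void thaw_all_levels(struct super_block *sb)
    {
        int level;

        /* re-own every level, then unlock in reverse order */
        for (level = 0; level < SB_FREEZE_LEVELS; ++level)
            percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
        for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
            percpu_up_write(sb->s_writers.rw_sem + level);
    }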
@@ -5,5 +5,5 @@
 obj-$(CONFIG_UFS_FS) += ufs.o
 ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
-        namei.o super.o symlink.o truncate.o util.o
+        namei.o super.o symlink.o util.o
 ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
@@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
     if (oldcount == 0) {
         result = ufs_alloc_fragments (inode, cgno, goal, count, err);
         if (result) {
+            write_seqlock(&UFS_I(inode)->meta_lock);
             ufs_cpu_to_data_ptr(sb, p, result);
+            write_sequnlock(&UFS_I(inode)->meta_lock);
             *err = 0;
             UFS_I(inode)->i_lastfrag =
                 max(UFS_I(inode)->i_lastfrag, fragment + count);
@@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
         ufs_change_blocknr(inode, fragment - oldcount, oldcount,
                    uspi->s_sbbase + tmp,
                    uspi->s_sbbase + result, locked_page);
+        write_seqlock(&UFS_I(inode)->meta_lock);
         ufs_cpu_to_data_ptr(sb, p, result);
+        write_sequnlock(&UFS_I(inode)->meta_lock);
         *err = 0;
         UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
                            fragment + count);
...
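The balloc.c hunks above are the writer half of the new UFS meta_lock seqlock: block-pointer updates happen inside write_seqlock(), while the lockless readers in ufs_frag_map() below (grow_chain32()/grow_chain64()) retry with read_seqbegin()/read_seqretry(). The generic shape of that pairing, as a standalone sketch (in UFS the lock lives in ufs_inode_info and guards on-disk pointer fields):

    #include <linux/seqlock.h>
    #include <linux/types.h>

    static DEFINE_SEQLOCK(meta_lock);
    static u64 block_ptr;   /* stand-in for an on-disk block pointer */

    /* writer side, cf. ufs_new_fragments() above */
    static void update_ptr(u64 new)
    {
        write_seqlock(&meta_lock);
        block_ptr = new;
        write_sequnlock(&meta_lock);
    }

    /* reader side, cf. grow_chain32()/grow_chain64() below: loop until
     * a consistent snapshot is observed */
    static u64 read_ptr(void)
    {
        unsigned seq;
        u64 v;

        do {
            seq = read_seqbegin(&meta_lock);
            v = block_ptr;
        } while (read_seqretry(&meta_lock, seq));
        return v;
    }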
@@ -41,9 +41,7 @@
 #include "swab.h"
 #include "util.h"
-static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
-
-static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
+static int ufs_block_to_path(struct inode *inode, sector_t i_block, unsigned offsets[4])
 {
     struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
     int ptrs = uspi->s_apb;
@@ -75,227 +73,232 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
     return n;
 }
+typedef struct {
+    void *p;
+    union {
+        __fs32 key32;
+        __fs64 key64;
+    };
+    struct buffer_head *bh;
+} Indirect;
+
+static inline int grow_chain32(struct ufs_inode_info *ufsi,
+                   struct buffer_head *bh, __fs32 *v,
+                   Indirect *from, Indirect *to)
+{
+    Indirect *p;
+    unsigned seq;
+    to->bh = bh;
+    do {
+        seq = read_seqbegin(&ufsi->meta_lock);
+        to->key32 = *(__fs32 *)(to->p = v);
+        for (p = from; p <= to && p->key32 == *(__fs32 *)p->p; p++)
+            ;
+    } while (read_seqretry(&ufsi->meta_lock, seq));
+    return (p > to);
+}
+
+static inline int grow_chain64(struct ufs_inode_info *ufsi,
+                   struct buffer_head *bh, __fs64 *v,
+                   Indirect *from, Indirect *to)
+{
+    Indirect *p;
+    unsigned seq;
+    to->bh = bh;
+    do {
+        seq = read_seqbegin(&ufsi->meta_lock);
+        to->key64 = *(__fs64 *)(to->p = v);
+        for (p = from; p <= to && p->key64 == *(__fs64 *)p->p; p++)
+            ;
+    } while (read_seqretry(&ufsi->meta_lock, seq));
+    return (p > to);
+}
/* /*
* Returns the location of the fragment from * Returns the location of the fragment from
* the beginning of the filesystem. * the beginning of the filesystem.
*/ */
static u64 ufs_frag_map(struct inode *inode, unsigned offsets[4], int depth)
{
	struct ufs_inode_info *ufsi = UFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
	u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift;
	int shift = uspi->s_apbshift-uspi->s_fpbshift;
	Indirect chain[4], *q = chain;
	unsigned *p;
	unsigned flags = UFS_SB(sb)->s_flags;
	u64 res = 0;

	UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",
		uspi->s_fpbshift, uspi->s_apbmask,
		(unsigned long long)mask);

	if (depth == 0)
		goto no_block;

again:
	p = offsets;

	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
		goto ufs2;

	if (!grow_chain32(ufsi, NULL, &ufsi->i_u1.i_data[*p++], chain, q))
		goto changed;
	if (!q->key32)
		goto no_block;
	while (--depth) {
		__fs32 *ptr;
		struct buffer_head *bh;
		unsigned n = *p++;

		bh = sb_bread(sb, uspi->s_sbbase +
				  fs32_to_cpu(sb, q->key32) + (n>>shift));
		if (!bh)
			goto no_block;
		ptr = (__fs32 *)bh->b_data + (n & mask);
		if (!grow_chain32(ufsi, bh, ptr, chain, ++q))
			goto changed;
		if (!q->key32)
			goto no_block;
	}
	res = fs32_to_cpu(sb, q->key32);
	goto found;

ufs2:
	if (!grow_chain64(ufsi, NULL, &ufsi->i_u1.u2_i_data[*p++], chain, q))
		goto changed;
	if (!q->key64)
		goto no_block;

	while (--depth) {
		__fs64 *ptr;
		struct buffer_head *bh;
		unsigned n = *p++;

		bh = sb_bread(sb, uspi->s_sbbase +
				  fs64_to_cpu(sb, q->key64) + (n>>shift));
		if (!bh)
			goto no_block;
		ptr = (__fs64 *)bh->b_data + (n & mask);
		if (!grow_chain64(ufsi, bh, ptr, chain, ++q))
			goto changed;
		if (!q->key64)
			goto no_block;
	}
	res = fs64_to_cpu(sb, q->key64);
found:
	res += uspi->s_sbbase;
no_block:
	while (q > chain) {
		brelse(q->bh);
		q--;
	}
	return res;

changed:
	while (q > chain) {
		brelse(q->bh);
		q--;
	}
	goto again;
}
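/*
 * Aside: ufs_frag_map() walks the offsets[] path produced by
 * ufs_block_to_path() (not shown in this hunk).  A simplified sketch of
 * that decomposition, assuming a power-of-two branching factor of
 * 1 << apb_shift pointers per indirect block (illustrative only):
 */
static int block_to_path_sketch(sector_t block, unsigned apb_shift,
				unsigned offsets[4])
{
	sector_t apb = (sector_t)1 << apb_shift;
	int n = 0;

	if (block < UFS_NDADDR) {			/* direct pointer */
		offsets[n++] = block;
	} else if ((block -= UFS_NDADDR) < apb) {	/* single indirect */
		offsets[n++] = UFS_IND_BLOCK;
		offsets[n++] = block;
	} else if ((block -= apb) < apb << apb_shift) {	/* double indirect */
		offsets[n++] = UFS_DIND_BLOCK;
		offsets[n++] = block >> apb_shift;
		offsets[n++] = block & (apb - 1);
	} else if ((block -= apb << apb_shift) < apb << (2 * apb_shift)) {
		offsets[n++] = UFS_TIND_BLOCK;		/* triple indirect */
		offsets[n++] = block >> (2 * apb_shift);
		offsets[n++] = (block >> apb_shift) & (apb - 1);
		offsets[n++] = block & (apb - 1);
	}
	return n;	/* depth of the chain; 0 means "block too big" */
}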
/*
* Unpacking tails: we have a file with partial final block and
* we had been asked to extend it. If the fragment being written
* is within the same block, we need to extend the tail just to cover
* that fragment. Otherwise the tail is extended to full block.
*
* Note that we might need to create a _new_ tail, but that will
* be handled elsewhere; this is strictly for resizing old
* ones.
*/
static bool
ufs_extend_tail(struct inode *inode, u64 writes_to,
int *err, struct page *locked_page)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned lastfrag = ufsi->i_lastfrag; /* it's a short file, so unsigned is enough */
unsigned block = ufs_fragstoblks(lastfrag);
unsigned new_size;
void *p;
u64 tmp;
if (writes_to < (lastfrag | uspi->s_fpbmask))
new_size = (writes_to & uspi->s_fpbmask) + 1;
else
new_size = uspi->s_fpb;
p = ufs_get_direct_data_ptr(uspi, ufsi, block);
tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
new_size, err, locked_page);
return tmp != 0;
}
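/*
 * Worked example of the sizing rule above, with 8 fragments per block
 * (s_fpb == 8, s_fpbmask == 7): if the tail ends at fragment 13 and the
 * write lands on fragment 14 (same block, fragments 8..15), new_size
 * becomes (14 & 7) + 1 == 7 fragments; a write at or beyond fragment 15
 * would instead grow the tail to the full 8 fragments.
 */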
/**
 * ufs_inode_getfrag() - allocate new fragment(s)
 * @inode: pointer to inode
 * @index: number of block pointer within the inode's array.
 * @new_fragment: number of new allocated fragment(s)
 * @err: we set it if something goes wrong
 * @new: we set it if we allocate new block
 * @locked_page: for ufs_new_fragments()
 */
static u64
ufs_inode_getfrag(struct inode *inode, unsigned index,
		  sector_t new_fragment, int *err,
		  int *new, struct page *locked_page)
{
	struct ufs_inode_info *ufsi = UFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
	u64 tmp, goal, lastfrag;
	unsigned nfrags = uspi->s_fpb;
	void *p;

        /* TODO : to be done for write support
        if ( (flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
             goto ufs2;
         */

	p = ufs_get_direct_data_ptr(uspi, ufsi, index);
	tmp = ufs_data_ptr_to_cpu(sb, p);
	if (tmp)
		goto out;

	lastfrag = ufsi->i_lastfrag;

	/* will that be a new tail? */
	if (new_fragment < UFS_NDIR_FRAGMENT && new_fragment >= lastfrag)
		nfrags = (new_fragment & uspi->s_fpbmask) + 1;

	goal = 0;
	if (index) {
		goal = ufs_data_ptr_to_cpu(sb,
				 ufs_get_direct_data_ptr(uspi, ufsi, index - 1));
		if (goal)
			goal += uspi->s_fpb;
	}
	tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
				goal, nfrags, err, locked_page);

	if (!tmp) {
		*err = -ENOSPC;
		return 0;
	}

	if (new)
		*new = 1;
	inode->i_ctime = CURRENT_TIME_SEC;
	if (IS_SYNC(inode))
		ufs_sync_inode (inode);
	mark_inode_dirty(inode);
out:
	return tmp + uspi->s_sbbase;
	/* This part : To be implemented ....
	Required only for writing, not required for READ-ONLY.
...@@ -316,95 +319,70 @@ ufs_inode_getfrag(struct inode *inode, u64 fragment,
/**
 * ufs_inode_getblock() - allocate new block
 * @inode: pointer to inode
 * @ind_block: block number of the indirect block
 * @index: number of pointer within the indirect block
 * @new_fragment: number of new allocated fragment
 *	(block will hold this fragment and also uspi->s_fpb-1)
 * @err: see ufs_inode_getfrag()
 * @new: see ufs_inode_getfrag()
 * @locked_page: see ufs_inode_getfrag()
 */
static u64
ufs_inode_getblock(struct inode *inode, u64 ind_block,
		  unsigned index, sector_t new_fragment, int *err,
		  int *new, struct page *locked_page)
{
	struct super_block *sb = inode->i_sb;
	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
	int shift = uspi->s_apbshift - uspi->s_fpbshift;
	u64 tmp = 0, goal;
	struct buffer_head *bh;
	void *p;

	if (!ind_block)
		return 0;

	bh = sb_bread(sb, ind_block + (index >> shift));
	if (unlikely(!bh)) {
		*err = -EIO;
		return 0;
	}

	index &= uspi->s_apbmask >> uspi->s_fpbshift;
	if (uspi->fs_magic == UFS2_MAGIC)
		p = (__fs64 *)bh->b_data + index;
	else
		p = (__fs32 *)bh->b_data + index;

	tmp = ufs_data_ptr_to_cpu(sb, p);
	if (tmp)
		goto out;

	if (index && (uspi->fs_magic == UFS2_MAGIC ?
		      (tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[index-1])) :
		      (tmp = fs32_to_cpu(sb, ((__fs32 *)bh->b_data)[index-1]))))
		goal = tmp + uspi->s_fpb;
	else
		goal = bh->b_blocknr + uspi->s_fpb;
	tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal,
				uspi->s_fpb, err, locked_page);
	if (!tmp)
		goto out;

	if (new)
		*new = 1;

	mark_buffer_dirty(bh);
	if (IS_SYNC(inode))
		sync_dirty_buffer(bh);
	inode->i_ctime = CURRENT_TIME_SEC;
	mark_inode_dirty(inode);
out:
	brelse (bh);
	UFSD("EXIT\n");
	if (tmp)
		tmp += uspi->s_sbbase;
	return tmp;
}
/**
...@@ -412,103 +390,64 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
 * readpage, writepage and so on
 */
static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
{
	struct super_block *sb = inode->i_sb;
	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
	int err = 0, new = 0;
	unsigned offsets[4];
	int depth = ufs_block_to_path(inode, fragment >> uspi->s_fpbshift, offsets);
	u64 phys64 = 0;
	unsigned frag = fragment & uspi->s_fpbmask;

	if (!create) {
		phys64 = ufs_frag_map(inode, offsets, depth);
		if (phys64)
			map_bh(bh_result, sb, phys64 + frag);
		return 0;
	}

        /* This code entered only while writing ....? */

	mutex_lock(&UFS_I(inode)->truncate_mutex);

	UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
	if (unlikely(!depth)) {
		ufs_warning(sb, "ufs_get_block", "block > big");
		err = -EIO;
		goto out;
	}

	if (UFS_I(inode)->i_lastfrag < UFS_NDIR_FRAGMENT) {
		unsigned lastfrag = UFS_I(inode)->i_lastfrag;
		unsigned tailfrags = lastfrag & uspi->s_fpbmask;
		if (tailfrags && fragment >= lastfrag) {
			if (!ufs_extend_tail(inode, fragment,
					     &err, bh_result->b_page))
				goto out;
		}
	}

	if (depth == 1) {
		phys64 = ufs_inode_getfrag(inode, offsets[0], fragment,
					   &err, &new, bh_result->b_page);
	} else {
		int i;
		phys64 = ufs_inode_getfrag(inode, offsets[0], fragment,
					   &err, NULL, NULL);
		for (i = 1; i < depth - 1; i++)
			phys64 = ufs_inode_getblock(inode, phys64, offsets[i],
						fragment, &err, NULL, NULL);
		phys64 = ufs_inode_getblock(inode, phys64, offsets[depth - 1],
					fragment, &err, &new, bh_result->b_page);
	}
out:
	if (phys64) {
		phys64 += frag;
		map_bh(bh_result, sb, phys64);
		if (new)
			set_buffer_new(bh_result);
	}
	mutex_unlock(&UFS_I(inode)->truncate_mutex);
	return err;
}
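/*
 * Aside: ufs_getfrag_block() is a get_block_t callback driven by the
 * generic buffer layer.  A hedged sketch of a lookup-only call (the
 * wrapper name is illustrative; create == 0 never allocates):
 */
static int map_one_fragment(struct inode *inode, sector_t frag,
			    sector_t *disk_frag)
{
	struct buffer_head bh = {};
	int err;

	bh.b_size = 1 << inode->i_blkbits;
	err = ufs_getfrag_block(inode, frag, &bh, 0);
	if (!err && buffer_mapped(&bh))
		*disk_frag = bh.b_blocknr;	/* on-disk location */
	return err;
}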
static int ufs_writepage(struct page *page, struct writeback_control *wbc)
...@@ -526,12 +465,16 @@ int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
	return __block_write_begin(page, pos, len, ufs_getfrag_block);
}
static void ufs_truncate_blocks(struct inode *);
static void ufs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to > inode->i_size) {
		truncate_pagecache(inode, inode->i_size);
		ufs_truncate_blocks(inode);
	}
}
static int ufs_write_begin(struct file *file, struct address_space *mapping,
...@@ -548,6 +491,18 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping,
	return ret;
}
static int ufs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
int ret;
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
if (ret < len)
ufs_write_failed(mapping, pos + len);
return ret;
}
static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping,block,ufs_getfrag_block);
...@@ -557,7 +512,7 @@ const struct address_space_operations ufs_aops = {
	.readpage = ufs_readpage,
	.writepage = ufs_writepage,
	.write_begin = ufs_write_begin,
	.write_end = ufs_write_end,
	.bmap = ufs_bmap
};
...@@ -867,11 +822,7 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	return ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
int ufs_sync_inode (struct inode *inode)
...@@ -888,24 +839,389 @@ void ufs_evict_inode(struct inode * inode)
	truncate_inode_pages_final(&inode->i_data);
	if (want_delete) {
		inode->i_size = 0;
		if (inode->i_blocks)
			ufs_truncate_blocks(inode);
	}

	invalidate_inode_buffers(inode);
	clear_inode(inode);

	if (want_delete)
		ufs_free_inode(inode);
}
struct to_free {
struct inode *inode;
u64 to;
unsigned count;
};
static inline void free_data(struct to_free *ctx, u64 from, unsigned count)
{
if (ctx->count && ctx->to != from) {
ufs_free_blocks(ctx->inode, ctx->to - ctx->count, ctx->count);
ctx->count = 0;
}
ctx->count += count;
ctx->to = from + count;
}
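/*
 * Worked example of the coalescing above: free_data(ctx, 100, 8),
 * free_data(ctx, 108, 8), free_data(ctx, 200, 8) batches the first two
 * runs into a single ufs_free_blocks(inode, 100, 16) call (108 matches
 * ctx->to, so the run keeps growing), then starts a new run at 200; the
 * terminating free_data(ctx, 0, 0) flushes whatever run is still pending.
 */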
#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
static void ufs_trunc_direct(struct inode *inode)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block * sb;
struct ufs_sb_private_info * uspi;
void *p;
u64 frag1, frag2, frag3, frag4, block1, block2;
struct to_free ctx = {.inode = inode};
unsigned i, tmp;
UFSD("ENTER: ino %lu\n", inode->i_ino);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
frag1 = DIRECT_FRAGMENT;
frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag);
frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1);
frag3 = frag4 & ~uspi->s_fpbmask;
block1 = block2 = 0;
if (frag2 > frag3) {
frag2 = frag4;
frag3 = frag4 = 0;
} else if (frag2 < frag3) {
block1 = ufs_fragstoblks (frag2);
block2 = ufs_fragstoblks (frag3);
}
UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu,"
" frag3 %llu, frag4 %llu\n", inode->i_ino,
(unsigned long long)frag1, (unsigned long long)frag2,
(unsigned long long)block1, (unsigned long long)block2,
(unsigned long long)frag3, (unsigned long long)frag4);
if (frag1 >= frag2)
goto next1;
/*
* Free first free fragments
*/
p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1));
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp )
ufs_panic (sb, "ufs_trunc_direct", "internal error");
frag2 -= frag1;
frag1 = ufs_fragnum (frag1);
ufs_free_fragments(inode, tmp + frag1, frag2);
next1:
/*
* Free whole blocks
*/
for (i = block1 ; i < block2; i++) {
p = ufs_get_direct_data_ptr(uspi, ufsi, i);
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp)
continue;
write_seqlock(&ufsi->meta_lock);
ufs_data_ptr_clear(uspi, p);
write_sequnlock(&ufsi->meta_lock);
free_data(&ctx, tmp, uspi->s_fpb);
}
free_data(&ctx, 0, 0);
if (frag3 >= frag4)
goto next3;
/*
* Free last free fragments
*/
p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3));
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp )
ufs_panic(sb, "ufs_truncate_direct", "internal error");
frag4 = ufs_fragnum (frag4);
write_seqlock(&ufsi->meta_lock);
ufs_data_ptr_clear(uspi, p);
write_sequnlock(&ufsi->meta_lock);
ufs_free_fragments (inode, tmp, frag4);
next3:
UFSD("EXIT: ino %lu\n", inode->i_ino);
}
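/*
 * Worked example of the frag1..frag4 partitioning above, again with 8
 * fragments per block: truncating to DIRECT_FRAGMENT == 11 while
 * i_lastfrag == 29 gives frag1 = 11, frag2 = 16, frag3 = 24, frag4 = 29.
 * Fragments 11..15 (the tail of block 1) are freed as fragments, block 2
 * (fragments 16..23) is freed whole via free_data(), and fragments
 * 24..28 at the start of the last partial block are freed as fragments
 * again.
 */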
static void free_full_branch(struct inode *inode, u64 ind_block, int depth)
{
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct ufs_buffer_head *ubh = ubh_bread(sb, ind_block, uspi->s_bsize);
unsigned i;
if (!ubh)
return;
if (--depth) {
for (i = 0; i < uspi->s_apb; i++) {
void *p = ubh_get_data_ptr(uspi, ubh, i);
u64 block = ufs_data_ptr_to_cpu(sb, p);
if (block)
free_full_branch(inode, block, depth);
}
} else {
struct to_free ctx = {.inode = inode};
for (i = 0; i < uspi->s_apb; i++) {
void *p = ubh_get_data_ptr(uspi, ubh, i);
u64 block = ufs_data_ptr_to_cpu(sb, p);
if (block)
free_data(&ctx, block, uspi->s_fpb);
}
free_data(&ctx, 0, 0);
}
ubh_bforget(ubh);
ufs_free_blocks(inode, ind_block, uspi->s_fpb);
}
static void free_branch_tail(struct inode *inode, unsigned from, struct ufs_buffer_head *ubh, int depth)
{
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned i;
if (--depth) {
for (i = from; i < uspi->s_apb ; i++) {
void *p = ubh_get_data_ptr(uspi, ubh, i);
u64 block = ufs_data_ptr_to_cpu(sb, p);
if (block) {
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_data_ptr_clear(uspi, p);
write_sequnlock(&UFS_I(inode)->meta_lock);
ubh_mark_buffer_dirty(ubh);
free_full_branch(inode, block, depth);
}
}
} else {
struct to_free ctx = {.inode = inode};
for (i = from; i < uspi->s_apb; i++) {
void *p = ubh_get_data_ptr(uspi, ubh, i);
u64 block = ufs_data_ptr_to_cpu(sb, p);
if (block) {
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_data_ptr_clear(uspi, p);
write_sequnlock(&UFS_I(inode)->meta_lock);
ubh_mark_buffer_dirty(ubh);
free_data(&ctx, block, uspi->s_fpb);
}
}
free_data(&ctx, 0, 0);
}
if (IS_SYNC(inode) && ubh_buffer_dirty(ubh))
ubh_sync_block(ubh);
ubh_brelse(ubh);
}
static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
{
int err = 0;
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned i, end;
sector_t lastfrag;
struct page *lastpage;
struct buffer_head *bh;
u64 phys64;
lastfrag = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
if (!lastfrag)
goto out;
lastfrag--;
lastpage = ufs_get_locked_page(mapping, lastfrag >>
(PAGE_CACHE_SHIFT - inode->i_blkbits));
if (IS_ERR(lastpage)) {
err = -EIO;
goto out;
}
end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1);
bh = page_buffers(lastpage);
for (i = 0; i < end; ++i)
bh = bh->b_this_page;
err = ufs_getfrag_block(inode, lastfrag, bh, 1);
if (unlikely(err))
goto out_unlock;
if (buffer_new(bh)) {
clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev,
bh->b_blocknr);
	/*
	 * we do not zero out the fragment, because if it was
	 * mapped to a hole it already contains zeroes
	 */
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
set_page_dirty(lastpage);
	}
if (lastfrag >= UFS_IND_FRAGMENT) {
end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1;
phys64 = bh->b_blocknr + 1;
for (i = 0; i < end; ++i) {
bh = sb_getblk(sb, i + phys64);
lock_buffer(bh);
memset(bh->b_data, 0, sb->s_blocksize);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
unlock_buffer(bh);
sync_dirty_buffer(bh);
brelse(bh);
}
}
out_unlock:
ufs_put_locked_page(lastpage);
out:
return err;
}
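/*
 * Example of the lastfrag arithmetic above with 1 KiB fragments
 * (s_fsize == 1024, s_fshift == 10): for size == 5000 bytes,
 * lastfrag = (5000 + 1023) >> 10 == 5, so fragments 0..4 are in use and
 * the decrement leaves the code working on fragment 4, the one holding
 * the end of the file.
 */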
static void __ufs_truncate_blocks(struct inode *inode)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned offsets[4];
int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
int depth2;
unsigned i;
struct ufs_buffer_head *ubh[3];
void *p;
u64 block;
if (!depth)
return;
/* find the last non-zero in offsets[] */
for (depth2 = depth - 1; depth2; depth2--)
if (offsets[depth2])
break;
mutex_lock(&ufsi->truncate_mutex);
if (depth == 1) {
ufs_trunc_direct(inode);
offsets[0] = UFS_IND_BLOCK;
} else {
/* get the blocks that should be partially emptied */
p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
for (i = 0; i < depth2; i++) {
offsets[i]++; /* next branch is fully freed */
block = ufs_data_ptr_to_cpu(sb, p);
if (!block)
break;
ubh[i] = ubh_bread(sb, block, uspi->s_bsize);
if (!ubh[i]) {
write_seqlock(&ufsi->meta_lock);
ufs_data_ptr_clear(uspi, p);
write_sequnlock(&ufsi->meta_lock);
break;
}
p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
}
while (i--)
free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
}
for (i = offsets[0]; i <= UFS_TIND_BLOCK; i++) {
p = ufs_get_direct_data_ptr(uspi, ufsi, i);
block = ufs_data_ptr_to_cpu(sb, p);
if (block) {
write_seqlock(&ufsi->meta_lock);
ufs_data_ptr_clear(uspi, p);
write_sequnlock(&ufsi->meta_lock);
free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
}
}
ufsi->i_lastfrag = DIRECT_FRAGMENT;
mark_inode_dirty(inode);
mutex_unlock(&ufsi->truncate_mutex);
}
static int ufs_truncate(struct inode *inode, loff_t size)
{
int err = 0;
UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n",
inode->i_ino, (unsigned long long)size,
(unsigned long long)i_size_read(inode));
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return -EINVAL;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return -EPERM;
err = ufs_alloc_lastblock(inode, size);
if (err)
goto out;
block_truncate_page(inode->i_mapping, size, ufs_getfrag_block);
truncate_setsize(inode, size);
__ufs_truncate_blocks(inode);
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
out:
UFSD("EXIT: err %d\n", err);
return err;
}
void ufs_truncate_blocks(struct inode *inode)
{
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;
__ufs_truncate_blocks(inode);
}
int ufs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
int error;
error = inode_change_ok(inode, attr);
if (error)
return error;
if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
error = ufs_truncate(inode, attr->ia_size);
if (error)
return error;
}
setattr_copy(inode, attr);
mark_inode_dirty(inode);
return 0;
}
const struct inode_operations ufs_file_inode_operations = {
.setattr = ufs_setattr,
};
...@@ -94,22 +94,6 @@
#include "swab.h"
#include "util.h"
void lock_ufs(struct super_block *sb)
{
struct ufs_sb_info *sbi = UFS_SB(sb);
mutex_lock(&sbi->mutex);
sbi->mutex_owner = current;
}
void unlock_ufs(struct super_block *sb)
{
struct ufs_sb_info *sbi = UFS_SB(sb);
sbi->mutex_owner = NULL;
mutex_unlock(&sbi->mutex);
}
static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
{
	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
...@@ -694,7 +678,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
	struct ufs_super_block_third * usb3;
	unsigned flags;

	lock_ufs(sb);
	mutex_lock(&UFS_SB(sb)->s_lock);

	UFSD("ENTER\n");
...@@ -714,7 +697,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
	UFSD("EXIT\n");
	mutex_unlock(&UFS_SB(sb)->s_lock);
	unlock_ufs(sb);

	return 0;
}
...@@ -758,7 +740,6 @@ static void ufs_put_super(struct super_block *sb)
	ubh_brelse_uspi (sbi->s_uspi);
	kfree (sbi->s_uspi);
	mutex_destroy(&sbi->mutex);
	kfree (sbi);
	sb->s_fs_info = NULL;
	UFSD("EXIT\n");
...@@ -801,7 +782,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
	UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));

	mutex_init(&sbi->mutex);
	mutex_init(&sbi->s_lock);
	spin_lock_init(&sbi->work_lock);
	INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
...@@ -1257,7 +1237,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
	return 0;

failed:
	mutex_destroy(&sbi->mutex);
	if (ubh)
		ubh_brelse_uspi (uspi);
	kfree (uspi);
...@@ -1280,7 +1259,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
	unsigned flags;

	sync_filesystem(sb);
	lock_ufs(sb);
	mutex_lock(&UFS_SB(sb)->s_lock);
	uspi = UFS_SB(sb)->s_uspi;
	flags = UFS_SB(sb)->s_flags;
...@@ -1296,7 +1274,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
	ufs_set_opt (new_mount_opt, ONERROR_LOCK);
	if (!ufs_parse_options (data, &new_mount_opt)) {
		mutex_unlock(&UFS_SB(sb)->s_lock);
		unlock_ufs(sb);
		return -EINVAL;
	}
	if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
...@@ -1304,14 +1281,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
	} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
		pr_err("ufstype can't be changed during remount\n");
		mutex_unlock(&UFS_SB(sb)->s_lock);
		unlock_ufs(sb);
		return -EINVAL;
	}

	if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
		UFS_SB(sb)->s_mount_opt = new_mount_opt;
		mutex_unlock(&UFS_SB(sb)->s_lock);
		unlock_ufs(sb);
		return 0;
	}
...@@ -1335,7 +1310,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#ifndef CONFIG_UFS_FS_WRITE
		pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
		mutex_unlock(&UFS_SB(sb)->s_lock);
		unlock_ufs(sb);
		return -EINVAL;
#else
		if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
...@@ -1345,13 +1319,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
		    ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
			pr_err("this ufstype is read-only supported\n");
			mutex_unlock(&UFS_SB(sb)->s_lock);
			unlock_ufs(sb);
			return -EINVAL;
		}

		if (!ufs_read_cylinder_structures(sb)) {
			pr_err("failed during remounting\n");
			mutex_unlock(&UFS_SB(sb)->s_lock);
			unlock_ufs(sb);
			return -EPERM;
		}
		sb->s_flags &= ~MS_RDONLY;
...@@ -1359,7 +1331,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
	}
	UFS_SB(sb)->s_mount_opt = new_mount_opt;
	mutex_unlock(&UFS_SB(sb)->s_lock);
	unlock_ufs(sb);
	return 0;
}
...@@ -1391,8 +1362,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
	struct ufs_super_block_third *usb3;
	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);

	lock_ufs(sb);
	mutex_lock(&UFS_SB(sb)->s_lock);

	usb3 = ubh_get_usb_third(uspi);

	if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
...@@ -1413,7 +1383,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
	buf->f_fsid.val[0] = (u32)id;
	buf->f_fsid.val[1] = (u32)(id >> 32);
	unlock_ufs(sb);
	mutex_unlock(&UFS_SB(sb)->s_lock);

	return 0;
}
...@@ -1429,6 +1399,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
		return NULL;

	ei->vfs_inode.i_version = 1;
	seqlock_init(&ei->meta_lock);
	mutex_init(&ei->truncate_mutex);
	return &ei->vfs_inode;
}
......
/*
* linux/fs/ufs/truncate.c
*
* Copyright (C) 1998
* Daniel Pirkl <daniel.pirkl@email.cz>
* Charles University, Faculty of Mathematics and Physics
*
* from
*
* linux/fs/ext2/truncate.c
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/fs/minix/truncate.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
/*
* Real random numbers for secure rm added 94/02/18
* Idea from Pierre del Perugia <delperug@gla.ecoledoc.ibp.fr>
*/
/*
 * Adaptation to use page cache and UFS2 write support by
* Evgeniy Dushistov <dushistov@mail.ru>, 2006-2007
*/
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/fcntl.h>
#include <linux/time.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/sched.h>
#include "ufs_fs.h"
#include "ufs.h"
#include "swab.h"
#include "util.h"
/*
* Secure deletion currently doesn't work. It interacts very badly
* with buffers shared with memory mappings, and for that reason
* can't be done in the truncate() routines. It should instead be
* done separately in "release()" before calling the truncate routines
* that will release the actual file blocks.
*
* Linus
*/
#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
static int ufs_trunc_direct(struct inode *inode)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block * sb;
struct ufs_sb_private_info * uspi;
void *p;
u64 frag1, frag2, frag3, frag4, block1, block2;
unsigned frag_to_free, free_count;
unsigned i, tmp;
int retry;
UFSD("ENTER: ino %lu\n", inode->i_ino);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
frag_to_free = 0;
free_count = 0;
retry = 0;
frag1 = DIRECT_FRAGMENT;
frag4 = min_t(u64, UFS_NDIR_FRAGMENT, ufsi->i_lastfrag);
frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1);
frag3 = frag4 & ~uspi->s_fpbmask;
block1 = block2 = 0;
if (frag2 > frag3) {
frag2 = frag4;
frag3 = frag4 = 0;
} else if (frag2 < frag3) {
block1 = ufs_fragstoblks (frag2);
block2 = ufs_fragstoblks (frag3);
}
UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu,"
" frag3 %llu, frag4 %llu\n", inode->i_ino,
(unsigned long long)frag1, (unsigned long long)frag2,
(unsigned long long)block1, (unsigned long long)block2,
(unsigned long long)frag3, (unsigned long long)frag4);
if (frag1 >= frag2)
goto next1;
/*
* Free first free fragments
*/
p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1));
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp )
ufs_panic (sb, "ufs_trunc_direct", "internal error");
frag2 -= frag1;
frag1 = ufs_fragnum (frag1);
ufs_free_fragments(inode, tmp + frag1, frag2);
mark_inode_dirty(inode);
frag_to_free = tmp + frag1;
next1:
/*
* Free whole blocks
*/
for (i = block1 ; i < block2; i++) {
p = ufs_get_direct_data_ptr(uspi, ufsi, i);
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp)
continue;
ufs_data_ptr_clear(uspi, p);
if (free_count == 0) {
frag_to_free = tmp;
free_count = uspi->s_fpb;
} else if (free_count > 0 && frag_to_free == tmp - free_count)
free_count += uspi->s_fpb;
else {
ufs_free_blocks (inode, frag_to_free, free_count);
frag_to_free = tmp;
free_count = uspi->s_fpb;
}
mark_inode_dirty(inode);
}
if (free_count > 0)
ufs_free_blocks (inode, frag_to_free, free_count);
if (frag3 >= frag4)
goto next3;
/*
* Free last free fragments
*/
p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3));
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp )
ufs_panic(sb, "ufs_truncate_direct", "internal error");
frag4 = ufs_fragnum (frag4);
ufs_data_ptr_clear(uspi, p);
ufs_free_fragments (inode, tmp, frag4);
mark_inode_dirty(inode);
next3:
UFSD("EXIT: ino %lu\n", inode->i_ino);
return retry;
}
static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_buffer_head * ind_ubh;
void *ind;
u64 tmp, indirect_block, i, frag_to_free;
unsigned free_count;
int retry;
UFSD("ENTER: ino %lu, offset %llu, p: %p\n",
inode->i_ino, (unsigned long long)offset, p);
BUG_ON(!p);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
frag_to_free = 0;
free_count = 0;
retry = 0;
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp)
return 0;
ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize);
if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
ubh_brelse (ind_ubh);
return 1;
}
if (!ind_ubh) {
ufs_data_ptr_clear(uspi, p);
return 0;
}
indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0;
for (i = indirect_block; i < uspi->s_apb; i++) {
ind = ubh_get_data_ptr(uspi, ind_ubh, i);
tmp = ufs_data_ptr_to_cpu(sb, ind);
if (!tmp)
continue;
ufs_data_ptr_clear(uspi, ind);
ubh_mark_buffer_dirty(ind_ubh);
if (free_count == 0) {
frag_to_free = tmp;
free_count = uspi->s_fpb;
} else if (free_count > 0 && frag_to_free == tmp - free_count)
free_count += uspi->s_fpb;
else {
ufs_free_blocks (inode, frag_to_free, free_count);
frag_to_free = tmp;
free_count = uspi->s_fpb;
}
mark_inode_dirty(inode);
}
if (free_count > 0) {
ufs_free_blocks (inode, frag_to_free, free_count);
}
for (i = 0; i < uspi->s_apb; i++)
if (!ufs_is_data_ptr_zero(uspi,
ubh_get_data_ptr(uspi, ind_ubh, i)))
break;
if (i >= uspi->s_apb) {
tmp = ufs_data_ptr_to_cpu(sb, p);
ufs_data_ptr_clear(uspi, p);
ufs_free_blocks (inode, tmp, uspi->s_fpb);
mark_inode_dirty(inode);
ubh_bforget(ind_ubh);
ind_ubh = NULL;
}
if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh))
ubh_sync_block(ind_ubh);
ubh_brelse (ind_ubh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
return retry;
}
static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_buffer_head *dind_bh;
u64 i, tmp, dindirect_block;
void *dind;
int retry = 0;
UFSD("ENTER: ino %lu\n", inode->i_ino);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
dindirect_block = (DIRECT_BLOCK > offset)
? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0;
retry = 0;
tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp)
return 0;
dind_bh = ubh_bread(sb, tmp, uspi->s_bsize);
if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
ubh_brelse (dind_bh);
return 1;
}
if (!dind_bh) {
ufs_data_ptr_clear(uspi, p);
return 0;
}
for (i = dindirect_block ; i < uspi->s_apb ; i++) {
dind = ubh_get_data_ptr(uspi, dind_bh, i);
tmp = ufs_data_ptr_to_cpu(sb, dind);
if (!tmp)
continue;
retry |= ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind);
ubh_mark_buffer_dirty(dind_bh);
}
for (i = 0; i < uspi->s_apb; i++)
if (!ufs_is_data_ptr_zero(uspi,
ubh_get_data_ptr(uspi, dind_bh, i)))
break;
if (i >= uspi->s_apb) {
tmp = ufs_data_ptr_to_cpu(sb, p);
ufs_data_ptr_clear(uspi, p);
ufs_free_blocks(inode, tmp, uspi->s_fpb);
mark_inode_dirty(inode);
ubh_bforget(dind_bh);
dind_bh = NULL;
}
if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh))
ubh_sync_block(dind_bh);
ubh_brelse (dind_bh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
return retry;
}
static int ufs_trunc_tindirect(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct ufs_inode_info *ufsi = UFS_I(inode);
struct ufs_buffer_head * tind_bh;
u64 tindirect_block, tmp, i;
void *tind, *p;
int retry;
UFSD("ENTER: ino %lu\n", inode->i_ino);
retry = 0;
tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb))
? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0;
p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK);
if (!(tmp = ufs_data_ptr_to_cpu(sb, p)))
return 0;
tind_bh = ubh_bread (sb, tmp, uspi->s_bsize);
if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
ubh_brelse (tind_bh);
return 1;
}
if (!tind_bh) {
ufs_data_ptr_clear(uspi, p);
return 0;
}
for (i = tindirect_block ; i < uspi->s_apb ; i++) {
tind = ubh_get_data_ptr(uspi, tind_bh, i);
retry |= ufs_trunc_dindirect(inode, UFS_NDADDR +
uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind);
ubh_mark_buffer_dirty(tind_bh);
}
for (i = 0; i < uspi->s_apb; i++)
if (!ufs_is_data_ptr_zero(uspi,
ubh_get_data_ptr(uspi, tind_bh, i)))
break;
if (i >= uspi->s_apb) {
tmp = ufs_data_ptr_to_cpu(sb, p);
ufs_data_ptr_clear(uspi, p);
ufs_free_blocks(inode, tmp, uspi->s_fpb);
mark_inode_dirty(inode);
ubh_bforget(tind_bh);
tind_bh = NULL;
}
if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh))
ubh_sync_block(tind_bh);
ubh_brelse (tind_bh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
return retry;
}
static int ufs_alloc_lastblock(struct inode *inode)
{
int err = 0;
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned i, end;
sector_t lastfrag;
struct page *lastpage;
struct buffer_head *bh;
u64 phys64;
lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
if (!lastfrag)
goto out;
lastfrag--;
lastpage = ufs_get_locked_page(mapping, lastfrag >>
(PAGE_CACHE_SHIFT - inode->i_blkbits));
if (IS_ERR(lastpage)) {
err = -EIO;
goto out;
}
end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1);
bh = page_buffers(lastpage);
for (i = 0; i < end; ++i)
bh = bh->b_this_page;
err = ufs_getfrag_block(inode, lastfrag, bh, 1);
if (unlikely(err))
goto out_unlock;
if (buffer_new(bh)) {
clear_buffer_new(bh);
unmap_underlying_metadata(bh->b_bdev,
bh->b_blocknr);
/*
	 * we do not zero out the fragment, because if it was
	 * mapped to a hole it already contains zeroes
*/
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
set_page_dirty(lastpage);
}
if (lastfrag >= UFS_IND_FRAGMENT) {
end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1;
phys64 = bh->b_blocknr + 1;
for (i = 0; i < end; ++i) {
bh = sb_getblk(sb, i + phys64);
lock_buffer(bh);
memset(bh->b_data, 0, sb->s_blocksize);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
unlock_buffer(bh);
sync_dirty_buffer(bh);
brelse(bh);
}
}
out_unlock:
ufs_put_locked_page(lastpage);
out:
return err;
}
int ufs_truncate(struct inode *inode, loff_t old_i_size)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
int retry, err = 0;
UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n",
inode->i_ino, (unsigned long long)i_size_read(inode),
(unsigned long long)old_i_size);
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return -EINVAL;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return -EPERM;
err = ufs_alloc_lastblock(inode);
if (err) {
i_size_write(inode, old_i_size);
goto out;
}
block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
while (1) {
retry = ufs_trunc_direct(inode);
retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
ufs_get_direct_data_ptr(uspi, ufsi,
UFS_IND_BLOCK));
retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb,
ufs_get_direct_data_ptr(uspi, ufsi,
UFS_DIND_BLOCK));
retry |= ufs_trunc_tindirect (inode);
if (!retry)
break;
if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
ufs_sync_inode (inode);
yield();
}
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
ufsi->i_lastfrag = DIRECT_FRAGMENT;
mark_inode_dirty(inode);
out:
UFSD("EXIT: err %d\n", err);
return err;
}
int ufs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
unsigned int ia_valid = attr->ia_valid;
int error;
error = inode_change_ok(inode, attr);
if (error)
return error;
if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
loff_t old_i_size = inode->i_size;
/* XXX(truncate): truncate_setsize should be called last */
truncate_setsize(inode, attr->ia_size);
lock_ufs(inode->i_sb);
error = ufs_truncate(inode, old_i_size);
unlock_ufs(inode->i_sb);
if (error)
return error;
}
setattr_copy(inode, attr);
mark_inode_dirty(inode);
return 0;
}
const struct inode_operations ufs_file_inode_operations = {
.setattr = ufs_setattr,
};
...@@ -24,8 +24,6 @@ struct ufs_sb_info {
	unsigned s_cgno[UFS_MAX_GROUP_LOADED];
	unsigned short s_cg_loaded;
	unsigned s_mount_opt;
	struct mutex mutex;
	struct task_struct *mutex_owner;
	struct super_block *sb;
	int work_queued; /* non-zero if the delayed work is queued */
	struct delayed_work sync_work; /* FS sync delayed work */
...@@ -46,6 +44,8 @@ struct ufs_inode_info {
	__u32	i_oeftflag;
	__u16	i_osync;
	__u64	i_lastfrag;
	seqlock_t meta_lock;
	struct mutex	truncate_mutex;
	__u32	i_dir_start_lookup;
	struct inode vfs_inode;
};
...@@ -122,7 +122,7 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
extern int ufs_write_inode (struct inode *, struct writeback_control *);
extern int ufs_sync_inode (struct inode *);
extern void ufs_evict_inode (struct inode *);
extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);

/* namei.c */
extern const struct file_operations ufs_dir_operations;
...@@ -140,10 +140,6 @@ void ufs_mark_sb_dirty(struct super_block *sb);
extern const struct inode_operations ufs_fast_symlink_inode_operations;
extern const struct inode_operations ufs_symlink_inode_operations;
/* truncate.c */
extern int ufs_truncate (struct inode *, loff_t);
extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
{
	return sb->s_fs_info;
...@@ -170,7 +166,4 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b)
	return do_div(b, uspi->s_fpg);
}

extern void lock_ufs(struct super_block *sb);
extern void unlock_ufs(struct super_block *sb);

#endif /* _UFS_UFS_H */
...@@ -119,8 +119,7 @@ xfs_setfilesize_trans_alloc(
	 * We may pass freeze protection with a transaction. So tell lockdep
	 * we released it.
	 */
	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
	/*
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
...@@ -171,8 +170,7 @@ xfs_setfilesize_ioend(
	 * Similarly for freeze protection.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);

	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}
......
#ifndef _LINUX_FS_H
#define _LINUX_FS_H

#include <linux/linkage.h>
#include <linux/wait.h>
#include <linux/kdev_t.h>
...@@ -30,6 +29,8 @@
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/blk_types.h>
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>

#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
...@@ -636,7 +637,7 @@ struct inode {
	unsigned long		dirtied_time_when;

	struct hlist_node	i_hash;
	struct list_head	i_io_list;	/* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */
...@@ -1281,16 +1282,9 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)

struct sb_writers {
	int			frozen;		/* Is sb frozen? */
	wait_queue_head_t	wait_unfrozen;	/* for get_super_thawed() */
	struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
struct super_block {
...@@ -1316,7 +1310,6 @@ struct super_block {
#endif
	const struct xattr_handler **s_xattr;

	struct list_head	s_inodes;	/* all inodes */
	struct hlist_bl_head	s_anon;		/* anonymous dentries for (nfs) exporting */
	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
	struct block_device	*s_bdev;
...@@ -1382,11 +1375,18 @@ struct super_block {
	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
	struct rcu_head		rcu;
	struct work_struct	destroy_work;

	struct mutex		s_sync_lock;	/* sync serialisation lock */

	/*
	 * Indicates how deep in a filesystem stack this SB is
	 */
	int s_stack_depth;

	/* s_inode_list_lock protects s_inodes */
	spinlock_t		s_inode_list_lock ____cacheline_aligned_in_smp;
	struct list_head	s_inodes;	/* all inodes */
};
extern struct timespec current_fs_time(struct super_block *sb);
...@@ -1398,6 +1398,11 @@ extern struct timespec current_fs_time(struct super_block *sb);
void __sb_end_write(struct super_block *sb, int level);
int __sb_start_write(struct super_block *sb, int level, bool wait);
#define __sb_writers_acquired(sb, lev) \
percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev) \
percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
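/*
 * Aside: with rw_sem[] in place, freeze protection is plain percpu-rwsem
 * usage.  A hedged sketch of what sb_start_write()/sb_end_write() boil
 * down to at the SB_FREEZE_WRITE level (simplified; helper names are
 * illustrative):
 */
static inline void sb_write_lock_sketch(struct super_block *sb)
{
	percpu_down_read(&sb->s_writers.rw_sem[SB_FREEZE_WRITE - 1]);
}

static inline void sb_write_unlock_sketch(struct super_block *sb)
{
	percpu_up_read(&sb->s_writers.rw_sem[SB_FREEZE_WRITE - 1]);
}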
/**
 * sb_end_write - drop write access to a superblock
 * @sb: the super we wrote to
...@@ -2614,7 +2619,7 @@ static inline void insert_inode_hash(struct inode *inode)
extern void __remove_inode_hash(struct inode *);
static inline void remove_inode_hash(struct inode *inode)
{
	if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
		__remove_inode_hash(inode);
}
......
...@@ -368,7 +368,7 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un
extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
extern void fsnotify_get_mark(struct fsnotify_mark *mark);
extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_unmount_inodes(struct super_block *sb);

/* put here because inotify does some weird stuff when destroying watches */
extern void fsnotify_init_event(struct fsnotify_event *event,
...@@ -404,7 +404,7 @@ static inline u32 fsnotify_get_cookie(void)
	return 0;
}

static inline void fsnotify_unmount_inodes(struct super_block *sb)
{}

#endif	/* CONFIG_FSNOTIFY */
......
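The signature change shows up directly at the unmount call site: fsnotify now receives the superblock and takes sb->s_inode_list_lock itself while walking sb->s_inodes. A minimal before/after sketch, with the surrounding unmount code elided:

/* before this series: the caller passed the bare list head */
fsnotify_unmount_inodes(&sb->s_inodes);

/* after: the sb carries both the list and its lock */
fsnotify_unmount_inodes(sb);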
...@@ -672,6 +672,11 @@ static inline void hlist_add_fake(struct hlist_node *n) ...@@ -672,6 +672,11 @@ static inline void hlist_add_fake(struct hlist_node *n)
n->pprev = &n->next; n->pprev = &n->next;
} }
static inline bool hlist_fake(struct hlist_node *h)
{
return h->pprev == &h->next;
}
/* /*
* Move a list from one list head to another. Fixup the pprev * Move a list from one list head to another. Fixup the pprev
* reference of the first entry if it exists. * reference of the first entry if it exists.
......
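The trick behind hlist_add_fake()/hlist_fake(): pprev is pointed back at the node's own next field, so the node claims to be hashed while sitting on no chain, and hlist_fake() simply recognises the self-reference. A tiny self-contained check:

#include <linux/list.h>

static bool example_fake_invariant(void)
{
	struct hlist_node n = { .next = NULL, .pprev = NULL };

	hlist_add_fake(&n);	/* sets n.pprev = &n.next */
	/* looks hashed, is on no list, and is detectably fake */
	return !hlist_unhashed(&n) && hlist_fake(&n);	/* always true */
}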
...@@ -16,6 +16,7 @@ struct percpu_rw_semaphore { ...@@ -16,6 +16,7 @@ struct percpu_rw_semaphore {
}; };
extern void percpu_down_read(struct percpu_rw_semaphore *); extern void percpu_down_read(struct percpu_rw_semaphore *);
extern int percpu_down_read_trylock(struct percpu_rw_semaphore *);
extern void percpu_up_read(struct percpu_rw_semaphore *); extern void percpu_up_read(struct percpu_rw_semaphore *);
extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *);
...@@ -31,4 +32,23 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); ...@@ -31,4 +32,23 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
__percpu_init_rwsem(brw, #brw, &rwsem_key); \ __percpu_init_rwsem(brw, #brw, &rwsem_key); \
}) })
#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_release(&sem->rw_sem.dep_map, 1, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
sem->rw_sem.owner = NULL;
#endif
}
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
}
#endif #endif
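Note the !read branch in percpu_rwsem_release(): besides dropping the lockdep dependency, disowning a write-held semaphore clears rw_sem.owner so optimistic spinners stop spinning on a task that may have left the kernel entirely. A hedged sketch of disowning and re-adopting a write-held percpu rwsem across tasks (initialisation via percpu_init_rwsem() elided):

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore example_sem;

static void example_disown(void)	/* e.g. the task doing the freeze */
{
	percpu_down_write(&example_sem);
	percpu_rwsem_release(&example_sem, false, _THIS_IP_);
	/* returns with the lock still write-held but unowned */
}

static void example_adopt(void)		/* e.g. a different task thawing */
{
	percpu_rwsem_acquire(&example_sem, false, _THIS_IP_);
	percpu_up_write(&example_sem);
}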
...@@ -927,7 +927,6 @@ config NUMA_BALANCING_DEFAULT_ENABLED ...@@ -927,7 +927,6 @@ config NUMA_BALANCING_DEFAULT_ENABLED
menuconfig CGROUPS menuconfig CGROUPS
bool "Control Group support" bool "Control Group support"
select KERNFS select KERNFS
select PERCPU_RWSEM
help help
This option adds support for grouping sets of processes together, for This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory use with process control subsystems such as Cpusets, CFS, memory
......
obj-y += mutex.o semaphore.o rwsem.o obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
...@@ -24,6 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o ...@@ -24,6 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
...@@ -88,6 +88,19 @@ void percpu_down_read(struct percpu_rw_semaphore *brw) ...@@ -88,6 +88,19 @@ void percpu_down_read(struct percpu_rw_semaphore *brw)
__up_read(&brw->rw_sem); __up_read(&brw->rw_sem);
} }
int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
{
if (unlikely(!update_fast_ctr(brw, +1))) {
if (!__down_read_trylock(&brw->rw_sem))
return 0;
atomic_inc(&brw->slow_read_ctr);
__up_read(&brw->rw_sem);
}
rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 1, _RET_IP_);
return 1;
}
void percpu_up_read(struct percpu_rw_semaphore *brw) void percpu_up_read(struct percpu_rw_semaphore *brw)
{ {
rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
......
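The new trylock gives a reader that must not sleep (the non-waiting __sb_start_write() case) a way to back off instead of blocking behind a writer. A hedged usage sketch:

#include <linux/percpu-rwsem.h>

static bool example_try_read_side(struct percpu_rw_semaphore *brw)
{
	if (!percpu_down_read_trylock(brw))
		return false;	/* writer active: caller bails out */
	/* ...short read-side critical section... */
	percpu_up_read(brw);
	return true;
}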
...@@ -53,9 +53,6 @@ config GENERIC_IO ...@@ -53,9 +53,6 @@ config GENERIC_IO
config STMP_DEVICE config STMP_DEVICE
bool bool
config PERCPU_RWSEM
bool
config ARCH_USE_CMPXCHG_LOCKREF config ARCH_USE_CMPXCHG_LOCKREF
bool bool
......
...@@ -55,13 +55,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) ...@@ -55,13 +55,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock); spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_wb_list) list_for_each_entry(inode, &wb->b_dirty, i_io_list)
nr_dirty++; nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_wb_list) list_for_each_entry(inode, &wb->b_io, i_io_list)
nr_io++; nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_wb_list) list_for_each_entry(inode, &wb->b_more_io, i_io_list)
nr_more_io++; nr_more_io++;
list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list) list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
if (inode->i_state & I_DIRTY_TIME) if (inode->i_state & I_DIRTY_TIME)
nr_dirty_time++; nr_dirty_time++;
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
......
...@@ -2143,11 +2143,10 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) ...@@ -2143,11 +2143,10 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
if (S_ISBLK(inode->i_mode)) { if (S_ISBLK(inode->i_mode)) {
p->bdev = bdgrab(I_BDEV(inode)); p->bdev = bdgrab(I_BDEV(inode));
error = blkdev_get(p->bdev, error = blkdev_get(p->bdev,
FMODE_READ | FMODE_WRITE | FMODE_EXCL, FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
sys_swapon);
if (error < 0) { if (error < 0) {
p->bdev = NULL; p->bdev = NULL;
return -EINVAL; return error;
} }
p->old_block_size = block_size(p->bdev); p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE); error = set_blocksize(p->bdev, PAGE_SIZE);
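Two fixes in one hunk: the genuine blkdev_get() error is propagated instead of being flattened to -EINVAL, and the FMODE_EXCL holder becomes the swap_info_struct itself rather than a cookie shared by every swapon. Distinct holders make a second swapon of the same block device fail inside blkdev_get() with -EBUSY, which is what allows the racy duplicate-swapfile scan to be deleted below. A hedged illustration of the holder semantics (names invented, reference counting elided; the real code bdgrab()s first):

#include <linux/blkdev.h>

static void example_exclusive_holders(struct block_device *bdev,
				      struct swap_info_struct *p1,
				      struct swap_info_struct *p2)
{
	int err;

	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, p1);
	/* err == 0: p1 holds the device exclusively */

	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, p2);
	/* err == -EBUSY: a different holder already has it, so the
	 * second swapon fails early instead of racing a list scan */
}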
...@@ -2348,7 +2347,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2348,7 +2347,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
struct filename *name; struct filename *name;
struct file *swap_file = NULL; struct file *swap_file = NULL;
struct address_space *mapping; struct address_space *mapping;
int i;
int prio; int prio;
int error; int error;
union swap_header *swap_header; union swap_header *swap_header;
...@@ -2388,19 +2386,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2388,19 +2386,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->swap_file = swap_file; p->swap_file = swap_file;
mapping = swap_file->f_mapping; mapping = swap_file->f_mapping;
for (i = 0; i < nr_swapfiles; i++) {
struct swap_info_struct *q = swap_info[i];
if (q == p || !q->swap_file)
continue;
if (mapping == q->swap_file->f_mapping) {
error = -EBUSY;
goto bad_swap;
}
}
inode = mapping->host; inode = mapping->host;
/* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */ /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
error = claim_swapfile(p, inode); error = claim_swapfile(p, inode);
if (unlikely(error)) if (unlikely(error))
...@@ -2433,6 +2420,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2433,6 +2420,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
goto bad_swap; goto bad_swap;
} }
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) { if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
int cpu;
p->flags |= SWP_SOLIDSTATE; p->flags |= SWP_SOLIDSTATE;
/* /*
* select a random position to start with to help wear leveling * select a random position to start with to help wear leveling
...@@ -2451,9 +2440,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) ...@@ -2451,9 +2440,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = -ENOMEM; error = -ENOMEM;
goto bad_swap; goto bad_swap;
} }
for_each_possible_cpu(i) { for_each_possible_cpu(cpu) {
struct percpu_cluster *cluster; struct percpu_cluster *cluster;
cluster = per_cpu_ptr(p->percpu_cluster, i); cluster = per_cpu_ptr(p->percpu_cluster, cpu);
cluster_set_null(&cluster->index); cluster_set_null(&cluster->index);
} }
} }
......
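With the ad-hoc duplicate scan gone, 'i' had a single user left, so it becomes a 'cpu' scoped to the SSD branch. The per-CPU allocate-then-initialise idiom used there, as a hedged standalone sketch paraphrasing the hunk (percpu_cluster and cluster_set_null() are local to mm/swapfile.c):

#include <linux/percpu.h>

static int example_init_clusters(struct swap_info_struct *p)
{
	int cpu;

	p->percpu_cluster = alloc_percpu(struct percpu_cluster);
	if (!p->percpu_cluster)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct percpu_cluster *cluster = per_cpu_ptr(p->percpu_cluster, cpu);

		cluster_set_null(&cluster->index);
	}
	return 0;
}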