Commit 847f56eb authored by Linus Torvalds

Merge tag 'xfs-for-linus-3.16-rc5' of git://oss.sgi.com/xfs/xfs

Pull xfs fixes from Dave Chinner:
 "Fixes for low memory performance regressions and a quota inode
  handling regression.

  These are regression fixes for issues recently introduced - the change
  in the stack switch location is fairly important, so I've held off
  sending this update until I was sure that it still addresses the stack
  usage problem the original solved.  So while the commits in the xfs
  tree are recent, it has been under test for several weeks now"

* tag 'xfs-for-linus-3.16-rc5' of git://oss.sgi.com/xfs/xfs:
  xfs: null unused quota inodes when quota is on
  xfs: refine the allocation stack switch
  Revert "xfs: block allocation work needs to be kswapd aware"
parents 59ca9ee4 03e01349
......@@ -4298,8 +4298,8 @@ xfs_bmapi_delay(
}
int
__xfs_bmapi_allocate(
static int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
......@@ -4578,9 +4578,6 @@ xfs_bmapi_write(
bma.flist = flist;
bma.firstblock = firstblock;
if (flags & XFS_BMAPI_STACK_SWITCH)
bma.stack_switch = 1;
while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
......
......@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
#define XFS_BMAPI_STACK_SWITCH 0x080
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
......@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
{ XFS_BMAPI_CONVERT, "CONVERT" }
static inline int xfs_bmapi_aflag(int w)
......
......@@ -248,59 +248,6 @@ xfs_bmap_rtalloc(
return 0;
}
/*
 * Stack switching interfaces for allocation
 */

/*
 * Workqueue callback: perform the actual block allocation on the worker
 * thread's (deep) stack on behalf of a submitter that was short on stack,
 * then wake the submitter via its on-stack completion.
 */
static void
xfs_bmapi_allocate_worker(
	struct work_struct	*work)
{
	/* The bmalloca embeds the work item, so recover it from the callback arg. */
	struct xfs_bmalloca	*args = container_of(work,
						struct xfs_bmalloca, work);
	unsigned long		pflags;
	unsigned long		new_pflags = PF_FSTRANS;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;

	current_set_flags_nested(&pflags, new_pflags);

	/* Result must be stored before complete() releases the waiter. */
	args->result = __xfs_bmapi_allocate(args);
	complete(args->done);

	/*
	 * NOTE(review): after complete() the submitter's stack (and *args) may
	 * be gone; only the worker's own locals are touched past this point.
	 */
	current_restore_flags_nested(&pflags, new_pflags);
}
/*
* Some allocation requests often come in with little stack to work on. Push
* them off to a worker thread so there is lots of stack to use. Otherwise just
* call directly to avoid the context switch overhead here.
*/
int
xfs_bmapi_allocate(
struct xfs_bmalloca *args)
{
DECLARE_COMPLETION_ONSTACK(done);
if (!args->stack_switch)
return __xfs_bmapi_allocate(args);
args->done = &done;
args->kswapd = current_is_kswapd();
INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
queue_work(xfs_alloc_wq, &args->work);
wait_for_completion(&done);
destroy_work_on_stack(&args->work);
return args->result;
}
/*
* Check if the endoff is outside the last extent. If so the caller will grow
* the allocation to a stripe unit boundary. All offsets are considered outside
......
......@@ -55,8 +55,6 @@ struct xfs_bmalloca {
bool userdata;/* set if is user data */
bool aeof; /* allocated space at eof */
bool conv; /* overwriting unwritten extents */
bool stack_switch;
bool kswapd; /* allocation in kswapd context */
int flags;
struct completion *done;
struct work_struct work;
......@@ -66,8 +64,6 @@ struct xfs_bmalloca {
int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
int *committed);
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
int xfs_bmapi_allocate(struct xfs_bmalloca *args);
int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
int whichfork, int *eof);
int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
......
......@@ -33,6 +33,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_alloc.h"
/*
* Cursor allocation zone.
......@@ -2323,7 +2324,7 @@ xfs_btree_rshift(
* record (to be inserted into parent).
*/
STATIC int /* error */
xfs_btree_split(
__xfs_btree_split(
struct xfs_btree_cur *cur,
int level,
union xfs_btree_ptr *ptrp,
......@@ -2503,6 +2504,85 @@ xfs_btree_split(
return error;
}
/*
 * Argument bundle marshalled to the btree-split worker thread.  The first
 * six fields mirror the parameters of __xfs_btree_split(); the rest carry
 * the result back and synchronise with the submitting task.
 */
struct xfs_btree_split_args {
	struct xfs_btree_cur	*cur;		/* cursor for the btree being split */
	int			level;
	union xfs_btree_ptr	*ptrp;
	union xfs_btree_key	*key;
	struct xfs_btree_cur	**curp;
	int			*stat;		/* success/failure */
	int			result;		/* return value of __xfs_btree_split() */
	bool			kswapd;	/* allocation in kswapd context */
	struct completion	*done;		/* signalled when the worker is finished */
	struct work_struct	work;		/* workqueue item queued on xfs_alloc_wq */
};
/*
 * Stack switching interfaces for allocation
 */

/*
 * Workqueue callback: run the real split (__xfs_btree_split) on the worker
 * thread's deep stack, record the result, and wake the waiting submitter.
 */
static void
xfs_btree_split_worker(
	struct work_struct	*work)
{
	/* The args struct embeds the work item; recover it from the callback arg. */
	struct xfs_btree_split_args	*args = container_of(work,
						struct xfs_btree_split_args, work);
	unsigned long		pflags;
	unsigned long		new_pflags = PF_FSTRANS;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;

	current_set_flags_nested(&pflags, new_pflags);

	/* Store the result before complete() releases the waiter. */
	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
					 args->key, args->curp, args->stat);
	complete(args->done);

	/*
	 * NOTE(review): *args lives on the submitter's stack and may vanish
	 * once complete() returns; only worker locals are touched afterwards.
	 */
	current_restore_flags_nested(&pflags, new_pflags);
}
/*
* BMBT split requests often come in with little stack to work on. Push
* them off to a worker thread so there is lots of stack to use. For the other
* btree types, just call directly to avoid the context switch overhead here.
*/
STATIC int /* error */
xfs_btree_split(
struct xfs_btree_cur *cur,
int level,
union xfs_btree_ptr *ptrp,
union xfs_btree_key *key,
struct xfs_btree_cur **curp,
int *stat) /* success/failure */
{
struct xfs_btree_split_args args;
DECLARE_COMPLETION_ONSTACK(done);
if (cur->bc_btnum != XFS_BTNUM_BMAP)
return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
args.cur = cur;
args.level = level;
args.ptrp = ptrp;
args.key = key;
args.curp = curp;
args.stat = stat;
args.done = &done;
args.kswapd = current_is_kswapd();
INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
queue_work(xfs_alloc_wq, &args.work);
wait_for_completion(&done);
destroy_work_on_stack(&args.work);
return args.result;
}
/*
* Copy the old inode root contents into a real block and make the
* broot point to it.
......
......@@ -749,8 +749,7 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
count_fsb,
XFS_BMAPI_STACK_SWITCH,
count_fsb, 0,
&first_block, 1,
imap, &nimaps, &free_list);
if (error)
......
......@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
}
/*
* GQUOTINO and PQUOTINO cannot be used together in versions
* of superblock that do not have pquotino. from->sb_flags
* tells us which quota is active and should be copied to
* disk.
* GQUOTINO and PQUOTINO cannot be used together in versions of
* superblock that do not have pquotino. from->sb_flags tells us which
* quota is active and should be copied to disk. If neither are active,
* make sure we write NULLFSINO to the sb_gquotino field as a quota
* inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
* bit is set.
*
* Note that we don't need to handle the sb_uquotino or sb_pquotino here
* as they do not require any translation. Hence the main sb field loop
* will write them appropriately from the in-core superblock.
*/
if ((*fields & XFS_SB_GQUOTINO) &&
(from->sb_qflags & XFS_GQUOTA_ACCT))
......@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
else if ((*fields & XFS_SB_PQUOTINO) &&
(from->sb_qflags & XFS_PQUOTA_ACCT))
to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
else {
/*
* We can't rely on just the fields being logged to tell us
* that it is safe to write NULLFSINO - we should only do that
* if quotas are not actually enabled. Hence only write
* NULLFSINO if both in-core quota inodes are NULL.
*/
if (from->sb_gquotino == NULLFSINO &&
from->sb_pquotino == NULLFSINO)
to->sb_gquotino = cpu_to_be64(NULLFSINO);
}
*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment