Commit 4dd3c2e5 authored by Linus Torvalds

Merge tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Lots of good bugfixes, including:

   -  fix a number of races in the NFSv4+ state code

   -  fix some shutdown crashes in multiple-network-namespace cases

   -  relax our 4.1 session limits; if you've an artificially low limit
      to the number of 4.1 clients that can mount simultaneously, try
      upgrading"

* tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux: (22 commits)
  SUNRPC: Improve ordering of transport processing
  nfsd: deal with revoked delegations appropriately
  svcrdma: Enqueue after setting XPT_CLOSE in completion handlers
  nfsd: use nfs->ns.inum as net ID
  rpc: remove some BUG()s
  svcrdma: Preserve CB send buffer across retransmits
  nfds: avoid gettimeofday for nfssvc_boot time
  fs, nfsd: convert nfs4_file.fi_ref from atomic_t to refcount_t
  fs, nfsd: convert nfs4_cntl_odstate.co_odcount from atomic_t to refcount_t
  fs, nfsd: convert nfs4_stid.sc_count from atomic_t to refcount_t
  lockd: double unregister of inetaddr notifiers
  nfsd4: catch some false session retries
  nfsd4: fix cached replies to solo SEQUENCE compounds
  sunrcp: make function _svc_create_xprt static
  SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
  nfsd: use ARRAY_SIZE
  nfsd: give out fewer session slots as limit approaches
  nfsd: increase DRC cache limit
  nfsd: remove unnecessary nofilehandle checks
  nfs_common: convert int to bool
  ...
parents 07c455ee 22700f3c
@@ -369,6 +369,7 @@ static int lockd_start_svc(struct svc_serv *serv)
 		printk(KERN_WARNING
 			"lockd_up: svc_rqst allocation failed, error=%d\n",
 			error);
+		lockd_unregister_notifiers();
 		goto out_rqst;
 	}
@@ -459,13 +460,16 @@ int lockd_up(struct net *net)
 	}
 
 	error = lockd_up_net(serv, net);
-	if (error < 0)
-		goto err_net;
+	if (error < 0) {
+		lockd_unregister_notifiers();
+		goto err_put;
+	}
 
 	error = lockd_start_svc(serv);
-	if (error < 0)
-		goto err_start;
+	if (error < 0) {
+		lockd_down_net(serv, net);
+		goto err_put;
+	}
 
 	nlmsvc_users++;
 	/*
 	 * Note: svc_serv structures have an initial use count of 1,
@@ -476,12 +480,6 @@ int lockd_up(struct net *net)
 err_create:
 	mutex_unlock(&nlmsvc_mutex);
 	return error;
-
-err_start:
-	lockd_down_net(serv, net);
-err_net:
-	lockd_unregister_notifiers();
-	goto err_put;
 }
 EXPORT_SYMBOL_GPL(lockd_up);
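The shape of this fix is the interesting part: per the "lockd: double unregister of inetaddr notifiers" entry in the shortlog, the old shared error labels made it possible to run lockd_unregister_notifiers() twice on some paths. Unwinding at the failure site keeps each path's cleanup exactly inverse to its setup. A minimal sketch of the idiom, with hypothetical setup_a()/teardown_a() names that are not from this diff:

	static int bringup(void)
	{
		int err;

		err = setup_a();
		if (err)
			return err;

		err = setup_b();
		if (err) {
			/* undo only what this path actually set up */
			teardown_a();
			return err;
		}
		return 0;
	}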
......
@@ -55,14 +55,7 @@ locks_end_grace(struct lock_manager *lm)
 }
 EXPORT_SYMBOL_GPL(locks_end_grace);
 
-/**
- * locks_in_grace
- *
- * Lock managers call this function to determine when it is OK for them
- * to answer ordinary lock requests, and when they should accept only
- * lock reclaims.
- */
-int
+static bool
 __state_in_grace(struct net *net, bool open)
 {
 	struct list_head *grace_list = net_generic(net, grace_net_id);
@@ -78,15 +71,22 @@ __state_in_grace(struct net *net, bool open)
 	return false;
 }
 
-int locks_in_grace(struct net *net)
+/**
+ * locks_in_grace
+ *
+ * Lock managers call this function to determine when it is OK for them
+ * to answer ordinary lock requests, and when they should accept only
+ * lock reclaims.
+ */
+bool locks_in_grace(struct net *net)
 {
-	return __state_in_grace(net, 0);
+	return __state_in_grace(net, false);
 }
 EXPORT_SYMBOL_GPL(locks_in_grace);
 
-int opens_in_grace(struct net *net)
+bool opens_in_grace(struct net *net)
 {
-	return __state_in_grace(net, 1);
+	return __state_in_grace(net, true);
 }
 EXPORT_SYMBOL_GPL(opens_in_grace);
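For context, a lock manager calls these predicates before granting anything new; during the grace period only reclaims of locks held before the restart may succeed. A hypothetical caller, sketched with an illustrative reclaim flag (the NLM status code is real, the surrounding names are not from this diff):

	/* refuse a fresh lock during grace; allow reclaims */
	if (locks_in_grace(net) && !reclaim)
		return nlm_lck_denied_grace_period;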
......
@@ -12,6 +12,7 @@
 #include <linux/nsproxy.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/uaccess.h>
+#include <linux/kernel.h>
 
 #include "state.h"
 #include "netns.h"
@@ -126,8 +127,6 @@ static struct nfsd_fault_inject_op inject_ops[] = {
 	},
 };
 
-#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
-
 int nfsd_fault_inject_init(void)
 {
 	unsigned int i;
@@ -138,7 +137,7 @@ int nfsd_fault_inject_init(void)
 	if (!debug_dir)
 		goto fail;
 
-	for (i = 0; i < NUM_INJECT_OPS; i++) {
+	for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
 		op = &inject_ops[i];
 		if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
 			goto fail;
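ARRAY_SIZE() (from <linux/kernel.h>, hence the added include) derives the element count from the array's type, so it cannot drift the way the hand-rolled NUM_INJECT_OPS could if the type named in its sizeof ever stopped matching the array. A minimal userspace model of the macro, with illustrative entries; the kernel version additionally fails to compile if handed a pointer instead of an array:

	#include <stdio.h>

	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

	struct op { const char *file; };

	static const struct op inject_ops[] = {
		{ "forget_clients" },	/* illustrative entries */
		{ "forget_locks" },
		{ "forget_openowners" },
	};

	int main(void)
	{
		/* prints 3, and stays correct as entries come and go */
		printf("%zu\n", ARRAY_SIZE(inject_ops));
		return 0;
	}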
......
@@ -107,7 +107,7 @@ struct nfsd_net {
 	bool lockd_up;
 
 	/* Time of server startup */
-	struct timeval nfssvc_boot;
+	struct timespec64 nfssvc_boot;
 
 	/*
 	 * Max number of connections this nfsd container will allow. Defaults
......
@@ -748,8 +748,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
 		*p++ = htonl(resp->committed);
-		*p++ = htonl(nn->nfssvc_boot.tv_sec);
-		*p++ = htonl(nn->nfssvc_boot.tv_usec);
+		/* unique identifier, y2038 overflow can be ignored */
+		*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_nsec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
@@ -1119,8 +1120,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 
 	/* Write verifier */
 	if (resp->status == 0) {
-		*p++ = htonl(nn->nfssvc_boot.tv_sec);
-		*p++ = htonl(nn->nfssvc_boot.tv_usec);
+		/* unique identifier, y2038 overflow can be ignored */
+		*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+		*p++ = htonl(nn->nfssvc_boot.tv_nsec);
 	}
 	return xdr_ressize_check(rqstp, p);
 }
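The value being encoded here is the NFSv3 write verifier: an 8-byte cookie that must change whenever the server reboots, so clients can detect that UNSTABLE writes may have been lost and resend them. Uniqueness across reboots is all that matters, which is why truncating tv_sec to 32 bits is harmless and the new comments can wave off the y2038 wraparound.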
......
@@ -336,7 +336,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
 	trace_layout_recall(&ls->ls_stid.sc_stateid);
 
-	atomic_inc(&ls->ls_stid.sc_count);
+	refcount_inc(&ls->ls_stid.sc_count);
 	nfsd4_run_cb(&ls->ls_recall);
 
 out_unlock:
@@ -441,7 +441,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
 		goto done;
 	}
 
-	atomic_inc(&ls->ls_stid.sc_count);
+	refcount_inc(&ls->ls_stid.sc_count);
 	list_add_tail(&new->lo_perstate, &ls->ls_layouts);
 	new = NULL;
 done:
......
@@ -485,9 +485,6 @@ static __be32
 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    union nfsd4_op_u *u)
 {
-	if (!cstate->current_fh.fh_dentry)
-		return nfserr_nofilehandle;
-
 	u->getfh = &cstate->current_fh;
 	return nfs_ok;
 }
@@ -535,9 +532,6 @@ static __be32
 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
 {
-	if (!cstate->current_fh.fh_dentry)
-		return nfserr_nofilehandle;
-
 	fh_dup2(&cstate->save_fh, &cstate->current_fh);
 	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
 		memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
@@ -570,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 	/*
 	 * This is opaque to client, so no need to byte-swap. Use
-	 * __force to keep sparse happy
+	 * __force to keep sparse happy. y2038 time_t overflow is
+	 * irrelevant in this usage.
 	 */
 	verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-	verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
+	verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
 	memcpy(verifier->data, verf, sizeof(verifier->data));
 }
@@ -703,10 +698,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	   union nfsd4_op_u *u)
 {
 	struct nfsd4_link *link = &u->link;
-	__be32 status = nfserr_nofilehandle;
+	__be32 status;
 
-	if (!cstate->save_fh.fh_dentry)
-		return status;
 	status = nfsd_link(rqstp, &cstate->current_fh,
 			   link->li_name, link->li_namelen, &cstate->save_fh);
 	if (!status)
@@ -850,10 +843,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
 {
 	struct nfsd4_rename *rename = &u->rename;
-	__be32 status = nfserr_nofilehandle;
+	__be32 status;
 
-	if (!cstate->save_fh.fh_dentry)
-		return status;
 	if (opens_in_grace(SVC_NET(rqstp)) &&
 	    !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
 		return nfserr_grace;
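These checks look load-bearing but appear to duplicate validation done elsewhere: the COMPOUND dispatcher already returns nfserr_nofilehandle for any op that requires a current filehandle but lacks one, and the saved-filehandle cases fail cleanly inside nfsd_link()/nfsd_rename(), which verify the filehandles they are handed. That is presumably what the "nfsd: remove unnecessary nofilehandle checks" shortlog entry refers to.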
......
@@ -447,7 +447,7 @@ void nfsd_reset_versions(void)
  */
 static void set_max_drc(void)
 {
-#define NFSD_DRC_SIZE_SHIFT	10
+#define NFSD_DRC_SIZE_SHIFT	7
 	nfsd_drc_max_mem = (nr_free_buffer_pages()
 					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
 	nfsd_drc_mem_used = 0;
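The shift change raises the reply-cache ceiling by a factor of 8. For example, with 1 GiB of free buffer pages (262144 pages of 4 KiB), the old limit worked out to (262144 >> 10) * 4096 = 1 MiB, while the new one allows (262144 >> 7) * 4096 = 8 MiB. Since NFSv4.1 session slots are carved out of this budget, that translates directly into how many clients can mount before slots run out, which is the limit the pull request's cover note suggests upgrading to relax.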
@@ -517,7 +517,7 @@ int nfsd_create_serv(struct net *net)
 		register_inet6addr_notifier(&nfsd_inet6addr_notifier);
 #endif
 	}
-	do_gettimeofday(&nn->nfssvc_boot);		/* record boot time */
+	ktime_get_real_ts64(&nn->nfssvc_boot);	/* record boot time */
 	return 0;
 }
......
@@ -36,6 +36,7 @@
 #define _NFSD4_STATE_H
 
 #include <linux/idr.h>
+#include <linux/refcount.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include "nfsfh.h"
@@ -83,7 +84,7 @@ struct nfsd4_callback_ops {
  * fields that are of general use to any stateid.
  */
 struct nfs4_stid {
-	atomic_t		sc_count;
+	refcount_t		sc_count;
 #define NFS4_OPEN_STID 1
 #define NFS4_LOCK_STID 2
 #define NFS4_DELEG_STID 4
@@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 struct nfsd4_slot {
 	u32	sl_seqid;
 	__be32	sl_status;
+	struct svc_cred sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
+#define NFSD4_SLOT_CACHED	(1 << 3)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -465,7 +468,7 @@ struct nfs4_clnt_odstate {
 	struct nfs4_client	*co_client;
 	struct nfs4_file	*co_file;
 	struct list_head	co_perfile;
-	atomic_t		co_odcount;
+	refcount_t		co_odcount;
 };
 
 /*
@@ -481,7 +484,7 @@ struct nfs4_clnt_odstate {
 * the global state_lock spinlock.
 */
 struct nfs4_file {
-	atomic_t		fi_ref;
+	refcount_t		fi_ref;
 	spinlock_t		fi_lock;
 	struct hlist_node	fi_hash;	/* hash on fi_fhandle */
 	struct list_head	fi_stateids;
@@ -634,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh);
 void put_nfs4_file(struct nfs4_file *fi);
 static inline void get_nfs4_file(struct nfs4_file *fi)
 {
-	atomic_inc(&fi->fi_ref);
+	refcount_inc(&fi->fi_ref);
 }
 struct file *find_any_file(struct nfs4_file *f);
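The atomic_t to refcount_t conversions are mechanical (refcount_set(), refcount_inc() and refcount_dec_and_test() stand in for their atomic_* counterparts), but the new type saturates instead of wrapping and warns on increment-from-zero, so a refcounting bug becomes a loud warning instead of a silent use-after-free. A minimal sketch of the pattern, using an illustrative object type rather than one from this diff:

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct obj {
		refcount_t ref;
	};

	static struct obj *obj_alloc(void)
	{
		struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

		if (o)
			refcount_set(&o->ref, 1);	/* the allocation's reference */
		return o;
	}

	static void obj_get(struct obj *o)
	{
		refcount_inc(&o->ref);	/* WARNs and saturates if ref was 0 */
	}

	static void obj_put(struct obj *o)
	{
		if (refcount_dec_and_test(&o->ref))
			kfree(o);
	}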
......
@@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
 	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
 }
 
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+/*
+ * The session reply cache only needs to cache replies that the client
+ * actually asked us to. But it's almost free for us to cache compounds
+ * consisting of only a SEQUENCE op, so we may as well cache those too.
+ * Also, the protocol doesn't give us a convenient response in the case
+ * of a replay of a solo SEQUENCE op that wasn't cached
+ * (RETRY_UNCACHED_REP can only be returned in the second op of a
+ * compound).
+ */
+static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
 {
-	return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+	return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
 		|| nfsd4_is_solo_sequence(resp);
 }
......
@@ -971,8 +971,8 @@ struct lock_manager {
 struct net;
 void locks_start_grace(struct net *, struct lock_manager *);
 void locks_end_grace(struct lock_manager *);
-int locks_in_grace(struct net *);
-int opens_in_grace(struct net *);
+bool locks_in_grace(struct net *);
+bool opens_in_grace(struct net *);
 
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
......
@@ -47,6 +47,7 @@ struct svc_pool {
 	struct svc_pool_stats	sp_stats;	/* statistics on pool operation */
 #define	SP_TASK_PENDING		(0)		/* still work to do even if no
 						 * xprt is queued. */
+#define SP_CONGESTED		(1)
 	unsigned long		sp_flags;
 } ____cacheline_aligned_in_smp;
......
@@ -486,20 +486,22 @@ TRACE_EVENT(svc_recv,
 	TP_ARGS(rqst, status),
 
 	TP_STRUCT__entry(
-		__field(struct sockaddr *, addr)
 		__field(u32, xid)
 		__field(int, status)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
 	),
 
 	TP_fast_assign(
-		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
 		__entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0;
 		__entry->status = status;
 		__entry->flags = rqst->rq_flags;
+		memcpy(__get_dynamic_array(addr),
+			&rqst->rq_addr, rqst->rq_addrlen);
 	),
 
-	TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", __entry->addr,
+	TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s",
+			(struct sockaddr *)__get_dynamic_array(addr),
 			__entry->xid, __entry->status,
 			show_rqstp_flags(__entry->flags))
 );
@@ -544,22 +546,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
 	TP_ARGS(rqst, status),
 
 	TP_STRUCT__entry(
-		__field(struct sockaddr *, addr)
 		__field(u32, xid)
-		__field(int, dropme)
 		__field(int, status)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
 	),
 
 	TP_fast_assign(
-		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
 		__entry->status = status;
 		__entry->flags = rqst->rq_flags;
+		memcpy(__get_dynamic_array(addr),
+			&rqst->rq_addr, rqst->rq_addrlen);
 	),
 
 	TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s",
-			__entry->addr, __entry->xid,
+			(struct sockaddr *)__get_dynamic_array(addr),
+			__entry->xid,
 			__entry->status, show_rqstp_flags(__entry->flags))
......
@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 		return stat;
 	if (integ_len > buf->len)
 		return stat;
-	if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
-		BUG();
+	if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
+		WARN_ON_ONCE(1);
+		return stat;
+	}
 	/* copy out mic... */
 	if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
-		BUG();
+		return stat;
 	if (mic.len > RPC_MAX_AUTH_SIZE)
 		return stat;
 	mic.data = kmalloc(mic.len, GFP_KERNEL);
@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
 	BUG_ON(integ_len % 4);
 	*p++ = htonl(integ_len);
 	*p++ = htonl(gc->gc_seq);
-	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
-		BUG();
+	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
+		WARN_ON_ONCE(1);
+		goto out_err;
+	}
 	if (resbuf->tail[0].iov_base == NULL) {
 		if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
 			goto out_err;
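Both functions handle buffers shaped by data off the wire, so taking the whole machine down with BUG() on an inconsistency was needlessly severe; a one-time warning plus an error return fails only the request. Since WARN_ON_ONCE() evaluates to its condition, the same idiom can also be written inline, as a sketch with illustrative variable names:

	if (WARN_ON_ONCE(xdr_buf_subsegment(buf, &sub, 0, len)))
		return stat;	/* fail the RPC, not the kernel */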
......
@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
 	svc_xprt_received(new);
 }
 
-int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
-		     struct net *net, const int family,
-		     const unsigned short port, int flags)
+static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+			    struct net *net, const int family,
+			    const unsigned short port, int flags)
 {
 	struct svc_xprt_class *xcl;
@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp = NULL;
 	int cpu;
-	bool queued = false;
 
 	if (!svc_xprt_has_something_to_do(xprt))
 		goto out;
@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 	atomic_long_inc(&pool->sp_stats.packets);
 
-redo_search:
+	dprintk("svc: transport %p put into queue\n", xprt);
+	spin_lock_bh(&pool->sp_lock);
+	list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
+	pool->sp_stats.sockets_queued++;
+	spin_unlock_bh(&pool->sp_lock);
+
 	/* find a thread for this xprt */
 	rcu_read_lock();
 	list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
-		/* Do a lockless check first */
-		if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+		if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
 			continue;
-
-		/*
-		 * Once the xprt has been queued, it can only be dequeued by
-		 * the task that intends to service it. All we can do at that
-		 * point is to try to wake this thread back up so that it can
-		 * do so.
-		 */
-		if (!queued) {
-			spin_lock_bh(&rqstp->rq_lock);
-			if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
-				/* already busy, move on... */
-				spin_unlock_bh(&rqstp->rq_lock);
-				continue;
-			}
-
-			/* this one will do */
-			rqstp->rq_xprt = xprt;
-			svc_xprt_get(xprt);
-			spin_unlock_bh(&rqstp->rq_lock);
-		}
-		rcu_read_unlock();
-
 		atomic_long_inc(&pool->sp_stats.threads_woken);
 		wake_up_process(rqstp->rq_task);
-		put_cpu();
-		goto out;
-	}
-	rcu_read_unlock();
-
-	/*
-	 * We didn't find an idle thread to use, so we need to queue the xprt.
-	 * Do so and then search again. If we find one, we can't hook this one
-	 * up to it directly but we can wake the thread up in the hopes that it
-	 * will pick it up once it searches for a xprt to service.
-	 */
-	if (!queued) {
-		queued = true;
-		dprintk("svc: transport %p put into queue\n", xprt);
-		spin_lock_bh(&pool->sp_lock);
-		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
-		pool->sp_stats.sockets_queued++;
-		spin_unlock_bh(&pool->sp_lock);
-		goto redo_search;
+		goto out_unlock;
 	}
+	set_bit(SP_CONGESTED, &pool->sp_flags);
 	rqstp = NULL;
+out_unlock:
+	rcu_read_unlock();
 	put_cpu();
 out:
 	trace_svc_xprt_do_enqueue(xprt, rqstp);
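The rewrite changes ordering, not mechanism: the transport is now unconditionally queued on pool->sp_sockets before any thread is woken. Because every woken thread dequeues from that list itself, the old dance of searching for an idle thread, handing it rq_xprt under rq_lock, and otherwise queueing and jumping back to redo_search collapses into a single RCU walk in which test_and_set_bit(RQ_BUSY) claims a sleeping thread. When no idle thread exists, SP_CONGESTED records that the pool is saturated, which the receive path below consults.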
@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
 static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 {
-	struct svc_xprt *xprt;
 	struct svc_pool		*pool = rqstp->rq_pool;
 	long			time_left = 0;
 
 	/* rq_xprt should be clear on entry */
 	WARN_ON_ONCE(rqstp->rq_xprt);
 
-	/* Normally we will wait up to 5 seconds for any required
-	 * cache information to be provided.
-	 */
-	rqstp->rq_chandle.thread_wait = 5*HZ;
-
-	xprt = svc_xprt_dequeue(pool);
-	if (xprt) {
-		rqstp->rq_xprt = xprt;
-
-		/* As there is a shortage of threads and this request
-		 * had to be queued, don't allow the thread to wait so
-		 * long for cache updates.
-		 */
-		rqstp->rq_chandle.thread_wait = 1*HZ;
-		clear_bit(SP_TASK_PENDING, &pool->sp_flags);
-		return xprt;
-	}
+	rqstp->rq_xprt = svc_xprt_dequeue(pool);
+	if (rqstp->rq_xprt)
+		goto out_found;
 
 	/*
 	 * We have to be able to interrupt this wait
 	 * to bring down the daemons ...
 	 */
 	set_current_state(TASK_INTERRUPTIBLE);
+	smp_mb__before_atomic();
+	clear_bit(SP_CONGESTED, &pool->sp_flags);
 	clear_bit(RQ_BUSY, &rqstp->rq_flags);
-	smp_mb();
+	smp_mb__after_atomic();
 
 	if (likely(rqst_should_sleep(rqstp)))
 		time_left = schedule_timeout(timeout);
@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	try_to_freeze();
 
-	spin_lock_bh(&rqstp->rq_lock);
 	set_bit(RQ_BUSY, &rqstp->rq_flags);
-	spin_unlock_bh(&rqstp->rq_lock);
-
-	xprt = rqstp->rq_xprt;
-	if (xprt != NULL)
-		return xprt;
+	smp_mb__after_atomic();
+	rqstp->rq_xprt = svc_xprt_dequeue(pool);
+	if (rqstp->rq_xprt)
+		goto out_found;
 
 	if (!time_left)
 		atomic_long_inc(&pool->sp_stats.threads_timedout);
@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 	if (signalled() || kthread_should_stop())
 		return ERR_PTR(-EINTR);
 	return ERR_PTR(-EAGAIN);
+out_found:
+	/* Normally we will wait up to 5 seconds for any required
+	 * cache information to be provided.
+	 */
+	if (!test_bit(SP_CONGESTED, &pool->sp_flags))
+		rqstp->rq_chandle.thread_wait = 5*HZ;
+	else
+		rqstp->rq_chandle.thread_wait = 1*HZ;
+	return rqstp->rq_xprt;
 }
 
 static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
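On the other end, each thread clears SP_CONGESTED before sleeping and consults it once it holds a transport: if the bit is set again, enqueuers have been failing to find idle threads, so the thread grants only 1 second instead of 5 for upcall cache information and returns to the pool sooner. The smp_mb__before_atomic()/smp_mb__after_atomic() pairs stand in for the old full smp_mb() and the rq_lock critical section, providing just the ordering the RQ_BUSY and SP_CONGESTED bit operations need against the surrounding dequeue and sleep checks.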
......
@@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	if (ret)
 		goto out_err;
 
+	/* Bump page refcnt so Send completion doesn't release
+	 * the rq_buffer before all retransmits are complete.
+	 */
+	get_page(virt_to_page(rqst->rq_buffer));
 	ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
 	if (ret)
 		goto out_unmap;
@@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
 		return -EINVAL;
 	}
 
-	/* svc_rdma_sendto releases this page */
 	page = alloc_page(RPCRDMA_DEF_GFP);
 	if (!page)
 		return -ENOMEM;
@@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
 {
 	struct rpc_rqst *rqst = task->tk_rqstp;
 
+	put_page(virt_to_page(rqst->rq_buffer));
 	kfree(rqst->rq_rbuffer);
 }
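This pair of hunks fixes a buffer lifetime, per the shortlog's "Preserve CB send buffer across retransmits": previously the Send completion released the page backing rq_buffer, but the RPC layer may still need to retransmit from it. Taking an extra page reference before posting the Send, and dropping it only in xprt_rdma_bc_free() once the rpc_rqst is truly finished, keeps the callback buffer valid no matter when the completion fires.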
......
@@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
 			ib_event_msg(event->event), event->event,
 			event->element.qp);
 		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		svc_xprt_enqueue(xprt);
 		break;
 	}
 }
@@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 	if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
 		goto out;
-	svc_xprt_enqueue(&xprt->sc_xprt);
-	goto out;
+	goto out_enqueue;
 
 flushed:
 	if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -333,6 +333,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
 	svc_rdma_put_context(ctxt, 1);
 
+out_enqueue:
+	svc_xprt_enqueue(&xprt->sc_xprt);
 out:
 	svc_xprt_put(&xprt->sc_xprt);
 }
@@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		svc_xprt_enqueue(&xprt->sc_xprt);
 		if (wc->status != IB_WC_WR_FLUSH_ERR)
 			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
 			       ib_wc_status_msg(wc->status),
@@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
 			xprt, cma_id);
-		if (xprt)
+		if (xprt) {
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_xprt_enqueue(&xprt->sc_xprt);
+		}
 		break;
 
 	default:
......
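All of these hunks apply the same rule, matching the "Enqueue after setting XPT_CLOSE in completion handlers" shortlog entry: set_bit(XPT_CLOSE) only marks the transport dead, and nothing actually tears it down until a server thread processes it, so every completion or connection-manager handler that sets the bit must also call svc_xprt_enqueue() to schedule that processing. Without the enqueue, a transport that fails while idle could linger until unrelated traffic happened to queue it.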