Commit c742b634 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-5.8' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - Keep nfsd clients from unnecessarily breaking their own
     delegations.

     Note this requires a small kthreadd addition. The result is Tejun
     Heo's suggestion (see link), and he was OK with this going through
     my tree.

   - Patch nfsd/clients/ to display filenames, and to fix byte-order
     when displaying stateid's.

   - fix a module loading/unloading bug, from Neil Brown.

   - A big series from Chuck Lever with RPC/RDMA and tracing
     improvements, and lay some groundwork for RPC-over-TLS"

Link: https://lore.kernel.org/r/1588348912-24781-1-git-send-email-bfields@redhat.com

* tag 'nfsd-5.8' of git://linux-nfs.org/~bfields/linux: (49 commits)
  sunrpc: use kmemdup_nul() in gssp_stringify()
  nfsd: safer handling of corrupted c_type
  nfsd4: make drc_slab global, not per-net
  SUNRPC: Remove unreachable error condition in rpcb_getport_async()
  nfsd: Fix svc_xprt refcnt leak when setup callback client failed
  sunrpc: clean up properly in gss_mech_unregister()
  sunrpc: svcauth_gss_register_pseudoflavor must reject duplicate registrations.
  sunrpc: check that domain table is empty at module unload.
  NFSD: Fix improperly-formatted Doxygen comments
  NFSD: Squash an annoying compiler warning
  SUNRPC: Clean up request deferral tracepoints
  NFSD: Add tracepoints for monitoring NFSD callbacks
  NFSD: Add tracepoints to the NFSD state management code
  NFSD: Add tracepoints to NFSD's duplicate reply cache
  SUNRPC: svc_show_status() macro should have enum definitions
  SUNRPC: Restructure svc_udp_recvfrom()
  SUNRPC: Refactor svc_recvfrom()
  SUNRPC: Clean up svc_release_skb() functions
  SUNRPC: Refactor recvfrom path dealing with incomplete TCP receives
  SUNRPC: Replace dprintk() call sites in TCP receive path
  ...
parents b29482fd 1eb2f96d
......@@ -429,6 +429,7 @@ prototypes::
int (*lm_grant)(struct file_lock *, struct file_lock *, int);
void (*lm_break)(struct file_lock *); /* break_lease callback */
int (*lm_change)(struct file_lock **, int);
bool (*lm_breaker_owns_lease)(struct file_lock *);
locking rules:
......@@ -439,6 +440,7 @@ lm_notify: yes yes no
lm_grant: no no no
lm_break: yes no no
lm_change yes no no
lm_breaker_owns_lease: no no no
========== ============= ================= =========
buffer_head
......
......@@ -1557,6 +1557,9 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
{
bool rc;
if (lease->fl_lmops->lm_breaker_owns_lease
&& lease->fl_lmops->lm_breaker_owns_lease(lease))
return false;
if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
rc = false;
goto trace;
......
......@@ -78,6 +78,8 @@ enum {
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
......
......@@ -139,7 +139,6 @@ struct nfsd_net {
* Duplicate reply cache
*/
struct nfsd_drc_bucket *drc_hashtbl;
struct kmem_cache *drc_slab;
/* max number of entries allowed in the cache */
unsigned int max_drc_entries;
......
......@@ -38,6 +38,7 @@
#include "nfsd.h"
#include "state.h"
#include "netns.h"
#include "trace.h"
#include "xdr4cb.h"
#include "xdr4.h"
......@@ -904,16 +905,20 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
(clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
(clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) {
trace_nfsd_cb_setup_err(clp, -EINVAL);
return -EINVAL;
}
args.client_name = clp->cl_cred.cr_principal;
args.prognumber = conn->cb_prog;
args.protocol = XPRT_TRANSPORT_TCP;
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
if (!conn->cb_xprt)
if (!conn->cb_xprt) {
trace_nfsd_cb_setup_err(clp, -EINVAL);
return -EINVAL;
}
clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
......@@ -925,32 +930,27 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
/* Create RPC client */
client = rpc_create(&args);
if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client: %ld\n",
PTR_ERR(client));
trace_nfsd_cb_setup_err(clp, PTR_ERR(client));
return PTR_ERR(client);
}
cred = get_backchannel_cred(clp, client, ses);
if (!cred) {
trace_nfsd_cb_setup_err(clp, -ENOMEM);
rpc_shutdown_client(client);
return -ENOMEM;
}
clp->cl_cb_client = client;
clp->cl_cb_cred = cred;
trace_nfsd_cb_setup(clp);
return 0;
}
static void warn_no_callback_path(struct nfs4_client *clp, int reason)
{
dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
(int)clp->cl_name.len, clp->cl_name.data, reason);
}
static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
clp->cl_cb_state = NFSD4_CB_DOWN;
warn_no_callback_path(clp, reason);
trace_nfsd_cb_state(clp);
}
static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
......@@ -958,17 +958,20 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
clp->cl_cb_state = NFSD4_CB_FAULT;
warn_no_callback_path(clp, reason);
trace_nfsd_cb_state(clp);
}
static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
{
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
trace_nfsd_cb_done(clp, task->tk_status);
if (task->tk_status)
nfsd4_mark_cb_down(clp, task->tk_status);
else
else {
clp->cl_cb_state = NFSD4_CB_UP;
trace_nfsd_cb_state(clp);
}
}
static void nfsd4_cb_probe_release(void *calldata)
......@@ -993,6 +996,7 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
void nfsd4_probe_callback(struct nfs4_client *clp)
{
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
trace_nfsd_cb_state(clp);
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
nfsd4_run_cb(&clp->cl_cb_null);
}
......@@ -1009,6 +1013,7 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
spin_lock(&clp->cl_lock);
memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
spin_unlock(&clp->cl_lock);
trace_nfsd_cb_state(clp);
}
/*
......@@ -1165,8 +1170,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;
dprintk("%s: minorversion=%d\n", __func__,
clp->cl_minorversion);
trace_nfsd_cb_done(clp, task->tk_status);
if (!nfsd4_cb_sequence_done(task, cb))
return;
......@@ -1271,6 +1275,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
* kill the old client:
*/
if (clp->cl_cb_client) {
trace_nfsd_cb_shutdown(clp);
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred);
......@@ -1301,6 +1306,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
nfsd4_mark_cb_down(clp, err);
if (c)
svc_xprt_put(c->cn_xprt);
return;
}
}
......@@ -1314,6 +1321,8 @@ nfsd4_run_cb_work(struct work_struct *work)
struct rpc_clnt *clnt;
int flags;
trace_nfsd_cb_work(clp, cb->cb_msg.rpc_proc->p_name);
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
} else {
......
......@@ -1155,7 +1155,7 @@ extern void nfs_sb_deactive(struct super_block *sb);
#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys"
/**
/*
* Support one copy source server for now.
*/
static __be32
......@@ -1245,10 +1245,9 @@ nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
mntput(ss_mnt);
}
/**
* nfsd4_setup_inter_ssc
*
/*
* Verify COPY destination stateid.
*
* Connect to the source server with NFSv4.1.
* Create the source struct file for nfsd_copy_range.
* Called with COPY cstate:
......@@ -2302,6 +2301,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
}
check_if_stalefh_allowed(args);
rqstp->rq_lease_breaker = (void **)&cstate->clp;
trace_nfsd_compound(rqstp, args->opcnt);
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
......
......@@ -51,6 +51,7 @@
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
......@@ -167,9 +168,6 @@ renew_client_locked(struct nfs4_client *clp)
return;
}
dprintk("renewing client (clientid %08x/%08x)\n",
clp->cl_clientid.cl_boot,
clp->cl_clientid.cl_id);
list_move_tail(&clp->cl_lru, &nn->client_lru);
clp->cl_time = ktime_get_boottime_seconds();
}
......@@ -1922,8 +1920,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
*/
if (clid->cl_boot == (u32)nn->boot_time)
return 0;
dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n",
clid->cl_boot, clid->cl_id, nn->boot_time);
trace_nfsd_clid_stale(clid);
return 1;
}
......@@ -2406,6 +2403,11 @@ static void states_stop(struct seq_file *s, void *v)
spin_unlock(&clp->cl_lock);
}
static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f)
{
seq_printf(s, "filename: \"%pD2\"", f->nf_file);
}
static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
{
struct inode *inode = f->nf_inode;
......@@ -2422,6 +2424,12 @@ static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
}
static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid)
{
seq_printf(s, "0x%.8x", stid->si_generation);
seq_printf(s, "%12phN", &stid->si_opaque);
}
static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_ol_stateid *ols;
......@@ -2437,7 +2445,9 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
nf = st->sc_file;
file = find_any_file(nf);
seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid);
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
seq_printf(s, ": { type: open, ");
access = bmap_to_share_mode(ols->st_access_bmap);
deny = bmap_to_share_mode(ols->st_deny_bmap);
......@@ -2451,6 +2461,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
nfs4_show_superblock(s, file);
seq_printf(s, ", ");
nfs4_show_fname(s, file);
seq_printf(s, ", ");
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
nfsd_file_put(file);
......@@ -2470,7 +2482,9 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
nf = st->sc_file;
file = find_any_file(nf);
seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid);
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
seq_printf(s, ": { type: lock, ");
/*
* Note: a lock stateid isn't really the same thing as a lock,
......@@ -2482,6 +2496,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
nfs4_show_superblock(s, file);
/* XXX: open stateid? */
seq_printf(s, ", ");
nfs4_show_fname(s, file);
seq_printf(s, ", ");
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
nfsd_file_put(file);
......@@ -2499,7 +2515,9 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
nf = st->sc_file;
file = nf->fi_deleg_file;
seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid);
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
seq_printf(s, ": { type: deleg, ");
/* Kinda dead code as long as we only support read delegs: */
seq_printf(s, "access: %s, ",
......@@ -2508,6 +2526,8 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
/* XXX: lease time, whether it's being recalled. */
nfs4_show_superblock(s, file);
seq_printf(s, ", ");
nfs4_show_fname(s, file);
seq_printf(s, " }\n");
return 0;
......@@ -2521,11 +2541,15 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
file = ls->ls_file;
seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid);
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
seq_printf(s, ": { type: layout, ");
/* XXX: What else would be useful? */
nfs4_show_superblock(s, file);
seq_printf(s, ", ");
nfs4_show_fname(s, file);
seq_printf(s, " }\n");
return 0;
......@@ -2845,14 +2869,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
conn->cb_prog = se->se_callback_prog;
conn->cb_ident = se->se_callback_ident;
memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
trace_nfsd_cb_args(clp, conn);
return;
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
conn->cb_addrlen = 0;
dprintk("NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
trace_nfsd_cb_nodelegs(clp);
return;
}
......@@ -3458,6 +3480,45 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
return nfs_ok;
}
static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
{
struct nfsd4_conn *c;
list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_xprt == xpt) {
return c;
}
}
return NULL;
}
static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst,
struct nfsd4_session *session, u32 req)
{
struct nfs4_client *clp = session->se_client;
struct svc_xprt *xpt = rqst->rq_xprt;
struct nfsd4_conn *c;
__be32 status;
/* Following the last paragraph of RFC 5661 Section 18.34.3: */
spin_lock(&clp->cl_lock);
c = __nfsd4_find_conn(xpt, session);
if (!c)
status = nfserr_noent;
else if (req == c->cn_flags)
status = nfs_ok;
else if (req == NFS4_CDFC4_FORE_OR_BOTH &&
c->cn_flags != NFS4_CDFC4_BACK)
status = nfs_ok;
else if (req == NFS4_CDFC4_BACK_OR_BOTH &&
c->cn_flags != NFS4_CDFC4_FORE)
status = nfs_ok;
else
status = nfserr_inval;
spin_unlock(&clp->cl_lock);
return status;
}
__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
......@@ -3479,6 +3540,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
status = nfserr_wrong_cred;
if (!nfsd4_mach_creds_match(session->se_client, rqstp))
goto out;
status = nfsd4_match_existing_connection(rqstp, session, bcts->dir);
if (status == nfs_ok || status == nfserr_inval)
goto out;
status = nfsd4_map_bcts_dir(&bcts->dir);
if (status)
goto out;
......@@ -3544,18 +3608,6 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
return status;
}
static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
{
struct nfsd4_conn *c;
list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_xprt == xpt) {
return c;
}
}
return NULL;
}
static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
{
struct nfs4_client *clp = ses->se_client;
......@@ -3879,23 +3931,18 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (clp_used_exchangeid(conf))
goto out;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
char addr_str[INET6_ADDRSTRLEN];
rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
sizeof(addr_str));
dprintk("NFSD: setclientid: string in use by client "
"at %s\n", addr_str);
trace_nfsd_clid_inuse_err(conf);
goto out;
}
}
unconf = find_unconfirmed_client_by_name(&clname, nn);
if (unconf)
unhash_client_locked(unconf);
/* We need to handle only case 1: probable callback update */
if (conf && same_verf(&conf->cl_verifier, &clverifier)) {
/* case 1: probable callback update */
copy_clid(new, conf);
gen_confirm(new, nn);
} else /* case 4 (new client) or cases 2, 3 (client reboot): */
;
}
new->cl_minorversion = 0;
gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new);
......@@ -4076,7 +4123,6 @@ nfsd4_init_slabs(void)
out_free_client_slab:
kmem_cache_destroy(client_slab);
out:
dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM;
}
......@@ -4508,6 +4554,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
struct nfs4_file *fp = dp->dl_stid.sc_file;
trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid);
/*
* We don't want the locks code to timeout the lease for us;
* we'll remove it ourself if a delegation isn't returned
......@@ -4522,6 +4570,19 @@ nfsd_break_deleg_cb(struct file_lock *fl)
return ret;
}
static bool nfsd_breaker_owns_lease(struct file_lock *fl)
{
struct nfs4_delegation *dl = fl->fl_owner;
struct svc_rqst *rqst;
struct nfs4_client *clp;
if (!i_am_nfsd())
return NULL;
rqst = kthread_data(current);
clp = *(rqst->rq_lease_breaker);
return dl->dl_stid.sc_client == clp;
}
static int
nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
struct list_head *dispose)
......@@ -4533,6 +4594,7 @@ nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
}
static const struct lock_manager_operations nfsd_lease_mng_ops = {
.lm_breaker_owns_lease = nfsd_breaker_owns_lease,
.lm_break = nfsd_break_deleg_cb,
.lm_change = nfsd_change_deleg_cb,
};
......@@ -5018,8 +5080,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
STATEID_VAL(&dp->dl_stid.sc_stateid));
trace_nfsd_deleg_open(&dp->dl_stid.sc_stateid);
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
nfs4_put_stid(&dp->dl_stid);
return;
......@@ -5136,9 +5197,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
nfs4_open_delegation(current_fh, open, stp);
nodeleg:
status = nfs_ok;
dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
STATEID_VAL(&stp->st_stid.sc_stateid));
trace_nfsd_deleg_none(&stp->st_stid.sc_stateid);
out:
/* 4.1 client trying to upgrade/downgrade delegation? */
if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
......@@ -5192,8 +5251,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
dprintk("process_renew(%08x/%08x): starting\n",
clid->cl_boot, clid->cl_id);
trace_nfsd_clid_renew(clid);
status = lookup_clientid(clid, cstate, nn, false);
if (status)
goto out;
......@@ -5214,6 +5272,7 @@ nfsd4_end_grace(struct nfsd_net *nn)
if (nn->grace_ended)
return;
trace_nfsd_grace_complete(nn);
nn->grace_ended = true;
/*
* If the server goes down again right now, an NFSv4
......@@ -5279,13 +5338,10 @@ nfs4_laundromat(struct nfsd_net *nn)
copy_stateid_t *cps_t;
int i;
dprintk("NFSD: laundromat service - starting\n");
if (clients_still_reclaiming(nn)) {
new_timeo = 0;
goto out;
}
dprintk("NFSD: end of grace period\n");
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
......@@ -5307,8 +5363,7 @@ nfs4_laundromat(struct nfsd_net *nn)
break;
}
if (mark_client_expired_locked(clp)) {
dprintk("NFSD: client in use (clientid %08x)\n",
clp->cl_clientid.cl_id);
trace_nfsd_clid_expired(&clp->cl_clientid);
continue;
}
list_add(&clp->cl_lru, &reaplist);
......@@ -5316,8 +5371,7 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_unlock(&nn->client_lock);
list_for_each_safe(pos, next, &reaplist) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
dprintk("NFSD: purging unused client (clientid %08x)\n",
clp->cl_clientid.cl_id);
trace_nfsd_clid_purged(&clp->cl_clientid);
list_del_init(&clp->cl_lru);
expire_client(clp);
}
......@@ -5407,7 +5461,6 @@ laundromat_main(struct work_struct *laundry)
laundromat_work);
t = nfs4_laundromat(nn);
dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t);
queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
}
......@@ -5948,8 +6001,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
struct nfs4_stid *s;
struct nfs4_ol_stateid *stp = NULL;
dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
seqid, STATEID_VAL(stateid));
trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
......@@ -6018,9 +6070,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
oo->oo_flags |= NFS4_OO_CONFIRMED;
nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
mutex_unlock(&stp->st_mutex);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid);
nfsd4_client_record_create(oo->oo_owner.so_client);
status = nfs_ok;
put_stateid:
......@@ -7072,7 +7122,7 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
unsigned int strhashval;
struct nfs4_client_reclaim *crp;
dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data);
trace_nfsd_clid_reclaim(nn, name.len, name.data);
crp = alloc_reclaim();
if (crp) {
strhashval = clientstr_hashval(name);
......@@ -7122,7 +7172,7 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
unsigned int strhashval;
struct nfs4_client_reclaim *crp = NULL;
dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data);
trace_nfsd_clid_find(nn, name.len, name.data);
strhashval = clientstr_hashval(name);
list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
......@@ -7686,6 +7736,9 @@ nfsd_recall_delegations(struct list_head *reaplist)
list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
list_del_init(&dp->dl_recall_lru);
clp = dp->dl_stid.sc_client;
trace_nfsd_deleg_recall(&dp->dl_stid.sc_stateid);
/*
* We skipped all entries that had a zero dl_time before,
* so we can now reset the dl_time back to 0. If a delegation
......@@ -7868,6 +7921,7 @@ nfs4_state_start_net(struct net *net)
goto skip_grace;
printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
trace_nfsd_grace_start(nn);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
......
......@@ -20,8 +20,7 @@
#include "nfsd.h"
#include "cache.h"
#define NFSDDBG_FACILITY NFSDDBG_REPCACHE
#include "trace.h"
/*
* We use this value to determine the number of hash buckets from the max
......@@ -36,6 +35,8 @@ struct nfsd_drc_bucket {
spinlock_t cache_lock;
};
static struct kmem_cache *drc_slab;
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
......@@ -95,7 +96,7 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
{
struct svc_cacherep *rp;
rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL);
rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
if (rp) {
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
......@@ -129,7 +130,7 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
atomic_dec(&nn->num_drc_entries);
nn->drc_mem_usage -= sizeof(*rp);
}
kmem_cache_free(nn->drc_slab, rp);
kmem_cache_free(drc_slab, rp);
}
static void
......@@ -141,6 +142,18 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
spin_unlock(&b->cache_lock);
}
int nfsd_drc_slab_create(void)
{
drc_slab = kmem_cache_create("nfsd_drc",
sizeof(struct svc_cacherep), 0, 0, NULL);
return drc_slab ? 0: -ENOMEM;
}
void nfsd_drc_slab_free(void)
{
kmem_cache_destroy(drc_slab);
}
int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
......@@ -159,18 +172,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
if (status)
goto out_nomem;
nn->drc_slab = kmem_cache_create("nfsd_drc",
sizeof(struct svc_cacherep), 0, 0, NULL);
if (!nn->drc_slab)
goto out_shrinker;
nn->drc_hashtbl = kcalloc(hashsize,
sizeof(*nn->drc_hashtbl), GFP_KERNEL);
if (!nn->drc_hashtbl) {
nn->drc_hashtbl = vzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)));
if (!nn->drc_hashtbl)
goto out_slab;
goto out_shrinker;
}
for (i = 0; i < hashsize; i++) {
......@@ -180,8 +188,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
nn->drc_hashsize = hashsize;
return 0;
out_slab:
kmem_cache_destroy(nn->drc_slab);
out_shrinker:
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
out_nomem:
......@@ -209,8 +215,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
nn->drc_hashtbl = NULL;
nn->drc_hashsize = 0;
kmem_cache_destroy(nn->drc_slab);
nn->drc_slab = NULL;
}
/*
......@@ -323,8 +327,10 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
const struct svc_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum)
key->c_key.k_csum != rp->c_key.k_csum) {
++nn->payload_misses;
trace_nfsd_drc_mismatch(nn, key, rp);
}
return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
......@@ -377,15 +383,22 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
return ret;
}
/*
/**
* nfsd_cache_lookup - Find an entry in the duplicate reply cache
* @rqstp: Incoming Call to find
*
* Try to find an entry matching the current call in the cache. When none
* is found, we try to grab the oldest expired entry off the LRU list. If
* a suitable one isn't there, then drop the cache_lock and allocate a
* new one, then search again in case one got inserted while this thread
* didn't hold the lock.
*
* Return values:
* %RC_DOIT: Process the request normally
* %RC_REPLY: Reply from cache
* %RC_DROPIT: Do not process the request further
*/
int
nfsd_cache_lookup(struct svc_rqst *rqstp)
int nfsd_cache_lookup(struct svc_rqst *rqstp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp, *found;
......@@ -399,7 +412,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rqstp->rq_cacherep = NULL;
if (type == RC_NOCACHE) {
nfsdstats.rcnocache++;
return rtn;
goto out;
}
csum = nfsd_cache_csum(rqstp);
......@@ -409,10 +422,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* preallocate an entry.
*/
rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
if (!rp) {
dprintk("nfsd: unable to allocate DRC entry!\n");
return rtn;
}
if (!rp)
goto out;
spin_lock(&b->cache_lock);
found = nfsd_cache_insert(b, rp, nn);
......@@ -431,8 +442,10 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
/* go ahead and prune the cache */
prune_bucket(b, nn);
out:
out_unlock:
spin_unlock(&b->cache_lock);
out:
return rtn;
found_entry:
......@@ -442,13 +455,13 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
/* Request being processed */
if (rp->c_state == RC_INPROG)
goto out;
goto out_trace;
/* From the hall of fame of impractical attacks:
* Is this a user who tries to snoop on the cache? */
rtn = RC_DOIT;
if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
goto out;
goto out_trace;
/* Compose RPC reply header */
switch (rp->c_type) {
......@@ -460,21 +473,26 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
break;
case RC_REPLBUFF:
if (!nfsd_cache_append(rqstp, &rp->c_replvec))
goto out; /* should not happen */
goto out_unlock; /* should not happen */
rtn = RC_REPLY;
break;
default:
printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
nfsd_reply_cache_free_locked(b, rp, nn);
WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
}
goto out;
out_trace:
trace_nfsd_drc_found(nn, rqstp, rtn);
goto out_unlock;
}
/*
* Update a cache entry. This is called from nfsd_dispatch when
* the procedure has been executed and the complete reply is in
* rqstp->rq_res.
/**
* nfsd_cache_update - Update an entry in the duplicate reply cache.
* @rqstp: svc_rqst with a finished Reply
* @cachetype: which cache to update
* @statp: Reply's status code
*
* This is called from nfsd_dispatch when the procedure has been
* executed and the complete reply is in rqstp->rq_res.
*
* We're copying around data here rather than swapping buffers because
* the toplevel loop requires max-sized buffers, which would be a waste
......@@ -487,8 +505,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* nfsd failed to encode a reply that otherwise would have been cached.
* In this case, nfsd_cache_update is called with statp == NULL.
*/
void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
......
......@@ -238,7 +238,7 @@ static inline struct net *netns(struct file *file)
return file_inode(file)->i_sb->s_fs_info;
}
/**
/*
* write_unlock_ip - Release all locks used by a client
*
* Experimental.
......@@ -277,7 +277,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
return nlmsvc_unlock_all_by_ip(sap);
}
/**
/*
* write_unlock_fs - Release all locks on a local file system
*
* Experimental.
......@@ -327,7 +327,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
return error;
}
/**
/*
* write_filehandle - Get a variable-length NFS file handle by path
*
* On input, the buffer contains a '\n'-terminated C string comprised of
......@@ -402,7 +402,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
return mesg - buf;
}
/**
/*
* write_threads - Start NFSD, or report the current number of running threads
*
* Input:
......@@ -452,7 +452,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv);
}
/**
/*
* write_pool_threads - Set or report the current number of threads per pool
*
* Input:
......@@ -661,7 +661,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
return tlen + len;
}
/**
/*
* write_versions - Set or report the available NFS protocol versions
*
* Input:
......@@ -811,7 +811,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size,
return -EINVAL;
}
/**
/*
* write_ports - Pass a socket file descriptor or transport name to listen on
*
* Input:
......@@ -867,7 +867,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int nfsd_max_blksize;
/**
/*
* write_maxblksize - Set or report the current NFS blksize
*
* Input:
......@@ -917,7 +917,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
nfsd_max_blksize);
}
/**
/*
* write_maxconn - Set or report the current max number of connections
*
* Input:
......@@ -998,7 +998,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
return rv;
}
/**
/*
* write_leasetime - Set or report the current NFSv4 lease time
*
* Input:
......@@ -1025,7 +1025,7 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn);
}
/**
/*
* write_gracetime - Set or report current NFSv4 grace period time
*
* As above, but sets the time of the NFSv4 grace period.
......@@ -1069,7 +1069,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
nfs4_recoverydir());
}
/**
/*
* write_recoverydir - Set or report the pathname of the recovery directory
*
* Input:
......@@ -1101,7 +1101,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
return rv;
}
/**
/*
* write_v4_end_grace - release grace period for nfsd's v4.x lock manager
*
* Input:
......@@ -1533,6 +1533,9 @@ static int __init init_nfsd(void)
goto out_free_slabs;
nfsd_fault_inject_init(); /* nfsd fault injection controls */
nfsd_stat_init(); /* Statistics */
retval = nfsd_drc_slab_create();
if (retval)
goto out_free_stat;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
......@@ -1546,6 +1549,8 @@ static int __init init_nfsd(void)
remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_drc_slab_free();
out_free_stat:
nfsd_stat_shutdown();
nfsd_fault_inject_cleanup();
nfsd4_exit_pnfs();
......@@ -1560,6 +1565,7 @@ static int __init init_nfsd(void)
static void __exit exit_nfsd(void)
{
nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
nfsd_stat_shutdown();
......
......@@ -88,6 +88,8 @@ int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_destroy(struct net *net);
bool i_am_nfsd(void);
struct nfsdfs_client {
struct kref cl_ref;
void (*cl_release)(struct kref *kref);
......
......@@ -601,6 +601,11 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_module = THIS_MODULE,
};
bool i_am_nfsd(void)
{
return kthread_func(current) == nfsd;
}
int nfsd_create_serv(struct net *net)
{
int error;
......@@ -1011,6 +1016,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
*statp = rpc_garbage_args;
return 1;
}
rqstp->rq_lease_breaker = NULL;
/*
* Give the xdr decoder a chance to change this if it wants
* (necessary in the NFSv4.0 compound case)
......
......@@ -64,13 +64,6 @@ typedef struct {
refcount_t sc_count;
} copy_stateid_t;
#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
#define STATEID_VAL(s) \
(s)->si_opaque.so_clid.cl_boot, \
(s)->si_opaque.so_clid.cl_id, \
(s)->si_opaque.so_id, \
(s)->si_generation
struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
......
......@@ -277,6 +277,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class,
DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \
TP_PROTO(stateid_t *stp), \
TP_ARGS(stp))
DEFINE_STATEID_EVENT(layoutstate_alloc);
DEFINE_STATEID_EVENT(layoutstate_unhash);
DEFINE_STATEID_EVENT(layoutstate_free);
......@@ -288,6 +289,138 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);
DEFINE_STATEID_EVENT(deleg_open);
DEFINE_STATEID_EVENT(deleg_none);
DEFINE_STATEID_EVENT(deleg_break);
DEFINE_STATEID_EVENT(deleg_recall);
DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
TP_PROTO(u32 seqid, const stateid_t *stp),
TP_ARGS(seqid, stp),
TP_STRUCT__entry(
__field(u32, seqid)
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, si_id)
__field(u32, si_generation)
),
TP_fast_assign(
__entry->seqid = seqid;
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
__entry->cl_id = stp->si_opaque.so_clid.cl_id;
__entry->si_id = stp->si_opaque.so_id;
__entry->si_generation = stp->si_generation;
),
TP_printk("seqid=%u client %08x:%08x stateid %08x:%08x",
__entry->seqid, __entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation)
)
#define DEFINE_STATESEQID_EVENT(name) \
DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
TP_PROTO(u32 seqid, const stateid_t *stp), \
TP_ARGS(seqid, stp))
DEFINE_STATESEQID_EVENT(preprocess);
DEFINE_STATESEQID_EVENT(open_confirm);
DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid),
TP_ARGS(clid),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
),
TP_fast_assign(
__entry->cl_boot = clid->cl_boot;
__entry->cl_id = clid->cl_id;
),
TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id)
)
#define DEFINE_CLIENTID_EVENT(name) \
DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \
TP_PROTO(const clientid_t *clid), \
TP_ARGS(clid))
DEFINE_CLIENTID_EVENT(expired);
DEFINE_CLIENTID_EVENT(purged);
DEFINE_CLIENTID_EVENT(renew);
DEFINE_CLIENTID_EVENT(stale);
DECLARE_EVENT_CLASS(nfsd_net_class,
TP_PROTO(const struct nfsd_net *nn),
TP_ARGS(nn),
TP_STRUCT__entry(
__field(unsigned long long, boot_time)
),
TP_fast_assign(
__entry->boot_time = nn->boot_time;
),
TP_printk("boot_time=%16llx", __entry->boot_time)
)
#define DEFINE_NET_EVENT(name) \
DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
TP_PROTO(const struct nfsd_net *nn), \
TP_ARGS(nn))
DEFINE_NET_EVENT(grace_start);
DEFINE_NET_EVENT(grace_complete);
DECLARE_EVENT_CLASS(nfsd_clid_class,
TP_PROTO(const struct nfsd_net *nn,
unsigned int namelen,
const unsigned char *namedata),
TP_ARGS(nn, namelen, namedata),
TP_STRUCT__entry(
__field(unsigned long long, boot_time)
__field(unsigned int, namelen)
__dynamic_array(unsigned char, name, namelen)
),
TP_fast_assign(
__entry->boot_time = nn->boot_time;
__entry->namelen = namelen;
memcpy(__get_dynamic_array(name), namedata, namelen);
),
TP_printk("boot_time=%16llx nfs4_clientid=%.*s",
__entry->boot_time, __entry->namelen, __get_str(name))
)
#define DEFINE_CLID_EVENT(name) \
DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \
TP_PROTO(const struct nfsd_net *nn, \
unsigned int namelen, \
const unsigned char *namedata), \
TP_ARGS(nn, namelen, namedata))
DEFINE_CLID_EVENT(find);
DEFINE_CLID_EVENT(reclaim);
TRACE_EVENT(nfsd_clid_inuse_err,
TP_PROTO(const struct nfs4_client *clp),
TP_ARGS(clp),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
__field(unsigned int, namelen)
__dynamic_array(unsigned char, name, clp->cl_name.len)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
memcpy(__entry->addr, &clp->cl_addr,
sizeof(struct sockaddr_in6));
__entry->namelen = clp->cl_name.len;
memcpy(__get_dynamic_array(name), clp->cl_name.data,
clp->cl_name.len);
),
TP_printk("nfs4_clientid %.*s already in use by %pISpc, client %08x:%08x",
__entry->namelen, __get_str(name), __entry->addr,
__entry->cl_boot, __entry->cl_id)
)
TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
......@@ -432,6 +565,218 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
__entry->nlink, __entry->mode, __entry->mask)
);
#include "cache.h"
TRACE_DEFINE_ENUM(RC_DROPIT);
TRACE_DEFINE_ENUM(RC_REPLY);
TRACE_DEFINE_ENUM(RC_DOIT);
#define show_drc_retval(x) \
__print_symbolic(x, \
{ RC_DROPIT, "DROPIT" }, \
{ RC_REPLY, "REPLY" }, \
{ RC_DOIT, "DOIT" })
TRACE_EVENT(nfsd_drc_found,
TP_PROTO(
const struct nfsd_net *nn,
const struct svc_rqst *rqstp,
int result
),
TP_ARGS(nn, rqstp, result),
TP_STRUCT__entry(
__field(unsigned long long, boot_time)
__field(unsigned long, result)
__field(u32, xid)
),
TP_fast_assign(
__entry->boot_time = nn->boot_time;
__entry->result = result;
__entry->xid = be32_to_cpu(rqstp->rq_xid);
),
TP_printk("boot_time=%16llx xid=0x%08x result=%s",
__entry->boot_time, __entry->xid,
show_drc_retval(__entry->result))
);
TRACE_EVENT(nfsd_drc_mismatch,
TP_PROTO(
const struct nfsd_net *nn,
const struct svc_cacherep *key,
const struct svc_cacherep *rp
),
TP_ARGS(nn, key, rp),
TP_STRUCT__entry(
__field(unsigned long long, boot_time)
__field(u32, xid)
__field(u32, cached)
__field(u32, ingress)
),
TP_fast_assign(
__entry->boot_time = nn->boot_time;
__entry->xid = be32_to_cpu(key->c_key.k_xid);
__entry->cached = (__force u32)key->c_key.k_csum;
__entry->ingress = (__force u32)rp->c_key.k_csum;
),
TP_printk("boot_time=%16llx xid=0x%08x cached-csum=0x%08x ingress-csum=0x%08x",
__entry->boot_time, __entry->xid, __entry->cached,
__entry->ingress)
);
TRACE_EVENT(nfsd_cb_args,
TP_PROTO(
const struct nfs4_client *clp,
const struct nfs4_cb_conn *conn
),
TP_ARGS(clp, conn),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
__field(u32, prog)
__field(u32, ident)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->prog = conn->cb_prog;
__entry->ident = conn->cb_ident;
memcpy(__entry->addr, &conn->cb_addr,
sizeof(struct sockaddr_in6));
),
TP_printk("client %08x:%08x callback addr=%pISpc prog=%u ident=%u",
__entry->cl_boot, __entry->cl_id,
__entry->addr, __entry->prog, __entry->ident)
);
TRACE_EVENT(nfsd_cb_nodelegs,
TP_PROTO(const struct nfs4_client *clp),
TP_ARGS(clp),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
),
TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id)
)
TRACE_DEFINE_ENUM(NFSD4_CB_UP);
TRACE_DEFINE_ENUM(NFSD4_CB_UNKNOWN);
TRACE_DEFINE_ENUM(NFSD4_CB_DOWN);
TRACE_DEFINE_ENUM(NFSD4_CB_FAULT);
#define show_cb_state(val) \
__print_symbolic(val, \
{ NFSD4_CB_UP, "UP" }, \
{ NFSD4_CB_UNKNOWN, "UNKNOWN" }, \
{ NFSD4_CB_DOWN, "DOWN" }, \
{ NFSD4_CB_FAULT, "FAULT"})
DECLARE_EVENT_CLASS(nfsd_cb_class,
TP_PROTO(const struct nfs4_client *clp),
TP_ARGS(clp),
TP_STRUCT__entry(
__field(unsigned long, state)
__field(u32, cl_boot)
__field(u32, cl_id)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->state = clp->cl_cb_state;
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
sizeof(struct sockaddr_in6));
),
TP_printk("addr=%pISpc client %08x:%08x state=%s",
__entry->addr, __entry->cl_boot, __entry->cl_id,
show_cb_state(__entry->state))
);
#define DEFINE_NFSD_CB_EVENT(name) \
DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \
TP_PROTO(const struct nfs4_client *clp), \
TP_ARGS(clp))
DEFINE_NFSD_CB_EVENT(setup);
DEFINE_NFSD_CB_EVENT(state);
DEFINE_NFSD_CB_EVENT(shutdown);
TRACE_EVENT(nfsd_cb_setup_err,
TP_PROTO(
const struct nfs4_client *clp,
long error
),
TP_ARGS(clp, error),
TP_STRUCT__entry(
__field(long, error)
__field(u32, cl_boot)
__field(u32, cl_id)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->error = error;
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
sizeof(struct sockaddr_in6));
),
TP_printk("addr=%pISpc client %08x:%08x error=%ld",
__entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error)
);
TRACE_EVENT(nfsd_cb_work,
TP_PROTO(
const struct nfs4_client *clp,
const char *procedure
),
TP_ARGS(clp, procedure),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
__string(procedure, procedure)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__assign_str(procedure, procedure)
memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
sizeof(struct sockaddr_in6));
),
TP_printk("addr=%pISpc client %08x:%08x procedure=%s",
__entry->addr, __entry->cl_boot, __entry->cl_id,
__get_str(procedure))
);
TRACE_EVENT(nfsd_cb_done,
TP_PROTO(
const struct nfs4_client *clp,
int status
),
TP_ARGS(clp, status),
TP_STRUCT__entry(
__field(u32, cl_boot)
__field(u32, cl_id)
__field(int, status)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->status = status;
memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
sizeof(struct sockaddr_in6));
),
TP_printk("addr=%pISpc client %08x:%08x status=%d",
__entry->addr, __entry->cl_boot, __entry->cl_id,
__entry->status)
);
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
......
......@@ -1048,6 +1048,7 @@ struct lock_manager_operations {
bool (*lm_break)(struct file_lock *);
int (*lm_change)(struct file_lock *, int, struct list_head *);
void (*lm_setup)(struct file_lock *, void **);
bool (*lm_breaker_owns_lease)(struct file_lock *);
};
struct lock_manager {
......
......@@ -57,6 +57,7 @@ bool kthread_should_stop(void);
bool kthread_should_park(void);
bool __kthread_should_park(struct task_struct *k);
bool kthread_freezable_should_stop(bool *was_frozen);
void *kthread_func(struct task_struct *k);
void *kthread_data(struct task_struct *k);
void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k);
......
......@@ -84,6 +84,7 @@ struct pf_desc {
u32 service;
char *name;
char *auth_domain_name;
struct auth_domain *domain;
bool datatouch;
};
......
......@@ -254,6 +254,7 @@ struct svc_rqst {
struct page * *rq_page_end; /* one past the last page */
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
struct bio_vec rq_bvec[RPCSVC_MAXPAGES];
__be32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */
......@@ -299,6 +300,7 @@ struct svc_rqst {
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
*/
void ** rq_lease_breaker; /* The v4 client breaking a lease */
};
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
......
......@@ -48,7 +48,6 @@
#include <linux/sunrpc/rpc_rdma.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#define SVCRDMA_DEBUG
/* Default and maximum inline threshold sizes */
enum {
......@@ -160,9 +159,8 @@ struct svc_rdma_send_ctxt {
};
/* svc_rdma_backchannel.c */
extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
__be32 *rdma_resp,
struct xdr_buf *rcvbuf);
extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *rctxt);
/* svc_rdma_recvfrom.c */
extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
......
......@@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
return 0;
}
static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt)
{
return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) ||
(test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0);
}
int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
......
......@@ -20,7 +20,8 @@ int gss_svc_init(void);
void gss_svc_shutdown(void);
int gss_svc_init_net(struct net *net);
void gss_svc_shutdown_net(struct net *net);
int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
struct auth_domain *svcauth_gss_register_pseudoflavor(u32 pseudoflavor,
char *name);
u32 svcauth_gss_flavor(struct auth_domain *dom);
#endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
......@@ -28,7 +28,7 @@ struct svc_sock {
/* private TCP part */
/* On-the-wire fragment header: */
__be32 sk_reclen;
__be32 sk_marker;
/* As we receive a record, this includes the length received so
* far (including the fragment header): */
u32 sk_tcplen;
......@@ -41,12 +41,12 @@ struct svc_sock {
static inline u32 svc_sock_reclen(struct svc_sock *svsk)
{
return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK;
return be32_to_cpu(svsk->sk_marker) & RPC_FRAGMENT_SIZE_MASK;
}
static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
{
return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT;
return be32_to_cpu(svsk->sk_marker) & RPC_LAST_STREAM_FRAGMENT;
}
/*
......
......@@ -1279,38 +1279,42 @@ TRACE_EVENT(xprtrdma_leaked_rep,
** Server-side RPC/RDMA events
**/
DECLARE_EVENT_CLASS(svcrdma_xprt_event,
DECLARE_EVENT_CLASS(svcrdma_accept_class,
TP_PROTO(
const struct svc_xprt *xprt
const struct svcxprt_rdma *rdma,
long status
),
TP_ARGS(xprt),
TP_ARGS(rdma, status),
TP_STRUCT__entry(
__field(const void *, xprt)
__string(addr, xprt->xpt_remotebuf)
__field(long, status)
__string(addr, rdma->sc_xprt.xpt_remotebuf)
),
TP_fast_assign(
__entry->xprt = xprt;
__assign_str(addr, xprt->xpt_remotebuf);
__entry->status = status;
__assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
),
TP_printk("xprt=%p addr=%s",
__entry->xprt, __get_str(addr)
TP_printk("addr=%s status=%ld",
__get_str(addr), __entry->status
)
);
#define DEFINE_XPRT_EVENT(name) \
DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name, \
TP_PROTO( \
const struct svc_xprt *xprt \
), \
TP_ARGS(xprt))
#define DEFINE_ACCEPT_EVENT(name) \
DEFINE_EVENT(svcrdma_accept_class, svcrdma_##name##_err, \
TP_PROTO( \
const struct svcxprt_rdma *rdma, \
long status \
), \
TP_ARGS(rdma, status))
DEFINE_XPRT_EVENT(accept);
DEFINE_XPRT_EVENT(fail);
DEFINE_XPRT_EVENT(free);
DEFINE_ACCEPT_EVENT(pd);
DEFINE_ACCEPT_EVENT(qp);
DEFINE_ACCEPT_EVENT(fabric);
DEFINE_ACCEPT_EVENT(initdepth);
DEFINE_ACCEPT_EVENT(accept);
TRACE_DEFINE_ENUM(RDMA_MSG);
TRACE_DEFINE_ENUM(RDMA_NOMSG);
......@@ -1355,7 +1359,7 @@ TRACE_EVENT(svcrdma_decode_rqst,
show_rpcrdma_proc(__entry->proc), __entry->hdrlen)
);
TRACE_EVENT(svcrdma_decode_short,
TRACE_EVENT(svcrdma_decode_short_err,
TP_PROTO(
unsigned int hdrlen
),
......@@ -1399,7 +1403,8 @@ DECLARE_EVENT_CLASS(svcrdma_badreq_event,
);
#define DEFINE_BADREQ_EVENT(name) \
DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\
DEFINE_EVENT(svcrdma_badreq_event, \
svcrdma_decode_##name##_err, \
TP_PROTO( \
__be32 *p \
), \
......@@ -1583,28 +1588,117 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class,
DEFINE_SVC_DMA_EVENT(dma_map_page);
DEFINE_SVC_DMA_EVENT(dma_unmap_page);
TRACE_EVENT(svcrdma_dma_map_rwctx,
TRACE_EVENT(svcrdma_dma_map_rw_err,
TP_PROTO(
const struct svcxprt_rdma *rdma,
unsigned int nents,
int status
),
TP_ARGS(rdma, status),
TP_ARGS(rdma, nents, status),
TP_STRUCT__entry(
__field(int, status)
__field(unsigned int, nents)
__string(device, rdma->sc_cm_id->device->name)
__string(addr, rdma->sc_xprt.xpt_remotebuf)
),
TP_fast_assign(
__entry->status = status;
__entry->nents = nents;
__assign_str(device, rdma->sc_cm_id->device->name);
__assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
),
TP_printk("addr=%s device=%s nents=%u status=%d",
__get_str(addr), __get_str(device), __entry->nents,
__entry->status
)
);
TRACE_EVENT(svcrdma_no_rwctx_err,
TP_PROTO(
const struct svcxprt_rdma *rdma,
unsigned int num_sges
),
TP_ARGS(rdma, num_sges),
TP_STRUCT__entry(
__field(unsigned int, num_sges)
__string(device, rdma->sc_cm_id->device->name)
__string(addr, rdma->sc_xprt.xpt_remotebuf)
),
TP_fast_assign(
__entry->num_sges = num_sges;
__assign_str(device, rdma->sc_cm_id->device->name);
__assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
),
TP_printk("addr=%s device=%s num_sges=%d",
__get_str(addr), __get_str(device), __entry->num_sges
)
);
TRACE_EVENT(svcrdma_page_overrun_err,
TP_PROTO(
const struct svcxprt_rdma *rdma,
const struct svc_rqst *rqst,
unsigned int pageno
),
TP_ARGS(rdma, rqst, pageno),
TP_STRUCT__entry(
__field(unsigned int, pageno)
__field(u32, xid)
__string(device, rdma->sc_cm_id->device->name)
__string(addr, rdma->sc_xprt.xpt_remotebuf)
),
TP_fast_assign(
__entry->pageno = pageno;
__entry->xid = __be32_to_cpu(rqst->rq_xid);
__assign_str(device, rdma->sc_cm_id->device->name);
__assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
),
TP_printk("addr=%s device=%s xid=0x%08x pageno=%u", __get_str(addr),
__get_str(device), __entry->xid, __entry->pageno
)
);
TRACE_EVENT(svcrdma_small_wrch_err,
TP_PROTO(
const struct svcxprt_rdma *rdma,
unsigned int remaining,
unsigned int seg_no,
unsigned int num_segs
),
TP_ARGS(rdma, remaining, seg_no, num_segs),
TP_STRUCT__entry(
__field(unsigned int, remaining)
__field(unsigned int, seg_no)
__field(unsigned int, num_segs)
__string(device, rdma->sc_cm_id->device->name)
__string(addr, rdma->sc_xprt.xpt_remotebuf)
),
TP_fast_assign(
__entry->remaining = remaining;
__entry->seg_no = seg_no;
__entry->num_segs = num_segs;
__assign_str(device, rdma->sc_cm_id->device->name);
__assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
),
TP_printk("addr=%s device=%s status=%d",
__get_str(addr), __get_str(device), __entry->status
TP_printk("addr=%s device=%s remaining=%u seg_no=%u num_segs=%u",
__get_str(addr), __get_str(device), __entry->remaining,
__entry->seg_no, __entry->num_segs
)
);
......
......@@ -14,6 +14,39 @@
#include <linux/net.h>
#include <linux/tracepoint.h>
TRACE_DEFINE_ENUM(SOCK_STREAM);
TRACE_DEFINE_ENUM(SOCK_DGRAM);
TRACE_DEFINE_ENUM(SOCK_RAW);
TRACE_DEFINE_ENUM(SOCK_RDM);
TRACE_DEFINE_ENUM(SOCK_SEQPACKET);
TRACE_DEFINE_ENUM(SOCK_DCCP);
TRACE_DEFINE_ENUM(SOCK_PACKET);
#define show_socket_type(type) \
__print_symbolic(type, \
{ SOCK_STREAM, "STREAM" }, \
{ SOCK_DGRAM, "DGRAM" }, \
{ SOCK_RAW, "RAW" }, \
{ SOCK_RDM, "RDM" }, \
{ SOCK_SEQPACKET, "SEQPACKET" }, \
{ SOCK_DCCP, "DCCP" }, \
{ SOCK_PACKET, "PACKET" })
/* This list is known to be incomplete, add new enums as needed. */
TRACE_DEFINE_ENUM(AF_UNSPEC);
TRACE_DEFINE_ENUM(AF_UNIX);
TRACE_DEFINE_ENUM(AF_LOCAL);
TRACE_DEFINE_ENUM(AF_INET);
TRACE_DEFINE_ENUM(AF_INET6);
#define rpc_show_address_family(family) \
__print_symbolic(family, \
{ AF_UNSPEC, "AF_UNSPEC" }, \
{ AF_UNIX, "AF_UNIX" }, \
{ AF_LOCAL, "AF_LOCAL" }, \
{ AF_INET, "AF_INET" }, \
{ AF_INET6, "AF_INET6" })
DECLARE_EVENT_CLASS(xdr_buf_class,
TP_PROTO(
const struct xdr_buf *xdr
......@@ -1024,6 +1057,17 @@ TRACE_EVENT(svc_recv,
show_rqstp_flags(__entry->flags))
);
TRACE_DEFINE_ENUM(SVC_GARBAGE);
TRACE_DEFINE_ENUM(SVC_SYSERR);
TRACE_DEFINE_ENUM(SVC_VALID);
TRACE_DEFINE_ENUM(SVC_NEGATIVE);
TRACE_DEFINE_ENUM(SVC_OK);
TRACE_DEFINE_ENUM(SVC_DROP);
TRACE_DEFINE_ENUM(SVC_CLOSE);
TRACE_DEFINE_ENUM(SVC_DENIED);
TRACE_DEFINE_ENUM(SVC_PENDING);
TRACE_DEFINE_ENUM(SVC_COMPLETE);
#define svc_show_status(status) \
__print_symbolic(status, \
{ SVC_GARBAGE, "SVC_GARBAGE" }, \
......@@ -1167,28 +1211,54 @@ DEFINE_EVENT(svc_rqst_status, svc_send,
{ (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \
{ (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"})
TRACE_EVENT(svc_xprt_create_err,
TP_PROTO(
const char *program,
const char *protocol,
struct sockaddr *sap,
const struct svc_xprt *xprt
),
TP_ARGS(program, protocol, sap, xprt),
TP_STRUCT__entry(
__field(long, error)
__string(program, program)
__string(protocol, protocol)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->error = PTR_ERR(xprt);
__assign_str(program, program);
__assign_str(protocol, protocol);
memcpy(__entry->addr, sap, sizeof(__entry->addr));
),
TP_printk("addr=%pISpc program=%s protocol=%s error=%ld",
__entry->addr, __get_str(program), __get_str(protocol),
__entry->error)
);
TRACE_EVENT(svc_xprt_do_enqueue,
TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
TP_ARGS(xprt, rqst),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(int, pid)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->xprt = xprt;
__entry->pid = rqst? rqst->rq_task->pid : 0;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),
TP_printk("xprt=%p addr=%s pid=%d flags=%s",
__entry->xprt, __get_str(addr),
__entry->pid, show_svc_xprt_flags(__entry->flags))
TP_printk("addr=%s pid=%d flags=%s", __get_str(addr),
__entry->pid, show_svc_xprt_flags(__entry->flags))
);
DECLARE_EVENT_CLASS(svc_xprt_event,
......@@ -1197,25 +1267,55 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
TP_ARGS(xprt),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->xprt = xprt;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),
TP_printk("xprt=%p addr=%s flags=%s",
__entry->xprt, __get_str(addr),
show_svc_xprt_flags(__entry->flags))
TP_printk("addr=%s flags=%s", __get_str(addr),
show_svc_xprt_flags(__entry->flags))
);
DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
TP_PROTO(struct svc_xprt *xprt),
TP_ARGS(xprt));
#define DEFINE_SVC_XPRT_EVENT(name) \
DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \
TP_PROTO( \
struct svc_xprt *xprt \
), \
TP_ARGS(xprt))
DEFINE_SVC_XPRT_EVENT(no_write_space);
DEFINE_SVC_XPRT_EVENT(close);
DEFINE_SVC_XPRT_EVENT(detach);
DEFINE_SVC_XPRT_EVENT(free);
TRACE_EVENT(svc_xprt_accept,
TP_PROTO(
const struct svc_xprt *xprt,
const char *service
),
TP_ARGS(xprt, service),
TP_STRUCT__entry(
__string(addr, xprt->xpt_remotebuf)
__string(protocol, xprt->xpt_class->xcl_name)
__string(service, service)
),
TP_fast_assign(
__assign_str(addr, xprt->xpt_remotebuf);
__assign_str(protocol, xprt->xpt_class->xcl_name)
__assign_str(service, service);
),
TP_printk("addr=%s protocol=%s service=%s",
__get_str(addr), __get_str(protocol), __get_str(service)
)
);
TRACE_EVENT(svc_xprt_dequeue,
TP_PROTO(struct svc_rqst *rqst),
......@@ -1223,24 +1323,20 @@ TRACE_EVENT(svc_xprt_dequeue,
TP_ARGS(rqst),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(unsigned long, flags)
__field(unsigned long, wakeup)
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->xprt = rqst->rq_xprt;
__entry->flags = rqst->rq_xprt->xpt_flags;
__entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
rqst->rq_qtime));
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu",
__entry->xprt, __get_str(addr),
show_svc_xprt_flags(__entry->flags),
__entry->wakeup)
TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr),
show_svc_xprt_flags(__entry->flags), __entry->wakeup)
);
TRACE_EVENT(svc_wake_up,
......@@ -1265,21 +1361,18 @@ TRACE_EVENT(svc_handle_xprt,
TP_ARGS(xprt, len),
TP_STRUCT__entry(
__field(struct svc_xprt *, xprt)
__field(int, len)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->xprt = xprt;
__entry->len = len;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),
TP_printk("xprt=%p addr=%s len=%d flags=%s",
__entry->xprt, __get_str(addr),
TP_printk("addr=%s len=%d flags=%s", __get_str(addr),
__entry->len, show_svc_xprt_flags(__entry->flags))
);
......@@ -1313,27 +1406,221 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
TP_ARGS(dr),
TP_STRUCT__entry(
__field(const void *, dr)
__field(u32, xid)
__string(addr, dr->xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->dr = dr;
__entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
(dr->xprt_hlen>>2)));
__assign_str(addr, dr->xprt->xpt_remotebuf);
),
TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid)
TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr,
__entry->xid)
);
#define DEFINE_SVC_DEFERRED_EVENT(name) \
DEFINE_EVENT(svc_deferred_event, svc_##name##_deferred, \
DEFINE_EVENT(svc_deferred_event, svc_defer_##name, \
TP_PROTO( \
const struct svc_deferred_req *dr \
), \
TP_ARGS(dr))
DEFINE_SVC_DEFERRED_EVENT(drop);
DEFINE_SVC_DEFERRED_EVENT(revisit);
DEFINE_SVC_DEFERRED_EVENT(queue);
DEFINE_SVC_DEFERRED_EVENT(recv);
TRACE_EVENT(svcsock_new_socket,
TP_PROTO(
const struct socket *socket
),
TP_ARGS(socket),
TP_STRUCT__entry(
__field(unsigned long, type)
__field(unsigned long, family)
__field(bool, listener)
),
TP_fast_assign(
__entry->type = socket->type;
__entry->family = socket->sk->sk_family;
__entry->listener = (socket->sk->sk_state == TCP_LISTEN);
),
TP_printk("type=%s family=%s%s",
show_socket_type(__entry->type),
rpc_show_address_family(__entry->family),
__entry->listener ? " (listener)" : ""
)
);
TRACE_EVENT(svcsock_marker,
TP_PROTO(
const struct svc_xprt *xprt,
__be32 marker
),
TP_ARGS(xprt, marker),
TP_STRUCT__entry(
__field(unsigned int, length)
__field(bool, last)
__string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->length = be32_to_cpu(marker) & RPC_FRAGMENT_SIZE_MASK;
__entry->last = be32_to_cpu(marker) & RPC_LAST_STREAM_FRAGMENT;
__assign_str(addr, xprt->xpt_remotebuf);
),
TP_printk("addr=%s length=%u%s", __get_str(addr),
__entry->length, __entry->last ? " (last)" : "")
);
DECLARE_EVENT_CLASS(svcsock_class,
TP_PROTO(
const struct svc_xprt *xprt,
ssize_t result
),
TP_ARGS(xprt, result),
TP_STRUCT__entry(
__field(ssize_t, result)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->result = result;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),
TP_printk("addr=%s result=%zd flags=%s", __get_str(addr),
__entry->result, show_svc_xprt_flags(__entry->flags)
)
);
#define DEFINE_SVCSOCK_EVENT(name) \
DEFINE_EVENT(svcsock_class, svcsock_##name, \
TP_PROTO( \
const struct svc_xprt *xprt, \
ssize_t result \
), \
TP_ARGS(xprt, result))
DEFINE_SVCSOCK_EVENT(udp_send);
DEFINE_SVCSOCK_EVENT(udp_recv);
DEFINE_SVCSOCK_EVENT(udp_recv_err);
DEFINE_SVCSOCK_EVENT(tcp_send);
DEFINE_SVCSOCK_EVENT(tcp_recv);
DEFINE_SVCSOCK_EVENT(tcp_recv_eagain);
DEFINE_SVCSOCK_EVENT(tcp_recv_err);
DEFINE_SVCSOCK_EVENT(data_ready);
DEFINE_SVCSOCK_EVENT(write_space);
TRACE_EVENT(svcsock_tcp_recv_short,
TP_PROTO(
const struct svc_xprt *xprt,
u32 expected,
u32 received
),
TP_ARGS(xprt, expected, received),
TP_STRUCT__entry(
__field(u32, expected)
__field(u32, received)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->expected = expected;
__entry->received = received;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),
TP_printk("addr=%s flags=%s expected=%u received=%u",
__get_str(addr), show_svc_xprt_flags(__entry->flags),
__entry->expected, __entry->received
)
);
TRACE_EVENT(svcsock_tcp_state,
TP_PROTO(
const struct svc_xprt *xprt,
const struct socket *socket
),
TP_ARGS(xprt, socket),
TP_STRUCT__entry(
__field(unsigned long, socket_state)
__field(unsigned long, sock_state)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),
TP_fast_assign(
__entry->socket_state = socket->state;
__entry->sock_state = socket->sk->sk_state;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),
TP_printk("addr=%s state=%s sk_state=%s flags=%s", __get_str(addr),
rpc_show_socket_state(__entry->socket_state),
rpc_show_sock_state(__entry->sock_state),
show_svc_xprt_flags(__entry->flags)
)
);
DECLARE_EVENT_CLASS(svcsock_accept_class,
TP_PROTO(
const struct svc_xprt *xprt,
const char *service,
long status
),
TP_ARGS(xprt, service, status),
TP_STRUCT__entry(
__field(long, status)
__string(service, service)
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
),
TP_fast_assign(
__entry->status = status;
__assign_str(service, service);
memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr));
),
TP_printk("listener=%pISpc service=%s status=%ld",
__entry->addr, __get_str(service), __entry->status
)
);
#define DEFINE_ACCEPT_EVENT(name) \
DEFINE_EVENT(svcsock_accept_class, svcsock_##name##_err, \
TP_PROTO( \
const struct svc_xprt *xprt, \
const char *service, \
long status \
), \
TP_ARGS(xprt, service, status))
DEFINE_ACCEPT_EVENT(accept);
DEFINE_ACCEPT_EVENT(getpeername);
DECLARE_EVENT_CLASS(cache_event,
TP_PROTO(
......@@ -1368,6 +1655,86 @@ DEFINE_CACHE_EVENT(cache_entry_update);
DEFINE_CACHE_EVENT(cache_entry_make_negative);
DEFINE_CACHE_EVENT(cache_entry_no_listener);
DECLARE_EVENT_CLASS(register_class,
TP_PROTO(
const char *program,
const u32 version,
const int family,
const unsigned short protocol,
const unsigned short port,
int error
),
TP_ARGS(program, version, family, protocol, port, error),
TP_STRUCT__entry(
__field(u32, version)
__field(unsigned long, family)
__field(unsigned short, protocol)
__field(unsigned short, port)
__field(int, error)
__string(program, program)
),
TP_fast_assign(
__entry->version = version;
__entry->family = family;
__entry->protocol = protocol;
__entry->port = port;
__entry->error = error;
__assign_str(program, program);
),
TP_printk("program=%sv%u proto=%s port=%u family=%s error=%d",
__get_str(program), __entry->version,
__entry->protocol == IPPROTO_UDP ? "udp" : "tcp",
__entry->port, rpc_show_address_family(__entry->family),
__entry->error
)
);
#define DEFINE_REGISTER_EVENT(name) \
DEFINE_EVENT(register_class, svc_##name, \
TP_PROTO( \
const char *program, \
const u32 version, \
const int family, \
const unsigned short protocol, \
const unsigned short port, \
int error \
), \
TP_ARGS(program, version, family, protocol, \
port, error))
DEFINE_REGISTER_EVENT(register);
DEFINE_REGISTER_EVENT(noregister);
TRACE_EVENT(svc_unregister,
TP_PROTO(
const char *program,
const u32 version,
int error
),
TP_ARGS(program, version, error),
TP_STRUCT__entry(
__field(u32, version)
__field(int, error)
__string(program, program)
),
TP_fast_assign(
__entry->version = version;
__entry->error = error;
__assign_str(program, program);
),
TP_printk("program=%sv%u error=%d",
__get_str(program), __entry->version, __entry->error
)
);
#endif /* _TRACE_SUNRPC_H */
#include <trace/define_trace.h>
......@@ -46,6 +46,7 @@ struct kthread_create_info
struct kthread {
unsigned long flags;
unsigned int cpu;
int (*threadfn)(void *);
void *data;
struct completion parked;
struct completion exited;
......@@ -152,6 +153,20 @@ bool kthread_freezable_should_stop(bool *was_frozen)
}
EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
/**
* kthread_func - return the function specified on kthread creation
* @task: kthread task in question
*
* Returns NULL if the task is not a kthread.
*/
void *kthread_func(struct task_struct *task)
{
if (task->flags & PF_KTHREAD)
return to_kthread(task)->threadfn;
return NULL;
}
EXPORT_SYMBOL_GPL(kthread_func);
/**
* kthread_data - return data value specified on kthread creation
* @task: kthread task in question
......@@ -164,6 +179,7 @@ void *kthread_data(struct task_struct *task)
{
return to_kthread(task)->data;
}
EXPORT_SYMBOL_GPL(kthread_data);
/**
* kthread_probe_data - speculative version of kthread_data()
......@@ -244,6 +260,7 @@ static int kthread(void *_create)
do_exit(-ENOMEM);
}
self->threadfn = threadfn;
self->data = data;
init_completion(&self->exited);
init_completion(&self->parked);
......
......@@ -37,6 +37,8 @@ gss_mech_free(struct gss_api_mech *gm)
for (i = 0; i < gm->gm_pf_num; i++) {
pf = &gm->gm_pfs[i];
if (pf->domain)
auth_domain_put(pf->domain);
kfree(pf->auth_domain_name);
pf->auth_domain_name = NULL;
}
......@@ -59,6 +61,7 @@ make_auth_domain_name(char *name)
static int
gss_mech_svc_setup(struct gss_api_mech *gm)
{
struct auth_domain *dom;
struct pf_desc *pf;
int i, status;
......@@ -68,10 +71,13 @@ gss_mech_svc_setup(struct gss_api_mech *gm)
status = -ENOMEM;
if (pf->auth_domain_name == NULL)
goto out;
status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor,
pf->auth_domain_name);
if (status)
dom = svcauth_gss_register_pseudoflavor(
pf->pseudoflavor, pf->auth_domain_name);
if (IS_ERR(dom)) {
status = PTR_ERR(dom);
goto out;
}
pf->domain = dom;
}
return 0;
out:
......
......@@ -223,7 +223,7 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
static char *gssp_stringify(struct xdr_netobj *netobj)
{
return kstrndup(netobj->data, netobj->len, GFP_KERNEL);
return kmemdup_nul(netobj->data, netobj->len, GFP_KERNEL);
}
static void gssp_hostbased_service(char **principal)
......
......@@ -809,7 +809,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom)
EXPORT_SYMBOL_GPL(svcauth_gss_flavor);
int
struct auth_domain *
svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
{
struct gss_domain *new;
......@@ -826,21 +826,23 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
new->h.flavour = &svcauthops_gss;
new->pseudoflavor = pseudoflavor;
stat = 0;
test = auth_domain_lookup(name, &new->h);
if (test != &new->h) { /* Duplicate registration */
if (test != &new->h) {
pr_warn("svc: duplicate registration of gss pseudo flavour %s.\n",
name);
stat = -EADDRINUSE;
auth_domain_put(test);
kfree(new->h.name);
goto out_free_dom;
goto out_free_name;
}
return 0;
return test;
out_free_name:
kfree(new->h.name);
out_free_dom:
kfree(new);
out:
return stat;
return ERR_PTR(stat);
}
EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor);
static inline int
......
......@@ -795,12 +795,6 @@ void rpcb_getport_async(struct rpc_task *task)
child = rpcb_call_async(rpcb_clnt, map, proc);
rpc_release_client(rpcb_clnt);
if (IS_ERR(child)) {
/* rpcb_map_release() has freed the arguments */
dprintk("RPC: %5u %s: rpc_run_task failed\n",
task->tk_pid, __func__);
return;
}
xprt->stat.bind_count++;
rpc_put_task(child);
......
......@@ -52,4 +52,5 @@ static inline int sock_is_loopback(struct sock *sk)
int rpc_clients_notifier_register(void);
void rpc_clients_notifier_unregister(void);
void auth_domain_cleanup(void);
#endif /* _NET_SUNRPC_SUNRPC_H */
......@@ -23,6 +23,7 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h>
#include "sunrpc.h"
#include "netns.h"
unsigned int sunrpc_net_id;
......@@ -131,6 +132,7 @@ cleanup_sunrpc(void)
unregister_rpc_pipefs();
rpc_destroy_mempool();
unregister_pernet_subsys(&sunrpc_net_ops);
auth_domain_cleanup();
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
rpc_unregister_sysctl();
#endif
......
......@@ -88,15 +88,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp)
switch (*ip)
{
case SVC_POOL_AUTO:
return strlcpy(buf, "auto", 20);
return strlcpy(buf, "auto\n", 20);
case SVC_POOL_GLOBAL:
return strlcpy(buf, "global", 20);
return strlcpy(buf, "global\n", 20);
case SVC_POOL_PERCPU:
return strlcpy(buf, "percpu", 20);
return strlcpy(buf, "percpu\n", 20);
case SVC_POOL_PERNODE:
return strlcpy(buf, "pernode", 20);
return strlcpy(buf, "pernode\n", 20);
default:
return sprintf(buf, "%d", *ip);
return sprintf(buf, "%d\n", *ip);
}
}
......@@ -991,6 +991,7 @@ static int __svc_register(struct net *net, const char *progname,
#endif
}
trace_svc_register(progname, version, protocol, port, family, error);
return error;
}
......@@ -1000,11 +1001,6 @@ int svc_rpcbind_set_version(struct net *net,
unsigned short proto,
unsigned short port)
{
dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n",
progp->pg_name, version,
proto == IPPROTO_UDP? "udp" : "tcp",
port, family);
return __svc_register(net, progp->pg_name, progp->pg_prog,
version, family, proto, port);
......@@ -1024,11 +1020,8 @@ int svc_generic_rpcbind_set(struct net *net,
return 0;
if (vers->vs_hidden) {
dprintk("svc: svc_register(%sv%d, %s, %u, %u)"
" (but not telling portmap)\n",
progp->pg_name, version,
proto == IPPROTO_UDP? "udp" : "tcp",
port, family);
trace_svc_noregister(progp->pg_name, version, proto,
port, family, 0);
return 0;
}
......@@ -1106,8 +1099,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
if (error == -EPROTONOSUPPORT)
error = rpcb_register(net, program, version, 0, 0);
dprintk("svc: %s(%sv%u), error %d\n",
__func__, progname, version, error);
trace_svc_unregister(progname, version, error);
}
/*
......@@ -1132,9 +1124,6 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
continue;
if (progp->pg_vers[i]->vs_hidden)
continue;
dprintk("svc: attempting to unregister %sv%u\n",
progp->pg_name, i);
__svc_unregister(net, progp->pg_prog, i, progp->pg_name);
}
}
......
......@@ -153,6 +153,7 @@ static void svc_xprt_free(struct kref *kref)
xprt_put(xprt->xpt_bc_xprt);
if (xprt->xpt_bc_xps)
xprt_switch_put(xprt->xpt_bc_xps);
trace_svc_xprt_free(xprt);
xprt->xpt_ops->xpo_free(xprt);
module_put(owner);
}
......@@ -206,6 +207,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
.sin6_port = htons(port),
};
#endif
struct svc_xprt *xprt;
struct sockaddr *sap;
size_t len;
......@@ -224,7 +226,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return ERR_PTR(-EAFNOSUPPORT);
}
return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(xprt))
trace_svc_xprt_create_err(serv->sv_program->pg_name,
xcl->xcl_name, sap, xprt);
return xprt;
}
/*
......@@ -304,15 +310,11 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
{
int err;
dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
if (err == -EPROTONOSUPPORT) {
request_module("svc%s", xprt_name);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
}
if (err < 0)
dprintk("svc: transport %s not found, err %d\n",
xprt_name, -err);
return err;
}
EXPORT_SYMBOL_GPL(svc_create_xprt);
......@@ -780,7 +782,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
int len = 0;
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
dprintk("svc_recv: found XPT_CLOSE\n");
if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
svc_delete_xprt(xprt);
......@@ -799,6 +800,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
if (newxpt) {
newxpt->xpt_cred = get_cred(xprt->xpt_cred);
svc_add_new_temp_xprt(serv, newxpt);
trace_svc_xprt_accept(newxpt, serv->sv_name);
} else
module_put(xprt->xpt_class->xcl_owner);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
......@@ -835,14 +837,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
struct svc_serv *serv = rqstp->rq_server;
int len, err;
dprintk("svc: server %p waiting for data (to = %ld)\n",
rqstp, timeout);
if (rqstp->rq_xprt)
printk(KERN_ERR
"svc_recv: service %p, transport not NULL!\n",
rqstp);
err = svc_alloc_arg(rqstp);
if (err)
goto out;
......@@ -890,7 +884,6 @@ EXPORT_SYMBOL_GPL(svc_recv);
void svc_drop(struct svc_rqst *rqstp)
{
trace_svc_drop(rqstp);
dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp);
}
EXPORT_SYMBOL_GPL(svc_drop);
......@@ -914,16 +907,10 @@ int svc_send(struct svc_rqst *rqstp)
xb->page_len +
xb->tail[0].iov_len;
trace_svc_sendto(xb);
/* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex);
trace_svc_stats_latency(rqstp);
if (test_bit(XPT_DEAD, &xprt->xpt_flags)
|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
len = -ENOTCONN;
else
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
len = xprt->xpt_ops->xpo_sendto(rqstp);
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
......@@ -1031,11 +1018,10 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
struct svc_serv *serv = xprt->xpt_server;
struct svc_deferred_req *dr;
/* Only do this once */
if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
BUG();
return;
dprintk("svc: svc_delete_xprt(%p)\n", xprt);
trace_svc_xprt_detach(xprt);
xprt->xpt_ops->xpo_detach(xprt);
if (xprt->xpt_bc_xprt)
xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);
......@@ -1056,6 +1042,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
void svc_close_xprt(struct svc_xprt *xprt)
{
trace_svc_xprt_close(xprt);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
/* someone else will have to effect the close */
......@@ -1158,16 +1145,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
set_bit(XPT_DEFERRED, &xprt->xpt_flags);
if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
spin_unlock(&xprt->xpt_lock);
dprintk("revisit canceled\n");
trace_svc_defer_drop(dr);
svc_xprt_put(xprt);
trace_svc_drop_deferred(dr);
kfree(dr);
return;
}
dprintk("revisit queued\n");
dr->xprt = NULL;
list_add(&dr->handle.recent, &xprt->xpt_deferred);
spin_unlock(&xprt->xpt_lock);
trace_svc_defer_queue(dr);
svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
......@@ -1213,22 +1199,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
dr->argslen << 2);
}
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
trace_svc_defer(rqstp);
return &dr->handle;
}
/*
* recv data from a deferred request into an active one
*/
static int svc_deferred_recv(struct svc_rqst *rqstp)
static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
{
struct svc_deferred_req *dr = rqstp->rq_deferred;
trace_svc_defer_recv(dr);
/* setup iov_base past transport header */
rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
/* The iov_len does not include the transport header bytes */
......@@ -1259,7 +1247,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
struct svc_deferred_req,
handle.recent);
list_del_init(&dr->handle.recent);
trace_svc_revisit_deferred(dr);
} else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock);
......
......@@ -21,6 +21,8 @@
#include <trace/events/sunrpc.h>
#include "sunrpc.h"
#define RPCDBG_FACILITY RPCDBG_AUTH
......@@ -205,3 +207,26 @@ struct auth_domain *auth_domain_find(char *name)
return NULL;
}
EXPORT_SYMBOL_GPL(auth_domain_find);
/**
* auth_domain_cleanup - check that the auth_domain table is empty
*
* On module unload the auth_domain_table must be empty. To make it
* easier to catch bugs which don't clean up domains properly, we
* warn if anything remains in the table at cleanup time.
*
* Note that we cannot proactively remove the domains at this stage.
* The ->release() function might be in a module that has already been
* unloaded.
*/
void auth_domain_cleanup(void)
{
int h;
struct auth_domain *hp;
for (h = 0; h < DN_HASHMAX; h++)
hlist_for_each_entry(hp, &auth_domain_table[h], hash)
pr_warn("svc: domain %s still present at module unload.\n",
hp->name);
}
......@@ -332,15 +332,6 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
return 0;
}
static inline int ip_map_update(struct net *net, struct ip_map *ipm,
struct unix_domain *udom, time64_t expiry)
{
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
}
void svcauth_unix_purge(struct net *net)
{
struct sunrpc_net *sn;
......
......@@ -45,7 +45,6 @@
#include <net/tcp_states.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <trace/events/skb.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
......@@ -55,6 +54,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>
#include <trace/events/sunrpc.h>
#include "socklib.h"
#include "sunrpc.h"
......@@ -108,31 +109,35 @@ static void svc_reclassify_socket(struct socket *sock)
}
#endif
/*
* Release an skbuff after use
/**
* svc_tcp_release_rqst - Release transport-related resources
* @rqstp: request structure with resources to be released
*
*/
static void svc_release_skb(struct svc_rqst *rqstp)
static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
if (skb) {
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
rqstp->rq_xprt_ctxt = NULL;
dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
rqstp->rq_xprt_ctxt = NULL;
skb_free_datagram_locked(svsk->sk_sk, skb);
}
}
static void svc_release_udp_skb(struct svc_rqst *rqstp)
/**
* svc_udp_release_rqst - Release transport-related resources
* @rqstp: request structure with resources to be released
*
*/
static void svc_udp_release_rqst(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
if (skb) {
rqstp->rq_xprt_ctxt = NULL;
dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
consume_skb(skb);
}
}
......@@ -218,34 +223,68 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
{
struct bvec_iter bi = {
.bi_size = size,
};
struct bio_vec bv;
bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
for_each_bvec(bv, bvec, bi, bi)
flush_dcache_page(bv.bv_page);
}
#else
static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size,
size_t seek)
{
}
#endif
/*
* Generic recvfrom routine.
* Read from @rqstp's transport socket. The incoming message fills whole
* pages in @rqstp's rq_pages array until the last page of the message
* has been received into a partial page.
*/
static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
unsigned int nr, size_t buflen, unsigned int base)
static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
size_t seek)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct bio_vec *bvec = rqstp->rq_bvec;
struct msghdr msg = { NULL };
unsigned int i;
ssize_t len;
size_t t;
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
if (base != 0) {
iov_iter_advance(&msg.msg_iter, base);
buflen -= base;
for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
bvec[i].bv_page = rqstp->rq_pages[i];
bvec[i].bv_len = PAGE_SIZE;
bvec[i].bv_offset = 0;
}
rqstp->rq_respages = &rqstp->rq_pages[i];
rqstp->rq_next_page = rqstp->rq_respages + 1;
iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen);
if (seek) {
iov_iter_advance(&msg.msg_iter, seek);
buflen -= seek;
}
len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
if (len > 0)
svc_flush_bvec(bvec, len, seek);
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
if (len == buflen)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n",
svsk, iov[0].iov_base, iov[0].iov_len, len);
return len;
}
......@@ -282,13 +321,10 @@ static void svc_data_ready(struct sock *sk)
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
if (svsk) {
dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_odata(sk);
trace_svcsock_data_ready(&svsk->sk_xprt, 0);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
}
......@@ -302,11 +338,9 @@ static void svc_write_space(struct sock *sk)
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
/* Refer to svc_setup_socket() for details. */
rmb();
trace_svcsock_write_space(&svsk->sk_xprt, 0);
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
......@@ -383,8 +417,15 @@ static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
return 0;
}
/*
* Receive a datagram from a UDP socket.
/**
* svc_udp_recvfrom - Receive a datagram from a UDP socket.
* @rqstp: request structure into which to receive an RPC Call
*
* Called in a loop when XPT_DATA has been set.
*
* Returns:
* On success, the number of bytes in a received RPC Call, or
* %0 if a complete RPC Call message was not ready to return
*/
static int svc_udp_recvfrom(struct svc_rqst *rqstp)
{
......@@ -418,20 +459,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
skb = NULL;
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
0, 0, MSG_PEEK | MSG_DONTWAIT);
if (err >= 0)
skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
if (skb == NULL) {
if (err != -EAGAIN) {
/* possibly an icmp error */
dprintk("svc: recvfrom returned error %d\n", -err);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
}
return 0;
}
if (err < 0)
goto out_recv_err;
skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
if (!skb)
goto out_recv_err;
len = svc_addr_len(svc_addr(rqstp));
rqstp->rq_addrlen = len;
if (skb->tstamp == 0) {
......@@ -442,26 +477,21 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
sock_write_timestamp(svsk->sk_sk, skb->tstamp);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
len = skb->len;
len = skb->len;
rqstp->rq_arg.len = len;
trace_svcsock_udp_recv(&svsk->sk_xprt, len);
rqstp->rq_prot = IPPROTO_UDP;
if (!svc_udp_get_dest_address(rqstp, cmh)) {
net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
cmh->cmsg_level, cmh->cmsg_type);
goto out_free;
}
if (!svc_udp_get_dest_address(rqstp, cmh))
goto out_cmsg_err;
rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));
if (skb_is_nonlinear(skb)) {
/* we have to copy */
local_bh_disable();
if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
local_bh_enable();
/* checksum error */
goto out_free;
}
if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb))
goto out_bh_enable;
local_bh_enable();
consume_skb(skb);
} else {
......@@ -489,6 +519,20 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
serv->sv_stats->netudpcnt++;
return len;
out_recv_err:
if (err != -EAGAIN) {
/* possibly an icmp error */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
}
trace_svcsock_udp_recv_err(&svsk->sk_xprt, err);
return 0;
out_cmsg_err:
net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
cmh->cmsg_level, cmh->cmsg_type);
goto out_free;
out_bh_enable:
local_bh_enable();
out_free:
kfree_skb(skb);
return 0;
......@@ -498,6 +542,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
* svc_udp_sendto - Send out a reply on a UDP socket
* @rqstp: completed svc_rqst
*
* xpt_mutex ensures @rqstp's whole message is written to the socket
* without interruption.
*
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_udp_sendto(struct svc_rqst *rqstp)
......@@ -519,10 +566,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
svc_release_udp_skb(rqstp);
svc_udp_release_rqst(rqstp);
svc_set_cmsg_data(rqstp, cmh);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
......@@ -530,9 +582,16 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
}
trace_svcsock_udp_send(xprt, err);
mutex_unlock(&xprt->xpt_mutex);
if (err < 0)
return err;
return sent;
out_notconn:
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
}
static int svc_udp_has_wspace(struct svc_xprt *xprt)
......@@ -576,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
.xpo_read_payload = svc_sock_read_payload,
.xpo_release_rqst = svc_release_udp_skb,
.xpo_release_rqst = svc_udp_release_rqst,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_udp_has_wspace,
......@@ -632,9 +691,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
......@@ -655,8 +711,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
if (svsk) {
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
} else
printk("svc: socket %p: no user data\n", sk);
}
}
}
......@@ -667,15 +722,11 @@ static void svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
sk, sk->sk_state, sk->sk_user_data);
if (!svsk)
printk("svc: socket %p: no user data\n", sk);
else {
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_ostate(sk);
trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
......@@ -696,9 +747,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
struct socket *newsock;
struct svc_sock *newsvsk;
int err, slen;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
return NULL;
......@@ -711,30 +760,18 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
else if (err != -EAGAIN)
net_warn_ratelimited("%s: accept failed (err %d)!\n",
serv->sv_name, -err);
trace_svcsock_accept_err(xprt, serv->sv_name, err);
return NULL;
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_getpeername(newsock, sin);
if (err < 0) {
net_warn_ratelimited("%s: peername failed (err %d)!\n",
serv->sv_name, -err);
trace_svcsock_getpeername_err(xprt, serv->sv_name, err);
goto failed; /* aborted connection or whatever */
}
slen = err;
/* Ideally, we would want to reject connections from unauthorized
* hosts here, but when we get encryption, the IP of the host won't
* tell us anything. For now just warn about unpriv connections.
*/
if (!svc_port_is_privileged(sin)) {
dprintk("%s: connect from unprivileged port: %s\n",
serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
}
dprintk("%s: connect from %s\n", serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
/* Reset the inherited callbacks before calling svc_setup_socket */
newsock->sk->sk_state_change = svsk->sk_ostate;
newsock->sk->sk_data_ready = svsk->sk_odata;
......@@ -752,10 +789,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
err = kernel_getsockname(newsock, sin);
slen = err;
if (unlikely(err < 0)) {
dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
if (unlikely(err < 0))
slen = offsetof(struct sockaddr, sa_data);
}
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
if (sock_is_loopback(newsock->sk))
......@@ -772,13 +807,14 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
return NULL;
}
static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
static size_t svc_tcp_restore_pages(struct svc_sock *svsk,
struct svc_rqst *rqstp)
{
unsigned int i, len, npages;
size_t len = svsk->sk_datalen;
unsigned int i, npages;
if (svsk->sk_datalen == 0)
if (!len)
return 0;
len = svsk->sk_datalen;
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
if (rqstp->rq_pages[i] != NULL)
......@@ -827,47 +863,45 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
}
/*
* Receive fragment record header.
* If we haven't gotten the record length yet, get the next four bytes.
* Receive fragment record header into sk_marker.
*/
static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
struct svc_rqst *rqstp)
{
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
unsigned int want;
int len;
ssize_t want, len;
/* If we haven't gotten the record length yet,
* get the next four bytes.
*/
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
struct msghdr msg = { NULL };
struct kvec iov;
want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
iov.iov_len = want;
len = svc_recvfrom(rqstp, &iov, 1, want, 0);
iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want);
len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
if (len < 0)
goto error;
return len;
svsk->sk_tcplen += len;
if (len < want) {
dprintk("svc: short recvfrom while reading record "
"length (%d of %d)\n", len, want);
return -EAGAIN;
/* call again to read the remaining bytes */
goto err_short;
}
dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker);
if (svc_sock_reclen(svsk) + svsk->sk_datalen >
serv->sv_max_mesg) {
net_notice_ratelimited("RPC: fragment too large: %d\n",
svc_sock_reclen(svsk));
goto err_delete;
}
svsk->sk_xprt.xpt_server->sv_max_mesg)
goto err_too_large;
}
return svc_sock_reclen(svsk);
error:
dprintk("RPC: TCP recv_record got %d\n", len);
return len;
err_delete:
err_too_large:
net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
__func__, svsk->sk_xprt.xpt_server->sv_name,
svc_sock_reclen(svsk));
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_short:
return -EAGAIN;
}
......@@ -916,87 +950,58 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
return -EAGAIN;
}
static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
{
int i = 0;
int t = 0;
while (t < len) {
vec[i].iov_base = page_address(pages[i]);
vec[i].iov_len = PAGE_SIZE;
i++;
t += PAGE_SIZE;
}
return i;
}
static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
/* If we have more data, signal svc_xprt_enqueue() to try again */
dprintk("svc: TCP %s record (%d bytes)\n",
svc_sock_final_rec(svsk) ? "final" : "nonfinal",
svc_sock_reclen(svsk));
svsk->sk_tcplen = 0;
svsk->sk_reclen = 0;
svsk->sk_marker = xdr_zero;
}
/*
* Receive data from a TCP socket.
/**
* svc_tcp_recvfrom - Receive data from a TCP socket
* @rqstp: request structure into which to receive an RPC Call
*
* Called in a loop when XPT_DATA has been set.
*
* Read the 4-byte stream record marker, then use the record length
* in that marker to set up exactly the resources needed to receive
* the next RPC message into @rqstp.
*
* Returns:
* On success, the number of bytes in a received RPC Call, or
* %0 if a complete RPC Call message was not ready to return
*
* The zero return case handles partial receives and callback Replies.
* The state of a partial receive is preserved in the svc_sock for
* the next call to svc_tcp_recvfrom.
*/
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int len;
struct kvec *vec;
unsigned int want, base;
size_t want, base;
ssize_t len;
__be32 *p;
__be32 calldir;
int pnum;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
len = svc_tcp_recv_record(svsk, rqstp);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
len = svc_tcp_read_marker(svsk, rqstp);
if (len < 0)
goto error;
base = svc_tcp_restore_pages(svsk, rqstp);
want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
vec = rqstp->rq_vec;
pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want);
rqstp->rq_respages = &rqstp->rq_pages[pnum];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Now receive data */
len = svc_recvfrom(rqstp, vec, pnum, base + want, base);
want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
len = svc_tcp_read_msg(rqstp, base + want, base);
if (len >= 0) {
trace_svcsock_tcp_recv(&svsk->sk_xprt, len);
svsk->sk_tcplen += len;
svsk->sk_datalen += len;
}
if (len != want || !svc_sock_final_rec(svsk)) {
svc_tcp_save_pages(svsk, rqstp);
if (len < 0 && len != -EAGAIN)
goto err_delete;
if (len == want)
svc_tcp_fragment_received(svsk);
else
dprintk("svc: incomplete TCP record (%d of %d)\n",
(int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)),
svc_sock_reclen(svsk));
goto err_noclose;
}
if (svsk->sk_datalen < 8) {
svsk->sk_datalen = 0;
goto err_delete; /* client is nuts. */
}
if (len != want || !svc_sock_final_rec(svsk))
goto err_incomplete;
if (svsk->sk_datalen < 8)
goto err_nuts;
rqstp->rq_arg.len = svsk->sk_datalen;
rqstp->rq_arg.page_base = 0;
......@@ -1031,14 +1036,26 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
return rqstp->rq_arg.len;
err_incomplete:
svc_tcp_save_pages(svsk, rqstp);
if (len < 0 && len != -EAGAIN)
goto err_delete;
if (len == want)
svc_tcp_fragment_received(svsk);
else
trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
svc_sock_reclen(svsk),
svsk->sk_tcplen - sizeof(rpc_fraghdr));
goto err_noclose;
error:
if (len != -EAGAIN)
goto err_delete;
dprintk("RPC: TCP recvfrom got EAGAIN\n");
trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
return 0;
err_nuts:
svsk->sk_datalen = 0;
err_delete:
printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
svsk->sk_xprt.xpt_server->sv_name, -len);
trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_noclose:
return 0; /* record not complete */
......@@ -1048,6 +1065,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
* svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
*
* xpt_mutex ensures @rqstp's whole message is written to the socket
* without interruption.
*
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_tcp_sendto(struct svc_rqst *rqstp)
......@@ -1063,14 +1083,22 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
svc_release_skb(rqstp);
svc_tcp_release_rqst(rqstp);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
mutex_unlock(&xprt->xpt_mutex);
return sent;
out_notconn:
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
......@@ -1078,6 +1106,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
......@@ -1094,7 +1123,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
.xpo_read_payload = svc_sock_read_payload,
.xpo_release_rqst = svc_release_skb,
.xpo_release_rqst = svc_tcp_release_rqst,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_tcp_has_wspace,
......@@ -1132,18 +1161,16 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
} else {
dprintk("setting up TCP socket for reading\n");
sk->sk_state_change = svc_tcp_state_change;
sk->sk_data_ready = svc_data_ready;
sk->sk_write_space = svc_write_space;
svsk->sk_reclen = 0;
svsk->sk_marker = xdr_zero;
svsk->sk_tcplen = 0;
svsk->sk_datalen = 0;
memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
......@@ -1188,7 +1215,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
int err = 0;
dprintk("svc: svc_setup_socket %p\n", sock);
svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);
......@@ -1225,12 +1251,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
dprintk("svc: svc_setup_socket created %p (inet %p), "
"listen %d close %d\n",
svsk, svsk->sk_sk,
test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
trace_svcsock_new_socket(sock);
return svsk;
}
......@@ -1322,11 +1343,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
int family;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
serv->sv_program->pg_name, protocol,
__svc_print_addr(sin, buf, sizeof(buf)));
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
......@@ -1383,7 +1399,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
return (struct svc_xprt *)svsk;
bummer:
dprintk("svc: svc_create_socket error = %d\n", -error);
sock_release(sock);
return ERR_PTR(error);
}
......@@ -1397,8 +1412,6 @@ static void svc_sock_detach(struct svc_xprt *xprt)
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk;
dprintk("svc: svc_sock_detach(%p)\n", svsk);
/* put back the old socket callbacks */
lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate;
......@@ -1415,8 +1428,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk);
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
......@@ -1431,7 +1442,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
dprintk("svc: svc_sock_free(%p)\n", svsk);
if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
......
......@@ -10,59 +10,34 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
#undef SVCRDMA_BACKCHANNEL_DEBUG
/**
* svc_rdma_handle_bc_reply - Process incoming backchannel reply
* @xprt: controlling backchannel transport
* @rdma_resp: pointer to incoming transport header
* @rcvbuf: XDR buffer into which to decode the reply
* svc_rdma_handle_bc_reply - Process incoming backchannel Reply
* @rqstp: resources for handling the Reply
* @rctxt: Received message
*
* Returns:
* %0 if @rcvbuf is filled in, xprt_complete_rqst called,
* %-EAGAIN if server should call ->recvfrom again.
*/
int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
struct xdr_buf *rcvbuf)
void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *rctxt)
{
struct svc_xprt *sxprt = rqstp->rq_xprt;
struct rpc_xprt *xprt = sxprt->xpt_bc_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct xdr_buf *rcvbuf = &rqstp->rq_arg;
struct kvec *dst, *src = &rcvbuf->head[0];
__be32 *rdma_resp = rctxt->rc_recv_buf;
struct rpc_rqst *req;
u32 credits;
size_t len;
__be32 xid;
__be32 *p;
int ret;
p = (__be32 *)src->iov_base;
len = src->iov_len;
xid = *rdma_resp;
#ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: xid=%08x, length=%zu\n",
__func__, be32_to_cpu(xid), len);
pr_info("%s: RPC/RDMA: %*ph\n",
__func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp);
pr_info("%s: RPC: %*ph\n",
__func__, (int)len, p);
#endif
ret = -EAGAIN;
if (src->iov_len < 24)
goto out_shortreply;
spin_lock(&xprt->queue_lock);
req = xprt_lookup_rqst(xprt, xid);
req = xprt_lookup_rqst(xprt, *rdma_resp);
if (!req)
goto out_notfound;
goto out_unlock;
dst = &req->rq_private_buf.head[0];
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
if (dst->iov_len < len)
if (dst->iov_len < src->iov_len)
goto out_unlock;
memcpy(dst->iov_base, p, len);
memcpy(dst->iov_base, src->iov_base, src->iov_len);
xprt_pin_rqst(req);
spin_unlock(&xprt->queue_lock);
......@@ -71,31 +46,17 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
credits = r_xprt->rx_buf.rb_bc_max_requests;
spin_lock(&xprt->transport_lock);
xprt->cwnd = credits << RPC_CWNDSHIFT;
spin_unlock(&xprt->transport_lock);
spin_lock(&xprt->queue_lock);
ret = 0;
xprt_complete_rqst(req->rq_task, rcvbuf->len);
xprt_unpin_rqst(req);
rcvbuf->len = 0;
out_unlock:
spin_unlock(&xprt->queue_lock);
out:
return ret;
out_shortreply:
dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
xprt, src->iov_len);
goto out;
out_notfound:
dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
xprt, be32_to_cpu(xid));
goto out_unlock;
}
/* Send a backwards direction RPC call.
......@@ -192,10 +153,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
*p++ = xdr_zero;
*p = xdr_zero;
#ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
#endif
rqst->rq_xtime = ktime_get();
rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
if (rc)
......@@ -206,45 +163,36 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
svc_rdma_send_ctxt_put(rdma, ctxt);
drop_connection:
dprintk("svcrdma: failed to send bc call\n");
return -ENOTCONN;
}
/* Send an RPC call on the passive end of a transport
* connection.
/**
* xprt_rdma_bc_send_request - Send a reverse-direction Call
* @rqst: rpc_rqst containing Call message to be sent
*
* Return values:
* %0 if the message was sent successfully
* %ENOTCONN if the message was not sent
*/
static int
xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
{
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
struct svcxprt_rdma *rdma;
struct svcxprt_rdma *rdma =
container_of(sxprt, struct svcxprt_rdma, sc_xprt);
int ret;
dprintk("svcrdma: sending bc call with xid: %08x\n",
be32_to_cpu(rqst->rq_xid));
if (test_bit(XPT_DEAD, &sxprt->xpt_flags))
return -ENOTCONN;
mutex_lock(&sxprt->xpt_mutex);
ret = -ENOTCONN;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
ret = rpcrdma_bc_send_request(rdma, rqst);
if (ret == -ENOTCONN)
svc_close_xprt(sxprt);
}
mutex_unlock(&sxprt->xpt_mutex);
if (ret < 0)
return ret;
return 0;
ret = rpcrdma_bc_send_request(rdma, rqst);
if (ret == -ENOTCONN)
svc_close_xprt(sxprt);
return ret;
}
static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
xprt_disconnect_done(xprt);
xprt->cwnd = RPC_CWNDSHIFT;
}
......@@ -252,8 +200,6 @@ xprt_rdma_bc_close(struct rpc_xprt *xprt)
static void
xprt_rdma_bc_put(struct rpc_xprt *xprt)
{
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
}
......@@ -288,19 +234,14 @@ xprt_setup_rdma_bc(struct xprt_create *args)
struct rpc_xprt *xprt;
struct rpcrdma_xprt *new_xprt;
if (args->addrlen > sizeof(xprt->addr)) {
dprintk("RPC: %s: address too large\n", __func__);
if (args->addrlen > sizeof(xprt->addr))
return ERR_PTR(-EBADF);
}
xprt = xprt_alloc(args->net, sizeof(*new_xprt),
RPCRDMA_MAX_BC_REQUESTS,
RPCRDMA_MAX_BC_REQUESTS);
if (!xprt) {
dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
__func__);
if (!xprt)
return ERR_PTR(-ENOMEM);
}
xprt->timeout = &xprt_rdma_bc_timeout;
xprt_set_bound(xprt);
......
......@@ -665,23 +665,23 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
return hdr_len;
out_short:
trace_svcrdma_decode_short(rq_arg->len);
trace_svcrdma_decode_short_err(rq_arg->len);
return -EINVAL;
out_version:
trace_svcrdma_decode_badvers(rdma_argp);
trace_svcrdma_decode_badvers_err(rdma_argp);
return -EPROTONOSUPPORT;
out_drop:
trace_svcrdma_decode_drop(rdma_argp);
trace_svcrdma_decode_drop_err(rdma_argp);
return 0;
out_proc:
trace_svcrdma_decode_badproc(rdma_argp);
trace_svcrdma_decode_badproc_err(rdma_argp);
return -EINVAL;
out_inval:
trace_svcrdma_decode_parse(rdma_argp);
trace_svcrdma_decode_parse_err(rdma_argp);
return -EINVAL;
}
......@@ -878,12 +878,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_drop;
rqstp->rq_xprt_hlen = ret;
if (svc_rdma_is_backchannel_reply(xprt, p)) {
ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
&rqstp->rq_arg);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
}
if (svc_rdma_is_backchannel_reply(xprt, p))
goto out_backchannel;
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
p += rpcrdma_fixed_maxsz;
......@@ -913,6 +910,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
out_backchannel:
svc_rdma_handle_bc_reply(rqstp, ctxt);
out_drop:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
......
......@@ -9,13 +9,10 @@
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/debug.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
......@@ -39,7 +36,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
struct svc_rdma_rw_ctxt {
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
int rw_nents;
unsigned int rw_nents;
struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[];
};
......@@ -67,19 +64,22 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out;
goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
}
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
ctxt->rw_sg_table.sgl,
SG_CHUNK_SIZE)) {
kfree(ctxt);
ctxt = NULL;
}
out:
SG_CHUNK_SIZE))
goto out_free;
return ctxt;
out_free:
kfree(ctxt);
out_noctx:
trace_svcrdma_no_rwctx_err(rdma, sges);
return NULL;
}
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
......@@ -107,6 +107,34 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
}
}
/**
* svc_rdma_rw_ctx_init - Prepare a R/W context for I/O
* @rdma: controlling transport instance
* @ctxt: R/W context to prepare
* @offset: RDMA offset
* @handle: RDMA tag/handle
* @direction: I/O direction
*
* Returns on success, the number of WQEs that will be needed
* on the workqueue, or a negative errno.
*/
static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt,
u64 offset, u32 handle,
enum dma_data_direction direction)
{
int ret;
ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num,
ctxt->rw_sg_table.sgl, ctxt->rw_nents,
0, offset, handle, direction);
if (unlikely(ret < 0)) {
svc_rdma_put_rw_ctxt(rdma, ctxt);
trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret);
}
return ret;
}
/* A chunk context tracks all I/O for moving one Read or Write
* chunk. This is a a set of rdma_rw's that handle data movement
* for all segments of one chunk.
......@@ -428,15 +456,13 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
ctxt = svc_rdma_get_rw_ctxt(rdma,
(write_len >> PAGE_SHIFT) + 2);
if (!ctxt)
goto out_noctx;
return -ENOMEM;
constructor(info, write_len, ctxt);
ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, 0, seg_offset,
seg_handle, DMA_TO_DEVICE);
ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle,
DMA_TO_DEVICE);
if (ret < 0)
goto out_initerr;
return -EIO;
trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset);
......@@ -455,18 +481,9 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
return 0;
out_overflow:
dprintk("svcrdma: inadequate space in Write chunk (%u)\n",
info->wi_nsegs);
trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no,
info->wi_nsegs);
return -E2BIG;
out_noctx:
dprintk("svcrdma: no R/W ctxs available\n");
return -ENOMEM;
out_initerr:
svc_rdma_put_rw_ctxt(rdma, ctxt);
trace_svcrdma_dma_map_rwctx(rdma, ret);
return -EIO;
}
/* Send one of an xdr_buf's kvecs by itself. To send a Reply
......@@ -616,7 +633,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
if (!ctxt)
goto out_noctx;
return -ENOMEM;
ctxt->rw_nents = sge_no;
sg = ctxt->rw_sg_table.sgl;
......@@ -646,29 +663,18 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
goto out_overrun;
}
ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp,
cc->cc_rdma->sc_port_num,
ctxt->rw_sg_table.sgl, ctxt->rw_nents,
0, offset, rkey, DMA_FROM_DEVICE);
ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey,
DMA_FROM_DEVICE);
if (ret < 0)
goto out_initerr;
return -EIO;
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
return 0;
out_noctx:
dprintk("svcrdma: no R/W ctxs available\n");
return -ENOMEM;
out_overrun:
dprintk("svcrdma: request overruns rq_pages\n");
trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno);
return -EINVAL;
out_initerr:
trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret);
svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
return -EIO;
}
/* Walk the segments in the Read chunk starting at @p and construct
......
......@@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
__be32 *p;
int ret;
/* Create the RDMA response header. xprt->xpt_mutex,
* acquired in svc_send(), serializes RPC replies. The
* code path below that inserts the credit grant value
* into each transport header runs only inside this
* critical section.
*/
ret = -ENOTCONN;
if (svc_xprt_is_dead(xprt))
goto err0;
ret = -ENOMEM;
sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt)
......
......@@ -211,7 +211,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
newxprt->sc_ord = param->initiator_depth;
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa);
memcpy(&newxprt->sc_xprt.xpt_remote, sa,
newxprt->sc_xprt.xpt_remotelen);
snprintf(newxprt->sc_xprt.xpt_remotebuf,
sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa);
/* The remote port is arbitrary and not under the control of the
* client ULP. Set it to a fixed value so that the DRC continues
* to be effective after a reconnect.
......@@ -309,11 +314,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct svcxprt_rdma *cma_xprt;
int ret;
dprintk("svcrdma: Creating RDMA listener\n");
if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
}
cma_xprt = svc_rdma_create_xprt(serv, net);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
......@@ -324,7 +326,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
goto err0;
}
......@@ -333,23 +334,17 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
*/
#if IS_ENABLED(CONFIG_IPV6)
ret = rdma_set_afonly(listen_id, 1);
if (ret) {
dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
if (ret)
goto err1;
}
#endif
ret = rdma_bind_addr(listen_id, sa);
if (ret) {
dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
if (ret)
goto err1;
}
cma_xprt->sc_cm_id = listen_id;
ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
if (ret) {
dprintk("svcrdma: rdma_listen failed = %d\n", ret);
if (ret)
goto err1;
}
/*
* We need to use the address from the cm_id in case the
......@@ -405,9 +400,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!newxprt)
return NULL;
dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
newxprt, newxprt->sc_cm_id);
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
......@@ -443,21 +435,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd));
goto errout;
}
newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_sq_cq)) {
dprintk("svcrdma: error creating SQ CQ for connect request\n");
if (IS_ERR(newxprt->sc_sq_cq))
goto errout;
}
newxprt->sc_rq_cq =
ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_rq_cq)) {
dprintk("svcrdma: error creating RQ CQ for connect request\n");
if (IS_ERR(newxprt->sc_rq_cq))
goto errout;
}
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.event_handler = qp_event_handler;
......@@ -481,7 +469,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
newxprt->sc_qp = newxprt->sc_cm_id->qp;
......@@ -489,8 +477,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
newxprt->sc_snd_w_inv = false;
if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
!rdma_ib_or_roce(dev, newxprt->sc_port_num))
!rdma_ib_or_roce(dev, newxprt->sc_port_num)) {
trace_svcrdma_fabric_err(newxprt, -EINVAL);
goto errout;
}
if (!svc_rdma_post_recvs(newxprt))
goto errout;
......@@ -512,15 +502,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
dev->attrs.max_qp_init_rd_atom);
if (!conn_param.initiator_depth) {
dprintk("svcrdma: invalid ORD setting\n");
ret = -EINVAL;
trace_svcrdma_initdepth_err(newxprt, ret);
goto errout;
}
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
if (ret)
if (ret) {
trace_svcrdma_accept_err(newxprt, ret);
goto errout;
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
......@@ -535,12 +527,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" ord : %d\n", conn_param.initiator_depth);
#endif
trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt;
errout:
dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
/* Take a reference in case the DTO handler runs */
svc_xprt_get(&newxprt->sc_xprt);
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
......@@ -578,8 +567,6 @@ static void __svc_rdma_free(struct work_struct *work)
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;
trace_svcrdma_xprt_free(xprt);
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
......
......@@ -2528,8 +2528,16 @@ static int bc_sendto(struct rpc_rqst *req)
return sent;
}
/*
* The send routine. Borrows from svc_send
/**
* bc_send_request - Send a backchannel Call on a TCP socket
* @req: rpc_rqst containing Call message to be sent
*
* xpt_mutex ensures @rqstp's whole message is written to the socket
* without interruption.
*
* Return values:
* %0 if the message was sent successfully
* %ENOTCONN if the message was not sent
*/
static int bc_send_request(struct rpc_rqst *req)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment