Commit b8ba293d authored by Neil Brown, committed by Linus Torvalds

[PATCH] knfsd: idempotent replay cache for OPEN state

This implements the idempotent replay cache needed for NFSv4 OPEN state.
Each state owner (open owner or lock owner) is required to store the
reply to its last sequence-number-mutating operation, and to retransmit
it when a replayed sequence number is presented for the operation.

I've implemented the cache as a static buffer of size 112 bytes
(NFSD4_REPLAY_ISIZE), which is large enough to hold the OPEN response,
the largest of the sequence-mutating operations.  This implements the
cache for OPEN, OPEN_CONFIRM, OPEN_DOWNGRADE, and CLOSE.  LOCK and UNLOCK
will be added when byte-range locking is done (soon!).
parent ca04cb99
...@@ -664,6 +664,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, ...@@ -664,6 +664,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
break; break;
case OP_CLOSE: case OP_CLOSE:
op->status = nfsd4_close(rqstp, &current_fh, &op->u.close); op->status = nfsd4_close(rqstp, &current_fh, &op->u.close);
op->replay = &op->u.close.cl_stateowner->so_replay;
break; break;
case OP_COMMIT: case OP_COMMIT:
op->status = nfsd4_commit(rqstp, &current_fh, &op->u.commit); op->status = nfsd4_commit(rqstp, &current_fh, &op->u.commit);
...@@ -693,12 +694,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, ...@@ -693,12 +694,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
break; break;
case OP_OPEN: case OP_OPEN:
op->status = nfsd4_open(rqstp, &current_fh, &op->u.open); op->status = nfsd4_open(rqstp, &current_fh, &op->u.open);
op->replay = &op->u.open.op_stateowner->so_replay;
break; break;
case OP_OPEN_CONFIRM: case OP_OPEN_CONFIRM:
op->status = nfsd4_open_confirm(rqstp, &current_fh, &op->u.open_confirm); op->status = nfsd4_open_confirm(rqstp, &current_fh, &op->u.open_confirm);
op->replay = &op->u.open_confirm.oc_stateowner->so_replay;
break; break;
case OP_OPEN_DOWNGRADE: case OP_OPEN_DOWNGRADE:
op->status = nfsd4_open_downgrade(rqstp, &current_fh, &op->u.open_downgrade); op->status = nfsd4_open_downgrade(rqstp, &current_fh, &op->u.open_downgrade);
op->replay = &op->u.open_downgrade.od_stateowner->so_replay;
break; break;
case OP_PUTFH: case OP_PUTFH:
op->status = nfsd4_putfh(rqstp, &current_fh, &op->u.putfh); op->status = nfsd4_putfh(rqstp, &current_fh, &op->u.putfh);
...@@ -753,8 +757,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, ...@@ -753,8 +757,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
} }
encode_op: encode_op:
nfsd4_encode_operation(resp, op); if (op->status == NFSERR_REPLAY_ME) {
status = op->status; nfsd4_encode_replay(resp, op);
status = op->status = NFS_OK;
} else {
nfsd4_encode_operation(resp, op);
status = op->status;
}
} }
out: out:
......
...@@ -759,6 +759,7 @@ free_stateowner(struct nfs4_stateowner *sop) { ...@@ -759,6 +759,7 @@ free_stateowner(struct nfs4_stateowner *sop) {
static struct nfs4_stateowner * static struct nfs4_stateowner *
alloc_init_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { alloc_init_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
struct nfs4_stateowner *sop; struct nfs4_stateowner *sop;
struct nfs4_replay *rp;
unsigned int idhashval; unsigned int idhashval;
if (!(sop = alloc_stateowner(&open->op_owner))) if (!(sop = alloc_stateowner(&open->op_owner)))
...@@ -776,6 +777,10 @@ alloc_init_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct n ...@@ -776,6 +777,10 @@ alloc_init_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct n
sop->so_client = clp; sop->so_client = clp;
sop->so_seqid = open->op_seqid; sop->so_seqid = open->op_seqid;
sop->so_confirmed = 0; sop->so_confirmed = 0;
rp = &sop->so_replay;
rp->rp_status = NFSERR_SERVERFAULT;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
alloc_sowner++; alloc_sowner++;
return sop; return sop;
} }
...@@ -1019,9 +1024,22 @@ nfsd4_process_open1(struct nfsd4_open *open) ...@@ -1019,9 +1024,22 @@ nfsd4_process_open1(struct nfsd4_open *open)
strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
if (find_stateowner_str(strhashval, open, &sop)) { if (find_stateowner_str(strhashval, open, &sop)) {
open->op_stateowner = sop; open->op_stateowner = sop;
/* check for replay */
if (open->op_seqid == sop->so_seqid){ if (open->op_seqid == sop->so_seqid){
/* XXX retplay: for now, return bad seqid */ if (!sop->so_replay.rp_buflen) {
status = nfserr_bad_seqid; /*
* The original OPEN failed so spectacularly that we
* don't even have replay data saved! Therefore, we
* have no choice but to continue processing
* this OPEN; presumably, we'll fail again for the same
* reason.
*/
dprintk("nfsd4_process_open1: replay with no replay cache\n");
status = NFS_OK;
goto renew;
}
/* replay: indicate to calling function */
status = NFSERR_REPLAY_ME;
goto out; goto out;
} }
if (sop->so_confirmed) { if (sop->so_confirmed) {
...@@ -1033,9 +1051,8 @@ nfsd4_process_open1(struct nfsd4_open *open) ...@@ -1033,9 +1051,8 @@ nfsd4_process_open1(struct nfsd4_open *open)
goto out; goto out;
} }
/* If we get here, we received and OPEN for an unconfirmed /* If we get here, we received and OPEN for an unconfirmed
* nfs4_stateowner. If seqid's are the same then this * nfs4_stateowner.
* is a replay. * Since the sequid's are different, purge the
* If the sequid's are different, then purge the
* existing nfs4_stateowner, and instantiate a new one. * existing nfs4_stateowner, and instantiate a new one.
*/ */
clp = sop->so_client; clp = sop->so_client;
...@@ -1367,8 +1384,6 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl ...@@ -1367,8 +1384,6 @@ nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int fl
/* /*
* Checks for sequence id mutating operations. * Checks for sequence id mutating operations.
*
* XXX need to code replay cache logic
*/ */
int int
nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp) nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp)
...@@ -1466,13 +1481,14 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei ...@@ -1466,13 +1481,14 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei
} }
check_replay: check_replay:
status = nfserr_bad_seqid;
if (seqid == sop->so_seqid) { if (seqid == sop->so_seqid) {
printk("NFSD: preprocess_seqid_op: retransmission?\n"); printk("NFSD: preprocess_seqid_op: retransmission?\n");
/* XXX will need to indicate replay to calling function here */ /* indicate replay to calling function */
status = NFSERR_REPLAY_ME;
} else } else
printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid); printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid);
status = nfserr_bad_seqid;
goto out; goto out;
} }
...@@ -1499,7 +1515,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs ...@@ -1499,7 +1515,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
sop->so_confirmed = 1; sop->so_confirmed = 1;
update_stateid(&stp->st_stateid); update_stateid(&stp->st_stateid);
memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
/* XXX renew the client lease here */
dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d "
"stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
stp->st_stateid.si_boot, stp->st_stateid.si_boot,
......
...@@ -932,6 +932,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) ...@@ -932,6 +932,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
for (i = 0; i < argp->opcnt; i++) { for (i = 0; i < argp->opcnt; i++) {
op = &argp->ops[i]; op = &argp->ops[i];
op->replay = NULL;
/* /*
* We can't use READ_BUF() here because we need to handle * We can't use READ_BUF() here because we need to handle
...@@ -1110,19 +1111,32 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) ...@@ -1110,19 +1111,32 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
} while (0) } while (0)
#define ADJUST_ARGS() resp->p = p #define ADJUST_ARGS() resp->p = p
/*
* Header routine to set up the seqid operation replay cache.
*
* Expands to the declarations of two u32 pointers, `p` and `save`,
* followed by an assignment that records the current value of resp->p
* in `save`.  A variable named `resp` must therefore be in scope at the
* point of use.
*
* `save` marks where this operation's encoded result begins, so that
* ENCODE_SEQID_OP_TAIL can later measure and copy the bytes written
* between `save` and the then-current resp->p.
*
* NOTE(review): the expansion ends in a statement, not a declaration;
* any further local declarations presumably have to come before this
* macro under C89 rules - confirm against the compiler settings in use.
*/
#define ENCODE_SEQID_OP_HEAD \
u32 *p; \
u32 *save; \
\
save = resp->p;
/* /*
* Routine for encoding the result of a * Routine for encoding the result of a
* "seqid-mutating" NFSv4 operation. This is * "seqid-mutating" NFSv4 operation. This is
* where seqids are incremented * where seqids are incremented, and the
* replay cache is filled.
*/ */
#define ENCODE_SEQID_OP_TAIL(stateowner) do { \ #define ENCODE_SEQID_OP_TAIL(stateowner) do { \
BUG_ON(!stateowner); \ if (seqid_mutating_err(nfserr) && stateowner) { \
if (seqid_mutating_err(nfserr) && stateowner) { \ if (stateowner->so_confirmed) \
if (stateowner->so_confirmed) \ stateowner->so_seqid++; \
stateowner->so_seqid++; \ stateowner->so_replay.rp_status = nfserr; \
} \ stateowner->so_replay.rp_buflen = \
} while(0) (((char *)(resp)->p - (char *)save)); \
memcpy(stateowner->so_replay.rp_buf, save, \
stateowner->so_replay.rp_buflen); \
} } while(0)
static u32 nfs4_ftypes[16] = { static u32 nfs4_ftypes[16] = {
...@@ -1623,7 +1637,7 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_acc ...@@ -1623,7 +1637,7 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_acc
static void static void
nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_close *close) nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_close *close)
{ {
ENCODE_HEAD; ENCODE_SEQID_OP_HEAD;
if (!nfserr) { if (!nfserr) {
RESERVE_SPACE(sizeof(stateid_t)); RESERVE_SPACE(sizeof(stateid_t));
...@@ -1631,8 +1645,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_clos ...@@ -1631,8 +1645,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_clos
WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t));
ADJUST_ARGS(); ADJUST_ARGS();
} }
if ((close->cl_stateowner) && (close->cl_stateowner->so_confirmed)) ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
close->cl_stateowner->so_seqid++;
} }
...@@ -1712,7 +1725,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link ...@@ -1712,7 +1725,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link
static void static void
nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open *open) nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open *open)
{ {
ENCODE_HEAD; ENCODE_SEQID_OP_HEAD;
if (nfserr) if (nfserr)
return; return;
...@@ -1776,7 +1789,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open ...@@ -1776,7 +1789,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open
static void static void
nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_confirm *oc) nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_confirm *oc)
{ {
ENCODE_HEAD; ENCODE_SEQID_OP_HEAD;
if (!nfserr) { if (!nfserr) {
RESERVE_SPACE(sizeof(stateid_t)); RESERVE_SPACE(sizeof(stateid_t));
...@@ -1791,7 +1804,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfs ...@@ -1791,7 +1804,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfs
static void static void
nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_downgrade *od) nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_downgrade *od)
{ {
ENCODE_HEAD; ENCODE_SEQID_OP_HEAD;
if (!nfserr) { if (!nfserr) {
RESERVE_SPACE(sizeof(stateid_t)); RESERVE_SPACE(sizeof(stateid_t));
...@@ -2170,6 +2183,30 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) ...@@ -2170,6 +2183,30 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
*statp = op->status; *statp = op->status;
} }
/*
* Encode the reply stored in the stateowner reply cache
*
* Writes two 32-bit words (the operation number, then a status word)
* and follows them with rp_buflen bytes copied from rp_buf.  The cache
* to replay is taken from op->replay, which must be non-NULL.
*
* XDR note: do not encode rp->rp_buflen: the buffer contains the
* previously sent already encoded operation.
*
* NOTE(review): the status word emitted here is always NFS_OK;
* rp->rp_status is not consulted.  If a failed operation can be cached
* (rp_status != NFS_OK, possibly with rp_buflen == 0), the replayed
* reply would claim success - confirm whether the cached status should
* be sent instead, and in which byte order rp_status is stored.
*/
void
nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
{
ENCODE_HEAD;
struct nfs4_replay *rp = op->replay;
/* A replay was requested, so the operation must have supplied a cache. */
BUG_ON(!rp);
/* Operation header: 8 bytes = opcode + status. */
RESERVE_SPACE(8);
WRITE32(op->opnum);
WRITE32(NFS_OK);
ADJUST_ARGS();
/* Cached body, copied verbatim with no length prefix. */
RESERVE_SPACE(rp->rp_buflen);
WRITEMEM(rp->rp_buf, rp->rp_buflen);
ADJUST_ARGS();
}
/* /*
* END OF "GENERIC" ENCODE ROUTINES. * END OF "GENERIC" ENCODE ROUTINES.
*/ */
......
...@@ -95,6 +95,27 @@ update_stateid(stateid_t *stateid) ...@@ -95,6 +95,27 @@ update_stateid(stateid_t *stateid)
stateid->si_generation++; stateid->si_generation++;
} }
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
 * The OPEN response, typically the largest, requires
 * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
 * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) +
 * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes
 *
 * This constant is the size, in bytes, of the inline rp_ibuf array in
 * struct nfs4_replay.
 *
 * NOTE(review): the ace figure is an approximation ("~32"), so this is
 * an estimate rather than a hard upper bound - confirm that whatever
 * fills the buffer cannot exceed it.
 */
#define NFSD4_REPLAY_ISIZE 112
/*
* Replay buffer, where the result of the last seqid-mutating operation
* is cached.
*
* rp_buf points at the cached, already-encoded reply bytes and
* rp_buflen says how many of them are valid; a length of zero is
* treated as "nothing cached".  rp_ibuf provides NFSD4_REPLAY_ISIZE
* bytes of inline storage.
*/
struct nfs4_replay {
/* Status recorded together with the cached reply. */
u32 rp_status;
/* Number of valid bytes at rp_buf. */
unsigned int rp_buflen;
/* Start of the cached reply bytes. */
char *rp_buf;
/*
* NOTE(review): as written this declares a member of type `unsigned`
* named `intrp_allocated`.  It was probably meant to be
* `unsigned int rp_allocated`; confirm that nothing refers to the
* current name before renaming it.
*/
unsigned intrp_allocated;
/* Inline storage for the cached reply. */
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
/* /*
* nfs4_stateowner can either be an open_owner, or (eventually) a lock_owner * nfs4_stateowner can either be an open_owner, or (eventually) a lock_owner
* *
...@@ -111,6 +132,7 @@ struct nfs4_stateowner { ...@@ -111,6 +132,7 @@ struct nfs4_stateowner {
u32 so_seqid; u32 so_seqid;
struct xdr_netobj so_owner; /* open owner name */ struct xdr_netobj so_owner; /* open owner name */
int so_confirmed; /* successful OPEN_CONFIRM? */ int so_confirmed; /* successful OPEN_CONFIRM? */
struct nfs4_replay so_replay;
}; };
/* /*
......
...@@ -284,6 +284,7 @@ struct nfsd4_op { ...@@ -284,6 +284,7 @@ struct nfsd4_op {
struct nfsd4_verify verify; struct nfsd4_verify verify;
struct nfsd4_write write; struct nfsd4_write write;
} u; } u;
struct nfs4_replay * replay;
}; };
struct nfsd4_compoundargs { struct nfsd4_compoundargs {
...@@ -339,6 +340,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, u32 *, ...@@ -339,6 +340,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, u32 *,
int nfs4svc_encode_compoundres(struct svc_rqst *, u32 *, int nfs4svc_encode_compoundres(struct svc_rqst *, u32 *,
struct nfsd4_compoundres *); struct nfsd4_compoundres *);
void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
int nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, int nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, u32 *buffer, int *countp, struct dentry *dentry, u32 *buffer, int *countp,
u32 *bmval); u32 *bmval);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment