Commit e604aad2 authored by Chuck Lever

svcrdma: Use struct xdr_stream to decode ingress transport headers

The logic that checks incoming network headers has to be scrupulous.

De-duplicate: replace open-coded buffer overflow checks with the use
of xdr_stream helpers that are used most everywhere else XDR
decoding is done.

One minor change to the sanity checks: instead of checking the
length of individual segments, cap the length of the whole chunk
to be sure it can fit in the set of pages available in rq_pages.
This should be a better test of whether the server can handle the
chunks in each request.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent 2426ddfd
...@@ -58,7 +58,8 @@ enum { ...@@ -58,7 +58,8 @@ enum {
enum { enum {
rpcrdma_fixed_maxsz = 4, rpcrdma_fixed_maxsz = 4,
rpcrdma_segment_maxsz = 4, rpcrdma_segment_maxsz = 4,
rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz, rpcrdma_readseg_maxsz = 1 + rpcrdma_segment_maxsz,
rpcrdma_readchunk_maxsz = 1 + rpcrdma_readseg_maxsz,
}; };
/* /*
......
...@@ -132,6 +132,7 @@ struct svc_rdma_recv_ctxt { ...@@ -132,6 +132,7 @@ struct svc_rdma_recv_ctxt {
struct ib_sge rc_recv_sge; struct ib_sge rc_recv_sge;
void *rc_recv_buf; void *rc_recv_buf;
struct xdr_buf rc_arg; struct xdr_buf rc_arg;
struct xdr_stream rc_stream;
bool rc_temp; bool rc_temp;
u32 rc_byte_len; u32 rc_byte_len;
unsigned int rc_page_count; unsigned int rc_page_count;
......
...@@ -1469,7 +1469,7 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event, ...@@ -1469,7 +1469,7 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
); );
#define DEFINE_SEGMENT_EVENT(name) \ #define DEFINE_SEGMENT_EVENT(name) \
DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\ DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\
TP_PROTO( \ TP_PROTO( \
u32 handle, \ u32 handle, \
u32 length, \ u32 length, \
...@@ -1477,8 +1477,9 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event, ...@@ -1477,8 +1477,9 @@ DECLARE_EVENT_CLASS(svcrdma_segment_event,
), \ ), \
TP_ARGS(handle, length, offset)) TP_ARGS(handle, length, offset))
DEFINE_SEGMENT_EVENT(rseg); DEFINE_SEGMENT_EVENT(decode_wseg);
DEFINE_SEGMENT_EVENT(wseg); DEFINE_SEGMENT_EVENT(encode_rseg);
DEFINE_SEGMENT_EVENT(encode_wseg);
DECLARE_EVENT_CLASS(svcrdma_chunk_event, DECLARE_EVENT_CLASS(svcrdma_chunk_event,
TP_PROTO( TP_PROTO(
......
...@@ -358,15 +358,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, ...@@ -358,15 +358,14 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
arg->len = ctxt->rc_byte_len; arg->len = ctxt->rc_byte_len;
} }
/* This accommodates the largest possible Write chunk, /* This accommodates the largest possible Write chunk.
* in one segment.
*/ */
#define MAX_BYTES_WRITE_SEG ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) #define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
/* This accommodates the largest possible Position-Zero /* This accommodates the largest possible Position-Zero
* Read chunk or Reply chunk, in one segment. * Read chunk or Reply chunk.
*/ */
#define MAX_BYTES_SPECIAL_SEG ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) #define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
/* Sanity check the Read list. /* Sanity check the Read list.
* *
...@@ -374,7 +373,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, ...@@ -374,7 +373,7 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
* - This implementation supports only one Read chunk. * - This implementation supports only one Read chunk.
* *
* Sanity checks: * Sanity checks:
* - Read list does not overflow buffer. * - Read list does not overflow Receive buffer.
* - Segment size limited by largest NFS data payload. * - Segment size limited by largest NFS data payload.
* *
* The segment count is limited to how many segments can * The segment count is limited to how many segments can
...@@ -382,30 +381,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, ...@@ -382,30 +381,44 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
* buffer. That's about 40 Read segments for a 1KB inline * buffer. That's about 40 Read segments for a 1KB inline
* threshold. * threshold.
* *
* Returns pointer to the following Write list. * Return values:
* %true: Read list is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Read list.
* %false: Read list is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/ */
static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end) static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
{ {
u32 position; u32 position, len;
bool first; bool first;
__be32 *p;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
len = 0;
first = true; first = true;
while (*p++ != xdr_zero) { while (*p != xdr_zero) {
p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_readseg_maxsz * sizeof(*p));
if (!p)
return false;
if (first) { if (first) {
position = be32_to_cpup(p++); position = be32_to_cpup(p);
first = false; first = false;
} else if (be32_to_cpup(p++) != position) { } else if (be32_to_cpup(p) != position) {
return NULL; return false;
} }
p++; /* handle */ p += 2;
if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG) len += be32_to_cpup(p);
return NULL;
p += 2; /* offset */
if (p > end) p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
return NULL; if (!p)
return false;
} }
return p; return len <= MAX_BYTES_SPECIAL_CHUNK;
} }
/* The segment count is limited to how many segments can /* The segment count is limited to how many segments can
...@@ -413,67 +426,93 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end) ...@@ -413,67 +426,93 @@ static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
* buffer. That's about 60 Write segments for a 1KB inline * buffer. That's about 60 Write segments for a 1KB inline
* threshold. * threshold.
*/ */
static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end, static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
u32 maxlen)
{ {
u32 i, segcount; u32 i, segcount, total;
__be32 *p;
segcount = be32_to_cpup(p++); p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
segcount = be32_to_cpup(p);
total = 0;
for (i = 0; i < segcount; i++) { for (i = 0; i < segcount; i++) {
p++; /* handle */ u32 handle, length;
if (be32_to_cpup(p++) > maxlen) u64 offset;
return NULL;
p += 2; /* offset */
if (p > end) p = xdr_inline_decode(&rctxt->rc_stream,
return NULL; rpcrdma_segment_maxsz * sizeof(*p));
} if (!p)
return false;
handle = be32_to_cpup(p++);
length = be32_to_cpup(p++);
xdr_decode_hyper(p, &offset);
trace_svcrdma_decode_wseg(handle, length, offset);
return p; total += length;
}
return total <= maxlen;
} }
/* Sanity check the Write list. /* Sanity check the Write list.
* *
* Implementation limits: * Implementation limits:
* - This implementation supports only one Write chunk. * - This implementation currently supports only one Write chunk.
* *
* Sanity checks: * Sanity checks:
* - Write list does not overflow buffer. * - Write list does not overflow Receive buffer.
* - Segment size limited by largest NFS data payload. * - Chunk size limited by largest NFS data payload.
* *
* Returns pointer to the following Reply chunk. * Return values:
* %true: Write list is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Write list.
* %false: Write list is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/ */
static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end) static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
{ {
u32 chcount; u32 chcount = 0;
__be32 *p;
chcount = 0; p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
while (*p++ != xdr_zero) {
p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
if (!p) if (!p)
return NULL; return false;
if (chcount++ > 1) while (*p != xdr_zero) {
return NULL; if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
return false;
++chcount;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
} }
return p; return chcount < 2;
} }
/* Sanity check the Reply chunk. /* Sanity check the Reply chunk.
* *
* Sanity checks: * Sanity checks:
* - Reply chunk does not overflow buffer. * - Reply chunk does not overflow Receive buffer.
* - Segment size limited by largest NFS data payload. * - Chunk size limited by largest NFS data payload.
* *
* Returns pointer to the following RPC header. * Return values:
* %true: Reply chunk is valid. @rctxt's xdr_stream is updated
* to point to the first byte past the Reply chunk.
* %false: Reply chunk is corrupt. @rctxt's xdr_stream is left
* in an unknown state.
*/ */
static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end) static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
{ {
if (*p++ != xdr_zero) { __be32 *p;
p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p) if (!p)
return NULL; return false;
} if (*p != xdr_zero)
return p; if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
return false;
return true;
} }
/* RPC-over-RDMA Version One private extension: Remote Invalidation. /* RPC-over-RDMA Version One private extension: Remote Invalidation.
...@@ -538,60 +577,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, ...@@ -538,60 +577,61 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
} }
/* On entry, xdr->head[0].iov_base points to first byte in the /**
* RPC-over-RDMA header. * svc_rdma_xdr_decode_req - Decode the transport header
* @rq_arg: xdr_buf containing ingress RPC/RDMA message
* @rctxt: state of decoding
*
* On entry, xdr->head[0].iov_base points to first byte of the
* RPC-over-RDMA transport header.
* *
* On successful exit, head[0] points to first byte past the * On successful exit, head[0] points to first byte past the
* RPC-over-RDMA header. For RDMA_MSG, this is the RPC message. * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
*
* The length of the RPC-over-RDMA header is returned. * The length of the RPC-over-RDMA header is returned.
* *
* Assumptions: * Assumptions:
* - The transport header is entirely contained in the head iovec. * - The transport header is entirely contained in the head iovec.
*/ */
static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
struct svc_rdma_recv_ctxt *rctxt)
{ {
__be32 *p, *end, *rdma_argp; __be32 *p, *rdma_argp;
unsigned int hdr_len; unsigned int hdr_len;
/* Verify that there's enough bytes for header + something */
if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
goto out_short;
rdma_argp = rq_arg->head[0].iov_base; rdma_argp = rq_arg->head[0].iov_base;
if (*(rdma_argp + 1) != rpcrdma_version) xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);
goto out_version;
switch (*(rdma_argp + 3)) { p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_fixed_maxsz * sizeof(*p));
if (unlikely(!p))
goto out_short;
p++;
if (*p != rpcrdma_version)
goto out_version;
p += 2;
switch (*p) {
case rdma_msg: case rdma_msg:
break; break;
case rdma_nomsg: case rdma_nomsg:
break; break;
case rdma_done: case rdma_done:
goto out_drop; goto out_drop;
case rdma_error: case rdma_error:
goto out_drop; goto out_drop;
default: default:
goto out_proc; goto out_proc;
} }
end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len); if (!xdr_check_read_list(rctxt))
p = xdr_check_read_list(rdma_argp + 4, end);
if (!p)
goto out_inval; goto out_inval;
p = xdr_check_write_list(p, end); if (!xdr_check_write_list(rctxt))
if (!p)
goto out_inval;
p = xdr_check_reply_chunk(p, end);
if (!p)
goto out_inval; goto out_inval;
if (p > end) if (!xdr_check_reply_chunk(rctxt))
goto out_inval; goto out_inval;
rq_arg->head[0].iov_base = p; rq_arg->head[0].iov_base = rctxt->rc_stream.p;
hdr_len = (unsigned long)p - (unsigned long)rdma_argp; hdr_len = xdr_stream_pos(&rctxt->rc_stream);
rq_arg->head[0].iov_len -= hdr_len; rq_arg->head[0].iov_len -= hdr_len;
rq_arg->len -= hdr_len; rq_arg->len -= hdr_len;
trace_svcrdma_decode_rqst(rdma_argp, hdr_len); trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
...@@ -786,7 +826,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) ...@@ -786,7 +826,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_next_page = rqstp->rq_respages; rqstp->rq_next_page = rqstp->rq_respages;
p = (__be32 *)rqstp->rq_arg.head[0].iov_base; p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg); ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
if (ret < 0) if (ret < 0)
goto out_err; goto out_err;
if (ret == 0) if (ret == 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment