Commit db223a59 authored by Long Li's avatar Long Li Committed by Steve French

CIFS: SMBD: Upper layer performs SMB write via RDMA read through memory registration

When sending I/O, if size is larger than rdma_readwrite_threshold we prepare
to send SMB write packet for a RDMA read via memory registration. The actual
I/O is done by remote peer through local RDMA hardware. Modify the relevant
fields in the packet accordingly, and append a smbd_buffer_descriptor_v1 to
the end of the SMB write packet.

On write I/O finish, deregister the memory region if this was for a RDMA read.
If remote invalidation is not used, the call to smbd_deregister_mr will do
local invalidation and possibly wait. Memory region is normally deregistered
in MID callback as soon as it's used. There are situations where the MID may
not be created on I/O failure, under which memory region is deregistered when
write data context is released.
Signed-off-by: default avatarLong Li <longli@microsoft.com>
Signed-off-by: default avatarSteve French <smfrench@gmail.com>
Reviewed-by: default avatarPavel Shilovsky <pshilov@microsoft.com>
Reviewed-by: default avatarRonnie Sahlberg <lsahlber@redhat.com>
parent c7398583
...@@ -1174,6 +1174,9 @@ struct cifs_writedata { ...@@ -1174,6 +1174,9 @@ struct cifs_writedata {
pid_t pid; pid_t pid;
unsigned int bytes; unsigned int bytes;
int result; int result;
#ifdef CONFIG_CIFS_SMB_DIRECT
struct smbd_mr *mr;
#endif
unsigned int pagesz; unsigned int pagesz;
unsigned int tailsz; unsigned int tailsz;
unsigned int credits; unsigned int credits;
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include "cifs_unicode.h" #include "cifs_unicode.h"
#include "cifs_debug.h" #include "cifs_debug.h"
#include "fscache.h" #include "fscache.h"
#include "smbdirect.h"
#ifdef CONFIG_CIFS_POSIX #ifdef CONFIG_CIFS_POSIX
static struct { static struct {
...@@ -1923,6 +1924,12 @@ cifs_writedata_release(struct kref *refcount) ...@@ -1923,6 +1924,12 @@ cifs_writedata_release(struct kref *refcount)
{ {
struct cifs_writedata *wdata = container_of(refcount, struct cifs_writedata *wdata = container_of(refcount,
struct cifs_writedata, refcount); struct cifs_writedata, refcount);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (wdata->mr) {
smbd_deregister_mr(wdata->mr);
wdata->mr = NULL;
}
#endif
if (wdata->cfile) if (wdata->cfile)
cifsFileInfo_put(wdata->cfile); cifsFileInfo_put(wdata->cfile);
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include "smb2glob.h" #include "smb2glob.h"
#include "cifspdu.h" #include "cifspdu.h"
#include "cifs_spnego.h" #include "cifs_spnego.h"
#include "smbdirect.h"
/* /*
* The following table defines the expected "StructureSize" of SMB2 requests * The following table defines the expected "StructureSize" of SMB2 requests
...@@ -2728,7 +2729,19 @@ smb2_writev_callback(struct mid_q_entry *mid) ...@@ -2728,7 +2729,19 @@ smb2_writev_callback(struct mid_q_entry *mid)
wdata->result = -EIO; wdata->result = -EIO;
break; break;
} }
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
* If this wdata has a memory registered, the MR can be freed
* The number of MRs available is limited, it's important to recover
* used MR as soon as I/O is finished. Hold MR longer in the later
* I/O process can possibly result in I/O deadlock due to lack of MR
* to send request on I/O retry
*/
if (wdata->mr) {
smbd_deregister_mr(wdata->mr);
wdata->mr = NULL;
}
#endif
if (wdata->result) if (wdata->result)
cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
...@@ -2780,7 +2793,42 @@ smb2_async_writev(struct cifs_writedata *wdata, ...@@ -2780,7 +2793,42 @@ smb2_async_writev(struct cifs_writedata *wdata,
req->DataOffset = cpu_to_le16( req->DataOffset = cpu_to_le16(
offsetof(struct smb2_write_req, Buffer)); offsetof(struct smb2_write_req, Buffer));
req->RemainingBytes = 0; req->RemainingBytes = 0;
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
* If we want to do a server RDMA read, fill in and append
* smbd_buffer_descriptor_v1 to the end of write request
*/
if (server->rdma && wdata->bytes >=
server->smbd_conn->rdma_readwrite_threshold) {
struct smbd_buffer_descriptor_v1 *v1;
bool need_invalidate = server->dialect == SMB30_PROT_ID;
wdata->mr = smbd_register_mr(
server->smbd_conn, wdata->pages,
wdata->nr_pages, wdata->tailsz,
false, need_invalidate);
if (!wdata->mr) {
rc = -ENOBUFS;
goto async_writev_out;
}
req->Length = 0;
req->DataOffset = 0;
req->RemainingBytes =
(wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz;
req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
if (need_invalidate)
req->Channel = SMB2_CHANNEL_RDMA_V1;
req->WriteChannelInfoOffset =
offsetof(struct smb2_write_req, Buffer);
req->WriteChannelInfoLength =
sizeof(struct smbd_buffer_descriptor_v1);
v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0];
v1->offset = wdata->mr->mr->iova;
v1->token = wdata->mr->mr->rkey;
v1->length = wdata->mr->mr->length;
}
#endif
/* 4 for rfc1002 length field and 1 for Buffer */ /* 4 for rfc1002 length field and 1 for Buffer */
iov[0].iov_len = 4; iov[0].iov_len = 4;
rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes); rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes);
...@@ -2794,11 +2842,22 @@ smb2_async_writev(struct cifs_writedata *wdata, ...@@ -2794,11 +2842,22 @@ smb2_async_writev(struct cifs_writedata *wdata,
rqst.rq_npages = wdata->nr_pages; rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz; rqst.rq_pagesz = wdata->pagesz;
rqst.rq_tailsz = wdata->tailsz; rqst.rq_tailsz = wdata->tailsz;
#ifdef CONFIG_CIFS_SMB_DIRECT
if (wdata->mr) {
iov[1].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
rqst.rq_npages = 0;
}
#endif
cifs_dbg(FYI, "async write at %llu %u bytes\n", cifs_dbg(FYI, "async write at %llu %u bytes\n",
wdata->offset, wdata->bytes); wdata->offset, wdata->bytes);
#ifdef CONFIG_CIFS_SMB_DIRECT
/* For RDMA read, I/O size is in RemainingBytes not in Length */
if (!wdata->mr)
req->Length = cpu_to_le32(wdata->bytes);
#else
req->Length = cpu_to_le32(wdata->bytes); req->Length = cpu_to_le32(wdata->bytes);
#endif
if (wdata->credits) { if (wdata->credits) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment