Commit 3c9a7c58 authored by Bodo Stroesser's avatar Bodo Stroesser Committed by Martin K. Petersen

scsi: target: tcmu: Optimize scatter_data_area()

scatter_data_area() has two purposes:

 1) Create the iovs for the data area buffer of a SCSI cmd.

 2) If there is data in DMA_TO_DEVICE direction, copy
    the data from sg_list to data area buffer.

Both are done in a common loop.

In case of DMA_FROM_DEVICE data transfer, scatter_data_area() is called
with parameter copy_data = false. But this flag is just used to skip
memcpy() for data, while radix_tree_lookup still is called for every dbi of
the area area buffer, and kmap and kunmap are called for every page from
sg_list and data_area as well as flush_dcache_page() for the data area
pages.  Since the only thing to do with copy_data = false would be to set
up the iovs, this is a noticeable overhead.  Rework the iov creation in the
main loop of scatter_data_area() providing the new function
new_block_to_iov().  Based on this, create the short new function
tcmu_setup_iovs() that only writes the iovs with no overhead.  This new
function is now called instead of scatter_data_area() for bidi buffers and
for data buffers in those cases where memcpy() would have been skipped.

Link: https://lore.kernel.org/r/20200910155041.17654-4-bstroesser@ts.fujitsu.comAcked-by: default avatarMike Christie <michael.christie@oracle.com>
Signed-off-by: default avatarBodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 7e98905e
...@@ -181,6 +181,8 @@ struct tcmu_cmd { ...@@ -181,6 +181,8 @@ struct tcmu_cmd {
uint32_t dbi_cur; uint32_t dbi_cur;
uint32_t *dbi; uint32_t *dbi;
uint32_t data_len_bidi;
unsigned long deadline; unsigned long deadline;
#define TCMU_CMD_BIT_EXPIRED 0 #define TCMU_CMD_BIT_EXPIRED 0
...@@ -579,7 +581,45 @@ static inline void tcmu_cmd_set_block_cnts(struct tcmu_cmd *cmd) ...@@ -579,7 +581,45 @@ static inline void tcmu_cmd_set_block_cnts(struct tcmu_cmd *cmd)
len += se_cmd->t_bidi_data_sg[i].length; len += se_cmd->t_bidi_data_sg[i].length;
cmd->dbi_bidi_cnt = DIV_ROUND_UP(len, DATA_BLOCK_SIZE); cmd->dbi_bidi_cnt = DIV_ROUND_UP(len, DATA_BLOCK_SIZE);
cmd->dbi_cnt += cmd->dbi_bidi_cnt; cmd->dbi_cnt += cmd->dbi_bidi_cnt;
cmd->data_len_bidi = len;
}
}
static int new_block_to_iov(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
struct iovec **iov, int prev_dbi, int *remain)
{
/* Get the next dbi */
int dbi = tcmu_cmd_get_dbi(cmd);
/* Do not add more than DATA_BLOCK_SIZE to iov */
int len = min_t(int, DATA_BLOCK_SIZE, *remain);
*remain -= len;
/*
* The following code will gather and map the blocks to the same iovec
* when the blocks are all next to each other.
*/
if (dbi != prev_dbi + 1) {
/* dbi is not next to previous dbi, so start new iov */
if (prev_dbi >= 0)
(*iov)++;
/* write offset relative to mb_addr */
(*iov)->iov_base = (void __user *)
(udev->data_off + dbi * DATA_BLOCK_SIZE);
} }
(*iov)->iov_len += len;
return dbi;
}
static void tcmu_setup_iovs(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
struct iovec **iov, int data_length)
{
/* start value of dbi + 1 must not be a valid dbi */
int dbi = -2;
/* We prepare the IOVs for DMA_FROM_DEVICE transfer direction */
while (data_length > 0)
dbi = new_block_to_iov(udev, cmd, iov, dbi, &data_length);
} }
static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
...@@ -646,45 +686,22 @@ static inline size_t head_to_end(size_t head, size_t size) ...@@ -646,45 +686,22 @@ static inline size_t head_to_end(size_t head, size_t size)
return size - head; return size - head;
} }
static inline void new_iov(struct iovec **iov, bool first)
{
struct iovec *iovec;
if (!first)
(*iov)++;
iovec = *iov;
memset(iovec, 0, sizeof(struct iovec));
}
#define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size) #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
/* offset is relative to mb_addr */ static void scatter_data_area(struct tcmu_dev *udev, struct tcmu_cmd *tcmu_cmd,
static inline size_t get_block_offset_user(struct tcmu_dev *dev, struct iovec **iov)
int dbi, int remaining)
{
return dev->data_off + dbi * DATA_BLOCK_SIZE +
DATA_BLOCK_SIZE - remaining;
}
static inline size_t iov_tail(struct iovec *iov)
{ {
return (size_t)iov->iov_base + iov->iov_len; struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
} /* start value of dbi + 1 must not be a valid dbi */
int i, dbi = -2;
static void scatter_data_area(struct tcmu_dev *udev,
struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg,
unsigned int data_nents, struct iovec **iov, bool copy_data)
{
int i, dbi;
int block_remaining = 0; int block_remaining = 0;
int data_len = se_cmd->data_length;
void *from, *to = NULL; void *from, *to = NULL;
size_t copy_bytes, to_offset, offset; size_t copy_bytes, offset;
struct scatterlist *sg; struct scatterlist *sg;
struct page *page; struct page *page;
bool first = true;
for_each_sg(data_sg, sg, data_nents, i) { for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
int sg_remaining = sg->length; int sg_remaining = sg->length;
from = kmap_atomic(sg_page(sg)) + sg->offset; from = kmap_atomic(sg_page(sg)) + sg->offset;
while (sg_remaining > 0) { while (sg_remaining > 0) {
...@@ -694,50 +711,19 @@ static void scatter_data_area(struct tcmu_dev *udev, ...@@ -694,50 +711,19 @@ static void scatter_data_area(struct tcmu_dev *udev,
kunmap_atomic(to); kunmap_atomic(to);
} }
block_remaining = DATA_BLOCK_SIZE; /* get next dbi and add to IOVs */
dbi = tcmu_cmd_get_dbi(tcmu_cmd); dbi = new_block_to_iov(udev, tcmu_cmd, iov, dbi,
&data_len);
page = tcmu_get_block_page(udev, dbi); page = tcmu_get_block_page(udev, dbi);
to = kmap_atomic(page); to = kmap_atomic(page);
block_remaining = DATA_BLOCK_SIZE;
} }
/*
* Covert to virtual offset of the ring data area.
*/
to_offset = get_block_offset_user(udev, dbi,
block_remaining);
/*
* The following code will gather and map the blocks
* to the same iovec when the blocks are all next to
* each other.
*/
copy_bytes = min_t(size_t, sg_remaining, copy_bytes = min_t(size_t, sg_remaining,
block_remaining); block_remaining);
if (!first && to_offset == iov_tail(*iov)) {
/*
* Will append to the current iovec, because
* the current block page is next to the
* previous one.
*/
(*iov)->iov_len += copy_bytes;
} else {
/*
* Will allocate a new iovec because we are
* first time here or the current block page
* is not next to the previous one.
*/
new_iov(iov, first);
(*iov)->iov_base = (void __user *)to_offset;
(*iov)->iov_len = copy_bytes;
first = false;
}
if (copy_data) {
offset = DATA_BLOCK_SIZE - block_remaining; offset = DATA_BLOCK_SIZE - block_remaining;
memcpy(to + offset, memcpy(to + offset, from + sg->length - sg_remaining,
from + sg->length - sg_remaining,
copy_bytes); copy_bytes);
}
sg_remaining -= copy_bytes; sg_remaining -= copy_bytes;
block_remaining -= copy_bytes; block_remaining -= copy_bytes;
...@@ -1010,7 +996,6 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err) ...@@ -1010,7 +996,6 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err)
int iov_cnt, iov_bidi_cnt, cmd_id; int iov_cnt, iov_bidi_cnt, cmd_id;
uint32_t cmd_head; uint32_t cmd_head;
uint64_t cdb_off; uint64_t cdb_off;
bool copy_to_data_area;
/* size of data buffer needed */ /* size of data buffer needed */
size_t data_length = (size_t)tcmu_cmd->dbi_cnt * DATA_BLOCK_SIZE; size_t data_length = (size_t)tcmu_cmd->dbi_cnt * DATA_BLOCK_SIZE;
...@@ -1084,17 +1069,19 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err) ...@@ -1084,17 +1069,19 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err)
/* prepare iov list and copy data to data area if necessary */ /* prepare iov list and copy data to data area if necessary */
tcmu_cmd_reset_dbi_cur(tcmu_cmd); tcmu_cmd_reset_dbi_cur(tcmu_cmd);
iov = &entry->req.iov[0]; iov = &entry->req.iov[0];
copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE
|| se_cmd->se_cmd_flags & SCF_BIDI); if (se_cmd->data_direction == DMA_TO_DEVICE ||
scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg, se_cmd->se_cmd_flags & SCF_BIDI)
se_cmd->t_data_nents, &iov, copy_to_data_area); scatter_data_area(udev, tcmu_cmd, &iov);
else
tcmu_setup_iovs(udev, tcmu_cmd, &iov, se_cmd->data_length);
entry->req.iov_cnt = iov_cnt - iov_bidi_cnt; entry->req.iov_cnt = iov_cnt - iov_bidi_cnt;
/* Handle BIDI commands */ /* Handle BIDI commands */
if (se_cmd->se_cmd_flags & SCF_BIDI) { if (se_cmd->se_cmd_flags & SCF_BIDI) {
iov++; iov++;
scatter_data_area(udev, tcmu_cmd, se_cmd->t_bidi_data_sg, tcmu_setup_iovs(udev, tcmu_cmd, &iov, tcmu_cmd->data_len_bidi);
se_cmd->t_bidi_data_nents, &iov, false);
entry->req.iov_bidi_cnt = iov_bidi_cnt; entry->req.iov_bidi_cnt = iov_bidi_cnt;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment