Commit 82d8dfd2 authored by Quinn Tran, committed by Martin K. Petersen

scsi: qla2xxx: edif: Fix performance dip due to lock contention

Users experienced a performance dip when measuring IOPS with EDIF
enabled. During I/O, the driver uses dma_pool_zalloc() to allocate a
chunk of memory. That call takes a lock behind the scenes, which
contributes to lock contention. Save the allocated memory for reuse
and avoid the lock.
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 430eef03
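
Editor's note: the commit replaces a per-command mempool_alloc() + dma_pool_zalloc()
pair with a per-queue-pair cache of DMA buffers indexed by a tag bitmap. Below is a
minimal user-space sketch of that reuse pattern, for illustration only: the buf_cache,
cache_get(), and cache_put() names, the malloc() stand-in for dma_pool_zalloc(), and
the sizes are assumptions, not driver code. The caller is expected to already hold the
queue's own lock, just as qla_get_buf()/qla_put_buf() run under qp->qp_lock.

/*
 * Illustrative sketch (not driver code): a per-queue buffer cache.
 * Buffers are allocated lazily on first use, then retained and handed
 * out via a bitmap of tags, so the hot path never re-enters the
 * allocator (dma_pool_zalloc() in the driver, malloc() here) or takes
 * its internal lock.
 */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define NUM_BUFS  64		/* assumed pool depth */
#define BUF_SZ    512		/* assumed buffer size */
#define TAG_FREED 0xffff

struct buf_cache {
	unsigned long map[NUM_BUFS / (8 * sizeof(unsigned long))];
	void *bufs[NUM_BUFS];
};

static int cache_get(struct buf_cache *c, unsigned short *tag, void **buf)
{
	for (unsigned short t = 0; t < NUM_BUFS; t++) {
		unsigned long bit = 1UL << (t % (8 * sizeof(unsigned long)));
		unsigned long *w = &c->map[t / (8 * sizeof(unsigned long))];

		if (*w & bit)
			continue;	/* tag in use */
		if (!c->bufs[t]) {
			/* first use: fall back to the (locked) allocator once */
			c->bufs[t] = calloc(1, BUF_SZ);
			if (!c->bufs[t])
				return -1;
		} else {
			/* reuse a cached buffer, zeroed like *_zalloc() */
			memset(c->bufs[t], 0, BUF_SZ);
		}
		*w |= bit;
		*tag = t;
		*buf = c->bufs[t];
		return 0;
	}
	return -1;			/* pool exhausted */
}

static void cache_put(struct buf_cache *c, unsigned short *tag)
{
	if (*tag == TAG_FREED)
		return;
	/* release the tag only; the buffer stays cached for reuse */
	c->map[*tag / (8 * sizeof(unsigned long))] &=
		~(1UL << (*tag % (8 * sizeof(unsigned long))));
	*tag = TAG_FREED;
}

int main(void)
{
	static struct buf_cache c;	/* zero-initialized */
	unsigned short tag;
	void *buf;

	if (cache_get(&c, &tag, &buf) == 0) {	/* allocates on first use */
		printf("got tag %u\n", tag);
		cache_put(&c, &tag);
	}
	if (cache_get(&c, &tag, &buf) == 0)	/* reuses, no allocator call */
		printf("reused tag %u\n", tag);
	return 0;
}

Because buffers are allocated once and retained until the pool is destroyed,
steady-state I/O never touches the allocator or its lock; the only serialization
left is the queue's own lock, which the submission path already holds.
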
@@ -384,6 +384,13 @@ struct els_reject {
 struct req_que;
 struct qla_tgt_sess;
 
+struct qla_buf_dsc {
+	u16 tag;
+#define TAG_FREED	0xffff
+	void *buf;
+	dma_addr_t buf_dma;
+};
+
 /*
  * SCSI Request Block
  */
@@ -392,14 +399,16 @@ struct srb_cmd {
 	uint32_t request_sense_length;
 	uint32_t fw_sense_length;
 	uint8_t *request_sense_ptr;
-	struct ct6_dsd *ct6_ctx;
 	struct crc_context *crc_ctx;
+	struct ct6_dsd ct6_ctx;
+	struct qla_buf_dsc buf_dsc;
 };
 
 /*
  * SRB flag definitions
  */
 #define SRB_DMA_VALID			BIT_0	/* Command sent to ISP */
+#define SRB_GOT_BUF			BIT_1
 #define SRB_FCP_CMND_DMA_VALID		BIT_12	/* DIF: DSD List valid */
 #define SRB_CRC_CTX_DMA_VALID		BIT_2	/* DIF: context DMA valid */
 #define SRB_CRC_PROT_DMA_VALID		BIT_4	/* DIF: prot DMA valid */
@@ -3722,6 +3731,16 @@ struct qla_fw_resources {
 
 #define QLA_IOCB_PCT_LIMIT 95
 
+struct qla_buf_pool {
+	u16 num_bufs;
+	u16 num_active;
+	u16 max_used;
+	u16 reserved;
+	unsigned long *buf_map;
+	void **buf_array;
+	dma_addr_t *dma_array;
+};
+
 /*Queue pair data structure */
 struct qla_qpair {
 	spinlock_t qp_lock;
@@ -3775,6 +3794,7 @@ struct qla_qpair {
 	struct qla_tgt_counters tgt_counters;
 	uint16_t cpuid;
 	struct qla_fw_resources fwres ____cacheline_aligned;
+	struct qla_buf_pool buf_pool;
 	u32 cmd_cnt;
 	u32 cmd_completion_cnt;
 	u32 prev_completion_cnt;
...
@@ -3007,26 +3007,16 @@ qla28xx_start_scsi_edif(srb_t *sp)
 		goto queuing_error;
 	}
 
-	ctx = sp->u.scmd.ct6_ctx =
-	    mempool_alloc(ha->ctx_mempool, GFP_ATOMIC);
-	if (!ctx) {
-		ql_log(ql_log_fatal, vha, 0x3010,
-		    "Failed to allocate ctx for cmd=%p.\n", cmd);
-		goto queuing_error;
-	}
-
-	memset(ctx, 0, sizeof(struct ct6_dsd));
-	ctx->fcp_cmnd = dma_pool_zalloc(ha->fcp_cmnd_dma_pool,
-	    GFP_ATOMIC, &ctx->fcp_cmnd_dma);
-	if (!ctx->fcp_cmnd) {
+	if (qla_get_buf(vha, sp->qpair, &sp->u.scmd.buf_dsc)) {
 		ql_log(ql_log_fatal, vha, 0x3011,
-		    "Failed to allocate fcp_cmnd for cmd=%p.\n", cmd);
+		    "Failed to allocate buf for fcp_cmnd for cmd=%p.\n", cmd);
 		goto queuing_error;
 	}
 
-	/* Initialize the DSD list and dma handle */
-	INIT_LIST_HEAD(&ctx->dsd_list);
-	ctx->dsd_use_cnt = 0;
+	sp->flags |= SRB_GOT_BUF;
+	ctx = &sp->u.scmd.ct6_ctx;
+	ctx->fcp_cmnd = sp->u.scmd.buf_dsc.buf;
+	ctx->fcp_cmnd_dma = sp->u.scmd.buf_dsc.buf_dma;
 
 	if (cmd->cmd_len > 16) {
 		additional_cdb_len = cmd->cmd_len - 16;
@@ -3145,7 +3135,6 @@ qla28xx_start_scsi_edif(srb_t *sp)
 	cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(ctx->fcp_cmnd_len);
 	put_unaligned_le64(ctx->fcp_cmnd_dma, &cmd_pkt->fcp_cmnd_dseg_address);
 
-	sp->flags |= SRB_FCP_CMND_DMA_VALID;
 	cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
 	/* Set total data segment count. */
 	cmd_pkt->entry_count = (uint8_t)req_cnt;
@@ -3177,15 +3166,11 @@ qla28xx_start_scsi_edif(srb_t *sp)
 	return QLA_SUCCESS;
 
 queuing_error_fcp_cmnd:
-	dma_pool_free(ha->fcp_cmnd_dma_pool, ctx->fcp_cmnd, ctx->fcp_cmnd_dma);
 queuing_error:
 	if (tot_dsds)
 		scsi_dma_unmap(cmd);
 
-	if (sp->u.scmd.ct6_ctx) {
-		mempool_free(sp->u.scmd.ct6_ctx, ha->ctx_mempool);
-		sp->u.scmd.ct6_ctx = NULL;
-	}
+	qla_put_buf(sp->qpair, &sp->u.scmd.buf_dsc);
 	qla_put_fw_resources(sp->qpair, &sp->iores);
 	spin_unlock_irqrestore(lock, flags);
...
@@ -1015,5 +1015,8 @@ int qla2xxx_enable_port(struct Scsi_Host *shost);
 uint64_t qla2x00_get_num_tgts(scsi_qla_host_t *vha);
 uint64_t qla2x00_count_set_bits(u32 num);
+int qla_create_buf_pool(struct scsi_qla_host *, struct qla_qpair *);
+void qla_free_buf_pool(struct qla_qpair *);
+int qla_get_buf(struct scsi_qla_host *, struct qla_qpair *, struct qla_buf_dsc *);
+void qla_put_buf(struct qla_qpair *, struct qla_buf_dsc *);
 #endif /* _QLA_GBL_H */
@@ -9442,6 +9442,13 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
 			goto fail_mempool;
 		}
 
+		if (qla_create_buf_pool(vha, qpair)) {
+			ql_log(ql_log_warn, vha, 0xd036,
+			    "Failed to initialize buf pool for qpair %d\n",
+			    qpair->id);
+			goto fail_bufpool;
+		}
+
 		/* Mark as online */
 		qpair->online = 1;
@@ -9457,7 +9464,10 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
 	}
 	return qpair;
 
+fail_bufpool:
+	mempool_destroy(qpair->srb_mempool);
 fail_mempool:
+	qla25xx_delete_req_que(vha, qpair->req);
 fail_req:
 	qla25xx_delete_rsp_que(vha, qpair->rsp);
 fail_rsp:
@@ -9483,6 +9493,8 @@ int qla2xxx_delete_qpair(struct scsi_qla_host *vha, struct qla_qpair *qpair)
 	qpair->delete_in_progress = 1;
 
+	qla_free_buf_pool(qpair);
+
 	ret = qla25xx_delete_req_que(vha, qpair->req);
 	if (ret != QLA_SUCCESS)
 		goto fail;
...
@@ -623,7 +623,7 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
 	}
 
 	cur_seg = scsi_sglist(cmd);
-	ctx = sp->u.scmd.ct6_ctx;
+	ctx = &sp->u.scmd.ct6_ctx;
 
 	while (tot_dsds) {
 		avail_dsds = (tot_dsds > QLA_DSDS_PER_IOCB) ?
@@ -3459,13 +3459,7 @@ qla82xx_start_scsi(srb_t *sp)
 			goto queuing_error;
 		}
 
-		ctx = sp->u.scmd.ct6_ctx =
-		    mempool_alloc(ha->ctx_mempool, GFP_ATOMIC);
-		if (!ctx) {
-			ql_log(ql_log_fatal, vha, 0x3010,
-			    "Failed to allocate ctx for cmd=%p.\n", cmd);
-			goto queuing_error;
-		}
+		ctx = &sp->u.scmd.ct6_ctx;
 
 		memset(ctx, 0, sizeof(struct ct6_dsd));
 		ctx->fcp_cmnd = dma_pool_zalloc(ha->fcp_cmnd_dma_pool,
...
@@ -1080,3 +1080,119 @@ void qla_update_host_map(struct scsi_qla_host *vha, port_id_t id)
 		qla_update_vp_map(vha, SET_AL_PA);
 	}
 }
+
+int qla_create_buf_pool(struct scsi_qla_host *vha, struct qla_qpair *qp)
+{
+	int sz;
+
+	qp->buf_pool.num_bufs = qp->req->length;
+
+	sz = BITS_TO_LONGS(qp->req->length);
+	qp->buf_pool.buf_map = kcalloc(sz, sizeof(long), GFP_KERNEL);
+	if (!qp->buf_pool.buf_map) {
+		ql_log(ql_log_warn, vha, 0x0186,
+		    "Failed to allocate buf_map(%ld).\n", sz * sizeof(unsigned long));
+		return -ENOMEM;
+	}
+	sz = qp->req->length * sizeof(void *);
+	qp->buf_pool.buf_array = kcalloc(qp->req->length, sizeof(void *), GFP_KERNEL);
+	if (!qp->buf_pool.buf_array) {
+		ql_log(ql_log_warn, vha, 0x0186,
+		    "Failed to allocate buf_array(%d).\n", sz);
+		kfree(qp->buf_pool.buf_map);
+		return -ENOMEM;
+	}
+	sz = qp->req->length * sizeof(dma_addr_t);
+	qp->buf_pool.dma_array = kcalloc(qp->req->length, sizeof(dma_addr_t), GFP_KERNEL);
+	if (!qp->buf_pool.dma_array) {
+		ql_log(ql_log_warn, vha, 0x0186,
+		    "Failed to allocate dma_array(%d).\n", sz);
+		kfree(qp->buf_pool.buf_map);
+		kfree(qp->buf_pool.buf_array);
+		return -ENOMEM;
+	}
+	set_bit(0, qp->buf_pool.buf_map);
+	return 0;
+}
+
+void qla_free_buf_pool(struct qla_qpair *qp)
+{
+	int i;
+	struct qla_hw_data *ha = qp->vha->hw;
+
+	for (i = 0; i < qp->buf_pool.num_bufs; i++) {
+		if (qp->buf_pool.buf_array[i] && qp->buf_pool.dma_array[i])
+			dma_pool_free(ha->fcp_cmnd_dma_pool, qp->buf_pool.buf_array[i],
+			    qp->buf_pool.dma_array[i]);
+		qp->buf_pool.buf_array[i] = NULL;
+		qp->buf_pool.dma_array[i] = 0;
+	}
+
+	kfree(qp->buf_pool.dma_array);
+	kfree(qp->buf_pool.buf_array);
+	kfree(qp->buf_pool.buf_map);
+}
+
+/* it is assumed qp->qp_lock is held at this point */
+int qla_get_buf(struct scsi_qla_host *vha, struct qla_qpair *qp, struct qla_buf_dsc *dsc)
+{
+	u16 tag, i = 0;
+	void *buf;
+	dma_addr_t buf_dma;
+	struct qla_hw_data *ha = vha->hw;
+
+	dsc->tag = TAG_FREED;
+again:
+	tag = find_first_zero_bit(qp->buf_pool.buf_map, qp->buf_pool.num_bufs);
+	if (tag >= qp->buf_pool.num_bufs) {
+		ql_dbg(ql_dbg_io, vha, 0x00e2,
+		    "qp(%d) ran out of buf resource.\n", qp->id);
+		return -EIO;
+	}
+	if (tag == 0) {
+		set_bit(0, qp->buf_pool.buf_map);
+		i++;
+		if (i == 5) {
+			ql_dbg(ql_dbg_io, vha, 0x00e3,
+			    "qp(%d) unable to get tag.\n", qp->id);
+			return -EIO;
+		}
+		goto again;
+	}
+
+	if (!qp->buf_pool.buf_array[tag]) {
+		buf = dma_pool_zalloc(ha->fcp_cmnd_dma_pool, GFP_ATOMIC, &buf_dma);
+		if (!buf) {
+			ql_log(ql_log_fatal, vha, 0x13b1,
+			    "Failed to allocate buf.\n");
+			return -ENOMEM;
+		}
+
+		dsc->buf = qp->buf_pool.buf_array[tag] = buf;
+		dsc->buf_dma = qp->buf_pool.dma_array[tag] = buf_dma;
+	} else {
+		dsc->buf = qp->buf_pool.buf_array[tag];
+		dsc->buf_dma = qp->buf_pool.dma_array[tag];
+		memset(dsc->buf, 0, FCP_CMND_DMA_POOL_SIZE);
+	}
+
+	qp->buf_pool.num_active++;
+	if (qp->buf_pool.num_active > qp->buf_pool.max_used)
+		qp->buf_pool.max_used = qp->buf_pool.num_active;
+
+	dsc->tag = tag;
+	set_bit(tag, qp->buf_pool.buf_map);
+	return 0;
+}
+
+/* it is assumed qp->qp_lock is held at this point */
+void qla_put_buf(struct qla_qpair *qp, struct qla_buf_dsc *dsc)
+{
+	if (dsc->tag == TAG_FREED)
+		return;
+
+	clear_bit(dsc->tag, qp->buf_pool.buf_map);
+	qp->buf_pool.num_active--;
+	dsc->tag = TAG_FREED;
+}
@@ -733,15 +733,17 @@ void qla2x00_sp_free_dma(srb_t *sp)
 	}
 
 	if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-		struct ct6_dsd *ctx1 = sp->u.scmd.ct6_ctx;
+		struct ct6_dsd *ctx1 = &sp->u.scmd.ct6_ctx;
 
 		dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
 		    ctx1->fcp_cmnd_dma);
 		list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
 		ha->gbl_dsd_inuse -= ctx1->dsd_use_cnt;
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
-		mempool_free(ctx1, ha->ctx_mempool);
 	}
+
+	if (sp->flags & SRB_GOT_BUF)
+		qla_put_buf(sp->qpair, &sp->u.scmd.buf_dsc);
 }
 
 void qla2x00_sp_compl(srb_t *sp, int res)
@@ -817,14 +819,13 @@ void qla2xxx_qpair_sp_free_dma(srb_t *sp)
 	}
 
 	if (sp->flags & SRB_FCP_CMND_DMA_VALID) {
-		struct ct6_dsd *ctx1 = sp->u.scmd.ct6_ctx;
+		struct ct6_dsd *ctx1 = &sp->u.scmd.ct6_ctx;
 
 		dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
 		    ctx1->fcp_cmnd_dma);
 		list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
 		ha->gbl_dsd_inuse -= ctx1->dsd_use_cnt;
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
-		mempool_free(ctx1, ha->ctx_mempool);
 		sp->flags &= ~SRB_FCP_CMND_DMA_VALID;
 	}
@@ -834,6 +835,9 @@ void qla2xxx_qpair_sp_free_dma(srb_t *sp)
 		dma_pool_free(ha->dl_dma_pool, ctx0, ctx0->crc_ctx_dma);
 		sp->flags &= ~SRB_CRC_CTX_DMA_VALID;
 	}
+
+	if (sp->flags & SRB_GOT_BUF)
+		qla_put_buf(sp->qpair, &sp->u.scmd.buf_dsc);
 }
 
 void qla2xxx_qpair_sp_compl(srb_t *sp, int res)
...