Commit 9ae58e14 authored by Eddie Wai, committed by James Bottomley

[SCSI] bnx2i: Optimized the iSCSI offload performance

Modified the event coalescing code for iSCSI offload to both handle
corner cases and optimize performance as follows:

1. Added a mechanism to loop back a second time to process any leftover
CQEs that were generated by the hardware while the driver was
busy processing previous CQEs in the bh.  This not only helps
performance but also fixes the corner case where no more CQEs are being
generated in the pipeline, so that those leftover CQEs will get a chance
to be processed.

2. Added ARM_CQE_FP to distinguish between fast path arming versus
slow path arming.  This change will guarantee that the CQEs will
always get a chance to be re-armed during fast path completions.

3. Removed the inline event coalescing division for perf optimization.
Also fixed a division-by-zero error when the event_coal_div module
param was set to 0.

4. Changed the default SQ WQEs size from 256 to 128 to match chip
default.

5. Changed the cmd_per_lun from 32 to 24.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <jbottomley@parallels.com>
parent d5307a07
...@@ -70,7 +70,7 @@ ...@@ -70,7 +70,7 @@
#define BNX2I_570X_SQ_WQES_MAX 128 #define BNX2I_570X_SQ_WQES_MAX 128
#define BNX2I_5770X_SQ_WQES_MAX 512 #define BNX2I_5770X_SQ_WQES_MAX 512
#define BNX2I_570X_SQ_WQES_DEFAULT 128 #define BNX2I_570X_SQ_WQES_DEFAULT 128
#define BNX2I_5770X_SQ_WQES_DEFAULT 256 #define BNX2I_5770X_SQ_WQES_DEFAULT 128
#define BNX2I_570X_CQ_WQES_MAX 128 #define BNX2I_570X_CQ_WQES_MAX 128
#define BNX2I_5770X_CQ_WQES_MAX 512 #define BNX2I_5770X_CQ_WQES_MAX 512
...@@ -115,6 +115,7 @@ ...@@ -115,6 +115,7 @@
#define BNX2X_MAX_CQS 8 #define BNX2X_MAX_CQS 8
#define CNIC_ARM_CQE 1 #define CNIC_ARM_CQE 1
#define CNIC_ARM_CQE_FP 2
#define CNIC_DISARM_CQE 0 #define CNIC_DISARM_CQE 0
#define REG_RD(__hba, offset) \ #define REG_RD(__hba, offset) \
...@@ -666,7 +667,9 @@ enum { ...@@ -666,7 +667,9 @@ enum {
* after HBA reset is completed by bnx2i/cnic/bnx2 * after HBA reset is completed by bnx2i/cnic/bnx2
* modules * modules
* @state: tracks offload connection state machine * @state: tracks offload connection state machine
* @teardown_mode: indicates if conn teardown is abortive or orderly * @timestamp: tracks the start time when the ep begins to connect
* @num_active_cmds: tracks the number of outstanding commands for this ep
* @ec_shift: the amount of shift as part of the event coal calc
* @qp: QP information * @qp: QP information
* @ids: contains chip allocated *context id* & driver assigned * @ids: contains chip allocated *context id* & driver assigned
* *iscsi cid* * *iscsi cid*
...@@ -685,6 +688,7 @@ struct bnx2i_endpoint { ...@@ -685,6 +688,7 @@ struct bnx2i_endpoint {
u32 state; u32 state;
unsigned long timestamp; unsigned long timestamp;
int num_active_cmds; int num_active_cmds;
u32 ec_shift;
struct qp_info qp; struct qp_info qp;
struct ep_handles ids; struct ep_handles ids;
......
...@@ -138,7 +138,6 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action) ...@@ -138,7 +138,6 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
u16 next_index; u16 next_index;
u32 num_active_cmds; u32 num_active_cmds;
/* Coalesce CQ entries only on 10G devices */ /* Coalesce CQ entries only on 10G devices */
if (!test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type)) if (!test_bit(BNX2I_NX2_DEV_57710, &ep->hba->cnic_dev_type))
return; return;
...@@ -148,16 +147,19 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action) ...@@ -148,16 +147,19 @@ void bnx2i_arm_cq_event_coalescing(struct bnx2i_endpoint *ep, u8 action)
* interrupts and other unwanted results * interrupts and other unwanted results
*/ */
cq_db = (struct bnx2i_5771x_cq_db *) ep->qp.cq_pgtbl_virt; cq_db = (struct bnx2i_5771x_cq_db *) ep->qp.cq_pgtbl_virt;
if (action != CNIC_ARM_CQE_FP)
if (cq_db->sqn[0] && cq_db->sqn[0] != 0xFFFF) if (cq_db->sqn[0] && cq_db->sqn[0] != 0xFFFF)
return; return;
if (action == CNIC_ARM_CQE) { if (action == CNIC_ARM_CQE || action == CNIC_ARM_CQE_FP) {
num_active_cmds = ep->num_active_cmds; num_active_cmds = ep->num_active_cmds;
if (num_active_cmds <= event_coal_min) if (num_active_cmds <= event_coal_min)
next_index = 1; next_index = 1;
else else
next_index = event_coal_min + next_index = event_coal_min +
(num_active_cmds - event_coal_min) / event_coal_div; ((num_active_cmds - event_coal_min) >>
ep->ec_shift);
if (!next_index) if (!next_index)
next_index = 1; next_index = 1;
cq_index = ep->qp.cqe_exp_seq_sn + next_index - 1; cq_index = ep->qp.cqe_exp_seq_sn + next_index - 1;
...@@ -1935,7 +1937,6 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn) ...@@ -1935,7 +1937,6 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
qp->cq_cons_idx++; qp->cq_cons_idx++;
} }
} }
bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE);
} }
/** /**
...@@ -1949,22 +1950,23 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn) ...@@ -1949,22 +1950,23 @@ static void bnx2i_process_new_cqes(struct bnx2i_conn *bnx2i_conn)
static void bnx2i_fastpath_notification(struct bnx2i_hba *hba, static void bnx2i_fastpath_notification(struct bnx2i_hba *hba,
struct iscsi_kcqe *new_cqe_kcqe) struct iscsi_kcqe *new_cqe_kcqe)
{ {
struct bnx2i_conn *conn; struct bnx2i_conn *bnx2i_conn;
u32 iscsi_cid; u32 iscsi_cid;
iscsi_cid = new_cqe_kcqe->iscsi_conn_id; iscsi_cid = new_cqe_kcqe->iscsi_conn_id;
conn = bnx2i_get_conn_from_id(hba, iscsi_cid); bnx2i_conn = bnx2i_get_conn_from_id(hba, iscsi_cid);
if (!conn) { if (!bnx2i_conn) {
printk(KERN_ALERT "cid #%x not valid\n", iscsi_cid); printk(KERN_ALERT "cid #%x not valid\n", iscsi_cid);
return; return;
} }
if (!conn->ep) { if (!bnx2i_conn->ep) {
printk(KERN_ALERT "cid #%x - ep not bound\n", iscsi_cid); printk(KERN_ALERT "cid #%x - ep not bound\n", iscsi_cid);
return; return;
} }
bnx2i_process_new_cqes(bnx2i_conn);
bnx2i_process_new_cqes(conn); bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE_FP);
bnx2i_process_new_cqes(bnx2i_conn);
} }
......
...@@ -379,6 +379,7 @@ static struct iscsi_endpoint *bnx2i_alloc_ep(struct bnx2i_hba *hba) ...@@ -379,6 +379,7 @@ static struct iscsi_endpoint *bnx2i_alloc_ep(struct bnx2i_hba *hba)
{ {
struct iscsi_endpoint *ep; struct iscsi_endpoint *ep;
struct bnx2i_endpoint *bnx2i_ep; struct bnx2i_endpoint *bnx2i_ep;
u32 ec_div;
ep = iscsi_create_endpoint(sizeof(*bnx2i_ep)); ep = iscsi_create_endpoint(sizeof(*bnx2i_ep));
if (!ep) { if (!ep) {
...@@ -393,6 +394,11 @@ static struct iscsi_endpoint *bnx2i_alloc_ep(struct bnx2i_hba *hba) ...@@ -393,6 +394,11 @@ static struct iscsi_endpoint *bnx2i_alloc_ep(struct bnx2i_hba *hba)
bnx2i_ep->ep_iscsi_cid = (u16) -1; bnx2i_ep->ep_iscsi_cid = (u16) -1;
bnx2i_ep->hba = hba; bnx2i_ep->hba = hba;
bnx2i_ep->hba_age = hba->age; bnx2i_ep->hba_age = hba->age;
ec_div = event_coal_div;
while (ec_div >>= 1)
bnx2i_ep->ec_shift += 1;
hba->ofld_conns_active++; hba->ofld_conns_active++;
init_waitqueue_head(&bnx2i_ep->ofld_wait); init_waitqueue_head(&bnx2i_ep->ofld_wait);
return ep; return ep;
...@@ -2159,7 +2165,7 @@ static struct scsi_host_template bnx2i_host_template = { ...@@ -2159,7 +2165,7 @@ static struct scsi_host_template bnx2i_host_template = {
.change_queue_depth = iscsi_change_queue_depth, .change_queue_depth = iscsi_change_queue_depth,
.can_queue = 1024, .can_queue = 1024,
.max_sectors = 127, .max_sectors = 127,
.cmd_per_lun = 32, .cmd_per_lun = 24,
.this_id = -1, .this_id = -1,
.use_clustering = ENABLE_CLUSTERING, .use_clustering = ENABLE_CLUSTERING,
.sg_tablesize = ISCSI_MAX_BDS_PER_CMD, .sg_tablesize = ISCSI_MAX_BDS_PER_CMD,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment