Commit 94d7f1a2 authored by Doug Ledford

Merge branches 'hfi1' and 'iw_cxgb4' into k.o/for-4.7

@@ -500,9 +500,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
      *      skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
      */
-    if (!netif_carrier_ok(netdev))
-        return NETDEV_TX_OK;
     if (netif_queue_stopped(netdev))
         return NETDEV_TX_BUSY;
...
@@ -1090,7 +1090,7 @@ void qib_free_devdata(struct qib_devdata *dd)
     qib_dbg_ibdev_exit(&dd->verbs_dev);
 #endif
     free_percpu(dd->int_counter);
-    ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+    rvt_dealloc_device(&dd->verbs_dev.rdi);
 }

 u64 qib_int_counter(struct qib_devdata *dd)
@@ -1183,7 +1183,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 bail:
     if (!list_empty(&dd->list))
         list_del_init(&dd->list);
-    ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+    rvt_dealloc_device(&dd->verbs_dev.rdi);
     return ERR_PTR(ret);
 }
...
@@ -230,7 +230,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
  *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_rc_req(struct rvt_qp *qp)
+int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
 {
     struct qib_qp_priv *priv = qp->priv;
     struct qib_ibdev *dev = to_idev(qp->ibqp.device);
...
@@ -739,7 +739,7 @@ void qib_do_send(struct rvt_qp *qp)
     struct qib_qp_priv *priv = qp->priv;
     struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
     struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-    int (*make_req)(struct rvt_qp *qp);
+    int (*make_req)(struct rvt_qp *qp, unsigned long *flags);
     unsigned long flags;

     if ((qp->ibqp.qp_type == IB_QPT_RC ||
@@ -781,7 +781,7 @@ void qib_do_send(struct rvt_qp *qp)
             qp->s_hdrwords = 0;
             spin_lock_irqsave(&qp->s_lock, flags);
         }
-    } while (make_req(qp));
+    } while (make_req(qp, &flags));
     spin_unlock_irqrestore(&qp->s_lock, flags);
 }
...
@@ -45,7 +45,7 @@
  *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_uc_req(struct rvt_qp *qp)
+int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
 {
     struct qib_qp_priv *priv = qp->priv;
     struct qib_other_headers *ohdr;
...
@@ -238,7 +238,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
  *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_ud_req(struct rvt_qp *qp)
+int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
 {
     struct qib_qp_priv *priv = qp->priv;
     struct qib_other_headers *ohdr;
@@ -294,7 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp)
             this_cpu_inc(ibp->pmastats->n_unicast_xmit);
         lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
         if (unlikely(lid == ppd->lid)) {
-            unsigned long flags;
+            unsigned long tflags = *flags;
             /*
              * If DMAs are in progress, we can't generate
              * a completion for the loopback packet since
@@ -307,10 +307,10 @@ int qib_make_ud_req(struct rvt_qp *qp)
                 goto bail;
             }
             qp->s_cur = next_cur;
-            local_irq_save(flags);
-            spin_unlock_irqrestore(&qp->s_lock, flags);
+            spin_unlock_irqrestore(&qp->s_lock, tflags);
             qib_ud_loopback(qp, wqe);
-            spin_lock_irqsave(&qp->s_lock, flags);
+            spin_lock_irqsave(&qp->s_lock, tflags);
+            *flags = tflags;
             qib_send_complete(qp, wqe, IB_WC_SUCCESS);
             goto done;
         }
...
@@ -430,11 +430,11 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 void qib_send_rc_ack(struct rvt_qp *qp);

-int qib_make_rc_req(struct rvt_qp *qp);
+int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);

-int qib_make_uc_req(struct rvt_qp *qp);
+int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags);

-int qib_make_ud_req(struct rvt_qp *qp);
+int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags);

 int qib_register_ib_device(struct qib_devdata *);
...
@@ -829,13 +829,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
     case IB_QPT_SMI:
     case IB_QPT_GSI:
     case IB_QPT_UD:
-        qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
+        qp->allowed_ops = IB_OPCODE_UD;
         break;
     case IB_QPT_RC:
-        qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+        qp->allowed_ops = IB_OPCODE_RC;
         break;
     case IB_QPT_UC:
-        qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+        qp->allowed_ops = IB_OPCODE_UC;
         break;
     default:
         ret = ERR_PTR(-EINVAL);
...
@@ -106,6 +106,19 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
 }
 EXPORT_SYMBOL(rvt_alloc_device);

+/**
+ * rvt_dealloc_device - deallocate rdi
+ * @rdi: structure to free
+ *
+ * Free a structure allocated with rvt_alloc_device()
+ */
+void rvt_dealloc_device(struct rvt_dev_info *rdi)
+{
+    kfree(rdi->ports);
+    ib_dealloc_device(&rdi->ibdev);
+}
+EXPORT_SYMBOL(rvt_dealloc_device);

 static int rvt_query_device(struct ib_device *ibdev,
                             struct ib_device_attr *props,
                             struct ib_udata *uhw)
...
@@ -612,6 +612,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
     struct Scsi_Host *shost;
     struct iser_conn *iser_conn = NULL;
     struct ib_conn *ib_conn;
+    u32 max_fr_sectors;
     u16 max_cmds;

     shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
@@ -632,7 +633,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
         iser_conn = ep->dd_data;
         max_cmds = iser_conn->max_cmds;
         shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
-        shost->max_sectors = iser_conn->scsi_max_sectors;

         mutex_lock(&iser_conn->state_mutex);
         if (iser_conn->state != ISER_CONN_UP) {
@@ -657,8 +657,6 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
          */
         shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
             ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
-        shost->max_sectors = min_t(unsigned int,
-            1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);

         if (iscsi_host_add(shost,
                            ib_conn->device->ib_device->dma_device)) {
@@ -672,6 +670,15 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
         goto free_host;
     }

+    /*
+     * FRs or FMRs can only map up to a (device) page per entry, but if the
+     * first entry is misaligned we'll end up using two entries
+     * (head and tail) for a single page worth of data, so we have to drop
+     * one segment from the calculation.
+     */
+    max_fr_sectors = ((shost->sg_tablesize - 1) * PAGE_SIZE) >> 9;
+    shost->max_sectors = min(iser_max_sectors, max_fr_sectors);

     if (cmds_max > max_cmds) {
         iser_info("cmds_max changed from %u to %u\n",
                   cmds_max, max_cmds);
@@ -989,7 +996,6 @@ static struct scsi_host_template iscsi_iser_sht = {
     .queuecommand           = iscsi_queuecommand,
     .change_queue_depth     = scsi_change_queue_depth,
     .sg_tablesize           = ISCSI_ISER_DEF_SG_TABLESIZE,
-    .max_sectors            = ISER_DEF_MAX_SECTORS,
     .cmd_per_lun            = ISER_DEF_CMD_PER_LUN,
     .eh_abort_handler       = iscsi_eh_abort,
     .eh_device_reset_handler= iscsi_eh_device_reset,
...
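The max_sectors cap added in the hunk above is plain arithmetic: each fast-registration entry maps at most one device page, one entry is reserved for a possibly misaligned head/tail split, and the byte count is converted to 512-byte sectors before being clamped by the module-wide limit. A minimal userspace sketch of that calculation, assuming a 4 KiB page and a 1024-sector iser default (both values are assumptions here, not taken from this diff):

#include <stdio.h>

#define PAGE_SIZE        4096u   /* assumed page size */
#define ISER_MAX_SECTORS 1024u   /* assumed module default for iser_max_sectors */

/* Mirror of the max_fr_sectors logic: drop one sg entry for a possibly
 * misaligned first page, then convert bytes to 512-byte sectors. */
static unsigned int iser_cap_sectors(unsigned int sg_tablesize)
{
    unsigned int max_fr_sectors = ((sg_tablesize - 1) * PAGE_SIZE) >> 9;

    return max_fr_sectors < ISER_MAX_SECTORS ? max_fr_sectors
                                             : ISER_MAX_SECTORS;
}

int main(void)
{
    /* e.g. a device reporting 128 fast-reg page list entries */
    printf("max_sectors = %u\n", iser_cap_sectors(128));  /* prints 1016 */
    return 0;
}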
@@ -53,20 +53,6 @@
 #include "sdma.h"
 #include "trace.h"

-struct cpu_mask_set {
-    struct cpumask mask;
-    struct cpumask used;
-    uint gen;
-};
-
-struct hfi1_affinity {
-    struct cpu_mask_set def_intr;
-    struct cpu_mask_set rcv_intr;
-    struct cpu_mask_set proc;
-    /* spin lock to protect affinity struct */
-    spinlock_t lock;
-};
 /* Name of IRQ types, indexed by enum irq_type */
 static const char * const irq_type_names[] = {
     "SDMA",
@@ -82,6 +68,48 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
     set->gen = 0;
 }

+/* Initialize non-HT cpu cores mask */
+int init_real_cpu_mask(struct hfi1_devdata *dd)
+{
+    struct hfi1_affinity *info;
+    int possible, curr_cpu, i, ht;
+
+    info = kzalloc(sizeof(*info), GFP_KERNEL);
+    if (!info)
+        return -ENOMEM;
+
+    cpumask_clear(&info->real_cpu_mask);
+
+    /* Start with cpu online mask as the real cpu mask */
+    cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
+
+    /*
+     * Remove HT cores from the real cpu mask. Do this in two steps below.
+     */
+    possible = cpumask_weight(&info->real_cpu_mask);
+    ht = cpumask_weight(topology_sibling_cpumask(
+                    cpumask_first(&info->real_cpu_mask)));
+    /*
+     * Step 1. Skip over the first N HT siblings and use them as the
+     * "real" cores. Assumes that HT cores are not enumerated in
+     * succession (except in the single core case).
+     */
+    curr_cpu = cpumask_first(&info->real_cpu_mask);
+    for (i = 0; i < possible / ht; i++)
+        curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+    /*
+     * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
+     * skip any gaps.
+     */
+    for (; i < possible; i++) {
+        cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
+        curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+    }
+
+    dd->affinity = info;
+    return 0;
+}

 /*
  * Interrupt affinity.
  *
@@ -93,20 +121,17 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
  * to the node relative 1 as necessary.
  *
  */
-int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
 {
     int node = pcibus_to_node(dd->pcidev->bus);
-    struct hfi1_affinity *info;
+    struct hfi1_affinity *info = dd->affinity;
     const struct cpumask *local_mask;
-    int curr_cpu, possible, i, ht;
+    int curr_cpu, possible, i;

     if (node < 0)
         node = numa_node_id();
     dd->node = node;

-    info = kzalloc(sizeof(*info), GFP_KERNEL);
-    if (!info)
-        return -ENOMEM;
     spin_lock_init(&info->lock);

     init_cpu_mask_set(&info->def_intr);
@@ -116,30 +141,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
     local_mask = cpumask_of_node(dd->node);
     if (cpumask_first(local_mask) >= nr_cpu_ids)
         local_mask = topology_core_cpumask(0);
-    /* use local mask as default */
-    cpumask_copy(&info->def_intr.mask, local_mask);
-    /*
-     * Remove HT cores from the default mask. Do this in two steps below.
-     */
-    possible = cpumask_weight(&info->def_intr.mask);
-    ht = cpumask_weight(topology_sibling_cpumask(
-                    cpumask_first(&info->def_intr.mask)));
-    /*
-     * Step 1. Skip over the first N HT siblings and use them as the
-     * "real" cores. Assumes that HT cores are not enumerated in
-     * succession (except in the single core case).
-     */
-    curr_cpu = cpumask_first(&info->def_intr.mask);
-    for (i = 0; i < possible / ht; i++)
-        curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
-    /*
-     * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
-     * skip any gaps.
-     */
-    for (; i < possible; i++) {
-        cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
-        curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
-    }
+    /* Use the "real" cpu mask of this node as the default */
+    cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);

     /* fill in the receive list */
     possible = cpumask_weight(&info->def_intr.mask);
@@ -167,8 +170,6 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
     }

     cpumask_copy(&info->proc.mask, cpu_online_mask);
-    dd->affinity = info;
-    return 0;
 }

 void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
...
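The two-step sibling removal that moved into init_real_cpu_mask() relies on the common enumeration where all physical cores are listed first and their hyperthread siblings follow. A toy model of the same walk over a plain array, assuming 2-way SMT and that ordering (the mask helpers below are local stand-ins, not the kernel cpumask API):

#include <stdio.h>
#include <stdbool.h>

#define NCPUS 8

/* Toy "cpumask": present[i] says whether cpu i is still in the mask. */
static bool present[NCPUS] = { true, true, true, true, true, true, true, true };

static int mask_weight(void)
{
    int i, w = 0;

    for (i = 0; i < NCPUS; i++)
        w += present[i];
    return w;
}

static int mask_next(int cpu)
{
    for (cpu++; cpu < NCPUS; cpu++)
        if (present[cpu])
            return cpu;
    return -1;          /* past the end of the mask */
}

int main(void)
{
    int possible = mask_weight();
    int ht = 2;         /* assumed 2-way SMT */
    int curr = 0;       /* first cpu in the mask */
    int i;

    /* Step 1: skip the first possible/ht cpus; they are the "real" cores. */
    for (i = 0; i < possible / ht; i++)
        curr = mask_next(curr);

    /* Step 2: clear the remaining sibling cpus. */
    for (; i < possible; i++) {
        present[curr] = false;
        curr = mask_next(curr);
    }

    for (i = 0; i < NCPUS; i++)
        if (present[i])
            printf("cpu %d stays\n", i);    /* cpus 0-3 remain */
    return 0;
}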
@@ -64,10 +64,27 @@ enum affinity_flags {
     AFF_IRQ_LOCAL
 };

+struct cpu_mask_set {
+    struct cpumask mask;
+    struct cpumask used;
+    uint gen;
+};
+
+struct hfi1_affinity {
+    struct cpu_mask_set def_intr;
+    struct cpu_mask_set rcv_intr;
+    struct cpu_mask_set proc;
+    struct cpumask real_cpu_mask;
+    /* spin lock to protect affinity struct */
+    spinlock_t lock;
+};
+
 struct hfi1_msix_entry;

+/* Initialize non-HT cpu cores mask */
+int init_real_cpu_mask(struct hfi1_devdata *);
 /* Initialize driver affinity data */
-int hfi1_dev_affinity_init(struct hfi1_devdata *);
+void hfi1_dev_affinity_init(struct hfi1_devdata *);
 /* Free driver affinity data */
 void hfi1_dev_affinity_free(struct hfi1_devdata *);
 /*
...
@@ -389,6 +389,7 @@
 #define LAST_REMOTE_STATE_COMPLETE 0x13
 #define LINK_QUALITY_INFO 0x14
 #define REMOTE_DEVICE_ID 0x15
+#define LINK_DOWN_REASON 0x16

 /* 8051 lane specific register field IDs */
 #define TX_EQ_SETTINGS 0x00
@@ -497,6 +498,11 @@
 #define PWRM_BER_CONTROL 0x1
 #define PWRM_BANDWIDTH_CONTROL 0x2

+/* 8051 link down reasons */
+#define LDR_LINK_TRANSFER_ACTIVE_LOW 0xa
+#define LDR_RECEIVED_LINKDOWN_IDLE_MSG 0xb
+#define LDR_RECEIVED_HOST_OFFLINE_REQ 0xc
+
 /* verify capability fabric CRC size bits */
 enum {
     CAP_CRC_14B = (1 << 0), /* 14b CRC */
@@ -691,7 +697,6 @@ void handle_verify_cap(struct work_struct *work);
 void handle_freeze(struct work_struct *work);
 void handle_link_up(struct work_struct *work);
 void handle_link_down(struct work_struct *work);
-void handle_8051_request(struct work_struct *work);
 void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
...
@@ -771,6 +771,7 @@
 #define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK 0x1ull
 #define RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT 0
 #define RCV_RSM_CFG_PACKET_TYPE_SHIFT 60
+#define RCV_RSM_CFG_OFFSET_SHIFT 32
 #define RCV_RSM_MAP_TABLE (RXE + 0x000000000900)
 #define RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK 0xFFull
 #define RCV_RSM_MATCH (RXE + 0x000000000800)
...
@@ -413,7 +413,8 @@ static ssize_t diagpkt_send(struct diag_pkt *dp)
         goto bail;
     }
     /* can only use kernel contexts */
-    if (dd->send_contexts[dp->sw_index].type != SC_KERNEL) {
+    if (dd->send_contexts[dp->sw_index].type != SC_KERNEL &&
+        dd->send_contexts[dp->sw_index].type != SC_VL15) {
         ret = -EINVAL;
         goto bail;
     }
...
@@ -75,7 +75,8 @@ DEFINE_MUTEX(hfi1_mutex); /* general driver use */
 unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
 module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
-MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is 8192");
+MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
+                 HFI1_DEFAULT_MAX_MTU));

 unsigned int hfi1_cu = 1;
 module_param_named(cu, hfi1_cu, uint, S_IRUGO);
...
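The MODULE_PARM_DESC change keeps the help text in sync with HFI1_DEFAULT_MAX_MTU by pasting the macro's value into the string at compile time. The kernel's __stringify is just two-level token stringification followed by string-literal concatenation; a small sketch of the same trick outside the kernel (the helper names mirror the kernel's but are defined locally here):

#include <stdio.h>

/* Two levels are needed so the argument is macro-expanded before '#'. */
#define __stringify_1(x) #x
#define __stringify(x)   __stringify_1(x)

#define HFI1_DEFAULT_MAX_MTU 10240

int main(void)
{
    /* Adjacent string literals concatenate, just as in MODULE_PARM_DESC. */
    const char *desc = "Set max MTU bytes, default is " __stringify(HFI1_DEFAULT_MAX_MTU);

    puts(desc);     /* Set max MTU bytes, default is 10240 */
    return 0;
}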
@@ -1413,8 +1413,15 @@ static int __acquire_chip_resource(struct hfi1_devdata *dd, u32 resource)
     if (resource & CR_DYN_MASK) {
         /* a dynamic resource is in use if either HFI has set the bit */
-        all_bits = resource_mask(0, resource) |
-            resource_mask(1, resource);
+        if (dd->pcidev->device == PCI_DEVICE_ID_INTEL0 &&
+            (resource & (CR_I2C1 | CR_I2C2))) {
+            /* discrete devices must serialize across both chains */
+            all_bits = resource_mask(0, CR_I2C1 | CR_I2C2) |
+                    resource_mask(1, CR_I2C1 | CR_I2C2);
+        } else {
+            all_bits = resource_mask(0, resource) |
+                resource_mask(1, resource);
+        }
         my_bit = resource_mask(dd->hfi1_id, resource);
     } else {
         /* non-dynamic resources are not split between HFIs */
...
@@ -455,9 +455,9 @@ struct rvt_sge_state;
 #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)

 /* use this MTU size if none other is given */
-#define HFI1_DEFAULT_ACTIVE_MTU 8192
+#define HFI1_DEFAULT_ACTIVE_MTU 10240
 /* use this MTU size as the default maximum */
-#define HFI1_DEFAULT_MAX_MTU 8192
+#define HFI1_DEFAULT_MAX_MTU 10240
 /* default partition key */
 #define DEFAULT_PKEY 0xffff
@@ -606,7 +606,6 @@ struct hfi1_pportdata {
     struct work_struct link_vc_work;
     struct work_struct link_up_work;
     struct work_struct link_down_work;
-    struct work_struct dc_host_req_work;
     struct work_struct sma_message_work;
     struct work_struct freeze_work;
     struct work_struct link_downgrade_work;
@@ -1258,7 +1257,7 @@ void receive_interrupt_work(struct work_struct *work);
 static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
 {
     return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
-           ((!!(rhf & RHF_DC_INFO_MASK)) << 4);
+           ((!!(rhf & RHF_DC_INFO_SMASK)) << 4);
 }

 static inline u16 generate_jkey(kuid_t uid)
@@ -1333,6 +1332,9 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
 void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
                 u32 pkey, u32 slid, u32 dlid, u8 sc5,
                 const struct ib_grh *old_grh);
+#define PKEY_CHECK_INVALID -1
+int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
+                      u8 sc5, int8_t s_pkey_index);

 #define PACKET_EGRESS_TIMEOUT 350
 static inline void pause_for_credit_return(struct hfi1_devdata *dd)
@@ -1776,6 +1778,7 @@ extern struct mutex hfi1_mutex;
 #define HFI1_PKT_USER_SC_INTEGRITY \
     (SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK \
+    | SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK \
     | SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK \
     | SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK)
...
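The hdr2sc() change swaps RHF_DC_INFO_MASK for RHF_DC_INFO_SMASK so the test picks the DC-info flag out of the full 64-bit RHF. The SC value itself is assembled from the four SC[3:0] bits in the first LRH word plus that RHF flag as SC[4]. A small sketch of the bit assembly; the RHF bit position used below is a placeholder assumption, not the real register layout:

#include <stdio.h>
#include <stdint.h>

/* Placeholder: assume the DC-info flag sits at bit 32 of the RHF. */
#define RHF_DC_INFO_SMASK (1ull << 32)

/* lrh0 is the first 16-bit LRH word, already in host byte order. */
static int hdr2sc(uint16_t lrh0, uint64_t rhf)
{
    return ((lrh0 >> 12) & 0xf) |                   /* SC[3:0] from the LRH */
           ((!!(rhf & RHF_DC_INFO_SMASK)) << 4);    /* SC[4] from the RHF   */
}

int main(void)
{
    /* LRH carrying SC 0x5 in its top nibble, RHF with the DC-info bit set */
    printf("sc = 0x%x\n", hdr2sc(0x5abc, RHF_DC_INFO_SMASK));   /* 0x15 */
    return 0;
}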
@@ -422,9 +422,10 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
     struct cca_timer *cca_timer;
     struct hfi1_pportdata *ppd;
     int sl;
-    u16 ccti, ccti_timer, ccti_min;
+    u16 ccti_timer, ccti_min;
     struct cc_state *cc_state;
     unsigned long flags;
+    enum hrtimer_restart ret = HRTIMER_NORESTART;

     cca_timer = container_of(t, struct cca_timer, hrtimer);
     ppd = cca_timer->ppd;
@@ -450,24 +451,21 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
     spin_lock_irqsave(&ppd->cca_timer_lock, flags);

-    ccti = cca_timer->ccti;
-    if (ccti > ccti_min) {
+    if (cca_timer->ccti > ccti_min) {
         cca_timer->ccti--;
         set_link_ipg(ppd);
     }

-    spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
-    rcu_read_unlock();
-    if (ccti > ccti_min) {
+    if (cca_timer->ccti > ccti_min) {
         unsigned long nsec = 1024 * ccti_timer;
         /* ccti_timer is in units of 1.024 usec */
         hrtimer_forward_now(t, ns_to_ktime(nsec));
-        return HRTIMER_RESTART;
+        ret = HRTIMER_RESTART;
     }
-    return HRTIMER_NORESTART;
+
+    spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
+    rcu_read_unlock();
+    return ret;
 }

 /*
@@ -496,7 +494,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
     INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
     INIT_WORK(&ppd->link_up_work, handle_link_up);
     INIT_WORK(&ppd->link_down_work, handle_link_down);
-    INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
     INIT_WORK(&ppd->freeze_work, handle_freeze);
     INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
     INIT_WORK(&ppd->sma_message_work, handle_sma_message);
@@ -1007,7 +1004,7 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
     free_percpu(dd->rcv_limit);
     hfi1_dev_affinity_free(dd);
     free_percpu(dd->send_schedule);
-    ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+    rvt_dealloc_device(&dd->verbs_dev.rdi);
 }

 /*
@@ -1110,7 +1107,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 bail:
     if (!list_empty(&dd->list))
         list_del_init(&dd->list);
-    ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
+    rvt_dealloc_device(&dd->verbs_dev.rdi);
     return ERR_PTR(ret);
 }
...
@@ -999,7 +999,21 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
         break;
     }

-    set_link_state(ppd, link_state);
+    if ((link_state == HLS_DN_POLL ||
+         link_state == HLS_DN_DOWNDEF)) {
+        /*
+         * Going to poll. No matter what the current state,
+         * always move offline first, then tune and start the
+         * link. This correctly handles a FM link bounce and
+         * a link enable. Going offline is a no-op if already
+         * offline.
+         */
+        set_link_state(ppd, HLS_DN_OFFLINE);
+        tune_serdes(ppd);
+        start_link(ppd);
+    } else {
+        set_link_state(ppd, link_state);
+    }
     if (link_state == HLS_DN_DISABLE &&
         (ppd->offline_disabled_reason >
          HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
...
@@ -91,7 +91,7 @@ static unsigned long mmu_node_start(struct mmu_rb_node *node)
 static unsigned long mmu_node_last(struct mmu_rb_node *node)
 {
-    return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1;
+    return PAGE_ALIGN(node->addr + node->len) - 1;
 }

 int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
@@ -126,10 +126,15 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
     if (!handler)
         return;

+    /* Unregister first so we don't get any more notifications. */
+    if (current->mm)
+        mmu_notifier_unregister(&handler->mn, current->mm);
+
     spin_lock_irqsave(&mmu_rb_lock, flags);
     list_del(&handler->list);
     spin_unlock_irqrestore(&mmu_rb_lock, flags);

+    spin_lock_irqsave(&handler->lock, flags);
     if (!RB_EMPTY_ROOT(root)) {
         struct rb_node *node;
         struct mmu_rb_node *rbnode;
@@ -141,9 +146,8 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
             handler->ops->remove(root, rbnode, NULL);
         }
     }
-
-    if (current->mm)
-        mmu_notifier_unregister(&handler->mn, current->mm);
+    spin_unlock_irqrestore(&handler->lock, flags);

     kfree(handler);
 }
@@ -235,6 +239,25 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
     return node;
 }

+struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root,
+                                        unsigned long addr, unsigned long len)
+{
+    struct mmu_rb_handler *handler = find_mmu_handler(root);
+    struct mmu_rb_node *node;
+    unsigned long flags;
+
+    if (!handler)
+        return ERR_PTR(-EINVAL);
+
+    spin_lock_irqsave(&handler->lock, flags);
+    node = __mmu_rb_search(handler, addr, len);
+    if (node)
+        __mmu_int_rb_remove(node, handler->root);
+    spin_unlock_irqrestore(&handler->lock, flags);
+
+    return node;
+}
+
 void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
 {
     struct mmu_rb_handler *handler = find_mmu_handler(root);
@@ -293,9 +316,9 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
         hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
                   node->addr, node->len);
         if (handler->ops->invalidate(root, node)) {
-            spin_unlock_irqrestore(&handler->lock, flags);
-            __mmu_rb_remove(handler, node, mm);
-            spin_lock_irqsave(&handler->lock, flags);
+            __mmu_int_rb_remove(node, root);
+            if (handler->ops->remove)
+                handler->ops->remove(root, node, mm);
         }
     }
     spin_unlock_irqrestore(&handler->lock, flags);
...
@@ -70,5 +70,7 @@ int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
 void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
 struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
                                        unsigned long);
+struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long,
+                                        unsigned long);

 #endif /* _HFI1_MMU_RB_H */
@@ -139,23 +139,30 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
 /* Send Context Size (SCS) wildcards */
 #define SCS_POOL_0 -1
 #define SCS_POOL_1 -2

 /* Send Context Count (SCC) wildcards */
 #define SCC_PER_VL -1
 #define SCC_PER_CPU -2
 #define SCC_PER_KRCVQ -3
-#define SCC_ACK_CREDITS 32
+
+/* Send Context Size (SCS) constants */
+#define SCS_ACK_CREDITS 32
+#define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */
+
+#define PIO_THRESHOLD_CEILING 4096

 #define PIO_WAIT_BATCH_SIZE 5

 /* default send context sizes */
 static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
     [SC_KERNEL] = { .size  = SCS_POOL_0, /* even divide, pool 0 */
                     .count = SCC_PER_VL }, /* one per NUMA */
-    [SC_ACK]    = { .size  = SCC_ACK_CREDITS,
+    [SC_ACK]    = { .size  = SCS_ACK_CREDITS,
                     .count = SCC_PER_KRCVQ },
     [SC_USER]   = { .size  = SCS_POOL_0, /* even divide, pool 0 */
                     .count = SCC_PER_CPU }, /* one per CPU */
+    [SC_VL15]   = { .size  = SCS_VL15_CREDITS,
+                    .count = 1 },
 };
@@ -202,7 +209,8 @@ static int wildcard_to_pool(int wc)
 static const char *sc_type_names[SC_MAX] = {
     "kernel",
     "ack",
-    "user"
+    "user",
+    "vl15"
 };

 static const char *sc_type_name(int index)
@@ -230,6 +238,22 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
     int extra;
     int i;

+    /*
+     * When SDMA is enabled, kernel context pio packet size is capped by
+     * "piothreshold". Reduce pio buffer allocation for kernel context by
+     * setting it to a fixed size. The allocation allows 3-deep buffering
+     * of the largest pio packets plus up to 128 bytes header, sufficient
+     * to maintain verbs performance.
+     *
+     * When SDMA is disabled, keep the default pooling allocation.
+     */
+    if (HFI1_CAP_IS_KSET(SDMA)) {
+        u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ?
+                           piothreshold : PIO_THRESHOLD_CEILING;
+        sc_config_sizes[SC_KERNEL].size =
+            3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
+    }
+
     /*
      * Step 0:
      *  - copy the centipercents/absolute sizes from the pool config
@@ -311,7 +335,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
         if (i == SC_ACK) {
             count = dd->n_krcv_queues;
         } else if (i == SC_KERNEL) {
-            count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
+            count = INIT_SC_PER_VL * num_vls;
         } else if (count == SCC_PER_CPU) {
             count = dd->num_rcv_contexts - dd->n_krcv_queues;
         } else if (count < 0) {
@@ -596,7 +620,7 @@ u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize)
  * Return value is what to write into the CSR: trigger return when
  * unreturned credits pass this count.
  */
-static u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
+u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
 {
     return (sc->credits * percent) / 100;
 }
@@ -790,7 +814,10 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
      * For Ack contexts, set a threshold for half the credits.
      * For User contexts use the given percentage. This has been
      * sanitized on driver start-up.
-     * For Kernel contexts, use the default MTU plus a header.
+     * For Kernel contexts, use the default MTU plus a header
+     * or half the credits, whichever is smaller. This should
+     * work for both the 3-deep buffering allocation and the
+     * pooling allocation.
      */
     if (type == SC_ACK) {
         thresh = sc_percent_to_threshold(sc, 50);
@@ -798,7 +825,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
         thresh = sc_percent_to_threshold(sc,
                                          user_credit_return_threshold);
     } else { /* kernel */
-        thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
+        thresh = min(sc_percent_to_threshold(sc, 50),
+                     sc_mtu_to_threshold(sc, hfi1_max_mtu,
+                                         hdrqentsize));
     }
     reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
     /* add in early return */
@@ -1531,7 +1560,8 @@ static void sc_piobufavail(struct send_context *sc)
     unsigned long flags;
     unsigned i, n = 0;

-    if (dd->send_contexts[sc->sw_index].type != SC_KERNEL)
+    if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
+        dd->send_contexts[sc->sw_index].type != SC_VL15)
         return;
     list = &sc->piowait;
     /*
@@ -1900,7 +1930,7 @@ int init_pervl_scs(struct hfi1_devdata *dd)
     u32 ctxt;
     struct hfi1_pportdata *ppd = dd->pport;

-    dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
+    dd->vld[15].sc = sc_alloc(dd, SC_VL15,
                               dd->rcd[0]->rcvhdrqentsize, dd->node);
     if (!dd->vld[15].sc)
         goto nomem;
...
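The new SC_KERNEL sizing in init_sc_pools_and_sizes() is a fixed 3-deep allocation: three of the largest PIO packets (capped at piothreshold, with a 4096-byte ceiling) plus 128 bytes of header each, expressed in PIO blocks. A quick worked version of that arithmetic, assuming the usual 64-byte PIO block size (that block size is an assumption here, not shown in this diff):

#include <stdio.h>

#define PIO_BLOCK_SIZE        64    /* assumed PIO block size in bytes */
#define PIO_THRESHOLD_CEILING 4096

/* Mirror of: sc_config_sizes[SC_KERNEL].size = 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE */
static unsigned int kernel_sc_blocks(unsigned int piothreshold)
{
    unsigned int max_pkt_size = piothreshold < PIO_THRESHOLD_CEILING ?
                                piothreshold : PIO_THRESHOLD_CEILING;

    return 3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
}

int main(void)
{
    printf("%u blocks\n", kernel_sc_blocks(256));   /* 3*(256+128)/64  = 18  */
    printf("%u blocks\n", kernel_sc_blocks(8192));  /* capped at 4096 -> 198 */
    return 0;
}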
@@ -51,7 +51,8 @@
 #define SC_KERNEL 0
 #define SC_ACK    1
 #define SC_USER   2
-#define SC_MAX    3
+#define SC_VL15   3
+#define SC_MAX    4

 /* invalid send context index */
 #define INVALID_SCI 0xff
@@ -293,6 +294,7 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
 void sc_add_credit_return_intr(struct send_context *sc);
 void sc_del_credit_return_intr(struct send_context *sc);
 void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold);
+u32 sc_percent_to_threshold(struct send_context *sc, u32 percent);
 u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize);
 void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint);
 void sc_wait(struct hfi1_devdata *dd);
...
@@ -114,21 +114,11 @@ static int qual_power(struct hfi1_pportdata *ppd)
     if (ret)
         return ret;

-    if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
-        cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
-    else
-        cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+    cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);

-    if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1))
-        ppd->offline_disabled_reason =
-            HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
-    else if (cable_power_class > 4 && cable_power_class > (power_class_max))
+    if (cable_power_class > power_class_max)
         ppd->offline_disabled_reason =
             HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
-    /*
-     * cable_power_class will never have value 4 as this simply
-     * means the high power settings are unused
-     */

     if (ppd->offline_disabled_reason ==
         HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
@@ -173,12 +163,9 @@ static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
     u8 *cache = ppd->qsfp_info.cache;
     int ret;

-    if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
-        cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
-    else
-        cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+    cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);

-    if (cable_power_class) {
+    if (cable_power_class > QSFP_POWER_CLASS_1) {
         power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS];
         power_ctrl_byte |= 1;
@@ -190,8 +177,7 @@ static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
         if (ret != 1)
             return -EIO;

-        if (cable_power_class > 3) {
-            /* > power class 4*/
+        if (cable_power_class > QSFP_POWER_CLASS_4) {
             power_ctrl_byte |= (1 << 2);
             ret = qsfp_write(ppd, ppd->dd->hfi1_id,
                              QSFP_PWR_CTRL_BYTE_OFFS,
@@ -212,12 +198,21 @@ static void apply_rx_cdr(struct hfi1_pportdata *ppd,
 {
     u32 rx_preset;
     u8 *cache = ppd->qsfp_info.cache;
+    int cable_power_class;

     if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) &&
           (cache[QSFP_CDR_INFO_OFFS] & 0x40)))
         return;

-    /* rx_preset preset to zero to catch error */
+    /* RX CDR present, bypass supported */
+    cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+
+    if (cable_power_class <= QSFP_POWER_CLASS_3) {
+        /* Power class <= 3, ignore config & turn RX CDR on */
+        *cdr_ctrl_byte |= 0xF;
+        return;
+    }
+
     get_platform_config_field(
         ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
         rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
@@ -250,15 +245,25 @@ static void apply_rx_cdr(struct hfi1_pportdata *ppd,
 static void apply_tx_cdr(struct hfi1_pportdata *ppd,
                          u32 tx_preset_index,
-                         u8 *ctr_ctrl_byte)
+                         u8 *cdr_ctrl_byte)
 {
     u32 tx_preset;
     u8 *cache = ppd->qsfp_info.cache;
+    int cable_power_class;

     if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) &&
           (cache[QSFP_CDR_INFO_OFFS] & 0x80)))
         return;

+    /* TX CDR present, bypass supported */
+    cable_power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+
+    if (cable_power_class <= QSFP_POWER_CLASS_3) {
+        /* Power class <= 3, ignore config & turn TX CDR on */
+        *cdr_ctrl_byte |= 0xF0;
+        return;
+    }
+
     get_platform_config_field(
         ppd->dd,
         PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
@@ -282,10 +287,10 @@ static void apply_tx_cdr(struct hfi1_pportdata *ppd,
                 (tx_preset << 2) | (tx_preset << 3));

     if (tx_preset)
-        *ctr_ctrl_byte |= (tx_preset << 4);
+        *cdr_ctrl_byte |= (tx_preset << 4);
     else
         /* Preserve current/determined RX CDR status */
-        *ctr_ctrl_byte &= ((tx_preset << 4) | 0xF);
+        *cdr_ctrl_byte &= ((tx_preset << 4) | 0xF);
 }

 static void apply_cdr_settings(
@@ -598,6 +603,7 @@ static void apply_tunings(
                 "Applying TX settings");
 }

+/* Must be holding the QSFP i2c resource */
 static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
                             u32 *ptr_rx_preset, u32 *ptr_total_atten)
 {
@@ -605,26 +611,19 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
     u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
     u8 *cache = ppd->qsfp_info.cache;

-    ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
-    if (ret) {
-        dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
-                   __func__, (int)ppd->dd->hfi1_id);
-        return ret;
-    }
-
     ppd->qsfp_info.limiting_active = 1;

     ret = set_qsfp_tx(ppd, 0);
     if (ret)
-        goto bail_unlock;
+        return ret;

     ret = qual_power(ppd);
     if (ret)
-        goto bail_unlock;
+        return ret;

     ret = qual_bitrate(ppd);
     if (ret)
-        goto bail_unlock;
+        return ret;

     if (ppd->qsfp_info.reset_needed) {
         reset_qsfp(ppd);
@@ -636,7 +635,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
     ret = set_qsfp_high_power(ppd);
     if (ret)
-        goto bail_unlock;
+        return ret;

     if (cache[QSFP_EQ_INFO_OFFS] & 0x4) {
         ret = get_platform_config_field(
@@ -646,7 +645,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
             ptr_tx_preset, 4);
         if (ret) {
             *ptr_tx_preset = OPA_INVALID_INDEX;
-            goto bail_unlock;
+            return ret;
         }
     } else {
         ret = get_platform_config_field(
@@ -656,7 +655,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
             ptr_tx_preset, 4);
         if (ret) {
             *ptr_tx_preset = OPA_INVALID_INDEX;
-            goto bail_unlock;
+            return ret;
         }
     }
@@ -665,7 +664,7 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
         PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4);
     if (ret) {
         *ptr_rx_preset = OPA_INVALID_INDEX;
-        goto bail_unlock;
+        return ret;
     }

     if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
@@ -685,8 +684,6 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
     ret = set_qsfp_tx(ppd, 1);

-bail_unlock:
-    release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
     return ret;
 }
@@ -833,12 +830,22 @@ void tune_serdes(struct hfi1_pportdata *ppd)
             total_atten = platform_atten + remote_atten;

             tuning_method = OPA_PASSIVE_TUNING;
-        } else
+        } else {
             ppd->offline_disabled_reason =
                 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG);
+            goto bail;
+        }
         break;
     case PORT_TYPE_QSFP:
         if (qsfp_mod_present(ppd)) {
+            ret = acquire_chip_resource(ppd->dd,
+                                        qsfp_resource(ppd->dd),
+                                        QSFP_WAIT);
+            if (ret) {
+                dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
+                           __func__, (int)ppd->dd->hfi1_id);
+                goto bail;
+            }
             refresh_qsfp_cache(ppd, &ppd->qsfp_info);

             if (ppd->qsfp_info.cache_valid) {
@@ -853,21 +860,23 @@ void tune_serdes(struct hfi1_pportdata *ppd)
                  * update the cache to reflect the changes
                  */
                 refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+                if (ret)
+                    goto bail;

                 limiting_active =
                     ppd->qsfp_info.limiting_active;
             } else {
                 dd_dev_err(dd,
                            "%s: Reading QSFP memory failed\n",
                            __func__);
-                goto bail;
+                ret = -EINVAL; /* a fail indication */
             }
-        } else
+            release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+            if (ret)
+                goto bail;
+        } else {
             ppd->offline_disabled_reason =
                 HFI1_ODR_MASK(
                     OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+            goto bail;
+        }
         break;
     default:
         dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
...
@@ -167,8 +167,12 @@ static inline int opa_mtu_enum_to_int(int mtu)
  */
 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 {
-    int val = opa_mtu_enum_to_int((int)mtu);
+    int val;

+    /* Constraining 10KB packets to 8KB packets */
+    if (mtu == (enum ib_mtu)OPA_MTU_10240)
+        mtu = OPA_MTU_8192;
+    val = opa_mtu_enum_to_int((int)mtu);
     if (val > 0)
         return val;
     return ib_mtu_enum_to_int(mtu);
...
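verbs_mtu_enum_to_int() now clamps the OPA 10 KiB MTU enum down to the 8 KiB enum before converting, so verbs callers never see a packet size above 8192 even though the port MTU default grew to 10240. A sketch of the resulting mapping; the enum values and the conversion helper below are illustrative assumptions, not copied from the OPA headers:

#include <stdio.h>

/* Assumed encoding: the OPA enum extends the IB MTU enum past IB_MTU_4096. */
enum { OPA_MTU_8192 = 6, OPA_MTU_10240 = 7 };

static int opa_mtu_enum_to_int(int mtu)
{
    switch (mtu) {
    case OPA_MTU_8192:  return 8192;
    case OPA_MTU_10240: return 10240;
    default:            return -1;  /* caller falls back to the IB table */
    }
}

static int verbs_mtu_enum_to_int(int mtu)
{
    /* Constrain 10KB packets to 8KB packets, as in the hunk above. */
    if (mtu == OPA_MTU_10240)
        mtu = OPA_MTU_8192;
    return opa_mtu_enum_to_int(mtu);
}

int main(void)
{
    printf("%d\n", verbs_mtu_enum_to_int(OPA_MTU_10240));  /* prints 8192 */
    return 0;
}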
...@@ -96,7 +96,7 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, ...@@ -96,7 +96,7 @@ int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
{ {
int ret; int ret;
if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES; return -EACCES;
/* make sure the TWSI bus is in a sane state */ /* make sure the TWSI bus is in a sane state */
...@@ -162,7 +162,7 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, ...@@ -162,7 +162,7 @@ int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
{ {
int ret; int ret;
if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES; return -EACCES;
/* make sure the TWSI bus is in a sane state */ /* make sure the TWSI bus is in a sane state */
...@@ -192,7 +192,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ...@@ -192,7 +192,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int ret; int ret;
u8 page; u8 page;
if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES; return -EACCES;
/* make sure the TWSI bus is in a sane state */ /* make sure the TWSI bus is in a sane state */
...@@ -276,7 +276,7 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ...@@ -276,7 +276,7 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int ret; int ret;
u8 page; u8 page;
if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__)) if (!check_chip_resource(ppd->dd, i2c_target(target), __func__))
return -EACCES; return -EACCES;
/* make sure the TWSI bus is in a sane state */ /* make sure the TWSI bus is in a sane state */
...@@ -355,6 +355,8 @@ int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, ...@@ -355,6 +355,8 @@ int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
* The calls to qsfp_{read,write} in this function correctly handle the * The calls to qsfp_{read,write} in this function correctly handle the
* address map difference between this mapping and the mapping implemented * address map difference between this mapping and the mapping implemented
* by those functions * by those functions
*
* The caller must be holding the QSFP i2c chain resource.
*/ */
int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
{ {
...@@ -371,13 +373,9 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) ...@@ -371,13 +373,9 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
if (!qsfp_mod_present(ppd)) { if (!qsfp_mod_present(ppd)) {
ret = -ENODEV; ret = -ENODEV;
goto bail_no_release; goto bail;
} }
ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
if (ret)
goto bail_no_release;
ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE); ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE);
if (ret != QSFP_PAGESIZE) { if (ret != QSFP_PAGESIZE) {
dd_dev_info(ppd->dd, dd_dev_info(ppd->dd,
...@@ -440,8 +438,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) ...@@ -440,8 +438,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
} }
} }
release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags); spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
ppd->qsfp_info.cache_valid = 1; ppd->qsfp_info.cache_valid = 1;
ppd->qsfp_info.cache_refresh_required = 0; ppd->qsfp_info.cache_refresh_required = 0;
...@@ -450,8 +446,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp) ...@@ -450,8 +446,6 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
return 0; return 0;
bail: bail:
release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
bail_no_release:
memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128)); memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
return ret; return ret;
} }
...@@ -466,7 +460,28 @@ const char * const hfi1_qsfp_devtech[16] = { ...@@ -466,7 +460,28 @@ const char * const hfi1_qsfp_devtech[16] = {
#define QSFP_DUMP_CHUNK 16 /* Holds longest string */ #define QSFP_DUMP_CHUNK 16 /* Holds longest string */
#define QSFP_DEFAULT_HDR_CNT 224 #define QSFP_DEFAULT_HDR_CNT 224
static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; #define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
#define QSFP_HIGH_PWR(pbyte) ((pbyte) & 3)
/* For use with QSFP_HIGH_PWR macro */
#define QSFP_HIGH_PWR_UNUSED 0 /* Bits [1:0] = 00 implies low power module */
/*
* Takes power class byte [Page 00 Byte 129] in SFF 8636
* Returns power class as integer (1 through 7, per SFF 8636 rev 2.4)
*/
int get_qsfp_power_class(u8 power_byte)
{
if (QSFP_HIGH_PWR(power_byte) == QSFP_HIGH_PWR_UNUSED)
/* power classes count from 1, their bit encodings from 0 */
return (QSFP_PWR(power_byte) + 1);
/*
* 00 in the high power classes stands for unused, bringing
* balance to the off-by-1 offset above, we add 4 here to
* account for the difference between the low and high power
* groups
*/
return (QSFP_HIGH_PWR(power_byte) + 4);
}
int qsfp_mod_present(struct hfi1_pportdata *ppd) int qsfp_mod_present(struct hfi1_pportdata *ppd)
{ {
...@@ -537,6 +552,16 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, ...@@ -537,6 +552,16 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
return ret; return ret;
} }
static const char *pwr_codes[8] = {"N/AW",
"1.5W",
"2.0W",
"2.5W",
"3.5W",
"4.0W",
"4.5W",
"5.0W"
};
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
{ {
u8 *cache = &ppd->qsfp_info.cache[0]; u8 *cache = &ppd->qsfp_info.cache[0];
...@@ -546,6 +571,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) ...@@ -546,6 +571,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
int bidx = 0; int bidx = 0;
u8 *atten = &cache[QSFP_ATTEN_OFFS]; u8 *atten = &cache[QSFP_ATTEN_OFFS];
u8 *vendor_oui = &cache[QSFP_VOUI_OFFS]; u8 *vendor_oui = &cache[QSFP_VOUI_OFFS];
u8 power_byte = 0;
sofar = 0; sofar = 0;
lenstr[0] = ' '; lenstr[0] = ' ';
...@@ -555,9 +581,9 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) ...@@ -555,9 +581,9 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS])) if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]); sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
power_byte = cache[QSFP_MOD_PWR_OFFS];
sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
pwr_codes + pwr_codes[get_qsfp_power_class(power_byte)]);
(QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]) * 4));
sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n",
lenstr, lenstr,
......
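The qsfp.c hunks above replace the old four-entry power string with a 1-7 power-class decode of SFF-8636 byte 129 plus an indexed pwr_codes table. As a minimal standalone sketch of that decode (a hypothetical user-space harness, not driver code; only the macros and lookup shown in the diff are assumed), the logic reduces to:

	#include <stdio.h>
	#include <stdint.h>

	/* Mirrors the driver macros: bits [7:6] select classes 1-4, bits [1:0] classes 5-7. */
	#define QSFP_PWR(pbyte)        (((pbyte) >> 6) & 3)
	#define QSFP_HIGH_PWR(pbyte)   ((pbyte) & 3)
	#define QSFP_HIGH_PWR_UNUSED   0

	static const char *pwr_codes[8] = {
		"N/AW", "1.5W", "2.0W", "2.5W", "3.5W", "4.0W", "4.5W", "5.0W"
	};

	/* Same shape as get_qsfp_power_class(): the low-power group counts from 1,
	 * the high-power group (bits [1:0] != 0) maps onto classes 5-7. */
	static int power_class(uint8_t power_byte)
	{
		if (QSFP_HIGH_PWR(power_byte) == QSFP_HIGH_PWR_UNUSED)
			return QSFP_PWR(power_byte) + 1;
		return QSFP_HIGH_PWR(power_byte) + 4;
	}

	int main(void)
	{
		/* Example byte-129 values: 0x00 -> class 1, 0xc0 -> class 4, 0x03 -> class 7. */
		uint8_t samples[] = { 0x00, 0xc0, 0x03 };
		unsigned i;

		for (i = 0; i < sizeof(samples); i++)
			printf("byte 0x%02x -> class %d (%s)\n", (unsigned)samples[i],
			       power_class(samples[i]), pwr_codes[power_class(samples[i])]);
		return 0;
	}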
...@@ -82,8 +82,9 @@ ...@@ -82,8 +82,9 @@
/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */ /* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
#define QSFP_MOD_ID_OFFS 128 #define QSFP_MOD_ID_OFFS 128
/* /*
* Byte 129 is "Extended Identifier". We only care about D7,D6: Power class * Byte 129 is "Extended Identifier".
* 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W * For bits [7:6]: 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
* For bits [1:0]: 0:Unused, 1:4W, 2:4.5W, 3:5W
*/ */
#define QSFP_MOD_PWR_OFFS 129 #define QSFP_MOD_PWR_OFFS 129
/* Byte 130 is Connector type. Not Intel req'd */ /* Byte 130 is Connector type. Not Intel req'd */
...@@ -190,6 +191,9 @@ extern const char *const hfi1_qsfp_devtech[16]; ...@@ -190,6 +191,9 @@ extern const char *const hfi1_qsfp_devtech[16];
#define QSFP_HIGH_BIAS_WARNING 0x22 #define QSFP_HIGH_BIAS_WARNING 0x22
#define QSFP_LOW_BIAS_WARNING 0x11 #define QSFP_LOW_BIAS_WARNING 0x11
#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
/* /*
* struct qsfp_data encapsulates state of QSFP device for one port. * struct qsfp_data encapsulates state of QSFP device for one port.
* it will be part of port-specific data if a board supports QSFP. * it will be part of port-specific data if a board supports QSFP.
...@@ -201,12 +205,6 @@ extern const char *const hfi1_qsfp_devtech[16]; ...@@ -201,12 +205,6 @@ extern const char *const hfi1_qsfp_devtech[16];
* and let the qsfp_lock arbitrate access to common resources. * and let the qsfp_lock arbitrate access to common resources.
* *
*/ */
#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4)
#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
struct qsfp_data { struct qsfp_data {
/* Helps to find our way */ /* Helps to find our way */
struct hfi1_pportdata *ppd; struct hfi1_pportdata *ppd;
...@@ -223,6 +221,7 @@ struct qsfp_data { ...@@ -223,6 +221,7 @@ struct qsfp_data {
int refresh_qsfp_cache(struct hfi1_pportdata *ppd, int refresh_qsfp_cache(struct hfi1_pportdata *ppd,
struct qsfp_data *cp); struct qsfp_data *cp);
int get_qsfp_power_class(u8 power_byte);
int qsfp_mod_present(struct hfi1_pportdata *ppd); int qsfp_mod_present(struct hfi1_pportdata *ppd);
int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr,
u32 len, u8 *data); u32 len, u8 *data);
......
...@@ -1497,7 +1497,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, ...@@ -1497,7 +1497,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
/* Ignore reserved NAK codes. */ /* Ignore reserved NAK codes. */
goto bail_stop; goto bail_stop;
} }
return ret; /* cannot be reached */
bail_stop: bail_stop:
hfi1_stop_rc_timers(qp); hfi1_stop_rc_timers(qp);
return ret; return ret;
...@@ -2021,8 +2021,6 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, ...@@ -2021,8 +2021,6 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
if (sl >= OPA_MAX_SLS) if (sl >= OPA_MAX_SLS)
return; return;
cca_timer = &ppd->cca_timer[sl];
cc_state = get_cc_state(ppd); cc_state = get_cc_state(ppd);
if (!cc_state) if (!cc_state)
...@@ -2041,6 +2039,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, ...@@ -2041,6 +2039,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
spin_lock_irqsave(&ppd->cca_timer_lock, flags); spin_lock_irqsave(&ppd->cca_timer_lock, flags);
cca_timer = &ppd->cca_timer[sl];
if (cca_timer->ccti < ccti_limit) { if (cca_timer->ccti < ccti_limit) {
if (cca_timer->ccti + ccti_incr <= ccti_limit) if (cca_timer->ccti + ccti_incr <= ccti_limit)
cca_timer->ccti += ccti_incr; cca_timer->ccti += ccti_incr;
...@@ -2049,8 +2048,6 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, ...@@ -2049,8 +2048,6 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
set_link_ipg(ppd); set_link_ipg(ppd);
} }
spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
ccti = cca_timer->ccti; ccti = cca_timer->ccti;
if (!hrtimer_active(&cca_timer->hrtimer)) { if (!hrtimer_active(&cca_timer->hrtimer)) {
...@@ -2061,6 +2058,8 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, ...@@ -2061,6 +2058,8 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
HRTIMER_MODE_REL); HRTIMER_MODE_REL);
} }
spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
if ((trigger_threshold != 0) && (ccti >= trigger_threshold)) if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type); log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
} }
......
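The process_becn() hunk above widens the cca_timer_lock critical section: the per-SL timer is looked up, its CCTI bumped and read back, and the hrtimer armed all under the same lock, instead of reading the counter after the unlock. A rough user-space sketch of the corrected ordering (pthread mutex standing in for the spinlock, invented "demo" names):

	#include <pthread.h>
	#include <stdio.h>

	/* Stand-in for the driver's per-SL congestion timer state (hypothetical). */
	struct cca_timer_demo {
		unsigned ccti;
		int timer_active;
	};

	static pthread_mutex_t cca_timer_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct cca_timer_demo cca_timers[32];

	/* Corrected pattern: look up the timer, adjust CCTI, read it back, and decide
	 * whether to arm the timer inside one critical section, so another CPU cannot
	 * change the counter between those steps. */
	static unsigned process_becn_demo(unsigned sl, unsigned ccti_incr, unsigned ccti_limit)
	{
		struct cca_timer_demo *t;
		unsigned ccti;

		pthread_mutex_lock(&cca_timer_lock);
		t = &cca_timers[sl];            /* was fetched before the lock in the old code */
		if (t->ccti + ccti_incr <= ccti_limit)
			t->ccti += ccti_incr;
		else
			t->ccti = ccti_limit;
		ccti = t->ccti;                 /* was read after the unlock in the old code */
		if (!t->timer_active)
			t->timer_active = 1;    /* stands in for hrtimer_start() */
		pthread_mutex_unlock(&cca_timer_lock);

		return ccti;                    /* compared against the trigger threshold */
	}

	int main(void)
	{
		printf("ccti after one BECN on SL 3: %u\n", process_becn_demo(3, 4, 100));
		return 0;
	}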
...@@ -831,7 +831,6 @@ void hfi1_do_send(struct rvt_qp *qp) ...@@ -831,7 +831,6 @@ void hfi1_do_send(struct rvt_qp *qp)
struct hfi1_pkt_state ps; struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
unsigned long flags;
unsigned long timeout; unsigned long timeout;
unsigned long timeout_int; unsigned long timeout_int;
int cpu; int cpu;
...@@ -866,11 +865,11 @@ void hfi1_do_send(struct rvt_qp *qp) ...@@ -866,11 +865,11 @@ void hfi1_do_send(struct rvt_qp *qp)
timeout_int = SEND_RESCHED_TIMEOUT; timeout_int = SEND_RESCHED_TIMEOUT;
} }
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, ps.flags);
/* Return if we are already busy processing a work request. */ /* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) { if (!hfi1_send_ok(qp)) {
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return; return;
} }
...@@ -884,7 +883,7 @@ void hfi1_do_send(struct rvt_qp *qp) ...@@ -884,7 +883,7 @@ void hfi1_do_send(struct rvt_qp *qp)
do { do {
/* Check for a constructed packet to be sent. */ /* Check for a constructed packet to be sent. */
if (qp->s_hdrwords != 0) { if (qp->s_hdrwords != 0) {
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/* /*
* If the packet cannot be sent now, return and * If the packet cannot be sent now, return and
* the send tasklet will be woken up later. * the send tasklet will be woken up later.
...@@ -897,11 +896,14 @@ void hfi1_do_send(struct rvt_qp *qp) ...@@ -897,11 +896,14 @@ void hfi1_do_send(struct rvt_qp *qp)
if (unlikely(time_after(jiffies, timeout))) { if (unlikely(time_after(jiffies, timeout))) {
if (workqueue_congested(cpu, if (workqueue_congested(cpu,
ps.ppd->hfi1_wq)) { ps.ppd->hfi1_wq)) {
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(
&qp->s_lock,
ps.flags);
qp->s_flags &= ~RVT_S_BUSY; qp->s_flags &= ~RVT_S_BUSY;
hfi1_schedule_send(qp); hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, spin_unlock_irqrestore(
flags); &qp->s_lock,
ps.flags);
this_cpu_inc( this_cpu_inc(
*ps.ppd->dd->send_schedule); *ps.ppd->dd->send_schedule);
return; return;
...@@ -913,11 +915,11 @@ void hfi1_do_send(struct rvt_qp *qp) ...@@ -913,11 +915,11 @@ void hfi1_do_send(struct rvt_qp *qp)
} }
timeout = jiffies + (timeout_int) / 8; timeout = jiffies + (timeout_int) / 8;
} }
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, ps.flags);
} }
} while (make_req(qp, &ps)); } while (make_req(qp, &ps));
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, ps.flags);
} }
/* /*
......
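Carrying the saved IRQ flags in hfi1_pkt_state (ps.flags) rather than in a local is what allows a make-request routine to drop and retake s_lock on the caller's behalf, as the hfi1_make_ud_req() loopback path does in the next hunk. A hedged sketch of that hand-off, with user-space stand-ins for the kernel primitives and invented "demo" names:

	#include <stdio.h>

	/* User-space stand-ins for the kernel locking primitives (hypothetical). */
	static unsigned long fake_irq_state = 0xACE;
	#define spin_lock_irqsave(lock, flags)      ((void)(lock), (flags) = fake_irq_state)
	#define spin_unlock_irqrestore(lock, flags) ((void)(lock), fake_irq_state = (flags))

	struct pkt_state_demo {
		unsigned long flags;   /* mirrors the new hfi1_pkt_state::flags member */
	};

	static int s_lock;   /* placeholder for qp->s_lock */

	/* The callee may drop the lock (e.g. for loopback) using the flags the caller
	 * stashed in the packet state, then retake it and store the new flags back. */
	static void make_req_demo(struct pkt_state_demo *ps)
	{
		unsigned long tflags = ps->flags;

		spin_unlock_irqrestore(&s_lock, tflags);
		/* ... work done without holding s_lock ... */
		spin_lock_irqsave(&s_lock, tflags);
		ps->flags = tflags;
	}

	int main(void)
	{
		struct pkt_state_demo ps;

		spin_lock_irqsave(&s_lock, ps.flags);   /* the send loop saves into ps */
		make_req_demo(&ps);
		spin_unlock_irqrestore(&s_lock, ps.flags);
		printf("final flags: 0x%lx\n", ps.flags);
		return 0;
	}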
...@@ -84,7 +84,7 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, ...@@ -84,7 +84,7 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
rcu_read_unlock(); rcu_read_unlock();
return -EINVAL; return -EINVAL;
} }
memcpy(buf, &cc_state->cct, count); memcpy(buf, (void *)&cc_state->cct + pos, count);
rcu_read_unlock(); rcu_read_unlock();
return count; return count;
...@@ -131,7 +131,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, ...@@ -131,7 +131,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
rcu_read_unlock(); rcu_read_unlock();
return -EINVAL; return -EINVAL;
} }
memcpy(buf, &cc_state->cong_setting, count); memcpy(buf, (void *)&cc_state->cong_setting + pos, count);
rcu_read_unlock(); rcu_read_unlock();
return count; return count;
......
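The sysfs fix above matters because binary attribute reads may arrive at a nonzero offset; copying from the start of the structure on every call would hand back the first chunk repeatedly. A minimal sketch of the offset-aware copy, with a hypothetical blob and helper in place of the kobject plumbing:

	#include <stdio.h>
	#include <string.h>
	#include <sys/types.h>

	/* Hypothetical congestion-control blob standing in for cc_state->cct. */
	static const unsigned char cct_blob[16] = {
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
	};

	/* Mirrors the fixed read_cc_table_bin(): honor the read offset by copying
	 * from (base + pos), clamping count to what remains. */
	static ssize_t read_blob(char *buf, long pos, size_t count)
	{
		if (pos < 0 || (size_t)pos > sizeof(cct_blob))
			return -1;
		if (count > sizeof(cct_blob) - (size_t)pos)
			count = sizeof(cct_blob) - (size_t)pos;
		memcpy(buf, cct_blob + pos, count);
		return count;
	}

	int main(void)
	{
		char buf[8];
		ssize_t n = read_blob(buf, 8, sizeof(buf));   /* second 8-byte chunk */

		printf("read %zd bytes starting with %u\n", n, (unsigned char)buf[0]);
		return 0;
	}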
...@@ -322,7 +322,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ...@@ -322,7 +322,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
(lid == ppd->lid || (lid == ppd->lid ||
(lid == be16_to_cpu(IB_LID_PERMISSIVE) && (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
qp->ibqp.qp_type == IB_QPT_GSI)))) { qp->ibqp.qp_type == IB_QPT_GSI)))) {
unsigned long flags; unsigned long tflags = ps->flags;
/* /*
* If DMAs are in progress, we can't generate * If DMAs are in progress, we can't generate
* a completion for the loopback packet since * a completion for the loopback packet since
...@@ -335,10 +335,10 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ...@@ -335,10 +335,10 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail; goto bail;
} }
qp->s_cur = next_cur; qp->s_cur = next_cur;
local_irq_save(flags); spin_unlock_irqrestore(&qp->s_lock, tflags);
spin_unlock_irqrestore(&qp->s_lock, flags);
ud_loopback(qp, wqe); ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, tflags);
ps->flags = tflags;
hfi1_send_complete(qp, wqe, IB_WC_SUCCESS); hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx; goto done_free_tx;
} }
......
...@@ -399,8 +399,11 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) ...@@ -399,8 +399,11 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
* pages, accept the amount pinned so far and program only that. * pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers. * User space knows how to deal with partially programmed buffers.
*/ */
if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) {
return -ENOMEM; ret = -ENOMEM;
goto bail;
}
pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
if (pinned <= 0) { if (pinned <= 0) {
ret = pinned; ret = pinned;
......
...@@ -180,6 +180,8 @@ struct user_sdma_iovec { ...@@ -180,6 +180,8 @@ struct user_sdma_iovec {
u64 offset; u64 offset;
}; };
#define SDMA_CACHE_NODE_EVICT BIT(0)
struct sdma_mmu_node { struct sdma_mmu_node {
struct mmu_rb_node rb; struct mmu_rb_node rb;
struct list_head list; struct list_head list;
...@@ -187,6 +189,7 @@ struct sdma_mmu_node { ...@@ -187,6 +189,7 @@ struct sdma_mmu_node {
atomic_t refcount; atomic_t refcount;
struct page **pages; struct page **pages;
unsigned npages; unsigned npages;
unsigned long flags;
}; };
struct user_sdma_request { struct user_sdma_request {
...@@ -597,6 +600,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, ...@@ -597,6 +600,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
goto free_req; goto free_req;
} }
/* Checking P_KEY for requests from user-space */
if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc,
PKEY_CHECK_INVALID)) {
ret = -EINVAL;
goto free_req;
}
/* /*
* Also should check the BTH.lnh. If it says the next header is GRH then * Also should check the BTH.lnh. If it says the next header is GRH then
* the RXE parsing will be off and will land in the middle of the KDETH * the RXE parsing will be off and will land in the middle of the KDETH
...@@ -1030,27 +1040,29 @@ static inline int num_user_pages(const struct iovec *iov) ...@@ -1030,27 +1040,29 @@ static inline int num_user_pages(const struct iovec *iov)
return 1 + ((epage - spage) >> PAGE_SHIFT); return 1 + ((epage - spage) >> PAGE_SHIFT);
} }
/* Caller must hold pq->evict_lock */
static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{ {
u32 cleared = 0; u32 cleared = 0;
struct sdma_mmu_node *node, *ptr; struct sdma_mmu_node *node, *ptr;
struct list_head to_evict = LIST_HEAD_INIT(to_evict);
spin_lock(&pq->evict_lock);
list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
/* Make sure that no one is still using the node. */ /* Make sure that no one is still using the node. */
if (!atomic_read(&node->refcount)) { if (!atomic_read(&node->refcount)) {
/* set_bit(SDMA_CACHE_NODE_EVICT, &node->flags);
* Need to use the page count now as the remove callback list_del_init(&node->list);
* will free the node. list_add(&node->list, &to_evict);
*/
cleared += node->npages; cleared += node->npages;
spin_unlock(&pq->evict_lock);
hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
spin_lock(&pq->evict_lock);
if (cleared >= npages) if (cleared >= npages)
break; break;
} }
} }
spin_unlock(&pq->evict_lock);
list_for_each_entry_safe(node, ptr, &to_evict, list)
hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
return cleared; return cleared;
} }
...@@ -1062,7 +1074,7 @@ static int pin_vector_pages(struct user_sdma_request *req, ...@@ -1062,7 +1074,7 @@ static int pin_vector_pages(struct user_sdma_request *req,
struct sdma_mmu_node *node = NULL; struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node; struct mmu_rb_node *rb_node;
rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root, rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root,
(unsigned long)iovec->iov.iov_base, (unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len); iovec->iov.iov_len);
if (rb_node && !IS_ERR(rb_node)) if (rb_node && !IS_ERR(rb_node))
...@@ -1076,7 +1088,6 @@ static int pin_vector_pages(struct user_sdma_request *req, ...@@ -1076,7 +1088,6 @@ static int pin_vector_pages(struct user_sdma_request *req,
return -ENOMEM; return -ENOMEM;
node->rb.addr = (unsigned long)iovec->iov.iov_base; node->rb.addr = (unsigned long)iovec->iov.iov_base;
node->rb.len = iovec->iov.iov_len;
node->pq = pq; node->pq = pq;
atomic_set(&node->refcount, 0); atomic_set(&node->refcount, 0);
INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->list);
...@@ -1093,11 +1104,25 @@ static int pin_vector_pages(struct user_sdma_request *req, ...@@ -1093,11 +1104,25 @@ static int pin_vector_pages(struct user_sdma_request *req,
memcpy(pages, node->pages, node->npages * sizeof(*pages)); memcpy(pages, node->pages, node->npages * sizeof(*pages));
npages -= node->npages; npages -= node->npages;
/*
* If rb_node is NULL, it means that this is brand new node
* and, therefore not on the eviction list.
* If, however, the rb_node is non-NULL, it means that the
* node is already in RB tree and, therefore on the eviction
* list (nodes are unconditionally inserted in the eviction
* list). In that case, we have to remove the node prior to
* calling the eviction function in order to prevent it from
* freeing this node.
*/
if (rb_node) {
spin_lock(&pq->evict_lock);
list_del_init(&node->list);
spin_unlock(&pq->evict_lock);
}
retry: retry:
if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
spin_lock(&pq->evict_lock);
cleared = sdma_cache_evict(pq, npages); cleared = sdma_cache_evict(pq, npages);
spin_unlock(&pq->evict_lock);
if (cleared >= npages) if (cleared >= npages)
goto retry; goto retry;
} }
...@@ -1117,36 +1142,31 @@ static int pin_vector_pages(struct user_sdma_request *req, ...@@ -1117,36 +1142,31 @@ static int pin_vector_pages(struct user_sdma_request *req,
goto bail; goto bail;
} }
kfree(node->pages); kfree(node->pages);
node->rb.len = iovec->iov.iov_len;
node->pages = pages; node->pages = pages;
node->npages += pinned; node->npages += pinned;
npages = node->npages; npages = node->npages;
spin_lock(&pq->evict_lock); spin_lock(&pq->evict_lock);
if (!rb_node)
list_add(&node->list, &pq->evict); list_add(&node->list, &pq->evict);
else
list_move(&node->list, &pq->evict);
pq->n_locked += pinned; pq->n_locked += pinned;
spin_unlock(&pq->evict_lock); spin_unlock(&pq->evict_lock);
} }
iovec->pages = node->pages; iovec->pages = node->pages;
iovec->npages = npages; iovec->npages = npages;
if (!rb_node) {
ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
if (ret) { if (ret) {
spin_lock(&pq->evict_lock); spin_lock(&pq->evict_lock);
if (!list_empty(&node->list))
list_del(&node->list); list_del(&node->list);
pq->n_locked -= node->npages; pq->n_locked -= node->npages;
spin_unlock(&pq->evict_lock); spin_unlock(&pq->evict_lock);
ret = 0;
goto bail; goto bail;
} }
} else {
atomic_inc(&node->refcount);
}
return 0; return 0;
bail: bail:
if (!rb_node) if (rb_node)
unpin_vector_pages(current->mm, node->pages, 0, node->npages);
kfree(node); kfree(node);
return ret; return ret;
} }
...@@ -1558,6 +1578,19 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, ...@@ -1558,6 +1578,19 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
container_of(mnode, struct sdma_mmu_node, rb); container_of(mnode, struct sdma_mmu_node, rb);
spin_lock(&node->pq->evict_lock); spin_lock(&node->pq->evict_lock);
/*
* We've been called by the MMU notifier but this node has been
* scheduled for eviction. The eviction function will take care
* of freeing this node.
* We have to take the above lock first because we are racing
* against the setting of the bit in the eviction function.
*/
if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) {
spin_unlock(&node->pq->evict_lock);
return;
}
if (!list_empty(&node->list))
list_del(&node->list); list_del(&node->list);
node->pq->n_locked -= node->npages; node->pq->n_locked -= node->npages;
spin_unlock(&node->pq->evict_lock); spin_unlock(&node->pq->evict_lock);
......
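The user_sdma.c hunks above close a race between cache eviction and the MMU notifier: the evictor now flags each idle node (SDMA_CACHE_NODE_EVICT) and detaches it under evict_lock before removing it from the RB tree, while the remove callback bails out for flagged nodes so only one path frees the node. A stripped-down sketch of that hand-off, using pthread locking and placeholder names rather than the driver's list and RB-tree machinery:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define NODE_EVICT_FLAG 0x1   /* stands in for SDMA_CACHE_NODE_EVICT */

	struct cache_node_demo {
		unsigned long flags;
		int refcount;
		bool on_evict_list;
	};

	static pthread_mutex_t evict_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Evictor side: mark and detach the node under the lock; the caller then
	 * removes it from the RB tree and frees it outside the lock. */
	static bool evict_node(struct cache_node_demo *node)
	{
		bool evicted = false;

		pthread_mutex_lock(&evict_lock);
		if (node->refcount == 0) {
			node->flags |= NODE_EVICT_FLAG;
			node->on_evict_list = false;   /* list_del_init() onto to_evict */
			evicted = true;
		}
		pthread_mutex_unlock(&evict_lock);
		return evicted;
	}

	/* MMU-notifier side: if the evictor already claimed the node, do nothing;
	 * the eviction path owns the teardown. */
	static void notifier_remove(struct cache_node_demo *node)
	{
		pthread_mutex_lock(&evict_lock);
		if (node->flags & NODE_EVICT_FLAG) {
			pthread_mutex_unlock(&evict_lock);
			return;
		}
		if (node->on_evict_list)
			node->on_evict_list = false;
		pthread_mutex_unlock(&evict_lock);
		/* ... unpin pages and free the node here ... */
	}

	int main(void)
	{
		struct cache_node_demo n = { .flags = 0, .refcount = 0, .on_evict_list = true };

		printf("evicted: %d\n", evict_node(&n));
		notifier_remove(&n);   /* no-op: eviction already owns this node */
		return 0;
	}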
...@@ -545,7 +545,7 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet) ...@@ -545,7 +545,7 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet)
if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
goto dropit; goto dropit;
if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) || if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
(opcode == IB_OPCODE_CNP)) (opcode == IB_OPCODE_CNP))
return 1; return 1;
dropit: dropit:
...@@ -1089,16 +1089,16 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ...@@ -1089,16 +1089,16 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
/* /*
* egress_pkey_matches_entry - return 1 if the pkey matches ent (ent * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent
* being an entry from the ingress partition key table), return 0 * being an entry from the partition key table), return 0
* otherwise. Use the matching criteria for egress partition keys * otherwise. Use the matching criteria for egress partition keys
* specified in the OPAv1 spec., section 9.1l.7. * specified in the OPAv1 spec., section 9.1l.7.
*/ */
static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
{ {
u16 mkey = pkey & PKEY_LOW_15_MASK; u16 mkey = pkey & PKEY_LOW_15_MASK;
u16 ment = ent & PKEY_LOW_15_MASK; u16 mentry = ent & PKEY_LOW_15_MASK;
if (mkey == ment) { if (mkey == mentry) {
/* /*
* If pkey[15] is set (full partition member), * If pkey[15] is set (full partition member),
* is bit 15 in the corresponding table element * is bit 15 in the corresponding table element
...@@ -1111,32 +1111,32 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) ...@@ -1111,32 +1111,32 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
return 0; return 0;
} }
/* /**
* egress_pkey_check - return 0 if hdr's pkey matches according to the * egress_pkey_check - check P_KEY of a packet
* criteria in the OPAv1 spec., section 9.11.7. * @ppd: Physical IB port data
* @lrh: Local route header
* @bth: Base transport header
* @sc5: SC for packet
* @s_pkey_index: It will be used for look up optimization for kernel contexts
* only. If it is negative value, then it means user contexts is calling this
* function.
*
* It checks if hdr's pkey is valid.
*
* Return: 0 on success, otherwise, 1
*/ */
static inline int egress_pkey_check(struct hfi1_pportdata *ppd, int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
struct hfi1_ib_header *hdr, u8 sc5, int8_t s_pkey_index)
struct rvt_qp *qp)
{ {
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_other_headers *ohdr;
struct hfi1_devdata *dd; struct hfi1_devdata *dd;
int i = 0; int i;
u16 pkey; u16 pkey;
u8 lnh, sc5 = priv->s_sc; int is_user_ctxt_mechanism = (s_pkey_index < 0);
if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
return 0; return 0;
/* locate the pkey within the headers */ pkey = (u16)be32_to_cpu(bth[0]);
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
else
ohdr = &hdr->u.oth;
pkey = (u16)be32_to_cpu(ohdr->bth[0]);
/* If SC15, pkey[0:14] must be 0x7fff */ /* If SC15, pkey[0:14] must be 0x7fff */
if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
...@@ -1146,29 +1146,38 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd, ...@@ -1146,29 +1146,38 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
if ((pkey & PKEY_LOW_15_MASK) == 0) if ((pkey & PKEY_LOW_15_MASK) == 0)
goto bad; goto bad;
/* The most likely matching pkey has index qp->s_pkey_index */ /*
if (unlikely(!egress_pkey_matches_entry(pkey, * For the kernel contexts only, if a qp is passed into the function,
ppd->pkeys * the most likely matching pkey has index qp->s_pkey_index
[qp->s_pkey_index]))) { */
/* no match - try the entire table */ if (!is_user_ctxt_mechanism &&
for (; i < MAX_PKEY_VALUES; i++) { egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) {
if (egress_pkey_matches_entry(pkey, ppd->pkeys[i])) return 0;
break;
}
} }
if (i < MAX_PKEY_VALUES) for (i = 0; i < MAX_PKEY_VALUES; i++) {
if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
return 0; return 0;
}
bad: bad:
/*
* For the user-context mechanism, the P_KEY check would only happen
* once per SDMA request, not once per packet. Therefore, there's no
* need to increment the counter for the user-context mechanism.
*/
if (!is_user_ctxt_mechanism) {
incr_cntr64(&ppd->port_xmit_constraint_errors); incr_cntr64(&ppd->port_xmit_constraint_errors);
dd = ppd->dd; dd = ppd->dd;
if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) { if (!(dd->err_info_xmit_constraint.status &
u16 slid = be16_to_cpu(hdr->lrh[3]); OPA_EI_STATUS_SMASK)) {
u16 slid = be16_to_cpu(lrh[3]);
dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK; dd->err_info_xmit_constraint.status |=
OPA_EI_STATUS_SMASK;
dd->err_info_xmit_constraint.slid = slid; dd->err_info_xmit_constraint.slid = slid;
dd->err_info_xmit_constraint.pkey = pkey; dd->err_info_xmit_constraint.pkey = pkey;
} }
}
return 1; return 1;
} }
...@@ -1227,11 +1236,26 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ...@@ -1227,11 +1236,26 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{ {
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_other_headers *ohdr;
struct hfi1_ib_header *hdr;
send_routine sr; send_routine sr;
int ret; int ret;
u8 lnh;
hdr = &ps->s_txreq->phdr.hdr;
/* locate the pkey within the headers */
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh == HFI1_LRH_GRH)
ohdr = &hdr->u.l.oth;
else
ohdr = &hdr->u.oth;
sr = get_send_routine(qp, ps->s_txreq); sr = get_send_routine(qp, ps->s_txreq);
ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp); ret = egress_pkey_check(dd->pport,
hdr->lrh,
ohdr->bth,
priv->s_sc,
qp->s_pkey_index);
if (unlikely(ret)) { if (unlikely(ret)) {
/* /*
* The value we are returning here does not get propagated to * The value we are returning here does not get propagated to
......
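With the reworked signature, egress_pkey_check() takes the LRH/BTH pointers and an s_pkey_index hint directly, so user SDMA can invoke it once per request with a negative index (PKEY_CHECK_INVALID, assumed here to be a negative sentinel per the s_pkey_index < 0 test above) while the kernel verbs path still passes qp->s_pkey_index for the fast-path lookup. A simplified user-space sketch of the two call shapes; membership-bit handling is deliberately omitted and only the low 15 bits are compared:

	#include <stdint.h>
	#include <stdio.h>

	#define PKEY_LOW_15_MASK   0x7fff
	#define MAX_PKEY_VALUES    16
	#define PKEY_CHECK_INVALID (-1)   /* negative index => user-context caller */

	/* Simplified table scan mirroring the rewritten egress_pkey_check(): try the
	 * hinted index only for kernel callers, then fall back to the full table. */
	static int pkey_check_demo(const uint16_t *pkeys, uint16_t pkey, int s_pkey_index)
	{
		int is_user_ctxt_mechanism = (s_pkey_index < 0);
		int i;

		if (!is_user_ctxt_mechanism &&
		    (pkeys[s_pkey_index] & PKEY_LOW_15_MASK) == (pkey & PKEY_LOW_15_MASK))
			return 0;

		for (i = 0; i < MAX_PKEY_VALUES; i++)
			if ((pkeys[i] & PKEY_LOW_15_MASK) == (pkey & PKEY_LOW_15_MASK))
				return 0;

		return 1;   /* constraint error; kernel callers also bump a counter */
	}

	int main(void)
	{
		uint16_t table[MAX_PKEY_VALUES] = { 0xffff, 0x8001 };

		/* Kernel-style caller with an index hint vs. user SDMA with no hint. */
		printf("kernel path: %d\n", pkey_check_demo(table, 0x8001, 1));
		printf("user path:   %d\n", pkey_check_demo(table, 0x8001, PKEY_CHECK_INVALID));
		return 0;
	}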
...@@ -215,6 +215,7 @@ struct hfi1_pkt_state { ...@@ -215,6 +215,7 @@ struct hfi1_pkt_state {
struct hfi1_ibport *ibp; struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd; struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq; struct verbs_txreq *s_txreq;
unsigned long flags;
}; };
#define HFI1_PSN_CREDIT 16 #define HFI1_PSN_CREDIT 16
...@@ -334,9 +335,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ...@@ -334,9 +335,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
#endif #endif
#define PSN_MODIFY_MASK 0xFFFFFF #define PSN_MODIFY_MASK 0xFFFFFF
/* Number of bits to pay attention to in the opcode for checking qp type */
#define OPCODE_QP_MASK 0xE0
/* /*
* Compare the lower 24 bits of the msn values. * Compare the lower 24 bits of the msn values.
* Returns an integer <, ==, or > than zero. * Returns an integer <, ==, or > than zero.
......
...@@ -467,6 +467,7 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, ...@@ -467,6 +467,7 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
} }
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
int rvt_register_device(struct rvt_dev_info *rvd); int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd);
int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
......
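The rdma_vt.h hunk exports rvt_dealloc_device() as the counterpart to rvt_alloc_device(), so drivers free their device through rdmavt rather than calling ib_dealloc_device() directly. A hedged kernel-style skeleton of that pairing; the "my_*" names are invented, and the sketch assumes the rvt_dev_info sits at the start of the driver structure so the returned pointer can be used as the device:

	#include <rdma/rdma_vt.h>

	/* Hypothetical driver device; rdi is placed first for this sketch. */
	struct my_devdata {
		struct rvt_dev_info rdi;
		/* ... driver-private state ... */
	};

	static struct my_devdata *my_alloc_devdata(int nports)
	{
		return (struct my_devdata *)rvt_alloc_device(sizeof(struct my_devdata),
							     nports);
	}

	static void my_free_devdata(struct my_devdata *dd)
	{
		/* Pair every rvt_alloc_device() with rvt_dealloc_device() so rdmavt
		 * can release its own per-port state before the ib_device goes away. */
		rvt_dealloc_device(&dd->rdi);
	}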
...@@ -117,7 +117,8 @@ ...@@ -117,7 +117,8 @@
/* /*
* Wait flags that would prevent any packet type from being sent. * Wait flags that would prevent any packet type from being sent.
*/ */
#define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ #define RVT_S_ANY_WAIT_IO \
(RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \
RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
/* /*
......