Commit 9a69d1ae authored by Linus Torvalds

Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband: (33 commits)
  IB/ipath: Fix lockdep error upon "ifconfig ibN down"
  IB/ipath: Fix races with ib_resize_cq()
  IB/ipath: Support new PCIE device, QLE7142
  IB/ipath: Set CPU affinity early
  IB/ipath: Fix EEPROM read when driver is compiled with -Os
  IB/ipath: Fix and recover TXE piobuf and PBC parity errors
  IB/ipath: Change HT CRC message to indicate how to resolve problem
  IB/ipath: Clean up module exit code
  IB/ipath: Call mtrr_del with correct arguments
  IB/ipath: Flush RWQEs if access error or invalid error seen
  IB/ipath: Improved support for PowerPC
  IB/ipath: Drop unnecessary "(void *)" casts
  IB/ipath: Support multiple simultaneous devices of different types
  IB/ipath: Fix mismatch in shifts and masks for printing debug info
  IB/ipath: Fix compiler warnings and errors on non-x86_64 systems
  IB/ipath: Print more informative parity error messages
  IB/ipath: Ensure that PD of MR matches PD of QP checking the Rkey
  IB/ipath: RC and UC should validate SLID and DLID
  IB/ipath: Only allow complete writes to flash
  IB/ipath: Count SRQs properly
  ...
parents c0341b0f 3d27b004
......@@ -197,7 +197,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
"resource=%x, qp_state=%s\n",
__FUNCTION__,
to_event_str(event_id),
be64_to_cpu(wr->ae.ae_generic.user_context),
(unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context),
be32_to_cpu(wr->ae.ae_generic.resource_type),
be32_to_cpu(wr->ae.ae_generic.resource),
to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
......
......@@ -115,7 +115,7 @@ u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
((unsigned long) &(head->shared_ptr[mqsp]) -
(unsigned long) head);
pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
&(head->shared_ptr[mqsp]), (u64)*dma_addr);
&(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
return &(head->shared_ptr[mqsp]);
}
return NULL;
......
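The hunks above (and the matching c2_rnic_init hunks further down) all apply the same fix: a 64-bit value such as a dma_addr_t or a be64_to_cpu() result is cast to unsigned long long before being printed with %llx, because u64 and dma_addr_t map to different underlying C types on different architectures, so the bare value trips format warnings on some of them. A minimal stand-alone sketch of the idiom, using uint64_t in userspace rather than the driver's own types:

/* Sketch: printing 64-bit kernel-style types portably with %llx. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t dma_addr = 0x12345678abcdULL;	/* stands in for dma_addr_t / u64 */

	/*
	 * Without the cast, %llx only matches when uint64_t happens to be
	 * unsigned long long; on LP64 platforms it is usually unsigned long
	 * and the compiler warns. The cast makes the format string correct
	 * on every architecture.
	 */
	printf("dma %llx\n", (unsigned long long) dma_addr);
	return 0;
}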
......@@ -302,7 +302,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
vq_req = vq_req_alloc(c2dev);
if (!vq_req) {
err = -ENOMEM;
goto bail1;
goto bail0;
}
vq_req->qp = qp;
vq_req->cm_id = cm_id;
......@@ -311,7 +311,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
if (!wr) {
err = -ENOMEM;
goto bail2;
goto bail1;
}
/* Build the WR */
......@@ -331,7 +331,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
/* Validate private_data length */
if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
err = -EINVAL;
goto bail2;
goto bail1;
}
if (iw_param->private_data) {
......@@ -348,19 +348,19 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
err = vq_send_wr(c2dev, (union c2wr *) wr);
if (err) {
vq_req_put(c2dev, vq_req);
goto bail2;
goto bail1;
}
/* Wait for reply from adapter */
err = vq_wait_for_reply(c2dev, vq_req);
if (err)
goto bail2;
goto bail1;
/* Check that reply is present */
reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
if (!reply) {
err = -ENOMEM;
goto bail2;
goto bail1;
}
err = c2_errno(reply);
......@@ -368,9 +368,8 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
if (!err)
c2_set_qp_state(qp, C2_QP_STATE_RTS);
bail2:
kfree(wr);
bail1:
kfree(wr);
vq_req_free(c2dev, vq_req);
bail0:
if (err) {
......
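The relabelled gotos above restore the usual kernel unwind idiom: each failure jumps to a label that releases only what has already been set up, and the labels run in reverse order of acquisition so that later labels fall through to earlier ones. A small stand-alone sketch of the pattern using plain malloc/free (the names are illustrative, not the c2 verbs-queue helpers):

/* Sketch: goto-based error unwinding, labels in reverse order of setup. */
#include <stdlib.h>

static int setup_two_buffers(void)
{
	char *req, *wr;
	int err = 0;

	req = malloc(64);
	if (!req) {
		err = -1;
		goto bail0;	/* nothing to release yet */
	}

	wr = malloc(64);
	if (!wr) {
		err = -1;
		goto bail1;	/* only 'req' needs releasing */
	}

	/* ... use req and wr ... */
	free(wr);
	free(req);
	return 0;

bail1:
	free(req);
bail0:
	return err;
}

int main(void)
{
	return setup_two_buffers() ? 1 : 0;
}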
......@@ -390,14 +390,18 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
}
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
if (!mr) {
vfree(page_list);
return ERR_PTR(-ENOMEM);
}
mr->pd = to_c2pd(ib_pd);
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__FUNCTION__, page_shift, pbl_depth, total_len,
*iova_start, page_list[0], page_list[pbl_depth-1]);
(unsigned long long) *iova_start,
(unsigned long long) page_list[0],
(unsigned long long) page_list[pbl_depth-1]);
err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
(1 << page_shift), pbl_depth,
total_len, 0, iova_start,
......
......@@ -527,7 +527,7 @@ int c2_rnic_init(struct c2_dev *c2dev)
DMA_FROM_DEVICE);
pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
(u64)c2dev->rep_vq.host_dma);
(unsigned long long) c2dev->rep_vq.host_dma);
c2_mq_rep_init(&c2dev->rep_vq,
1,
qsize,
......@@ -550,7 +550,7 @@ int c2_rnic_init(struct c2_dev *c2dev)
DMA_FROM_DEVICE);
pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages,
(u64)c2dev->rep_vq.host_dma);
(unsigned long long) c2dev->rep_vq.host_dma);
c2_mq_rep_init(&c2dev->aeq,
2,
qsize,
......
......@@ -141,8 +141,9 @@ struct infinipath_stats {
* packets if ipath not configured, etc.)
*/
__u64 sps_krdrops;
__u64 sps_txeparity; /* PIO buffer parity error, recovered */
/* pad for future growth */
__u64 __sps_pad[46];
__u64 __sps_pad[45];
};
/*
......@@ -185,6 +186,9 @@ typedef enum _ipath_ureg {
#define IPATH_RUNTIME_PCIE 0x2
#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
#define IPATH_RUNTIME_RCVHDR_COPY 0x8
#define IPATH_RUNTIME_MASTER 0x10
#define IPATH_RUNTIME_PBC_REWRITE 0x20
#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
/*
* This structure is returned by ipath_userinit() immediately after
......@@ -202,7 +206,8 @@ struct ipath_base_info {
/* version of software, for feature checking. */
__u32 spi_sw_version;
/* InfiniPath port assigned, goes into sent packets */
__u32 spi_port;
__u16 spi_port;
__u16 spi_subport;
/*
* IB MTU, packets IB data must be less than this.
* The MTU is in bytes, and will be a multiple of 4 bytes.
......@@ -218,7 +223,7 @@ struct ipath_base_info {
__u32 spi_tidcnt;
/* size of the TID Eager list in infinipath, in entries */
__u32 spi_tidegrcnt;
/* size of a single receive header queue entry. */
/* size of a single receive header queue entry in words. */
__u32 spi_rcvhdrent_size;
/*
* Count of receive header queue entries allocated.
......@@ -310,6 +315,12 @@ struct ipath_base_info {
__u32 spi_filler_for_align;
/* address of readonly memory copy of the rcvhdrq tail register. */
__u64 spi_rcvhdr_tailaddr;
/* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
__u64 spi_subport_uregbase;
__u64 spi_subport_rcvegrbuf;
__u64 spi_subport_rcvhdr_base;
} __attribute__ ((aligned(8)));
......@@ -328,12 +339,12 @@ struct ipath_base_info {
/*
* Minor version differences are always compatible
* a within a major version, however if if user software is larger
* a within a major version, however if user software is larger
* than driver software, some new features and/or structure fields
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference
* cares, or it must abort after initialization reports the difference.
*/
#define IPATH_USER_SWMINOR 2
#define IPATH_USER_SWMINOR 3
#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
......@@ -379,7 +390,16 @@ struct ipath_user_info {
*/
__u32 spu_rcvhdrsize;
__u64 spu_unused; /* kept for compatible layout */
/*
* If two or more processes wish to share a port, each process
* must set the spu_subport_cnt and spu_subport_id to the same
* values. The only restriction on the spu_subport_id is that
* it be unique for a given node.
*/
__u16 spu_subport_cnt;
__u16 spu_subport_id;
__u32 spu_unused; /* kept for compatible layout */
/*
* address of struct base_info to write to
......@@ -392,19 +412,25 @@ struct ipath_user_info {
#define IPATH_CMD_MIN 16
#define IPATH_CMD_USER_INIT 16 /* set up userspace */
#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */
#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
#define IPATH_CMD_USER_INIT 24 /* set up userspace */
#define IPATH_CMD_MAX 21
#define IPATH_CMD_MAX 24
struct ipath_port_info {
__u32 num_active; /* number of active units */
__u32 unit; /* unit (chip) assigned to caller */
__u32 port; /* port on unit assigned to caller */
__u16 port; /* port on unit assigned to caller */
__u16 subport; /* subport on unit assigned to caller */
__u16 num_ports; /* number of ports available on unit */
__u16 num_subports; /* number of subport slaves opened on port */
};
struct ipath_tid_info {
......@@ -435,6 +461,8 @@ struct ipath_cmd {
__u32 recv_ctrl;
/* partition key to set */
__u16 part_key;
/* user address of __u32 bitmask of active slaves */
__u64 slave_mask_addr;
} cmd;
};
......@@ -596,6 +624,10 @@ struct infinipath_counters {
/* K_PktFlags bits */
#define INFINIPATH_KPF_INTR 0x1
#define INFINIPATH_KPF_SUBPORT_MASK 0x3
#define INFINIPATH_KPF_SUBPORT_SHIFT 1
#define INFINIPATH_MAX_SUBPORT 4
/* SendPIO per-buffer control */
#define INFINIPATH_SP_TEST 0x40
......@@ -610,7 +642,7 @@ struct ipath_header {
/*
* Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
* 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
* Port 3, TID 11, offset 14.
* Port 4, TID 11, offset 13.
*/
__le32 ver_port_tid_offset;
__le16 chksum;
......
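The header edits above add new ABI fields without disturbing the existing layout: a __u32 is split into two __u16 halves, and new __u64 members are carved out of the reserved pad array, which shrinks by the same amount. A condensed illustration of that technique (the struct and field names below are invented for the example, not the real ipath structures) with compile-time checks that the layout is preserved:

/* Sketch: evolving a shared user/kernel struct without changing its layout. */
#include <stdint.h>
#include <stddef.h>

struct info_v1 {
	uint32_t port;
	uint64_t pad[46];
};

struct info_v2 {
	uint16_t port;		/* was uint32_t: the low half keeps its offset */
	uint16_t subport;	/* new field lives in the upper half */
	uint64_t txeparity;	/* new counter takes one pad slot... */
	uint64_t pad[45];	/* ...so the pad array shrinks by one */
};

_Static_assert(sizeof(struct info_v1) == sizeof(struct info_v2),
	       "overall size must not change");
_Static_assert(offsetof(struct info_v1, pad) == offsetof(struct info_v2, txeparity),
	       "new members only occupy space that was already reserved");

int main(void) { return 0; }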
......@@ -46,7 +46,7 @@
*/
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
struct ipath_cq_wc *wc = cq->queue;
struct ipath_cq_wc *wc;
unsigned long flags;
u32 head;
u32 next;
......@@ -57,6 +57,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
* Note that the head pointer might be writable by user processes.
* Take care to verify it is a sane value.
*/
wc = cq->queue;
head = wc->head;
if (head >= (unsigned) cq->ibcq.cqe) {
head = cq->ibcq.cqe;
......@@ -109,21 +110,27 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
struct ipath_cq *cq = to_icq(ibcq);
struct ipath_cq_wc *wc = cq->queue;
struct ipath_cq_wc *wc;
unsigned long flags;
int npolled;
u32 tail;
spin_lock_irqsave(&cq->lock, flags);
wc = cq->queue;
tail = wc->tail;
if (tail > (u32) cq->ibcq.cqe)
tail = (u32) cq->ibcq.cqe;
for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
if (wc->tail == wc->head)
if (tail == wc->head)
break;
*entry = wc->queue[wc->tail];
if (wc->tail >= cq->ibcq.cqe)
wc->tail = 0;
*entry = wc->queue[tail];
if (tail >= cq->ibcq.cqe)
tail = 0;
else
wc->tail++;
tail++;
}
wc->tail = tail;
spin_unlock_irqrestore(&cq->lock, flags);
......@@ -177,11 +184,6 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
goto done;
}
if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
ret = ERR_PTR(-ENOMEM);
goto done;
}
/* Allocate the completion queue structure. */
cq = kmalloc(sizeof(*cq), GFP_KERNEL);
if (!cq) {
......@@ -237,6 +239,16 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
} else
cq->ip = NULL;
spin_lock(&dev->n_cqs_lock);
if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
spin_unlock(&dev->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_wc;
}
dev->n_cqs_allocated++;
spin_unlock(&dev->n_cqs_lock);
/*
* ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
* The number of entries should be >= the number requested or return
......@@ -253,7 +265,6 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
ret = &cq->ibcq;
dev->n_cqs_allocated++;
goto done;
bail_wc:
......@@ -280,7 +291,9 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
struct ipath_cq *cq = to_icq(ibcq);
tasklet_kill(&cq->comptask);
spin_lock(&dev->n_cqs_lock);
dev->n_cqs_allocated--;
spin_unlock(&dev->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, ipath_release_mmap_info);
else
......@@ -316,10 +329,16 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
return 0;
}
/**
* ipath_resize_cq - change the size of the CQ
* @ibcq: the completion queue
*
* Returns 0 for success.
*/
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
struct ipath_cq *cq = to_icq(ibcq);
struct ipath_cq_wc *old_wc = cq->queue;
struct ipath_cq_wc *old_wc;
struct ipath_cq_wc *wc;
u32 head, tail, n;
int ret;
......@@ -355,6 +374,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
* Make sure head and tail are sane since they
* might be user writable.
*/
old_wc = cq->queue;
head = old_wc->head;
if (head > (u32) cq->ibcq.cqe)
head = (u32) cq->ibcq.cqe;
......
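Several of the CQ hunks above move the n_cqs_allocated limit check and increment (and the matching decrement in ipath_destroy_cq) under dev->n_cqs_lock, so the test and the update can no longer interleave between two callers. A stand-alone sketch of that check-then-count pattern, using a pthread mutex in place of the kernel spinlock (names are illustrative):

/* Sketch: a "check limit, then count" pair must be atomic w.r.t. other callers. */
#include <pthread.h>
#include <stdio.h>

#define MAX_CQS 4

static pthread_mutex_t n_cqs_lock = PTHREAD_MUTEX_INITIALIZER;
static int n_cqs_allocated;

/* Returns 0 on success, -1 when the limit is already reached. */
static int reserve_cq(void)
{
	int ret = 0;

	pthread_mutex_lock(&n_cqs_lock);
	if (n_cqs_allocated == MAX_CQS)
		ret = -1;		/* checked and rejected under the lock */
	else
		n_cqs_allocated++;	/* counted under the same lock */
	pthread_mutex_unlock(&n_cqs_lock);
	return ret;
}

static void release_cq(void)
{
	pthread_mutex_lock(&n_cqs_lock);
	n_cqs_allocated--;
	pthread_mutex_unlock(&n_cqs_lock);
}

int main(void)
{
	int i, ok = 0;

	for (i = 0; i < 6; i++)
		if (reserve_cq() == 0)
			ok++;
	printf("reserved %d of 6 requests (limit %d)\n", ok, MAX_CQS);
	while (ok--)
		release_cq();
	return 0;
}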
......@@ -95,16 +95,6 @@ const char *ipath_ibcstatus_str[] = {
"RecovIdle",
};
/*
* These variables are initialized in the chip-specific files
* but are defined here.
*/
u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
u64 ipath_gpio_sda, ipath_gpio_scl;
u64 infinipath_i_bitsextant;
ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
const struct pci_device_id *);
......@@ -527,28 +517,146 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
return ret;
}
static void __devexit cleanup_device(struct ipath_devdata *dd)
{
int port;
ipath_shutdown_device(dd);
if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
/* can't do anything more with chip; needs re-init */
*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
if (dd->ipath_kregbase) {
/*
* if we haven't already cleaned up before these are
* to ensure any register reads/writes "fail" until
* re-init
*/
dd->ipath_kregbase = NULL;
dd->ipath_uregbase = 0;
dd->ipath_sregbase = 0;
dd->ipath_cregbase = 0;
dd->ipath_kregsize = 0;
}
ipath_disable_wc(dd);
}
if (dd->ipath_pioavailregs_dma) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *) dd->ipath_pioavailregs_dma,
dd->ipath_pioavailregs_phys);
dd->ipath_pioavailregs_dma = NULL;
}
if (dd->ipath_dummy_hdrq) {
dma_free_coherent(&dd->pcidev->dev,
dd->ipath_pd[0]->port_rcvhdrq_size,
dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
dd->ipath_dummy_hdrq = NULL;
}
if (dd->ipath_pageshadow) {
struct page **tmpp = dd->ipath_pageshadow;
dma_addr_t *tmpd = dd->ipath_physshadow;
int i, cnt = 0;
ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
"locked\n");
for (port = 0; port < dd->ipath_cfgports; port++) {
int port_tidbase = port * dd->ipath_rcvtidcnt;
int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
for (i = port_tidbase; i < maxtid; i++) {
if (!tmpp[i])
continue;
pci_unmap_page(dd->pcidev, tmpd[i],
PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages(&tmpp[i], 1);
tmpp[i] = NULL;
cnt++;
}
}
if (cnt) {
ipath_stats.sps_pageunlocks += cnt;
ipath_cdbg(VERBOSE, "There were still %u expTID "
"entries locked\n", cnt);
}
if (ipath_stats.sps_pagelocks ||
ipath_stats.sps_pageunlocks)
ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
"unlocked via ipath_m{un}lock\n",
(unsigned long long)
ipath_stats.sps_pagelocks,
(unsigned long long)
ipath_stats.sps_pageunlocks);
ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
dd->ipath_pageshadow);
vfree(dd->ipath_pageshadow);
dd->ipath_pageshadow = NULL;
}
/*
* free any resources still in use (usually just kernel ports)
* at unload; we do for portcnt, not cfgports, because cfgports
* could have changed while we were loaded.
*/
for (port = 0; port < dd->ipath_portcnt; port++) {
struct ipath_portdata *pd = dd->ipath_pd[port];
dd->ipath_pd[port] = NULL;
ipath_free_pddata(dd, pd);
}
kfree(dd->ipath_pd);
/*
* debuggability, in case some cleanup path tries to use it
* after this
*/
dd->ipath_pd = NULL;
}
static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
struct ipath_devdata *dd;
struct ipath_devdata *dd = pci_get_drvdata(pdev);
ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
if (!pdev)
return;
ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
dd = pci_get_drvdata(pdev);
if (dd->verbs_dev)
ipath_unregister_ib_device(dd->verbs_dev);
ipath_diag_remove(dd);
ipath_user_remove(dd);
ipathfs_remove_device(dd);
ipath_device_remove_group(&pdev->dev, dd);
ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
"unit %u\n", dd, (u32) dd->ipath_unit);
if (dd->ipath_kregbase) {
ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
dd->ipath_kregbase);
cleanup_device(dd);
/*
* turn off rcv, send, and interrupts for all ports, all drivers
* should also hard reset the chip here?
* free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
* for all versions of the driver, if they were allocated
*/
if (pdev->irq) {
ipath_cdbg(VERBOSE,
"unit %u free_irq of irq %x\n",
dd->ipath_unit, pdev->irq);
free_irq(pdev->irq, dd);
} else
ipath_dbg("irq is 0, not doing free_irq "
"for unit %u\n", dd->ipath_unit);
/*
* we check for NULL here, because it's outside
* the kregbase check, and we need to call it
* after the free_irq. Thus it's possible that
* the function pointers were never initialized.
*/
if (dd->ipath_f_cleanup)
/* clean up chip-specific stuff */
dd->ipath_f_cleanup(dd);
ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
iounmap((volatile void __iomem *) dd->ipath_kregbase);
dd->ipath_kregbase = NULL;
}
pci_release_regions(pdev);
ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
pci_disable_device(pdev);
......@@ -760,8 +868,8 @@ static void get_rhf_errstring(u32 err, char *msg, size_t len)
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
int err)
{
return dd->ipath_port0_skbs ?
(void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
return dd->ipath_port0_skbinfo ?
(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}
/**
......@@ -783,31 +891,34 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
*/
/*
* We need 4 extra bytes for unaligned transfer copying
* We need 2 extra bytes for ipath_ether data sent in the
* key header. In order to keep everything dword aligned,
* we'll reserve 4 bytes.
*/
len = dd->ipath_ibmaxlen + 4;
if (dd->ipath_flags & IPATH_4BYTE_TID) {
/* we need a 4KB multiple alignment, and there is no way
/* We need a 2KB multiple alignment, and there is no way
* to do it except to allocate extra and then skb_reserve
* enough to bring it up to the right alignment.
*/
len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
len += 2047;
}
else
len = dd->ipath_ibmaxlen + 4;
skb = __dev_alloc_skb(len, gfp_mask);
if (!skb) {
ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
len);
goto bail;
}
skb_reserve(skb, 4);
if (dd->ipath_flags & IPATH_4BYTE_TID) {
u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
u32 una = (unsigned long)skb->data & 2047;
if (una)
skb_reserve(skb, 4 + (1 << 11) - una);
else
skb_reserve(skb, 4);
} else
skb_reserve(skb, 4);
skb_reserve(skb, 2048 - una);
}
bail:
return skb;
......@@ -1326,6 +1437,9 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
"for port %u rcvhdrqtailaddr failed\n",
pd->port_port);
ret = -ENOMEM;
dma_free_coherent(&dd->pcidev->dev, amt,
pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
pd->port_rcvhdrq = NULL;
goto bail;
}
pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
......@@ -1347,12 +1461,13 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
"hdrtailaddr@%p %llx physical\n",
pd->port_port, pd->port_rcvhdrq,
pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
(unsigned long long)pd->port_rcvhdrqtailaddr_phys);
(unsigned long long) pd->port_rcvhdrq_phys,
pd->port_rcvhdrtail_kvaddr, (unsigned long long)
pd->port_rcvhdrqtailaddr_phys);
/* clear for security and sanity on each use */
memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
/*
* tell chip each time we init it, even if we are re-using previous
......@@ -1805,7 +1920,7 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
pd->port_rcvhdrq = NULL;
if (pd->port_rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *)pd->port_rcvhdrtail_kvaddr,
pd->port_rcvhdrtail_kvaddr,
pd->port_rcvhdrqtailaddr_phys);
pd->port_rcvhdrtail_kvaddr = NULL;
}
......@@ -1824,24 +1939,32 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
dma_free_coherent(&dd->pcidev->dev, size,
base, pd->port_rcvegrbuf_phys[e]);
}
vfree(pd->port_rcvegrbuf);
kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
vfree(pd->port_rcvegrbuf_phys);
kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
pd->port_rcvegrbuf_chunks = 0;
} else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
unsigned e;
struct sk_buff **skbs = dd->ipath_port0_skbs;
struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
dd->ipath_port0_skbs = NULL;
ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
"@ %p\n", pd->port_port, skbs);
dd->ipath_port0_skbinfo = NULL;
ipath_cdbg(VERBOSE, "free closed port %d "
"ipath_port0_skbinfo @ %p\n", pd->port_port,
skbinfo);
for (e = 0; e < dd->ipath_rcvegrcnt; e++)
if (skbs[e])
dev_kfree_skb(skbs[e]);
vfree(skbs);
if (skbinfo[e].skb) {
pci_unmap_single(dd->pcidev, skbinfo[e].phys,
dd->ipath_ibmaxlen,
PCI_DMA_FROMDEVICE);
dev_kfree_skb(skbinfo[e].skb);
}
vfree(skbinfo);
}
kfree(pd->port_tid_pg_list);
vfree(pd->subport_uregbase);
vfree(pd->subport_rcvegrbuf);
vfree(pd->subport_rcvhdr_base);
kfree(pd);
}
......@@ -1907,150 +2030,12 @@ static int __init infinipath_init(void)
return ret;
}
static void cleanup_device(struct ipath_devdata *dd)
{
int port;
ipath_shutdown_device(dd);
if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
/* can't do anything more with chip; needs re-init */
*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
if (dd->ipath_kregbase) {
/*
* if we haven't already cleaned up before these are
* to ensure any register reads/writes "fail" until
* re-init
*/
dd->ipath_kregbase = NULL;
dd->ipath_uregbase = 0;
dd->ipath_sregbase = 0;
dd->ipath_cregbase = 0;
dd->ipath_kregsize = 0;
}
ipath_disable_wc(dd);
}
if (dd->ipath_pioavailregs_dma) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *) dd->ipath_pioavailregs_dma,
dd->ipath_pioavailregs_phys);
dd->ipath_pioavailregs_dma = NULL;
}
if (dd->ipath_dummy_hdrq) {
dma_free_coherent(&dd->pcidev->dev,
dd->ipath_pd[0]->port_rcvhdrq_size,
dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
dd->ipath_dummy_hdrq = NULL;
}
if (dd->ipath_pageshadow) {
struct page **tmpp = dd->ipath_pageshadow;
int i, cnt = 0;
ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
"locked\n");
for (port = 0; port < dd->ipath_cfgports; port++) {
int port_tidbase = port * dd->ipath_rcvtidcnt;
int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
for (i = port_tidbase; i < maxtid; i++) {
if (!tmpp[i])
continue;
ipath_release_user_pages(&tmpp[i], 1);
tmpp[i] = NULL;
cnt++;
}
}
if (cnt) {
ipath_stats.sps_pageunlocks += cnt;
ipath_cdbg(VERBOSE, "There were still %u expTID "
"entries locked\n", cnt);
}
if (ipath_stats.sps_pagelocks ||
ipath_stats.sps_pageunlocks)
ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
"unlocked via ipath_m{un}lock\n",
(unsigned long long)
ipath_stats.sps_pagelocks,
(unsigned long long)
ipath_stats.sps_pageunlocks);
ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
dd->ipath_pageshadow);
vfree(dd->ipath_pageshadow);
dd->ipath_pageshadow = NULL;
}
/*
* free any resources still in use (usually just kernel ports)
* at unload; we do for portcnt, not cfgports, because cfgports
* could have changed while we were loaded.
*/
for (port = 0; port < dd->ipath_portcnt; port++) {
struct ipath_portdata *pd = dd->ipath_pd[port];
dd->ipath_pd[port] = NULL;
ipath_free_pddata(dd, pd);
}
kfree(dd->ipath_pd);
/*
* debuggability, in case some cleanup path tries to use it
* after this
*/
dd->ipath_pd = NULL;
}
static void __exit infinipath_cleanup(void)
{
struct ipath_devdata *dd, *tmp;
unsigned long flags;
ipath_diagpkt_remove();
ipath_exit_ipathfs();
ipath_driver_remove_group(&ipath_driver.driver);
spin_lock_irqsave(&ipath_devs_lock, flags);
/*
* turn off rcv, send, and interrupts for all ports, all drivers
* should also hard reset the chip here?
* free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
* for all versions of the driver, if they were allocated
*/
list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
spin_unlock_irqrestore(&ipath_devs_lock, flags);
if (dd->ipath_kregbase)
cleanup_device(dd);
if (dd->pcidev) {
if (dd->pcidev->irq) {
ipath_cdbg(VERBOSE,
"unit %u free_irq of irq %x\n",
dd->ipath_unit, dd->pcidev->irq);
free_irq(dd->pcidev->irq, dd);
} else
ipath_dbg("irq is 0, not doing free_irq "
"for unit %u\n", dd->ipath_unit);
/*
* we check for NULL here, because it's outside
* the kregbase check, and we need to call it
* after the free_irq. Thus it's possible that
* the function pointers were never initialized.
*/
if (dd->ipath_f_cleanup)
/* clean up chip-specific stuff */
dd->ipath_f_cleanup(dd);
dd->pcidev = NULL;
}
spin_lock_irqsave(&ipath_devs_lock, flags);
}
spin_unlock_irqrestore(&ipath_devs_lock, flags);
ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
pci_unregister_driver(&ipath_driver);
......
......@@ -100,9 +100,9 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
gpioval = &dd->ipath_gpio_out;
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
if (line == i2c_line_scl)
mask = ipath_gpio_scl;
mask = dd->ipath_gpio_scl;
else
mask = ipath_gpio_sda;
mask = dd->ipath_gpio_sda;
if (new_line_state == i2c_line_high)
/* tri-state the output rather than force high */
......@@ -119,12 +119,12 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
write_val = 0x0UL;
if (line == i2c_line_scl) {
write_val <<= ipath_gpio_scl_num;
*gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
write_val <<= dd->ipath_gpio_scl_num;
*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
*gpioval |= write_val;
} else {
write_val <<= ipath_gpio_sda_num;
*gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
write_val <<= dd->ipath_gpio_sda_num;
*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
*gpioval |= write_val;
}
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
......@@ -157,9 +157,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
/* config line to be an input */
if (line == i2c_line_scl)
mask = ipath_gpio_scl;
mask = dd->ipath_gpio_scl;
else
mask = ipath_gpio_sda;
mask = dd->ipath_gpio_sda;
write_val = read_val & ~mask;
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
......@@ -187,6 +187,7 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
static void i2c_wait_for_writes(struct ipath_devdata *dd)
{
(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
rmb();
}
static void scl_out(struct ipath_devdata *dd, u8 bit)
......
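The eeprom hunks above replace the file-scope ipath_gpio_* globals with fields of the per-device ipath_devdata, which is what allows two chips with different GPIO wiring to coexist ("Support multiple simultaneous devices of different types" in the merge list). A small stand-alone illustration of the same idea, with invented names:

/* Sketch: per-chip configuration moved out of globals and into the
 * per-device structure, so two different chips can coexist. */
#include <stdio.h>
#include <stdint.h>

struct devdata {
	const char *name;
	uint64_t gpio_scl;	/* per-device, not a file-scope global */
	uint64_t gpio_sda;
};

static void toggle_scl(struct devdata *dd)
{
	/* every access goes through dd, so each device uses its own mask */
	printf("%s: driving SCL mask %#llx\n", dd->name,
	       (unsigned long long) dd->gpio_scl);
}

int main(void)
{
	struct devdata ht   = { "ht-chip",   1ULL << 8,  1ULL << 9 };
	struct devdata pcie = { "pcie-chip", 1ULL << 30, 1ULL << 31 };

	toggle_scl(&ht);
	toggle_scl(&pcie);
	return 0;
}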
......@@ -41,6 +41,12 @@
#include "ipath_kernel.h"
#include "ipath_common.h"
/*
* mmap64 doesn't allow all 64 bits for 32-bit applications
* so only use the low 43 bits.
*/
#define MMAP64_MASK 0x7FFFFFFFFFFUL
static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
......@@ -57,18 +63,35 @@ static struct file_operations ipath_file_ops = {
.mmap = ipath_mmap
};
static int ipath_get_base_info(struct ipath_portdata *pd,
static int ipath_get_base_info(struct file *fp,
void __user *ubase, size_t ubase_size)
{
struct ipath_portdata *pd = port_fp(fp);
int ret = 0;
struct ipath_base_info *kinfo = NULL;
struct ipath_devdata *dd = pd->port_dd;
unsigned subport_cnt;
int shared, master;
size_t sz;
subport_cnt = pd->port_subport_cnt;
if (!subport_cnt) {
shared = 0;
master = 0;
subport_cnt = 1;
} else {
shared = 1;
master = !subport_fp(fp);
}
if (ubase_size < sizeof(*kinfo)) {
sz = sizeof(*kinfo);
/* If port sharing is not requested, allow the old size structure */
if (!shared)
sz -= 3 * sizeof(u64);
if (ubase_size < sz) {
ipath_cdbg(PROC,
"Base size %lu, need %lu (version mismatch?)\n",
(unsigned long) ubase_size,
(unsigned long) sizeof(*kinfo));
"Base size %zu, need %zu (version mismatch?)\n",
ubase_size, sz);
ret = -EINVAL;
goto bail;
}
......@@ -95,7 +118,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
pd->port_rcvegrbuf_chunks;
kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
if (master)
kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
/*
* for this use, may be ipath_cfgports summed over all chips that
* are configured and present
......@@ -118,31 +143,75 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
* page_address() macro worked, but in 2.6.11, even that returns the
* full 64 bit address (upper bits all 1's). So far, using the
* physical addresses (or chip offsets, for chip mapping) works, but
* no doubt some future kernel release will chang that, and we'll be
* on to yet another method of dealing with this
* no doubt some future kernel release will change that, and we'll be
* on to yet another method of dealing with this.
*/
kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
(void *) dd->ipath_statusp -
(void *) dd->ipath_pioavailregs_dma;
if (!shared) {
kinfo->spi_piocnt = dd->ipath_pbufsport;
kinfo->spi_piobufbase = (u64) pd->port_piobufs;
kinfo->__spi_uregbase =
dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_palign * pd->port_port;
} else if (master) {
kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
(dd->ipath_pbufsport % subport_cnt);
/* Master's PIO buffers are after all the slave's */
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign *
(dd->ipath_pbufsport - kinfo->spi_piocnt);
kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
dd->ipath_palign * pd->port_port;
} else {
unsigned slave = subport_fp(fp) - 1;
kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
kinfo->spi_piocnt = dd->ipath_pbufsport;
kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
kinfo->spi_piobufbase = (u64) pd->port_piobufs +
dd->ipath_palign * kinfo->spi_piocnt * slave;
kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
PAGE_SIZE * slave) & MMAP64_MASK;
kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
kinfo->spi_rcvhdr_tailaddr =
(u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
MMAP64_MASK;
}
kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
dd->ipath_palign;
kinfo->spi_pioalign = dd->ipath_palign;
kinfo->spi_qpair = IPATH_KD_QP;
kinfo->spi_piosize = dd->ipath_ibmaxlen;
kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */
kinfo->spi_port = pd->port_port;
kinfo->spi_subport = subport_fp(fp);
kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
kinfo->spi_hw_version = dd->ipath_revision;
if (master) {
kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
kinfo->spi_subport_uregbase =
(u64) pd->subport_uregbase & MMAP64_MASK;
kinfo->spi_subport_rcvegrbuf =
(u64) pd->subport_rcvegrbuf & MMAP64_MASK;
kinfo->spi_subport_rcvhdr_base =
(u64) pd->subport_rcvhdr_base & MMAP64_MASK;
ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
kinfo->spi_port, kinfo->spi_runtime_flags,
(unsigned long long) kinfo->spi_subport_uregbase,
(unsigned long long) kinfo->spi_subport_rcvegrbuf,
(unsigned long long) kinfo->spi_subport_rcvhdr_base);
}
if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
ret = -EFAULT;
......@@ -154,6 +223,7 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
/**
* ipath_tid_update - update a port TID
* @pd: the port
* @fp: the ipath device file
* @ti: the TID information
*
* The new implementation as of Oct 2004 is that the driver assigns
......@@ -176,11 +246,11 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
* virtually contiguous pages, that should change to improve
* performance.
*/
static int ipath_tid_update(struct ipath_portdata *pd,
static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
const struct ipath_tid_info *ti)
{
int ret = 0, ntids;
u32 tid, porttid, cnt, i, tidcnt;
u32 tid, porttid, cnt, i, tidcnt, tidoff;
u16 *tidlist;
struct ipath_devdata *dd = pd->port_dd;
u64 physaddr;
......@@ -188,6 +258,7 @@ static int ipath_tid_update(struct ipath_portdata *pd,
u64 __iomem *tidbase;
unsigned long tidmap[8];
struct page **pagep = NULL;
unsigned subport = subport_fp(fp);
if (!dd->ipath_pageshadow) {
ret = -ENOMEM;
......@@ -204,20 +275,34 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -EFAULT;
goto done;
}
porttid = pd->port_port * dd->ipath_rcvtidcnt;
if (!pd->port_subport_cnt) {
tidcnt = dd->ipath_rcvtidcnt;
if (cnt >= tidcnt) {
tid = pd->port_tidcursor;
tidoff = 0;
} else if (!subport) {
tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
(dd->ipath_rcvtidcnt % pd->port_subport_cnt);
tidoff = dd->ipath_rcvtidcnt - tidcnt;
porttid += tidoff;
tid = tidcursor_fp(fp);
} else {
tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
tidoff = tidcnt * (subport - 1);
porttid += tidoff;
tid = tidcursor_fp(fp);
}
if (cnt > tidcnt) {
/* make sure it all fits in port_tid_pg_list */
dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
"TIDs, only trying max (%u)\n", cnt, tidcnt);
cnt = tidcnt;
}
pagep = (struct page **)pd->port_tid_pg_list;
tidlist = (u16 *) (&pagep[cnt]);
pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
memset(tidmap, 0, sizeof(tidmap));
tid = pd->port_tidcursor;
/* before decrement; chip actual # */
porttid = pd->port_port * tidcnt;
ntids = tidcnt;
tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
......@@ -274,16 +359,19 @@ static int ipath_tid_update(struct ipath_portdata *pd,
ret = -ENOMEM;
break;
}
tidlist[i] = tid;
tidlist[i] = tid + tidoff;
ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
"vaddr %lx\n", i, tid, vaddr);
"vaddr %lx\n", i, tid + tidoff, vaddr);
/* we "know" system pages and TID pages are same size */
dd->ipath_pageshadow[porttid + tid] = pagep[i];
dd->ipath_physshadow[porttid + tid] = ipath_map_page(
dd->pcidev, pagep[i], 0, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
/*
* don't need atomic or it's overhead
*/
__set_bit(tid, tidmap);
physaddr = page_to_phys(pagep[i]);
physaddr = dd->ipath_physshadow[porttid + tid];
ipath_stats.sps_pagelocks++;
ipath_cdbg(VERBOSE,
"TID %u, vaddr %lx, physaddr %llx pgp %p\n",
......@@ -317,6 +405,9 @@ static int ipath_tid_update(struct ipath_portdata *pd,
tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
pci_unmap_page(dd->pcidev,
dd->ipath_physshadow[porttid + tid],
PAGE_SIZE, PCI_DMA_FROMDEVICE);
dd->ipath_pageshadow[porttid + tid] = NULL;
ipath_stats.sps_pageunlocks++;
}
......@@ -341,7 +432,10 @@ static int ipath_tid_update(struct ipath_portdata *pd,
}
if (tid == tidcnt)
tid = 0;
if (!pd->port_subport_cnt)
pd->port_tidcursor = tid;
else
tidcursor_fp(fp) = tid;
}
done:
......@@ -354,6 +448,7 @@ static int ipath_tid_update(struct ipath_portdata *pd,
/**
* ipath_tid_free - free a port TID
* @pd: the port
* @subport: the subport
* @ti: the TID info
*
* right now we are unlocking one page at a time, but since
......@@ -367,7 +462,7 @@ static int ipath_tid_update(struct ipath_portdata *pd,
* they pass in to us.
*/
static int ipath_tid_free(struct ipath_portdata *pd,
static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
const struct ipath_tid_info *ti)
{
int ret = 0;
......@@ -388,11 +483,20 @@ static int ipath_tid_free(struct ipath_portdata *pd,
}
porttid = pd->port_port * dd->ipath_rcvtidcnt;
if (!pd->port_subport_cnt)
tidcnt = dd->ipath_rcvtidcnt;
else if (!subport) {
tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
(dd->ipath_rcvtidcnt % pd->port_subport_cnt);
porttid += dd->ipath_rcvtidcnt - tidcnt;
} else {
tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
porttid += tidcnt * (subport - 1);
}
tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
dd->ipath_rcvtidbase +
porttid * sizeof(*tidbase));
tidcnt = dd->ipath_rcvtidcnt;
limit = sizeof(tidmap) * BITS_PER_BYTE;
if (limit > tidcnt)
/* just in case size changes in future */
......@@ -417,6 +521,9 @@ static int ipath_tid_free(struct ipath_portdata *pd,
pd->port_pid, tid);
dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
dd->ipath_tidinvalid);
pci_unmap_page(dd->pcidev,
dd->ipath_physshadow[porttid + tid],
PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages(
&dd->ipath_pageshadow[porttid + tid], 1);
dd->ipath_pageshadow[porttid + tid] = NULL;
......@@ -581,20 +688,24 @@ static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
/**
* ipath_manage_rcvq - manage a port's receive queue
* @pd: the port
* @subport: the subport
* @start_stop: action to carry out
*
* start_stop == 0 disables receive on the port, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
int start_stop)
{
struct ipath_devdata *dd = pd->port_dd;
u64 tval;
ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
start_stop ? "en" : "dis", dd->ipath_unit,
pd->port_port);
pd->port_port, subport);
if (subport)
goto bail;
/* atomically clear receive enable port. */
if (start_stop) {
/*
......@@ -609,7 +720,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
* updated and correct itself, even in the face of software
* bugs.
*/
*pd->port_rcvhdrtail_kvaddr = 0;
*(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0;
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
} else
......@@ -630,6 +741,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
}
/* always; new head should be equal to new tail; see above */
bail:
return 0;
}
......@@ -687,6 +799,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
}
}
/*
* Initialize the port data with the receive buffer sizes
* so this can be done while the master port is locked.
* Otherwise, there is a race with a slave opening the port
* and seeing these fields uninitialized.
*/
static void init_user_egr_sizes(struct ipath_portdata *pd)
{
struct ipath_devdata *dd = pd->port_dd;
unsigned egrperchunk, egrcnt, size;
/*
* to avoid wasting a lot of memory, we allocate 32KB chunks of
* physically contiguous memory, advance through it until used up
* and then allocate more. Of course, we need memory to store those
* extra pointers, now. Started out with 256KB, but under heavy
* memory pressure (creating large files and then copying them over
* NFS while doing lots of MPI jobs), we hit some allocation
* failures, even though we can sleep... (2.6.10) Still get
* failures at 64K. 32K is the lowest we can go without wasting
* additional memory.
*/
size = 0x8000;
egrperchunk = size / dd->ipath_rcvegrbufsize;
egrcnt = dd->ipath_rcvegrcnt;
pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
pd->port_rcvegrbufs_perchunk = egrperchunk;
pd->port_rcvegrbuf_size = size;
}
/**
* ipath_create_user_egr - allocate eager TID buffers
* @pd: the port to allocate TID buffers for
......@@ -702,7 +844,7 @@ static void ipath_clean_part_key(struct ipath_portdata *pd,
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
struct ipath_devdata *dd = pd->port_dd;
unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
int ret;
gfp_t gfp_flags;
......@@ -722,31 +864,18 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
"offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
/*
* to avoid wasting a lot of memory, we allocate 32KB chunks of
* physically contiguous memory, advance through it until used up
* and then allocate more. Of course, we need memory to store those
* extra pointers, now. Started out with 256KB, but under heavy
* memory pressure (creating large files and then copying them over
* NFS while doing lots of MPI jobs), we hit some allocation
* failures, even though we can sleep... (2.6.10) Still get
* failures at 64K. 32K is the lowest we can go without wasting
* additional memory.
*/
size = 0x8000;
alloced = ALIGN(egrsize * egrcnt, size);
egrperchunk = size / egrsize;
chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
pd->port_rcvegrbuf_chunks = chunk;
pd->port_rcvegrbufs_perchunk = egrperchunk;
pd->port_rcvegrbuf_size = size;
pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
chunk = pd->port_rcvegrbuf_chunks;
egrperchunk = pd->port_rcvegrbufs_perchunk;
size = pd->port_rcvegrbuf_size;
pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
GFP_KERNEL);
if (!pd->port_rcvegrbuf) {
ret = -ENOMEM;
goto bail;
}
pd->port_rcvegrbuf_phys =
vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
GFP_KERNEL);
if (!pd->port_rcvegrbuf_phys) {
ret = -ENOMEM;
goto bail_rcvegrbuf;
......@@ -791,105 +920,23 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
pd->port_rcvegrbuf_phys[e]);
}
vfree(pd->port_rcvegrbuf_phys);
kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
vfree(pd->port_rcvegrbuf);
kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
bail:
return ret;
}
static int ipath_do_user_init(struct ipath_portdata *pd,
const struct ipath_user_info *uinfo)
{
int ret = 0;
struct ipath_devdata *dd = pd->port_dd;
u32 head32;
/* for now, if major version is different, bail */
if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
dev_info(&dd->pcidev->dev,
"User major version %d not same as driver "
"major %d\n", uinfo->spu_userversion >> 16,
IPATH_USER_SWMAJOR);
ret = -ENODEV;
goto done;
}
if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
ipath_dbg("User minor version %d not same as driver "
"minor %d\n", uinfo->spu_userversion & 0xffff,
IPATH_USER_SWMINOR);
if (uinfo->spu_rcvhdrsize) {
ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
if (ret)
goto done;
}
/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
/* for right now, kernel piobufs are at end, so port 1 is at 0 */
pd->port_piobufs = dd->ipath_piobufbase +
dd->ipath_pbufsport * (pd->port_port -
1) * dd->ipath_palign;
ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
pd->port_port, pd->port_piobufs);
/*
* Now allocate the rcvhdr Q and eager TIDs; skip the TID
* array for time being. If pd->port_port > chip-supported,
* we need to do extra stuff here to handle by handling overflow
* through port 0, someday
*/
ret = ipath_create_rcvhdrq(dd, pd);
if (!ret)
ret = ipath_create_user_egr(pd);
if (ret)
goto done;
/*
* set the eager head register for this port to the current values
* of the tail pointers, since we don't know if they were
* updated on last use of the port.
*/
head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
dd->ipath_lastegrheads[pd->port_port] = -1;
dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
pd->port_port, head32);
pd->port_tidcursor = 0; /* start at beginning after open */
/*
* now enable the port; the tail registers will be written to memory
* by the chip as soon as it sees the write to
* dd->ipath_kregs->kr_rcvctrl. The update only happens on
* transition from 0 to 1, so clear it first, then set it as part of
* enabling the port. This will (very briefly) affect any other
* open ports, but it shouldn't be long enough to be an issue.
* We explictly set the in-memory copy to 0 beforehand, so we don't
* have to wait to be sure the DMA update has happened.
*/
*pd->port_rcvhdrtail_kvaddr = 0ULL;
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
done:
return ret;
}
/* common code for the mappings on dma_alloc_coherent mem */
static int ipath_mmap_mem(struct vm_area_struct *vma,
struct ipath_portdata *pd, unsigned len,
int write_ok, dma_addr_t addr, char *what)
struct ipath_portdata *pd, unsigned len, int write_ok,
void *kvaddr, char *what)
{
struct ipath_devdata *dd = pd->port_dd;
unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
unsigned long pfn;
int ret;
if ((vma->vm_end - vma->vm_start) > len) {
......@@ -912,16 +959,16 @@ static int ipath_mmap_mem(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
}
pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
ret = remap_pfn_range(vma, vma->vm_start, pfn,
len, vma->vm_page_prot);
if (ret)
dev_info(&dd->pcidev->dev,
"%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
what, pd->port_port, (unsigned long)addr, len,
write_ok?'w':'o', ret);
dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
"bytes r%c failed: %d\n", what, pd->port_port,
pfn, len, write_ok?'w':'o', ret);
else
ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
what, pd->port_port, (unsigned long)addr, len,
ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
"r%c\n", what, pd->port_port, pfn, len,
write_ok?'w':'o');
bail:
return ret;
......@@ -957,7 +1004,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
static int mmap_piobufs(struct vm_area_struct *vma,
struct ipath_devdata *dd,
struct ipath_portdata *pd)
struct ipath_portdata *pd,
unsigned piobufs, unsigned piocnt)
{
unsigned long phys;
int ret;
......@@ -968,16 +1016,15 @@ static int mmap_piobufs(struct vm_area_struct *vma,
* process data, and catches users who might try to read the i/o
* space due to a bug.
*/
if ((vma->vm_end - vma->vm_start) >
(dd->ipath_pbufsport * dd->ipath_palign)) {
if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
"reqlen %lx > PAGE\n",
vma->vm_end - vma->vm_start);
ret = -EFAULT;
ret = -EINVAL;
goto bail;
}
phys = dd->ipath_physaddr + pd->port_piobufs;
phys = dd->ipath_physaddr + piobufs;
/*
* Don't mark this as non-cached, or we don't get the
......@@ -1011,7 +1058,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
struct ipath_devdata *dd = pd->port_dd;
unsigned long start, size;
size_t total_size, i;
dma_addr_t *phys;
unsigned long pfn;
int ret;
size = pd->port_rcvegrbuf_size;
......@@ -1021,7 +1068,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
"reqlen %lx > actual %lx\n",
vma->vm_end - vma->vm_start,
(unsigned long) total_size);
ret = -EFAULT;
ret = -EINVAL;
goto bail;
}
......@@ -1035,11 +1082,11 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
vma->vm_flags &= ~VM_MAYWRITE;
start = vma->vm_start;
phys = pd->port_rcvegrbuf_phys;
for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
size, vma->vm_page_prot);
pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
ret = remap_pfn_range(vma, start, pfn, size,
vma->vm_page_prot);
if (ret < 0)
goto bail;
}
......@@ -1049,6 +1096,122 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
return ret;
}
/*
* ipath_file_vma_nopage - handle a VMA page fault.
*/
static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma,
unsigned long address, int *type)
{
unsigned long offset = address - vma->vm_start;
struct page *page = NOPAGE_SIGBUS;
void *pageptr;
/*
* Convert the vmalloc address into a struct page.
*/
pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT));
page = vmalloc_to_page(pageptr);
if (!page)
goto out;
/* Increment the reference count. */
get_page(page);
if (type)
*type = VM_FAULT_MINOR;
out:
return page;
}
static struct vm_operations_struct ipath_file_vm_ops = {
.nopage = ipath_file_vma_nopage,
};
static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
struct ipath_portdata *pd, unsigned subport)
{
unsigned long len;
struct ipath_devdata *dd;
void *addr;
size_t size;
int ret;
/* If the port is not shared, all addresses should be physical */
if (!pd->port_subport_cnt) {
ret = -EINVAL;
goto bail;
}
dd = pd->port_dd;
size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
/*
* Master has all the slave uregbase, rcvhdrq, and
* rcvegrbufs mmapped.
*/
if (subport == 0) {
unsigned num_slaves = pd->port_subport_cnt - 1;
if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
addr = pd->subport_uregbase;
size = PAGE_SIZE * num_slaves;
} else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
MMAP64_MASK)) {
addr = pd->subport_rcvhdr_base;
size = pd->port_rcvhdrq_size * num_slaves;
} else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
MMAP64_MASK)) {
addr = pd->subport_rcvegrbuf;
size *= num_slaves;
} else {
ret = -EINVAL;
goto bail;
}
} else if (pgaddr == (((u64) pd->subport_uregbase +
PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
size = PAGE_SIZE;
} else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
pd->port_rcvhdrq_size * (subport - 1)) &
MMAP64_MASK)) {
addr = pd->subport_rcvhdr_base +
pd->port_rcvhdrq_size * (subport - 1);
size = pd->port_rcvhdrq_size;
} else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
size * (subport - 1)) & MMAP64_MASK)) {
addr = pd->subport_rcvegrbuf + size * (subport - 1);
/* rcvegrbufs are read-only on the slave */
if (vma->vm_flags & VM_WRITE) {
dev_info(&dd->pcidev->dev,
"Can't map eager buffers as "
"writable (flags=%lx)\n", vma->vm_flags);
ret = -EPERM;
goto bail;
}
/*
* Don't allow permission to later change to writeable
* with mprotect.
*/
vma->vm_flags &= ~VM_MAYWRITE;
} else {
ret = -EINVAL;
goto bail;
}
len = vma->vm_end - vma->vm_start;
if (len > size) {
ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
ret = -EINVAL;
goto bail;
}
vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
vma->vm_ops = &ipath_file_vm_ops;
vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
ret = 0;
bail:
return ret;
}
/**
* ipath_mmap - mmap various structures into user space
* @fp: the file pointer
......@@ -1064,73 +1227,99 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
struct ipath_portdata *pd;
struct ipath_devdata *dd;
u64 pgaddr, ureg;
unsigned piobufs, piocnt;
int ret;
pd = port_fp(fp);
if (!pd) {
ret = -EINVAL;
goto bail;
}
dd = pd->port_dd;
/*
* This is the ipath_do_user_init() code, mapping the shared buffers
* into the user process. The address referred to by vm_pgoff is the
* virtual, not physical, address; we only do one mmap for each
* space mapped.
* file offset passed via mmap(). For shared ports, this is the
* kernel vmalloc() address of the pages to share with the master.
* For non-shared or master ports, this is a physical address.
* We only do one mmap for each space mapped.
*/
pgaddr = vma->vm_pgoff << PAGE_SHIFT;
/*
* Must fit in 40 bits for our hardware; some checked elsewhere,
* but we'll be paranoid. Check for 0 is mostly in case one of the
* allocations failed, but user called mmap anyway. We want to catch
* that before it can match.
* Check for 0 in case one of the allocations failed, but user
* called mmap anyway.
*/
if (!pgaddr || pgaddr >= (1ULL<<40)) {
ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
(unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
return -EINVAL;
if (!pgaddr) {
ret = -EINVAL;
goto bail;
}
/* just the offset of the port user registers, not physical addr */
ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
(unsigned long long) pgaddr, vma->vm_start,
vma->vm_end - vma->vm_start);
vma->vm_end - vma->vm_start, dd->ipath_unit,
pd->port_port, subport_fp(fp));
if (vma->vm_start & (PAGE_SIZE-1)) {
ipath_dev_err(dd,
"vm_start not aligned: %lx, end=%lx phys %lx\n",
vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
ret = -EINVAL;
/*
* Physical addresses must fit in 40 bits for our hardware.
* Check for kernel virtual addresses first, anything else must
* match a HW or memory address.
*/
if (pgaddr >= (1ULL<<40)) {
ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
goto bail;
}
else if (pgaddr == ureg)
if (!pd->port_subport_cnt) {
/* port is not shared */
ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
piocnt = dd->ipath_pbufsport;
piobufs = pd->port_piobufs;
} else if (!subport_fp(fp)) {
/* caller is the master */
ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
(dd->ipath_pbufsport % pd->port_subport_cnt);
piobufs = pd->port_piobufs +
dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
} else {
unsigned slave = subport_fp(fp) - 1;
/* caller is a slave */
ureg = 0;
piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
}
if (pgaddr == ureg)
ret = mmap_ureg(vma, dd, ureg);
else if (pgaddr == pd->port_piobufs)
ret = mmap_piobufs(vma, dd, pd);
else if (pgaddr == (u64) pd->port_rcvegr_phys)
else if (pgaddr == piobufs)
ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
else if (pgaddr == dd->ipath_pioavailregs_phys)
/* in-memory copy of pioavail registers */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
(void *) dd->ipath_pioavailregs_dma,
"pioavail registers");
else if (subport_fp(fp))
/* Subports don't mmap the physical receive buffers */
ret = -EINVAL;
else if (pgaddr == pd->port_rcvegr_phys)
ret = mmap_rcvegrbufs(vma, pd);
else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
/*
* The rcvhdrq itself; readonly except on HT (so have
* to allow writable mapping), multiple pages, contiguous
* from an i/o perspective.
*/
unsigned total_size =
ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
* sizeof(u32), PAGE_SIZE);
ret = ipath_mmap_mem(vma, pd, total_size, 1,
pd->port_rcvhdrq_phys,
ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
pd->port_rcvhdrq,
"rcvhdrq");
}
else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
/* in-memory copy of rcvhdrq tail register */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
pd->port_rcvhdrqtailaddr_phys,
pd->port_rcvhdrtail_kvaddr,
"rcvhdrq tail");
else if (pgaddr == dd->ipath_pioavailregs_phys)
/* in-memory copy of pioavail registers */
ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
dd->ipath_pioavailregs_phys,
"pioavail registers");
else
ret = -EINVAL;
......@@ -1138,9 +1327,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
if (ret < 0)
dev_info(&dd->pcidev->dev,
"Failure %d on addr %lx, off %lx\n",
-ret, vma->vm_start, vma->vm_pgoff);
"Failure %d on off %llx len %lx\n",
-ret, (unsigned long long)pgaddr,
vma->vm_end - vma->vm_start);
bail:
return ret;
}
......@@ -1154,6 +1344,8 @@ static unsigned int ipath_poll(struct file *fp,
struct ipath_devdata *dd;
pd = port_fp(fp);
if (!pd)
goto bail;
dd = pd->port_dd;
bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
......@@ -1176,7 +1368,7 @@ static unsigned int ipath_poll(struct file *fp,
if (tail == head) {
set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
(void)ipath_write_ureg(dd, ur_rcvhdrhead,
dd->ipath_rhdrhead_intr_off
| head, pd->port_port);
......@@ -1200,18 +1392,80 @@ static unsigned int ipath_poll(struct file *fp,
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
bail:
return pollflag;
}
static int init_subports(struct ipath_devdata *dd,
struct ipath_portdata *pd,
const struct ipath_user_info *uinfo)
{
int ret = 0;
unsigned num_slaves;
size_t size;
/* Old user binaries don't know about subports */
if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
goto bail;
/*
* If the user is requesting zero or one port,
* skip the subport allocation.
*/
if (uinfo->spu_subport_cnt <= 1)
goto bail;
if (uinfo->spu_subport_cnt > 4) {
ret = -EINVAL;
goto bail;
}
num_slaves = uinfo->spu_subport_cnt - 1;
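/*
 * Allocate per-slave shadow areas: a page of user registers, a
 * rcvhdr queue, and a set of eager buffers for each slave.
 */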
pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
if (!pd->subport_uregbase) {
ret = -ENOMEM;
goto bail;
}
/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
sizeof(u32), PAGE_SIZE) * num_slaves;
pd->subport_rcvhdr_base = vmalloc(size);
if (!pd->subport_rcvhdr_base) {
ret = -ENOMEM;
goto bail_ureg;
}
pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
pd->port_rcvegrbuf_size *
num_slaves);
if (!pd->subport_rcvegrbuf) {
ret = -ENOMEM;
goto bail_rhdr;
}
pd->port_subport_cnt = uinfo->spu_subport_cnt;
pd->port_subport_id = uinfo->spu_subport_id;
pd->active_slaves = 1;
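/* bit 0 of active_slaves stands for the master; slave bits are set in find_shared_port() */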
goto bail;
bail_rhdr:
vfree(pd->subport_rcvhdr_base);
bail_ureg:
vfree(pd->subport_uregbase);
pd->subport_uregbase = NULL;
bail:
return ret;
}
static int try_alloc_port(struct ipath_devdata *dd, int port,
struct file *fp)
struct file *fp,
const struct ipath_user_info *uinfo)
{
struct ipath_portdata *pd;
int ret;
if (!dd->ipath_pd[port]) {
void *p, *ptmp;
if (!(pd = dd->ipath_pd[port])) {
void *ptmp;
p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
/*
* Allocate memory for use in ipath_tid_update() just once
......@@ -1221,33 +1475,35 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
dd->ipath_rcvtidcnt * sizeof(struct page **),
GFP_KERNEL);
if (!p || !ptmp) {
if (!pd || !ptmp) {
ipath_dev_err(dd, "Unable to allocate portdata "
"memory, failing open\n");
ret = -ENOMEM;
kfree(p);
kfree(pd);
kfree(ptmp);
goto bail;
}
dd->ipath_pd[port] = p;
dd->ipath_pd[port] = pd;
dd->ipath_pd[port]->port_port = port;
dd->ipath_pd[port]->port_dd = dd;
dd->ipath_pd[port]->port_tid_pg_list = ptmp;
init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
}
if (!dd->ipath_pd[port]->port_cnt) {
dd->ipath_pd[port]->port_cnt = 1;
fp->private_data = (void *) dd->ipath_pd[port];
if (!pd->port_cnt) {
pd->userversion = uinfo->spu_userversion;
init_user_egr_sizes(pd);
if ((ret = init_subports(dd, pd, uinfo)) != 0)
goto bail;
ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
current->comm, current->pid, dd->ipath_unit,
port);
dd->ipath_pd[port]->port_pid = current->pid;
strncpy(dd->ipath_pd[port]->port_comm, current->comm,
sizeof(dd->ipath_pd[port]->port_comm));
pd->port_cnt = 1;
port_fp(fp) = pd;
pd->port_pid = current->pid;
strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++;
ret = 0;
goto bail;
}
} else
ret = -EBUSY;
bail:
......@@ -1264,7 +1520,8 @@ static inline int usable(struct ipath_devdata *dd)
| IPATH_LINKUNK));
}
static int find_free_port(int unit, struct file *fp)
static int find_free_port(int unit, struct file *fp,
const struct ipath_user_info *uinfo)
{
struct ipath_devdata *dd = ipath_lookup(unit);
int ret, i;
......@@ -1279,8 +1536,8 @@ static int find_free_port(int unit, struct file *fp)
goto bail;
}
for (i = 0; i < dd->ipath_cfgports; i++) {
ret = try_alloc_port(dd, i, fp);
for (i = 1; i < dd->ipath_cfgports; i++) {
ret = try_alloc_port(dd, i, fp, uinfo);
if (ret != -EBUSY)
goto bail;
}
......@@ -1290,13 +1547,14 @@ static int find_free_port(int unit, struct file *fp)
return ret;
}
static int find_best_unit(struct file *fp)
static int find_best_unit(struct file *fp,
const struct ipath_user_info *uinfo)
{
int ret = 0, i, prefunit = -1, devmax;
int maxofallports, npresent, nup;
int ndev;
(void) ipath_count_units(&npresent, &nup, &maxofallports);
devmax = ipath_count_units(&npresent, &nup, &maxofallports);
/*
* This code is present to allow a knowledgeable person to
......@@ -1343,8 +1601,6 @@ static int find_best_unit(struct file *fp)
if (prefunit != -1)
devmax = prefunit + 1;
else
devmax = ipath_count_units(NULL, NULL, NULL);
recheck:
for (i = 1; i < maxofallports; i++) {
for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
......@@ -1359,7 +1615,7 @@ static int find_best_unit(struct file *fp)
* next.
*/
continue;
ret = try_alloc_port(dd, i, fp);
ret = try_alloc_port(dd, i, fp, uinfo);
if (!ret)
goto done;
}
......@@ -1395,22 +1651,183 @@ static int find_best_unit(struct file *fp)
return ret;
}
static int find_shared_port(struct file *fp,
const struct ipath_user_info *uinfo)
{
int devmax, ndev, i;
int ret = 0;
devmax = ipath_count_units(NULL, NULL, NULL);
for (ndev = 0; ndev < devmax; ndev++) {
struct ipath_devdata *dd = ipath_lookup(ndev);
if (!dd)
continue;
for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
/* Skip ports which are not yet open */
if (!pd || !pd->port_cnt)
continue;
/* Skip port if it doesn't match the requested one */
if (pd->port_subport_id != uinfo->spu_subport_id)
continue;
/* Verify the sharing process matches the master */
if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
pd->userversion != uinfo->spu_userversion ||
pd->port_cnt >= pd->port_subport_cnt) {
ret = -EINVAL;
goto done;
}
port_fp(fp) = pd;
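/* subport 0 is the master, so the first slave gets index 1 */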
subport_fp(fp) = pd->port_cnt++;
tidcursor_fp(fp) = 0;
pd->active_slaves |= 1 << subport_fp(fp);
ipath_cdbg(PROC,
"%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
current->comm, current->pid,
subport_fp(fp),
pd->port_comm, pd->port_pid,
dd->ipath_unit, pd->port_port);
ret = 1;
goto done;
}
}
done:
return ret;
}
static int ipath_open(struct inode *in, struct file *fp)
{
int ret, user_minor;
/* The real work is performed later in ipath_assign_port() */
fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
return fp->private_data ? 0 : -ENOMEM;
}
/* Get port early, so can set affinity prior to memory allocation */
static int ipath_assign_port(struct file *fp,
const struct ipath_user_info *uinfo)
{
int ret;
int i_minor;
unsigned swminor;
/* Check to be sure we haven't already initialized this file */
if (port_fp(fp)) {
ret = -EINVAL;
goto done;
}
/* for now, if major version is different, bail */
if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
ipath_dbg("User major version %d not same as driver "
"major %d\n", uinfo->spu_userversion >> 16,
IPATH_USER_SWMAJOR);
ret = -ENODEV;
goto done;
}
swminor = uinfo->spu_userversion & 0xffff;
if (swminor != IPATH_USER_SWMINOR)
ipath_dbg("User minor version %d not same as driver "
"minor %d\n", swminor, IPATH_USER_SWMINOR);
mutex_lock(&ipath_mutex);
user_minor = iminor(in) - IPATH_USER_MINOR_BASE;
if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
(ret = find_shared_port(fp, uinfo))) {
mutex_unlock(&ipath_mutex);
if (ret > 0)
ret = 0;
goto done;
}
i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE;
ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
(long)in->i_rdev, user_minor);
(long)fp->f_dentry->d_inode->i_rdev, i_minor);
if (user_minor)
ret = find_free_port(user_minor - 1, fp);
if (i_minor)
ret = find_free_port(i_minor - 1, fp, uinfo);
else
ret = find_best_unit(fp);
ret = find_best_unit(fp, uinfo);
mutex_unlock(&ipath_mutex);
done:
return ret;
}
static int ipath_do_user_init(struct file *fp,
const struct ipath_user_info *uinfo)
{
int ret;
struct ipath_portdata *pd;
struct ipath_devdata *dd;
u32 head32;
pd = port_fp(fp);
dd = pd->port_dd;
if (uinfo->spu_rcvhdrsize) {
ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
if (ret)
goto done;
}
/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
/* for right now, kernel piobufs are at end, so port 1 is at 0 */
pd->port_piobufs = dd->ipath_piobufbase +
dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
pd->port_port, pd->port_piobufs);
/*
* Now allocate the rcvhdr Q and eager TIDs; skip the TID
* array for the time being. If pd->port_port > chip-supported,
* we need to do extra stuff here by handling overflow
* through port 0, someday.
*/
ret = ipath_create_rcvhdrq(dd, pd);
if (!ret)
ret = ipath_create_user_egr(pd);
if (ret)
goto done;
/*
* set the eager head register for this port to the current values
* of the tail pointers, since we don't know if they were
* updated on last use of the port.
*/
head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
dd->ipath_lastegrheads[pd->port_port] = -1;
dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
pd->port_port, head32);
pd->port_tidcursor = 0; /* start at beginning after open */
/*
* now enable the port; the tail registers will be written to memory
* by the chip as soon as it sees the write to
* dd->ipath_kregs->kr_rcvctrl. The update only happens on
* transition from 0 to 1, so clear it first, then set it as part of
* enabling the port. This will (very briefly) affect any other
* open ports, but it shouldn't be long enough to be an issue.
* We explicitly set the in-memory copy to 0 beforehand, so we don't
* have to wait to be sure the DMA update has happened.
*/
*(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL;
set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
&dd->ipath_rcvctrl);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
done:
return ret;
}
......@@ -1433,6 +1850,8 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
if (!dd->ipath_pageshadow[i])
continue;
pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
1);
dd->ipath_pageshadow[i] = NULL;
......@@ -1453,6 +1872,7 @@ static void unlock_expected_tids(struct ipath_portdata *pd)
static int ipath_close(struct inode *in, struct file *fp)
{
int ret = 0;
struct ipath_filedata *fd;
struct ipath_portdata *pd;
struct ipath_devdata *dd;
unsigned port;
......@@ -1462,9 +1882,24 @@ static int ipath_close(struct inode *in, struct file *fp)
mutex_lock(&ipath_mutex);
pd = port_fp(fp);
port = pd->port_port;
fd = (struct ipath_filedata *) fp->private_data;
fp->private_data = NULL;
pd = fd->pd;
if (!pd) {
mutex_unlock(&ipath_mutex);
goto bail;
}
if (--pd->port_cnt) {
/*
* XXX If the master closes the port before the slave(s),
* revoke the mmap for the eager receive queue so
* the slave(s) don't wait for receive data forever.
*/
pd->active_slaves &= ~(1 << fd->subport);
mutex_unlock(&ipath_mutex);
goto bail;
}
port = pd->port_port;
dd = pd->port_dd;
if (pd->port_hdrqfull) {
......@@ -1503,8 +1938,6 @@ static int ipath_close(struct inode *in, struct file *fp)
/* clean up the pkeys for this port user */
ipath_clean_part_key(pd, dd);
/*
* be paranoid, and never write 0's to these, just use an
* unused part of the port 0 tail page. Of course,
......@@ -1523,39 +1956,49 @@ static int ipath_close(struct inode *in, struct file *fp)
i = dd->ipath_pbufsport * (port - 1);
ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
dd->ipath_f_clear_tids(dd, pd->port_port);
if (dd->ipath_pageshadow)
unlock_expected_tids(pd);
ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
pd->port_comm, pd->port_pid,
dd->ipath_unit, port);
dd->ipath_f_clear_tids(dd, pd->port_port);
}
pd->port_cnt = 0;
pd->port_pid = 0;
dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
bail:
kfree(fd);
return ret;
}
static int ipath_port_info(struct ipath_portdata *pd,
static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
struct ipath_port_info __user *uinfo)
{
struct ipath_port_info info;
int nup;
int ret;
size_t sz;
(void) ipath_count_units(NULL, &nup, NULL);
info.num_active = nup;
info.unit = pd->port_dd->ipath_unit;
info.port = pd->port_port;
info.subport = subport;
/* Don't return new fields if old library opened the port. */
if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
/* Number of user ports available for this device. */
info.num_ports = pd->port_dd->ipath_cfgports - 1;
info.num_subports = pd->port_subport_cnt;
sz = sizeof(info);
} else
sz = sizeof(info) - 2 * sizeof(u16);
if (copy_to_user(uinfo, &info, sizeof(info))) {
if (copy_to_user(uinfo, &info, sz)) {
ret = -EFAULT;
goto bail;
}
......@@ -1565,6 +2008,16 @@ static int ipath_port_info(struct ipath_portdata *pd,
return ret;
}
static int ipath_get_slave_info(struct ipath_portdata *pd,
void __user *slave_mask_addr)
{
int ret = 0;
if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
ret = -EFAULT;
return ret;
}
static ssize_t ipath_write(struct file *fp, const char __user *data,
size_t count, loff_t *off)
{
......@@ -1591,6 +2044,8 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed = sizeof(cmd.type);
switch (cmd.type) {
case IPATH_CMD_ASSIGN_PORT:
case __IPATH_CMD_USER_INIT:
case IPATH_CMD_USER_INIT:
copy = sizeof(cmd.cmd.user_info);
dest = &cmd.cmd.user_info;
......@@ -1617,6 +2072,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
dest = &cmd.cmd.part_key;
src = &ucmd->cmd.part_key;
break;
case IPATH_CMD_SLAVE_INFO:
copy = sizeof(cmd.cmd.slave_mask_addr);
dest = &cmd.cmd.slave_mask_addr;
src = &ucmd->cmd.slave_mask_addr;
break;
default:
ret = -EINVAL;
goto bail;
......@@ -1634,34 +2094,55 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
consumed += copy;
pd = port_fp(fp);
if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
cmd.type != IPATH_CMD_ASSIGN_PORT) {
ret = -EINVAL;
goto bail;
}
switch (cmd.type) {
case IPATH_CMD_ASSIGN_PORT:
ret = ipath_assign_port(fp, &cmd.cmd.user_info);
if (ret)
goto bail;
break;
case __IPATH_CMD_USER_INIT:
/* backwards compatibility, get port first */
ret = ipath_assign_port(fp, &cmd.cmd.user_info);
if (ret)
goto bail;
/* and fall through to current version. */
case IPATH_CMD_USER_INIT:
ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
if (ret < 0)
ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
if (ret)
goto bail;
ret = ipath_get_base_info(
pd, (void __user *) (unsigned long)
fp, (void __user *) (unsigned long)
cmd.cmd.user_info.spu_base_info,
cmd.cmd.user_info.spu_base_info_size);
break;
case IPATH_CMD_RECV_CTRL:
ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
break;
case IPATH_CMD_PORT_INFO:
ret = ipath_port_info(pd,
ret = ipath_port_info(pd, subport_fp(fp),
(struct ipath_port_info __user *)
(unsigned long) cmd.cmd.port_info);
break;
case IPATH_CMD_TID_UPDATE:
ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
break;
case IPATH_CMD_TID_FREE:
ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
break;
case IPATH_CMD_SET_PART_KEY:
ret = ipath_set_part_key(pd, cmd.cmd.part_key);
break;
case IPATH_CMD_SLAVE_INFO:
ret = ipath_get_slave_info(pd,
(void __user *) (unsigned long)
cmd.cmd.slave_mask_addr);
break;
}
if (ret >= 0)
......@@ -1858,4 +2339,3 @@ void ipath_user_remove(struct ipath_devdata *dd)
bail:
return;
}
......@@ -356,19 +356,16 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
pos = *ppos;
if ( pos < 0) {
if (pos != 0) {
ret = -EINVAL;
goto bail;
}
if (pos >= sizeof(struct ipath_flash)) {
ret = 0;
if (count != sizeof(struct ipath_flash)) {
ret = -EINVAL;
goto bail;
}
if (count > sizeof(struct ipath_flash) - pos)
count = sizeof(struct ipath_flash) - pos;
tmp = kmalloc(count, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
......
......@@ -252,8 +252,8 @@ static const struct ipath_cregs ipath_ht_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
#define INFINIPATH_I_RCVURG_MASK 0x1FF
#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
......@@ -338,7 +338,7 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
if (crcbits) {
u16 ctrl0, ctrl1;
snprintf(bitsmsg, sizeof bitsmsg,
"[HT%s lane %s CRC (%llx); ignore till reload]",
"[HT%s lane %s CRC (%llx); powercycle to completely clear]",
!(crcbits & _IPATH_HTLINK1_CRCBITS) ?
"0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
? "1 (B)" : "0+1 (A+B)"),
......@@ -389,17 +389,28 @@ static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
_IPATH_HTLINK1_CRCBITS)));
}
/* 6110 specific hardware errors... */
static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
/**
* ipath_ht_handle_hwerrors - display hardware errors
* ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
* @msg: the output buffer
* @msgl: the size of the output buffer
*
* Use same msg buffer as regular errors to avoid
* excessive stack use. Most hardware errors are catastrophic, but for
* right now, we'll print them and continue.
* We reuse the same message buffer as ipath_handle_errors() to avoid
* excessive stack usage.
* Use same msg buffer as regular errors to avoid excessive stack
* use. Most hardware errors are catastrophic, but for right now,
* we'll print them and continue. We reuse the same message buffer as
* ipath_handle_errors() to avoid excessive stack usage.
*/
static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
size_t msgl)
......@@ -440,19 +451,49 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
if ((hwerrs & ~dd->ipath_lasthwerror) ||
if ((hwerrs & ~(dd->ipath_lasthwerror |
((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
if (hwerrs & ~infinipath_hwe_bitsextant)
if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
(hwerrs & ~infinipath_hwe_bitsextant));
(hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
/*
* parity errors in send memory are recoverable,
* just cancel the send (if indicated in sendbuffererror),
* count the occurrence, unfreeze (if no other handled
* hardware error bits are set), and continue. They can
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
ipath_stats.sps_txeparity++;
ipath_dbg("Recovering from TXE parity error (%llu), "
"hwerrstatus=%llx\n",
(unsigned long long) ipath_stats.sps_txeparity,
(unsigned long long) hwerrs);
ipath_disarm_senderrbufs(dd);
hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
if (!hwerrs) { /* else leave in freeze mode */
ipath_write_kreg(dd,
dd->ipath_kregs->kr_control,
dd->ipath_control);
return;
}
}
if (hwerrs) {
/*
* if any set that we aren't ignoring; only
......@@ -499,44 +540,16 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
bits);
strlcat(msg, bitsmsg, msgl);
}
if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
<< INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
bits);
strlcat(msg, bitsmsg, msgl);
}
if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
bits);
strlcat(msg, bitsmsg, msgl);
}
if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
strlcat(msg, "[IB2IPATH Parity]", msgl);
if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
strlcat(msg, "[IPATH2IB Parity]", msgl);
if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
strlcat(msg, "[HTC Ireq Parity]", msgl);
if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
strlcat(msg, "[HTC Treq Parity]", msgl);
if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
strlcat(msg, "[HTC Tresp Parity]", msgl);
ipath_format_hwerrors(hwerrs,
ipath_6110_hwerror_msgs,
sizeof(ipath_6110_hwerror_msgs) /
sizeof(ipath_6110_hwerror_msgs[0]),
msg, msgl);
if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
hwerr_crcbits(dd, hwerrs, msg, msgl);
if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
strlcat(msg, "[HT core Misc5]", msgl);
if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
strlcat(msg, "[HT core Misc6]", msgl);
if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
strlcat(msg, "[HT core Misc7]", msgl);
if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
msgl);
......@@ -573,11 +586,6 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
strlcat(msg, "[Rx Dsync]", msgl);
if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
strlcat(msg, "[SerDes PLL]", msgl);
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
......@@ -1080,21 +1088,21 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
}
static void ipath_init_ht_variables(void)
static void ipath_init_ht_variables(struct ipath_devdata *dd)
{
ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
ipath_gpio_sda = IPATH_GPIO_SDA;
ipath_gpio_scl = IPATH_GPIO_SCL;
dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
dd->ipath_gpio_sda = IPATH_GPIO_SDA;
dd->ipath_gpio_scl = IPATH_GPIO_SCL;
infinipath_i_bitsextant =
dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
infinipath_e_bitsextant =
dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
......@@ -1112,7 +1120,7 @@ static void ipath_init_ht_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
infinipath_hwe_bitsextant =
dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
......@@ -1141,8 +1149,8 @@ static void ipath_init_ht_variables(void)
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/**
......@@ -1607,5 +1615,5 @@ void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
* do very early init that is needed before ipath_f_bus is
* called
*/
ipath_init_ht_variables();
ipath_init_ht_variables(dd);
}
......@@ -263,8 +263,8 @@ static const struct ipath_cregs ipath_pe_cregs = {
};
/* kr_intstatus, kr_intclear, kr_intmask bits */
#define INFINIPATH_I_RCVURG_MASK 0x1F
#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
......@@ -294,6 +294,33 @@ static const struct ipath_cregs ipath_pe_cregs = {
#define IPATH_GPIO_SCL (1ULL << \
(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
/*
* Rev2 silicon allows suppressing check for ArmLaunch errors.
* This can speed up short packet sends on systems that do
* not guarantee write-order.
*/
#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
/* 6120 specific hardware errors... */
static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
/*
* In practice, it's unlikely that we'll see PCIe PLL, or bus
* parity or memory parity error failures, because most likely we
* won't be able to talk to the core of the chip. Nonetheless, we
* might see them, if they are in parts of the PCIe core that aren't
* essential.
*/
INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
/**
* ipath_pe_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
......@@ -343,19 +370,49 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
*/
if ((hwerrs & ~dd->ipath_lasthwerror) ||
if ((hwerrs & ~(dd->ipath_lasthwerror |
((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
(ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
if (hwerrs & ~infinipath_hwe_bitsextant)
if (hwerrs & ~dd->ipath_hwe_bitsextant)
ipath_dev_err(dd, "hwerror interrupt with unknown errors "
"%llx set\n", (unsigned long long)
(hwerrs & ~infinipath_hwe_bitsextant));
(hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
if (ctrl & INFINIPATH_C_FREEZEMODE) {
/*
* parity errors in send memory are recoverable,
* just cancel the send (if indicated in sendbuffererror),
* count the occurrence, unfreeze (if no other handled
* hardware error bits are set), and continue. They can
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
ipath_stats.sps_txeparity++;
ipath_dbg("Recovering from TXE parity error (%llu), "
"hwerrstatus=%llx\n",
(unsigned long long) ipath_stats.sps_txeparity,
(unsigned long long) hwerrs);
ipath_disarm_senderrbufs(dd);
hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
if (!hwerrs) { /* else leave in freeze mode */
ipath_write_kreg(dd,
dd->ipath_kregs->kr_control,
dd->ipath_control);
return;
}
}
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
......@@ -379,9 +436,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
} else {
ipath_dbg("Clearing freezemode on ignored hardware "
"error\n");
ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
ctrl);
dd->ipath_control);
}
}
......@@ -396,24 +452,13 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);
}
if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
<< INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
bits);
strlcat(msg, bitsmsg, msgl);
}
if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
bits);
strlcat(msg, bitsmsg, msgl);
}
ipath_format_hwerrors(hwerrs,
ipath_6120_hwerror_msgs,
sizeof(ipath_6120_hwerror_msgs)/
sizeof(ipath_6120_hwerror_msgs[0]),
msg, msgl);
if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
<< INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
bits = (u32) ((hwerrs >>
......@@ -423,10 +468,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
"[PCIe Mem Parity Errs %x] ", bits);
strlcat(msg, bitsmsg, msgl);
}
if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
strlcat(msg, "[IB2IPATH Parity]", msgl);
if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
strlcat(msg, "[IPATH2IB Parity]", msgl);
#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
INFINIPATH_HWE_COREPLL_RFSLIP )
......@@ -452,34 +493,6 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
dd->ipath_hwerrmask);
}
if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
strlcat(msg, "[PCIe Poisoned TLP]", msgl);
if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
strlcat(msg, "[PCIe completion timeout]", msgl);
/*
* In practice, it's unlikely that we'll see PCIe PLL, or bus
* parity or memory parity error failures, because most likely we
* won't be able to talk to the core of the chip. Nonetheless, we
* might see them, if they are in parts of the PCIe core that aren't
* essential.
*/
if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
strlcat(msg, "[PCIePLL1]", msgl);
if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
strlcat(msg, "[PCIePLL0]", msgl);
if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
strlcat(msg, "[PCIe XTLH core parity]", msgl);
if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
strlcat(msg, "[PCIe ADM TX core parity]", msgl);
if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
strlcat(msg, "[PCIe ADM RX core parity]", msgl);
if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
strlcat(msg, "[Rx Dsync]", msgl);
if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
strlcat(msg, "[SerDes PLL]", msgl);
ipath_dev_err(dd, "%s hardware error\n", msg);
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
/*
......@@ -525,6 +538,9 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
case 5:
n = "InfiniPath_QMH7140";
break;
case 6:
n = "InfiniPath_QLE7142";
break;
default:
ipath_dev_err(dd,
"Don't yet know about board with ID %u\n",
......@@ -571,9 +587,12 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
if (!dd->ipath_boardrev) // no PLL for Emulator
val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
/* workaround bug 9460 in internal interface bus parity checking */
if (dd->ipath_minrev < 2) {
/* workaround bug 9460 in internal interface bus parity
* checking. Fixed (HW bug 9490) in Rev2.
*/
val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
}
dd->ipath_hwerrmask = val;
}
......@@ -583,8 +602,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
*/
static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
{
u64 val, tmp, config1;
int ret = 0, change = 0;
u64 val, tmp, config1, prev_val;
int ret = 0;
ipath_dbg("Trying to bringup serdes\n");
......@@ -641,6 +660,7 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
prev_val = val;
if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
val &=
......@@ -648,11 +668,9 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
INFINIPATH_XGXS_MDIOADDR_SHIFT);
/* MDIO address 3 */
val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
change = 1;
}
if (val & INFINIPATH_XGXS_RESET) {
val &= ~INFINIPATH_XGXS_RESET;
change = 1;
}
if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
......@@ -661,9 +679,19 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
INFINIPATH_XGXS_RX_POL_SHIFT);
val |= dd->ipath_rx_pol_inv <<
INFINIPATH_XGXS_RX_POL_SHIFT;
change = 1;
}
if (change)
if (dd->ipath_minrev >= 2) {
/* Rev 2. can tolerate multiple writes to PBC, and
* allowing them can provide lower latency on some
* CPUs, but this feature is off by default, only
* turned on by setting D63 of XGXSconfig reg.
* May want to make this conditional more
* fine-grained in future. This is not exactly
* related to XGXS, but where the bit ended up.
*/
val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
}
if (val != prev_val)
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
......@@ -717,9 +745,25 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
}
/* this is not yet needed on this chip, so just return 0. */
static int ipath_pe_intconfig(struct ipath_devdata *dd)
{
u64 val;
u32 chiprev;
/*
* If the chip supports added error indication via GPIO pins,
* enable interrupts on those bits so the interrupt routine
* can count the events. Also set flag so interrupt routine
* can know they are expected.
*/
chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
/* Rev2+ reports extra errors via internal GPIO pins */
dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
val |= IPATH_GPIO_ERRINTR_MASK;
ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
}
return 0;
}
......@@ -853,21 +897,23 @@ static int ipath_setup_pe_config(struct ipath_devdata *dd,
return 0;
}
static void ipath_init_pe_variables(void)
static void ipath_init_pe_variables(struct ipath_devdata *dd)
{
/*
* bits for selecting i2c direction and values,
* used for I2C serial flash
*/
ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
ipath_gpio_sda = IPATH_GPIO_SDA;
ipath_gpio_scl = IPATH_GPIO_SCL;
dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
dd->ipath_gpio_sda = IPATH_GPIO_SDA;
dd->ipath_gpio_scl = IPATH_GPIO_SCL;
/* variables for sanity checking interrupt and errors */
infinipath_hwe_bitsextant =
dd->ipath_hwe_bitsextant =
(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
INFINIPATH_HWE_PCIE1PLLFAILED |
......@@ -883,13 +929,13 @@ static void ipath_init_pe_variables(void)
INFINIPATH_HWE_SERDESPLLFAILED |
INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
infinipath_i_bitsextant =
dd->ipath_i_bitsextant =
(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
(INFINIPATH_I_RCVAVAIL_MASK <<
INFINIPATH_I_RCVAVAIL_SHIFT) |
INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
infinipath_e_bitsextant =
dd->ipath_e_bitsextant =
INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
......@@ -907,8 +953,8 @@ static void ipath_init_pe_variables(void)
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
INFINIPATH_E_HARDWARE;
infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
}
/* setup the MSI stuff again after a reset. I'd like to just call
......@@ -1082,6 +1128,45 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
mmiowb();
spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
}
/**
* ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
* @dd: the infinipath device
* @tidptr: pointer to the expected TID (in chip) to update
* @tidtype: 0 for eager, 1 for expected
* @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
*
* This exists as a separate routine to allow for selection of the
* appropriate "flavor". The static calls in cleanup just use the
* revision-agnostic form, as they are not performance critical.
*/
static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
u32 type, unsigned long pa)
{
u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
if (pa != dd->ipath_tidinvalid) {
if (pa & ((1U << 11) - 1)) {
dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
"not 2KB aligned!\n", pa);
return;
}
pa >>= 11;
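/* chip takes the address in 2KB units; bits 31-29 of the shifted value carry the type/size encoding OR'd in below */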
/* paranoia check */
if (pa & (7<<29))
ipath_dev_err(dd,
"BUG: Physical page address 0x%lx "
"has bits set in 31-29\n", pa);
if (type == 0)
pa |= dd->ipath_tidtemplate;
else /* for now, always full 4KB page */
pa |= 2 << 29;
}
if (dd->ipath_kregbase)
writel(pa, tidp32);
mmiowb();
}
/**
* ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
......@@ -1203,7 +1288,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
/**
* ipath_init_pe_get_base_info - set chip-specific flags for user code
* @dd: the infinipath device
* @pd: the infinipath port
* @kbase: ipath_base_info pointer
*
* We set the PCIE flag because the lower bandwidth on PCIe vs
......@@ -1212,6 +1297,7 @@ int __attribute__((weak)) ipath_unordered_wc(void)
static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
{
struct ipath_base_info *kinfo = kbase;
struct ipath_devdata *dd;
if (ipath_unordered_wc()) {
kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
......@@ -1220,8 +1306,20 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
else
ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
if (pd == NULL)
goto done;
dd = pd->port_dd;
if (dd != NULL && dd->ipath_minrev >= 2) {
ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
}
done:
kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
return 0;
}
......@@ -1244,6 +1342,9 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
dd->ipath_f_clear_tids = ipath_pe_clear_tids;
if (dd->ipath_minrev >= 2)
dd->ipath_f_put_tid = ipath_pe_put_tid_2;
else
dd->ipath_f_put_tid = ipath_pe_put_tid;
dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
dd->ipath_f_setextled = ipath_setup_pe_setextled;
......@@ -1259,6 +1360,6 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
dd->ipath_kregs = &ipath_pe_kregs;
dd->ipath_cregs = &ipath_pe_cregs;
ipath_init_pe_variables();
ipath_init_pe_variables(dd);
}
......@@ -88,13 +88,13 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
static int create_port0_egr(struct ipath_devdata *dd)
{
unsigned e, egrcnt;
struct sk_buff **skbs;
struct ipath_skbinfo *skbinfo;
int ret;
egrcnt = dd->ipath_rcvegrcnt;
skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
if (skbs == NULL) {
skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
if (skbinfo == NULL) {
ipath_dev_err(dd, "allocation error for eager TID "
"skb array\n");
ret = -ENOMEM;
......@@ -109,13 +109,13 @@ static int create_port0_egr(struct ipath_devdata *dd)
* 4 bytes so that the data buffer stays word aligned.
* See ipath_kreceive() for more details.
*/
skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
if (!skbs[e]) {
skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
if (!skbinfo[e].skb) {
ipath_dev_err(dd, "SKB allocation error for "
"eager TID %u\n", e);
while (e != 0)
dev_kfree_skb(skbs[--e]);
vfree(skbs);
dev_kfree_skb(skbinfo[--e].skb);
vfree(skbinfo);
ret = -ENOMEM;
goto bail;
}
......@@ -124,14 +124,17 @@ static int create_port0_egr(struct ipath_devdata *dd)
* After loop above, so we can test non-NULL to see if ready
* to use at receive, etc.
*/
dd->ipath_port0_skbs = skbs;
dd->ipath_port0_skbinfo = skbinfo;
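/* DMA-map each skb's data buffer and program its bus address into the corresponding eager TID */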
for (e = 0; e < egrcnt; e++) {
unsigned long phys =
virt_to_phys(dd->ipath_port0_skbs[e]->data);
dd->ipath_port0_skbinfo[e].phys =
ipath_map_single(dd->pcidev,
dd->ipath_port0_skbinfo[e].skb->data,
dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
((char __iomem *) dd->ipath_kregbase +
dd->ipath_rcvegrbase), 0, phys);
dd->ipath_rcvegrbase), 0,
dd->ipath_port0_skbinfo[e].phys);
}
ret = 0;
......@@ -432,16 +435,33 @@ static int init_pioavailregs(struct ipath_devdata *dd)
*/
static void init_shadow_tids(struct ipath_devdata *dd)
{
dd->ipath_pageshadow = (struct page **)
vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
struct page **pages;
dma_addr_t *addrs;
pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
if (!dd->ipath_pageshadow)
if (!pages) {
ipath_dev_err(dd, "failed to allocate shadow page * "
"array, no expected sends!\n");
else
memset(dd->ipath_pageshadow, 0,
dd->ipath_cfgports * dd->ipath_rcvtidcnt *
dd->ipath_pageshadow = NULL;
return;
}
addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(dma_addr_t));
if (!addrs) {
ipath_dev_err(dd, "failed to allocate shadow dma handle "
"array, no expected sends!\n");
vfree(pages);	/* ipath_pageshadow not assigned yet; free the local pointer */
dd->ipath_pageshadow = NULL;
return;
}
memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
dd->ipath_pageshadow = pages;
dd->ipath_physshadow = addrs;
}
static void enable_chip(struct ipath_devdata *dd,
......
......@@ -37,6 +37,50 @@
#include "ipath_verbs.h"
#include "ipath_common.h"
/*
* Called when we might have an error that is specific to a particular
* PIO buffer, and may need to cancel that buffer, so it can be re-used.
*/
void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
{
u32 piobcnt;
unsigned long sbuf[4];
/*
* it's possible that sendbuffererror could have bits set; might
* have already done this as a result of hardware error handling
*/
piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
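/* sendbuffererror is a bitmap, one bit per PIO buffer, spread across up to four 64-bit registers */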
/* read these before writing errorclear */
sbuf[0] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror);
sbuf[1] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 1);
if (piobcnt > 128) {
sbuf[2] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 2);
sbuf[3] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 3);
}
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
"SendbufErrs %lx %lx", sbuf[0],
sbuf[1]);
if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
printk(" %lx %lx ", sbuf[2], sbuf[3]);
printk("\n");
}
for (i = 0; i < piobcnt; i++)
if (test_bit(i, sbuf))
ipath_disarm_piobufs(dd, i, 1);
dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
}
}
/* These are all rcv-related errors which we want to count for stats */
#define E_SUM_PKTERRS \
(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
......@@ -68,53 +112,9 @@
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
unsigned long sbuf[4];
u64 ignore_this_time = 0;
u32 piobcnt;
/* if possible that sendbuffererror could be valid */
piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
/* read these before writing errorclear */
sbuf[0] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror);
sbuf[1] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 1);
if (piobcnt > 128) {
sbuf[2] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 2);
sbuf[3] = ipath_read_kreg64(
dd, dd->ipath_kregs->kr_sendbuffererror + 3);
}
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
printk("%lx %lx ", sbuf[2], sbuf[3]);
for (i = 0; i < piobcnt; i++) {
if (test_bit(i, sbuf)) {
u32 __iomem *piobuf;
if (i < dd->ipath_piobcnt2k)
piobuf = (u32 __iomem *)
(dd->ipath_pio2kbase +
i * dd->ipath_palign);
else
piobuf = (u32 __iomem *)
(dd->ipath_pio4kbase +
(i - dd->ipath_piobcnt2k) *
dd->ipath_4kalign);
ipath_cdbg(PKT,
"PIObuf[%u] @%p pbc is %x; ",
i, piobuf, readl(piobuf));
ipath_disarm_piobufs(dd, i, 1);
}
}
if (ipath_debug & __IPATH_PKTDBG)
printk("\n");
}
ipath_disarm_senderrbufs(dd);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*
......@@ -132,6 +132,82 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
return ignore_this_time;
}
/* generic hw error messages... */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
{ \
.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
.msg = "TXE " #a " Memory Parity" \
}
#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
{ \
.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
.msg = "RXE " #a " Memory Parity" \
}
static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
};
/**
* ipath_format_hwmsg - format a single hwerror message
* @msg: message buffer
* @msgl: length of message buffer
* @hwmsg: message to add to message buffer
*/
static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
{
strlcat(msg, "[", msgl);
strlcat(msg, hwmsg, msgl);
strlcat(msg, "]", msgl);
}
/**
* ipath_format_hwerrors - format hardware error messages for display
* @hwerrs: hardware errors bit vector
* @hwerrmsgs: hardware error descriptions
* @nhwerrmsgs: number of hwerrmsgs
* @msg: message buffer
* @msgl: message buffer length
*/
void ipath_format_hwerrors(u64 hwerrs,
const struct ipath_hwerror_msgs *hwerrmsgs,
size_t nhwerrmsgs,
char *msg, size_t msgl)
{
int i;
const int glen =
sizeof(ipath_generic_hwerror_msgs) /
sizeof(ipath_generic_hwerror_msgs[0]);
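/* report the generic errors first, then the chip-specific ones passed in by the caller */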
for (i=0; i<glen; i++) {
if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
ipath_format_hwmsg(msg, msgl,
ipath_generic_hwerror_msgs[i].msg);
}
}
for (i=0; i<nhwerrmsgs; i++) {
if (hwerrs & hwerrmsgs[i].mask) {
ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
}
}
}
/* return the strings for the most common link states */
static char *ib_linkstate(u32 linkstate)
{
......@@ -404,10 +480,10 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
}
if (!noprint && (errs & ~infinipath_e_bitsextant))
if (!noprint && (errs & ~dd->ipath_e_bitsextant))
ipath_dev_err(dd, "error interrupt with unknown errors "
"%llx set\n", (unsigned long long)
(errs & ~infinipath_e_bitsextant));
(errs & ~dd->ipath_e_bitsextant));
if (errs & E_SUM_ERRS)
ignore_this_time = handle_e_sum_errs(dd, errs);
......@@ -478,6 +554,14 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
~(INFINIPATH_E_HARDWARE |
INFINIPATH_E_IBSTATUSCHANGED);
}
/* likely due to cancel, so suppress */
if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
dd->ipath_lastcancel > jiffies) {
ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
}
if (!errs)
return 0;
......@@ -529,7 +613,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* don't report same point multiple times,
* except kernel
*/
tl = (u32) * pd->port_rcvhdrtail_kvaddr;
tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
if (tl == dd->ipath_lastrcvhdrqtails[i])
continue;
hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
......@@ -729,9 +813,9 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
int rcvdint = 0;
portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
infinipath_i_rcvavail_mask)
dd->ipath_i_rcvavail_mask)
| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
infinipath_i_rcvurg_mask);
dd->ipath_i_rcvurg_mask);
for (i = 1; i < dd->ipath_cfgports; i++) {
struct ipath_portdata *pd = dd->ipath_pd[i];
if (portr & (1 << i) && pd && pd->port_cnt &&
......@@ -808,7 +892,7 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (oldhead != curtail) {
if (dd->ipath_flags & IPATH_GPIO_INTR) {
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
(u64) (1 << 2));
(u64) (1 << IPATH_GPIO_PORT0_BIT));
istat = port0rbits | INFINIPATH_I_GPIO;
}
else
......@@ -838,10 +922,10 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (unexpected)
unexpected = 0;
if (unlikely(istat & ~infinipath_i_bitsextant))
if (unlikely(istat & ~dd->ipath_i_bitsextant))
ipath_dev_err(dd,
"interrupt with unknown interrupts %x set\n",
istat & (u32) ~ infinipath_i_bitsextant);
istat & (u32) ~ dd->ipath_i_bitsextant);
else
ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
......@@ -867,26 +951,80 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
if (istat & INFINIPATH_I_GPIO) {
/*
* Packets are available in the port 0 rcv queue.
* Eventually this needs to be generalized to check
* IPATH_GPIO_INTR, and the specific GPIO bit, if
* GPIO interrupts are used for anything else.
* GPIO interrupts fall in two broad classes:
* GPIO_2 indicates (on some HT4xx boards) that a packet
* has arrived for Port 0. Checking for this
* is controlled by flag IPATH_GPIO_INTR.
* GPIO_3..5 on IBA6120 Rev2 chips indicate errors
* that we need to count. Checking for this
* is controlled by flag IPATH_GPIO_ERRINTRS.
*/
if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
u32 gpiostatus;
u32 to_clear = 0;
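/* accumulate handled bits and write kr_gpio_clear just once, below */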
gpiostatus = ipath_read_kreg32(
dd, dd->ipath_kregs->kr_gpio_status);
ipath_dbg("Unexpected GPIO interrupt bits %x\n",
gpiostatus);
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
gpiostatus);
/* First the error-counter case.
*/
if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
(dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
/* want to clear the bits we see asserted. */
to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
/*
* Count appropriately, clear bits out of our copy,
* as they have been "handled".
*/
if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
ipath_dbg("FlowCtl on UnsupVL\n");
dd->ipath_rxfc_unsupvl_errs++;
}
else {
/* Clear GPIO status bit 2 */
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
(u64) (1 << 2));
if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
ipath_dbg("Overrun Threshold exceeded\n");
dd->ipath_overrun_thresh_errs++;
}
if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
ipath_dbg("Local Link Integrity error\n");
dd->ipath_lli_errs++;
}
gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
}
/* Now the Port0 Receive case */
if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
(dd->ipath_flags & IPATH_GPIO_INTR)) {
/*
* GPIO status bit 2 is set, and we expected it.
* Clear it and note that port 0 data may be ready.
* This probably only happens if a Port0 pkt
* arrives at _just_ the wrong time, and we
* handle that by setting chk0rcv;
*/
to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
chk0rcv = 1;
}
if (unlikely(gpiostatus)) {
/*
* Some unexpected bits remain. If they could have
* caused the interrupt, complain and clear.
* MEA: this is almost certainly non-ideal.
* we should look into auto-disable of unexpected
* GPIO interrupts, possibly on a "three strikes"
* basis.
*/
u32 mask;
mask = ipath_read_kreg32(
dd, dd->ipath_kregs->kr_gpio_mask);
if (mask & gpiostatus) {
ipath_dbg("Unexpected GPIO IRQ bits %x\n",
gpiostatus & mask);
to_clear |= (gpiostatus & mask);
}
}
if (to_clear) {
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
(u64) to_clear);
}
}
chk0rcv |= istat & port0rbits;
......@@ -911,9 +1049,9 @@ irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
istat &= ~port0rbits;
}
if (istat & ((infinipath_i_rcvavail_mask <<
if (istat & ((dd->ipath_i_rcvavail_mask <<
INFINIPATH_I_RCVAVAIL_SHIFT)
| (infinipath_i_rcvurg_mask <<
| (dd->ipath_i_rcvurg_mask <<
INFINIPATH_I_RCVURG_SHIFT)))
handle_urcv(dd, istat);
......
......@@ -39,6 +39,8 @@
*/
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <asm/io.h>
#include "ipath_common.h"
......@@ -62,7 +64,7 @@ struct ipath_portdata {
/* rcvhdrq base, needs mmap before useful */
void *port_rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
volatile __le64 *port_rcvhdrtail_kvaddr;
void *port_rcvhdrtail_kvaddr;
/*
* temp buffer for expected send setup, allocated at open, instead
* of each setup call
......@@ -79,8 +81,8 @@ struct ipath_portdata {
dma_addr_t port_rcvhdrq_phys;
dma_addr_t port_rcvhdrqtailaddr_phys;
/*
* number of opens on this instance (0 or 1; ignoring forks, dup,
* etc. for now)
* number of opens (including slave subports) on this instance
* (ignoring forks, dup, etc. for now)
*/
int port_cnt;
/*
......@@ -89,6 +91,10 @@ struct ipath_portdata {
*/
/* instead of calculating it */
unsigned port_port;
/* non-zero if port is being shared. */
u16 port_subport_cnt;
/* non-zero if port is being shared. */
u16 port_subport_id;
/* chip offset of PIO buffers for this port */
u32 port_piobufs;
/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
......@@ -121,6 +127,16 @@ struct ipath_portdata {
u16 port_pkeys[4];
/* so file ops can get at unit */
struct ipath_devdata *port_dd;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subport_uregbase;
/* An array of pages for the eager receive buffers * N */
void *subport_rcvegrbuf;
/* An array of pages for the eager header queue entries * N */
void *subport_rcvhdr_base;
/* The version of the library which opened this port */
u32 userversion;
/* Bitmask of active slaves */
u32 active_slaves;
};
struct sk_buff;
......@@ -132,6 +148,11 @@ struct _ipath_layer {
void *l_arg;
};
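/* port 0 eager receive bookkeeping: the skb and its DMA mapping */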
struct ipath_skbinfo {
struct sk_buff *skb;
dma_addr_t phys;
};
struct ipath_devdata {
struct list_head ipath_list;
......@@ -154,7 +175,7 @@ struct ipath_devdata {
/* ipath_cfgports pointers */
struct ipath_portdata **ipath_pd;
/* sk_buffs used by port 0 eager receive queue */
struct sk_buff **ipath_port0_skbs;
struct ipath_skbinfo *ipath_port0_skbinfo;
/* kvirt address of 1st 2k pio buffer */
void __iomem *ipath_pio2kbase;
/* kvirt address of 1st 4k pio buffer */
......@@ -315,12 +336,16 @@ struct ipath_devdata {
u8 ipath_ht_slave_off;
/* for write combining settings */
unsigned long ipath_wc_cookie;
unsigned long ipath_wc_base;
unsigned long ipath_wc_len;
/* ref count for each pkey */
atomic_t ipath_pkeyrefs[4];
/* shadow copy of all exptids physaddr; used only by funcsim */
u64 *ipath_tidsimshadow;
/* shadow copy of struct page *'s for exp tid pages */
struct page **ipath_pageshadow;
/* shadow copy of dma handles for exp tid pages */
dma_addr_t *ipath_physshadow;
/* lock to workaround chip bug 9437 */
spinlock_t ipath_tid_lock;
......@@ -402,6 +427,9 @@ struct ipath_devdata {
unsigned long ipath_rcvctrl;
/* shadow kr_sendctrl */
unsigned long ipath_sendctrl;
/* ports waiting for PIOavail intr */
unsigned long ipath_portpiowait;
unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
/* value we put in kr_rcvhdrcnt */
u32 ipath_rcvhdrcnt;
......@@ -465,8 +493,6 @@ struct ipath_devdata {
u32 ipath_htwidth;
/* HT speed (200,400,800,1000) from HT config */
u32 ipath_htspeed;
/* ports waiting for PIOavail intr */
unsigned long ipath_portpiowait;
/*
* number of sequential ibcstatus change for polling active/quiet
* (i.e., link not coming up).
......@@ -510,8 +536,47 @@ struct ipath_devdata {
u32 ipath_lli_counter;
/* local link integrity errors */
u32 ipath_lli_errors;
/*
* Above counts only cases where _successive_ LocalLinkIntegrity
* errors were seen in the receive headers of kern-packets.
* Below are the three (monotonically increasing) counters
* maintained via GPIO interrupts on iba6120-rev2.
*/
u32 ipath_rxfc_unsupvl_errs;
u32 ipath_overrun_thresh_errs;
u32 ipath_lli_errs;
/*
* Not all devices managed by a driver instance are the same
* type, so these fields must be per-device.
*/
u64 ipath_i_bitsextant;
ipath_err_t ipath_e_bitsextant;
ipath_err_t ipath_hwe_bitsextant;
/*
* Below should be computable from number of ports,
* since they are never modified.
*/
u32 ipath_i_rcvavail_mask;
u32 ipath_i_rcvurg_mask;
/*
* Register bits for selecting i2c direction and values, used for
* I2C serial flash.
*/
u16 ipath_gpio_sda_num;
u16 ipath_gpio_scl_num;
u64 ipath_gpio_sda;
u64 ipath_gpio_scl;
};
/* Private data for file operations */
struct ipath_filedata {
struct ipath_portdata *pd;
unsigned subport;
unsigned tidcursor;
};
extern struct list_head ipath_dev_list;
extern spinlock_t ipath_devs_lock;
extern struct ipath_devdata *ipath_lookup(int unit);
......@@ -521,6 +586,7 @@ int ipath_enable_wc(struct ipath_devdata *dd);
void ipath_disable_wc(struct ipath_devdata *dd);
int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
void ipath_shutdown_device(struct ipath_devdata *);
void ipath_disarm_senderrbufs(struct ipath_devdata *);
struct file_operations;
int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
......@@ -572,7 +638,11 @@ int ipath_set_lid(struct ipath_devdata *, u32, u8);
int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
/* for use in system calls, where we want to know device type, etc. */
#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
#define subport_fp(fp) \
((struct ipath_filedata *)(fp)->private_data)->subport
#define tidcursor_fp(fp) \
((struct ipath_filedata *)(fp)->private_data)->tidcursor
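A minimal sketch, not part of this patch, of how a file operation might use the accessors above now that ->private_data points at a struct ipath_filedata rather than directly at the portdata; the function name is hypothetical.
static int example_subport_of(struct file *fp)
{
	struct ipath_portdata *pd = port_fp(fp);  /* master port's data */
	unsigned subport = subport_fp(fp);        /* 0 for the master opener */

	/* per-port state hangs off pd; subport selects this opener's slice */
	return pd ? (int) subport : -EINVAL;
}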
/*
* values for ipath_flags
......@@ -612,6 +682,15 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
/* can miss port0 rx interrupts */
#define IPATH_POLL_RX_INTR 0x40000
#define IPATH_DISABLED 0x80000 /* administratively disabled */
/* Use GPIO interrupts for new counters */
#define IPATH_GPIO_ERRINTRS 0x100000
/* Bits in GPIO for the added interrupts */
#define IPATH_GPIO_PORT0_BIT 2
#define IPATH_GPIO_RXUVL_BIT 3
#define IPATH_GPIO_OVRUN_BIT 4
#define IPATH_GPIO_LLI_BIT 5
#define IPATH_GPIO_ERRINTR_MASK 0x38
/* portdata flag bit offsets */
/* waiting for a packet to arrive */
......@@ -798,6 +877,13 @@ void ipath_exit_ipathfs(void);
int ipathfs_add_device(struct ipath_devdata *);
int ipathfs_remove_device(struct ipath_devdata *);
/*
* dma_addr wrappers - all 0's invalid for hw
*/
dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
size_t, int);
dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
/*
* Flush write combining store buffers (if present) and perform a write
* barrier.
......@@ -855,4 +941,20 @@ extern struct mutex ipath_mutex;
#endif /* _IPATH_DEBUGGING */
/*
* this is used for formatting hw error messages...
*/
struct ipath_hwerror_msgs {
u64 mask;
const char *msg;
};
#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
/* in ipath_intr.c... */
void ipath_format_hwerrors(u64 hwerrs,
const struct ipath_hwerror_msgs *hwerrmsgs,
size_t nhwerrmsgs,
char *msg, size_t lmsg);
#endif /* _IPATH_KERNEL_H */
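A minimal sketch, not taken from this patch, of how a chip-specific source file could use ipath_format_hwerrors() together with the INFINIPATH_HWE_MSG() initializer declared above; the table entries and the reporting function are illustrative only, built from hardware-error bit names defined elsewhere in this series.
static const struct ipath_hwerror_msgs example_hwerror_msgs[] = {
	INFINIPATH_HWE_MSG(MEMBISTFAILED, "memory BIST failed"),
	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB-to-SPC parity"),
	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IB-from-SPC parity"),
};

static void example_report_hwerrors(struct ipath_devdata *dd, u64 hwerrs)
{
	char msg[512];

	ipath_format_hwerrors(hwerrs, example_hwerror_msgs,
			      ARRAY_SIZE(example_hwerror_msgs),
			      msg, sizeof msg);
	/* msg now holds the decoded error names; log via the usual path */
}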
......@@ -118,9 +118,10 @@ void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
* Check the IB SGE for validity and initialize our internal version
* of it.
*/
int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc)
{
struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
struct ipath_mregion *mr;
unsigned n, m;
size_t off;
......@@ -140,7 +141,8 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
goto bail;
}
mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
......@@ -188,9 +190,10 @@ int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
*
* Return 1 if successful, otherwise 0.
*/
int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ipath_lkey_table *rkt = &dev->lk_table;
struct ipath_sge *sge = &ss->sge;
struct ipath_mregion *mr;
......@@ -214,7 +217,8 @@ int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
}
mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
if (unlikely(mr == NULL || mr->lkey != rkey)) {
if (unlikely(mr == NULL || mr->lkey != rkey ||
qp->ibqp.pd != mr->pd)) {
ret = 0;
goto bail;
}
......
......@@ -87,7 +87,8 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
struct ipath_devdata *dd = to_idev(ibdev)->dd;
u32 vendor, majrev, minrev;
if (smp->attr_mod)
/* GUID 0 is illegal */
if (smp->attr_mod || (dd->ipath_guid == 0))
smp->status |= IB_SMP_INVALID_FIELD;
nip->base_version = 1;
......@@ -131,10 +132,15 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
* We only support one GUID for now. If this changes, the
* portinfo.guid_cap field needs to be updated too.
*/
if (startgx == 0)
/* The first is a copy of the read-only HW GUID. */
*p = to_idev(ibdev)->dd->ipath_guid;
if (startgx == 0) {
__be64 g = to_idev(ibdev)->dd->ipath_guid;
if (g == 0)
/* GUID 0 is illegal */
smp->status |= IB_SMP_INVALID_FIELD;
else
/* The first is a copy of the read-only HW GUID. */
*p = g;
} else
smp->status |= IB_SMP_INVALID_FIELD;
return reply(smp);
......
......@@ -138,6 +138,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
goto bail;
}
mr->mr.pd = pd;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
mr->mr.length = 0;
......@@ -197,6 +198,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
goto bail;
}
mr->mr.pd = pd;
mr->mr.user_base = region->user_base;
mr->mr.iova = region->virt_base;
mr->mr.length = region->length;
......@@ -289,6 +291,7 @@ struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
fmr->mr.pd = pd;
fmr->mr.user_base = 0;
fmr->mr.iova = 0;
fmr->mr.length = 0;
......
......@@ -335,6 +335,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
qp->r_nak_state = 0;
qp->r_wrid_valid = 0;
qp->s_rnr_timeout = 0;
qp->s_head = 0;
qp->s_tail = 0;
......@@ -342,6 +343,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
qp->s_last = 0;
qp->s_ssn = 1;
qp->s_lsn = 0;
qp->s_wait_credit = 0;
if (qp->r_rq.wq) {
qp->r_rq.wq->head = 0;
qp->r_rq.wq->tail = 0;
......@@ -352,12 +354,13 @@ static void ipath_reset_qp(struct ipath_qp *qp)
/**
* ipath_error_qp - put a QP into an error state
* @qp: the QP to put into an error state
* @err: the receive completion error to signal if a RWQE is active
*
* Flushes both send and receive work queues.
* QP s_lock should be held and interrupts disabled.
*/
void ipath_error_qp(struct ipath_qp *qp)
void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_wc wc;
......@@ -373,7 +376,6 @@ void ipath_error_qp(struct ipath_qp *qp)
list_del_init(&qp->piowait);
spin_unlock(&dev->pending_lock);
wc.status = IB_WC_WR_FLUSH_ERR;
wc.vendor_err = 0;
wc.byte_len = 0;
wc.imm_data = 0;
......@@ -385,6 +387,12 @@ void ipath_error_qp(struct ipath_qp *qp)
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
if (qp->r_wrid_valid) {
qp->r_wrid_valid = 0;
wc.status = err;
ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
}
wc.status = IB_WC_WR_FLUSH_ERR;
while (qp->s_last != qp->s_head) {
struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
......@@ -501,7 +509,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
break;
case IB_QPS_ERR:
ipath_error_qp(qp);
ipath_error_qp(qp, IB_WC_GENERAL_ERR);
break;
default:
......@@ -516,7 +524,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->remote_qpn = attr->dest_qp_num;
if (attr_mask & IB_QP_SQ_PSN) {
qp->s_next_psn = attr->sq_psn;
qp->s_psn = qp->s_next_psn = attr->sq_psn;
qp->s_last_psn = qp->s_next_psn - 1;
}
......
......@@ -201,6 +201,18 @@ int ipath_make_rc_req(struct ipath_qp *qp,
qp->s_rnr_timeout)
goto done;
/* Limit the number of packets sent without an ACK. */
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
qp->s_wait_credit = 1;
dev->n_rc_stalls++;
spin_lock(&dev->pending_lock);
if (list_empty(&qp->timerwait))
list_add_tail(&qp->timerwait,
&dev->pending[dev->pending_index]);
spin_unlock(&dev->pending_lock);
goto done;
}
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
hwords = 5;
bth0 = 0;
......@@ -221,7 +233,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
/* Check if send work queue is empty. */
if (qp->s_tail == qp->s_head)
goto done;
qp->s_psn = wqe->psn = qp->s_next_psn;
wqe->psn = qp->s_next_psn;
newreq = 1;
}
/*
......@@ -393,12 +405,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
/*
* Request an ACK every 1/2 MB to avoid retransmit
* timeouts.
*/
if (((wqe->length - len) % (512 * 1024)) == 0)
bth2 |= 1 << 31;
len = pmtu;
break;
}
......@@ -435,12 +441,6 @@ int ipath_make_rc_req(struct ipath_qp *qp,
ss = &qp->s_sge;
len = qp->s_len;
if (len > pmtu) {
/*
* Request an ACK every 1/2 MB to avoid retransmit
* timeouts.
*/
if (((wqe->length - len) % (512 * 1024)) == 0)
bth2 |= 1 << 31;
len = pmtu;
break;
}
......@@ -498,6 +498,8 @@ int ipath_make_rc_req(struct ipath_qp *qp,
*/
goto done;
}
if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
bth2 |= 1 << 31; /* Request ACK. */
qp->s_len -= len;
qp->s_hdrwords = hwords;
qp->s_cur_sge = ss;
......@@ -737,6 +739,15 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
return;
}
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
if (qp->s_wait_credit) {
qp->s_wait_credit = 0;
tasklet_hi_schedule(&qp->s_task);
}
qp->s_last_psn = psn;
}
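For reference, a standalone sketch of the PSN credit window added above, assuming the 2048-packet IPATH_PSN_CREDIT defined in ipath_verbs.h and a signed 24-bit circular comparison; the helper names are hypothetical and the comparison shown is only one plausible implementation, not necessarily the driver's own ipath_cmp24().
#define EXAMPLE_PSN_CREDIT 2048

static int example_cmp24(u32 a, u32 b)
{
	/* treat the low 24 bits as a signed circular distance */
	return (int) ((a - b) << 8) >> 8;
}

static int example_must_stall(u32 s_psn, u32 s_last_psn)
{
	/*
	 * Stall once more than EXAMPLE_PSN_CREDIT packets are unacked;
	 * the request path also sets the ACK-request bit one packet
	 * before this limit so the window reopens promptly.
	 */
	return example_cmp24(s_psn, s_last_psn + EXAMPLE_PSN_CREDIT) > 0;
}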
/**
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
......@@ -805,7 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
* The last valid PSN seen is the previous
* request's.
*/
qp->s_last_psn = wqe->psn - 1;
update_last_psn(qp, wqe->psn - 1);
/* Retry this request. */
ipath_restart_rc(qp, wqe->psn, &wc);
/*
......@@ -864,7 +875,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
ipath_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
qp->s_last_psn = psn;
update_last_psn(qp, psn);
ret = 1;
goto bail;
......@@ -883,7 +894,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
goto bail;
/* The last valid PSN is the previous PSN. */
qp->s_last_psn = psn - 1;
update_last_psn(qp, psn - 1);
dev->n_rc_resends += (int)qp->s_psn - (int)psn;
......@@ -898,7 +909,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
case 3: /* NAK */
/* The last valid PSN seen is the previous request's. */
if (qp->s_last != qp->s_tail)
qp->s_last_psn = wqe->psn - 1;
update_last_psn(qp, wqe->psn - 1);
switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
IPATH_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
......@@ -1071,7 +1082,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
* since we don't want s_sge modified.
*/
qp->s_len -= pmtu;
qp->s_last_psn = psn;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_copy_sge(&qp->s_sge, data, pmtu);
goto bail;
......@@ -1223,7 +1234,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
* Address range must be a subset of the original
* request and start on pmtu boundaries.
*/
ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) {
......@@ -1282,6 +1293,14 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
return 1;
}
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
spin_lock_irq(&qp->s_lock);
qp->state = IB_QPS_ERR;
ipath_error_qp(qp, err);
spin_unlock_irq(&qp->s_lock);
}
/**
* ipath_rc_rcv - process an incoming RC packet
* @dev: the device this packet came in on
......@@ -1309,6 +1328,10 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
struct ib_reth *reth;
int header_in_data;
/* Validate the SLID. See Ch. 9.6.1.5 */
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
goto done;
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
......@@ -1370,8 +1393,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
*/
if (qp->r_ack_state >= OP(COMPARE_SWAP))
goto send_ack;
/* XXX Flush WQEs */
qp->state = IB_QPS_ERR;
ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
qp->r_ack_state = OP(SEND_ONLY);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
......@@ -1477,9 +1499,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
ipath_copy_sge(&qp->r_sge, data, tlen);
qp->r_msn++;
if (opcode == OP(RDMA_WRITE_LAST) ||
opcode == OP(RDMA_WRITE_ONLY))
if (!qp->r_wrid_valid)
break;
qp->r_wrid_valid = 0;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.opcode = IB_WC_RECV;
......@@ -1517,7 +1539,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
ok = ipath_rkey_ok(dev, &qp->r_sge,
ok = ipath_rkey_ok(qp, &qp->r_sge,
qp->r_len, vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok))
......@@ -1559,7 +1581,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey & NAK */
ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
qp->s_rdma_len, vaddr, rkey,
IB_ACCESS_REMOTE_READ);
if (unlikely(!ok)) {
......@@ -1618,7 +1640,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
goto nack_inv;
rkey = be32_to_cpu(ateth->rkey);
/* Check rkey & NAK */
if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
sizeof(u64), vaddr, rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto nack_acc;
......@@ -1670,8 +1692,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
* is pending though.
*/
if (qp->r_ack_state < OP(COMPARE_SWAP)) {
/* XXX Flush WQEs */
qp->state = IB_QPS_ERR;
ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
qp->r_ack_state = OP(RDMA_WRITE_ONLY);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
......
......@@ -134,10 +134,24 @@
#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
/* waldo specific -- find the rest in ipath_6110.c */
#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
/* monty specific -- find the rest in ipath_6120.c */
#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
/* kr_hwdiagctrl bits */
#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
......@@ -209,9 +223,9 @@
/* combination link status states that we use with some frequency */
#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
<< INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
(INFINIPATH_IBCS_LINKSTATE_MASK \
<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
<<INFINIPATH_IBCS_LINKSTATE_SHIFT))
#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
(INFINIPATH_IBCS_LT_STATE_LINKUP \
......@@ -302,6 +316,17 @@
typedef u64 ipath_err_t;
/* The following change with the type of device, so
* need to be part of the ipath_devdata struct, or
* we could have problems plugging in devices of
* different types (e.g. one HT, one PCIE)
* in one system, to be managed by one driver.
* On the other hand, this file may also be included
* by other code, so leave the declarations here
* temporarily. Minor footprint issue if common-model
* linker used, none if C89+ linker used.
*/
/* mask of defined bits for various registers */
extern u64 infinipath_i_bitsextant;
extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
......@@ -309,13 +334,6 @@ extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
/* masks that are different in various chips, or only exist in some chips */
extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
/*
* register bits for selecting i2c direction and values, used for I2C serial
* flash
*/
extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
extern u64 ipath_gpio_sda, ipath_gpio_scl;
/*
* These are the infinipath general register numbers (not offsets).
* The kernel registers are used directly, those beyond the kernel
......
......@@ -108,7 +108,6 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int user = to_ipd(qp->ibqp.pd)->user;
int i, j, ret;
struct ib_wc wc;
......@@ -119,8 +118,7 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
continue;
/* Check LKEY */
if ((user && wqe->sg_list[i].lkey == 0) ||
!ipath_lkey_ok(&dev->lk_table,
&qp->r_sg_list[j], &wqe->sg_list[i],
!ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
......@@ -231,6 +229,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
}
}
spin_unlock_irqrestore(&rq->lock, flags);
qp->r_wrid_valid = 1;
bail:
return ret;
......@@ -326,7 +325,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
case IB_WR_RDMA_WRITE:
if (wqe->length == 0)
break;
if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_WRITE))) {
......@@ -350,7 +349,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
break;
case IB_WR_RDMA_READ:
if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_READ)))
......@@ -365,7 +364,7 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
wqe->wr.wr.rdma.remote_addr,
wqe->wr.wr.rdma.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
......@@ -575,8 +574,7 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
}
if (wr->sg_list[i].length == 0)
continue;
if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
&wqe->sg_list[j], &wr->sg_list[i],
if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
acc)) {
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EINVAL;
......
......@@ -104,11 +104,6 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
u32 sz;
struct ib_srq *ret;
if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
ret = ERR_PTR(-ENOMEM);
goto done;
}
if (srq_init_attr->attr.max_wr == 0) {
ret = ERR_PTR(-EINVAL);
goto done;
......@@ -180,10 +175,17 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
spin_lock_init(&srq->rq.lock);
srq->rq.wq->head = 0;
srq->rq.wq->tail = 0;
srq->rq.max_sge = srq_init_attr->attr.max_sge;
srq->limit = srq_init_attr->attr.srq_limit;
spin_lock(&dev->n_srqs_lock);
if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
spin_unlock(&dev->n_srqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_wq;
}
dev->n_srqs_allocated++;
spin_unlock(&dev->n_srqs_lock);
ret = &srq->ibsrq;
goto done;
......@@ -351,7 +353,12 @@ int ipath_destroy_srq(struct ib_srq *ibsrq)
struct ipath_srq *srq = to_isrq(ibsrq);
struct ipath_ibdev *dev = to_idev(ibsrq->device);
spin_lock(&dev->n_srqs_lock);
dev->n_srqs_allocated--;
spin_unlock(&dev->n_srqs_lock);
if (srq->ip)
kref_put(&srq->ip->ref, ipath_release_mmap_info);
else
vfree(srq->rq.wq);
kfree(srq);
......
......@@ -257,7 +257,7 @@ static ssize_t store_guid(struct device *dev,
struct ipath_devdata *dd = dev_get_drvdata(dev);
ssize_t ret;
unsigned short guid[8];
__be64 nguid;
__be64 new_guid;
u8 *ng;
int i;
......@@ -266,7 +266,7 @@ static ssize_t store_guid(struct device *dev,
&guid[4], &guid[5], &guid[6], &guid[7]) != 8)
goto invalid;
ng = (u8 *) &nguid;
ng = (u8 *) &new_guid;
for (i = 0; i < 8; i++) {
if (guid[i] > 0xff)
......@@ -274,7 +274,10 @@ static ssize_t store_guid(struct device *dev,
ng[i] = guid[i];
}
dd->ipath_guid = nguid;
if (new_guid == 0)
goto invalid;
dd->ipath_guid = new_guid;
dd->ipath_nguid = 1;
ret = strlen(buf);
......@@ -297,6 +300,16 @@ static ssize_t show_nguid(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
}
static ssize_t show_nports(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct ipath_devdata *dd = dev_get_drvdata(dev);
/* Return the number of user ports available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
}
static ssize_t show_serial(struct device *dev,
struct device_attribute *attr,
char *buf)
......@@ -608,6 +621,7 @@ static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
......@@ -623,6 +637,7 @@ static struct attribute *dev_attributes[] = {
&dev_attr_mlid.attr,
&dev_attr_mtu.attr,
&dev_attr_nguid.attr,
&dev_attr_nports.attr,
&dev_attr_serial.attr,
&dev_attr_status.attr,
&dev_attr_status_str.attr,
......
......@@ -246,6 +246,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
struct ib_reth *reth;
int header_in_data;
/* Validate the SLID. See Ch. 9.6.1.5 */
if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
goto done;
/* Check for GRH */
if (!has_grh) {
ohdr = &hdr->u.oth;
......@@ -440,7 +444,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
int ok;
/* Check rkey */
ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
vaddr, rkey,
IB_ACCESS_REMOTE_WRITE);
if (unlikely(!ok)) {
......
......@@ -39,7 +39,6 @@
static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
u32 *lengthp, struct ipath_sge_state *ss)
{
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
int user = to_ipd(qp->ibqp.pd)->user;
int i, j, ret;
struct ib_wc wc;
......@@ -50,8 +49,7 @@ static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
continue;
/* Check LKEY */
if ((user && wqe->sg_list[i].lkey == 0) ||
!ipath_lkey_ok(&dev->lk_table,
j ? &ss->sg_list[j - 1] : &ss->sge,
!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
&wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
*lengthp += wqe->sg_list[i].length;
......@@ -343,7 +341,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
if (wr->sg_list[i].length == 0)
continue;
if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
if (!ipath_lkey_ok(qp, ss.num_sge ?
sg_list + ss.num_sge - 1 : &ss.sge,
&wr->sg_list[i], 0)) {
ret = -EINVAL;
......
......@@ -89,6 +89,62 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
return ret;
}
/**
* ipath_map_page - a safety wrapper around pci_map_page()
*
* A dma_addr of all 0's is interpreted by the chip as "disabled".
* Unfortunately, it can also be a valid dma_addr returned on some
* architectures.
*
* The powerpc iommu assigns dma_addrs in ascending order, so we don't
have to bother with retries or mapping a dummy page to ensure we
* don't just get the same mapping again.
*
* I'm sure we won't be so lucky with other iommu's, so FIXME.
*/
dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
unsigned long offset, size_t size, int direction)
{
dma_addr_t phys;
phys = pci_map_page(hwdev, page, offset, size, direction);
if (phys == 0) {
pci_unmap_page(hwdev, phys, size, direction);
phys = pci_map_page(hwdev, page, offset, size, direction);
/*
* FIXME: If we get 0 again, we should keep this page,
* map another, then free the 0 page.
*/
}
return phys;
}
/**
* ipath_map_single - a safety wrapper around pci_map_single()
*
* Same idea as ipath_map_page().
*/
dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
int direction)
{
dma_addr_t phys;
phys = pci_map_single(hwdev, ptr, size, direction);
if (phys == 0) {
pci_unmap_single(hwdev, phys, size, direction);
phys = pci_map_single(hwdev, ptr, size, direction);
/*
* FIXME: If we get 0 again, we should keep this page,
* map another, then free the 0 page.
*/
}
return phys;
}
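A minimal caller-side sketch, not part of this patch, of replacing a direct pci_map_page() with the wrapper above when programming an expected-TID entry, so the dma handle can later be found in the new ipath_physshadow[] array; the function name and index parameter are illustrative, and dd->pcidev is assumed to be the device's struct pci_dev as elsewhere in the driver.
static dma_addr_t example_map_tid_page(struct ipath_devdata *dd,
				       struct page *page, unsigned tid)
{
	dma_addr_t phys = ipath_map_page(dd->pcidev, page, 0, PAGE_SIZE,
					 PCI_DMA_FROMDEVICE);

	dd->ipath_pageshadow[tid] = page;   /* struct page * shadow */
	dd->ipath_physshadow[tid] = phys;   /* dma handle; never 0 if valid */
	return phys;
}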
/**
* ipath_get_user_pages - lock user pages into memory
* @start_page: the start page
......
......@@ -898,7 +898,8 @@ int ipath_get_counters(struct ipath_devdata *dd,
ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
dd->ipath_rxfc_unsupvl_errs;
cntrs->port_rcv_remphys_errors =
ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
cntrs->port_xmit_discards =
......@@ -911,8 +912,10 @@ int ipath_get_counters(struct ipath_devdata *dd,
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
cntrs->port_rcv_packets =
ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
cntrs->local_link_integrity_errors =
(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
dd->ipath_lli_errs : dd->ipath_lli_errors;
cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
ret = 0;
......@@ -1199,6 +1202,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
struct ipath_ah *ah;
struct ib_ah *ret;
struct ipath_ibdev *dev = to_idev(pd->device);
unsigned long flags;
/* A multicast address requires a GRH (see ch. 8.4.1). */
if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
......@@ -1225,16 +1229,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
goto bail;
}
spin_lock(&dev->n_ahs_lock);
spin_lock_irqsave(&dev->n_ahs_lock, flags);
if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
spin_unlock(&dev->n_ahs_lock);
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
ret = ERR_PTR(-ENOMEM);
goto bail;
}
dev->n_ahs_allocated++;
spin_unlock(&dev->n_ahs_lock);
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
/* ib_create_ah() will initialize ah->ibah. */
ah->attr = *ah_attr;
......@@ -1255,10 +1259,11 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
{
struct ipath_ibdev *dev = to_idev(ibah->device);
struct ipath_ah *ah = to_iah(ibah);
unsigned long flags;
spin_lock(&dev->n_ahs_lock);
spin_lock_irqsave(&dev->n_ahs_lock, flags);
dev->n_ahs_allocated--;
spin_unlock(&dev->n_ahs_lock);
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
kfree(ah);
......@@ -1380,11 +1385,13 @@ static int enable_timer(struct ipath_devdata *dd)
* processing.
*/
if (dd->ipath_flags & IPATH_GPIO_INTR) {
u64 val;
ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
0x2074076542310ULL);
/* Enable GPIO bit 2 interrupt */
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
(u64) (1 << 2));
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
val |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, val);
}
init_timer(&dd->verbs_timer);
......@@ -1399,8 +1406,17 @@ static int enable_timer(struct ipath_devdata *dd)
static int disable_timer(struct ipath_devdata *dd)
{
/* Disable GPIO bit 2 interrupt */
if (dd->ipath_flags & IPATH_GPIO_INTR)
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
if (dd->ipath_flags & IPATH_GPIO_INTR) {
u64 val;
/* Disable GPIO bit 2 interrupt */
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, val);
/*
* We might want to undo changes to debugportselect,
* but how?
*/
}
del_timer_sync(&dd->verbs_timer);
......@@ -1683,6 +1699,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
"RC OTH NAKs %d\n"
"RC timeouts %d\n"
"RC RDMA dup %d\n"
"RC stalls %d\n"
"piobuf wait %d\n"
"no piobuf %d\n"
"PKT drops %d\n"
......@@ -1690,7 +1707,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
dev->n_other_naks, dev->n_timeouts,
dev->n_rdma_dup_busy, dev->n_piowait,
dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
const struct ipath_opcode_stats *si = &dev->opstats[i];
......
......@@ -220,6 +220,7 @@ struct ipath_segarray {
};
struct ipath_mregion {
struct ib_pd *pd; /* shares refcnt of ibmr.pd */
u64 user_base; /* User's address for this region */
u64 iova; /* IB start address of this region */
size_t length;
......@@ -364,12 +365,14 @@ struct ipath_qp {
u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
u8 r_reuse_sge; /* for UC receive errors */
u8 r_sge_inx; /* current index into sg_list */
u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
u8 qp_access_flags;
u8 s_max_sge; /* size of s_wq->sg_list */
u8 s_retry_cnt; /* number of times to retry */
u8 s_rnr_retry_cnt;
u8 s_retry; /* requester retry counter */
u8 s_rnr_retry; /* requester RNR retry counter */
u8 s_wait_credit; /* limit number of unacked packets sent */
u8 s_pkey_index; /* PKEY index to use */
u8 timeout; /* Timeout for this QP */
enum ib_mtu path_mtu;
......@@ -393,6 +396,8 @@ struct ipath_qp {
#define IPATH_S_BUSY 0
#define IPATH_S_SIGNAL_REQ_WR 1
#define IPATH_PSN_CREDIT 2048
/*
* Since struct ipath_swqe is not a fixed size, we can't simply index into
* struct ipath_qp.s_wq. This function does the array index computation.
......@@ -521,6 +526,7 @@ struct ipath_ibdev {
u32 n_rnr_naks;
u32 n_other_naks;
u32 n_timeouts;
u32 n_rc_stalls;
u32 n_pkt_drops;
u32 n_vl15_dropped;
u32 n_wqe_errs;
......@@ -634,6 +640,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
int ipath_destroy_qp(struct ib_qp *ibqp);
void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
......@@ -653,12 +661,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
......@@ -683,10 +685,10 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
struct ib_sge *sge, int acc);
int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
u32 len, u64 vaddr, u32 rkey, int acc);
int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
......
......@@ -38,13 +38,23 @@
#include "ipath_kernel.h"
/**
* ipath_unordered_wc - indicate whether write combining is ordered
* ipath_enable_wc - enable write combining for MMIO writes to the device
* @dd: infinipath device
*
* PowerPC systems (at least those in the 970 processor family)
* write partially filled store buffers in address order, but will write
* completely filled store buffers in "random" order, and therefore must
* have serialization for correctness with current InfiniPath chips.
* Nothing to do on PowerPC, so just return without error.
*/
int ipath_enable_wc(struct ipath_devdata *dd)
{
return 0;
}
/**
* ipath_unordered_wc - indicate whether write combining is unordered
*
* Because our performance depends on our ability to do write
* combining mmio writes in the most efficient way, we need to
* know if we are on a processor that may reorder stores when
* write combining.
*/
int ipath_unordered_wc(void)
{
......
......@@ -123,6 +123,8 @@ int ipath_enable_wc(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
"cookie is %d\n", cookie);
dd->ipath_wc_cookie = cookie;
dd->ipath_wc_base = (unsigned long) pioaddr;
dd->ipath_wc_len = (unsigned long) piolen;
}
}
......@@ -136,9 +138,16 @@ int ipath_enable_wc(struct ipath_devdata *dd)
void ipath_disable_wc(struct ipath_devdata *dd)
{
if (dd->ipath_wc_cookie) {
int r;
ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
mtrr_del(dd->ipath_wc_cookie, 0, 0);
dd->ipath_wc_cookie = 0;
r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
dd->ipath_wc_len);
if (r < 0)
dev_info(&dd->pcidev->dev,
"mtrr_del(%lx, %lx, %lx) failed: %d\n",
dd->ipath_wc_cookie, dd->ipath_wc_base,
dd->ipath_wc_len, r);
dd->ipath_wc_cookie = 0; /* even on failure */
}
}
......
config INFINIBAND_ISER
tristate "ISCSI RDMA Protocol"
tristate "iSCSI Extensions for RDMA (iSER)"
depends on INFINIBAND && SCSI && INET
select SCSI_ISCSI_ATTRS
---help---
Support for the ISCSI RDMA Protocol over InfiniBand. This
allows you to access storage devices that speak ISER/ISCSI
over InfiniBand.
Support for the iSCSI Extensions for RDMA (iSER) Protocol
over InfiniBand. This allows you to access storage devices
that speak iSCSI over iSER over InfiniBand.
The ISER protocol is defined by IETF.
See <http://www.ietf.org/>.
The iSER protocol is defined by IETF.
See <http://www.ietf.org/internet-drafts/draft-ietf-ips-iser-05.txt>
and <http://www.infinibandta.org/members/spec/iser_annex_060418.pdf>
......@@ -317,6 +317,8 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
struct iscsi_iser_conn *iser_conn = conn->dd_data;
iscsi_conn_teardown(cls_conn);
if (iser_conn->ib_conn)
iser_conn->ib_conn->iser_conn = NULL;
kfree(iser_conn);
}
......
......@@ -192,7 +192,7 @@ struct iser_regd_buf {
struct iser_dto {
struct iscsi_iser_cmd_task *ctask;
struct iscsi_iser_conn *conn;
struct iser_conn *ib_conn;
int notify_enable;
/* vector of registered buffers */
......@@ -355,4 +355,11 @@ int iser_post_send(struct iser_desc *tx_desc);
int iser_conn_state_comp(struct iser_conn *ib_conn,
enum iser_ib_conn_state comp);
int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
struct iser_data_buf *data,
enum iser_data_dir iser_dir,
enum dma_data_direction dma_dir);
void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask);
#endif
......@@ -66,42 +66,6 @@ static void iser_dto_add_regd_buff(struct iser_dto *dto,
dto->regd_vector_len++;
}
static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
struct iser_data_buf *data,
enum iser_data_dir iser_dir,
enum dma_data_direction dma_dir)
{
struct device *dma_device;
iser_ctask->dir[iser_dir] = 1;
dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir);
if (data->dma_nents == 0) {
iser_err("dma_map_sg failed!!!\n");
return -EINVAL;
}
return 0;
}
static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
{
struct device *dma_device;
struct iser_data_buf *data;
dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
if (iser_ctask->dir[ISER_DIR_IN]) {
data = &iser_ctask->data[ISER_DIR_IN];
dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE);
}
if (iser_ctask->dir[ISER_DIR_OUT]) {
data = &iser_ctask->data[ISER_DIR_OUT];
dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE);
}
}
/* Register user buffer memory and initialize passive rdma
* dto descriptor. Total data size is stored in
* iser_ctask->data[ISER_DIR_IN].data_len
......@@ -249,7 +213,7 @@ static int iser_post_receive_control(struct iscsi_conn *conn)
}
recv_dto = &rx_desc->dto;
recv_dto->conn = iser_conn;
recv_dto->ib_conn = iser_conn->ib_conn;
recv_dto->regd_vector_len = 0;
regd_hdr = &rx_desc->hdr_regd_buf;
......@@ -296,7 +260,7 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */
regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
send_dto->conn = iser_conn;
send_dto->ib_conn = iser_conn->ib_conn;
send_dto->notify_enable = 1;
send_dto->regd_vector_len = 0;
......@@ -588,7 +552,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
unsigned long dto_xfer_len)
{
struct iser_dto *dto = &rx_desc->dto;
struct iscsi_iser_conn *conn = dto->conn;
struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
struct iscsi_session *session = conn->iscsi_conn->session;
struct iscsi_cmd_task *ctask;
struct iscsi_iser_cmd_task *iser_ctask;
......@@ -641,7 +605,8 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
void iser_snd_completion(struct iser_desc *tx_desc)
{
struct iser_dto *dto = &tx_desc->dto;
struct iscsi_iser_conn *iser_conn = dto->conn;
struct iser_conn *ib_conn = dto->ib_conn;
struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
struct iscsi_conn *conn = iser_conn->iscsi_conn;
struct iscsi_mgmt_task *mtask;
......@@ -652,7 +617,7 @@ void iser_snd_completion(struct iser_desc *tx_desc)
if (tx_desc->type == ISCSI_TX_DATAOUT)
kmem_cache_free(ig.desc_cache, tx_desc);
atomic_dec(&iser_conn->ib_conn->post_send_buf_count);
atomic_dec(&ib_conn->post_send_buf_count);
write_lock(conn->recv_lock);
if (conn->suspend_tx) {
......@@ -698,14 +663,19 @@ void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask)
void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
{
int deferred;
int is_rdma_aligned = 1;
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
*/
if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL)
if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) {
is_rdma_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN);
if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL)
}
if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
is_rdma_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT);
}
if (iser_ctask->dir[ISER_DIR_IN]) {
deferred = iser_regd_buff_release
......@@ -725,6 +695,8 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
}
}
/* if the data was unaligned, it was already unmapped and then copied */
if (is_rdma_aligned)
iser_dma_unmap_task_data(iser_ctask);
}
......
......@@ -369,6 +369,44 @@ static void iser_page_vec_build(struct iser_data_buf *data,
}
}
int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
struct iser_data_buf *data,
enum iser_data_dir iser_dir,
enum dma_data_direction dma_dir)
{
struct device *dma_device;
iser_ctask->dir[iser_dir] = 1;
dma_device =
iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir);
if (data->dma_nents == 0) {
iser_err("dma_map_sg failed!!!\n");
return -EINVAL;
}
return 0;
}
void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
{
struct device *dma_device;
struct iser_data_buf *data;
dma_device =
iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
if (iser_ctask->dir[ISER_DIR_IN]) {
data = &iser_ctask->data[ISER_DIR_IN];
dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE);
}
if (iser_ctask->dir[ISER_DIR_OUT]) {
data = &iser_ctask->data[ISER_DIR_OUT];
dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE);
}
}
/**
* iser_reg_rdma_mem - Registers memory intended for RDMA,
* obtaining rkey and va
......@@ -394,6 +432,10 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
iser_err("rdma alignment violation %d/%d aligned\n",
aligned_len, mem->size);
iser_data_buf_dump(mem);
/* unmap the command data before accessing it */
iser_dma_unmap_task_data(iser_ctask);
/* allocate copy buf, if we are writing, copy the */
/* unaligned scatterlist, dma map the copy */
if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0)
......
......@@ -571,6 +571,8 @@ void iser_conn_release(struct iser_conn *ib_conn)
/* on EVENT_ADDR_ERROR there's no device yet for this conn */
if (device != NULL)
iser_device_try_release(device);
if (ib_conn->iser_conn)
ib_conn->iser_conn->ib_conn = NULL;
kfree(ib_conn);
}
......@@ -694,7 +696,7 @@ int iser_post_recv(struct iser_desc *rx_desc)
struct iser_dto *recv_dto = &rx_desc->dto;
/* Retrieve conn */
ib_conn = recv_dto->conn->ib_conn;
ib_conn = recv_dto->ib_conn;
iser_dto_to_iov(recv_dto, iov, 2);
......@@ -727,7 +729,7 @@ int iser_post_send(struct iser_desc *tx_desc)
struct iser_conn *ib_conn;
struct iser_dto *dto = &tx_desc->dto;
ib_conn = dto->conn->ib_conn;
ib_conn = dto->ib_conn;
iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
......@@ -774,7 +776,7 @@ static void iser_comp_error_worker(void *data)
static void iser_handle_comp_error(struct iser_desc *desc)
{
struct iser_dto *dto = &desc->dto;
struct iser_conn *ib_conn = dto->conn->ib_conn;
struct iser_conn *ib_conn = dto->ib_conn;
iser_dto_buffs_release(dto);
......