Commit 8a1f00a8 authored by Robin Holt's avatar Robin Holt Committed by Tony Luck

[IA64-SGI] Correct BTE notification timeouts on SN2.

The SN2 Block Transfer Engine occassionally fails to send a notification
that it has completed a transfer to the kernel.  This patch adds a
timeout mechanism which will detect the failure, reset the interface,
and then retry the transfer.
 
Signed-off-by: Robin Holt
Signed-off-by: default avatarTony Luck <tony.luck@intel.com>
parent 4ead42c5
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <asm/sn/pda.h> #include <asm/sn/pda.h>
#include "shubio.h" #include "shubio.h"
#include <asm/nodedata.h> #include <asm/nodedata.h>
#include <asm/delay.h>
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/string.h> #include <linux/string.h>
...@@ -69,6 +70,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) ...@@ -69,6 +70,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
struct bteinfo_s *bte; struct bteinfo_s *bte;
bte_result_t bte_status; bte_result_t bte_status;
unsigned long irq_flags; unsigned long irq_flags;
unsigned long itc_end = 0;
struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY]; struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY];
int bte_if_index; int bte_if_index;
...@@ -107,6 +109,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) ...@@ -107,6 +109,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
} }
} }
retry_bteop:
do { do {
local_irq_save(irq_flags); local_irq_save(irq_flags);
...@@ -121,7 +124,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) ...@@ -121,7 +124,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
} }
if (spin_trylock(&bte->spinlock)) { if (spin_trylock(&bte->spinlock)) {
if ((*bte->most_rcnt_na & BTE_ACTIVE) || if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
(BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) { (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
/* Got the lock but BTE still busy */ /* Got the lock but BTE still busy */
spin_unlock(&bte->spinlock); spin_unlock(&bte->spinlock);
...@@ -155,7 +158,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) ...@@ -155,7 +158,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK); transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);
/* Initialize the notification to a known value. */ /* Initialize the notification to a known value. */
*bte->most_rcnt_na = -1L; *bte->most_rcnt_na = BTE_WORD_BUSY;
/* Set the status reg busy bit and transfer length */ /* Set the status reg busy bit and transfer length */
BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size)); BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size));
...@@ -177,13 +180,25 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) ...@@ -177,13 +180,25 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode)); BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode));
itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);
spin_unlock_irqrestore(&bte->spinlock, irq_flags); spin_unlock_irqrestore(&bte->spinlock, irq_flags);
if (notification != NULL) { if (notification != NULL) {
return BTE_SUCCESS; return BTE_SUCCESS;
} }
while ((transfer_stat = *bte->most_rcnt_na) == -1UL) { while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
if (ia64_get_itc() > itc_end) {
BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
NASID_GET(bte->bte_base_addr), bte->bte_num,
BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
bte->bte_error_count++;
bte->bh_error = IBLS_ERROR;
bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
goto retry_bteop;
}
} }
BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n", BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
...@@ -191,10 +206,11 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) ...@@ -191,10 +206,11 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
if (transfer_stat & IBLS_ERROR) { if (transfer_stat & IBLS_ERROR) {
bte_status = transfer_stat & ~IBLS_ERROR; bte_status = transfer_stat & ~IBLS_ERROR;
*bte->most_rcnt_na = 0L;
} else { } else {
bte_status = BTE_SUCCESS; bte_status = BTE_SUCCESS;
} }
*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n", BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na)); BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
...@@ -414,7 +430,7 @@ void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode) ...@@ -414,7 +430,7 @@ void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
*/ */
mynodepda->bte_if[i].most_rcnt_na = mynodepda->bte_if[i].most_rcnt_na =
&(mynodepda->bte_if[i].notify); &(mynodepda->bte_if[i].notify);
mynodepda->bte_if[i].notify = 0L; mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
spin_lock_init(&mynodepda->bte_if[i].spinlock); spin_lock_init(&mynodepda->bte_if[i].spinlock);
mynodepda->bte_if[i].bte_cnode = cnode; mynodepda->bte_if[i].bte_cnode = cnode;
......
...@@ -56,6 +56,8 @@ ...@@ -56,6 +56,8 @@
#define BTE_VALID_MODE(x) ((x) & (IBCT_NOTIFY | IBCT_ZFIL_MODE)) #define BTE_VALID_MODE(x) ((x) & (IBCT_NOTIFY | IBCT_ZFIL_MODE))
#define BTE_ACTIVE (IBLS_BUSY | IBLS_ERROR) #define BTE_ACTIVE (IBLS_BUSY | IBLS_ERROR)
#define BTE_WORD_AVAILABLE (IBLS_BUSY << 1)
#define BTE_WORD_BUSY (~BTE_WORD_AVAILABLE)
/* /*
* Some macros to simplify reading. * Some macros to simplify reading.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment