Commit c3e7e791, authored by Corey Minyard, committed by Linus Torvalds

[PATCH] ipmi: kcs error0 delay

BMCs can get into ERROR0 state while flashing new firmware, particularly while
the BMC is erasing the next flash block, which may take just under 2 seconds
on a Dell PowerEdge 2800 (1.75 seconds typical), during which time the
single-threaded firmware may not be able to process new commands.  In
particular, clearing OBF may not take effect immediately.

We want the KCS state machine to delay a bit in ERROR0 after clearing OBF,
waiting for OBF to actually be clear before proceeding.

This introduces a new return value from the LLDD's event loop,
SI_SM_CALL_WITH_TICK_DELAY.  This means the calling thread/timer should
schedule_timeout() for at least 1 tick, rather than busy-wait.  This is a longer
delay than SI_SM_CALL_WITH_DELAY, which is typically a 250us busy-wait.
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 21dcd300
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/moduleparam.h> #include <linux/moduleparam.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/jiffies.h>
#include <linux/ipmi_msgdefs.h> /* for completion codes */ #include <linux/ipmi_msgdefs.h> /* for completion codes */
#include "ipmi_si_sm.h" #include "ipmi_si_sm.h"
...@@ -99,6 +100,7 @@ enum kcs_states { ...@@ -99,6 +100,7 @@ enum kcs_states {
#define IBF_RETRY_TIMEOUT 1000000 #define IBF_RETRY_TIMEOUT 1000000
#define OBF_RETRY_TIMEOUT 1000000 #define OBF_RETRY_TIMEOUT 1000000
#define MAX_ERROR_RETRIES 10 #define MAX_ERROR_RETRIES 10
#define ERROR0_OBF_WAIT_JIFFIES (2*HZ)
struct si_sm_data struct si_sm_data
{ {
...@@ -115,6 +117,7 @@ struct si_sm_data ...@@ -115,6 +117,7 @@ struct si_sm_data
unsigned int error_retries; unsigned int error_retries;
long ibf_timeout; long ibf_timeout;
long obf_timeout; long obf_timeout;
unsigned long error0_timeout;
}; };
static unsigned int init_kcs_data(struct si_sm_data *kcs, static unsigned int init_kcs_data(struct si_sm_data *kcs,
...@@ -187,6 +190,7 @@ static inline void start_error_recovery(struct si_sm_data *kcs, char *reason) ...@@ -187,6 +190,7 @@ static inline void start_error_recovery(struct si_sm_data *kcs, char *reason)
printk(KERN_DEBUG "ipmi_kcs_sm: kcs hosed: %s\n", reason); printk(KERN_DEBUG "ipmi_kcs_sm: kcs hosed: %s\n", reason);
kcs->state = KCS_HOSED; kcs->state = KCS_HOSED;
} else { } else {
kcs->error0_timeout = jiffies + ERROR0_OBF_WAIT_JIFFIES;
kcs->state = KCS_ERROR0; kcs->state = KCS_ERROR0;
} }
} }
...@@ -423,6 +427,10 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) ...@@ -423,6 +427,10 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time)
case KCS_ERROR0: case KCS_ERROR0:
clear_obf(kcs, status); clear_obf(kcs, status);
status = read_status(kcs);
if (GET_STATUS_OBF(status)) /* controller isn't responding */
if (time_before(jiffies, kcs->error0_timeout))
return SI_SM_CALL_WITH_TICK_DELAY;
write_cmd(kcs, KCS_GET_STATUS_ABORT); write_cmd(kcs, KCS_GET_STATUS_ABORT);
kcs->state = KCS_ERROR1; kcs->state = KCS_ERROR1;
break; break;
......
...@@ -1932,7 +1932,8 @@ static int try_get_dev_id(struct smi_info *smi_info) ...@@ -1932,7 +1932,8 @@ static int try_get_dev_id(struct smi_info *smi_info)
smi_result = smi_info->handlers->event(smi_info->si_sm, 0); smi_result = smi_info->handlers->event(smi_info->si_sm, 0);
for (;;) for (;;)
{ {
if (smi_result == SI_SM_CALL_WITH_DELAY) { if (smi_result == SI_SM_CALL_WITH_DELAY ||
smi_result == SI_SM_CALL_WITH_TICK_DELAY) {
schedule_timeout_uninterruptible(1); schedule_timeout_uninterruptible(1);
smi_result = smi_info->handlers->event( smi_result = smi_info->handlers->event(
smi_info->si_sm, 100); smi_info->si_sm, 100);
......
...@@ -62,6 +62,7 @@ enum si_sm_result ...@@ -62,6 +62,7 @@ enum si_sm_result
{ {
SI_SM_CALL_WITHOUT_DELAY, /* Call the driver again immediately */ SI_SM_CALL_WITHOUT_DELAY, /* Call the driver again immediately */
SI_SM_CALL_WITH_DELAY, /* Delay some before calling again. */ SI_SM_CALL_WITH_DELAY, /* Delay some before calling again. */
SI_SM_CALL_WITH_TICK_DELAY, /* Delay at least 1 tick before calling again. */
SI_SM_TRANSACTION_COMPLETE, /* A transaction is finished. */ SI_SM_TRANSACTION_COMPLETE, /* A transaction is finished. */
SI_SM_IDLE, /* The SM is in idle state. */ SI_SM_IDLE, /* The SM is in idle state. */
SI_SM_HOSED, /* The hardware violated the state machine. */ SI_SM_HOSED, /* The hardware violated the state machine. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment