Commit 71839f7d authored by Ben Hutchings's avatar Ben Hutchings Committed by David S. Miller

sfc: Distinguish critical and non-critical over-temperature conditions

Set both the 'maximum' and critical temperature limits for LM87
hardware monitors on Falcon boards.  Do not shut down a port until the
critical temperature is reached, but warn as soon as the 'maximum'
temperature is reached.
Signed-off-by: default avatarBen Hutchings <bhutchings@solarflare.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 4484cd7d
......@@ -30,17 +30,28 @@
#define FALCON_BOARD_SFN4112F 0x52
/* Board temperature is about 15°C above ambient when air flow is
* limited. */
* limited. The maximum acceptable ambient temperature varies
* depending on the PHY specifications but the critical temperature
* above which we should shut down to avoid damage is 80°C. */
#define FALCON_BOARD_TEMP_BIAS 15
#define FALCON_BOARD_TEMP_CRIT (80 + FALCON_BOARD_TEMP_BIAS)
/* SFC4000 datasheet says: 'The maximum permitted junction temperature
* is 125°C; the thermal design of the environment for the SFC4000
* should aim to keep this well below 100°C.' */
#define FALCON_JUNC_TEMP_MIN 0
#define FALCON_JUNC_TEMP_MAX 90
#define FALCON_JUNC_TEMP_CRIT 125
/*****************************************************************************
* Support for LM87 sensor chip used on several boards
*/
#define LM87_REG_TEMP_HW_INT_LOCK 0x13
#define LM87_REG_TEMP_HW_EXT_LOCK 0x14
#define LM87_REG_TEMP_HW_INT 0x17
#define LM87_REG_TEMP_HW_EXT 0x18
#define LM87_REG_TEMP_EXT1 0x26
#define LM87_REG_TEMP_INT 0x27
#define LM87_REG_ALARMS1 0x41
#define LM87_REG_ALARMS2 0x42
#define LM87_IN_LIMITS(nr, _min, _max) \
......@@ -57,6 +68,27 @@
#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE)
static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
{
while (*reg_values) {
u8 reg = *reg_values++;
u8 value = *reg_values++;
int rc = i2c_smbus_write_byte_data(client, reg, value);
if (rc)
return rc;
}
return 0;
}
static const u8 falcon_lm87_common_regs[] = {
LM87_REG_TEMP_HW_INT_LOCK, FALCON_BOARD_TEMP_CRIT,
LM87_REG_TEMP_HW_INT, FALCON_BOARD_TEMP_CRIT,
LM87_TEMP_EXT1_LIMITS(FALCON_JUNC_TEMP_MIN, FALCON_JUNC_TEMP_MAX),
LM87_REG_TEMP_HW_EXT_LOCK, FALCON_JUNC_TEMP_CRIT,
LM87_REG_TEMP_HW_EXT, FALCON_JUNC_TEMP_CRIT,
0
};
static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
const u8 *reg_values)
{
......@@ -67,13 +99,12 @@ static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
if (!client)
return -EIO;
while (*reg_values) {
u8 reg = *reg_values++;
u8 value = *reg_values++;
rc = i2c_smbus_write_byte_data(client, reg, value);
if (rc)
goto err;
}
rc = efx_poke_lm87(client, reg_values);
if (rc)
goto err;
rc = efx_poke_lm87(client, falcon_lm87_common_regs);
if (rc)
goto err;
board->hwmon_client = client;
return 0;
......@@ -91,36 +122,56 @@ static void efx_fini_lm87(struct efx_nic *efx)
static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
{
struct i2c_client *client = falcon_board(efx)->hwmon_client;
s32 alarms1, alarms2;
bool temp_crit, elec_fault, is_failure;
u16 alarms;
s32 reg;
/* If link is up then do not monitor temperature */
if (EFX_WORKAROUND_7884(efx) && efx->link_state.up)
return 0;
alarms1 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
alarms2 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
if (alarms1 < 0)
return alarms1;
if (alarms2 < 0)
return alarms2;
alarms1 &= mask;
alarms2 &= mask >> 8;
if (alarms1 || alarms2) {
reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
if (reg < 0)
return reg;
alarms = reg;
reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
if (reg < 0)
return reg;
alarms |= reg << 8;
alarms &= mask;
temp_crit = false;
if (alarms & LM87_ALARM_TEMP_INT) {
reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_INT);
if (reg < 0)
return reg;
if (reg > FALCON_BOARD_TEMP_CRIT)
temp_crit = true;
}
if (alarms & LM87_ALARM_TEMP_EXT1) {
reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_EXT1);
if (reg < 0)
return reg;
if (reg > FALCON_JUNC_TEMP_CRIT)
temp_crit = true;
}
elec_fault = alarms & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1);
is_failure = temp_crit || elec_fault;
if (alarms)
netif_err(efx, hw, efx->net_dev,
"LM87 detected a hardware failure (status %02x:%02x)"
"%s%s%s\n",
alarms1, alarms2,
(alarms1 & LM87_ALARM_TEMP_INT) ?
"LM87 detected a hardware %s (status %02x:%02x)"
"%s%s%s%s\n",
is_failure ? "failure" : "problem",
alarms & 0xff, alarms >> 8,
(alarms & LM87_ALARM_TEMP_INT) ?
"; board is overheating" : "",
(alarms1 & LM87_ALARM_TEMP_EXT1) ?
(alarms & LM87_ALARM_TEMP_EXT1) ?
"; controller is overheating" : "",
(alarms1 & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1)
|| alarms2) ?
"; electrical fault" : "");
return -ERANGE;
}
temp_crit ? "; reached critical temperature" : "",
elec_fault ? "; electrical fault" : "");
return 0;
return is_failure ? -ERANGE : 0;
}
#else /* !CONFIG_SENSORS_LM87 */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment