sfc: Distinguish critical and non-critical over-temperature conditions

Set both the 'maximum' and critical temperature limits for LM87
hardware monitors on Falcon boards.  Do not shut down a port until the
critical temperature is reached, but warn as soon as the 'maximum'
temperature is reached.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ben Hutchings 2010-12-02 13:46:24 +00:00 committed by David S. Miller
parent 4484cd7ded
commit 71839f7d16
1 changed files with 83 additions and 32 deletions

View File

@ -30,17 +30,28 @@
#define FALCON_BOARD_SFN4112F 0x52 #define FALCON_BOARD_SFN4112F 0x52
/* Board temperature is about 15°C above ambient when air flow is /* Board temperature is about 15°C above ambient when air flow is
* limited. */ * limited. The maximum acceptable ambient temperature varies
* depending on the PHY specifications but the critical temperature
* above which we should shut down to avoid damage is 80°C. */
#define FALCON_BOARD_TEMP_BIAS 15 #define FALCON_BOARD_TEMP_BIAS 15
#define FALCON_BOARD_TEMP_CRIT (80 + FALCON_BOARD_TEMP_BIAS)
/* SFC4000 datasheet says: 'The maximum permitted junction temperature /* SFC4000 datasheet says: 'The maximum permitted junction temperature
* is 125°C; the thermal design of the environment for the SFC4000 * is 125°C; the thermal design of the environment for the SFC4000
* should aim to keep this well below 100°C.' */ * should aim to keep this well below 100°C.' */
#define FALCON_JUNC_TEMP_MIN 0
#define FALCON_JUNC_TEMP_MAX 90 #define FALCON_JUNC_TEMP_MAX 90
#define FALCON_JUNC_TEMP_CRIT 125
/***************************************************************************** /*****************************************************************************
* Support for LM87 sensor chip used on several boards * Support for LM87 sensor chip used on several boards
*/ */
#define LM87_REG_TEMP_HW_INT_LOCK 0x13
#define LM87_REG_TEMP_HW_EXT_LOCK 0x14
#define LM87_REG_TEMP_HW_INT 0x17
#define LM87_REG_TEMP_HW_EXT 0x18
#define LM87_REG_TEMP_EXT1 0x26
#define LM87_REG_TEMP_INT 0x27
#define LM87_REG_ALARMS1 0x41 #define LM87_REG_ALARMS1 0x41
#define LM87_REG_ALARMS2 0x42 #define LM87_REG_ALARMS2 0x42
#define LM87_IN_LIMITS(nr, _min, _max) \ #define LM87_IN_LIMITS(nr, _min, _max) \
@ -57,6 +68,27 @@
#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE) #if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE)
/* Write a list of (register, value) byte pairs to the hardware monitor
 * with i2c_smbus_write_byte_data().  The list is terminated by a zero
 * register number.  Stops at the first write that returns non-zero and
 * passes that result back; returns 0 once the terminator is reached. */
static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
{
	const u8 *pair;
	int rc;

	/* pair[0] is the register number, pair[1] the value to store */
	for (pair = reg_values; pair[0] != 0; pair += 2) {
		rc = i2c_smbus_write_byte_data(client, pair[0], pair[1]);
		if (rc != 0)
			return rc;
	}

	return 0;
}
/* Register/value pairs that efx_init_lm87() writes after the table
 * passed in by its caller.  The internal-sensor hardware limit and its
 * _LOCK counterpart are both set to FALCON_BOARD_TEMP_CRIT; the
 * external-sensor hardware limit and its _LOCK counterpart are both set
 * to FALCON_JUNC_TEMP_CRIT.
 * NOTE(review): LM87_TEMP_EXT1_LIMITS is defined elsewhere; presumably
 * it expands to the low/high alarm limit pairs for the external
 * sensor - confirm against its definition. */
static const u8 falcon_lm87_common_regs[] = {
LM87_REG_TEMP_HW_INT_LOCK, FALCON_BOARD_TEMP_CRIT,
LM87_REG_TEMP_HW_INT, FALCON_BOARD_TEMP_CRIT,
LM87_TEMP_EXT1_LIMITS(FALCON_JUNC_TEMP_MIN, FALCON_JUNC_TEMP_MAX),
LM87_REG_TEMP_HW_EXT_LOCK, FALCON_JUNC_TEMP_CRIT,
LM87_REG_TEMP_HW_EXT, FALCON_JUNC_TEMP_CRIT,
/* Terminator: efx_poke_lm87() stops at a zero register number */
0
};
static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info, static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
const u8 *reg_values) const u8 *reg_values)
{ {
@ -67,13 +99,12 @@ static int efx_init_lm87(struct efx_nic *efx, struct i2c_board_info *info,
if (!client) if (!client)
return -EIO; return -EIO;
while (*reg_values) { rc = efx_poke_lm87(client, reg_values);
u8 reg = *reg_values++; if (rc)
u8 value = *reg_values++; goto err;
rc = i2c_smbus_write_byte_data(client, reg, value); rc = efx_poke_lm87(client, falcon_lm87_common_regs);
if (rc) if (rc)
goto err; goto err;
}
board->hwmon_client = client; board->hwmon_client = client;
return 0; return 0;
@ -91,36 +122,56 @@ static void efx_fini_lm87(struct efx_nic *efx)
static int efx_check_lm87(struct efx_nic *efx, unsigned mask) static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
{ {
struct i2c_client *client = falcon_board(efx)->hwmon_client; struct i2c_client *client = falcon_board(efx)->hwmon_client;
s32 alarms1, alarms2; bool temp_crit, elec_fault, is_failure;
u16 alarms;
s32 reg;
/* If link is up then do not monitor temperature */ /* If link is up then do not monitor temperature */
if (EFX_WORKAROUND_7884(efx) && efx->link_state.up) if (EFX_WORKAROUND_7884(efx) && efx->link_state.up)
return 0; return 0;
alarms1 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1); reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
alarms2 = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2); if (reg < 0)
if (alarms1 < 0) return reg;
return alarms1; alarms = reg;
if (alarms2 < 0) reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
return alarms2; if (reg < 0)
alarms1 &= mask; return reg;
alarms2 &= mask >> 8; alarms |= reg << 8;
if (alarms1 || alarms2) { alarms &= mask;
netif_err(efx, hw, efx->net_dev,
"LM87 detected a hardware failure (status %02x:%02x)"
"%s%s%s\n",
alarms1, alarms2,
(alarms1 & LM87_ALARM_TEMP_INT) ?
"; board is overheating" : "",
(alarms1 & LM87_ALARM_TEMP_EXT1) ?
"; controller is overheating" : "",
(alarms1 & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1)
|| alarms2) ?
"; electrical fault" : "");
return -ERANGE;
}
return 0; temp_crit = false;
if (alarms & LM87_ALARM_TEMP_INT) {
reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_INT);
if (reg < 0)
return reg;
if (reg > FALCON_BOARD_TEMP_CRIT)
temp_crit = true;
}
if (alarms & LM87_ALARM_TEMP_EXT1) {
reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_EXT1);
if (reg < 0)
return reg;
if (reg > FALCON_JUNC_TEMP_CRIT)
temp_crit = true;
}
elec_fault = alarms & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1);
is_failure = temp_crit || elec_fault;
if (alarms)
netif_err(efx, hw, efx->net_dev,
"LM87 detected a hardware %s (status %02x:%02x)"
"%s%s%s%s\n",
is_failure ? "failure" : "problem",
alarms & 0xff, alarms >> 8,
(alarms & LM87_ALARM_TEMP_INT) ?
"; board is overheating" : "",
(alarms & LM87_ALARM_TEMP_EXT1) ?
"; controller is overheating" : "",
temp_crit ? "; reached critical temperature" : "",
elec_fault ? "; electrical fault" : "");
return is_failure ? -ERANGE : 0;
} }
#else /* !CONFIG_SENSORS_LM87 */ #else /* !CONFIG_SENSORS_LM87 */