powerpc/powernv: Invoke opal call to handle hmi.

When we hit an HMI in Linux, invoke the OPAL call to handle/recover from HMI
errors in real mode. Then, in virtual mode during check_irq_replay(),
invoke opal_poll_events()/opal_do_notifier() to retrieve the HMI event from
OPAL and act accordingly.

Now that we are ready to handle HMI interrupt directly in linux, remove
the HMI interrupt registration with firmware.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Mahesh Salgaonkar 2014-07-29 18:40:07 +05:30 committed by Benjamin Herrenschmidt
parent 0869b6fd20
commit 0ef95b411e
6 changed files with 267 additions and 7 deletions

View File

@ -148,6 +148,7 @@ struct opal_sg_list {
#define OPAL_DUMP_RESEND 91
#define OPAL_DUMP_INFO2 94
#define OPAL_PCI_EEH_FREEZE_SET 97
#define OPAL_HANDLE_HMI 98
#ifndef __ASSEMBLY__
@ -245,6 +246,7 @@ enum OpalMessageType {
OPAL_MSG_MEM_ERR,
OPAL_MSG_EPOW,
OPAL_MSG_SHUTDOWN,
OPAL_MSG_HMI_EVT,
OPAL_MSG_TYPE_MAX,
};
@ -513,6 +515,50 @@ struct OpalMemoryErrorData {
} u;
};
/* HMI interrupt event */
/* Layout version carried in OpalHMIEvent.version; only V1 is defined here. */
enum OpalHMI_Version {
OpalHMIEvt_V1 = 1,
};
/*
 * Severity of an HMI event, carried in OpalHMIEvent.severity.
 * The values are listed in increasing order of seriousness.
 */
enum OpalHMI_Severity {
OpalHMI_SEV_NO_ERROR = 0,
OpalHMI_SEV_WARNING = 1,
OpalHMI_SEV_ERROR_SYNC = 2,
OpalHMI_SEV_FATAL = 3,
};
/*
 * Outcome of the HMI handling, carried in OpalHMIEvent.disposition:
 * either the error was recovered or it was not.
 */
enum OpalHMI_Disposition {
OpalHMI_DISPOSITION_RECOVERED = 0,
OpalHMI_DISPOSITION_NOT_RECOVERED = 1,
};
/*
 * Kind of error that raised the HMI, carried in OpalHMIEvent.type.
 * Values are assigned sequentially from 0; code that indexes a table by
 * this value depends on the order, so append new types at the end.
 */
enum OpalHMI_ErrType {
OpalHMI_ERROR_MALFUNC_ALERT = 0,
OpalHMI_ERROR_PROC_RECOV_DONE,
OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN,
OpalHMI_ERROR_PROC_RECOV_MASKED,
OpalHMI_ERROR_TFAC, /* timer facility error; tfmr is valid */
OpalHMI_ERROR_TFMR_PARITY, /* TFMR corrupted; tfmr is valid */
OpalHMI_ERROR_HA_OVERFLOW_WARN,
OpalHMI_ERROR_XSCOM_FAIL,
OpalHMI_ERROR_XSCOM_DONE,
OpalHMI_ERROR_SCOM_FIR,
OpalHMI_ERROR_DEBUG_TRIG_FIR,
OpalHMI_ERROR_HYP_RESOURCE,
};
/*
 * HMI event record. The single-byte fields hold values from the OpalHMI_*
 * enums above; the 64-bit register images are big-endian and must be
 * converted with be64_to_cpu() before use.
 */
struct OpalHMIEvent {
uint8_t version; /* 0x00 */
uint8_t severity; /* 0x01 */
uint8_t type; /* 0x02 */
uint8_t disposition; /* 0x03 */
uint8_t reserved_1[4]; /* 0x04 */
__be64 hmer; /* 0x08 */
/* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */
__be64 tfmr; /* 0x10 */
};
enum {
OPAL_P7IOC_DIAG_TYPE_NONE = 0,
OPAL_P7IOC_DIAG_TYPE_RGC = 1,
@ -873,6 +919,7 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer,
int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
uint64_t length);
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
/* Ask OPAL to handle an HMI; takes no arguments and returns an OPAL status. */
int64_t opal_handle_hmi(void);
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,

View File

@ -167,6 +167,7 @@ struct paca_struct {
* and already using emergency stack.
*/
u16 in_mce;
u8 hmi_event_available; /* HMI event is available */
#endif
/* Stuff for accurate time accounting */

View File

@ -1,7 +1,7 @@
obj-y += setup.o opal-wrappers.o opal.o opal-async.o
obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o
obj-y += opal-msglog.o opal-hmi.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o

View File

@ -0,0 +1,188 @@
/*
* OPAL hypervisor Maintenance interrupt handling support in PowerNV.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; If not, see <http://www.gnu.org/licenses/>.
*
* Copyright 2014 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
#undef DEBUG
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/opal.h>
#include <asm/cputable.h>
/* Set to 1 once the HMI message notifier has been registered. */
static int opal_hmi_handler_nb_init;
/* One queued HMI event: a private copy of the event plus its list linkage. */
struct OpalHmiEvtNode {
struct list_head list;
struct OpalHMIEvent hmi_evt;
};
/* Events waiting to be logged by the work item. */
static LIST_HEAD(opal_hmi_evt_list);
/* Protects opal_hmi_evt_list; taken with interrupts disabled. */
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
/*
 * print_hmi_event_info - write a decoded HMI event to the kernel log.
 * @hmi_evt: event to report.
 *
 * Only OpalHMIEvt_V1 events are decoded; any other version is reported with
 * pr_err() and nothing further is printed. The log level and the severity
 * label are derived from hmi_evt->severity (unknown severities are treated
 * as fatal). TFMR is printed only for the TFAC and TFMR_PARITY error types.
 */
static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
{
	const char *level, *sevstr, *error_info;
	/* Indexed by enum OpalHMI_ErrType; keep both in the same order. */
	static const char * const hmi_error_types[] = {
		"Malfunction Alert",
		"Processor Recovery done",
		"Processor recovery occurred again",
		"Processor recovery occurred for masked error",
		"Timer facility experienced an error",
		"TFMR SPR is corrupted",
		"UPS (Uninterrupted Power System) Overflow indication",
		"An XSCOM operation failure",
		"An XSCOM operation completed",
		"SCOM has set a reserved FIR bit to cause recovery",
		"Debug trigger has set a reserved FIR bit to cause recovery",
		"A hypervisor resource error occurred"
	};

	/* Print things out */
	if (hmi_evt->version != OpalHMIEvt_V1) {
		pr_err("HMI Interrupt, Unknown event version %d !\n",
			hmi_evt->version);
		return;
	}

	switch (hmi_evt->severity) {
	case OpalHMI_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case OpalHMI_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case OpalHMI_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case OpalHMI_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
		level, sevstr,
		hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
		"Recovered" : "Not recovered");

	/* Types beyond the end of the table are reported as "Unknown". */
	error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
			hmi_error_types[hmi_evt->type]
			: "Unknown";
	printk("%s Error detail: %s\n", level, error_info);
	printk("%s	HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));

	if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
		(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
		printk("%s	TFMR: %016llx\n", level,
						be64_to_cpu(hmi_evt->tfmr));
}
/*
 * hmi_event_handler - work item that drains the queue of pending HMI events.
 * @work: unused.
 *
 * Each queued event is detached while holding opal_hmi_evt_lock, then
 * printed and freed with the lock released. An event whose disposition is
 * anything other than OpalHMI_DISPOSITION_RECOVERED ends in panic().
 */
static void hmi_event_handler(struct work_struct *work)
{
	struct OpalHmiEvtNode *node;
	unsigned long irq_flags;
	int recovered;

	for (;;) {
		spin_lock_irqsave(&opal_hmi_evt_lock, irq_flags);
		if (list_empty(&opal_hmi_evt_list)) {
			spin_unlock_irqrestore(&opal_hmi_evt_lock, irq_flags);
			break;
		}
		node = list_entry(opal_hmi_evt_list.next,
				  struct OpalHmiEvtNode, list);
		list_del(&node->list);
		spin_unlock_irqrestore(&opal_hmi_evt_lock, irq_flags);

		print_hmi_event_info(&node->hmi_evt);

		/* Sample the disposition before the node is released. */
		recovered = (node->hmi_evt.disposition ==
			     OpalHMI_DISPOSITION_RECOVERED);
		kfree(node);

		/* An unrecovered HMI leaves no safe way to continue. */
		if (!recovered)
			panic("Unrecoverable HMI exception");
	}
}
static DECLARE_WORK(hmi_event_work, hmi_event_handler);
/*
 * opal_handle_hmi_event - notifier handler that queues up HMI events
 * to be processed later.
 * @nb:       notifier block this handler was registered with (unused).
 * @msg_type: OPAL message type; anything but OPAL_MSG_HMI_EVT is ignored.
 * @msg:      the struct opal_msg; the HMI event starts at params[0].
 *
 * The event is copied into a freshly allocated node, appended to
 * opal_hmi_evt_list and hmi_event_work is scheduled to log it.
 *
 * Returns 0 when the message was ignored or queued, -ENOMEM when no node
 * could be allocated (the event is then dropped).
 */
static int opal_handle_hmi_event(struct notifier_block *nb,
			  unsigned long msg_type, void *msg)
{
	unsigned long flags;
	struct OpalHMIEvent *hmi_evt;
	struct opal_msg *hmi_msg = msg;
	struct OpalHmiEvtNode *msg_node;

	/* Sanity Checks */
	if (msg_type != OPAL_MSG_HMI_EVT)
		return 0;

	/* HMI event info starts from param[0] */
	hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];

	/*
	 * Delay the logging of HMI events to workqueue.
	 * NOTE(review): GFP_ATOMIC suggests this may run in atomic context;
	 * confirm against the callers of the OPAL notifier chain.
	 */
	msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
	if (!msg_node) {
		pr_err("HMI: out of memory, Opal message event not handled\n");
		return -ENOMEM;
	}
	memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));

	/*
	 * Append at the tail: the worker consumes from the head, so this
	 * keeps events in arrival order instead of newest-first.
	 */
	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
	list_add_tail(&msg_node->list, &opal_hmi_evt_list);
	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);

	schedule_work(&hmi_event_work);
	return 0;
}
/*
 * Notifier block hooking opal_handle_hmi_event() into the OPAL message
 * notifier chain. The remaining members (.next, .priority) are left to
 * static zero-initialisation, i.e. NULL and 0.
 */
static struct notifier_block opal_hmi_handler_nb = {
	.notifier_call	= opal_handle_hmi_event,
};
/*
 * opal_hmi_handler_init - register the HMI event notifier with OPAL.
 *
 * Registration happens at most once, tracked by opal_hmi_handler_nb_init.
 * Returns 0 on success (or if already registered), otherwise the error
 * returned by opal_message_notifier_register().
 */
static int __init opal_hmi_handler_init(void)
{
	int err;

	/* Already registered: nothing left to do. */
	if (opal_hmi_handler_nb_init)
		return 0;

	err = opal_message_notifier_register(OPAL_MSG_HMI_EVT,
					     &opal_hmi_handler_nb);
	if (err) {
		pr_err("%s: Can't register OPAL event notifier (%d)\n",
		       __func__, err);
		return err;
	}

	opal_hmi_handler_nb_init = 1;
	return 0;
}
subsys_initcall(opal_hmi_handler_init);

View File

@ -244,3 +244,4 @@ OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);

View File

@ -194,9 +194,6 @@ static int __init opal_register_exception_handlers(void)
* fwnmi area at 0x7000 to provide the glue space to OPAL
*/
glue = 0x7000;
opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
0, glue);
glue += 128;
opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif
@ -517,15 +514,41 @@ int opal_machine_check(struct pt_regs *regs)
/*
 * Early hmi handler called in real mode.
 *
 * Asks OPAL to handle the HMI. OPAL_SUCCESS indicates that an HMI event has
 * been generated and is waiting to be pulled by Linux; in that case the
 * per-CPU hmi_event_available flag is set and 1 is returned. Any other
 * status returns 0. @regs is not used.
 */
int opal_hmi_exception_early(struct pt_regs *regs)
{
	if (opal_handle_hmi() != OPAL_SUCCESS)
		return 0;

	local_paca->hmi_event_available = 1;
	return 1;
}
/*
 * HMI exception handler called in virtual mode during check_irq_replay.
 *
 * Returns 0 if no HMI event was flagged for this CPU. Otherwise the flag is
 * cleared, pending OPAL events are polled and, when the poll succeeds with
 * a non-zero event mask, dispatched through opal_do_notifier(); 1 is then
 * returned regardless of the poll result. @regs is not used.
 */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
	__be64 evt = 0;

	if (!local_paca->hmi_event_available)
		return 0;

	/* Consume the flag before pulling the messages from OPAL. */
	local_paca->hmi_event_available = 0;

	if (opal_poll_events(&evt) == OPAL_SUCCESS && evt)
		opal_do_notifier(be64_to_cpu(evt));

	return 1;
}
static uint64_t find_recovery_address(uint64_t nip)