Merge branch 'sparc64-non-resumable-user-error-recovery'

Liam R. Howlett says:

====================
sparc64: Recover from userspace non-resumable PIO & MEM errors

A non-resumable error from userspace is able to cause a kernel panic or trap
loop due to the setup and handling of the queued traps once in the kernel.
This patch series addresses both of these issues.

The queues are fixed by simply zeroing the memory before use.

PIO errors from userspace will result in a SIGBUS being sent to the user
process.

The MEM errors form userspace will result in a SIGKILL and also cause the
offending pages to be claimed so they are no longer used in future tasks.
SIGKILL is used to ensure that the process does not try to coredump and result
in an attempt to read the memory again from within kernel space.  Although
there is a HV call to scrub the memory (mem_scrub), there is no easy way to
guarantee that the real memory address(es) are not used by other tasks.
Clearing the error with mem_scrub would zero the memory and cause the other
processes to proceed with bad data.

The handling of other non-resumable errors remain unchanged and will cause a
panic.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-01-30 14:28:22 -08:00
commit 54791b276b
2 changed files with 74 additions and 1 deletions

View File

@ -1021,7 +1021,7 @@ static void __init alloc_one_queue(unsigned long *pa_ptr, unsigned long qmask)
unsigned long order = get_order(size);
unsigned long p;
p = __get_free_pages(GFP_KERNEL, order);
p = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!p) {
prom_printf("SUN4V: Error, cannot allocate queue.\n");
prom_halt();

View File

@ -2051,6 +2051,73 @@ void sun4v_resum_overflow(struct pt_regs *regs)
atomic_inc(&sun4v_resum_oflow_cnt);
}
/* Given a set of registers, get the virtual addressi that was being accessed
* by the faulting instructions at tpc.
*/
static unsigned long sun4v_get_vaddr(struct pt_regs *regs)
{
unsigned int insn;
if (!copy_from_user(&insn, (void __user *)regs->tpc, 4)) {
return compute_effective_address(regs, insn,
(insn >> 25) & 0x1f);
}
return 0;
}
/* Attempt to handle non-resumable errors generated from userspace.
* Returns true if the signal was handled, false otherwise.
*/
bool sun4v_nonresum_error_user_handled(struct pt_regs *regs,
struct sun4v_error_entry *ent) {
unsigned int attrs = ent->err_attrs;
if (attrs & SUN4V_ERR_ATTRS_MEMORY) {
unsigned long addr = ent->err_raddr;
siginfo_t info;
if (addr == ~(u64)0) {
/* This seems highly unlikely to ever occur */
pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory error detected in unknown location!\n");
} else {
unsigned long page_cnt = DIV_ROUND_UP(ent->err_size,
PAGE_SIZE);
/* Break the unfortunate news. */
pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory failed at %016lX\n",
addr);
pr_emerg("SUN4V NON-RECOVERABLE ERROR: Claiming %lu ages.\n",
page_cnt);
while (page_cnt-- > 0) {
if (pfn_valid(addr >> PAGE_SHIFT))
get_page(pfn_to_page(addr >> PAGE_SHIFT));
addr += PAGE_SIZE;
}
}
info.si_signo = SIGKILL;
info.si_errno = 0;
info.si_trapno = 0;
force_sig_info(info.si_signo, &info, current);
return true;
}
if (attrs & SUN4V_ERR_ATTRS_PIO) {
siginfo_t info;
info.si_signo = SIGBUS;
info.si_code = BUS_ADRERR;
info.si_addr = (void __user *)sun4v_get_vaddr(regs);
force_sig_info(info.si_signo, &info, current);
return true;
}
/* Default to doing nothing */
return false;
}
/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
* Log the event, clear the first word of the entry, and die.
*/
@ -2075,6 +2142,12 @@ void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset)
put_cpu();
if (!(regs->tstate & TSTATE_PRIV) &&
sun4v_nonresum_error_user_handled(regs, &local_copy)) {
/* DON'T PANIC: This userspace error was handled. */
return;
}
#ifdef CONFIG_PCI
/* Check for the special PCI poke sequence. */
if (pci_poke_in_progress && pci_poke_cpu == cpu) {