Merge branch 'ras/core' into core/objtool, to pick up the new exception table format

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2016-02-25 09:01:09 +01:00
commit 319e305ca4
12 changed files with 322 additions and 122 deletions

View File

@ -290,3 +290,38 @@ Due to the way that the exception table is built and needs to be ordered,
only use exceptions for code in the .text section. Any other section only use exceptions for code in the .text section. Any other section
will cause the exception table to not be sorted correctly, and the will cause the exception table to not be sorted correctly, and the
exceptions will fail. exceptions will fail.
Things changed when 64-bit support was added to x86 Linux. Rather than
double the size of the exception table by expanding the two fields of
each entry from 32 bits to 64 bits, a clever trick was used: each address
is stored as a 32-bit offset relative to the location of the field that
holds it. The assembly code changed from:
.long 1b,3b
to:
.long (from) - .
.long (to) - .
and the C-code that uses these values converts back to absolute addresses
like this:
ex_insn_addr(const struct exception_table_entry *x)
{
return (unsigned long)&x->insn + x->insn;
}
In v4.6 the exception table entry was expanded with a new field "handler".
This field is also 32 bits wide and holds a third relative pointer, this
one to a handler function, which is one of:
1) bool ex_handler_default(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
This is the legacy case that just jumps to the fixup code.
2) bool ex_handler_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
This case provides the fault number of the trap that occurred at
entry->insn (the handler stores it in regs->ax). It is used to
distinguish page faults from machine checks.
3) bool ex_handler_ext(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
This case is used for uaccess_err ... we need to set a flag
in the task structure. Before the handler functions existed this
case was handled by adding a large offset to the fixup to tag
it as special.
More functions can easily be added.

View File

@ -44,19 +44,22 @@
/* Exception table entry */ /* Exception table entry */
#ifdef __ASSEMBLY__ #ifdef __ASSEMBLY__
# define _ASM_EXTABLE(from,to) \ # define _ASM_EXTABLE_HANDLE(from, to, handler) \
.pushsection "__ex_table","a" ; \ .pushsection "__ex_table","a" ; \
.balign 8 ; \ .balign 4 ; \
.long (from) - . ; \ .long (from) - . ; \
.long (to) - . ; \ .long (to) - . ; \
.long (handler) - . ; \
.popsection .popsection
# define _ASM_EXTABLE_EX(from,to) \ # define _ASM_EXTABLE(from, to) \
.pushsection "__ex_table","a" ; \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
.balign 8 ; \
.long (from) - . ; \ # define _ASM_EXTABLE_FAULT(from, to) \
.long (to) - . + 0x7ffffff0 ; \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
.popsection
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
# define _ASM_NOKPROBE(entry) \ # define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \ .pushsection "_kprobe_blacklist","aw" ; \
@ -89,19 +92,24 @@
.endm .endm
#else #else
# define _ASM_EXTABLE(from,to) \ # define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
" .pushsection \"__ex_table\",\"a\"\n" \ " .pushsection \"__ex_table\",\"a\"\n" \
" .balign 8\n" \ " .balign 4\n" \
" .long (" #from ") - .\n" \ " .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \ " .long (" #to ") - .\n" \
" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
" .popsection\n" " .popsection\n"
# define _ASM_EXTABLE_EX(from,to) \ # define _ASM_EXTABLE(from, to) \
" .pushsection \"__ex_table\",\"a\"\n" \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
" .balign 8\n" \
" .long (" #from ") - .\n" \ # define _ASM_EXTABLE_FAULT(from, to) \
" .long (" #to ") - . + 0x7ffffff0\n" \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
" .popsection\n"
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
/* For C file, we already have NOKPROBE_SYMBOL macro */ /* For C file, we already have NOKPROBE_SYMBOL macro */
#endif #endif

View File

@ -269,6 +269,10 @@
#define MSR_IA32_MC0_CTL2 0x00000280 #define MSR_IA32_MC0_CTL2 0x00000280
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
/* 'SMCA': AMD64 Scalable MCA */
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_P6_PERFCTR0 0x000000c1 #define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2 #define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL0 0x00000186

View File

@ -90,12 +90,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
likely(!__range_not_ok(addr, size, user_addr_max())) likely(!__range_not_ok(addr, size, user_addr_max()))
/* /*
* The exception table consists of pairs of addresses relative to the * The exception table consists of triples of addresses relative to the
* exception table enty itself: the first is the address of an * exception table entry itself. The first address is of an instruction
* instruction that is allowed to fault, and the second is the address * that is allowed to fault, the second is the target at which the program
* at which the program should continue. No registers are modified, * should continue. The third is a handler function to deal with the fault
* so it is entirely up to the continuation code to figure out what to * caused by the instruction in the first field.
* do.
* *
* All the routines below use bits of fixup code that are out of line * All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well, * with the main instruction path. This means when everything is well,
@ -104,13 +103,14 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
*/ */
struct exception_table_entry { struct exception_table_entry {
int insn, fixup; int insn, fixup, handler;
}; };
/* This is not the generic standard exception_table_entry format */ /* This is not the generic standard exception_table_entry format */
#define ARCH_HAS_SORT_EXTABLE #define ARCH_HAS_SORT_EXTABLE
#define ARCH_HAS_SEARCH_EXTABLE #define ARCH_HAS_SEARCH_EXTABLE
extern int fixup_exception(struct pt_regs *regs); extern int fixup_exception(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern int early_fixup_exception(unsigned long *ip); extern int early_fixup_exception(unsigned long *ip);
/* /*

View File

@ -14,6 +14,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <asm/mce.h> #include <asm/mce.h>
#include <asm/uaccess.h>
#include "mce-internal.h" #include "mce-internal.h"
@ -29,7 +30,7 @@
* panic situations) * panic situations)
*/ */
enum context { IN_KERNEL = 1, IN_USER = 2 }; enum context { IN_KERNEL = 1, IN_USER = 2, IN_KERNEL_RECOV = 3 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 }; enum ser { SER_REQUIRED = 1, NO_SER = 2 };
enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
@ -48,6 +49,7 @@ static struct severity {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define KERNEL .context = IN_KERNEL #define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER #define USER .context = IN_USER
#define KERNEL_RECOV .context = IN_KERNEL_RECOV
#define SER .ser = SER_REQUIRED #define SER .ser = SER_REQUIRED
#define NOSER .ser = NO_SER #define NOSER .ser = NO_SER
#define EXCP .excp = EXCP_CONTEXT #define EXCP .excp = EXCP_CONTEXT
@ -86,6 +88,10 @@ static struct severity {
PANIC, "In kernel and no restart IP", PANIC, "In kernel and no restart IP",
EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
), ),
MCESEV(
PANIC, "In kernel and no restart IP",
EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV( MCESEV(
DEFERRED, "Deferred error", DEFERRED, "Deferred error",
NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED) NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
@ -122,6 +128,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR), SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV) MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
), ),
MCESEV(
AR, "Action required: data load in error recoverable area of kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL_RECOV
),
MCESEV( MCESEV(
AR, "Action required: data load error in a user process", AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
@ -170,6 +181,9 @@ static struct severity {
) /* always matches. keep at end */ ) /* always matches. keep at end */
}; };
/*
 * True only when BOTH MCG_STATUS_RIPV and MCG_STATUS_EIPV are set in the
 * given MCG status value; either bit alone is not enough.
 */
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
/* /*
* If mcgstatus indicated that ip/cs on the stack were * If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have * no good, then "m->cs" will be zero and we will have
@ -183,7 +197,11 @@ static struct severity {
*/ */
static int error_context(struct mce *m) static int error_context(struct mce *m)
{ {
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; if ((m->cs & 3) == 3)
return IN_USER;
if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip))
return IN_KERNEL_RECOV;
return IN_KERNEL;
} }
/* /*

View File

@ -961,6 +961,20 @@ static void mce_clear_state(unsigned long *toclear)
} }
} }
/*
 * Hand the page containing the error address recorded in @m to
 * memory_failure().
 *
 * The request is always marked MF_ACTION_REQUIRED; MF_MUST_KILL is added
 * when MCG_STATUS_RIPV is clear in m->mcgstatus.
 *
 * Returns whatever memory_failure() returned. Any non-zero value is logged
 * as an unrecovered memory error before being passed back to the caller.
 */
static int do_memory_failure(struct mce *m)
{
	int mf_flags;
	int rc;

	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);

	if (m->mcgstatus & MCG_STATUS_RIPV)
		mf_flags = MF_ACTION_REQUIRED;
	else
		mf_flags = MF_ACTION_REQUIRED | MF_MUST_KILL;

	/* memory_failure() takes a page frame number, not a byte address. */
	rc = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, mf_flags);
	if (rc != 0)
		pr_err("Memory error not recovered");

	return rc;
}
/* /*
* The actual machine check handler. This only handles real * The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18. * exceptions when something got corrupted coming in through int 18.
@ -998,8 +1012,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown"; char *msg = "Unknown";
u64 recover_paddr = ~0ull;
int flags = MF_ACTION_REQUIRED;
int lmce = 0; int lmce = 0;
/* If this CPU is offline, just bail out. */ /* If this CPU is offline, just bail out. */
@ -1136,22 +1148,13 @@ void do_machine_check(struct pt_regs *regs, long error_code)
} }
/* /*
* At insane "tolerant" levels we take no action. Otherwise * If tolerant is at an insane level we drop requests to kill
* we only die if we have no other choice. For less serious * processes and continue even when there is no way out.
* issues we try to recover, or limit damage to the current
* process.
*/ */
if (cfg->tolerant < 3) { if (cfg->tolerant == 3)
if (no_way_out) kill_it = 0;
mce_panic("Fatal machine check on current CPU", &m, msg); else if (no_way_out)
if (worst == MCE_AR_SEVERITY) { mce_panic("Fatal machine check on current CPU", &m, msg);
recover_paddr = m.addr;
if (!(m.mcgstatus & MCG_STATUS_RIPV))
flags |= MF_MUST_KILL;
} else if (kill_it) {
force_sig(SIGBUS, current);
}
}
if (worst > 0) if (worst > 0)
mce_report_event(regs); mce_report_event(regs);
@ -1159,25 +1162,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
out: out:
sync_core(); sync_core();
if (recover_paddr == ~0ull) if (worst != MCE_AR_SEVERITY && !kill_it)
goto done; goto out_ist;
pr_err("Uncorrected hardware memory error in user-access at %llx", /* Fault was in user mode and we need to take some action */
recover_paddr); if ((m.cs & 3) == 3) {
/* ist_begin_non_atomic(regs);
* We must call memory_failure() here even if the current process is local_irq_enable();
* doomed. We still need to mark the page as poisoned and alert any
* other users of the page. if (kill_it || do_memory_failure(&m))
*/ force_sig(SIGBUS, current);
ist_begin_non_atomic(regs); local_irq_disable();
local_irq_enable(); ist_end_non_atomic();
if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { } else {
pr_err("Memory error not recovered"); if (!fixup_exception(regs, X86_TRAP_MC))
force_sig(SIGBUS, current); mce_panic("Failed kernel mode recovery", &m, NULL);
} }
local_irq_disable();
ist_end_non_atomic(); out_ist:
done:
ist_exit(regs); ist_exit(regs);
} }
EXPORT_SYMBOL_GPL(do_machine_check); EXPORT_SYMBOL_GPL(do_machine_check);
@ -1628,10 +1630,10 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
case X86_VENDOR_AMD: { case X86_VENDOR_AMD: {
u32 ebx = cpuid_ebx(0x80000007); u32 ebx = cpuid_ebx(0x80000007);
mce_amd_feature_init(c);
mce_flags.overflow_recov = !!(ebx & BIT(0)); mce_flags.overflow_recov = !!(ebx & BIT(0));
mce_flags.succor = !!(ebx & BIT(1)); mce_flags.succor = !!(ebx & BIT(1));
mce_flags.smca = !!(ebx & BIT(3)); mce_flags.smca = !!(ebx & BIT(3));
mce_amd_feature_init(c);
break; break;
} }

View File

@ -28,7 +28,7 @@
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/trace/irq_vectors.h> #include <asm/trace/irq_vectors.h>
#define NR_BLOCKS 9 #define NR_BLOCKS 5
#define THRESHOLD_MAX 0xFFF #define THRESHOLD_MAX 0xFFF
#define INT_TYPE_APIC 0x00020000 #define INT_TYPE_APIC 0x00020000
#define MASK_VALID_HI 0x80000000 #define MASK_VALID_HI 0x80000000
@ -49,6 +49,19 @@
#define DEF_LVT_OFF 0x2 #define DEF_LVT_OFF 0x2
#define DEF_INT_TYPE_APIC 0x2 #define DEF_INT_TYPE_APIC 0x2
/* Scalable MCA: */
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
/*
* OS is required to set the MCAX bit to acknowledge that it is now using the
* new MSR ranges and new registers under each bank. It also means that the OS
* will configure deferred errors in the new MCx_CONFIG register. If the bit is
* not set, uncorrectable errors will cause a system panic.
*/
#define SMCA_MCAX_EN_OFF 0x1
static const char * const th_names[] = { static const char * const th_names[] = {
"load_store", "load_store",
"insn_fetch", "insn_fetch",
@ -84,6 +97,13 @@ struct thresh_restart {
static inline bool is_shared_bank(int bank) static inline bool is_shared_bank(int bank)
{ {
/*
* Scalable MCA provides for only one core to have access to the MSRs of
* a shared bank.
*/
if (mce_flags.smca)
return false;
/* Bank 4 is for northbridge reporting and is thus shared */ /* Bank 4 is for northbridge reporting and is thus shared */
return (bank == 4); return (bank == 4);
} }
@ -135,6 +155,14 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
} }
if (apic != msr) { if (apic != msr) {
/*
* On SMCA CPUs, LVT offset is programmed at a different MSR, and
* the BIOS provides the value. The original field where LVT offset
* was set is reserved. Return early here:
*/
if (mce_flags.smca)
return 0;
pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
"for bank %d, block %d (MSR%08X=0x%x%08x)\n", "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
b->cpu, apic, b->bank, b->block, b->address, hi, lo); b->cpu, apic, b->bank, b->block, b->address, hi, lo);
@ -247,14 +275,65 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high); wrmsr(MSR_CU_DEF_ERR, low, high);
} }
/*
 * Set up one threshold block of one MCA bank on the current CPU.
 *
 * @bank, @block: bank and block number being set up
 * @addr:         value stored as the block's address
 * @offset:       APIC LVT offset carried over from earlier calls
 * @misc_high:    high word passed to lvt_interrupt_supported(); on non-SMCA
 *                parts it also supplies the LVT offset field
 *
 * Returns the (possibly updated) LVT offset so the caller can feed it into
 * the next call.
 */
static int
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
struct threshold_block b;
int new;
/* Block 0 marks the whole bank as present in this CPU's bank_map. */
if (!block)
per_cpu(bank_map, cpu) |= (1 << bank);
memset(&b, 0, sizeof(b));
b.cpu = cpu;
b.bank = bank;
b.block = block;
b.address = addr;
b.interrupt_capable = lvt_interrupt_supported(bank, misc_high);
/* No interrupt support: initialize the block with the offset unchanged. */
if (!b.interrupt_capable)
goto done;
b.interrupt_enable = 1;
if (mce_flags.smca) {
u32 smca_low, smca_high;
u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
/*
 * Set SMCA_MCAX_EN_OFF in the high word of this bank's config MSR.
 * The write is skipped when the read fails (a zero return from
 * rdmsr_safe() is treated as success here).
 */
if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
smca_high |= SMCA_MCAX_EN_OFF;
wrmsr(smca_addr, smca_low, smca_high);
}
/* Gather LVT offset for thresholding: */
/* If this read fails, return without initializing the block. */
if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
goto out;
/* SMCA_THR_LVT_OFF selects bits 15:12 of the low word. */
new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
} else {
/* Non-SMCA: the LVT offset comes from the masked field of misc_high. */
new = (misc_high & MASK_LVTOFF_HI) >> 20;
}
offset = setup_APIC_mce_threshold(offset, new);
/* Install the AMD threshold handler once the requested offset is in use. */
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
mce_threshold_vector = amd_threshold_interrupt;
done:
mce_threshold_block_init(&b, offset);
out:
return offset;
}
/* cpu init entry point, called from mce.c with preempt off */ /* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c) void mce_amd_feature_init(struct cpuinfo_x86 *c)
{ {
struct threshold_block b;
unsigned int cpu = smp_processor_id();
u32 low = 0, high = 0, address = 0; u32 low = 0, high = 0, address = 0;
unsigned int bank, block; unsigned int bank, block;
int offset = -1, new; int offset = -1;
for (bank = 0; bank < mca_cfg.banks; ++bank) { for (bank = 0; bank < mca_cfg.banks; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) { for (block = 0; block < NR_BLOCKS; ++block) {
@ -279,29 +358,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
(high & MASK_LOCKED_HI)) (high & MASK_LOCKED_HI))
continue; continue;
if (!block) offset = prepare_threshold_block(bank, block, address, offset, high);
per_cpu(bank_map, cpu) |= (1 << bank);
memset(&b, 0, sizeof(b));
b.cpu = cpu;
b.bank = bank;
b.block = block;
b.address = address;
b.interrupt_capable = lvt_interrupt_supported(bank, high);
if (!b.interrupt_capable)
goto init;
b.interrupt_enable = 1;
new = (high & MASK_LVTOFF_HI) >> 20;
offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) &&
(mce_threshold_vector != amd_threshold_interrupt))
mce_threshold_vector = amd_threshold_interrupt;
init:
mce_threshold_block_init(&b, offset);
} }
} }

View File

@ -987,7 +987,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned * In case the user-specified fault handler returned
* zero, try to fix up. * zero, try to fix up.
*/ */
if (fixup_exception(regs)) if (fixup_exception(regs, trapnr))
return 1; return 1;
/* /*

View File

@ -185,7 +185,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
} }
if (!user_mode(regs)) { if (!user_mode(regs)) {
if (!fixup_exception(regs)) { if (!fixup_exception(regs, trapnr)) {
tsk->thread.error_code = error_code; tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr; tsk->thread.trap_nr = trapnr;
die(str, regs, error_code); die(str, regs, error_code);
@ -439,7 +439,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
tsk = current; tsk = current;
if (!user_mode(regs)) { if (!user_mode(regs)) {
if (fixup_exception(regs)) if (fixup_exception(regs, X86_TRAP_GP))
return; return;
tsk->thread.error_code = error_code; tsk->thread.error_code = error_code;
@ -690,7 +690,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
cond_local_irq_enable(regs); cond_local_irq_enable(regs);
if (!user_mode(regs)) { if (!user_mode(regs)) {
if (!fixup_exception(regs)) { if (!fixup_exception(regs, trapnr)) {
task->thread.error_code = error_code; task->thread.error_code = error_code;
task->thread.trap_nr = trapnr; task->thread.trap_nr = trapnr;
die(str, regs, error_code); die(str, regs, error_code);

View File

@ -3,6 +3,9 @@
#include <linux/sort.h> #include <linux/sort.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
/*
 * Signature of an exception-table handler: it is given the matching table
 * entry, the register state of the faulting context and the trap number.
 * fixup_exception() returns the handler's result directly to its caller.
 */
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
struct pt_regs *, int);
static inline unsigned long static inline unsigned long
ex_insn_addr(const struct exception_table_entry *x) ex_insn_addr(const struct exception_table_entry *x)
{ {
@ -13,11 +16,56 @@ ex_fixup_addr(const struct exception_table_entry *x)
{ {
return (unsigned long)&x->fixup + x->fixup; return (unsigned long)&x->fixup + x->fixup;
} }
static inline ex_handler_t
int fixup_exception(struct pt_regs *regs) ex_fixup_handler(const struct exception_table_entry *x)
{ {
const struct exception_table_entry *fixup; return (ex_handler_t)((unsigned long)&x->handler + x->handler);
unsigned long new_ip; }
/*
 * Plain fixup: redirect execution to the fixup address recorded in the
 * exception table entry. @trapnr is not looked at. Always returns true.
 */
bool ex_handler_default(const struct exception_table_entry *fixup,
			struct pt_regs *regs, int trapnr)
{
	const unsigned long resume_ip = ex_fixup_addr(fixup);

	regs->ip = resume_ip;

	return true;
}
EXPORT_SYMBOL(ex_handler_default);
/*
 * Fixup that also reports which trap fired: execution resumes at the
 * entry's fixup address with the trap number placed in regs->ax.
 * Always returns true.
 */
bool ex_handler_fault(const struct exception_table_entry *fixup,
		      struct pt_regs *regs, int trapnr)
{
	const unsigned long resume_ip = ex_fixup_addr(fixup);

	regs->ip = resume_ip;
	regs->ax = trapnr;

	return true;
}
EXPORT_SYMBOL_GPL(ex_handler_fault);
/*
 * Fixup used for the uaccess_err mechanism: flag the error in the current
 * thread_info, then resume at the entry's fixup address. @trapnr is not
 * looked at. Always returns true.
 */
bool ex_handler_ext(const struct exception_table_entry *fixup,
		    struct pt_regs *regs, int trapnr)
{
	unsigned long resume_ip;

	/* Special hack for uaccess_err */
	current_thread_info()->uaccess_err = 1;

	resume_ip = ex_fixup_addr(fixup);
	regs->ip = resume_ip;

	return true;
}
EXPORT_SYMBOL(ex_handler_ext);
/*
 * Report whether the instruction at @ip has an exception table entry whose
 * handler is ex_handler_fault. Returns false when no entry covers @ip or
 * when the entry uses any other handler.
 */
bool ex_has_fault_handler(unsigned long ip)
{
	const struct exception_table_entry *entry = search_exception_tables(ip);

	return entry && ex_fixup_handler(entry) == ex_handler_fault;
}
int fixup_exception(struct pt_regs *regs, int trapnr)
{
const struct exception_table_entry *e;
ex_handler_t handler;
#ifdef CONFIG_PNPBIOS #ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@ -33,42 +81,34 @@ int fixup_exception(struct pt_regs *regs)
} }
#endif #endif
fixup = search_exception_tables(regs->ip); e = search_exception_tables(regs->ip);
if (fixup) { if (!e)
new_ip = ex_fixup_addr(fixup); return 0;
if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) { handler = ex_fixup_handler(e);
/* Special hack for uaccess_err */ return handler(e, regs, trapnr);
current_thread_info()->uaccess_err = 1;
new_ip -= 0x7ffffff0;
}
regs->ip = new_ip;
return 1;
}
return 0;
} }
/* Restricted version used during very early boot */ /* Restricted version used during very early boot */
int __init early_fixup_exception(unsigned long *ip) int __init early_fixup_exception(unsigned long *ip)
{ {
const struct exception_table_entry *fixup; const struct exception_table_entry *e;
unsigned long new_ip; unsigned long new_ip;
ex_handler_t handler;
fixup = search_exception_tables(*ip); e = search_exception_tables(*ip);
if (fixup) { if (!e)
new_ip = ex_fixup_addr(fixup); return 0;
if (fixup->fixup - fixup->insn >= 0x7ffffff0 - 4) { new_ip = ex_fixup_addr(e);
/* uaccess handling not supported during early boot */ handler = ex_fixup_handler(e);
return 0;
}
*ip = new_ip; /* special handling not supported during early boot */
return 1; if (handler != ex_handler_default)
} return 0;
return 0; *ip = new_ip;
return 1;
} }
/* /*
@ -133,6 +173,8 @@ void sort_extable(struct exception_table_entry *start,
i += 4; i += 4;
p->fixup += i; p->fixup += i;
i += 4; i += 4;
p->handler += i;
i += 4;
} }
sort(start, finish - start, sizeof(struct exception_table_entry), sort(start, finish - start, sizeof(struct exception_table_entry),
@ -145,6 +187,8 @@ void sort_extable(struct exception_table_entry *start,
i += 4; i += 4;
p->fixup -= i; p->fixup -= i;
i += 4; i += 4;
p->handler -= i;
i += 4;
} }
} }

View File

@ -663,7 +663,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
int sig; int sig;
/* Are we prepared to handle this kernel fault? */ /* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs)) { if (fixup_exception(regs, X86_TRAP_PF)) {
/* /*
* Any interrupt that takes a fault gets the fixup. This makes * Any interrupt that takes a fault gets the fixup. This makes
* the below recursive fault logic only apply to a faults from * the below recursive fault logic only apply to a faults from

View File

@ -209,6 +209,35 @@ static int compare_relative_table(const void *a, const void *b)
return 0; return 0;
} }
/*
 * Sort an x86 exception table image in place.
 *
 * Each entry is three 32-bit fields (12 bytes). Before sorting, every field
 * has its own byte offset within the image added to it; after sorting, the
 * offset of the field's new position is subtracted again. Fields are
 * accessed only through r() and w(), which are defined elsewhere
 * (presumably to handle target byte order - confirm against their
 * definitions).
 *
 * @extab_image: start of the table image
 * @image_size:  size of the image in bytes
 */
static void x86_sort_relative_table(char *extab_image, int image_size)
{
	enum { FIELD_BYTES = 4, FIELDS_PER_ENTRY = 3, ENTRY_BYTES = 12 };
	int off;
	int k;

	/* Add each field's own offset within the image. */
	for (off = 0; off < image_size; off += ENTRY_BYTES) {
		uint32_t *entry = (uint32_t *)(extab_image + off);

		for (k = 0; k < FIELDS_PER_ENTRY; k++)
			w(r(entry + k) + off + FIELD_BYTES * k, entry + k);
	}

	qsort(extab_image, image_size / ENTRY_BYTES, ENTRY_BYTES,
	      compare_relative_table);

	/* Subtract the offset of the position each field now occupies. */
	for (off = 0; off < image_size; off += ENTRY_BYTES) {
		uint32_t *entry = (uint32_t *)(extab_image + off);

		for (k = 0; k < FIELDS_PER_ENTRY; k++)
			w(r(entry + k) - (off + FIELD_BYTES * k), entry + k);
	}
}
static void sort_relative_table(char *extab_image, int image_size) static void sort_relative_table(char *extab_image, int image_size)
{ {
int i; int i;
@ -281,6 +310,9 @@ do_file(char const *const fname)
break; break;
case EM_386: case EM_386:
case EM_X86_64: case EM_X86_64:
custom_sort = x86_sort_relative_table;
break;
case EM_S390: case EM_S390:
custom_sort = sort_relative_table; custom_sort = sort_relative_table;
break; break;