Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (80 commits) x86, mce: Add boot options for corrected errors x86, mce: Fix mce printing x86, mce: fix for mce counters x86, mce: support action-optional machine checks x86, mce: define MCE_VECTOR x86, mce: rename mce_notify_user to mce_notify_irq x86: fix panic with interrupts off (needed for MCE) x86, mce: export MCE severities coverage via debugfs x86, mce: implement new status bits x86, mce: print header/footer only once for multiple MCEs x86, mce: default to panic timeout for machine checks x86, mce: improve mce_get_rip x86, mce: make non Monarch panic message "Fatal machine check" too x86, mce: switch x86 machine check handler to Monarch election. x86, mce: implement panic synchronization x86, mce: implement bootstrapping for machine check wakeups x86, mce: check early in exception handler if panic is needed x86, mce: add table driven machine check grading x86, mce: remove TSC print heuristic x86, mce: log corrected errors when panicing ...
2009-06-13 13:14:51 -07:00 · 2009-06-13 13:14:51 -07:00 · a2ee2981ae
parent 7603ef03a2 0d5959723e
commit a2ee2981ae
39 changed files with 2973 additions and 1663 deletions
--- a/Documentation/Changes
+++ b/Documentation/Changes
@ -48,6 +48,7 @@ o  procps                 3.2.0                   # ps --version
 o  oprofile               0.9                     # oprofiled --version
 o  udev                   081                     # udevinfo -V
 o  grub                   0.93                    # grub --version
+o  mcelog		  0.6

 Kernel compilation
 ==================
@ -276,6 +277,16 @@ before running exportfs or mountd.  It is recommended that all NFS
 services be protected from the internet-at-large by a firewall where
 that is possible.

+mcelog
+------
+
+In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
+as a regular cronjob similar to the x86-64 kernel to process and log
+machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
+events are errors reported by the CPU. Processing them is strongly encouraged.
+All x86-64 kernels since 2.6.4 require the mcelog utility to
+process machine checks.
+
 Getting updated software
 ========================

@ -365,6 +376,10 @@ FUSE
 ----
 o <http://sourceforge.net/projects/fuse>

+mcelog
+------
+o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/>
+
 Networking
 **********

--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@ -437,3 +437,13 @@ Why:	Superseded by tdfxfb. I2C/DDC support used to live in a separate
 	driver but this caused driver conflicts.
 Who:	Jean Delvare <khali@linux-fr.org>
 	Krzysztof Helt <krzysztof.h1@wp.pl>
+
+----------------------------
+
+What:	CONFIG_X86_OLD_MCE
+When:	2.6.32
+Why:	Remove the old legacy 32bit machine check code. This has been
+	superseded by the newer machine check code from the 64bit port,
+	but the old version has been kept around for easier testing. Note this
+	doesn't impact the old P5 and WinChip machine check handlers.
+Who:	Andi Kleen <andi@firstfloor.org>
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@ -5,21 +5,51 @@ only the AMD64 specific ones are listed here.

 Machine check

-   mce=off disable machine check
-   mce=bootlog Enable logging of machine checks left over from booting.
-               Disabled by default on AMD because some BIOS leave bogus ones.
-               If your BIOS doesn't do that it's a good idea to enable though
-               to make sure you log even machine check events that result
-               in a reboot. On Intel systems it is enabled by default.
+   Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
+
+   mce=off
+		Disable machine check
+   mce=no_cmci
+		Disable CMCI(Corrected Machine Check Interrupt) that
+		Intel processor supports.  Usually this disablement is
+		not recommended, but it might be handy if your hardware
+		is misbehaving.
+		Note that you'll get more problems without CMCI than with
+		due to the shared banks, i.e. you might get duplicated
+		error logs.
+   mce=dont_log_ce
+		Don't make logs for corrected errors.  All events reported
+		as corrected are silently cleared by OS.
+		This option will be useful if you have no interest in any
+		of corrected errors.
+   mce=ignore_ce
+		Disable features for corrected errors, e.g. polling timer
+		and CMCI.  All events reported as corrected are not cleared
+		by OS and remained in its error banks.
+		Usually this disablement is not recommended, however if
+		there is an agent checking/clearing corrected errors
+		(e.g. BIOS or hardware monitoring applications), conflicting
+		with OS's error handling, and you cannot deactivate the agent,
+		then this option will be a help.
+   mce=bootlog
+		Enable logging of machine checks left over from booting.
+		Disabled by default on AMD because some BIOS leave bogus ones.
+		If your BIOS doesn't do that it's a good idea to enable though
+		to make sure you log even machine check events that result
+		in a reboot. On Intel systems it is enabled by default.
   mce=nobootlog
 		Disable boot machine check logging.
-   mce=tolerancelevel (number)
+   mce=tolerancelevel[,monarchtimeout] (number,number)
+		tolerance levels:
 		0: always panic on uncorrected errors, log corrected errors
 		1: panic or SIGBUS on uncorrected errors, log corrected errors
 		2: SIGBUS or log uncorrected errors, log corrected errors
 		3: never panic or SIGBUS, log all errors (for testing only)
 		Default is 1
 		Can be also set using sysfs which is preferable.
+		monarchtimeout:
+		Sets the time in us to wait for other CPUs on machine checks. 0
+		to disable.

   nomce (for compatibility with i386): same as mce=off

--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@ -41,7 +41,9 @@ check_interval
 	the polling interval.  When the poller stops finding MCEs, it
 	triggers an exponential backoff (poll less often) on the polling
 	interval. The check_interval variable is both the initial and
-	maximum polling interval.
+	maximum polling interval. 0 means no polling for corrected machine
+	check errors (but some corrected errors might be still reported
+	in other ways)

 tolerant
 	Tolerance level. When a machine check exception occurs for a non
@ -67,6 +69,10 @@ trigger
 	Program to run when a machine check event is detected.
 	This is an alternative to running mcelog regularly from cron
 	and allows to detect events faster.
+monarch_timeout
+	How long to wait for the other CPUs to machine check too on a
+	exception. 0 to disable waiting for other CPUs.
+	Unit: us

 TBD document entries for AMD threshold interrupt configuration

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@ -789,10 +789,26 @@ config X86_MCE
 	  to disable it.  MCE support simply ignores non-MCE processors like
 	  the 386 and 486, so nearly everyone can say Y here.

+config X86_OLD_MCE
+	depends on X86_32 && X86_MCE
+	bool "Use legacy machine check code (will go away)"
+	default n
+	select X86_ANCIENT_MCE
+	---help---
+	  Use the old i386 machine check code. This is merely intended for
+	  testing in a transition period. Try this if you run into any machine
+	  check related software problems, but report the problem to
+	  linux-kernel.  When in doubt say no.
+
+config X86_NEW_MCE
+	depends on X86_MCE
+	bool
+	default y if (!X86_OLD_MCE && X86_32) || X86_64
+
 config X86_MCE_INTEL
 	def_bool y
 	prompt "Intel MCE features"
-	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+	depends on X86_NEW_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for intel specific MCE features such as
 	   the thermal monitor.
@ -800,19 +816,36 @@ config X86_MCE_INTEL
 config X86_MCE_AMD
 	def_bool y
 	prompt "AMD MCE features"
-	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+	depends on X86_NEW_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.

+config X86_ANCIENT_MCE
+	def_bool n
+	depends on X86_32
+	prompt "Support for old Pentium 5 / WinChip machine checks"
+	---help---
+	  Include support for machine check handling on old Pentium 5 or WinChip
+	  systems. These typically need to be enabled explicitely on the command
+	  line.
+
 config X86_MCE_THRESHOLD
 	depends on X86_MCE_AMD || X86_MCE_INTEL
 	bool
 	default y

+config X86_MCE_INJECT
+	depends on X86_NEW_MCE
+	tristate "Machine check injector support"
+	---help---
+	  Provide support for injecting machine checks for testing purposes.
+	  If you don't know what a machine check is and you don't do kernel
+	  QA it is safe to say n.
+
 config X86_MCE_NONFATAL
 	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
-	depends on X86_32 && X86_MCE
+	depends on X86_OLD_MCE
 	---help---
 	  Enabling this feature starts a timer that triggers every 5 seconds which
 	  will look at the machine check registers to see if anything happened.
@ -825,11 +858,15 @@ config X86_MCE_NONFATAL

 config X86_MCE_P4THERMAL
 	bool "check for P4 thermal throttling interrupt."
-	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
+	depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
 	---help---
 	  Enabling this feature will cause a message to be printed when the P4
 	  enters thermal throttling.

+config X86_THERMAL_VECTOR
+	def_bool y
+	depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+
 config VM86
 	bool "Enable VM86 support" if EMBEDDED
 	default y
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)

 BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
 		 smp_invalidate_interrupt)
@ -52,8 +53,16 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif

-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_THERMAL_VECTOR
 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 #endif

+#ifdef CONFIG_X86_MCE_THRESHOLD
+BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
+#endif
+
+#ifdef CONFIG_X86_NEW_MCE
+BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
+#endif
+
 #endif
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@ -22,7 +22,7 @@ typedef struct {
 #endif
 #ifdef CONFIG_X86_MCE
 	unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
 	unsigned int irq_threshold_count;
 # endif
 #endif
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@ -34,6 +34,7 @@ extern void perf_pending_interrupt(void);
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
+extern void mce_self_interrupt(void);

 extern void invalidate_interrupt(void);
 extern void invalidate_interrupt0(void);
@ -46,6 +47,7 @@ extern void invalidate_interrupt6(void);
 extern void invalidate_interrupt7(void);

 extern void irq_move_cleanup_interrupt(void);
+extern void reboot_interrupt(void);
 extern void threshold_interrupt(void);

 extern void call_function_interrupt(void);
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@ -25,6 +25,7 @@
 */

 #define NMI_VECTOR			0x02
+#define MCE_VECTOR			0x12

 /*
 * IDT vectors usable for external interrupt sources start
@ -87,13 +88,8 @@
 #define CALL_FUNCTION_VECTOR		0xfc
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
-
-#ifdef CONFIG_X86_32
-/* 0xf8 - 0xf9 : free */
-#else
-# define THRESHOLD_APIC_VECTOR		0xf9
-# define UV_BAU_MESSAGE			0xf8
-#endif
+#define THRESHOLD_APIC_VECTOR		0xf9
+#define REBOOT_VECTOR			0xf8

 /* f0-f7 used for spreading out TLB flushes: */
 #define INVALIDATE_TLB_VECTOR_END	0xf7
@ -117,6 +113,13 @@
 */
 #define LOCAL_PENDING_VECTOR		0xec

+#define UV_BAU_MESSAGE			0xec
+
+/*
+ * Self IPI vector for machine checks
+ */
+#define MCE_SELF_VECTOR			0xeb
+
 /*
 * First APIC vector available to drivers: (vectors 0x30-0xee) we
 * start at 0x31(0x41) to spread out vectors evenly between priority
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@ -1,8 +1,6 @@
 #ifndef _ASM_X86_MCE_H
 #define _ASM_X86_MCE_H

-#ifdef __x86_64__
-
 #include <linux/types.h>
 #include <asm/ioctls.h>

@ -10,21 +8,35 @@
 * Machine Check support for x86
 */

-#define MCG_CTL_P	 (1UL<<8)   /* MCG_CAP register available */
-#define MCG_EXT_P	 (1ULL<<9)   /* Extended registers available */
-#define MCG_CMCI_P	 (1ULL<<10)  /* CMCI supported */
+#define MCG_BANKCNT_MASK	0xff         /* Number of Banks */
+#define MCG_CTL_P		(1ULL<<8)    /* MCG_CAP register available */
+#define MCG_EXT_P		(1ULL<<9)    /* Extended registers available */
+#define MCG_CMCI_P		(1ULL<<10)   /* CMCI supported */
+#define MCG_EXT_CNT_MASK	0xff0000     /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT	16
+#define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P	 	(1ULL<<24)   /* MCA recovery/new status bits */

-#define MCG_STATUS_RIPV  (1UL<<0)   /* restart ip valid */
-#define MCG_STATUS_EIPV  (1UL<<1)   /* ip points to correct instruction */
-#define MCG_STATUS_MCIP  (1UL<<2)   /* machine check in progress */
+#define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
+#define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
+#define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */

-#define MCI_STATUS_VAL   (1UL<<63)  /* valid error */
-#define MCI_STATUS_OVER  (1UL<<62)  /* previous errors lost */
-#define MCI_STATUS_UC    (1UL<<61)  /* uncorrected error */
-#define MCI_STATUS_EN    (1UL<<60)  /* error enabled */
-#define MCI_STATUS_MISCV (1UL<<59)  /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1UL<<58)  /* addr reg. valid */
-#define MCI_STATUS_PCC   (1UL<<57)  /* processor context corrupt */
+#define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
+#define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
+#define MCI_STATUS_UC    (1ULL<<61)  /* uncorrected error */
+#define MCI_STATUS_EN    (1ULL<<60)  /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
+#define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
+#define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
+#define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF  0	/* segment offset */
+#define MCM_ADDR_LINEAR  1	/* linear address */
+#define MCM_ADDR_PHYS	 2	/* physical address */
+#define MCM_ADDR_MEM	 3	/* memory address */
+#define MCM_ADDR_GENERIC 7	/* generic */

 /* Fields are zero when not available */
 struct mce {
@ -34,13 +46,19 @@ struct mce {
 	__u64 mcgstatus;
 	__u64 ip;
 	__u64 tsc;	/* cpu time stamp counter */
-	__u64 res1;	/* for future extension */
-	__u64 res2;	/* dito. */
+	__u64 time;	/* wall time_t when error was detected */
+	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
+	__u8  pad1;
+	__u16 pad2;
+	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;		/* code segment */
 	__u8  bank;	/* machine check bank */
-	__u8  cpu;	/* cpu that raised the error */
+	__u8  cpu;	/* cpu number; obsolete; use extcpu now */
 	__u8  finished;   /* entry is valid */
-	__u32 pad;
+	__u32 extcpu;	/* linux cpu number that detected the error */
+	__u32 socketid;	/* CPU socket ID */
+	__u32 apicid;	/* CPU initial apic ID */
+	__u64 mcgcap;	/* MCGCAP MSR: machine check capabilities of CPU */
 };

 /*
@ -57,7 +75,7 @@ struct mce_log {
 	unsigned len;	    /* = MCE_LOG_LEN */
 	unsigned next;
 	unsigned flags;
-	unsigned pad0;
+	unsigned recordlen;	/* length of struct mce */
 	struct mce entry[MCE_LOG_LEN];
 };

@ -82,19 +100,16 @@ struct mce_log {
 #define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
 #define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)

-#endif /* __x86_64__ */
-
 #ifdef __KERNEL__

-#ifdef CONFIG_X86_32
 extern int mce_disabled;
-#else /* CONFIG_X86_32 */

 #include <asm/atomic.h>
+#include <linux/percpu.h>

 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, device_mce);
+DECLARE_PER_CPU(struct sys_device, mce_dev);
 extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

 /*
@ -104,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)

 #ifdef CONFIG_X86_MCE_INTEL
+extern int mce_cmci_disabled;
+extern int mce_ignore_ce;
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
@ -123,13 +140,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif

-extern int mce_available(struct cpuinfo_x86 *c);
+int mce_available(struct cpuinfo_x86 *c);
+
+DECLARE_PER_CPU(unsigned, mce_exception_count);
+DECLARE_PER_CPU(unsigned, mce_poll_count);

 void mce_log_therm_throt_event(__u64 status);

 extern atomic_t mce_entry;

-extern void do_machine_check(struct pt_regs *, long);
+void do_machine_check(struct pt_regs *, long);

 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
@ -139,14 +159,16 @@ enum mcp_flags {
 	MCP_UC = (1 << 1),		/* log uncorrected errors */
 	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
 };
-extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);

-extern int mce_notify_user(void);
+int mce_notify_irq(void);
+void mce_notify_process(void);

-#endif /* !CONFIG_X86_32 */
+DECLARE_PER_CPU(struct mce, injectm);
+extern struct file_operations mce_chrdev_ops;

 #ifdef CONFIG_X86_MCE
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
 #else
 #define mcheck_init(c) do { } while (0)
 #endif
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@ -207,7 +207,14 @@

 #define MSR_IA32_THERM_CONTROL		0x0000019a
 #define MSR_IA32_THERM_INTERRUPT	0x0000019b
+
+#define THERM_INT_LOW_ENABLE		(1 << 0)
+#define THERM_INT_HIGH_ENABLE		(1 << 1)
+
 #define MSR_IA32_THERM_STATUS		0x0000019c
+
+#define THERM_STATUS_PROCHOT		(1 << 0)
+
 #define MSR_IA32_MISC_ENABLE		0x000001a0

 /* MISC_ENABLE bits: architectural */
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@ -899,7 +899,7 @@ void clear_local_APIC(void)
 	}

 	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	if (maxlvt >= 5) {
 		v = apic_read(APIC_LVTTHMR);
 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@ -2017,7 +2017,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	if (maxlvt >= 5)
 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)

 static inline int mce_in_progress(void)
 {
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#if defined(CONFIG_X86_NEW_MCE)
 	return atomic_read(&mce_entry) > 0;
 #endif
 	return 0;
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@ -1,7 +1,11 @@
-obj-y				=  mce_$(BITS).o therm_throt.o
+obj-y				=  mce.o therm_throt.o

-obj-$(CONFIG_X86_32)		+= k7.o p4.o p5.o p6.o winchip.o
-obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o
+obj-$(CONFIG_X86_NEW_MCE)	+= mce-severity.o
+obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
+obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
+obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
+obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@ -2,11 +2,10 @@
 * Athlon specific Machine Check Exception Reporting
 * (C) Copyright 2002 Dave Jones <davej@redhat.com>
 */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>

 #include <asm/processor.h>
@ -15,12 +14,12 @@

 #include "mce.h"

-/* Machine Check Handler For AMD Athlon/Duron */
+/* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;

 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@ -32,15 +31,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)

 	for (i = 1; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-		if (high&(1<<31)) {
+		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-			misc[0] = addr[0] = '\0';
+
+			misc[0] = '\0';
+			addr[0] = '\0';
+
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
+
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@ -49,27 +52,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
+
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
-			/* Clear it */
+
+			/* Clear it: */
 			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-			/* Serialize */
+			/* Serialize: */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
 	}

-	if (recover&2)
+	if (recover & 2)
 		panic("CPU context corrupt");
-	if (recover&1)
+	if (recover & 1)
 		panic("Unable to continue");
+
 	printk(KERN_EMERG "Attempting to continue.\n");
+
 	mcgstl &= ~(1<<2);
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }


-/* AMD K7 machine check is Intel like */
+/* AMD K7 machine check is Intel like: */
 void amd_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@ -79,21 +86,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
 		return;

 	machine_check_vector = k7_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();

 	printk(KERN_INFO "Intel machine check architecture supported.\n");
+
 	rdmsr(MSR_IA32_MCG_CAP, l, h);
 	if (l & (1<<8))	/* Control register present ? */
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 	nr_mce_banks = l & 0xff;

-	/* Clear status for MC index 0 separately, we don't touch CTL,
-	 * as some K7 Athlons cause spurious MCEs when its enabled. */
+	/*
+	 * Clear status for MC index 0 separately, we don't touch CTL,
+	 * as some K7 Athlons cause spurious MCEs when its enabled:
+	 */
 	if (boot_cpu_data.x86 == 6) {
 		wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
 		i = 1;
 	} else
 		i = 0;
+
 	for (; i < nr_mce_banks; i++) {
 		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
 		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@ -0,0 +1,127 @@
+/*
+ * Machine check injection support.
+ * Copyright 2008 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Authors:
+ * Andi Kleen
+ * Ying Huang
+ */
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <asm/mce.h>
+
+/* Update fake mce registers on current CPU. */
+static void inject_mce(struct mce *m)
+{
+	struct mce *i = &per_cpu(injectm, m->extcpu);
+
+	/* Make sure noone reads partially written injectm */
+	i->finished = 0;
+	mb();
+	m->finished = 0;
+	/* First set the fields after finished */
+	i->extcpu = m->extcpu;
+	mb();
+	/* Now write record in order, finished last (except above) */
+	memcpy(i, m, sizeof(struct mce));
+	/* Finally activate it */
+	mb();
+	i->finished = 1;
+}
+
+struct delayed_mce {
+	struct timer_list timer;
+	struct mce m;
+};
+
+/* Inject mce on current CPU */
+static void raise_mce(unsigned long data)
+{
+	struct delayed_mce *dm = (struct delayed_mce *)data;
+	struct mce *m = &dm->m;
+	int cpu = m->extcpu;
+
+	inject_mce(m);
+	if (m->status & MCI_STATUS_UC) {
+		struct pt_regs regs;
+		memset(&regs, 0, sizeof(struct pt_regs));
+		regs.ip = m->ip;
+		regs.cs = m->cs;
+		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+		do_machine_check(&regs, 0);
+		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+	} else {
+		mce_banks_t b;
+		memset(&b, 0xff, sizeof(mce_banks_t));
+		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+		machine_check_poll(0, &b);
+		mce_notify_irq();
+		printk(KERN_INFO "Finished machine check poll on CPU %d\n",
+		       cpu);
+	}
+	kfree(dm);
+}
+
+/* Error injection interface */
+static ssize_t mce_write(struct file *filp, const char __user *ubuf,
+			 size_t usize, loff_t *off)
+{
+	struct delayed_mce *dm;
+	struct mce m;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	/*
+	 * There are some cases where real MSR reads could slip
+	 * through.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
+		return -EIO;
+
+	if ((unsigned long)usize > sizeof(struct mce))
+		usize = sizeof(struct mce);
+	if (copy_from_user(&m, ubuf, usize))
+		return -EFAULT;
+
+	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
+		return -EINVAL;
+
+	dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
+	if (!dm)
+		return -ENOMEM;
+
+	/*
+	 * Need to give user space some time to set everything up,
+	 * so do it a jiffie or two later everywhere.
+	 * Should we use a hrtimer here for better synchronization?
+	 */
+	memcpy(&dm->m, &m, sizeof(struct mce));
+	setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
+	dm->timer.expires = jiffies + 2;
+	add_timer_on(&dm->timer, m.extcpu);
+	return usize;
+}
+
+static int inject_init(void)
+{
+	printk(KERN_INFO "Machine check injector initialized\n");
+	mce_chrdev_ops.write = mce_write;
+	return 0;
+}
+
+module_init(inject_init);
+/*
+ * Cannot tolerate unloading currently because we cannot
+ * guarantee all openers of mce_chrdev will get a reference to us.
+ */
+MODULE_LICENSE("GPL");
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@ -0,0 +1,15 @@
+#include <asm/mce.h>
+
+enum severity_level {
+	MCE_NO_SEVERITY,
+	MCE_KEEP_SEVERITY,
+	MCE_SOME_SEVERITY,
+	MCE_AO_SEVERITY,
+	MCE_UC_SEVERITY,
+	MCE_AR_SEVERITY,
+	MCE_PANIC_SEVERITY,
+};
+
+int mce_severity(struct mce *a, int tolerant, char **msg);
+
+extern int mce_ser;
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@ -0,0 +1,218 @@
+/*
+ * MCE grading rules.
+ * Copyright 2008, 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Author: Andi Kleen
+ */
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+/*
+ * Grade an mce by severity. In general the most severe ones are processed
+ * first. Since there are quite a lot of combinations test the bits in a
+ * table-driven way. The rules are simply processed in order, first
+ * match wins.
+ *
+ * Note this is only used for machine check exceptions, the corrected
+ * errors use much simpler rules. The exceptions still check for the corrected
+ * errors, but only to leave them alone for the CMCI handler (except for
+ * panic situations)
+ */
+
+enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+
+static struct severity {
+	u64 mask;
+	u64 result;
+	unsigned char sev;
+	unsigned char mcgmask;
+	unsigned char mcgres;
+	unsigned char ser;
+	unsigned char context;
+	unsigned char covered;
+	char *msg;
+} severities[] = {
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
+#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
+#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
+#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
+#define MCGMASK(x, res, s, m, r...) \
+	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
+#define MASK(x, y, s, m, r...) \
+	{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCACOD 0xffff
+
+	BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
+	BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
+	BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
+	/* When MCIP is not set something is very confused */
+	MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+	/* Neither return not error IP -- no chance to recover -> PANIC */
+	MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
+		"Neither restart nor error IP"),
+	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
+		KERNEL),
+	BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+	MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
+	     "Spurious not enabled", SER),
+
+	/* ignore OVER for UCNA */
+	MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
+	     "Uncorrected no action required", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
+	     "Illegal combination (UCNA with AR=1)", SER),
+	MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+
+	/* AR add known MCACODs here */
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
+	     "Action required with lost events", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+	     "Action required; unknown MCACOD", SER),
+
+	/* known AO MCACODs: */
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
+	     "Action optional: memory scrubbing error", SER),
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
+	     "Action optional: last level cache writeback error", SER),
+
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
+	     "Action optional unknown MCACOD", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
+	     "Action optional with lost events", SER),
+	BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
+	BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
+	BITSET(0, SOME, "No match")	/* always matches. keep at end */
+};
+
+/*
+ * If the EIPV bit is set, it means the saved IP is the
+ * instruction which caused the MCE.
+ */
+static int error_context(struct mce *m)
+{
+	if (m->mcgstatus & MCG_STATUS_EIPV)
+		return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+	/* Unknown, assume kernel */
+	return IN_KERNEL;
+}
+
+int mce_severity(struct mce *a, int tolerant, char **msg)
+{
+	enum context ctx = error_context(a);
+	struct severity *s;
+
+	for (s = severities;; s++) {
+		if ((a->status & s->mask) != s->result)
+			continue;
+		if ((a->mcgstatus & s->mcgmask) != s->mcgres)
+			continue;
+		if (s->ser == SER_REQUIRED && !mce_ser)
+			continue;
+		if (s->ser == NO_SER && mce_ser)
+			continue;
+		if (s->context && ctx != s->context)
+			continue;
+		if (msg)
+			*msg = s->msg;
+		s->covered = 1;
+		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
+			if (panic_on_oops || tolerant < 1)
+				return MCE_PANIC_SEVERITY;
+		}
+		return s->sev;
+	}
+}
+
+static void *s_start(struct seq_file *f, loff_t *pos)
+{
+	if (*pos >= ARRAY_SIZE(severities))
+		return NULL;
+	return &severities[*pos];
+}
+
+static void *s_next(struct seq_file *f, void *data, loff_t *pos)
+{
+	if (++(*pos) >= ARRAY_SIZE(severities))
+		return NULL;
+	return &severities[*pos];
+}
+
+static void s_stop(struct seq_file *f, void *data)
+{
+}
+
+static int s_show(struct seq_file *f, void *data)
+{
+	struct severity *ser = data;
+	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
+	return 0;
+}
+
+static const struct seq_operations severities_seq_ops = {
+	.start	= s_start,
+	.next	= s_next,
+	.stop	= s_stop,
+	.show	= s_show,
+};
+
+static int severities_coverage_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &severities_seq_ops);
+}
+
+static ssize_t severities_coverage_write(struct file *file,
+					 const char __user *ubuf,
+					 size_t count, loff_t *ppos)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(severities); i++)
+		severities[i].covered = 0;
+	return count;
+}
+
+static const struct file_operations severities_coverage_fops = {
+	.open		= severities_coverage_open,
+	.release	= seq_release,
+	.read		= seq_read,
+	.write		= severities_coverage_write,
+};
+
+static int __init severities_debugfs_init(void)
+{
+	struct dentry *dmce = NULL, *fseverities_coverage = NULL;
+
+	dmce = debugfs_create_dir("mce", NULL);
+	if (dmce == NULL)
+		goto err_out;
+	fseverities_coverage = debugfs_create_file("severities-coverage",
+						   0444, dmce, NULL,
+						   &severities_coverage_fops);
+	if (fseverities_coverage == NULL)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	if (fseverities_coverage)
+		debugfs_remove(fseverities_coverage);
+	if (dmce)
+		debugfs_remove(dmce);
+	return -ENOMEM;
+}
+late_initcall(severities_debugfs_init);
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@ -1,14 +1,38 @@
 #include <linux/init.h>
 #include <asm/mce.h>

+#ifdef CONFIG_X86_OLD_MCE
 void amd_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+#endif
+
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
+extern int mce_p5_enable;
+static inline int mce_p5_enabled(void) { return mce_p5_enable; }
+static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline int mce_p5_enabled(void) { return 0; }
+static inline void enable_p5_mce(void) { }
+#endif

 /* Call the installed machine check handler for this CPU setup. */
 extern void (*machine_check_vector)(struct pt_regs *, long error_code);

+#ifdef CONFIG_X86_OLD_MCE
+
 extern int nr_mce_banks;

+void intel_set_thermal_handler(void);
+
+#else
+
+static inline void intel_set_thermal_handler(void) { }
+
+#endif
+
+void intel_init_thermal(struct cpuinfo_x86 *c);
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@ -1,76 +0,0 @@
-/*
- * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-
-#include "mce.h"
-
-int mce_disabled;
-int nr_mce_banks;
-
-EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
-
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
-
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
-	if (mce_disabled == 1)
-		return;
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_INTEL:
-		if (c->x86 == 5)
-			intel_p5_mcheck_init(c);
-		if (c->x86 == 6)
-			intel_p6_mcheck_init(c);
-		if (c->x86 == 15)
-			intel_p4_mcheck_init(c);
-		break;
-
-	case X86_VENDOR_CENTAUR:
-		if (c->x86 == 5)
-			winchip_mcheck_init(c);
-		break;
-
-	default:
-		break;
-	}
-}
-
-static int __init mcheck_disable(char *str)
-{
-	mce_disabled = 1;
-	return 1;
-}
-
-static int __init mcheck_enable(char *str)
-{
-	mce_disabled = -1;
-	return 1;
-}
-
-__setup("nomce", mcheck_disable);
-__setup("mce", mcheck_enable);
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@ -13,22 +13,22 @@
 *
 *  All MC4_MISCi registers are shared between multi-cores
 */
-
-#include <linux/cpu.h>
-#include <linux/errno.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/kobject.h>
 #include <linux/notifier.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
+#include <linux/kobject.h>
+#include <linux/percpu.h>
 #include <linux/sysdev.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
 #include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+
 #include <asm/apic.h>
+#include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
-#include <asm/percpu.h>
-#include <asm/idle.h>

 #define PFX               "mce_threshold: "
 #define VERSION           "version 1.1.1"
@ -48,26 +48,26 @@
 #define MCG_XBLK_ADDR     0xC0000400

 struct threshold_block {
-	unsigned int block;
-	unsigned int bank;
-	unsigned int cpu;
-	u32 address;
-	u16 interrupt_enable;
-	u16 threshold_limit;
-	struct kobject kobj;
-	struct list_head miscj;
+	unsigned int		block;
+	unsigned int		bank;
+	unsigned int		cpu;
+	u32			address;
+	u16			interrupt_enable;
+	u16			threshold_limit;
+	struct kobject		kobj;
+	struct list_head	miscj;
 };

 /* defaults used early on boot */
 static struct threshold_block threshold_defaults = {
-	.interrupt_enable = 0,
-	.threshold_limit = THRESHOLD_MAX,
+	.interrupt_enable	= 0,
+	.threshold_limit	= THRESHOLD_MAX,
 };

 struct threshold_bank {
-	struct kobject *kobj;
-	struct threshold_block *blocks;
-	cpumask_var_t cpus;
+	struct kobject		*kobj;
+	struct threshold_block	*blocks;
+	cpumask_var_t		cpus;
 };
 static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);

@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void);
 */

 struct thresh_restart {
-	struct threshold_block *b;
-	int reset;
-	u16 old_limit;
+	struct threshold_block	*b;
+	int			reset;
+	u16			old_limit;
 };

 /* must be called with correct cpu affinity */
@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr)
 	} else if (tr->old_limit) {	/* change limit w/o reset */
 		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
 		    (tr->old_limit - tr->b->threshold_limit);
+
 		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr)
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
-	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
-	u8 lvt_off;
 	u32 low = 0, high = 0, address = 0;
+	unsigned int bank, block;
 	struct thresh_restart tr;
+	u8 lvt_off;

 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			}
-			else
+			} else
 				++address;

 			if (rdmsr_safe(address, &low, &high))
@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 */
 static void amd_threshold_interrupt(void)
 {
+	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
 	struct mce m;
-	u32 low = 0, high = 0, address = 0;

 	mce_setup(&m);

@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void)
 		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			if (block == 0)
+			if (block == 0) {
 				address = MSR_IA32_MC0_MISC + bank * 4;
-			else if (block == 1) {
+			} else if (block == 1) {
 				address = (low & MASK_BLKPTR_LO) >> 21;
 				if (!address)
 					break;
 				address += MCG_XBLK_ADDR;
-			}
-			else
+			} else {
 				++address;
+			}

 			if (rdmsr_safe(address, &low, &high))
 				break;
@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void)
 			     (high & MASK_LOCKED_HI))
 				continue;

-			/* Log the machine check that caused the threshold
-			   event. */
+			/*
+			 * Log the machine check that caused the threshold
+			 * event.
+			 */
 			machine_check_poll(MCP_TIMESTAMP,
 					&__get_cpu_var(mce_poll_banks));

@ -254,48 +256,52 @@ static void amd_threshold_interrupt(void)

 struct threshold_attr {
 	struct attribute attr;
-	ssize_t(*show) (struct threshold_block *, char *);
-	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
+	ssize_t (*show) (struct threshold_block *, char *);
+	ssize_t (*store) (struct threshold_block *, const char *, size_t count);
 };

-#define SHOW_FIELDS(name)                                           \
-static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
-{                                                                   \
-        return sprintf(buf, "%lx\n", (unsigned long) b->name);      \
+#define SHOW_FIELDS(name)						\
+static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\
+{									\
+	return sprintf(buf, "%lx\n", (unsigned long) b->name);		\
 }
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)

-static ssize_t store_interrupt_enable(struct threshold_block *b,
-				      const char *buf, size_t count)
+static ssize_t
+store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
 {
-	char *end;
 	struct thresh_restart tr;
-	unsigned long new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
+	unsigned long new;
+
+	if (strict_strtoul(buf, 0, &new) < 0)
 		return -EINVAL;
+
 	b->interrupt_enable = !!new;

-	tr.b = b;
-	tr.reset = 0;
-	tr.old_limit = 0;
+	tr.b		= b;
+	tr.reset	= 0;
+	tr.old_limit	= 0;
+
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

-	return end - buf;
+	return size;
 }

-static ssize_t store_threshold_limit(struct threshold_block *b,
-				     const char *buf, size_t count)
+static ssize_t
+store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
 {
-	char *end;
 	struct thresh_restart tr;
-	unsigned long new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
+	unsigned long new;
+
+	if (strict_strtoul(buf, 0, &new) < 0)
 		return -EINVAL;
+
 	if (new > THRESHOLD_MAX)
 		new = THRESHOLD_MAX;
 	if (new < 1)
 		new = 1;
+
 	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
 	tr.b = b;
@ -303,12 +309,12 @@ static ssize_t store_threshold_limit(struct threshold_block *b,

 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

-	return end - buf;
+	return size;
 }

 struct threshold_block_cross_cpu {
-	struct threshold_block *tb;
-	long retval;
+	struct threshold_block	*tb;
+	long			retval;
 };

 static void local_error_count_handler(void *_tbcc)
@ -338,16 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b,
 	return 1;
 }

-#define THRESHOLD_ATTR(_name,_mode,_show,_store) {            \
-        .attr = {.name = __stringify(_name), .mode = _mode }, \
-        .show = _show,                                        \
-        .store = _store,                                      \
+#define RW_ATTR(val)							\
+static struct threshold_attr val = {					\
+	.attr	= {.name = __stringify(val), .mode = 0644 },		\
+	.show	= show_## val,						\
+	.store	= store_## val,						\
 };

-#define RW_ATTR(name)                                           \
-static struct threshold_attr name =                             \
-        THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
-
 RW_ATTR(interrupt_enable);
 RW_ATTR(threshold_limit);
 RW_ATTR(error_count);
@ -359,15 +362,17 @@ static struct attribute *default_attrs[] = {
 	NULL
 };

-#define to_block(k) container_of(k, struct threshold_block, kobj)
-#define to_attr(a) container_of(a, struct threshold_attr, attr)
+#define to_block(k)	container_of(k, struct threshold_block, kobj)
+#define to_attr(a)	container_of(a, struct threshold_attr, attr)

 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
+
 	ret = a->show ? a->show(b, buf) : -EIO;
+
 	return ret;
 }

@ -377,18 +382,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
+
 	ret = a->store ? a->store(b, buf, count) : -EIO;
+
 	return ret;
 }

 static struct sysfs_ops threshold_ops = {
-	.show = show,
-	.store = store,
+	.show			= show,
+	.store			= store,
 };

 static struct kobj_type threshold_ktype = {
-	.sysfs_ops = &threshold_ops,
-	.default_attrs = default_attrs,
+	.sysfs_ops		= &threshold_ops,
+	.default_attrs		= default_attrs,
 };

 static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
@ -396,9 +403,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 					       unsigned int block,
 					       u32 address)
 {
-	int err;
-	u32 low, high;
 	struct threshold_block *b = NULL;
+	u32 low, high;
+	int err;

 	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
 		return 0;
@ -421,20 +428,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 	if (!b)
 		return -ENOMEM;

-	b->block = block;
-	b->bank = bank;
-	b->cpu = cpu;
-	b->address = address;
-	b->interrupt_enable = 0;
-	b->threshold_limit = THRESHOLD_MAX;
+	b->block		= block;
+	b->bank			= bank;
+	b->cpu			= cpu;
+	b->address		= address;
+	b->interrupt_enable	= 0;
+	b->threshold_limit	= THRESHOLD_MAX;

 	INIT_LIST_HEAD(&b->miscj);

-	if (per_cpu(threshold_banks, cpu)[bank]->blocks)
+	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
 		list_add(&b->miscj,
 			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
-	else
+	} else {
 		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
+	}

 	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
 				   per_cpu(threshold_banks, cpu)[bank]->kobj,
@ -447,8 +455,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 		if (!address)
 			return 0;
 		address += MCG_XBLK_ADDR;
-	} else
+	} else {
 		++address;
+	}

 	err = allocate_threshold_blocks(cpu, bank, ++block, address);
 	if (err)
@ -500,13 +509,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (!b)
 			goto out;

-		err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
+		err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;

 		cpumask_copy(b->cpus, cpu_core_mask(cpu));
 		per_cpu(threshold_banks, cpu)[bank] = b;
+
 		goto out;
 	}
 #endif
@ -522,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		goto out;
 	}

-	b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
+	b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
 	if (!b->kobj)
 		goto out_free;

@ -542,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (i == cpu)
 			continue;

-		err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
+		err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
 					b->kobj, name);
 		if (err)
 			goto out;
@ -605,15 +615,13 @@ static void deallocate_threshold_block(unsigned int cpu,

 static void threshold_remove_bank(unsigned int cpu, int bank)
 {
-	int i = 0;
 	struct threshold_bank *b;
 	char name[32];
+	int i = 0;

 	b = per_cpu(threshold_banks, cpu)[bank];
-
 	if (!b)
 		return;
-
 	if (!b->blocks)
 		goto free_out;

@ -622,8 +630,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 #ifdef CONFIG_SMP
 	/* sibling symlink */
 	if (shared_bank[bank] && b->blocks->cpu != cpu) {
-		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
 		per_cpu(threshold_banks, cpu)[bank] = NULL;
+
 		return;
 	}
 #endif
@ -633,7 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 		if (i == cpu)
 			continue;

-		sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
+		sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
 		per_cpu(threshold_banks, i)[bank] = NULL;
 	}

@ -659,12 +668,9 @@ static void threshold_remove_device(unsigned int cpu)
 }

 /* get notified when a cpu comes on/off */
-static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
-						     unsigned int cpu)
+static void __cpuinit
+amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
 {
-	if (cpu >= NR_CPUS)
-		return;
-
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
@ -686,11 +692,12 @@ static __init int threshold_init_device(void)
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 		int err = threshold_create_device(lcpu);
+
 		if (err)
 			return err;
 	}
 	threshold_cpu_callback = amd_64_threshold_cpu_callback;
+
 	return 0;
 }
-
 device_initcall(threshold_init_device);
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@ -0,0 +1,74 @@
+/*
+ * Common code for Intel machine checks
+ */
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/therm_throt.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/apic.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+	unsigned int cpu = smp_processor_id();
+	int tm2 = 0;
+	u32 l, h;
+
+	/* Thermal monitoring depends on ACPI and clock modulation*/
+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+		return;
+
+	/*
+	 * First check if its enabled already, in which case there might
+	 * be some SMM goo which handles it, so we can't even put a handler
+	 * since it might be delivered via SMI already:
+	 */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	h = apic_read(APIC_LVTTHMR);
+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+		return;
+	}
+
+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+		tm2 = 1;
+
+	/* Check whether a vector already exists */
+	if (h & APIC_VECTOR_MASK) {
+		printk(KERN_DEBUG
+		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
+		       cpu, (h & APIC_VECTOR_MASK));
+		return;
+	}
+
+	/* We'll mask the thermal vector in the lapic till we're ready: */
+	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+	apic_write(APIC_LVTTHMR, h);
+
+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+	wrmsr(MSR_IA32_THERM_INTERRUPT,
+		l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+
+	intel_set_thermal_handler();
+
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+	/* Unmask the thermal vector: */
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+	       cpu, tm2 ? "TM2" : "TM1");
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
+}
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@ -16,6 +16,8 @@
 #include <asm/idle.h>
 #include <asm/therm_throt.h>

+#include "mce.h"
+
 asmlinkage void smp_thermal_interrupt(void)
 {
 	__u64 msr_val;
@ -26,67 +28,13 @@ asmlinkage void smp_thermal_interrupt(void)
 	irq_enter();

 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	if (therm_throt_process(msr_val & 1))
+	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
 		mce_log_therm_throt_event(msr_val);

 	inc_irq_stat(irq_thermal_count);
 	irq_exit();
 }

-static void intel_init_thermal(struct cpuinfo_x86 *c)
-{
-	u32 l, h;
-	int tm2 = 0;
-	unsigned int cpu = smp_processor_id();
-
-	if (!cpu_has(c, X86_FEATURE_ACPI))
-		return;
-
-	if (!cpu_has(c, X86_FEATURE_ACC))
-		return;
-
-	/* first check if TM1 is already enabled by the BIOS, in which
-	 * case there might be some SMM goo which handles it, so we can't even
-	 * put a handler since it might be delivered via SMI already.
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
-		return;
-	}
-
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already "
-		       "installed\n", cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
-	h = THERMAL_APIC_VECTOR;
-	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
-		cpu, tm2 ? "TM2" : "TM1");
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-	return;
-}
-
 /*
 * Support for Intel Correct Machine Check Interrupts. This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
@ -108,6 +56,9 @@ static int cmci_supported(int *banks)
 {
 	u64 cap;

+	if (mce_cmci_disabled || mce_ignore_ce)
+		return 0;
+
 	/*
 	 * Vendor check is not strictly needed, but the initial
 	 * initialization is vendor keyed and this
@ -131,7 +82,7 @@ static int cmci_supported(int *banks)
 static void intel_threshold_interrupt(void)
 {
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
-	mce_notify_user();
+	mce_notify_irq();
 }

 static void print_update(char *type, int *hdr, int num)
@ -247,7 +198,7 @@ void cmci_rediscover(int dying)
 		return;
 	cpumask_copy(old, &current->cpus_allowed);

-	for_each_online_cpu (cpu) {
+	for_each_online_cpu(cpu) {
 		if (cpu == dying)
 			continue;
 		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@ -6,15 +6,14 @@
 * This file contains routines to check for non-fatal MCEs every 15s
 *
 */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/workqueue.h>
 #include <linux/interrupt.h>
-#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>

 #include <asm/processor.h>
 #include <asm/system.h>
@ -22,9 +21,9 @@

 #include "mce.h"

-static int firstbank;
+static int		firstbank;

-#define MCE_RATE	15*HZ	/* timer rate is 15s */
+#define MCE_RATE	(15*HZ)	/* timer rate is 15s */

 static void mce_checkregs(void *info)
 {
@ -34,23 +33,24 @@ static void mce_checkregs(void *info)
 	for (i = firstbank; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);

-		if (high & (1<<31)) {
-			printk(KERN_INFO "MCE: The hardware reports a non "
-				"fatal, correctable incident occurred on "
-				"CPU %d.\n",
+		if (!(high & (1<<31)))
+			continue;
+
+		printk(KERN_INFO "MCE: The hardware reports a non fatal, "
+			"correctable incident occurred on CPU %d.\n",
 				smp_processor_id());
-			printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);

-			/*
-			 * Scrub the error so we don't pick it up in MCE_RATE
-			 * seconds time.
-			 */
-			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+		printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);

-			/* Serialize */
-			wmb();
-			add_taint(TAINT_MACHINE_CHECK);
-		}
+		/*
+		 * Scrub the error so we don't pick it up in MCE_RATE
+		 * seconds time:
+		 */
+		wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+		/* Serialize: */
+		wmb();
+		add_taint(TAINT_MACHINE_CHECK);
 	}
 }

@ -77,16 +77,17 @@ static int __init init_nonfatal_mce_checker(void)

 	/* Some Athlons misbehave when we frob bank 0 */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		boot_cpu_data.x86 == 6)
-			firstbank = 1;
+						boot_cpu_data.x86 == 6)
+		firstbank = 1;
 	else
-			firstbank = 0;
+		firstbank = 0;

 	/*
 	 * Check for non-fatal errors every MCE_RATE s
 	 */
 	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
 	printk(KERN_INFO "Machine check exception polling timer started.\n");
+
 	return 0;
 }
 module_init(init_nonfatal_mce_checker);
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@ -2,18 +2,17 @@
 * P4 specific Machine Check Exception Reporting
 */

-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>

+#include <asm/therm_throt.h>
 #include <asm/processor.h>
 #include <asm/system.h>
-#include <asm/msr.h>
 #include <asm/apic.h>
-
-#include <asm/therm_throt.h>
+#include <asm/msr.h>

 #include "mce.h"

@ -36,6 +35,7 @@ static int mce_num_extended_msrs;


 #ifdef CONFIG_X86_MCE_P4THERMAL
+
 static void unexpected_thermal_interrupt(struct pt_regs *regs)
 {
 	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
 	add_taint(TAINT_MACHINE_CHECK);
 }

-/* P4/Xeon Thermal transition interrupt handler */
+/* P4/Xeon Thermal transition interrupt handler: */
 static void intel_thermal_interrupt(struct pt_regs *regs)
 {
 	__u64 msr_val;
@ -51,11 +51,12 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
 	ack_APIC_irq();

 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
-	therm_throt_process(msr_val & 0x1);
+	therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
 }

-/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+/* Thermal interrupt handler for this CPU setup: */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
+						unexpected_thermal_interrupt;

 void smp_thermal_interrupt(struct pt_regs *regs)
 {
@ -65,67 +66,15 @@ void smp_thermal_interrupt(struct pt_regs *regs)
 	irq_exit();
 }

-/* P4/Xeon Thermal regulation detect and init */
-static void intel_init_thermal(struct cpuinfo_x86 *c)
+void intel_set_thermal_handler(void)
 {
-	u32 l, h;
-	unsigned int cpu = smp_processor_id();
-
-	/* Thermal monitoring */
-	if (!cpu_has(c, X86_FEATURE_ACPI))
-		return;	/* -ENODEV */
-
-	/* Clock modulation */
-	if (!cpu_has(c, X86_FEATURE_ACC))
-		return;	/* -ENODEV */
-
-	/* first check if its enabled already, in which case there might
-	 * be some SMM goo which handles it, so we can't even put a handler
-	 * since it might be delivered via SMI already -zwanem.
-	 */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	h = apic_read(APIC_LVTTHMR);
-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
-		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
-				cpu);
-		return; /* -EBUSY */
-	}
-
-	/* check whether a vector already exists, temporarily masked? */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
-				"installed\n",
-			cpu, (h & APIC_VECTOR_MASK));
-		return; /* -EBUSY */
-	}
-
-	/* The temperature transition interrupt handler setup */
-	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
-	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
-	apic_write(APIC_LVTTHMR, h);
-
-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
-	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
-
-	/* ok we're good to go... */
 	vendor_thermal_interrupt = intel_thermal_interrupt;
-
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
-	l = apic_read(APIC_LVTTHMR);
-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
-
-	/* enable thermal throttle processing */
-	atomic_set(&therm_throt_en, 1);
-	return;
 }
+
 #endif /* CONFIG_X86_MCE_P4THERMAL */

-
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 {
 	u32 h;

@ -143,9 +92,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)

 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;

 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@ -157,7 +106,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)

 	if (mce_num_extended_msrs > 0) {
 		struct intel_mce_extended_msrs dbg;
+
 		intel_get_extended_msrs(&dbg);
+
 		printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
 			"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
 			"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
@ -171,6 +122,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
+
 			misc[0] = addr[0] = '\0';
 			if (high & (1<<29))
 				recover |= 1;
@ -196,6 +148,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		panic("Unable to continue");

 	printk(KERN_EMERG "Attempting to continue.\n");
+
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
@ -217,7 +170,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }

-
 void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@ -2,11 +2,10 @@
 * P5 specific Machine Check Exception Reporting
 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
 */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>

 #include <asm/processor.h>
@ -15,39 +14,58 @@

 #include "mce.h"

-/* Machine check handler for Pentium class Intel */
+/* By default disabled */
+int		mce_p5_enable;
+
+/* Machine check handler for Pentium class Intel CPUs: */
 static void pentium_machine_check(struct pt_regs *regs, long error_code)
 {
 	u32 loaddr, hi, lotype;
+
 	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
 	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
-	printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
-	if (lotype&(1<<5))
-		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
+
+	printk(KERN_EMERG
+		"CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
+		smp_processor_id(), loaddr, lotype);
+
+	if (lotype & (1<<5)) {
+		printk(KERN_EMERG
+			"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+			smp_processor_id());
+	}
+
 	add_taint(TAINT_MACHINE_CHECK);
 }

-/* Set up machine check reporting for processors with Intel style MCE */
+/* Set up machine check reporting for processors with Intel style MCE: */
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;

-	/*Check for MCE support */
+	/* Check for MCE support: */
 	if (!cpu_has(c, X86_FEATURE_MCE))
 		return;

-	/* Default P5 to off as its often misconnected */
+#ifdef CONFIG_X86_OLD_MCE
+	/* Default P5 to off as its often misconnected: */
 	if (mce_disabled != -1)
 		return;
+#endif
+
 	machine_check_vector = pentium_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();

-	/* Read registers before enabling */
+	/* Read registers before enabling: */
 	rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
 	rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-	printk(KERN_INFO "Intel old style machine check architecture supported.\n");
+	printk(KERN_INFO
+	       "Intel old style machine check architecture supported.\n");

-	/* Enable MCE */
+	/* Enable MCE: */
 	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+	printk(KERN_INFO
+	       "Intel old style machine check reporting enabled on CPU#%d.\n",
+	       smp_processor_id());
 }
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@ -2,11 +2,10 @@
 * P6 specific Machine Check Exception Reporting
 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
 */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>

 #include <asm/processor.h>
@ -18,9 +17,9 @@
 /* Machine Check Handler For PII/PIII */
 static void intel_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;

 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@ -35,12 +34,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-			misc[0] = addr[0] = '\0';
+
+			misc[0] = '\0';
+			addr[0] = '\0';
+
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
+
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@ -49,6 +52,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
+
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
 		}
@ -63,16 +67,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	/*
 	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
 	 * recoverable/continuable.This will allow BIOS to look at the MSRs
-	 * for errors if the OS could not log the error.
+	 * for errors if the OS could not log the error:
 	 */
 	for (i = 0; i < nr_mce_banks; i++) {
 		unsigned int msr;
+
 		msr = MSR_IA32_MC0_STATUS+i*4;
 		rdmsr(msr, low, high);
 		if (high & (1<<31)) {
-			/* Clear it */
+			/* Clear it: */
 			wrmsr(msr, 0UL, 0UL);
-			/* Serialize */
+			/* Serialize: */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
@ -81,7 +86,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }

-/* Set up machine check reporting for processors with Intel style MCE */
+/* Set up machine check reporting for processors with Intel style MCE: */
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@ -97,6 +102,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)

 	/* Ok machine check is available */
 	machine_check_vector = intel_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();

 	printk(KERN_INFO "Intel machine check architecture supported.\n");
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@ -1,7 +1,7 @@
 /*
- *
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
@ -13,43 +13,43 @@
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
-
+#include <linux/notifier.h>
+#include <linux/jiffies.h>
 #include <linux/percpu.h>
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
-#include <asm/cpu.h>
-#include <linux/notifier.h>
-#include <linux/jiffies.h>
+
 #include <asm/therm_throt.h>

 /* How long to wait between reporting thermal events */
-#define CHECK_INTERVAL              (300 * HZ)
+#define CHECK_INTERVAL		(300 * HZ)

 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-atomic_t therm_throt_en = ATOMIC_INIT(0);
+
+atomic_t therm_throt_en		= ATOMIC_INIT(0);

 #ifdef CONFIG_SYSFS
-#define define_therm_throt_sysdev_one_ro(_name)                              \
-        static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+#define define_therm_throt_sysdev_one_ro(_name)				\
+	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)

-#define define_therm_throt_sysdev_show_func(name)                            \
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,        \
-					struct sysdev_attribute *attr,	     \
-                                              char *buf)                     \
-{                                                                            \
-	unsigned int cpu = dev->id;                                          \
-	ssize_t ret;                                                         \
-                                                                             \
-	preempt_disable();              /* CPU hotplug */                    \
-	if (cpu_online(cpu))                                                 \
-		ret = sprintf(buf, "%lu\n",                                  \
-			      per_cpu(thermal_throttle_##name, cpu));        \
-	else                                                                 \
-		ret = 0;                                                     \
-	preempt_enable();                                                    \
-                                                                             \
-	return ret;                                                          \
+#define define_therm_throt_sysdev_show_func(name)			\
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
+					struct sysdev_attribute *attr,	\
+					      char *buf)		\
+{									\
+	unsigned int cpu = dev->id;					\
+	ssize_t ret;							\
+									\
+	preempt_disable();	/* CPU hotplug */			\
+	if (cpu_online(cpu))						\
+		ret = sprintf(buf, "%lu\n",				\
+			      per_cpu(thermal_throttle_##name, cpu));	\
+	else								\
+		ret = 0;						\
+	preempt_enable();						\
+									\
+	return ret;							\
 }

 define_therm_throt_sysdev_show_func(count);
@ -61,8 +61,8 @@ static struct attribute *thermal_throttle_attrs[] = {
 };

 static struct attribute_group thermal_throttle_attr_group = {
-	.attrs = thermal_throttle_attrs,
-	.name = "thermal_throttle"
+	.attrs	= thermal_throttle_attrs,
+	.name	= "thermal_throttle"
 };
 #endif /* CONFIG_SYSFS */

@ -110,10 +110,11 @@ int therm_throt_process(int curr)
 }

 #ifdef CONFIG_SYSFS
-/* Add/Remove thermal_throttle interface for CPU device */
+/* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
 {
-	return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	return sysfs_create_group(&sys_dev->kobj,
+				  &thermal_throttle_attr_group);
 }

 static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
@ -121,19 +122,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
 	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
 }

-/* Mutex protecting device creation against CPU hotplug */
+/* Mutex protecting device creation against CPU hotplug: */
 static DEFINE_MUTEX(therm_cpu_lock);

 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
-						   unsigned long action,
-						   void *hcpu)
+static __cpuinit int
+thermal_throttle_cpu_callback(struct notifier_block *nfb,
+			      unsigned long action,
+			      void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
 	struct sys_device *sys_dev;
 	int err = 0;

 	sys_dev = get_cpu_sysdev(cpu);
+
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@ -17,7 +17,7 @@ static void default_threshold_interrupt(void)

 void (*mce_threshold_vector)(void) = default_threshold_interrupt;

-asmlinkage void mce_threshold_interrupt(void)
+asmlinkage void smp_threshold_interrupt(void)
 {
 	exit_idle();
 	irq_enter();
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@ -2,11 +2,10 @@
 * IDT Winchip specific Machine Check Exception Reporting
 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
 */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>

 #include <asm/processor.h>
 #include <asm/system.h>
@ -14,7 +13,7 @@

 #include "mce.h"

-/* Machine check handler for WinChip C6 */
+/* Machine check handler for WinChip C6: */
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
 	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
@ -25,12 +24,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
 void winchip_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 lo, hi;
+
 	machine_check_vector = winchip_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
+
 	rdmsr(MSR_IDT_FCR1, lo, hi);
 	lo |= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
 	lo &= ~(1<<4);	/* Enable MCE */
 	wrmsr(MSR_IDT_FCR1, lo, hi);
+
 	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+
+	printk(KERN_INFO
+	       "Winchip machine check reporting enabled on CPU#0.\n");
 }
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@ -963,6 +963,8 @@ END(\sym)
 #ifdef CONFIG_SMP
 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+apicinterrupt REBOOT_VECTOR \
+	reboot_interrupt smp_reboot_interrupt
 #endif

 #ifdef CONFIG_X86_UV
@ -994,10 +996,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
 #endif

 apicinterrupt THRESHOLD_APIC_VECTOR \
-	threshold_interrupt mce_threshold_interrupt
+	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
 	thermal_interrupt smp_thermal_interrupt

+#ifdef CONFIG_X86_MCE
+apicinterrupt MCE_SELF_VECTOR \
+	mce_self_interrupt smp_mce_self_interrupt
+#endif
+
 #ifdef CONFIG_SMP
 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
 	call_function_single_interrupt smp_call_function_single_interrupt
@ -1379,7 +1386,7 @@ errorentry xen_stack_segment do_stack_segment
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
+paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif

 	/*
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@ -12,6 +12,7 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/mce.h>
 #include <asm/hw_irq.h>

 atomic_t irq_err_count;
@ -96,12 +97,22 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 	seq_printf(p, "  Thermal event interrupts\n");
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
 	seq_printf(p, "%*s: ", prec, "THR");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
 # endif
+#endif
+#ifdef CONFIG_X86_NEW_MCE
+	seq_printf(p, "%*s: ", prec, "MCE");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+	seq_printf(p, "  Machine check exceptions\n");
+	seq_printf(p, "%*s: ", prec, "MCP");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+	seq_printf(p, "  Machine check polls\n");
 #endif
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
@ -185,9 +196,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #endif
 #ifdef CONFIG_X86_MCE
 	sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
 	sum += irq_stats(cpu)->irq_threshold_count;
 # endif
+#endif
+#ifdef CONFIG_X86_NEW_MCE
+	sum += per_cpu(mce_exception_count, cpu);
+	sum += per_cpu(mce_poll_count, cpu);
 #endif
 	return sum;
 }
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@ -173,6 +173,9 @@ static void __init smp_intr_init(void)
 	/* Low priority IPI to cleanup after moving an irq */
 	set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
 	set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
+
+	/* IPI used for rebooting/stopping */
+	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
 #endif
 #endif /* CONFIG_SMP */
 }
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@ -24,11 +24,11 @@
 #include <asm/ucontext.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
+#include <asm/mce.h>

 #ifdef CONFIG_X86_64
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
-#include <asm/mce.h>
 #endif /* CONFIG_X86_64 */

 #include <asm/syscall.h>
@ -856,10 +856,10 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#ifdef CONFIG_X86_NEW_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
-		mce_notify_user();
+		mce_notify_process();
 #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */

 	/* deal with pending signal delivery */
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@ -150,14 +150,40 @@ void native_send_call_func_ipi(const struct cpumask *mask)
 * this function calls the 'stop' function on all other CPUs in the system.
 */

+asmlinkage void smp_reboot_interrupt(void)
+{
+	ack_APIC_irq();
+	irq_enter();
+	stop_this_cpu(NULL);
+	irq_exit();
+}
+
 static void native_smp_send_stop(void)
 {
 	unsigned long flags;
+	unsigned long wait;

 	if (reboot_force)
 		return;

-	smp_call_function(stop_this_cpu, NULL, 0);
+	/*
+	 * Use an own vector here because smp_call_function
+	 * does lots of things not suitable in a panic situation.
+	 * On most systems we could also use an NMI here,
+	 * but there are a few systems around where NMI
+	 * is problematic so stay with an non NMI for now
+	 * (this implies we cannot stop CPUs spinning with irq off
+	 * currently)
+	 */
+	if (num_online_cpus() > 1) {
+		apic->send_IPI_allbutself(REBOOT_VECTOR);
+
+		/* Don't wait longer than a second */
+		wait = USEC_PER_SEC;
+		while (num_online_cpus() > 1 && wait--)
+			udelay(1);
+	}
+
 	local_irq_save(flags);
 	disable_local_APIC();
 	local_irq_restore(flags);
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@ -798,15 +798,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)

 	return new_kesp;
 }
-#else
+#endif
+
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }

-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
-#endif

 /*
 * 'math_state_restore()' saves the current math information in the
--- a/kernel/timer.c
+++ b/kernel/timer.c
@ -757,6 +757,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	wake_up_idle_cpu(cpu);
 	spin_unlock_irqrestore(&base->lock, flags);
 }
+EXPORT_SYMBOL_GPL(add_timer_on);

 /**
 * del_timer - deactive a timer.