diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 028a8444d95e..e8acd1f03456 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -84,10 +84,9 @@
     runs an instance of gdb against the vmlinux file which contains
     the symbols (not boot image such as bzImage, zImage, uImage...).
     In gdb the developer specifies the connection parameters and
-    connects to kgdb.  Depending on which kgdb I/O modules exist in
-    the kernel for a given architecture, it may be possible to debug
-    the test machine's kernel with the development machine using a
-    rs232 or ethernet connection.
+    connects to kgdb.  The type of connection a developer makes with
+    gdb depends on the availability of kgdb I/O modules compiled as
+    builtin's or kernel modules in the test machine's kernel.
     </para>
   </chapter>
   <chapter id="CompilingAKernel">
@@ -223,7 +222,7 @@
   </para>
   <para>
   IMPORTANT NOTE: Using this option with kgdb over the console
-  (kgdboc) or kgdb over ethernet (kgdboe) is not supported.
+  (kgdboc) is not supported.
   </para>
   </sect1>
   </chapter>
@@ -249,18 +248,11 @@
     (gdb) target remote /dev/ttyS0
     </programlisting>
     <para>
-    Example (kgdb to a terminal server):
+    Example (kgdb to a terminal server on tcp port 2012):
     </para>
     <programlisting>
     % gdb ./vmlinux
-    (gdb) target remote udp:192.168.2.2:6443
-    </programlisting>
-    <para>
-    Example (kgdb over ethernet):
-    </para>
-    <programlisting>
-    % gdb ./vmlinux
-    (gdb) target remote udp:192.168.2.2:6443
+    (gdb) target remote 192.168.2.2:2012
     </programlisting>
     <para>
     Once connected, you can debug a kernel the way you would debug an
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index f4a8ebc1ef1a..2d845730d4e0 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -2,17 +2,12 @@ Naming and data format standards for sysfs files
 ------------------------------------------------
 
 The libsensors library offers an interface to the raw sensors data
-through the sysfs interface. See libsensors documentation and source for
-further information. As of writing this document, libsensors
-(from lm_sensors 2.8.3) is heavily chip-dependent. Adding or updating
-support for any given chip requires modifying the library's code.
-This is because libsensors was written for the procfs interface
-older kernel modules were using, which wasn't standardized enough.
-Recent versions of libsensors (from lm_sensors 2.8.2 and later) have
-support for the sysfs interface, though.
-
-The new sysfs interface was designed to be as chip-independent as
-possible.
+through the sysfs interface. Since lm-sensors 3.0.0, libsensors is
+completely chip-independent. It assumes that all the kernel drivers
+implement the standard sysfs interface described in this document.
+This makes adding or updating support for any given chip very easy, as
+libsensors, and applications using it, do not need to be modified.
+This is a major improvement compared to lm-sensors 2.
 
 Note that motherboards vary widely in the connections to sensor chips.
 There is no standard that ensures, for example, that the second
@@ -35,19 +30,17 @@ access this data in a simple and consistent way. That said, such programs
 will have to implement conversion, labeling and hiding of inputs. For
 this reason, it is still not recommended to bypass the library.
 
-If you are developing a userspace application please send us feedback on
-this standard.
-
-Note that this standard isn't completely established yet, so it is subject
-to changes. If you are writing a new hardware monitoring driver those
-features can't seem to fit in this interface, please contact us with your
-extension proposal. Keep in mind that backward compatibility must be
-preserved.
-
 Each chip gets its own directory in the sysfs /sys/devices tree.  To
 find all sensor chips, it is easier to follow the device symlinks from
 /sys/class/hwmon/hwmon*.
 
+Up to lm-sensors 3.0.0, libsensors looks for hardware monitoring attributes
+in the "physical" device directory. Since lm-sensors 3.0.1, attributes found
+in the hwmon "class" device directory are also supported. Complex drivers
+(e.g. drivers for multifunction chips) may want to use this possibility to
+avoid namespace pollution. The only drawback will be that older versions of
+libsensors won't support the driver in question.
+
 All sysfs values are fixed point numbers.
 
 There is only one value per file, unlike the older /proc specification.
diff --git a/MAINTAINERS b/MAINTAINERS
index cd587eec9fa7..8f0ec46a7096 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4431,10 +4431,10 @@ M:	johnpol@2ka.mipt.ru
 S:	Maintained
 
 W83791D HARDWARE MONITORING DRIVER
-P:	Charles Spirakis
-M:	bezaur@gmail.com
+P:	Marc Hulsman
+M:	m.hulsman@tudelft.nl
 L:	lm-sensors@lm-sensors.org
-S:	Odd Fixes
+S:	Maintained
 
 W83793 HARDWARE MONITORING DRIVER
 P:	Rudolf Marek
diff --git a/Makefile b/Makefile
index 2b4977c9844e..6aff5f47c21d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 26
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*
diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile
index 4e1a8e2c4541..4759fe751aa1 100644
--- a/arch/alpha/Makefile
+++ b/arch/alpha/Makefile
@@ -13,6 +13,7 @@ NM := $(NM) -B
 LDFLAGS_vmlinux	:= -static -N #-relax
 CHECKFLAGS	+= -D__alpha__ -m64
 cflags-y	:= -pipe -mno-fp-regs -ffixed-8 -msmall-data
+cflags-y	+= $(call cc-option, -fno-jump-tables)
 
 cpuflags-$(CONFIG_ALPHA_EV4)		:= -mcpu=ev4
 cpuflags-$(CONFIG_ALPHA_EV5)		:= -mcpu=ev5
diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c
index c0750291b44a..d9980d47ab81 100644
--- a/arch/alpha/kernel/core_t2.c
+++ b/arch/alpha/kernel/core_t2.c
@@ -74,6 +74,8 @@
 # define DBG(args)
 #endif
 
+DEFINE_SPINLOCK(t2_hae_lock);
+
 static volatile unsigned int t2_mcheck_any_expected;
 static volatile unsigned int t2_mcheck_last_taken;
 
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 36ab22a7ea12..5cf45fc51343 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -71,6 +71,23 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_i
 static void __init
 quirk_cypress(struct pci_dev *dev)
 {
+	/* The Notorious Cy82C693 chip.  */
+
+	/* The generic legacy mode IDE fixup in drivers/pci/probe.c
+	   doesn't work correctly with the Cypress IDE controller as
+	   it has non-standard register layout.  Fix that.  */
+	if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) {
+		dev->resource[2].start = dev->resource[3].start = 0;
+		dev->resource[2].end = dev->resource[3].end = 0;
+		dev->resource[2].flags = dev->resource[3].flags = 0;
+		if (PCI_FUNC(dev->devfn) == 2) {
+			dev->resource[0].start = 0x170;
+			dev->resource[0].end = 0x177;
+			dev->resource[1].start = 0x376;
+			dev->resource[1].end = 0x376;
+		}
+	}
+
 	/* The Cypress bridge responds on the PCI bus in the address range
 	   0xffff0000-0xffffffff (conventional x86 BIOS ROM).  There is no
 	   way to turn this off.  The bridge also supports several extended
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index dc57790250d2..c778779007fc 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -447,7 +447,7 @@ struct unaligned_stat {
 
 
 /* Macro for exception fixup code to access integer registers.  */
-#define una_reg(r)  (regs->regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)])
+#define una_reg(r)  (_regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)])
 
 
 asmlinkage void
@@ -456,6 +456,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg,
 {
 	long error, tmp1, tmp2, tmp3, tmp4;
 	unsigned long pc = regs->pc - 4;
+	unsigned long *_regs = regs->regs;
 	const struct exception_table_entry *fixup;
 
 	unaligned[0].count++;
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 207a8b5a0c4a..0be5630ff568 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
 #
 #   http://www.arm.linux.org.uk/developer/machines/?action=new
 #
-# Last update: Sat Apr 19 11:23:38 2008
+# Last update: Mon Jul 7 16:25:39 2008
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -560,7 +560,6 @@ husky			MACH_HUSKY		HUSKY			543
 boxer			MACH_BOXER		BOXER			544
 shepherd		MACH_SHEPHERD		SHEPHERD		545
 aml42800aa		MACH_AML42800AA		AML42800AA		546
-ml674001		MACH_MACH_TYPE_ML674001	MACH_TYPE_ML674001	547
 lpc2294			MACH_LPC2294		LPC2294			548
 switchgrass		MACH_SWITCHGRASS	SWITCHGRASS		549
 ens_cmu			MACH_ENS_CMU		ENS_CMU			550
@@ -748,7 +747,6 @@ anubis			MACH_ANUBIS		ANUBIS			734
 ite8152			MACH_ITE8152		ITE8152			735
 lpc3xxx			MACH_LPC3XXX		LPC3XXX			736
 puppeteer		MACH_PUPPETEER		PUPPETEER		737
-vt001			MACH_MACH_VADATECH	MACH_VADATECH		738
 e570			MACH_E570		E570			739
 x50			MACH_X50		X50			740
 recon			MACH_RECON		RECON			741
@@ -839,7 +837,7 @@ ccxp270			MACH_CCXP		CCXP			825
 omap_gsample		MACH_OMAP_GSAMPLE	OMAP_GSAMPLE		826
 realview_eb		MACH_REALVIEW_EB	REALVIEW_EB		827
 samoa			MACH_SAMOA		SAMOA			828
-t3xscale		MACH_T3XSCALE		T3XSCALE		829
+palmt3			MACH_PALMT3		PALMT3			829
 i878			MACH_I878		I878			830
 borzoi			MACH_BORZOI		BORZOI			831
 gecko			MACH_GECKO		GECKO			832
@@ -895,7 +893,7 @@ mio8390			MACH_MIO8390		MIO8390			881
 omi_board		MACH_OMI_BOARD		OMI_BOARD		882
 mx21civ			MACH_MX21CIV		MX21CIV			883
 mahi_cdac		MACH_MAHI_CDAC		MAHI_CDAC		884
-xscale_palmtx		MACH_XSCALE_PALMTX	XSCALE_PALMTX		885
+palmtx			MACH_PALMTX		PALMTX			885
 s3c2413			MACH_S3C2413		S3C2413			887
 samsys_ep0		MACH_SAMSYS_EP0		SAMSYS_EP0		888
 wg302v1			MACH_WG302V1		WG302V1			889
@@ -918,7 +916,7 @@ nxdb500			MACH_NXDB500		NXDB500			905
 apf9328			MACH_APF9328		APF9328			906
 omap_wipoq		MACH_OMAP_WIPOQ		OMAP_WIPOQ		907
 omap_twip		MACH_OMAP_TWIP		OMAP_TWIP		908
-xscale_treo650		MACH_XSCALE_PALMTREO650	XSCALE_PALMTREO650	909
+palmtreo650		MACH_PALMTREO650	PALMTREO650		909
 acumen			MACH_ACUMEN		ACUMEN			910
 xp100			MACH_XP100		XP100			911
 fs2410			MACH_FS2410		FS2410			912
@@ -926,8 +924,8 @@ pxa270_cerf		MACH_PXA270_CERF	PXA270_CERF		913
 sq2ftlpalm		MACH_SQ2FTLPALM		SQ2FTLPALM		914
 bsemserver		MACH_BSEMSERVER		BSEMSERVER		915
 netclient		MACH_NETCLIENT		NETCLIENT		916
-xscale_palmtt5		MACH_XSCALE_PALMTT5	XSCALE_PALMTT5		917
-xscale_palmtc		MACH_OMAP_PALMTC	OMAP_PALMTC		918
+palmt5			MACH_PALMT5		PALMT5			917
+palmtc			MACH_PALMTC		PALMTC			918
 omap_apollon		MACH_OMAP_APOLLON	OMAP_APOLLON		919
 mxc30030evb		MACH_MXC30030EVB	MXC30030EVB		920
 rea_2d			MACH_REA_2D		REA_2D			921
@@ -1220,7 +1218,6 @@ empca400		MACH_EMPCA400		EMPCA400		1211
 em7210			MACH_EM7210		EM7210			1212
 htchermes		MACH_HTCHERMES		HTCHERMES		1213
 eti_c1			MACH_ETI_C1		ETI_C1			1214
-mach_dep2410		MACH_MACH_DEP2410	MACH_DEP2410		1215
 ac100			MACH_AC100		AC100			1216
 sneetch			MACH_SNEETCH		SNEETCH			1217
 studentmate		MACH_STUDENTMATE	STUDENTMATE		1218
@@ -1421,10 +1418,10 @@ looxc550		MACH_LOOXC550		LOOXC550		1417
 cnty_titan		MACH_CNTY_TITAN		CNTY_TITAN		1418
 app3xx			MACH_APP3XX		APP3XX			1419
 sideoatsgrama		MACH_SIDEOATSGRAMA	SIDEOATSGRAMA		1420
-xscale_palmt700p	MACH_XSCALE_PALMT700P	XSCALE_PALMT700P	1421
-xscale_palmt700w	MACH_XSCALE_PALMT700W	XSCALE_PALMT700W	1422
-xscale_palmt750		MACH_XSCALE_PALMT750	XSCALE_PALMT750		1423
-xscale_palmt755p	MACH_XSCALE_PALMT755P	XSCALE_PALMT755P	1424
+palmtreo700p		MACH_PALMTREO700P	PALMTREO700P		1421
+palmtreo700w		MACH_PALMTREO700W	PALMTREO700W		1422
+palmtreo750		MACH_PALMTREO750	PALMTREO750		1423
+palmtreo755p		MACH_PALMTREO755P	PALMTREO755P		1424
 ezreganut9200		MACH_EZREGANUT9200	EZREGANUT9200		1425
 sarge			MACH_SARGE		SARGE			1426
 a696			MACH_A696		A696			1427
@@ -1463,7 +1460,7 @@ artemis			MACH_ARTEMIS		ARTEMIS			1462
 htctitan		MACH_HTCTITAN		HTCTITAN		1463
 qranium			MACH_QRANIUM		QRANIUM			1464
 adx_wsc2		MACH_ADX_WSC2		ADX_WSC2		1465
-adx_medcom		MACH_ADX_MEDINET	ADX_MEDINET		1466
+adx_medcom		MACH_ADX_MEDCOM		ADX_MEDCOM		1466
 bboard			MACH_BBOARD		BBOARD			1467
 cambria			MACH_CAMBRIA		CAMBRIA			1468
 mt7xxx			MACH_MT7XXX		MT7XXX			1469
@@ -1519,7 +1516,7 @@ wp188			MACH_WP188		WP188			1518
 corsica			MACH_CORSICA		CORSICA			1519
 bigeye			MACH_BIGEYE		BIGEYE			1520
 tll5000			MACH_TLL5000		TLL5000			1522
-hni270			MACH_HNI_X270		HNI_X270		1523
+bebot			MACH_BEBOT		BEBOT			1523
 qong			MACH_QONG		QONG			1524
 tcompact		MACH_TCOMPACT		TCOMPACT		1525
 puma5			MACH_PUMA5		PUMA5			1526
@@ -1636,7 +1633,6 @@ awlug4lcu		MACH_AWLUG4LCU		AWLUG4LCU		1637
 palermoc		MACH_PALERMOC		PALERMOC		1638
 omap_ldp		MACH_OMAP_LDP		OMAP_LDP		1639
 ip500			MACH_IP500		IP500			1640
-mx35ads			MACH_MACH_MX35ADS	MACH_MX35ADS		1641
 ase2			MACH_ASE2		ASE2			1642
 mx35evb			MACH_MX35EVB		MX35EVB			1643
 aml_m8050		MACH_AML_M8050		AML_M8050		1644
@@ -1647,7 +1643,7 @@ badger			MACH_BADGER		BADGER			1648
 trizeps4wl		MACH_TRIZEPS4WL		TRIZEPS4WL		1649
 trizeps5		MACH_TRIZEPS5		TRIZEPS5		1650
 marlin			MACH_MARLIN		MARLIN			1651
-ts7800			MACH_TS7800		TS7800			1652
+ts78xx			MACH_TS78XX		TS78XX			1652
 hpipaq214		MACH_HPIPAQ214		HPIPAQ214		1653
 at572d940dcm		MACH_AT572D940DCM	AT572D940DCM		1654
 ne1board		MACH_NE1BOARD		NE1BOARD		1655
@@ -1720,3 +1716,99 @@ htc_kaiser		MACH_HTC_KAISER		HTC_KAISER		1724
 lg_ks20			MACH_LG_KS20		LG_KS20			1725
 hhgps			MACH_HHGPS		HHGPS			1726
 nokia_n810_wimax	MACH_NOKIA_N810_WIMAX	NOKIA_N810_WIMAX	1727
+insight			MACH_INSIGHT		INSIGHT			1728
+sapphire		MACH_SAPPHIRE		SAPPHIRE		1729
+csb637xo		MACH_CSB637XO		CSB637XO		1730
+evisiong		MACH_EVISIONG		EVISIONG		1731
+stmp37xx		MACH_STMP37XX		STMP37XX		1732
+stmp378x		MACH_STMP38XX		STMP38XX		1733
+tnt			MACH_TNT		TNT			1734
+tbxt			MACH_TBXT		TBXT			1735
+playmate		MACH_PLAYMATE		PLAYMATE		1736
+pns10			MACH_PNS10		PNS10			1737
+eznavi			MACH_EZNAVI		EZNAVI			1738
+ps4000			MACH_PS4000		PS4000			1739
+ezx_a780		MACH_EZX_A780		EZX_A780		1740
+ezx_e680		MACH_EZX_E680		EZX_E680		1741
+ezx_a1200		MACH_EZX_A1200		EZX_A1200		1742
+ezx_e6			MACH_EZX_E6		EZX_E6			1743
+ezx_e2			MACH_EZX_E2		EZX_E2			1744
+ezx_a910		MACH_EZX_A910		EZX_A910		1745
+cwmx31			MACH_CWMX31		CWMX31			1746
+sl2312			MACH_SL2312		SL2312			1747
+blenny			MACH_BLENNY		BLENNY			1748
+ds107			MACH_DS107		DS107			1749
+dsx07			MACH_DSX07		DSX07			1750
+picocom1		MACH_PICOCOM1		PICOCOM1		1751
+lynx_wolverine		MACH_LYNX_WOLVERINE	LYNX_WOLVERINE		1752
+ubisys_p9_sc19		MACH_UBISYS_P9_SC19	UBISYS_P9_SC19		1753
+kratos_low		MACH_KRATOS_LOW		KRATOS_LOW		1754
+m700			MACH_M700		M700			1755
+edmini_v2		MACH_EDMINI_V2		EDMINI_V2		1756
+zipit2			MACH_ZIPIT2		ZIPIT2			1757
+hslfemtocell		MACH_HSLFEMTOCELL	HSLFEMTOCELL		1758
+daintree_at91		MACH_DAINTREE_AT91	DAINTREE_AT91		1759
+sg560usb		MACH_SG560USB		SG560USB		1760
+omap3_pandora		MACH_OMAP3_PANDORA	OMAP3_PANDORA		1761
+usr8200			MACH_USR8200		USR8200			1762
+s1s65k			MACH_S1S65K		S1S65K			1763
+s2s65a			MACH_S2S65A		S2S65A			1764
+icore			MACH_ICORE		ICORE			1765
+mss2			MACH_MSS2		MSS2			1766
+belmont			MACH_BELMONT		BELMONT			1767
+asusp525		MACH_ASUSP525		ASUSP525		1768
+lb88rc8480		MACH_LB88RC8480		LB88RC8480		1769
+hipxa			MACH_HIPXA		HIPXA			1770
+mx25_3ds		MACH_MX25_3DS		MX25_3DS		1771
+m800			MACH_M800		M800			1772
+omap3530_lv_som		MACH_OMAP3530_LV_SOM	OMAP3530_LV_SOM		1773
+prima_evb		MACH_PRIMA_EVB		PRIMA_EVB		1774
+mx31bt1			MACH_MX31BT1		MX31BT1			1775
+atlas4_evb		MACH_ATLAS4_EVB		ATLAS4_EVB		1776
+mx31cicada		MACH_MX31CICADA		MX31CICADA		1777
+mi424wr			MACH_MI424WR		MI424WR			1778
+axs_ultrax		MACH_AXS_ULTRAX		AXS_ULTRAX		1779
+at572d940deb		MACH_AT572D940DEB	AT572D940DEB		1780
+davinci_da8xx_evm	MACH_DAVINCI_DA8XX_EVM	DAVINCI_DA8XX_EVM	1781
+ep9302			MACH_EP9302		EP9302			1782
+at572d940hfeb		MACH_AT572D940HFEB	AT572D940HFEB		1783
+cybook3			MACH_CYBOOK3		CYBOOK3			1784
+wdg002			MACH_WDG002		WDG002			1785
+sg560adsl		MACH_SG560ADSL		SG560ADSL		1786
+nextio_n2800_ica	MACH_NEXTIO_N2800_ICA	NEXTIO_N2800_ICA	1787
+marvell_newdb		MACH_MARVELL_NEWDB	MARVELL_NEWDB		1789
+vandihud		MACH_VANDIHUD		VANDIHUD		1790
+magx_e8			MACH_MAGX_E8		MAGX_E8			1791
+magx_z6			MACH_MAGX_Z6		MAGX_Z6			1792
+magx_v8			MACH_MAGX_V8		MAGX_V8			1793
+magx_u9			MACH_MAGX_U9		MAGX_U9			1794
+toughcf08		MACH_TOUGHCF08		TOUGHCF08		1795
+zw4400			MACH_ZW4400		ZW4400			1796
+marat91			MACH_MARAT91		MARAT91			1797
+overo			MACH_OVERO		OVERO			1798
+at2440evb		MACH_AT2440EVB		AT2440EVB		1799
+neocore926		MACH_NEOCORE926		NEOCORE926		1800
+wnr854t			MACH_WNR854T		WNR854T			1801
+imx27			MACH_IMX27		IMX27			1802
+moose_db		MACH_MOOSE_DB		MOOSE_DB		1803
+fab4			MACH_FAB4		FAB4			1804
+htcdiamond		MACH_HTCDIAMOND		HTCDIAMOND		1805
+fiona			MACH_FIONA		FIONA			1806
+mxc30030_x		MACH_MXC30030_X		MXC30030_X		1807
+bmp1000			MACH_BMP1000		BMP1000			1808
+logi9200		MACH_LOGI9200		LOGI9200		1809
+tqma31			MACH_TQMA31		TQMA31			1810
+ccw9p9215js		MACH_CCW9P9215JS	CCW9P9215JS		1811
+rd88f5181l_ge		MACH_RD88F5181L_GE	RD88F5181L_GE		1812
+sifmain			MACH_SIFMAIN		SIFMAIN			1813
+sam9_l9261		MACH_SAM9_L9261		SAM9_L9261		1814
+cc9m2443js		MACH_CC9M2443JS		CC9M2443JS		1815
+xaria300		MACH_XARIA300		XARIA300		1816
+it9200			MACH_IT9200		IT9200			1817
+rd88f5181l_fxo		MACH_RD88F5181L_FXO	RD88F5181L_FXO		1818
+kriss_sensor		MACH_KRISS_SENSOR	KRISS_SENSOR		1819
+pilz_pmi5		MACH_PILZ_PMI5		PILZ_PMI5		1820
+jade			MACH_JADE		JADE			1821
+ks8695_softplc		MACH_KS8695_SOFTPLC	KS8695_SOFTPLC		1822
+gprisc4			MACH_GPRISC4		GPRISC4			1823
+stamp9260		MACH_STAMP9260		STAMP9260		1824
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 082c31dcfd99..39752cdef6ff 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -558,8 +558,6 @@ static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void)
 	if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
 		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
 				    NR_PREALLOCATE_RTE_ENTRIES);
-		if (!rte)
-			return NULL;
 		for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
 			list_add(&rte->rte_list, &free_rte_list);
 	}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index f48a809c686d..4ae15c8c2488 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -578,8 +578,6 @@ setup_arch (char **cmdline_p)
 	cpu_init();	/* initialize the bootstrap CPU */
 	mmu_context_init();	/* initialize context_id bitmap */
 
-	check_sal_cache_flush();
-
 #ifdef CONFIG_ACPI
 	acpi_boot_init();
 #endif
@@ -607,6 +605,7 @@ setup_arch (char **cmdline_p)
 		ia64_mca_init();
 
 	platform_setup(cmdline_p);
+	check_sal_cache_flush();
 	paging_init();
 }
 
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 49d3120415eb..e585f9a2afb9 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -512,6 +512,8 @@ static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, si
 	int cpu;
 	char optstr[64];
 
+	if (count == 0 || count > sizeof(optstr))
+		return -EINVAL;
 	if (copy_from_user(optstr, user, count))
 		return -EFAULT;
 	optstr[count - 1] = '\0';
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 52e18e6d2ba0..e0edaaa6920a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -383,6 +383,7 @@ config VMI
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on !(X86_VISWS || X86_VOYAGER)
 	help
 	  Turning on this option will allow you to run a paravirtualized clock
@@ -410,6 +411,10 @@ config PARAVIRT
 	  over full virtualization.  However, when run without a hypervisor
 	  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_CLOCK
+	bool
+	default n
+
 endif
 
 config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..77807d4769c9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a30986d472..87edf1ceb1df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/kvm_para.h>
+#include <asm/pvclock.h>
 #include <asm/arch_hooks.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
 
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
  */
 static unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
-
-	delta = kvm_clock_read();
-
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
 
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
+
+	return ts.tv_sec;
 }
 
 static int kvm_set_wallclock(unsigned long now)
 {
-	return 0;
+	return -1;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
+	struct pvclock_vcpu_time_info *src;
+	cycle_t ret;
 
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
-
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high;
 	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock());
+	WARN_ON(kvm_register_clock("secondary cpu clock"));
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
 #endif
 
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+	WARN_ON(kvm_register_clock("primary cpu clock"));
+	native_smp_prepare_boot_cpu();
+}
+#endif
+
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -174,13 +148,16 @@ void __init kvmclock_init(void)
 		return;
 
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock())
+		if (kvm_register_clock("boot clock"))
 			return;
 		pv_time_ops.get_wallclock = kvm_get_wallclock;
 		pv_time_ops.set_wallclock = kvm_set_wallclock;
 		pv_time_ops.sched_clock = kvm_clock_read;
 #ifdef CONFIG_X86_LOCAL_APIC
 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
+#ifdef CONFIG_SMP
+		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 #endif
 		machine_ops.shutdown  = kvm_shutdown;
 #ifdef CONFIG_KEXEC
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000000000000..05fbe9a0325a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <asm/pvclock.h>
+
+/*
+ * These are perodically updated
+ *    xen: magic shared_info page
+ *    kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+	u32 tsc_to_nsec_mul;
+	int tsc_shift;
+	u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+	u64 product;
+#ifdef __i386__
+	u32 tmp1, tmp2;
+#endif
+
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+#ifdef __i386__
+	__asm__ (
+		"mul  %5       ; "
+		"mov  %4,%%eax ; "
+		"mov  %%edx,%4 ; "
+		"mul  %5       ; "
+		"xor  %5,%5    ; "
+		"add  %4,%%eax ; "
+		"adc  %5,%%edx ; "
+		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+	__asm__ (
+		"mul %%rdx ; shrd $32,%%rdx,%%rax"
+		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+	return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+	u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+					struct pvclock_vcpu_time_info *src)
+{
+	do {
+		dst->version = src->version;
+		rmb();		/* fetch version before data */
+		dst->tsc_timestamp     = src->tsc_timestamp;
+		dst->system_timestamp  = src->system_time;
+		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+		dst->tsc_shift         = src->tsc_shift;
+		rmb();		/* test version after fetching data */
+	} while ((src->version & 1) || (dst->version != src->version));
+
+	return dst->version;
+}
+
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+	struct pvclock_shadow_time shadow;
+	unsigned version;
+	cycle_t ret, offset;
+
+	do {
+		version = pvclock_get_time_values(&shadow, src);
+		barrier();
+		offset = pvclock_get_nsec_offset(&shadow);
+		ret = shadow.system_timestamp + offset;
+		barrier();
+	} while (version != src->version);
+
+	return ret;
+}
+
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
+			    struct pvclock_vcpu_time_info *vcpu_time,
+			    struct timespec *ts)
+{
+	u32 version;
+	u64 delta;
+	struct timespec now;
+
+	/* get wallclock at system boot */
+	do {
+		version = wall_clock->version;
+		rmb();		/* fetch version before time */
+		now.tv_sec  = wall_clock->sec;
+		now.tv_nsec = wall_clock->nsec;
+		rmb();		/* fetch time before checking version */
+	} while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
+	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+
+	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
+	now.tv_sec = delta;
+
+	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f2f5d260874e..3829aa7b663f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,9 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps)
 
 	atomic_inc(&pt->pending);
 	smp_mb__after_atomic_inc();
-	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
-		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-		wake_up_interruptible(&vcpu0->wq);
+	if (vcpu0) {
+		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+		if (waitqueue_active(&vcpu0->wq)) {
+			vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+			wake_up_interruptible(&vcpu0->wq);
+		}
 	}
 
 	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c297c50eba63..ebc03f5ae162 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	wait_queue_head_t *q = &apic->vcpu->wq;
 
 	atomic_inc(&apic->timer.pending);
+	set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
 	if (waitqueue_active(q)) {
 		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ee3f53098f0c..7e7c3969f7a2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 			rmap_remove(kvm, spte);
 			--kvm->stat.lpages;
 			set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+			spte = NULL;
 			write_protected = 1;
 		}
 		spte = rmap_next(kvm, rmapp, spte);
@@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		struct kvm_mmu_page *shadow;
 
 		spte |= PT_WRITABLE_MASK;
-		if (user_fault) {
-			mmu_unshadow(vcpu->kvm, gfn);
-			goto unshadowed;
-		}
 
 		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
 		if (shadow ||
@@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 	}
 
-unshadowed:
-
 	if (pte_access & ACC_WRITE_MASK)
 		mark_page_dirty(vcpu->kvm, gfn);
 
@@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 				  u64 *spte,
 				  const void *new)
 {
-	if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
-	    && !vcpu->arch.update_pte.largepage) {
-		++vcpu->kvm->stat.mmu_pde_zapped;
-		return;
-	}
+	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+		if (!vcpu->arch.update_pte.largepage ||
+		    sp->role.glevels == PT32_ROOT_LEVEL) {
+			++vcpu->kvm->stat.mmu_pde_zapped;
+			return;
+		}
+        }
 
 	++vcpu->kvm->stat.mmu_pte_updated;
 	if (sp->role.glevels == PT32_ROOT_LEVEL)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 02efbe75f317..540e95179074 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	load_transition_efer(vmx);
 }
 
-static void vmx_load_host_state(struct vcpu_vmx *vmx)
+static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 {
 	unsigned long flags;
 
@@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
 	reload_host_efer(vmx);
 }
 
+static void vmx_load_host_state(struct vcpu_vmx *vmx)
+{
+	preempt_disable();
+	__vmx_load_host_state(vmx);
+	preempt_enable();
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	vmx_load_host_state(to_vmx(vcpu));
+	__vmx_load_host_state(to_vmx(vcpu));
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 	switch (msr_index) {
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
+		vmx_load_host_state(vmx);
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
-		if (vmx->host_state.loaded) {
-			reload_host_efer(vmx);
-			load_transition_efer(vmx);
-		}
 		break;
 	case MSR_FS_BASE:
 		vmcs_writel(GUEST_FS_BASE, data);
@@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		guest_write_tsc(data);
 		break;
 	default:
+		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			msr->data = data;
-			if (vmx->host_state.loaded)
-				load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
 			break;
 		}
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00acf1301a15..63a77caa59f1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	static int version;
-	struct kvm_wall_clock wc;
-	struct timespec wc_ts;
+	struct pvclock_wall_clock wc;
+	struct timespec now, sys, boot;
 
 	if (!wall_clock)
 		return;
@@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 
-	wc_ts = current_kernel_time();
-	wc.wc_sec = wc_ts.tv_sec;
-	wc.wc_nsec = wc_ts.tv_nsec;
-	wc.wc_version = version;
+	/*
+	 * The guest calculates current wall clock time by adding
+	 * system time (updated by kvm_write_guest_time below) to the
+	 * wall clock specified here.  guest system time equals host
+	 * system time for us, thus we must fill in host boot time here.
+	 */
+	now = current_kernel_time();
+	ktime_get_ts(&sys);
+	boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+
+	wc.sec = boot.tv_sec;
+	wc.nsec = boot.tv_nsec;
+	wc.version = version;
 
 	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
 
@@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+	uint32_t quotient, remainder;
+
+	/* Don't try to replace with do_div(), this one calculates
+	 * "(dividend << 32) / divisor" */
+	__asm__ ( "divl %4"
+		  : "=a" (quotient), "=d" (remainder)
+		  : "0" (0), "1" (dividend), "r" (divisor) );
+	return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
+{
+	uint64_t nsecs = 1000000000LL;
+	int32_t  shift = 0;
+	uint64_t tps64;
+	uint32_t tps32;
+
+	tps64 = tsc_khz * 1000LL;
+	while (tps64 > nsecs*2) {
+		tps64 >>= 1;
+		shift--;
+	}
+
+	tps32 = (uint32_t)tps64;
+	while (tps32 <= (uint32_t)nsecs) {
+		tps32 <<= 1;
+		shift++;
+	}
+
+	hv_clock->tsc_shift = shift;
+	hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+	pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+		 __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+		 hv_clock->tsc_to_system_mul);
+}
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
@@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	if ((!vcpu->time_page))
 		return;
 
+	if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
+		kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = tsc_khz;
+	}
+
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
@@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
-	 * state, we just write "2" at the end
+	 * state, we just increase by 2 at the end.
 	 */
-	vcpu->hv_clock.version = 2;
+	vcpu->hv_clock.version += 2;
 
 	shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
 
 	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-		sizeof(vcpu->hv_clock));
+	       sizeof(vcpu->hv_clock));
 
 	kunmap_atomic(shared_kaddr, KM_USER0);
 
@@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		vcpu->arch.hv_clock.tsc_to_system_mul =
-					clocksource_khz2mult(tsc_khz, 22);
-		vcpu->arch.hv_clock.tsc_shift = 22;
-
 		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
@@ -2759,6 +2808,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+			kvm_x86_ops->tlb_flush(vcpu);
 		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
 				       &vcpu->requests)) {
 			kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -2772,6 +2823,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
+	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
 	kvm_inject_pending_timer_irqs(vcpu);
 
 	preempt_disable();
@@ -2781,21 +2833,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	local_irq_disable();
 
-	if (need_resched()) {
+	if (vcpu->requests || need_resched()) {
 		local_irq_enable();
 		preempt_enable();
 		r = 1;
 		goto out;
 	}
 
-	if (vcpu->requests)
-		if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
-			local_irq_enable();
-			preempt_enable();
-			r = 1;
-			goto out;
-		}
-
 	if (signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -2825,9 +2869,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	kvm_guest_enter();
 
-	if (vcpu->requests)
-		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-			kvm_x86_ops->tlb_flush(vcpu);
 
 	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
 	kvm_x86_ops->run(vcpu, kvm_run);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be2737e..6c388e593bc8 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,8 +5,9 @@
 config XEN
 	bool "Xen guest support"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d61..f09c1c69c37a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
 	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
+	int i;
 
 	/* special set_pte for pagetable initialization */
 	pv_mmu_ops.set_pte = xen_set_pte_init;
 
 	init_mm.pgd = base;
 	/*
-	 * copy top-level of Xen-supplied pagetable into place.	 For
-	 * !PAE we can use this as-is, but for PAE it is a stand-in
-	 * while we copy the pmd pages.
+	 * copy top-level of Xen-supplied pagetable into place.  This
+	 * is a stand-in while we copy the pmd pages.
 	 */
 	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
-	if (PTRS_PER_PMD > 1) {
-		int i;
-		/*
-		 * For PAE, need to allocate new pmds, rather than
-		 * share Xen's, since Xen doesn't like pmd's being
-		 * shared between address spaces.
-		 */
-		for (i = 0; i < PTRS_PER_PGD; i++) {
-			if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-				pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+	/*
+	 * For PAE, need to allocate new pmds, rather than
+	 * share Xen's, since Xen doesn't like pmd's being
+	 * shared between address spaces.
+	 */
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
+			pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 
-				memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-				       PAGE_SIZE);
+			memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
+			       PAGE_SIZE);
 
-				make_lowmem_page_readonly(pmd);
+			make_lowmem_page_readonly(pmd);
 
-				set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-			} else
-				pgd_clear(&base[i]);
-		}
+			set_pgd(&base[i], __pgd(1 + __pa(pmd)));
+		} else
+			pgd_clear(&base[i]);
 	}
 
 	/* make sure zero_page is mapped RO so we can use it in pagetables */
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 
 	/* Actually pin the pagetable down, but we can't set PG_pinned
 	   yet because the page structures don't exist yet. */
-	{
-		unsigned level;
-
-#ifdef CONFIG_X86_PAE
-		level = MMUEXT_PIN_L3_TABLE;
-#else
-		level = MMUEXT_PIN_L2_TABLE;
-#endif
-
-		pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
-	}
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
 /* This is called once we have the cpu_possible_map */
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pte = xen_make_pte,
 	.make_pgd = xen_make_pgd,
 
-#ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
 	.set_pud = xen_set_pud,
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.make_pmd = xen_make_pmd,
 	.pmd_val = xen_pmd_val,
-#endif	/* PAE */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
@@ -1228,6 +1213,11 @@ asmlinkage void __init xen_start_kernel(void)
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
 
+	/* Prevent unwanted bits from being set in PTEs. */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	if (!is_initial_xendomain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
 	/* set the limit of our address space */
 	xen_reserve_top();
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3525ef523a74..df40bf74ea75 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -179,50 +179,56 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		preempt_enable();
 }
 
+/* Assume pteval_t is equivalent to all the other *val_t types. */
+static pteval_t pte_mfn_to_pfn(pteval_t val)
+{
+	if (val & _PAGE_PRESENT) {
+		unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & ~PTE_MASK;
+		val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+	}
+
+	return val;
+}
+
+static pteval_t pte_pfn_to_mfn(pteval_t val)
+{
+	if (val & _PAGE_PRESENT) {
+		unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & ~PTE_MASK;
+		val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+	}
+
+	return val;
+}
+
 pteval_t xen_pte_val(pte_t pte)
 {
-	pteval_t ret = pte.pte;
-
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-
-	return ret;
+	return pte_mfn_to_pfn(pte.pte);
 }
 
 pgdval_t xen_pgd_val(pgd_t pgd)
 {
-	pgdval_t ret = pgd.pgd;
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
+	return pte_mfn_to_pfn(pgd.pgd);
 }
 
 pte_t xen_make_pte(pteval_t pte)
 {
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ .pte = pte };
+	pte = pte_pfn_to_mfn(pte);
+	return native_make_pte(pte);
 }
 
 pgd_t xen_make_pgd(pgdval_t pgd)
 {
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
+	pgd = pte_pfn_to_mfn(pgd);
+	return native_make_pgd(pgd);
 }
 
 pmdval_t xen_pmd_val(pmd_t pmd)
 {
-	pmdval_t ret = native_pmd_val(pmd);
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
+	return pte_mfn_to_pfn(pmd.pmd);
 }
-#ifdef CONFIG_X86_PAE
+
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
 	struct multicall_space mcs;
@@ -267,17 +273,9 @@ void xen_pmd_clear(pmd_t *pmdp)
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
-	if (pmd & _PAGE_PRESENT)
-		pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
+	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
 }
-#else  /* !PAE */
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
-	*ptep = pte;
-}
-#endif	/* CONFIG_X86_PAE */
 
 /*
   (Yet another) pagetable walker.  This one is intended for pinning a
@@ -430,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level)
    read-only, and can be pinned. */
 void xen_pgd_pin(pgd_t *pgd)
 {
-	unsigned level;
-
 	xen_mc_batch();
 
 	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
@@ -441,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_mc_batch();
 	}
 
-#ifdef CONFIG_X86_PAE
-	level = MMUEXT_PIN_L3_TABLE;
-#else
-	level = MMUEXT_PIN_L2_TABLE;
-#endif
-
-	xen_do_pin(level, PFN_DOWN(__pa(pgd)));
-
+	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 	xen_mc_issue(0);
 }
 
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index b5e189b1519d..5fe961caffd4 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm);
 void xen_pgd_pin(pgd_t *pgd);
 //void xen_pgd_unpin(pgd_t *pgd);
 
-#ifdef CONFIG_X86_PAE
-unsigned long long xen_pte_val(pte_t);
-unsigned long long xen_pmd_val(pmd_t);
-unsigned long long xen_pgd_val(pgd_t);
+pteval_t xen_pte_val(pte_t);
+pmdval_t xen_pmd_val(pmd_t);
+pgdval_t xen_pgd_val(pgd_t);
 
-pte_t xen_make_pte(unsigned long long);
-pmd_t xen_make_pmd(unsigned long long);
-pgd_t xen_make_pgd(unsigned long long);
+pte_t xen_make_pte(pteval_t);
+pmd_t xen_make_pmd(pmdval_t);
+pgd_t xen_make_pgd(pgdval_t);
 
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval);
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val);
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_pmd_clear(pmd_t *pmdp);
 
-
-#else
-unsigned long xen_pte_val(pte_t);
-unsigned long xen_pmd_val(pmd_t);
-unsigned long xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(unsigned long);
-pmd_t xen_make_pmd(unsigned long);
-pgd_t xen_make_pgd(unsigned long);
-#endif
-
 #endif	/* _XEN_MMU_H */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 52b2e3856980..41e217503c96 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -14,6 +14,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/math64.h>
 
+#include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -31,17 +32,6 @@
 
 static cycle_t xen_clocksource_read(void);
 
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
-	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
-	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
-	u32 tsc_to_nsec_mul;
-	int tsc_shift;
-	u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
@@ -211,7 +201,7 @@ unsigned long long xen_sched_clock(void)
 unsigned long xen_cpu_khz(void)
 {
 	u64 xen_khz = 1000000ULL << 32;
-	const struct vcpu_time_info *info =
+	const struct pvclock_vcpu_time_info *info =
 		&HYPERVISOR_shared_info->vcpu_info[0].time;
 
 	do_div(xen_khz, info->tsc_to_system_mul);
@@ -223,121 +213,26 @@ unsigned long xen_cpu_khz(void)
 	return xen_khz;
 }
 
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
-	struct vcpu_time_info   *src;
-	struct shadow_time_info *dst;
-
-	/* src is shared memory with the hypervisor, so we need to
-	   make sure we get a consistent snapshot, even in the face of
-	   being preempted. */
-	src = &__get_cpu_var(xen_vcpu)->time;
-	dst = &__get_cpu_var(shadow_time);
-
-	do {
-		dst->version = src->version;
-		rmb();		/* fetch version before data */
-		dst->tsc_timestamp     = src->tsc_timestamp;
-		dst->system_timestamp  = src->system_time;
-		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-		dst->tsc_shift         = src->tsc_shift;
-		rmb();		/* test version after fetching data */
-	} while ((src->version & 1) | (dst->version ^ src->version));
-
-	return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
-	u64 product;
-#ifdef __i386__
-	u32 tmp1, tmp2;
-#endif
-
-	if (shift < 0)
-		delta >>= -shift;
-	else
-		delta <<= shift;
-
-#ifdef __i386__
-	__asm__ (
-		"mul  %5       ; "
-		"mov  %4,%%eax ; "
-		"mov  %%edx,%4 ; "
-		"mul  %5       ; "
-		"xor  %5,%5    ; "
-		"add  %4,%%eax ; "
-		"adc  %5,%%edx ; "
-		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
-		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
-	__asm__ (
-		"mul %%rdx ; shrd $32,%%rdx,%%rax"
-		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
-	return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
-	u64 now, delta;
-	now = native_read_tsc();
-	delta = now - shadow->tsc_timestamp;
-	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
 static cycle_t xen_clocksource_read(void)
 {
-	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+        struct pvclock_vcpu_time_info *src;
 	cycle_t ret;
-	unsigned version;
-
-	do {
-		version = get_time_values_from_xen();
-		barrier();
-		ret = shadow->system_timestamp + get_nsec_offset(shadow);
-		barrier();
-	} while (version != __get_cpu_var(xen_vcpu)->time.version);
-
-	put_cpu_var(shadow_time);
 
+	src = &get_cpu_var(xen_vcpu)->time;
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(xen_vcpu);
 	return ret;
 }
 
 static void xen_read_wallclock(struct timespec *ts)
 {
-	const struct shared_info *s = HYPERVISOR_shared_info;
-	u32 version;
-	u64 delta;
-	struct timespec now;
+	struct shared_info *s = HYPERVISOR_shared_info;
+	struct pvclock_wall_clock *wall_clock = &(s->wc);
+        struct pvclock_vcpu_time_info *vcpu_time;
 
-	/* get wallclock at system boot */
-	do {
-		version = s->wc_version;
-		rmb();		/* fetch version before time */
-		now.tv_sec  = s->wc_sec;
-		now.tv_nsec = s->wc_nsec;
-		rmb();		/* fetch time before checking version */
-	} while ((s->wc_version & 1) | (version ^ s->wc_version));
-
-	delta = xen_clocksource_read();	/* time since system boot */
-	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
-
-	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
-	now.tv_sec = delta;
-
-	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+	vcpu_time = &get_cpu_var(xen_vcpu)->time;
+	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
+	put_cpu_var(xen_vcpu);
 }
 
 unsigned long xen_get_wallclock(void)
@@ -345,7 +240,6 @@ unsigned long xen_get_wallclock(void)
 	struct timespec ts;
 
 	xen_read_wallclock(&ts);
-
 	return ts.tv_sec;
 }
 
@@ -569,8 +463,6 @@ __init void xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 
-	get_time_values_from_xen();
-
 	clocksource_register(&xen_clocksource);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 288d587ce73c..6ec3b4f7719b 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -17,7 +17,7 @@ ENTRY(startup_xen)
 
 	__FINIT
 
-.pushsection .bss.page_aligned
+.pushsection .text
 	.align PAGE_SIZE_asm
 ENTRY(hypercall_page)
 	.skip 0x1000
@@ -30,11 +30,7 @@ ENTRY(hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
-#ifdef CONFIG_X86_PAE
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
-#else
-	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "no")
-#endif
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 
 #endif /*CONFIG_XEN */
diff --git a/drivers/char/drm/i915_drv.c b/drivers/char/drm/i915_drv.c
index e8f3d682e3b1..93aed1c38bd2 100644
--- a/drivers/char/drm/i915_drv.c
+++ b/drivers/char/drm/i915_drv.c
@@ -389,6 +389,7 @@ static int i915_resume(struct drm_device *dev)
 	pci_restore_state(dev->pdev);
 	if (pci_enable_device(dev->pdev))
 		return -1;
+	pci_set_master(dev->pdev);
 
 	pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
 
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index b1a757a5ee27..8f81139d6194 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -981,16 +981,9 @@ EXPORT_SYMBOL_GPL(tty_perform_flush);
 int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
-	struct tty_struct *real_tty;
 	unsigned long flags;
 	int retval;
 
-	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-	    tty->driver->subtype == PTY_TYPE_MASTER)
-		real_tty = tty->link;
-	else
-		real_tty = tty;
-
 	switch (cmd) {
 	case TCXONC:
 		retval = tty_check_change(tty);
diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c
index ed33fddc4dee..f00f497b9ca9 100644
--- a/drivers/hwmon/abituguru3.c
+++ b/drivers/hwmon/abituguru3.c
@@ -30,6 +30,7 @@
 #include <linux/platform_device.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
+#include <linux/dmi.h>
 #include <asm/io.h>
 
 /* uGuru3 bank addresses */
@@ -323,7 +324,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
 		{ "AUX1 Fan",		36, 2, 60, 1, 0 },
 		{ NULL, 0, 0, 0, 0, 0 } }
 	},
-	{ 0x0013, "unknown", {
+	{ 0x0013, "Abit AW8D", {
 		{ "CPU Core",		 0, 0, 10, 1, 0 },
 		{ "DDR",		 1, 0, 10, 1, 0 },
 		{ "DDR VTT",		 2, 0, 10, 1, 0 },
@@ -349,6 +350,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
 		{ "AUX2 Fan",		36, 2, 60, 1, 0 },
 		{ "AUX3 Fan",		37, 2, 60, 1, 0 },
 		{ "AUX4 Fan",		38, 2, 60, 1, 0 },
+		{ "AUX5 Fan",		39, 2, 60, 1, 0 },
 		{ NULL, 0, 0, 0, 0, 0 } }
 	},
 	{ 0x0014, "Abit AB9 Pro", {
@@ -1111,11 +1113,12 @@ static int __init abituguru3_detect(void)
 {
 	/* See if there is an uguru3 there. An idle uGuru3 will hold 0x00 or
 	   0x08 at DATA and 0xAC at CMD. Sometimes the uGuru3 will hold 0x05
-	   at CMD instead, why is unknown. So we test for 0x05 too. */
+	   or 0x55 at CMD instead, why is unknown. */
 	u8 data_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_DATA);
 	u8 cmd_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_CMD);
 	if (((data_val == 0x00) || (data_val == 0x08)) &&
-			((cmd_val == 0xAC) || (cmd_val == 0x05)))
+			((cmd_val == 0xAC) || (cmd_val == 0x05) ||
+			 (cmd_val == 0x55)))
 		return ABIT_UGURU3_BASE;
 
 	ABIT_UGURU3_DEBUG("no Abit uGuru3 found, data = 0x%02X, cmd = "
@@ -1138,6 +1141,15 @@ static int __init abituguru3_init(void)
 	int address, err;
 	struct resource res = { .flags = IORESOURCE_IO };
 
+#ifdef CONFIG_DMI
+	const char *board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+
+	/* safety check, refuse to load on non Abit motherboards */
+	if (!force && (!board_vendor ||
+			strcmp(board_vendor, "http://www.abit.com.tw/")))
+		return -ENODEV;
+#endif
+
 	address = abituguru3_detect();
 	if (address < 0)
 		return address;
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index c1009d6f9796..93dbf5e7ff8a 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -309,6 +309,9 @@ static struct adt7473_data *adt7473_update_device(struct device *dev)
 						ADT7473_REG_PWM_BHVR(i));
 	}
 
+	i = i2c_smbus_read_byte_data(client, ADT7473_REG_CFG4);
+	data->max_duty_at_overheat = !!(i & ADT7473_CFG4_MAX_DUTY_AT_OVT);
+
 	data->limits_last_updated = local_jiffies;
 	data->limits_valid = 1;
 
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index fa7696905154..de698dc73020 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -251,10 +251,13 @@ static int lm75_detach_client(struct i2c_client *client)
    the SMBus standard. */
 static int lm75_read_value(struct i2c_client *client, u8 reg)
 {
+	int value;
+
 	if (reg == LM75_REG_CONF)
 		return i2c_smbus_read_byte_data(client, reg);
-	else
-		return swab16(i2c_smbus_read_word_data(client, reg));
+
+	value = i2c_smbus_read_word_data(client, reg);
+	return (value < 0) ? value : swab16(value);
 }
 
 static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value)
@@ -287,9 +290,16 @@ static struct lm75_data *lm75_update_device(struct device *dev)
 		int i;
 		dev_dbg(&client->dev, "Starting lm75 update\n");
 
-		for (i = 0; i < ARRAY_SIZE(data->temp); i++)
-			data->temp[i] = lm75_read_value(client,
-							LM75_REG_TEMP[i]);
+		for (i = 0; i < ARRAY_SIZE(data->temp); i++) {
+			int status;
+
+			status = lm75_read_value(client, LM75_REG_TEMP[i]);
+			if (status < 0)
+				dev_dbg(&client->dev, "reg %d, err %d\n",
+						LM75_REG_TEMP[i], status);
+			else
+				data->temp[i] = status;
+		}
 		data->last_updated = jiffies;
 		data->valid = 1;
 	}
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 182fe6a5605f..ee5eca1c1921 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -192,23 +192,20 @@ static int RANGE_TO_REG( int range )
 {
 	int i;
 
-	if ( range < lm85_range_map[0] ) { 
-		return 0 ;
-	} else if ( range > lm85_range_map[15] ) {
+	if (range >= lm85_range_map[15])
 		return 15 ;
-	} else {  /* find closest match */
-		for ( i = 14 ; i >= 0 ; --i ) {
-			if ( range > lm85_range_map[i] ) { /* range bracketed */
-				if ((lm85_range_map[i+1] - range) < 
-					(range - lm85_range_map[i])) {
-					i++;
-					break;
-				}
-				break;
-			}
+
+	/* Find the closest match */
+	for (i = 14; i >= 0; --i) {
+		if (range >= lm85_range_map[i]) {
+			if ((lm85_range_map[i + 1] - range) <
+					(range - lm85_range_map[i]))
+				return i + 1;
+			return i;
 		}
 	}
-	return( i & 0x0f );
+
+	return 0;
 }
 #define RANGE_FROM_REG(val) (lm85_range_map[(val)&0x0f])
 
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index b224079d4e1f..d5862e5d99a0 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m
 {
 	struct page *page;
 
-	page = alloc_pages(gfp_mask, order);
+	/*
+	 * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+	 * cleared, and subtle failures are seen if they aren't.
+	 */
+	page = alloc_pages(gfp_mask | __GFP_ZERO, order);
 	if (!page)
 		return -ENOMEM;
 
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 5126d5d9ea0e..2e554a4ab337 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * we set it now, so we can trap and pass that trap to the Guest if it
 	 * uses the FPU. */
 	if (cpu->ts)
-		lguest_set_ts();
+		unlazy_fpu(current);
 
 	/* SYSENTER is an optimized way of doing system calls.  We can't allow
 	 * it because it always jumps to privilege level 0.  A normal Guest
@@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * trap made the switcher code come back, and an error code which some
 	 * traps set.  */
 
+	 /* Restore SYSENTER if it's supposed to be on. */
+	 if (boot_cpu_has(X86_FEATURE_SEP))
+		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+
 	/* If the Guest page faulted, then the cr2 register will tell us the
 	 * bad virtual address.  We have to grab this now, because once we
 	 * re-enable interrupts an interrupt could fault and thus overwrite
@@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	if (cpu->regs->trapnum == 14)
 		cpu->arch.last_pagefault = read_cr2();
 	/* Similarly, if we took a trap because the Guest used the FPU,
-	 * we have to restore the FPU it expects to see. */
+	 * we have to restore the FPU it expects to see.
+	 * math_state_restore() may sleep and we may even move off to
+	 * a different CPU. So all the critical stuff should be done
+	 * before this.  */
 	else if (cpu->regs->trapnum == 7)
 		math_state_restore();
-
-	/* Restore SYSENTER if it's supposed to be on. */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 }
 
 /*H:130 Now we've examined the hypercall code; our Guest can make requests.
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index bafb69b6f7cb..fc6f4b8c64b3 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -942,7 +942,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	m->msg_namelen = 0;
 
 	if (skb) {
-		total_len = min(total_len, skb->len);
+		total_len = min_t(size_t, total_len, skb->len);
 		error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len);
 		if (error == 0)
 			error = total_len;
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 8662a6b7a30b..25b352b664d9 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -68,7 +68,6 @@ obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o
 obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o
 obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o
 obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o
-CFLAGS_hpwdt.o += -O
 obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o
 obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o
 obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 4f0f22b020ea..76e5b7386af9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 
 #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
 		/* Clear master flag /before/ clearing selector flag. */
-		rmb();
+		wmb();
 #endif
 		pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
 		while (pending_words != 0) {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9ecb92f68543..9ff7b1c04239 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	 */
 
 	/* Update group descriptor block for new group */
-	gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
+	gdp = (struct ext4_group_desc *)((char *)primary->b_data +
+					 gdb_off * EXT4_DESC_SIZE(sb));
 
 	ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
 	ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c19184f2e70e..bec76b1c2bb0 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 
 }
 
-static inline unsigned int zero_metapath_length(const struct metapath *mp,
-						unsigned height)
+static inline unsigned int metapath_branch_start(const struct metapath *mp)
 {
-	unsigned int i;
-	for (i = 0; i < height - 1; i++) {
-		if (mp->mp_list[i] != 0)
-			return i;
-	}
-	return height;
+	if (mp->mp_list[0] == 0)
+		return 2;
+	return 1;
 }
 
 /**
@@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *dibh = mp->mp_bh[0];
 	u64 bn, dblock = 0;
-	unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0;
+	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
 	const unsigned end_of_metadata = height - 1;
@@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 			/* Building up tree height */
 			state = ALLOC_GROW_HEIGHT;
 			iblks = height - ip->i_height;
-			zmpl = zero_metapath_length(mp, height);
-			iblks -= zmpl;
-			iblks += height;
+			branch_start = metapath_branch_start(mp);
+			iblks += (height - branch_start);
 		}
 	}
 
@@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 					sizeof(struct gfs2_meta_header));
 				*ptr = zero_bn;
 				state = ALLOC_GROW_DEPTH;
-				for(i = zmpl; i < height; i++) {
+				for(i = branch_start; i < height; i++) {
 					if (mp->mp_bh[i] == NULL)
 						break;
 					brelse(mp->mp_bh[i]);
 					mp->mp_bh[i] = NULL;
 				}
-				i = zmpl;
+				i = branch_start;
 			}
 			if (n == 0)
 				break;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6387523a3153..3401628d742b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -195,7 +195,7 @@ static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
 	   depending on architecture.  I've experimented with several ways
 	   of writing this section such as using an else before the goto
 	   but this one seems to be the fastest. */
-	while ((unsigned char *)plong < end - 1) {
+	while ((unsigned char *)plong < end - sizeof(unsigned long)) {
 		prefetch(plong + 1);
 		if (((*plong) & LBITMASK) != lskipval)
 			break;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 49c7cd0502cc..779d2eb649c5 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
 				struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
+	unsigned size;
 
 	if ((res->status = ntohl(*p++)) == 0) {
-		int size = ntohl(*p++);
-		if (size <= NFS3_FHSIZE) {
+		size = ntohl(*p++);
+		if (size <= NFS3_FHSIZE && size != 0) {
 			fh->size = size;
 			memcpy(fh->data, p, size);
 		} else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2a4a024a4e7b..614efeed5437 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options,
 {
 	struct nfs_mount_data *data = (struct nfs_mount_data *)options;
 
-	memset(args, 0, sizeof(*args));
-
 	if (data == NULL)
 		goto out_no_data;
 
@@ -1251,13 +1249,13 @@ static int nfs_validate_mount_data(void *options,
 	case 5:
 		memset(data->context, 0, sizeof(data->context));
 	case 6:
-		if (data->flags & NFS_MOUNT_VER3)
+		if (data->flags & NFS_MOUNT_VER3) {
+			if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
+				goto out_invalid_fh;
 			mntfh->size = data->root.size;
-		else
+		} else
 			mntfh->size = NFS2_FHSIZE;
 
-		if (mntfh->size > sizeof(mntfh->data))
-			goto out_invalid_fh;
 
 		memcpy(mntfh->data, data->root.data, mntfh->size);
 		if (mntfh->size < sizeof(mntfh->data))
@@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 {
 	struct nfs_server *server = NULL;
 	struct super_block *s;
-	struct nfs_fh mntfh;
-	struct nfs_parsed_mount_data data;
+	struct nfs_parsed_mount_data *data;
+	struct nfs_fh *mntfh;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	struct nfs_sb_mountdata sb_mntdata = {
 		.mntflags = flags,
 	};
-	int error;
+	int error = -ENOMEM;
 
-	security_init_mnt_opts(&data.lsm_opts);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
+	if (data == NULL || mntfh == NULL)
+		goto out_free_fh;
+
+	security_init_mnt_opts(&data->lsm_opts);
 
 	/* Validate the mount data */
-	error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
+	error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
 	if (error < 0)
 		goto out;
 
 	/* Get a volume representation */
-	server = nfs_create_server(&data, &mntfh);
+	server = nfs_create_server(data, mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
 		goto out;
@@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		nfs_fill_super(s, &data);
+		nfs_fill_super(s, data);
 	}
 
-	mntroot = nfs_get_root(s, &mntfh);
+	mntroot = nfs_get_root(s, mntfh);
 	if (IS_ERR(mntroot)) {
 		error = PTR_ERR(mntroot);
 		goto error_splat_super;
 	}
 
-	error = security_sb_set_mnt_opts(s, &data.lsm_opts);
+	error = security_sb_set_mnt_opts(s, &data->lsm_opts);
 	if (error)
 		goto error_splat_root;
 
@@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	error = 0;
 
 out:
-	kfree(data.nfs_server.hostname);
-	kfree(data.mount_server.hostname);
-	security_free_mnt_opts(&data.lsm_opts);
+	kfree(data->nfs_server.hostname);
+	kfree(data->mount_server.hostname);
+	security_free_mnt_opts(&data->lsm_opts);
+out_free_fh:
+	kfree(mntfh);
+	kfree(data);
 	return error;
 
 out_err_nosb:
@@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options,
 	struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
 	char *c;
 
-	memset(args, 0, sizeof(*args));
-
 	if (data == NULL)
 		goto out_no_data;
 
@@ -1959,26 +1963,31 @@ static int nfs4_validate_mount_data(void *options,
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	struct nfs_parsed_mount_data data;
+	struct nfs_parsed_mount_data *data;
 	struct super_block *s;
 	struct nfs_server *server;
-	struct nfs_fh mntfh;
+	struct nfs_fh *mntfh;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	struct nfs_sb_mountdata sb_mntdata = {
 		.mntflags = flags,
 	};
-	int error;
+	int error = -ENOMEM;
 
-	security_init_mnt_opts(&data.lsm_opts);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
+	if (data == NULL || mntfh == NULL)
+		goto out_free_fh;
+
+	security_init_mnt_opts(&data->lsm_opts);
 
 	/* Validate the mount data */
-	error = nfs4_validate_mount_data(raw_data, &data, dev_name);
+	error = nfs4_validate_mount_data(raw_data, data, dev_name);
 	if (error < 0)
 		goto out;
 
 	/* Get a volume representation */
-	server = nfs4_create_server(&data, &mntfh);
+	server = nfs4_create_server(data, mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
 		goto out;
@@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		nfs4_fill_super(s);
 	}
 
-	mntroot = nfs4_get_root(s, &mntfh);
+	mntroot = nfs4_get_root(s, mntfh);
 	if (IS_ERR(mntroot)) {
 		error = PTR_ERR(mntroot);
 		goto error_splat_super;
 	}
 
-	error = security_sb_set_mnt_opts(s, &data.lsm_opts);
+	error = security_sb_set_mnt_opts(s, &data->lsm_opts);
 	if (error)
 		goto error_splat_root;
 
@@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	error = 0;
 
 out:
-	kfree(data.client_address);
-	kfree(data.nfs_server.export_path);
-	kfree(data.nfs_server.hostname);
-	security_free_mnt_opts(&data.lsm_opts);
+	kfree(data->client_address);
+	kfree(data->nfs_server.export_path);
+	kfree(data->nfs_server.hostname);
+	security_free_mnt_opts(&data->lsm_opts);
+out_free_fh:
+	kfree(mntfh);
+	kfree(data);
 	return error;
 
 out_free:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6d8ace3e3259..f333848fd3be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page,
 	}
 
 	status = nfs_writepage_setup(ctx, page, offset, count);
-	__set_page_dirty_nobuffers(page);
+	if (status < 0)
+		nfs_set_pageerror(page);
+	else
+		__set_page_dirty_nobuffers(page);
 
         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
 			status, (long long)i_size_read(inode));
-	if (status < 0)
-		nfs_set_pageerror(page);
 	return status;
 }
 
diff --git a/fs/select.c b/fs/select.c
index 8dda969614a9..da0e88201c3a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 						retval++;
 					}
 				}
-				cond_resched();
 			}
 			if (res_in)
 				*rinp = res_in;
@@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 				*routp = res_out;
 			if (res_ex)
 				*rexp = res_ex;
+			cond_resched();
 		}
 		wait = NULL;
 		if (retval || !*timeout || signal_pending(current))
diff --git a/include/asm-alpha/core_mcpcia.h b/include/asm-alpha/core_mcpcia.h
index 525b4f6a7ace..acf55b483472 100644
--- a/include/asm-alpha/core_mcpcia.h
+++ b/include/asm-alpha/core_mcpcia.h
@@ -261,7 +261,7 @@ struct el_MCPCIA_uncorrected_frame_mcheck {
 	}
 #endif
 
-static inline int __mcpcia_is_mmio(unsigned long addr)
+extern inline int __mcpcia_is_mmio(unsigned long addr)
 {
 	return (addr & 0x80000000UL) == 0;
 }
diff --git a/include/asm-alpha/core_t2.h b/include/asm-alpha/core_t2.h
index 90e6b5d6c214..46bfff58f670 100644
--- a/include/asm-alpha/core_t2.h
+++ b/include/asm-alpha/core_t2.h
@@ -356,13 +356,13 @@ struct el_t2_frame_corrected {
 #define vip	volatile int *
 #define vuip	volatile unsigned int *
 
-static inline u8 t2_inb(unsigned long addr)
+extern inline u8 t2_inb(unsigned long addr)
 {
 	long result = *(vip) ((addr << 5) + T2_IO + 0x00);
 	return __kernel_extbl(result, addr & 3);
 }
 
-static inline void t2_outb(u8 b, unsigned long addr)
+extern inline void t2_outb(u8 b, unsigned long addr)
 {
 	unsigned long w;
 
@@ -371,13 +371,13 @@ static inline void t2_outb(u8 b, unsigned long addr)
 	mb();
 }
 
-static inline u16 t2_inw(unsigned long addr)
+extern inline u16 t2_inw(unsigned long addr)
 {
 	long result = *(vip) ((addr << 5) + T2_IO + 0x08);
 	return __kernel_extwl(result, addr & 3);
 }
 
-static inline void t2_outw(u16 b, unsigned long addr)
+extern inline void t2_outw(u16 b, unsigned long addr)
 {
 	unsigned long w;
 
@@ -386,12 +386,12 @@ static inline void t2_outw(u16 b, unsigned long addr)
 	mb();
 }
 
-static inline u32 t2_inl(unsigned long addr)
+extern inline u32 t2_inl(unsigned long addr)
 {
 	return *(vuip) ((addr << 5) + T2_IO + 0x18);
 }
 
-static inline void t2_outl(u32 b, unsigned long addr)
+extern inline void t2_outl(u32 b, unsigned long addr)
 {
 	*(vuip) ((addr << 5) + T2_IO + 0x18) = b;
 	mb();
@@ -435,7 +435,7 @@ static inline void t2_outl(u32 b, unsigned long addr)
 	set_hae(msb); \
 }
 
-static DEFINE_SPINLOCK(t2_hae_lock);
+extern spinlock_t t2_hae_lock;
 
 /*
  * NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since
diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h
index 38f18cf18c9d..e971ab000f95 100644
--- a/include/asm-alpha/io.h
+++ b/include/asm-alpha/io.h
@@ -35,7 +35,7 @@
  * register not being up-to-date with respect to the hardware
  * value.
  */
-static inline void __set_hae(unsigned long new_hae)
+extern inline void __set_hae(unsigned long new_hae)
 {
 	unsigned long flags;
 	local_irq_save(flags);
@@ -49,7 +49,7 @@ static inline void __set_hae(unsigned long new_hae)
 	local_irq_restore(flags);
 }
 
-static inline void set_hae(unsigned long new_hae)
+extern inline void set_hae(unsigned long new_hae)
 {
 	if (new_hae != alpha_mv.hae_cache)
 		__set_hae(new_hae);
@@ -176,7 +176,7 @@ REMAP2(u64, writeq, volatile)
 #undef REMAP1
 #undef REMAP2
 
-static inline void __iomem *generic_ioportmap(unsigned long a)
+extern inline void __iomem *generic_ioportmap(unsigned long a)
 {
 	return alpha_mv.mv_ioportmap(a);
 }
diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h
index 6a5be1f7debf..86c08a02d239 100644
--- a/include/asm-alpha/mmu_context.h
+++ b/include/asm-alpha/mmu_context.h
@@ -23,7 +23,7 @@
 #endif
 
 
-extern inline unsigned long
+static inline unsigned long
 __reload_thread(struct pcb_struct *pcb)
 {
 	register unsigned long a0 __asm__("$16");
@@ -114,7 +114,7 @@ extern unsigned long last_asn;
 #define __MMU_EXTERN_INLINE
 #endif
 
-static inline unsigned long
+extern inline unsigned long
 __get_new_mm_context(struct mm_struct *mm, long cpu)
 {
 	unsigned long asn = cpu_last_asn(cpu);
@@ -226,7 +226,7 @@ ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm)
 # endif
 #endif
 
-extern inline int
+static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	int i;
diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h
index 48348fe34c19..3495e8e00d70 100644
--- a/include/asm-alpha/percpu.h
+++ b/include/asm-alpha/percpu.h
@@ -1,6 +1,78 @@
 #ifndef __ALPHA_PERCPU_H
 #define __ALPHA_PERCPU_H
+#include <linux/compiler.h>
+#include <linux/threads.h>
 
-#include <asm-generic/percpu.h>
+/*
+ * Determine the real variable name from the name visible in the
+ * kernel sources.
+ */
+#define per_cpu_var(var) per_cpu__##var
+
+#ifdef CONFIG_SMP
+
+/*
+ * per_cpu_offset() is the offset that has to be added to a
+ * percpu variable to get to the instance for a certain processor.
+ */
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+#define per_cpu_offset(x) (__per_cpu_offset[x])
+
+#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
+#ifdef CONFIG_DEBUG_PREEMPT
+#define my_cpu_offset per_cpu_offset(smp_processor_id())
+#else
+#define my_cpu_offset __my_cpu_offset
+#endif
+
+#ifndef MODULE
+#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
+#define PER_CPU_ATTRIBUTES
+#else
+/*
+ * To calculate addresses of locally defined variables, GCC uses 32-bit
+ * displacement from the GP. Which doesn't work for per cpu variables in
+ * modules, as an offset to the kernel per cpu area is way above 4G.
+ *
+ * This forces allocation of a GOT entry for per cpu variable using
+ * ldq instruction with a 'literal' relocation.
+ */
+#define SHIFT_PERCPU_PTR(var, offset) ({		\
+	extern int simple_identifier_##var(void);	\
+	unsigned long __ptr, tmp_gp;			\
+	asm (  "br	%1, 1f		  	      \n\
+	1:	ldgp	%1, 0(%1)	    	      \n\
+		ldq %0, per_cpu__" #var"(%1)\t!literal"		\
+		: "=&r"(__ptr), "=&r"(tmp_gp));		\
+	(typeof(&per_cpu_var(var)))(__ptr + (offset)); })
+
+#define PER_CPU_ATTRIBUTES	__used
+
+#endif /* MODULE */
+
+/*
+ * A percpu variable may point to a discarded regions. The following are
+ * established ways to produce a usable pointer from the percpu variable
+ * offset.
+ */
+#define per_cpu(var, cpu) \
+	(*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
+#define __get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(var, my_cpu_offset))
+#define __raw_get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
+
+#else /* ! SMP */
+
+#define per_cpu(var, cpu)		(*((void)(cpu), &per_cpu_var(var)))
+#define __get_cpu_var(var)		per_cpu_var(var)
+#define __raw_get_cpu_var(var)		per_cpu_var(var)
+
+#define PER_CPU_ATTRIBUTES
+
+#endif /* SMP */
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name)
 
 #endif /* __ALPHA_PERCPU_H */
diff --git a/include/asm-alpha/system.h b/include/asm-alpha/system.h
index ed221d6408fc..afe20fa58c99 100644
--- a/include/asm-alpha/system.h
+++ b/include/asm-alpha/system.h
@@ -184,7 +184,7 @@ enum amask_enum {
    __amask; })
 
 #define __CALL_PAL_R0(NAME, TYPE)				\
-static inline TYPE NAME(void)					\
+extern inline TYPE NAME(void)					\
 {								\
 	register TYPE __r0 __asm__("$0");			\
 	__asm__ __volatile__(					\
@@ -196,7 +196,7 @@ static inline TYPE NAME(void)					\
 }
 
 #define __CALL_PAL_W1(NAME, TYPE0)				\
-static inline void NAME(TYPE0 arg0)				\
+extern inline void NAME(TYPE0 arg0)				\
 {								\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
 	__asm__ __volatile__(					\
@@ -207,7 +207,7 @@ static inline void NAME(TYPE0 arg0)				\
 }
 
 #define __CALL_PAL_W2(NAME, TYPE0, TYPE1)			\
-static inline void NAME(TYPE0 arg0, TYPE1 arg1)			\
+extern inline void NAME(TYPE0 arg0, TYPE1 arg1)			\
 {								\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
 	register TYPE1 __r17 __asm__("$17") = arg1;		\
@@ -219,7 +219,7 @@ static inline void NAME(TYPE0 arg0, TYPE1 arg1)			\
 }
 
 #define __CALL_PAL_RW1(NAME, RTYPE, TYPE0)			\
-static inline RTYPE NAME(TYPE0 arg0)				\
+extern inline RTYPE NAME(TYPE0 arg0)				\
 {								\
 	register RTYPE __r0 __asm__("$0");			\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
@@ -232,7 +232,7 @@ static inline RTYPE NAME(TYPE0 arg0)				\
 }
 
 #define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1)		\
-static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1)		\
+extern inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1)		\
 {								\
 	register RTYPE __r0 __asm__("$0");			\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
diff --git a/include/asm-alpha/vga.h b/include/asm-alpha/vga.h
index e8df1e7aae6b..c00106bac521 100644
--- a/include/asm-alpha/vga.h
+++ b/include/asm-alpha/vga.h
@@ -13,7 +13,7 @@
 #define VT_BUF_HAVE_MEMSETW
 #define VT_BUF_HAVE_MEMCPYW
 
-extern inline void scr_writew(u16 val, volatile u16 *addr)
+static inline void scr_writew(u16 val, volatile u16 *addr)
 {
 	if (__is_ioaddr(addr))
 		__raw_writew(val, (volatile u16 __iomem *) addr);
@@ -21,7 +21,7 @@ extern inline void scr_writew(u16 val, volatile u16 *addr)
 		*addr = val;
 }
 
-extern inline u16 scr_readw(volatile const u16 *addr)
+static inline u16 scr_readw(volatile const u16 *addr)
 {
 	if (__is_ioaddr(addr))
 		return __raw_readw((volatile const u16 __iomem *) addr);
@@ -29,7 +29,7 @@ extern inline u16 scr_readw(volatile const u16 *addr)
 		return *addr;
 }
 
-extern inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
+static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
 {
 	if (__is_ioaddr(s))
 		memsetw_io((u16 __iomem *) s, c, count);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1d8cd01fa514..844f2a89afbc 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -18,6 +18,7 @@
 #include <linux/kvm_para.h>
 #include <linux/kvm_types.h>
 
+#include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 
 #define KVM_MAX_VCPUS 16
@@ -282,7 +283,8 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 
 	gpa_t time;
-	struct kvm_vcpu_time_info hv_clock;
+	struct pvclock_vcpu_time_info hv_clock;
+	unsigned int hv_clock_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
 };
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 509845942070..bfd9900742bf 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -48,24 +48,6 @@ struct kvm_mmu_op_release_pt {
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
-/* xen binary-compatible interface. See xen headers for details */
-struct kvm_vcpu_time_info {
-	uint32_t version;
-	uint32_t pad0;
-	uint64_t tsc_timestamp;
-	uint64_t system_time;
-	uint32_t tsc_to_system_mul;
-	int8_t   tsc_shift;
-	int8_t	 pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct kvm_wall_clock {
-	uint32_t wc_version;
-	uint32_t wc_sec;
-	uint32_t wc_nsec;
-} __attribute__((__packed__));
-
-
 extern void kvmclock_init(void);
 
 
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
new file mode 100644
index 000000000000..6857f840b243
--- /dev/null
+++ b/include/asm-x86/pvclock-abi.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_X86_PVCLOCK_ABI_H_
+#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * Both Xen and KVM are using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time.  There is one per virtual cpu.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it uneven) before it starts updating the fields and raises it again
+ * (making it even) when it is done.  Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+	u32   version;
+	u32   pad0;
+	u64   tsc_timestamp;
+	u64   system_time;
+	u32   tsc_to_system_mul;
+	s8    tsc_shift;
+	u8    pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32   version;
+	u32   sec;
+	u32   nsec;
+} __attribute__((__packed__));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
new file mode 100644
index 000000000000..85b1bba8e0a3
--- /dev/null
+++ b/include/asm-x86/pvclock.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_X86_PVCLOCK_H_
+#define _ASM_X86_PVCLOCK_H_
+
+#include <linux/clocksource.h>
+#include <asm/pvclock-abi.h>
+
+/* some helper functions for xen and kvm pv clock sources */
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
+			    struct pvclock_vcpu_time_info *vcpu,
+			    struct timespec *ts);
+
+#endif /* _ASM_X86_PVCLOCK_H_ */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index baf3a4dce28c..e11f24038b1d 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x)
 	return (pte_t) { .pte = x };
 }
 
-#ifdef CONFIG_X86_PAE
 #define pmd_val_ma(v) ((v).pmd)
 #define pud_val_ma(v) ((v).pgd.pgd)
 #define __pmd_ma(x)	((pmd_t) { (x) } )
-#else  /* !X86_PAE */
-#define pmd_val_ma(v)	((v).pud.pgd.pgd)
-#endif	/* CONFIG_X86_PAE */
 
 #define pgd_val_ma(x)	((x).pgd)
 
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 6a5dbdc8a7dc..686895bacd9d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -94,7 +94,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long freepfn,
 				       unsigned long startpfn,
 				       unsigned long endpfn);
-extern void reserve_bootmem_node(pg_data_t *pgdat,
+extern int reserve_bootmem_node(pg_data_t *pgdat,
 				 unsigned long physaddr,
 				 unsigned long size,
 				 int flags);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 092b1b25291d..de9d1df4bba2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -33,6 +33,7 @@
 #define KVM_REQ_REPORT_TPR_ACCESS  2
 #define KVM_REQ_MMU_RELOAD         3
 #define KVM_REQ_TRIPLE_FAULT       4
+#define KVM_REQ_PENDING_TIMER      5
 
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 59f1c0bd8f9c..d2a003586761 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -27,8 +27,7 @@
  * 	This routine is called by the kernel to write a series of
  * 	characters to the tty device.  The characters may come from
  * 	user space or kernel space.  This routine will return the
- *	number of characters actually accepted for writing.  This
- *	routine is mandatory.
+ *	number of characters actually accepted for writing.
  *
  *	Optional: Required for writable devices.
  *
@@ -134,7 +133,7 @@
  * 	This routine notifies the tty driver that it should hangup the
  * 	tty device.
  *
- *	Required:
+ *	Optional:
  *
  * void (*break_ctl)(struct tty_stuct *tty, int state);
  *
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e0a612bc9c4e..f422f7218e1c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -367,6 +367,12 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
 		 a->s6_addr32[2] | a->s6_addr32[3] ) == 0); 
 }
 
+static inline int ipv6_addr_loopback(const struct in6_addr *a)
+{
+	return ((a->s6_addr32[0] | a->s6_addr32[1] |
+		 a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0);
+}
+
 static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
 {
 	return ((a->s6_addr32[0] | a->s6_addr32[1] |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index aa540e6be502..d9dd0f707296 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -95,6 +95,11 @@ extern struct list_head net_namespace_list;
 #ifdef CONFIG_NET_NS
 extern void __put_net(struct net *net);
 
+static inline int net_alive(struct net *net)
+{
+	return net && atomic_read(&net->count);
+}
+
 static inline struct net *get_net(struct net *net)
 {
 	atomic_inc(&net->count);
@@ -125,6 +130,12 @@ int net_eq(const struct net *net1, const struct net *net2)
 	return net1 == net2;
 }
 #else
+
+static inline int net_alive(struct net *net)
+{
+	return 1;
+}
+
 static inline struct net *get_net(struct net *net)
 {
 	return net;
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 9b018da48cf3..819a0331cda9 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -10,6 +10,7 @@
 #define __XEN_PUBLIC_XEN_H__
 
 #include <asm/xen/interface.h>
+#include <asm/pvclock-abi.h>
 
 /*
  * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
@@ -336,7 +337,7 @@ struct vcpu_info {
 	uint8_t evtchn_upcall_mask;
 	unsigned long evtchn_pending_sel;
 	struct arch_vcpu_info arch;
-	struct vcpu_time_info time;
+	struct pvclock_vcpu_time_info time;
 }; /* 64 bytes (x86) */
 
 /*
@@ -384,9 +385,7 @@ struct shared_info {
 	 * Wallclock time: updated only by control software. Guests should base
 	 * their gettimeofday() syscall on this wallclock-base value.
 	 */
-	uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
-	uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
-	uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
+	struct pvclock_wall_clock wc;
 
 	struct arch_shared_info arch;
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 449def8074fe..7d1136e97c14 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1096,21 +1096,64 @@ static void unqueue_me_pi(struct futex_q *q)
  * private futexes.
  */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-				struct task_struct *newowner)
+				struct task_struct *newowner,
+				struct rw_semaphore *fshared)
 {
 	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
+	struct task_struct *oldowner = pi_state->owner;
 	u32 uval, curval, newval;
-	int ret;
+	int ret, attempt = 0;
 
 	/* Owner died? */
+	if (!pi_state->owner)
+		newtid |= FUTEX_OWNER_DIED;
+
+	/*
+	 * We are here either because we stole the rtmutex from the
+	 * pending owner or we are the pending owner which failed to
+	 * get the rtmutex. We have to replace the pending owner TID
+	 * in the user space variable. This must be atomic as we have
+	 * to preserve the owner died bit here.
+	 *
+	 * Note: We write the user space value _before_ changing the
+	 * pi_state because we can fault here. Imagine swapped out
+	 * pages or a fork, which was running right before we acquired
+	 * mmap_sem, that marked all the anonymous memory readonly for
+	 * cow.
+	 *
+	 * Modifying pi_state _before_ the user space value would
+	 * leave the pi_state in an inconsistent state when we fault
+	 * here, because we need to drop the hash bucket lock to
+	 * handle the fault. This might be observed in the PID check
+	 * in lookup_pi_state.
+	 */
+retry:
+	if (get_futex_value_locked(&uval, uaddr))
+		goto handle_fault;
+
+	while (1) {
+		newval = (uval & FUTEX_OWNER_DIED) | newtid;
+
+		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+
+		if (curval == -EFAULT)
+			goto handle_fault;
+		if (curval == uval)
+			break;
+		uval = curval;
+	}
+
+	/*
+	 * We fixed up user space. Now we need to fix the pi_state
+	 * itself.
+	 */
 	if (pi_state->owner != NULL) {
 		spin_lock_irq(&pi_state->owner->pi_lock);
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		spin_unlock_irq(&pi_state->owner->pi_lock);
-	} else
-		newtid |= FUTEX_OWNER_DIED;
+	}
 
 	pi_state->owner = newowner;
 
@@ -1118,26 +1161,35 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &newowner->pi_state_list);
 	spin_unlock_irq(&newowner->pi_lock);
+	return 0;
 
 	/*
-	 * We own it, so we have to replace the pending owner
-	 * TID. This must be atomic as we have preserve the
-	 * owner died bit here.
+	 * To handle the page fault we need to drop the hash bucket
+	 * lock here. That gives the other task (either the pending
+	 * owner itself or the task which stole the rtmutex) the
+	 * chance to try the fixup of the pi_state. So once we are
+	 * back from handling the fault we need to check the pi_state
+	 * after reacquiring the hash bucket lock and before trying to
+	 * do another fixup. When the fixup has been done already we
+	 * simply return.
 	 */
-	ret = get_futex_value_locked(&uval, uaddr);
+handle_fault:
+	spin_unlock(q->lock_ptr);
 
-	while (!ret) {
-		newval = (uval & FUTEX_OWNER_DIED) | newtid;
+	ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
 
-		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+	spin_lock(q->lock_ptr);
 
-		if (curval == -EFAULT)
-			ret = -EFAULT;
-		if (curval == uval)
-			break;
-		uval = curval;
-	}
-	return ret;
+	/*
+	 * Check if someone else fixed it for us:
+	 */
+	if (pi_state->owner != oldowner)
+		return 0;
+
+	if (ret)
+		return ret;
+
+	goto retry;
 }
 
 /*
@@ -1507,7 +1559,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		 * that case:
 		 */
 		if (q.pi_state->owner != curr)
-			ret = fixup_pi_state_owner(uaddr, &q, curr);
+			ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
 	} else {
 		/*
 		 * Catch the rare case, where the lock was released
@@ -1539,7 +1591,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 				int res;
 
 				owner = rt_mutex_owner(&q.pi_state->pi_mutex);
-				res = fixup_pi_state_owner(uaddr, &q, owner);
+				res = fixup_pi_state_owner(uaddr, &q, owner,
+							   fshared);
 
 				/* propagate -EFAULT, if the fixup failed */
 				if (res)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 79e3c90113c2..3ec23c3ec97f 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -1499,7 +1499,8 @@ int kgdb_nmicallback(int cpu, void *regs)
 	return 1;
 }
 
-void kgdb_console_write(struct console *co, const char *s, unsigned count)
+static void kgdb_console_write(struct console *co, const char *s,
+   unsigned count)
 {
 	unsigned long flags;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index b048ad8a11af..3aaa5c8cb421 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4398,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 			     signal_pending(current)) ||
 			    (state == TASK_KILLABLE &&
 			     fatal_signal_pending(current))) {
-				__remove_wait_queue(&x->wait, &wait);
-				return -ERESTARTSYS;
+				timeout = -ERESTARTSYS;
+				break;
 			}
 			__set_current_state(state);
 			spin_unlock_irq(&x->wait.lock);
 			timeout = schedule_timeout(timeout);
 			spin_lock_irq(&x->wait.lock);
-			if (!timeout) {
-				__remove_wait_queue(&x->wait, &wait);
-				return timeout;
-			}
-		} while (!x->done);
+		} while (!x->done && timeout);
 		__remove_wait_queue(&x->wait, &wait);
+		if (!x->done)
+			return timeout;
 	}
 	x->done--;
-	return timeout;
+	return timeout ?: 1;
 }
 
 static long __sched
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b6..0f3c19197fa4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
 			spin_unlock(&rt_rq->rt_runtime_lock);
-		}
+		} else if (rt_rq->rt_nr_running)
+			idle = 0;
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b9..8d9f60e06f62 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }
 
-void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 				 unsigned long size, int flags)
 {
 	int ret;
 
 	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 	if (ret < 0)
-		return;
+		return -ENOMEM;
 	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+
+	return 0;
 }
 
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
diff --git a/mm/memory.c b/mm/memory.c
index 9aefaae46858..d14b251a25a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1045,6 +1045,26 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+	/*
+	 * We don't want to optimize FOLL_ANON for make_pages_present()
+	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+	 * we want to get the page from the page tables to make sure
+	 * that we serialize and update with any other user of that
+	 * mapping.
+	 */
+	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+		return 0;
+	/*
+	 * And if we have a fault or a nopfn routine, it's not an
+	 * anonymous region.
+	 */
+	return !vma->vm_ops ||
+		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
@@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
-		if (!write && !(vma->vm_flags & VM_LOCKED) &&
-		    (!vma->vm_ops || !vma->vm_ops->fault))
+		if (!write && use_zero_page(vma))
 			foll_flags |= FOLL_ANON;
 
 		do {
@@ -1766,7 +1785,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
-			page_remove_rmap(old_page, vma);
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
@@ -1788,6 +1806,32 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
 
+		if (old_page) {
+			/*
+			 * Only after switching the pte to the new page may
+			 * we remove the mapcount here. Otherwise another
+			 * process may come and find the rmap count decremented
+			 * before the pte is switched to the new page, and
+			 * "reuse" the old page writing into it while our pte
+			 * here still points into it and can be read by other
+			 * threads.
+			 *
+			 * The critical issue is to order this
+			 * page_remove_rmap with the ptp_clear_flush above.
+			 * Those stores are ordered by (if nothing else,)
+			 * the barrier present in the atomic_add_negative
+			 * in page_remove_rmap.
+			 *
+			 * Then the TLB flush in ptep_clear_flush ensures that
+			 * no process can access the old page before the
+			 * decremented mapcount is visible. And the old page
+			 * cannot be reused until after the decremented
+			 * mapcount is visible. So transitively, TLBs to
+			 * old page will be flushed before it can be reused.
+			 */
+			page_remove_rmap(old_page, vma);
+		}
+
 		/* Free the old page.. */
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
diff --git a/mm/slab.c b/mm/slab.c
index 06236e4ddc1b..046607f05f3e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3263,9 +3263,12 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 
 		if (cpuset_zone_allowed_hardwall(zone, flags) &&
 			cache->nodelists[nid] &&
-			cache->nodelists[nid]->free_objects)
+			cache->nodelists[nid]->free_objects) {
 				obj = ____cache_alloc_node(cache,
 					flags | GFP_THISNODE, nid);
+				if (obj)
+					break;
+		}
 	}
 
 	if (!obj) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 68d8df0992ab..c421a1f8f0b9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2077,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+	/* Don't receive packets in an exiting network namespace */
+	if (!net_alive(dev_net(skb->dev)))
+		goto out;
+
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72b4c184dd84..7c52fe277b62 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work)
 	struct pernet_operations *ops;
 	struct net *net;
 
+	/* Be very certain incoming network packets will not find us */
+	rcu_barrier();
+
 	net = container_of(work, struct net, work);
 
 	mutex_lock(&net_mutex);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 4e5c8615832c..17eb48b8e329 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (hdr->version != 6)
 		goto err;
 
+	/*
+	 * RFC4291 2.5.3
+	 * A packet received on an interface with a destination address
+	 * of loopback must be dropped.
+	 */
+	if (!(dev->flags & IFF_LOOPBACK) &&
+	    ipv6_addr_loopback(&hdr->daddr))
+		goto err;
+
 	skb->transport_header = skb->network_header + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c042ce19bd14..86e28a75267f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -345,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	case IPV6_DSTOPTS:
 	{
 		struct ipv6_txoptions *opt;
+
+		/* remove any sticky options header with a zero option
+		 * length, per RFC3542.
+		 */
 		if (optlen == 0)
 			optval = NULL;
+		else if (optlen < sizeof(struct ipv6_opt_hdr) ||
+			 optlen & 0x7 || optlen > 8 * 255)
+			goto e_inval;
 
 		/* hop-by-hop / destination options are privileged option */
 		retv = -EPERM;
 		if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
 			break;
 
-		if (optlen < sizeof(struct ipv6_opt_hdr) ||
-		    optlen & 0x7 || optlen > 8 * 255)
-			goto e_inval;
-
 		opt = ipv6_renew_options(sk, np->opt, optname,
 					 (struct ipv6_opt_hdr __user *)optval,
 					 optlen);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 28d8bd53bd3a..c80d5899f279 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1132,7 +1132,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 	ieee80211_tx_handler *handler;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result res = TX_DROP, res_prepare;
-	int ret, i;
+	int ret, i, retries = 0;
 
 	WARN_ON(__ieee80211_queue_pending(local, control->queue));
 
@@ -1216,6 +1216,13 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 		if (!__ieee80211_queue_stopped(local, control->queue)) {
 			clear_bit(IEEE80211_LINK_STATE_PENDING,
 				  &local->state[control->queue]);
+			retries++;
+			/*
+			 * Driver bug, it's rejecting packets but
+			 * not stopping queues.
+			 */
+			if (WARN_ON_ONCE(retries > 5))
+				goto drop;
 			goto retry;
 		}
 		memcpy(&store->control, control,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e7e3baf7009e..0dbcde6758ea 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	if (copy_from_user(&getaddrs, optval, len))
 		return -EFAULT;
 
-	if (getaddrs.addr_num <= 0) return -EINVAL;
+	if (getaddrs.addr_num <= 0 ||
+	    getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr)))
+		return -EINVAL;
 	/*
 	 *  For UDP-style sockets, id specifies the association to query.
 	 *  If the id field is set to the value '0' then the locally bound
diff --git a/sound/isa/sb/sb_mixer.c b/sound/isa/sb/sb_mixer.c
index 91d14224f6b3..73d4572d136b 100644
--- a/sound/isa/sb/sb_mixer.c
+++ b/sound/isa/sb/sb_mixer.c
@@ -925,7 +925,7 @@ static unsigned char als4000_saved_regs[] = {
 static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 {
 	unsigned char *val = chip->saved_regs;
-	snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+	snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
 	for (; num_regs; num_regs--)
 		*val++ = snd_sbmixer_read(chip, *regs++);
 }
@@ -933,7 +933,7 @@ static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 {
 	unsigned char *val = chip->saved_regs;
-	snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+	snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
 	for (; num_regs; num_regs--)
 		snd_sbmixer_write(chip, *regs++, *val++);
 }
diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c
index 56f87cd33c19..3f00ddf450f8 100644
--- a/sound/pci/aw2/aw2-alsa.c
+++ b/sound/pci/aw2/aw2-alsa.c
@@ -316,6 +316,8 @@ static int __devinit snd_aw2_create(struct snd_card *card,
 		return -ENOMEM;
 	}
 
+	/* (2) initialization of the chip hardware */
+	snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
 
 	if (request_irq(pci->irq, snd_aw2_saa7146_interrupt,
 			IRQF_SHARED, "Audiowerk2", chip)) {
@@ -329,8 +331,6 @@ static int __devinit snd_aw2_create(struct snd_card *card,
 	}
 	chip->irq = pci->irq;
 
-	/* (2) initialization of the chip hardware */
-	snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
 	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
 	if (err < 0) {
 		free_irq(chip->irq, (void *)chip);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 98778cb69c6e..1dcf9f3d1107 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -269,28 +269,9 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	}
 }
 
-static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
 {
-	int i;
-
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		if (ioapic->redirtbl[i].fields.vector == vector)
-			return i;
-	return -1;
-}
-
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
-{
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 	union ioapic_redir_entry *ent;
-	int gsi;
-
-	gsi = get_eoi_gsi(ioapic, vector);
-	if (gsi == -1) {
-		printk(KERN_WARNING "Can't find redir item for %d EOI\n",
-		       vector);
-		return;
-	}
 
 	ent = &ioapic->redirtbl[gsi];
 	ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
@@ -300,6 +281,16 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
 		ioapic_deliver(ioapic, gsi);
 }
 
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		if (ioapic->redirtbl[i].fields.vector == vector)
+			__kvm_ioapic_update_eoi(ioapic, i);
+}
+
 static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
 {
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;