From 6536e3123e5d3371a6f52e32a3d0694bcc987702 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 20 Jan 2012 14:33:53 -0800
Subject: [PATCH 01/16] mm: fix warnings regarding enum migrate_mode

sparc64 allmodconfig:

In file included from include/linux/compat.h:15,
                 from /usr/src/25/arch/sparc/include/asm/siginfo.h:19,
                 from include/linux/signal.h:5,
                 from include/linux/sched.h:73,
                 from arch/sparc/kernel/asm-offsets.c:13:
include/linux/fs.h:618: warning: parameter has incomplete type

It seems that my sparc64 compiler (gcc-3.4.5) doesn't like the forward
declaration of enums.

Fix this by moving the "enum migrate_mode" definition into its own header
file.

Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h           |  2 +-
 include/linux/migrate.h      | 14 +-------------
 include/linux/migrate_mode.h | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/migrate_mode.h

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0244082d42c5..4b3a41fe22bf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
 #include <linux/ioctl.h>
 #include <linux/blk_types.h>
 #include <linux/types.h>
+#include <linux/migrate_mode.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -526,7 +527,6 @@ enum positive_aop_returns {
 struct page;
 struct address_space;
 struct writeback_control;
-enum migrate_mode;
 
 struct iov_iter {
 	const struct iovec *iov;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index eaf867412f7a..05ed2828a553 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -3,22 +3,10 @@
 
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate_mode.h>
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
-/*
- * MIGRATE_ASYNC means never block
- * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
- *	on most operations but not ->writepage as the potential stall time
- *	is too significant
- * MIGRATE_SYNC will block when migrating pages
- */
-enum migrate_mode {
-	MIGRATE_ASYNC,
-	MIGRATE_SYNC_LIGHT,
-	MIGRATE_SYNC,
-};
-
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
new file mode 100644
index 000000000000..ebf3d89a3919
--- /dev/null
+++ b/include/linux/migrate_mode.h
@@ -0,0 +1,16 @@
+#ifndef MIGRATE_MODE_H_INCLUDED
+#define MIGRATE_MODE_H_INCLUDED
+/*
+ * MIGRATE_ASYNC means never block
+ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
+ *	on most operations but not ->writepage as the potential stall time
+ *	is too significant
+ * MIGRATE_SYNC will block when migrating pages
+ */
+enum migrate_mode {
+	MIGRATE_ASYNC,
+	MIGRATE_SYNC_LIGHT,
+	MIGRATE_SYNC,
+};
+
+#endif		/* MIGRATE_MODE_H_INCLUDED */

From 687875fb7de4a95223af20ee024282fa9099f860 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Fri, 20 Jan 2012 14:33:55 -0800
Subject: [PATCH 02/16] mm: fix NULL ptr dereference in __count_immobile_pages

Fix the following NULL ptr dereference caused by

  cat /sys/devices/system/memory/memory0/removable

Pid: 13979, comm: sed Not tainted 3.0.13-0.5-default #1 IBM BladeCenter LS21 -[7971PAM]-/Server Blade
RIP: __count_immobile_pages+0x4/0x100
Process sed (pid: 13979, threadinfo ffff880221c36000, task ffff88022e788480)
Call Trace:
  is_pageblock_removable_nolock+0x34/0x40
  is_mem_section_removable+0x74/0xf0
  show_mem_removable+0x41/0x70
  sysfs_read_file+0xfe/0x1c0
  vfs_read+0xc7/0x130
  sys_read+0x53/0xa0
  system_call_fastpath+0x16/0x1b

We are crashing because we are trying to dereference NULL zone which
came from pfn=0 (struct page ffffea0000000000). According to the boot
log this page is marked reserved:
e820 update range: 0000000000000000 - 0000000000010000 (usable) ==> (reserved)

and early_node_map confirms that:
early_node_map[3] active PFN ranges
    1: 0x00000010 -> 0x0000009c
    1: 0x00000100 -> 0x000bffa3
    1: 0x00100000 -> 0x00240000

The problem is that memory_present works in PAGE_SECTION_MASK aligned
blocks so the reserved range sneaks into the the section as well.  This
also means that free_area_init_node will not take care of those reserved
pages and they stay uninitialized.

When we try to read the removable status we walk through all available
sections and hope that the zone is valid for all pages in the section.
But this is not true in this case as the zone and nid are not initialized.

We have only one node in this particular case and it is marked as node=1
(rather than 0) and that made the problem visible because page_to_nid will
return 0 and there are no zones on the node.

Let's check that the zone is valid and that the given pfn falls into its
boundaries and mark the section not removable.  This might cause some
false positives, probably, but we do not have any sane way to find out
whether the page is reserved by the platform or it is just not used for
whatever other reasons.

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0027d8f4a1bb..21c272d517bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5414,6 +5414,17 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
 bool is_pageblock_removable_nolock(struct page *page)
 {
 	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+
+	/*
+	 * We have to be careful here because we are iterating over memory
+	 * sections which are not zone aware so we might end up outside of
+	 * the zone but still within the section.
+	 */
+	if (!zone || zone->zone_start_pfn > pfn ||
+			zone->zone_start_pfn + zone->spanned_pages <= pfn)
+		return false;
+
 	return __count_immobile_pages(zone, page, 0);
 }
 

From 656a070629adfe23c12768e35ddf91feab469ff7 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.cz>
Date: Fri, 20 Jan 2012 14:33:58 -0800
Subject: [PATCH 03/16] mm: __count_immobile_pages(): make sure the node is
 online

page_zone() requires an online node otherwise we are accessing NULL
NODE_DATA.  This is not an issue at the moment because node_zones are
located at the structure beginning but this might change in the future
so better be careful about that.

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 21c272d517bc..d2186ecb36f7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5413,15 +5413,22 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
 
 bool is_pageblock_removable_nolock(struct page *page)
 {
-	struct zone *zone = page_zone(page);
-	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone;
+	unsigned long pfn;
 
 	/*
 	 * We have to be careful here because we are iterating over memory
 	 * sections which are not zone aware so we might end up outside of
 	 * the zone but still within the section.
+	 * We have to take care about the node as well. If the node is offline
+	 * its NODE_DATA will be NULL - see page_zone.
 	 */
-	if (!zone || zone->zone_start_pfn > pfn ||
+	if (!node_online(page_to_nid(page)))
+		return false;
+
+	zone = page_zone(page);
+	pfn = page_to_pfn(page);
+	if (zone->zone_start_pfn > pfn ||
 			zone->zone_start_pfn + zone->spanned_pages <= pfn)
 		return false;
 

From e9a4593cc5e36c6d47c87b439cb41c2568e7395f Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Jan 2012 14:33:59 -0800
Subject: [PATCH 04/16] leds: add led driver for Bachmann's ot200

Add support for leds on Bachmann's ot200 visualisation device.  The
device has three leds on the back panel (led_err, led_init and led_run)
and can handle up to seven leds on the front panel.

The driver was written by Linutronix on behalf of Bachmann electronic
GmbH.  It incorporates feedback from Lars-Peter Clausen

[akpm@linux-foundation.org: add dependency on HAS_IOMEM]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/leds/Kconfig      |   7 ++
 drivers/leds/Makefile     |   1 +
 drivers/leds/leds-ot200.c | 171 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+)
 create mode 100644 drivers/leds/leds-ot200.c

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index c957c344233f..9ca28fced2b9 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -403,6 +403,13 @@ config LEDS_MAX8997
 	  This option enables support for on-chip LED drivers on
 	  MAXIM MAX8997 PMIC.
 
+config LEDS_OT200
+	tristate "LED support for the Bachmann OT200"
+	depends on LEDS_CLASS && HAS_IOMEM
+	help
+	  This option enables support for the LEDs on the Bachmann OT200.
+	  Say Y to enable LEDs on the Bachmann OT200.
+
 config LEDS_TRIGGERS
 	bool "LED Trigger support"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index b8a9723477f0..1fc6875a8b20 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_LEDS_LP5523)		+= leds-lp5523.o
 obj-$(CONFIG_LEDS_TCA6507)		+= leds-tca6507.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
 obj-$(CONFIG_LEDS_HP6XX)		+= leds-hp6xx.o
+obj-$(CONFIG_LEDS_OT200)		+= leds-ot200.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
 obj-$(CONFIG_LEDS_PCA955X)		+= leds-pca955x.o
 obj-$(CONFIG_LEDS_DA903X)		+= leds-da903x.o
diff --git a/drivers/leds/leds-ot200.c b/drivers/leds/leds-ot200.c
new file mode 100644
index 000000000000..c4646825a620
--- /dev/null
+++ b/drivers/leds/leds-ot200.c
@@ -0,0 +1,171 @@
+/*
+ * Bachmann ot200 leds driver.
+ *
+ * Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ *         Christian Gmeiner <christian.gmeiner@gmail.com>
+ *
+ * License: GPL as published by the FSF.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/leds.h>
+#include <linux/io.h>
+#include <linux/module.h>
+
+
+struct ot200_led {
+	struct led_classdev cdev;
+	const char *name;
+	unsigned long port;
+	u8 mask;
+};
+
+/*
+ * The device has three leds on the back panel (led_err, led_init and led_run)
+ * and can handle up to seven leds on the front panel.
+ */
+
+static struct ot200_led leds[] = {
+	{
+		.name = "led_run",
+		.port = 0x5a,
+		.mask = BIT(0),
+	},
+	{
+		.name = "led_init",
+		.port = 0x5a,
+		.mask = BIT(1),
+	},
+	{
+		.name = "led_err",
+		.port = 0x5a,
+		.mask = BIT(2),
+	},
+	{
+		.name = "led_1",
+		.port = 0x49,
+		.mask = BIT(7),
+	},
+	{
+		.name = "led_2",
+		.port = 0x49,
+		.mask = BIT(6),
+	},
+	{
+		.name = "led_3",
+		.port = 0x49,
+		.mask = BIT(5),
+	},
+	{
+		.name = "led_4",
+		.port = 0x49,
+		.mask = BIT(4),
+	},
+	{
+		.name = "led_5",
+		.port = 0x49,
+		.mask = BIT(3),
+	},
+	{
+		.name = "led_6",
+		.port = 0x49,
+		.mask = BIT(2),
+	},
+	{
+		.name = "led_7",
+		.port = 0x49,
+		.mask = BIT(1),
+	}
+};
+
+static DEFINE_SPINLOCK(value_lock);
+
+/*
+ * we need to store the current led states, as it is not
+ * possible to read the current led state via inb().
+ */
+static u8 leds_back;
+static u8 leds_front;
+
+static void ot200_led_brightness_set(struct led_classdev *led_cdev,
+		enum led_brightness value)
+{
+	struct ot200_led *led = container_of(led_cdev, struct ot200_led, cdev);
+	u8 *val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&value_lock, flags);
+
+	if (led->port == 0x49)
+		val = &leds_front;
+	else if (led->port == 0x5a)
+		val = &leds_back;
+	else
+		BUG();
+
+	if (value == LED_OFF)
+		*val &= ~led->mask;
+	else
+		*val |= led->mask;
+
+	outb(*val, led->port);
+	spin_unlock_irqrestore(&value_lock, flags);
+}
+
+static int __devinit ot200_led_probe(struct platform_device *pdev)
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(leds); i++) {
+
+		leds[i].cdev.name = leds[i].name;
+		leds[i].cdev.brightness_set = ot200_led_brightness_set;
+
+		ret = led_classdev_register(&pdev->dev, &leds[i].cdev);
+		if (ret < 0)
+			goto err;
+	}
+
+	leds_front = 0;		/* turn off all front leds */
+	leds_back = BIT(1);	/* turn on init led */
+	outb(leds_front, 0x49);
+	outb(leds_back, 0x5a);
+
+	return 0;
+
+err:
+	for (i = i - 1; i >= 0; i--)
+		led_classdev_unregister(&leds[i].cdev);
+
+	return ret;
+}
+
+static int __devexit ot200_led_remove(struct platform_device *pdev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(leds); i++)
+		led_classdev_unregister(&leds[i].cdev);
+
+	return 0;
+}
+
+static struct platform_driver ot200_led_driver = {
+	.probe		= ot200_led_probe,
+	.remove		= __devexit_p(ot200_led_remove),
+	.driver		= {
+		.name	= "leds-ot200",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(ot200_led_driver);
+
+MODULE_AUTHOR("Sebastian A. Siewior <bigeasy@linutronix.de>");
+MODULE_DESCRIPTION("ot200 LED driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:leds-ot200");

From 2a4e64b8f6bcbf23ddd375b78342051ae8862284 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@gnu.org>
Date: Fri, 20 Jan 2012 14:34:01 -0800
Subject: [PATCH 05/16] ipc/mqueue: simplify reading msgqueue limit

Because the current task is being used to get the limit, we can simply
use rlimit() instead of task_rlimit().

Signed-off-by: Davidlohr Bueso <dave@gnu.org>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/mqueue.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 9b7c8ab7d75c..86ee272de210 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -128,7 +128,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 
 	if (S_ISREG(mode)) {
 		struct mqueue_inode_info *info;
-		struct task_struct *p = current;
 		unsigned long mq_bytes, mq_msg_tblsz;
 
 		inode->i_fop = &mqueue_file_operations;
@@ -159,7 +158,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 
 		spin_lock(&mq_lock);
 		if (u->mq_bytes + mq_bytes < u->mq_bytes ||
-		    u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) {
+		    u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
 			spin_unlock(&mq_lock);
 			/* mqueue_evict_inode() releases info->messages */
 			ret = -EMFILE;

From d496aab567e7e52b3e974c9192a5de6e77dce32c Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Fri, 20 Jan 2012 14:34:04 -0800
Subject: [PATCH 06/16] kprobes: initialize before using a hlist

Commit ef53d9c5e ("kprobes: improve kretprobe scalability with hashed
locking") introduced a bug where we can potentially leak
kretprobe_instances since we initialize a hlist head after having used
it.

Initialize the hlist head before using it.

Reported by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Srinivasa D S <srinivasa@in.ibm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 95dd7212e610..29f5b65bee29 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1077,6 +1077,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 		/* Early boot.  kretprobe_table_locks not yet initialized. */
 		return;
 
+	INIT_HLIST_HEAD(&empty_rp);
 	hash = hash_ptr(tk, KPROBE_HASH_BITS);
 	head = &kretprobe_inst_table[hash];
 	kretprobe_table_lock(hash, &flags);
@@ -1085,7 +1086,6 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 			recycle_rp_inst(ri, &empty_rp);
 	}
 	kretprobe_table_unlock(hash, &flags);
-	INIT_HLIST_HEAD(&empty_rp);
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);

From 36c3e75907c8cb515fad260190ca1beb7e53df96 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Fri, 20 Jan 2012 14:34:05 -0800
Subject: [PATCH 07/16] drivers/video/backlight/adp88x0_bl.c: fix bit testing
 logic

We need to write new value if the bit mask fields of new value is not
equal to old value.  It does not make sense to write new value only when
all the bit_mask bits are zero.

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Cc: Michael Hennerich <michael.hennerich@analog.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/adp8860_bl.c | 2 +-
 drivers/video/backlight/adp8870_bl.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/backlight/adp8860_bl.c b/drivers/video/backlight/adp8860_bl.c
index 66bc74d9ce2a..378276c9d3cf 100644
--- a/drivers/video/backlight/adp8860_bl.c
+++ b/drivers/video/backlight/adp8860_bl.c
@@ -146,7 +146,7 @@ static int adp8860_set_bits(struct i2c_client *client, int reg, uint8_t bit_mask
 
 	ret = adp8860_read(client, reg, &reg_val);
 
-	if (!ret && ((reg_val & bit_mask) == 0)) {
+	if (!ret && ((reg_val & bit_mask) != bit_mask)) {
 		reg_val |= bit_mask;
 		ret = adp8860_write(client, reg, reg_val);
 	}
diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c
index 6c68a6899e87..6735059376d6 100644
--- a/drivers/video/backlight/adp8870_bl.c
+++ b/drivers/video/backlight/adp8870_bl.c
@@ -160,7 +160,7 @@ static int adp8870_set_bits(struct i2c_client *client, int reg, uint8_t bit_mask
 
 	ret = adp8870_read(client, reg, &reg_val);
 
-	if (!ret && ((reg_val & bit_mask) == 0)) {
+	if (!ret && ((reg_val & bit_mask) != bit_mask)) {
 		reg_val |= bit_mask;
 		ret = adp8870_write(client, reg, reg_val);
 	}

From d59d9ebaacba32b63f24d53b1463519b445b4683 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@gmail.com>
Date: Fri, 20 Jan 2012 14:34:06 -0800
Subject: [PATCH 08/16] drivers/video/backlight/l4f00242t03.c: return proper
 error in l4f00242t03_probe if regulator_get() fails

Signed-off-by: Axel Lin <axel.lin@gmail.com>
Acked-by: Alberto Panizzo <alberto@amarulasolutions.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/l4f00242t03.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c
index 4f5d1c4cb6ab..27d1d7a29c77 100644
--- a/drivers/video/backlight/l4f00242t03.c
+++ b/drivers/video/backlight/l4f00242t03.c
@@ -190,6 +190,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
 
 	priv->io_reg = regulator_get(&spi->dev, "vdd");
 	if (IS_ERR(priv->io_reg)) {
+		ret = PTR_ERR(priv->io_reg);
 		dev_err(&spi->dev, "%s: Unable to get the IO regulator\n",
 		       __func__);
 		goto err3;
@@ -197,6 +198,7 @@ static int __devinit l4f00242t03_probe(struct spi_device *spi)
 
 	priv->core_reg = regulator_get(&spi->dev, "vcore");
 	if (IS_ERR(priv->core_reg)) {
+		ret = PTR_ERR(priv->core_reg);
 		dev_err(&spi->dev, "%s: Unable to get the core regulator\n",
 		       __func__);
 		goto err4;

From 85e72aa5384b1a614563ad63257ded0e91d1a620 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 20 Jan 2012 14:34:09 -0800
Subject: [PATCH 09/16] proc: clear_refs: do not clear reserved pages

/proc/pid/clear_refs is used to clear the Referenced and YOUNG bits for
pages and corresponding page table entries of the task with PID pid, which
includes any special mappings inserted into the page tables in order to
provide things like vDSOs and user helper functions.

On ARM this causes a problem because the vectors page is mapped as a
global mapping and since ec706dab ("ARM: add a vma entry for the user
accessible vector page"), a VMA is also inserted into each task for this
page to aid unwinding through signals and syscall restarts.  Since the
vectors page is required for handling faults, clearing the YOUNG bit (and
subsequently writing a faulting pte) means that we lose the vectors page
*globally* and cannot fault it back in.  This results in a system deadlock
on the next exception.

To see this problem in action, just run:

	$ echo 1 > /proc/self/clear_refs

on an ARM platform (as any user) and watch your system hang.  I think this
has been the case since 2.6.37

This patch avoids clearing the aforementioned bits for reserved pages,
therefore leaving the vectors page intact on ARM.  Since reserved pages
are not candidates for swap, this change should not have any impact on the
usefulness of clear_refs.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Reported-by: Moussa Ba <moussaba@micron.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Acked-by: Nicolas Pitre <nico@linaro.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: <stable@vger.kernel.org>		[2.6.37+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e418c5abdb0e..7dcd2a250495 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -518,6 +518,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 		if (!page)
 			continue;
 
+		if (PageReserved(page))
+			continue;
+
 		/* Clear accessed and referenced bits. */
 		ptep_test_and_clear_young(vma, addr, pte);
 		ClearPageReferenced(page);

From 6568d4a9c9ff16d6c4f0b14dfea567806ce579e4 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 20 Jan 2012 14:34:12 -0800
Subject: [PATCH 10/16] mm: memcg: update the correct soft limit tree during
 migration

end_migration() passes the old page instead of the new page to commit
the charge.  This page descriptor is not used for committing itself,
though, since we also pass the (correct) page_cgroup descriptor.  But
it's used to find the soft limit tree through the page's zone, so the
soft limit tree of the old page's zone is updated instead of that of the
new page's, which might get slightly out of date until the next charge
reaches the ratelimit point.

This glitch has been present since 5564e88 ("memcg: condense
page_cgroup-to-page lookup points").

This fixes a bug that I introduced in 2.6.38.  It's benign enough (to my
knowledge) that we probably don't want this for stable.

Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3dbff4dcde35..4baddbae94cc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3247,7 +3247,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	else
 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
+	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, ctype);
 	return ret;
 }
 

From 409eb8c2611b4310947a150af988111f7f52ab15 Mon Sep 17 00:00:00 2001
From: Hillf Danton <dhillf@gmail.com>
Date: Fri, 20 Jan 2012 14:34:13 -0800
Subject: [PATCH 11/16] mm/hugetlb.c: undo change to page mapcount in fault
 handler

Page mapcount should be updated only if we are sure that the page ends
up in the page table otherwise we would leak if we couldn't COW due to
reservations or if idx is out of bounds.

Signed-off-by: Hillf Danton <dhillf@gmail.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea8c3a4cd2ae..5f34bd8dda34 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2508,6 +2508,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct hstate *h = hstate_vma(vma);
 	int ret = VM_FAULT_SIGBUS;
+	int anon_rmap = 0;
 	pgoff_t idx;
 	unsigned long size;
 	struct page *page;
@@ -2562,14 +2563,13 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
-			page_dup_rmap(page);
 		} else {
 			lock_page(page);
 			if (unlikely(anon_vma_prepare(vma))) {
 				ret = VM_FAULT_OOM;
 				goto backout_unlocked;
 			}
-			hugepage_add_new_anon_rmap(page, vma, address);
+			anon_rmap = 1;
 		}
 	} else {
 		/*
@@ -2582,7 +2582,6 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			      VM_FAULT_SET_HINDEX(h - hstates);
 			goto backout_unlocked;
 		}
-		page_dup_rmap(page);
 	}
 
 	/*
@@ -2606,6 +2605,10 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!huge_pte_none(huge_ptep_get(ptep)))
 		goto backout;
 
+	if (anon_rmap)
+		hugepage_add_new_anon_rmap(page, vma, address);
+	else
+		page_dup_rmap(page);
 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
 				&& (vma->vm_flags & VM_SHARED)));
 	set_huge_pte_at(mm, address, ptep, new_pte);

From cb78edfdcef5259ac9e9088bd63810d21299928d Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Fri, 20 Jan 2012 14:34:16 -0800
Subject: [PATCH 12/16] kdump: define KEXEC_NOTE_BYTES arch specific for s390x

kdump only allocates memory for the prstatus ELF note.  For s390x,
besides of prstatus multiple ELF notes for various different register
types are stored.  Therefore the currently allocated memory is not
sufficient.  With this patch the KEXEC_NOTE_BYTES macro can be defined
by architecture code and for s390x it is set to the correct size now.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/kexec.h | 18 ++++++++++++++++++
 include/linux/kexec.h         |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index cf4e47b0948c..3f30dac804ea 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -42,6 +42,24 @@
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_S390
 
+/*
+ * Size for s390x ELF notes per CPU
+ *
+ * Seven notes plus zero note at the end: prstatus, fpregset, timer,
+ * tod_cmp, tod_reg, control regs, and prefix
+ */
+#define KEXEC_NOTE_BYTES \
+	(ALIGN(sizeof(struct elf_note), 4) * 8 + \
+	 ALIGN(sizeof("CORE"), 4) * 7 + \
+	 ALIGN(sizeof(struct elf_prstatus), 4) + \
+	 ALIGN(sizeof(elf_fpregset_t), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u32), 4) + \
+	 ALIGN(sizeof(u64) * 16, 4) + \
+	 ALIGN(sizeof(u32), 4) \
+	)
+
 /* Provide a dummy definition to avoid build failures. */
 static inline void crash_setup_regs(struct pt_regs *newregs,
 					struct pt_regs *oldregs) { }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 2fa0901219d4..0d7d6a1b172f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -50,9 +50,11 @@
  * note header.  For kdump, the code in vmcore.c runs in the context
  * of the second kernel to combine them into one note.
  */
+#ifndef KEXEC_NOTE_BYTES
 #define KEXEC_NOTE_BYTES ( (KEXEC_NOTE_HEAD_BYTES * 2) +		\
 			    KEXEC_CORE_NOTE_NAME_BYTES +		\
 			    KEXEC_CORE_NOTE_DESC_BYTES )
+#endif
 
 /*
  * This structure is used to hold the arguments that are used when loading

From 85046579bde15e532983438f86b36856e358f417 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 20 Jan 2012 14:34:19 -0800
Subject: [PATCH 13/16] SHM_UNLOCK: fix long unpreemptible section

scan_mapping_unevictable_pages() is used to make SysV SHM_LOCKed pages
evictable again once the shared memory is unlocked.  It does this with
pagevec_lookup()s across the whole object (which might occupy most of
memory), and takes 300ms to unlock 7GB here.  A cond_resched() every
PAGEVEC_SIZE pages would be good.

However, KOSAKI-san points out that this is called under shmem.c's
info->lock, and it's also under shm.c's shm_lock(), both spinlocks.
There is no strong reason for that: we need to take these pages off the
unevictable list soonish, but those locks are not required for it.

So move the call to scan_mapping_unevictable_pages() from shmem.c's
unlock handling up to shm.c's unlock handling.  Remove the recently
added barrier, not needed now we have spin_unlock() before the scan.

Use get_file(), with subsequent fput(), to make sure we have a reference
to mapping throughout scan_mapping_unevictable_pages(): that's something
that was previously guaranteed by the shm_lock().

Remove shmctl's lru_add_drain_all(): we don't fault in pages at SHM_LOCK
time, and we lazily discover them to be Unevictable later, so it serves
no purpose for SHM_LOCK; and serves no purpose for SHM_UNLOCK, since
pages still on pagevec are not marked Unevictable.

The original code avoided redundant rescans by checking VM_LOCKED flag
at its level: now avoid them by checking shp's SHM_LOCKED.

The original code called scan_mapping_unevictable_pages() on a locked
area at shm_destroy() time: perhaps we once had accounting cross-checks
which required that, but not now, so skip the overhead and just let
inode eviction deal with them.

Put check_move_unevictable_page() and scan_mapping_unevictable_pages()
under CONFIG_SHMEM (with stub for the TINY case when ramfs is used),
more as comment than to save space; comment them used for SHM_UNLOCK.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/shm.c   | 37 ++++++++++++++++++++++---------------
 mm/shmem.c  |  7 -------
 mm/vmscan.c | 12 +++++++++++-
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 02ecf2c078fc..854ab58e5f6e 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -870,9 +870,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
-		struct file *uninitialized_var(shm_file);
-
-		lru_add_drain_all();  /* drain pagevecs to lru lists */
+		struct file *shm_file;
 
 		shp = shm_lock_check(ns, shmid);
 		if (IS_ERR(shp)) {
@@ -895,22 +893,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
-		
-		if(cmd==SHM_LOCK) {
+
+		shm_file = shp->shm_file;
+		if (is_file_hugepages(shm_file))
+			goto out_unlock;
+
+		if (cmd == SHM_LOCK) {
 			struct user_struct *user = current_user();
-			if (!is_file_hugepages(shp->shm_file)) {
-				err = shmem_lock(shp->shm_file, 1, user);
-				if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
-					shp->shm_perm.mode |= SHM_LOCKED;
-					shp->mlock_user = user;
-				}
+			err = shmem_lock(shm_file, 1, user);
+			if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
+				shp->shm_perm.mode |= SHM_LOCKED;
+				shp->mlock_user = user;
 			}
-		} else if (!is_file_hugepages(shp->shm_file)) {
-			shmem_lock(shp->shm_file, 0, shp->mlock_user);
-			shp->shm_perm.mode &= ~SHM_LOCKED;
-			shp->mlock_user = NULL;
+			goto out_unlock;
 		}
+
+		/* SHM_UNLOCK */
+		if (!(shp->shm_perm.mode & SHM_LOCKED))
+			goto out_unlock;
+		shmem_lock(shm_file, 0, shp->mlock_user);
+		shp->shm_perm.mode &= ~SHM_LOCKED;
+		shp->mlock_user = NULL;
+		get_file(shm_file);
 		shm_unlock(shp);
+		scan_mapping_unevictable_pages(shm_file->f_mapping);
+		fput(shm_file);
 		goto out;
 	}
 	case IPC_RMID:
diff --git a/mm/shmem.c b/mm/shmem.c
index feead1943d92..4aaa53abe302 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1068,13 +1068,6 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 		user_shm_unlock(inode->i_size, user);
 		info->flags &= ~VM_LOCKED;
 		mapping_clear_unevictable(file->f_mapping);
-		/*
-		 * Ensure that a racing putback_lru_page() can see
-		 * the pages of this mapping are evictable when we
-		 * skip them due to !PageLRU during the scan.
-		 */
-		smp_mb__after_clear_bit();
-		scan_mapping_unevictable_pages(file->f_mapping);
 	}
 	retval = 0;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2880396f7953..e097c1026b58 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3499,6 +3499,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 	return 1;
 }
 
+#ifdef CONFIG_SHMEM
 /**
  * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
  * @page: page to check evictability and move to appropriate lru list
@@ -3509,6 +3510,8 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
  *
  * Restrictions: zone->lru_lock must be held, page must be on LRU and must
  * have PageUnevictable set.
+ *
+ * This function is only used for SysV IPC SHM_UNLOCK.
  */
 static void check_move_unevictable_page(struct page *page, struct zone *zone)
 {
@@ -3545,6 +3548,8 @@ static void check_move_unevictable_page(struct page *page, struct zone *zone)
  *
  * Scan all pages in mapping.  Check unevictable pages for
  * evictability and move them to the appropriate zone lru list.
+ *
+ * This function is only used for SysV IPC SHM_UNLOCK.
  */
 void scan_mapping_unevictable_pages(struct address_space *mapping)
 {
@@ -3590,9 +3595,14 @@ void scan_mapping_unevictable_pages(struct address_space *mapping)
 		pagevec_release(&pvec);
 
 		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
+		cond_resched();
 	}
-
 }
+#else
+void scan_mapping_unevictable_pages(struct address_space *mapping)
+{
+}
+#endif /* CONFIG_SHMEM */
 
 static void warn_scan_unevictable_pages(void)
 {

From 245132643e1cfcd145bbc86a716c1818371fcb93 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 20 Jan 2012 14:34:21 -0800
Subject: [PATCH 14/16] SHM_UNLOCK: fix Unevictable pages stranded after swap

Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in
find_get_pages") correctly fixed an infinite loop; but left a problem
that find_get_pages() on shmem would return 0 (appearing to callers to
mean end of tree) when it meets a run of nr_pages swap entries.

The only uses of find_get_pages() on shmem are via pagevec_lookup(),
called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's
scan_mapping_unevictable_pages().  The first is already commented, and
not worth worrying about; but the second can leave pages on the
Unevictable list after an unusual sequence of swapping and locking.

Fix that by using shmem_find_get_pages_and_swap() (then ignoring the
swap) instead of pagevec_lookup().

But I don't want to contaminate vmscan.c with shmem internals, nor
shmem.c with LRU locking.  So move scan_mapping_unevictable_pages() into
shmem.c, renaming it shmem_unlock_mapping(); and rename
check_move_unevictable_page() to check_move_unevictable_pages(), looping
down an array of pages, oftentimes under the same lock.

Leave out the "rotate unevictable list" block: that's a leftover from
when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed
handling involved looking at pages at tail of LRU.

Was there significance to the sequence first ClearPageUnevictable, then
test page_evictable, then SetPageUnevictable here? I think not, we're
under LRU lock, and have no barriers between those.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: <stable@vger.kernel.org> [back to 3.1 but will need respins]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h |   1 +
 include/linux/swap.h     |   2 +-
 ipc/shm.c                |   2 +-
 mm/shmem.c               |  46 ++++++++++++--
 mm/vmscan.c              | 132 ++++++++++++---------------------------
 5 files changed, 85 insertions(+), 98 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e4c711c6f321..79ab2555b3b0 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,6 +48,7 @@ extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 06061a7f8e69..3e60228e7299 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -273,7 +273,7 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 #endif
 
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
-extern void scan_mapping_unevictable_pages(struct address_space *);
+extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
 extern unsigned long scan_unevictable_pages;
 extern int scan_unevictable_handler(struct ctl_table *, int,
diff --git a/ipc/shm.c b/ipc/shm.c
index 854ab58e5f6e..b76be5bda6c2 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -916,7 +916,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		shp->mlock_user = NULL;
 		get_file(shm_file);
 		shm_unlock(shp);
-		scan_mapping_unevictable_pages(shm_file->f_mapping);
+		shmem_unlock_mapping(shm_file->f_mapping);
 		fput(shm_file);
 		goto out;
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index 4aaa53abe302..269d049294ab 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -379,7 +379,7 @@ static int shmem_free_swap(struct address_space *mapping,
 /*
  * Pagevec may contain swap entries, so shuffle up pages before releasing.
  */
-static void shmem_pagevec_release(struct pagevec *pvec)
+static void shmem_deswap_pagevec(struct pagevec *pvec)
 {
 	int i, j;
 
@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct pagevec *pvec)
 			pvec->pages[j++] = page;
 	}
 	pvec->nr = j;
-	pagevec_release(pvec);
+}
+
+/*
+ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
+ */
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t index = 0;
+
+	pagevec_init(&pvec, 0);
+	/*
+	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
+	 */
+	while (!mapping_unevictable(mapping)) {
+		/*
+		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
+		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
+		 */
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+					PAGEVEC_SIZE, pvec.pages, indices);
+		if (!pvec.nr)
+			break;
+		index = indices[pvec.nr - 1] + 1;
+		shmem_deswap_pagevec(&pvec);
+		check_move_unevictable_pages(pvec.pages, pvec.nr);
+		pagevec_release(&pvec);
+		cond_resched();
+	}
 }
 
 /*
@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			continue;
 		}
 		if (index == start && indices[0] > end) {
-			shmem_pagevec_release(&pvec);
+			shmem_deswap_pagevec(&pvec);
+			pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
@@ -2438,6 +2470,10 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	return 0;
 }
 
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+}
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e097c1026b58..c52b23552659 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -26,7 +26,6 @@
 #include <linux/buffer_head.h>	/* for try_to_release_page(),
 					buffer_heads_over_limit */
 #include <linux/mm_inline.h>
-#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -661,7 +660,7 @@ void putback_lru_page(struct page *page)
 		 * When racing with an mlock or AS_UNEVICTABLE clearing
 		 * (page is unlocked) make sure that if the other thread
 		 * does not observe our setting of PG_lru and fails
-		 * isolation/check_move_unevictable_page,
+		 * isolation/check_move_unevictable_pages,
 		 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
 		 * the page back to the evictable list.
 		 *
@@ -3501,106 +3500,57 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 
 #ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
- * @page: page to check evictability and move to appropriate lru list
- * @zone: zone page is in
+ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
+ * @pages:	array of pages to check
+ * @nr_pages:	number of pages to check
  *
- * Checks a page for evictability and moves the page to the appropriate
- * zone lru list.
- *
- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
- * have PageUnevictable set.
+ * Checks pages for evictability and moves them to the appropriate lru list.
  *
  * This function is only used for SysV IPC SHM_UNLOCK.
  */
-static void check_move_unevictable_page(struct page *page, struct zone *zone)
+void check_move_unevictable_pages(struct page **pages, int nr_pages)
 {
 	struct lruvec *lruvec;
+	struct zone *zone = NULL;
+	int pgscanned = 0;
+	int pgrescued = 0;
+	int i;
 
-	VM_BUG_ON(PageActive(page));
-retry:
-	ClearPageUnevictable(page);
-	if (page_evictable(page, NULL)) {
-		enum lru_list l = page_lru_base_type(page);
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = pages[i];
+		struct zone *pagezone;
 
-		__dec_zone_state(zone, NR_UNEVICTABLE);
-		lruvec = mem_cgroup_lru_move_lists(zone, page,
-						   LRU_UNEVICTABLE, l);
-		list_move(&page->lru, &lruvec->lists[l]);
-		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
-		__count_vm_event(UNEVICTABLE_PGRESCUED);
-	} else {
-		/*
-		 * rotate unevictable list
-		 */
-		SetPageUnevictable(page);
-		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
-						   LRU_UNEVICTABLE);
-		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
-		if (page_evictable(page, NULL))
-			goto retry;
-	}
-}
-
-/**
- * scan_mapping_unevictable_pages - scan an address space for evictable pages
- * @mapping: struct address_space to scan for evictable pages
- *
- * Scan all pages in mapping.  Check unevictable pages for
- * evictability and move them to the appropriate zone lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
- */
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-	pgoff_t next = 0;
-	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
-			 PAGE_CACHE_SHIFT;
-	struct zone *zone;
-	struct pagevec pvec;
-
-	if (mapping->nrpages == 0)
-		return;
-
-	pagevec_init(&pvec, 0);
-	while (next < end &&
-		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-		int i;
-		int pg_scanned = 0;
-
-		zone = NULL;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-			struct zone *pagezone = page_zone(page);
-
-			pg_scanned++;
-			if (page_index > next)
-				next = page_index;
-			next++;
-
-			if (pagezone != zone) {
-				if (zone)
-					spin_unlock_irq(&zone->lru_lock);
-				zone = pagezone;
-				spin_lock_irq(&zone->lru_lock);
-			}
-
-			if (PageLRU(page) && PageUnevictable(page))
-				check_move_unevictable_page(page, zone);
+		pgscanned++;
+		pagezone = page_zone(page);
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
 		}
-		if (zone)
-			spin_unlock_irq(&zone->lru_lock);
-		pagevec_release(&pvec);
 
-		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
-		cond_resched();
+		if (!PageLRU(page) || !PageUnevictable(page))
+			continue;
+
+		if (page_evictable(page, NULL)) {
+			enum lru_list lru = page_lru_base_type(page);
+
+			VM_BUG_ON(PageActive(page));
+			ClearPageUnevictable(page);
+			__dec_zone_state(zone, NR_UNEVICTABLE);
+			lruvec = mem_cgroup_lru_move_lists(zone, page,
+						LRU_UNEVICTABLE, lru);
+			list_move(&page->lru, &lruvec->lists[lru]);
+			__inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+			pgrescued++;
+		}
+	}
+
+	if (zone) {
+		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+		spin_unlock_irq(&zone->lru_lock);
 	}
-}
-#else
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
 }
 #endif /* CONFIG_SHMEM */
 

From 9f9f1acd713d69fae2af286fbeedc6c8963411c6 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Fri, 20 Jan 2012 14:34:24 -0800
Subject: [PATCH 15/16] mm: fix rss count leakage during migration

Memory migration fills a pte with a migration entry and it doesn't
update the rss counters.  Then it replaces the migration entry with the
new page (or the old one if migration failed).  But between these two
passes this pte can be unmaped, or a task can fork a child and it will
get a copy of this migration entry.  Nobody accounts for this in the rss
counters.

This patch properly adjust rss counters for migration entries in
zap_pte_range() and copy_one_pte().  Thus we avoid extra atomic
operations on the migration fast-path.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 5e30583c2605..fa2f04e0337c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -878,15 +878,24 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			}
 			if (likely(!non_swap_entry(entry)))
 				rss[MM_SWAPENTS]++;
-			else if (is_write_migration_entry(entry) &&
-					is_cow_mapping(vm_flags)) {
-				/*
-				 * COW mappings require pages in both parent
-				 * and child to be set to read.
-				 */
-				make_migration_entry_read(&entry);
-				pte = swp_entry_to_pte(entry);
-				set_pte_at(src_mm, addr, src_pte, pte);
+			else if (is_migration_entry(entry)) {
+				page = migration_entry_to_page(entry);
+
+				if (PageAnon(page))
+					rss[MM_ANONPAGES]++;
+				else
+					rss[MM_FILEPAGES]++;
+
+				if (is_write_migration_entry(entry) &&
+				    is_cow_mapping(vm_flags)) {
+					/*
+					 * COW mappings require pages in both
+					 * parent and child to be set to read.
+					 */
+					make_migration_entry_read(&entry);
+					pte = swp_entry_to_pte(entry);
+					set_pte_at(src_mm, addr, src_pte, pte);
+				}
 			}
 		}
 		goto out_set_pte;
@@ -1191,6 +1200,16 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
 			if (!non_swap_entry(entry))
 				rss[MM_SWAPENTS]--;
+			else if (is_migration_entry(entry)) {
+				struct page *page;
+
+				page = migration_entry_to_page(entry);
+
+				if (PageAnon(page))
+					rss[MM_ANONPAGES]--;
+				else
+					rss[MM_FILEPAGES]--;
+			}
 			if (unlikely(!free_swap_and_cache(entry)))
 				print_bad_pte(vma, addr, ptent, NULL);
 		}

From c25a785d6647984505fa165b5cd84cfc9a95970b Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Fri, 20 Jan 2012 14:34:27 -0800
Subject: [PATCH 16/16] score: fix off-by-one index into syscall table

If the provided system call number is equal to __NR_syscalls, the
current check will pass and a function pointer just after the system
call table may be called, since sys_call_table is an array with total
size __NR_syscalls.

Whether or not this is a security bug depends on what the compiler puts
immediately after the system call table.  It's likely that this won't do
anything bad because there is an additional NULL check on the syscall
entry, but if there happens to be a non-NULL value immediately after the
system call table, this may result in local privilege escalation.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Cc: <stable@vger.kernel.org>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Eugene Teo <eugeneteo@kernel.sg>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/score/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/score/kernel/entry.S b/arch/score/kernel/entry.S
index 577abba3fac6..83bb96079c43 100644
--- a/arch/score/kernel/entry.S
+++ b/arch/score/kernel/entry.S
@@ -408,7 +408,7 @@ ENTRY(handle_sys)
 	sw	r9, [r0, PT_EPC]
 
 	cmpi.c	r27, __NR_syscalls 	# check syscall number
-	bgtu	illegal_syscall
+	bgeu	illegal_syscall
 
 	slli	r8, r27, 2		# get syscall routine
 	la	r11, sys_call_table