From 8d7dc9283f399e1fda4e48a1c453f689326d9396 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 14 Apr 2015 19:33:59 -0700
Subject: [PATCH 001/101] rcu: Control grace-period delays directly from value

In a misguided attempt to avoid an #ifdef, the use of the
gp_init_delay module parameter was conditioned on the corresponding
RCU_TORTURE_TEST_SLOW_INIT Kconfig variable, using IS_ENABLED() at
the point of use in the code.  This meant that the compiler always saw
the delay, which meant that RCU_TORTURE_TEST_SLOW_INIT_DELAY had to be
unconditionally defined.  This in turn caused "make oldconfig" to ask
pointless questions about the value of RCU_TORTURE_TEST_SLOW_INIT_DELAY
in cases where it was not even used.

This commit avoids these pointless questions by defining gp_init_delay
under #ifdef.  In one branch, gp_init_delay is initialized to
RCU_TORTURE_TEST_SLOW_INIT_DELAY and is also a module parameter (thus
allowing boot-time modification), and in the other branch gp_init_delay
is a const variable initialized by default to zero.

This approach also simplifies the code at the delay point by eliminating
the IS_ENABLED().  Because gp_init_delay is constant zero in the no-delay
case intended for production use, the "gp_init_delay > 0" check causes
the delay to become dead code, as desired in this case.  In addition,
this commit replaces magic constant "10" with the preprocessor variable
PER_RCU_NODE_PERIOD, which controls the number of grace periods that
are allowed to elapse at full speed before a delay is inserted.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 kernel/rcu/tree.c | 16 +++++++++-------
 lib/Kconfig.debug |  1 +
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 233165da782f..8cf7304b2867 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -162,11 +162,14 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
 static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
 module_param(kthread_prio, int, 0644);
 
-/* Delay in jiffies for grace-period initialization delays. */
-static int gp_init_delay = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT)
-				? CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
-				: 0;
+/* Delay in jiffies for grace-period initialization delays, debug only. */
+#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
+static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
 module_param(gp_init_delay, int, 0644);
+#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
+static const int gp_init_delay;
+#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
+#define PER_RCU_NODE_PERIOD 10	/* Number of grace periods between delays. */
 
 /*
  * Track the rcutorture test sequence number and the update version
@@ -1843,9 +1846,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched_rcu_qs();
 		ACCESS_ONCE(rsp->gp_activity) = jiffies;
-		if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_SLOW_INIT) &&
-		    gp_init_delay > 0 &&
-		    !(rsp->gpnum % (rcu_num_nodes * 10)))
+		if (gp_init_delay > 0 &&
+		    !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD)))
 			schedule_timeout_uninterruptible(gp_init_delay);
 	}
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1ad74c0df01f..5f5ff7d7e5eb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1268,6 +1268,7 @@ config RCU_TORTURE_TEST_SLOW_INIT_DELAY
 	int "How much to slow down RCU grace-period initialization"
 	range 0 5
 	default 3
+	depends on RCU_TORTURE_TEST_SLOW_INIT
 	help
 	  This option specifies the number of jiffies to wait between
 	  each rcu_node structure initialization.

From c57dcb566d3d866a302a1da2e06344bec31d5bcd Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 2 Apr 2015 08:39:00 +0100
Subject: [PATCH 002/101] efivarfs: Ensure VariableName is NUL-terminated

Some buggy firmware implementations update VariableNameSize on success
such that it does not include the final NUL character which results in
garbage in the efivarfs name entries.  Use kzalloc on the efivar_entry
(as is done in efivars.c) to ensure that the name is always
NUL-terminated.

The buggy firmware is:
BIOS Information
        Vendor: Intel Corp.
        Version: S1200RP.86B.02.02.0005.102320140911
        Release Date: 10/23/2014
        BIOS Revision: 4.6
System Information
        Manufacturer: Intel Corporation
        Product Name: S1200RP_SE

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Acked-by: Matthew Garrett <mjg59@coreos.com>
Cc: Jeremy Kerr <jk@ozlabs.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 fs/efivarfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index ddbce42548c9..acf9a67f6770 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
 	int len, i;
 	int err = -ENOMEM;
 
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return err;
 

From 98b228f55014870092c15d7d168fecac69f2f12a Mon Sep 17 00:00:00 2001
From: Roy Franz <roy.franz@linaro.org>
Date: Wed, 15 Apr 2015 16:32:24 -0700
Subject: [PATCH 003/101] x86/efi: Store upper bits of command line buffer
 address in ext_cmd_line_ptr

Until now, the EFI stub was only setting the 32 bit cmd_line_ptr in
the setup_header structure, so on 64 bit platforms this could be truncated.
This patch adds setting the upper bits of the buffer address in
ext_cmd_line_ptr.  This case was likely never hit, as the allocation
for this buffer is done at the lowest available address.  Only
x86_64 kernels have this problem, as the 1-1 mapping mandated
by EFI ensures that all memory is 32 bit addressable on 32 bit
platforms.  The EFI stub does not support mixed mode, so the
32 bit kernel on 64 bit firmware case does not need to be handled.

Signed-off-by: Roy Franz <roy.franz@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 92b9a5f2aed6..5999980206bf 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1110,6 +1110,8 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	if (!cmdline_ptr)
 		goto fail;
 	hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
+	/* Fill in upper bits of command line address, NOP on 32 bit  */
+	boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
 
 	hdr->ramdisk_image = 0;
 	hdr->ramdisk_size = 0;

From 94d4b4765b7ddb8478b0d57663cf7a08e2263bbf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 23 Nov 2012 19:19:07 +0100
Subject: [PATCH 004/101] x86/mm: Clean up types in xlate_dev_mem_ptr()

Pavel Machek reported the following compiler warning on
x86/32 CONFIG_HIGHMEM64G=y builds:

  arch/x86/mm/ioremap.c:344:10: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]

Clean up the types in this function by using a single natural type for
internal calculations (unsigned long), to make it more apparent what's
happening, and also to remove fragile casts.

Reported-by: Pavel Machek <pavel@ucw.cz>
Cc: jgross@suse.com
Cc: roland@purestorage.com
Link: http://lkml.kernel.org/r/20150416080440.GA507@amd
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/ioremap.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index fdf617c00e2f..4bf037b20f47 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -332,18 +332,20 @@ EXPORT_SYMBOL(iounmap);
  */
 void *xlate_dev_mem_ptr(phys_addr_t phys)
 {
-	void *addr;
-	unsigned long start = phys & PAGE_MASK;
+	unsigned long start  = phys &  PAGE_MASK;
+	unsigned long offset = phys & ~PAGE_MASK;
+	unsigned long vaddr;
 
 	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
 	if (page_is_ram(start >> PAGE_SHIFT))
 		return __va(phys);
 
-	addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
-	if (addr)
-		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+	vaddr = (unsigned long)ioremap_cache(start, PAGE_SIZE);
+	/* Only add the offset on success and return NULL if the ioremap() failed: */
+	if (vaddr)
+		vaddr += offset;
 
-	return addr;
+	return (void *)vaddr;
 }
 
 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)

From 3b6e042188994466ec257b71296b5f85b894dcd9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 21 Apr 2015 17:26:23 +0200
Subject: [PATCH 005/101] perf/x86/intel: Add cpu_(prepare|starting|dying) for
 core_pmu

The core_pmu does not define the cpu_* callbacks, which handle
allocation of 'struct cpu_hw_events::shared_regs' data,
initialization of debug store and PMU_FL_EXCL_CNTRS counters.

While this probably won't happen on bare metal, a virtual CPU can
define x86_pmu.extra_regs together with PMU version 1 and thus
be using core_pmu -> using shared_regs data without it being
allocated. That could lead to the following panic:

	BUG: unable to handle kernel NULL pointer dereference at (null)
	IP: [<ffffffff8152cd4f>] _spin_lock_irqsave+0x1f/0x40

	SNIP

	 [<ffffffff81024bd9>] __intel_shared_reg_get_constraints+0x69/0x1e0
	 [<ffffffff81024deb>] intel_get_event_constraints+0x9b/0x180
	 [<ffffffff8101e815>] x86_schedule_events+0x75/0x1d0
	 [<ffffffff810586dc>] ? check_preempt_curr+0x7c/0x90
	 [<ffffffff810649fe>] ? try_to_wake_up+0x24e/0x3e0
	 [<ffffffff81064ba2>] ? default_wake_function+0x12/0x20
	 [<ffffffff8109eb16>] ? autoremove_wake_function+0x16/0x40
	 [<ffffffff810577e9>] ? __wake_up_common+0x59/0x90
	 [<ffffffff811a9517>] ? __d_lookup+0xa7/0x150
	 [<ffffffff8119db5f>] ? do_lookup+0x9f/0x230
	 [<ffffffff811a993a>] ? dput+0x9a/0x150
	 [<ffffffff8119c8f5>] ? path_to_nameidata+0x25/0x60
	 [<ffffffff8119e90a>] ? __link_path_walk+0x7da/0x1000
	 [<ffffffff8101d8f9>] ? x86_pmu_add+0xb9/0x170
	 [<ffffffff8101d7a7>] x86_pmu_commit_txn+0x67/0xc0
	 [<ffffffff811b07b0>] ? mntput_no_expire+0x30/0x110
	 [<ffffffff8119c731>] ? path_put+0x31/0x40
	 [<ffffffff8107c297>] ? current_fs_time+0x27/0x30
	 [<ffffffff8117d170>] ? mem_cgroup_get_reclaim_stat_from_page+0x20/0x70
	 [<ffffffff8111b7aa>] group_sched_in+0x13a/0x170
	 [<ffffffff81014a29>] ? sched_clock+0x9/0x10
	 [<ffffffff8111bac8>] ctx_sched_in+0x2e8/0x330
	 [<ffffffff8111bb7b>] perf_event_sched_in+0x6b/0xb0
	 [<ffffffff8111bc36>] perf_event_context_sched_in+0x76/0xc0
	 [<ffffffff8111eb3b>] perf_event_comm+0x1bb/0x2e0
	 [<ffffffff81195ee9>] set_task_comm+0x69/0x80
	 [<ffffffff81195fe1>] setup_new_exec+0xe1/0x2e0
	 [<ffffffff811ea68e>] load_elf_binary+0x3ce/0x1ab0

Add the cpu_(prepare|starting|dying) callbacks to core_pmu so that
its shared_regs data gets allocated. AFAICS there's no harm in
initializing debug store and PMU_FL_EXCL_CNTRS for core_pmu
either.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20150421152623.GC13169@krava.redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 66 +++++++++++++++-----------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 219d3fb423a1..960e85de13fb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2533,34 +2533,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
 	return x86_event_sysfs_show(page, config, event);
 }
 
-static __initconst const struct x86_pmu core_pmu = {
-	.name			= "core",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= core_pmu_enable_all,
-	.enable			= core_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_pmu_hw_config,
-	.schedule_events	= x86_schedule_events,
-	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
-	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= intel_pmu_event_map,
-	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
-	.apic			= 1,
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic event period:
-	 */
-	.max_period		= (1ULL << 31) - 1,
-	.get_event_constraints	= intel_get_event_constraints,
-	.put_event_constraints	= intel_put_event_constraints,
-	.event_constraints	= intel_core_event_constraints,
-	.guest_get_msrs		= core_guest_get_msrs,
-	.format_attrs		= intel_arch_formats_attr,
-	.events_sysfs_show	= intel_event_sysfs_show,
-};
-
 struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
 	struct intel_shared_regs *regs;
@@ -2743,6 +2715,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	NULL,
 };
 
+static __initconst const struct x86_pmu core_pmu = {
+	.name			= "core",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= core_pmu_enable_all,
+	.enable			= core_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.hw_config		= x86_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32-bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL<<31) - 1,
+	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
+	.event_constraints	= intel_core_event_constraints,
+	.guest_get_msrs		= core_guest_get_msrs,
+	.format_attrs		= intel_arch_formats_attr,
+	.events_sysfs_show	= intel_event_sysfs_show,
+
+	/*
+	 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+	 * together with PMU version 1 and thus be using core_pmu with
+	 * shared_regs. We need following callbacks here to allocate
+	 * it properly.
+	 */
+	.cpu_prepare		= intel_pmu_cpu_prepare,
+	.cpu_starting		= intel_pmu_cpu_starting,
+	.cpu_dying		= intel_pmu_cpu_dying,
+};
+
 static __initconst const struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,

From 80bcffb376a6890dd7452b12c1ba032f8f24fef6 Mon Sep 17 00:00:00 2001
From: Sonny Rao <sonnyrao@chromium.org>
Date: Mon, 20 Apr 2015 15:34:07 -0700
Subject: [PATCH 006/101] perf/x86/intel/uncore: Add support for Intel Haswell
 ULT (lower power Mobile Processor) IMC uncore PMUs

This uncore is the same as the Haswell desktop part but uses a
different PCI ID.

Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonnyrao@chromium.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 3001015b755c..ca75e70865ef 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -1,6 +1,9 @@
 /* Nehalem/SandBridge/Haswell uncore support */
 #include "perf_event_intel_uncore.h"
 
+/* Uncore IMC PCI Id */
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
+
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
 #define SNB_UNC_CTL_UMASK_MASK			0x0000ff00
@@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 	{ /* end: all zeroes */ },
 };
 
@@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
 	IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
+	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
 	{  /* end marker */ }
 };
 

From 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Mon Sep 17 00:00:00 2001
From: Sonny Rao <sonnyrao@chromium.org>
Date: Tue, 21 Apr 2015 12:33:11 -0700
Subject: [PATCH 007/101] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore
 driver

This keeps all the related PCI IDs together in the driver where
they are used.

Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonnyrao@chromium.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +++++-
 include/linux/pci_ids.h                           | 4 ----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index ca75e70865ef..4562e9e22c60 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -1,7 +1,11 @@
 /* Nehalem/SandBridge/Haswell uncore support */
 #include "perf_event_intel_uncore.h"
 
-/* Uncore IMC PCI Id */
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC	0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC	0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC	0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
 
 /* SNB event control */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index e63c02a93f6b..a59385852233 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2539,10 +2539,6 @@
 
 #define PCI_VENDOR_ID_INTEL		0x8086
 #define PCI_DEVICE_ID_INTEL_EESSC	0x0008
-#define PCI_DEVICE_ID_INTEL_SNB_IMC	0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC	0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC	0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_PXHD_0	0x0320
 #define PCI_DEVICE_ID_INTEL_PXHD_1	0x0321
 #define PCI_DEVICE_ID_INTEL_PXH_0	0x0329

From cb24d01d217497fb32467de22d773655f47d3896 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 22 Apr 2015 10:04:23 -0300
Subject: [PATCH 008/101] perf trace: Enable events when doing system wide
 tracing and starting a workload

 commit f7aa222ff397
 Author: Arnaldo Carvalho de Melo <acme@redhat.com>
 Date:   Tue Feb 3 13:25:39 2015 -0300

    perf trace: No need to enable evsels for workload started from perf

The assumption was that whenever a workload is specified, the
attr.enable_on_exec evsel flag would be set, but that does not happen
when perf_record_opts.system_wide is set, for instance.

As a result, perf_evlist__enable() was not called and
attr.enable_on_exec was not set, which left the events disabled while
the workload ran, producing no output.

Fix it by calling perf_evlist__enable() in the 'trace' tool
when forking and not targeting a workload started from trace.

v2: Test against !target__none(), as suggested by Namhyung Kim, that is
what is used in perf_evsel__config() when deciding whether the
attr.enable_on_exec flag should be set. More work is needed to cover
other cases such as opts->initial_delay.

Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-27z7169pvfxgj8upic636syv@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e124741be187..8842218e1856 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2241,10 +2241,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (err < 0)
 		goto out_error_mmap;
 
+	if (!target__none(&trace->opts.target))
+		perf_evlist__enable(evlist);
+
 	if (forks)
 		perf_evlist__start_workload(evlist);
-	else
-		perf_evlist__enable(evlist);
 
 	trace->multiple_threads = evlist->threads->map[0] == -1 ||
 				  evlist->threads->nr > 1 ||

From 02ac5421ddc634767c732f9b6a10a395a9ecfc4f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 22 Apr 2015 11:11:57 -0300
Subject: [PATCH 009/101] perf trace: Disable events and drain events when
 forked workload ends

We were not checking in the inner event processing loop if the forked workload
had finished, which, on a busy system, may make it take a long time trying to
drain events, entering a seemingly neverending loop, waiting for the system to
get idle enough to make it drain the buffers.

Fix it by disabling the events when 'done' is true, in the inner loop, to start
draining what is in the buffers.

Now:

[root@ssdandy ~]# time trace --filter-pids 14003 -a sleep 1 | tail
  996.748 ( 0.002 ms): sh/30296 rt_sigprocmask(how: SETMASK, nset: 0x7ffc83418160, sigsetsize: 8) = 0
  996.751 ( 0.002 ms): sh/30296 rt_sigprocmask(how: BLOCK, nset: 0x7ffc834181f0, oset: 0x7ffc83418270, sigsetsize: 8) = 0
  996.755 ( 0.002 ms): sh/30296 rt_sigaction(sig: INT, act: 0x7ffc83417f50, oact: 0x7ffc83417ff0, sigsetsize: 8) = 0
 1004.543 ( 0.362 ms): tail/30198  ... [continued]: read()) = 4096
 1004.548 ( 7.791 ms): sh/30296 wait4(upid: -1, stat_addr: 0x7ffc834181a0) ...
 1004.975 ( 0.427 ms): tail/30198 read(buf: 0x7633f0, count: 8192) = 4096
 1005.390 ( 0.410 ms): tail/30198 read(buf: 0x765410, count: 8192) = 4096
 1005.743 ( 0.348 ms): tail/30198 read(buf: 0x7633f0, count: 8192) = 4096
 1006.197 ( 0.449 ms): tail/30198 read(buf: 0x765410, count: 8192) = 4096
 1006.492 ( 0.290 ms): tail/30198 read(buf: 0x7633f0, count: 8192) = 4096

real	0m1.219s
user	0m0.704s
sys	0m0.331s
[root@ssdandy ~]#

Reported-by: Michael Petlan <mpetlan@redhat.com>
Suggested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-p6kpn1b26qcbe47pufpw0tex@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8842218e1856..e122970361f2 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2273,6 +2273,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
 			if (interrupted)
 				goto out_disable;
+
+			if (done && !draining) {
+				perf_evlist__disable(evlist);
+				draining = true;
+			}
 		}
 	}
 

From 6145c259cd454bcb7a1288f7bbb7b4fbc18175dd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 23 Apr 2015 14:40:37 +0100
Subject: [PATCH 010/101] perf kmem: Consistently use PRIu64 for printing u64
 values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building the perf tool for 32-bit ARM results in the following build
error due to a combination of an incorrect conversion specifier and
compiling with -Werror:

  builtin-kmem.c: In function ‘print_page_summary’:
  builtin-kmem.c:644:9: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘u64’ [-Werror=format=]
           nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
           ^
  builtin-kmem.c:647:9: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘u64’ [-Werror=format=]
           (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
           ^
  cc1: all warnings being treated as errors

This patch fixes the problem by consistently using PRIu64 for printing
out u64 values.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429796437-1790-1-git-send-email-will.deacon@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 63ea01349b6e..a1915b430044 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -640,9 +640,9 @@ static void print_page_summary(void)
 	       nr_page_frees, total_page_free_bytes / 1024);
 	printf("\n");
 
-	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
 	       nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
-	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+	printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
 	       nr_page_allocs - nr_alloc_freed,
 	       (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
 	printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",

From de28c15daf60e9625bece22f13a091fac8d05f1d Mon Sep 17 00:00:00 2001
From: Bobby Powers <bobbypowers@gmail.com>
Date: Tue, 21 Apr 2015 19:19:41 -0400
Subject: [PATCH 011/101] tools lib api: Undefine _FORTIFY_SOURCE before
 setting it

Some toolchains (like Hardened Gentoo) define _FORTIFY_SOURCE in the
built-in, default args.  This causes perf builds to fail with:

<command-line>:0:0: error: "_FORTIFY_SOURCE" redefined [-Werror]
<built-in>: note: this is the location of the previous definition
cc1: all warnings being treated as errors

To avoid this, undefine _FORTIFY_SOURCE before (possibly re-)defining it
in tools/lib/api.

v2 applies cleanly on top of already pulled kbuild changes for 4.1-rc1.

Signed-off-by: Bobby Powers <bobbypowers@gmail.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Dirk Gouders <dirk@gouders.net>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-kbuild@vger.kernel.org
Link: http://lkml.kernel.org/r/1429658381-3039-1-git-send-email-bobbypowers@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index d8fe29fc19a4..8bd960658463 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -16,7 +16,7 @@ MAKEFLAGS += --no-print-directory
 LIBFILE = $(OUTPUT)libapi.a
 
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 
 RM = rm -f

From 4ad1f4300e3bddf63109aa63cfb2d37e8585ecc7 Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Tue, 14 Apr 2015 13:49:33 -0400
Subject: [PATCH 012/101] perf kmem: Fix compiles on RHEL6/OL6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

0d68bc92c48 breaks compiles on RHEL6/OL6:
    cc1: warnings being treated as errors
    builtin-kmem.c: In function ‘search_page_alloc_stat’:
    builtin-kmem.c:322: error: declaration of ‘stat’ shadows a global declaration
                            node = &parent->rb_left;
    /usr/include/sys/stat.h:455: error: shadowed declaration is here
    builtin-kmem.c: In function ‘perf_evsel__process_page_alloc_event’:
    builtin-kmem.c:378: error: declaration of ‘stat’ shadows a global declaration
    /usr/include/sys/stat.h:455: error: shadowed declaration is here
    builtin-kmem.c: In function ‘perf_evsel__process_page_free_event’:
    builtin-kmem.c:431: error: declaration of ‘stat’ shadows a global declaration
    /usr/include/sys/stat.h:455: error: shadowed declaration is here

Rename local variable to pstat to avoid the name conflict.

Signed-off-by: David Ahern <david.ahern@oracle.com>
Link: http://lkml.kernel.org/r/1429033773-31383-1-git-send-email-david.ahern@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 54 +++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index a1915b430044..1634186d537c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -319,7 +319,7 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
 	return 0;
 }
 
-static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
+static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
 {
 	struct rb_node **node = &page_alloc_tree.rb_node;
 	struct rb_node *parent = NULL;
@@ -331,7 +331,7 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
 		parent = *node;
 		data = rb_entry(*node, struct page_stat, node);
 
-		cmp = page_stat_cmp(data, stat);
+		cmp = page_stat_cmp(data, pstat);
 		if (cmp < 0)
 			node = &parent->rb_left;
 		else if (cmp > 0)
@@ -345,10 +345,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
 
 	data = zalloc(sizeof(*data));
 	if (data != NULL) {
-		data->page = stat->page;
-		data->order = stat->order;
-		data->gfp_flags = stat->gfp_flags;
-		data->migrate_type = stat->migrate_type;
+		data->page = pstat->page;
+		data->order = pstat->order;
+		data->gfp_flags = pstat->gfp_flags;
+		data->migrate_type = pstat->migrate_type;
 
 		rb_link_node(&data->node, parent, node);
 		rb_insert_color(&data->node, &page_alloc_tree);
@@ -375,7 +375,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
 						       "migratetype");
 	u64 bytes = kmem_page_size << order;
-	struct page_stat *stat;
+	struct page_stat *pstat;
 	struct page_stat this = {
 		.order = order,
 		.gfp_flags = gfp_flags,
@@ -401,21 +401,21 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	 * This is to find the current page (with correct gfp flags and
 	 * migrate type) at free event.
 	 */
-	stat = search_page(page, true);
-	if (stat == NULL)
+	pstat = search_page(page, true);
+	if (pstat == NULL)
 		return -ENOMEM;
 
-	stat->order = order;
-	stat->gfp_flags = gfp_flags;
-	stat->migrate_type = migrate_type;
+	pstat->order = order;
+	pstat->gfp_flags = gfp_flags;
+	pstat->migrate_type = migrate_type;
 
 	this.page = page;
-	stat = search_page_alloc_stat(&this, true);
-	if (stat == NULL)
+	pstat = search_page_alloc_stat(&this, true);
+	if (pstat == NULL)
 		return -ENOMEM;
 
-	stat->nr_alloc++;
-	stat->alloc_bytes += bytes;
+	pstat->nr_alloc++;
+	pstat->alloc_bytes += bytes;
 
 	order_stats[order][migrate_type]++;
 
@@ -428,7 +428,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	u64 page;
 	unsigned int order = perf_evsel__intval(evsel, sample, "order");
 	u64 bytes = kmem_page_size << order;
-	struct page_stat *stat;
+	struct page_stat *pstat;
 	struct page_stat this = {
 		.order = order,
 	};
@@ -441,8 +441,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	nr_page_frees++;
 	total_page_free_bytes += bytes;
 
-	stat = search_page(page, false);
-	if (stat == NULL) {
+	pstat = search_page(page, false);
+	if (pstat == NULL) {
 		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
 			  page, order);
 
@@ -453,18 +453,18 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	}
 
 	this.page = page;
-	this.gfp_flags = stat->gfp_flags;
-	this.migrate_type = stat->migrate_type;
+	this.gfp_flags = pstat->gfp_flags;
+	this.migrate_type = pstat->migrate_type;
 
-	rb_erase(&stat->node, &page_tree);
-	free(stat);
+	rb_erase(&pstat->node, &page_tree);
+	free(pstat);
 
-	stat = search_page_alloc_stat(&this, false);
-	if (stat == NULL)
+	pstat = search_page_alloc_stat(&this, false);
+	if (pstat == NULL)
 		return -ENOENT;
 
-	stat->nr_free++;
-	stat->free_bytes += bytes;
+	pstat->nr_free++;
+	pstat->free_bytes += bytes;
 
 	return 0;
 }

From 410ceb8f2f1d4edeb02d229ef192e76602005b8b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 24 Apr 2015 10:45:16 +0900
Subject: [PATCH 013/101] tools lib traceevent: Fix build failure on 32-bit
 arch

In my i386 build, it failed like this:

    CC       event-parse.o
  event-parse.c: In function 'print_str_arg':
  event-parse.c:3868:5: warning: format '%lu' expects argument of type 'long unsigned int',
                        but argument 3 has type 'uint64_t' [-Wformat]

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Javi Merino <javi.merino@arm.com>
Link: http://lkml.kernel.org/r/20150424020218.GF1905@sejong
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 12a7e2a40c89..aa21bd55bd8a 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3865,7 +3865,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
 			} else if (el_size == 4) {
 				trace_seq_printf(s, "%u", *(uint32_t *)num);
 			} else if (el_size == 8) {
-				trace_seq_printf(s, "%lu", *(uint64_t *)num);
+				trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
 			} else {
 				trace_seq_printf(s, "BAD SIZE:%d 0x%x",
 						 el_size, *(uint8_t *)num);

From 325d73bf8fea8af2227240b7305253fb052d3a68 Mon Sep 17 00:00:00 2001
From: Bob Liu <bob.liu@oracle.com>
Date: Fri, 3 Apr 2015 14:42:58 +0800
Subject: [PATCH 014/101] xen/blkback: safely unmap purge persistent grants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit c43cf3ea8385 ("xen-blkback: safely unmap grants in case they
are still in use") uses gnttab_unmap_refs_async() to wait until the
mapped pages are no longer in use before unmapping them, but that
commit missed the persistent case.  Purged persistent pages can't be
unmapped either unless they are no longer in use.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/block/xen-blkback/blkback.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index bd2b3bbbb22c..48e98f2712b5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -329,8 +329,18 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct persistent_gnt *persistent_gnt;
-	int ret, segs_to_unmap = 0;
+	int segs_to_unmap = 0;
 	struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+	struct gntab_unmap_queue_data unmap_data;
+	struct completion unmap_completion;
+
+	init_completion(&unmap_completion);
+
+	unmap_data.data = &unmap_completion;
+	unmap_data.done = &free_persistent_gnts_unmap_callback;
+	unmap_data.pages = pages;
+	unmap_data.unmap_ops = unmap;
+	unmap_data.kunmap_ops = NULL;
 
 	while(!list_empty(&blkif->persistent_purge_list)) {
 		persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
@@ -346,17 +356,19 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 		pages[segs_to_unmap] = persistent_gnt->page;
 
 		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
-			ret = gnttab_unmap_refs(unmap, NULL, pages,
-				segs_to_unmap);
-			BUG_ON(ret);
+			unmap_data.count = segs_to_unmap;
+			gnttab_unmap_refs_async(&unmap_data);
+			wait_for_completion(&unmap_completion);
+
 			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
 		}
 		kfree(persistent_gnt);
 	}
 	if (segs_to_unmap > 0) {
-		ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
-		BUG_ON(ret);
+		unmap_data.count = segs_to_unmap;
+		gnttab_unmap_refs_async(&unmap_data);
+		wait_for_completion(&unmap_completion);
 		put_free_pages(blkif, pages, segs_to_unmap);
 	}
 }

From b44166cd46e28dd608d5baa5873047a40f32919c Mon Sep 17 00:00:00 2001
From: Bob Liu <bob.liu@oracle.com>
Date: Fri, 3 Apr 2015 14:42:59 +0800
Subject: [PATCH 015/101] xen/grant: introduce func gnttab_unmap_refs_sync()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are several places that use the gnttab async unmap and then wait
for completion, so move the common code into a function,
gnttab_unmap_refs_sync().

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/block/xen-blkback/blkback.c | 31 +++--------------------------
 drivers/xen/gntdev.c                | 28 +++-----------------------
 drivers/xen/grant-table.c           | 28 ++++++++++++++++++++++++++
 include/xen/grant_table.h           |  1 +
 4 files changed, 35 insertions(+), 53 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 48e98f2712b5..713fc9ff1149 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -265,17 +265,6 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
 	atomic_dec(&blkif->persistent_gnt_in_use);
 }
 
-static void free_persistent_gnts_unmap_callback(int result,
-						struct gntab_unmap_queue_data *data)
-{
-	struct completion *c = data->data;
-
-	/* BUG_ON used to reproduce existing behaviour,
-	   but is this the best way to deal with this? */
-	BUG_ON(result);
-	complete(c);
-}
-
 static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                                  unsigned int num)
 {
@@ -285,12 +274,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
 	struct rb_node *n;
 	int segs_to_unmap = 0;
 	struct gntab_unmap_queue_data unmap_data;
-	struct completion unmap_completion;
 
-	init_completion(&unmap_completion);
-
-	unmap_data.data = &unmap_completion;
-	unmap_data.done = &free_persistent_gnts_unmap_callback;
 	unmap_data.pages = pages;
 	unmap_data.unmap_ops = unmap;
 	unmap_data.kunmap_ops = NULL;
@@ -310,8 +294,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
 			!rb_next(&persistent_gnt->node)) {
 
 			unmap_data.count = segs_to_unmap;
-			gnttab_unmap_refs_async(&unmap_data);
-			wait_for_completion(&unmap_completion);
+			BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 
 			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
@@ -332,12 +315,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 	int segs_to_unmap = 0;
 	struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
 	struct gntab_unmap_queue_data unmap_data;
-	struct completion unmap_completion;
 
-	init_completion(&unmap_completion);
-
-	unmap_data.data = &unmap_completion;
-	unmap_data.done = &free_persistent_gnts_unmap_callback;
 	unmap_data.pages = pages;
 	unmap_data.unmap_ops = unmap;
 	unmap_data.kunmap_ops = NULL;
@@ -357,9 +335,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 
 		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
 			unmap_data.count = segs_to_unmap;
-			gnttab_unmap_refs_async(&unmap_data);
-			wait_for_completion(&unmap_completion);
-
+			BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
 		}
@@ -367,8 +343,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
 	}
 	if (segs_to_unmap > 0) {
 		unmap_data.count = segs_to_unmap;
-		gnttab_unmap_refs_async(&unmap_data);
-		wait_for_completion(&unmap_completion);
+		BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
 		put_free_pages(blkif, pages, segs_to_unmap);
 	}
 }
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index d5bb1a33d0a3..89274850741b 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -327,30 +327,10 @@ static int map_grant_pages(struct grant_map *map)
 	return err;
 }
 
-struct unmap_grant_pages_callback_data
-{
-	struct completion completion;
-	int result;
-};
-
-static void unmap_grant_callback(int result,
-				 struct gntab_unmap_queue_data *data)
-{
-	struct unmap_grant_pages_callback_data* d = data->data;
-
-	d->result = result;
-	complete(&d->completion);
-}
-
 static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
 	int i, err = 0;
 	struct gntab_unmap_queue_data unmap_data;
-	struct unmap_grant_pages_callback_data data;
-
-	init_completion(&data.completion);
-	unmap_data.data = &data;
-	unmap_data.done= &unmap_grant_callback;
 
 	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
 		int pgno = (map->notify.addr >> PAGE_SHIFT);
@@ -367,11 +347,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 	unmap_data.pages = map->pages + offset;
 	unmap_data.count = pages;
 
-	gnttab_unmap_refs_async(&unmap_data);
-
-	wait_for_completion(&data.completion);
-	if (data.result)
-		return data.result;
+	err = gnttab_unmap_refs_sync(&unmap_data);
+	if (err)
+		return err;
 
 	for (i = 0; i < pages; i++) {
 		if (map->unmap_ops[offset+i].status)
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 17972fbacddc..b1c7170e5c9e 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -123,6 +123,11 @@ struct gnttab_ops {
 	int (*query_foreign_access)(grant_ref_t ref);
 };
 
+struct unmap_refs_callback_data {
+	struct completion completion;
+	int result;
+};
+
 static struct gnttab_ops *gnttab_interface;
 
 static int grant_table_version;
@@ -863,6 +868,29 @@ void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item)
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async);
 
+static void unmap_refs_callback(int result,
+		struct gntab_unmap_queue_data *data)
+{
+	struct unmap_refs_callback_data *d = data->data;
+
+	d->result = result;
+	complete(&d->completion);
+}
+
+int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item)
+{
+	struct unmap_refs_callback_data data;
+
+	init_completion(&data.completion);
+	item->data = &data;
+	item->done = &unmap_refs_callback;
+	gnttab_unmap_refs_async(item);
+	wait_for_completion(&data.completion);
+
+	return data.result;
+}
+EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
+
 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
 {
 	int rc;
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 143ca5ffab7a..4478f4b4aae2 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -191,6 +191,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count);
 void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item);
+int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item);
 
 
 /* Perform a batch of grant map/copy operations. Retry every batch slot

From a526973e0291b245aefe12023a7f775f3c0e59a2 Mon Sep 17 00:00:00 2001
From: Andrew Andrianov <andrew@ncrmnt.org>
Date: Sat, 11 Apr 2015 23:29:19 +0300
Subject: [PATCH 016/101] pinctrl: mvebu: Fix mapping of pin 63 (gpo -> gpio)

Signed-off-by: Andrew Andrianov <andrew@ncrmnt.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mvebu/pinctrl-armada-370.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-370.c b/drivers/pinctrl/mvebu/pinctrl-armada-370.c
index 42f930f70de3..03aa58c4cb85 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-370.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-370.c
@@ -364,7 +364,7 @@ static struct mvebu_mpp_mode mv88f6710_mpp_modes[] = {
 	   MPP_FUNCTION(0x5, "audio", "mclk"),
 	   MPP_FUNCTION(0x6, "uart0", "cts")),
 	MPP_MODE(63,
-	   MPP_FUNCTION(0x0, "gpo", NULL),
+	   MPP_FUNCTION(0x0, "gpio", NULL),
 	   MPP_FUNCTION(0x1, "spi0", "sck"),
 	   MPP_FUNCTION(0x2, "tclk", NULL)),
 	MPP_MODE(64,

From dc391502fdbf97a9cabdc58ba8c915175383f681 Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <ivan.ivanov@linaro.org>
Date: Fri, 17 Apr 2015 17:50:49 +0300
Subject: [PATCH 017/101] pinctrl: qcom-spmi: Fix pin direction configuration

Pin direction configuration was incorrectly overwritten
by output and function values in set_mux(). Fix this.

Signed-off-by: Ivan T. Ivanov <ivan.ivanov@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 1 +
 drivers/pinctrl/qcom/pinctrl-spmi-mpp.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index b2d22218a258..de684ca93b5a 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -260,6 +260,7 @@ static int pmic_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned function,
 			val = 1;
 	}
 
+	val = val << PMIC_GPIO_REG_MODE_DIR_SHIFT;
 	val |= pad->function << PMIC_GPIO_REG_MODE_FUNCTION_SHIFT;
 	val |= pad->out_value & PMIC_GPIO_REG_MODE_VALUE_SHIFT;
 
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
index 8f36c5f91949..890df16353b3 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
@@ -370,6 +370,7 @@ static int pmic_mpp_set_mux(struct pinctrl_dev *pctldev, unsigned function,
 		}
 	}
 
+	val = val << PMIC_MPP_REG_MODE_DIR_SHIFT;
 	val |= pad->function << PMIC_MPP_REG_MODE_FUNCTION_SHIFT;
 	val |= pad->out_value & PMIC_MPP_REG_MODE_VALUE_MASK;
 

From c671835021798c1c40ca0b55b49feff76ed5e0e1 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Sat, 25 Apr 2015 07:25:03 +0000
Subject: [PATCH 018/101] perf top: Fix a segfault when kernel map is
 restricted.

Perf top raises a warning if a kernel sample is collected but the kernel
map is restricted. The warning message needs to dereference al.map->dso...

However, previous perf_event__preprocess_sample() doesn't always
guarantee al.map != NULL, for example, when kernel map is restricted.

This patch validates al.map before dereferencing it, avoiding the segfault.

Before this patch:

 $ cat /proc/sys/kernel/kptr_restrict
 1
 $ perf top -p  120183
 perf: Segmentation fault
 -------- backtrace --------
 /path/to/perf[0x509868]
 /lib64/libc.so.6(+0x3545f)[0x7f9a1540045f]
 /path/to/perf[0x448820]
 /path/to/perf(cmd_top+0xe3c)[0x44a5dc]
 /path/to/perf[0x4766a2]
 /path/to/perf(main+0x5f5)[0x42e545]
 /lib64/libc.so.6(__libc_start_main+0xf4)[0x7f9a153ecbd4]
 /path/to/perf[0x42e674]

And gdb call trace:

 Program received signal SIGSEGV, Segmentation fault.
 perf_event__process_sample (machine=0xa44030, sample=0x7fffffffa4c0, evsel=0xa43b00, event=0x7ffff41c3000, tool=0x7fffffffa8a0)
    at builtin-top.c:736
 736				  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
 (gdb) bt
 #0  perf_event__process_sample (machine=0xa44030, sample=0x7fffffffa4c0, evsel=0xa43b00, event=0x7ffff41c3000, tool=0x7fffffffa8a0)
     at builtin-top.c:736
 #1  perf_top__mmap_read_idx (top=top@entry=0x7fffffffa8a0, idx=idx@entry=0) at builtin-top.c:855
 #2  0x000000000044a5dd in perf_top__mmap_read (top=0x7fffffffa8a0) at builtin-top.c:872
 #3  __cmd_top (top=0x7fffffffa8a0) at builtin-top.c:997
 #4  cmd_top (argc=<optimized out>, argv=<optimized out>, prefix=<optimized out>) at builtin-top.c:1267
 #5  0x00000000004766a3 in run_builtin (p=p@entry=0x8a6ce8 <commands+264>, argc=argc@entry=3, argv=argv@entry=0x7fffffffdf70)
      at perf.c:371
 #6  0x000000000042e546 in handle_internal_command (argv=0x7fffffffdf70, argc=3) at perf.c:430
 #7  run_argv (argv=0x7fffffffdcf0, argcp=0x7fffffffdcfc) at perf.c:474
 #8  main (argc=3, argv=0x7fffffffdf70) at perf.c:589
 (gdb)

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1429946703-80807-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1cb3436276d1..6a4d5d41c671 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -733,7 +733,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict.\n\n"
 "Kernel%s samples will not be resolved.\n",
-			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+			  al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
 			  " modules" : "");
 		if (use_browser <= 0)
 			sleep(5);

From d13855ef18e1852b2c4dc86ddf5759c5b34628cb Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Sat, 25 Apr 2015 16:08:58 +0800
Subject: [PATCH 019/101] perf probe: Fix bug with global variables handling

There are missing curly braces which cause find_variable() to return the
wrong value when probing with global variables.

This problem can be reproduced as follows:

  $ perf probe -v --add='generic_perform_write global_variable_for_test'
  ...
  Try to find probe point from debuginfo.
  Probe point found: generic_perform_write+0
  Searching 'global_variable_for_test' variable in context.
  An error occurred in debuginfo analysis (-2).
    Error: Failed to add events. Reason: No such file or directory (Code: -2)

After this patch:

  $ perf probe -v --add='generic_perform_write global_variable_for_test'
  ...
  Converting variable global_variable_for_test into trace event.
  global_variable_for_test type is int.
  Found 1 probe_trace_events.
  Opening /sys/kernel/debug/tracing/kprobe_events write=1
  Added new event:
  Writing event: p:probe/generic_perform_write _stext+1237464
  global_variable_for_test=@global_variable_for_test+0:s32
    probe:generic_perform_write (on generic_perform_write with
    global_variable_for_test)

  You can now use it in all perf tools, such as:

      perf record -e probe:generic_perform_write -aR sleep 1

Signed-off-by: He Kuang <hekuang@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1429949338-18678-1-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-finder.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 44554c3c2220..1c3cc07937d5 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -578,10 +578,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
 	/* Search child die for local variables and parameters. */
 	if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
 		/* Search again in global variables */
-		if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die))
+		if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
+						0, &vr_die)) {
 			pr_warning("Failed to find '%s' in this function.\n",
 				   pf->pvar->var);
 			ret = -ENOENT;
+		}
 	}
 	if (ret >= 0)
 		ret = convert_variable(&vr_die, pf);

From 052b0f6eaf8b1f02669884a177bc3ce463133a42 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 24 Apr 2015 10:00:48 -0700
Subject: [PATCH 020/101] perf bench futex: Fix hung wakeup tasks after
 requeueing

The futex-requeue benchmark can hang because of missing wakeups once the
benchmark is done, ie:

[Run 1]: Requeued 1024 of 1024 threads in 0.3290 ms
perf: couldn't wakeup all tasks (135/1024)

This bug, while perhaps suggesting missing wakeups in kernel futex code,
is merely a consequence of the crappy FUTEX_CMP_REQUEUE man page,
incorrectly mentioning that the number of requeued tasks is in fact
returned, not the wakeups.

This patch acknowledges this and updates the corresponding futex_wake
code around it.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1429894848.10273.44.camel@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/futex-requeue.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index bedff6b5b3cf..ad0d9b5342fb 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv,
 	if (!fshared)
 		futex_flag = FUTEX_PRIVATE_FLAG;
 
+	if (nrequeue > nthreads)
+		nrequeue = nthreads;
+
 	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
 	       "%d at a time.\n\n",  getpid(), nthreads,
 	       fshared ? "shared":"private", &futex1, &futex2, nrequeue);
@@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv,
 
 		/* Ok, all threads are patiently blocked, start requeueing */
 		gettimeofday(&start, NULL);
-		for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) {
+		while (nrequeued < nthreads) {
 			/*
 			 * Do not wakeup any tasks blocked on futex1, allowing
 			 * us to really measure futex_wait functionality.
 			 */
-			futex_cmp_requeue(&futex1, 0, &futex2, 0,
-					  nrequeue, futex_flag);
+			nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
+						       nrequeue, futex_flag);
 		}
+
 		gettimeofday(&end, NULL);
 		timersub(&end, &start, &runtime);
 
-		if (nrequeued > nthreads)
-			nrequeued = nthreads;
-
 		update_stats(&requeued_stats, nrequeued);
 		update_stats(&requeuetime_stats, runtime.tv_usec);
 
@@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv,
 		}
 
 		/* everybody should be blocked on futex2, wake'em up */
-		nrequeued = futex_wake(&futex2, nthreads, futex_flag);
+		nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
 		if (nthreads != nrequeued)
 			warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
 

From 24f1ced167e5e011040b4c3aae75aee45a79eed5 Mon Sep 17 00:00:00 2001
From: Petr Holasek <pholasek@redhat.com>
Date: Thu, 16 Apr 2015 17:38:17 +0200
Subject: [PATCH 021/101] perf bench numa: Fixes of --quiet argument

Corrected description and fixed function of --quiet argument.

Signed-off-by: Petr Holasek <pholasek@redhat.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1429198699-25039-2-git-send-email-pholasek@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/numa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index ebfa163b80b5..cd872e9c3a9c 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -180,7 +180,7 @@ static const struct option options[] = {
 	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
 	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
 	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
-	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"bzero the initial allocations"),
+	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
 	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
 
 	/* Special option string parsing callbacks: */
@@ -1395,7 +1395,7 @@ static void print_res(const char *name, double val,
 	if (!name)
 		name = "main,";
 
-	if (g->p.show_quiet)
+	if (!g->p.show_quiet)
 		printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
 	else
 		printf(" %14.3f %s\n", val, txt_long);

From 1d90a685eb75a56648d7dd22c704a1a6da516de9 Mon Sep 17 00:00:00 2001
From: Petr Holasek <pholasek@redhat.com>
Date: Thu, 16 Apr 2015 17:38:19 +0200
Subject: [PATCH 022/101] perf bench numa: Fix immediate meeting of convergence
 condition

This patch fixes a race at the beginning of a benchmark run, when some
threads haven't been assigned a curr_cpu yet, so they don't appear in the
nodes-of-process stats and the benchmark concludes that all remaining
threads have already converged.

The race can be reproduced with a small number of threads and a larger
amount of shared process memory, e.g. one process, two threads and 5GB
of process memory.

Signed-off-by: Petr Holasek <pholasek@redhat.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1429198699-25039-4-git-send-email-pholasek@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/numa.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index cd872e9c3a9c..ba5efa4710b5 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -828,6 +828,9 @@ static int count_process_nodes(int process_nr)
 		td = g->threads + task_nr;
 
 		node = numa_node_of_cpu(td->curr_cpu);
+		if (node < 0) /* curr_cpu was likely still -1 */
+			return 0;
+
 		node_present[node] = 1;
 	}
 
@@ -882,6 +885,11 @@ static void calc_convergence_compression(int *strong)
 	for (p = 0; p < g->p.nr_proc; p++) {
 		unsigned int nodes = count_process_nodes(p);
 
+		if (!nodes) {
+			*strong = 0;
+			return;
+		}
+
 		nodes_min = min(nodes, nodes_min);
 		nodes_max = max(nodes, nodes_max);
 	}

From 2a700d8edffdbfb8200332d96c3147e042b337f1 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 13 Apr 2015 11:18:51 -0300
Subject: [PATCH 023/101] [media] marvell-ccic: fix Y'CbCr ordering

Various formats had their byte ordering implemented incorrectly, and
the V4L2_PIX_FMT_UYVY is actually impossible to create, instead you
get V4L2_PIX_FMT_YVYU.

This was working before commit ad6ac452227b7cb93ac79beec092850d178740b1
("add new formats support for marvell-ccic driver"). That commit broke
the original format support and the OLPC XO-1 laptop showed wrong
colors ever since (if you are crazy enough to attempt to run the latest
kernel on it, like I did).

The email addresses of the authors of that patch are no longer valid,
so without a way to reach them and ask them about their test setup
I am going with what I can test on the OLPC laptop.

If this breaks something for someone on their non-OLPC setup, then
contact the linux-media mailing list. My suspicion, however, is that
that commit went in untested.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Jonathan Corbet <corbet@lwn.net>
Cc: <stable@vger.kernel.org>      # for v3.19 and up
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/platform/marvell-ccic/mcam-core.c | 14 +++++++-------
 drivers/media/platform/marvell-ccic/mcam-core.h |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c
index 9c64b5d01c6a..110fd70c7326 100644
--- a/drivers/media/platform/marvell-ccic/mcam-core.c
+++ b/drivers/media/platform/marvell-ccic/mcam-core.c
@@ -116,8 +116,8 @@ static struct mcam_format_struct {
 		.planar		= false,
 	},
 	{
-		.desc		= "UYVY 4:2:2",
-		.pixelformat	= V4L2_PIX_FMT_UYVY,
+		.desc		= "YVYU 4:2:2",
+		.pixelformat	= V4L2_PIX_FMT_YVYU,
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.bpp		= 2,
 		.planar		= false,
@@ -748,7 +748,7 @@ static void mcam_ctlr_image(struct mcam_camera *cam)
 
 	switch (fmt->pixelformat) {
 	case V4L2_PIX_FMT_YUYV:
-	case V4L2_PIX_FMT_UYVY:
+	case V4L2_PIX_FMT_YVYU:
 		widthy = fmt->width * 2;
 		widthuv = 0;
 		break;
@@ -784,15 +784,15 @@ static void mcam_ctlr_image(struct mcam_camera *cam)
 	case V4L2_PIX_FMT_YUV420:
 	case V4L2_PIX_FMT_YVU420:
 		mcam_reg_write_mask(cam, REG_CTRL0,
-			C0_DF_YUV | C0_YUV_420PL | C0_YUVE_YVYU, C0_DF_MASK);
+			C0_DF_YUV | C0_YUV_420PL | C0_YUVE_VYUY, C0_DF_MASK);
 		break;
 	case V4L2_PIX_FMT_YUYV:
 		mcam_reg_write_mask(cam, REG_CTRL0,
-			C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_UYVY, C0_DF_MASK);
+			C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_NOSWAP, C0_DF_MASK);
 		break;
-	case V4L2_PIX_FMT_UYVY:
+	case V4L2_PIX_FMT_YVYU:
 		mcam_reg_write_mask(cam, REG_CTRL0,
-			C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_YUYV, C0_DF_MASK);
+			C0_DF_YUV | C0_YUV_PACKED | C0_YUVE_SWAP24, C0_DF_MASK);
 		break;
 	case V4L2_PIX_FMT_JPEG:
 		mcam_reg_write_mask(cam, REG_CTRL0,
diff --git a/drivers/media/platform/marvell-ccic/mcam-core.h b/drivers/media/platform/marvell-ccic/mcam-core.h
index aa0c6eac254a..7ffdf4dbaf8c 100644
--- a/drivers/media/platform/marvell-ccic/mcam-core.h
+++ b/drivers/media/platform/marvell-ccic/mcam-core.h
@@ -330,10 +330,10 @@ int mccic_resume(struct mcam_camera *cam);
 #define	  C0_YUVE_YVYU	  0x00010000	/* Y1CrY0Cb		*/
 #define	  C0_YUVE_VYUY	  0x00020000	/* CrY1CbY0		*/
 #define	  C0_YUVE_UYVY	  0x00030000	/* CbY1CrY0		*/
-#define	  C0_YUVE_XYUV	  0x00000000	/* 420: .YUV		*/
-#define	  C0_YUVE_XYVU	  0x00010000	/* 420: .YVU		*/
-#define	  C0_YUVE_XUVY	  0x00020000	/* 420: .UVY		*/
-#define	  C0_YUVE_XVUY	  0x00030000	/* 420: .VUY		*/
+#define	  C0_YUVE_NOSWAP  0x00000000	/* no bytes swapping	*/
+#define	  C0_YUVE_SWAP13  0x00010000	/* swap byte 1 and 3	*/
+#define	  C0_YUVE_SWAP24  0x00020000	/* swap byte 2 and 4	*/
+#define	  C0_YUVE_SWAP1324 0x00030000	/* swap bytes 1&3 and 2&4 */
 /* Bayer bits 18,19 if needed */
 #define	  C0_EOF_VSYNC	  0x00400000	/* Generate EOF by VSYNC */
 #define	  C0_VEDGE_CTRL   0x00800000	/* Detect falling edge of VSYNC */

From 5a9b06a27db6b006605421658418fb8943a6e217 Mon Sep 17 00:00:00 2001
From: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
Date: Sun, 29 Mar 2015 10:04:56 -0300
Subject: [PATCH 024/101] [media] media: soc_camera: rcar_vin: Fix
 wait_for_completion

When stopping abnormally, the driver can't return from wait_for_completion.
This patch resolves the problem by replacing wait_for_completion with
wait_for_completion_timeout.

Signed-off-by: Koji Matsuoka <koji.matsuoka.xm@renesas.com>
Signed-off-by: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/platform/soc_camera/rcar_vin.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/soc_camera/rcar_vin.c b/drivers/media/platform/soc_camera/rcar_vin.c
index 9351f64dee7b..6460f8e1b07f 100644
--- a/drivers/media/platform/soc_camera/rcar_vin.c
+++ b/drivers/media/platform/soc_camera/rcar_vin.c
@@ -135,6 +135,8 @@
 #define VIN_MAX_WIDTH		2048
 #define VIN_MAX_HEIGHT		2048
 
+#define TIMEOUT_MS		100
+
 enum chip_id {
 	RCAR_GEN2,
 	RCAR_H1,
@@ -820,7 +822,10 @@ static void rcar_vin_wait_stop_streaming(struct rcar_vin_priv *priv)
 		if (priv->state == STOPPING) {
 			priv->request_to_stop = true;
 			spin_unlock_irq(&priv->lock);
-			wait_for_completion(&priv->capture_stop);
+			if (!wait_for_completion_timeout(
+					&priv->capture_stop,
+					msecs_to_jiffies(TIMEOUT_MS)))
+				priv->state = STOPPED;
 			spin_lock_irq(&priv->lock);
 		}
 	}

From fefad2d54beb8aad6bf4ac6daeb74f86f52565de Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sun, 12 Apr 2015 09:09:05 -0300
Subject: [PATCH 025/101] [media] v4l: omap4iss: Replace outdated OMAP4 control
 pad API with syscon

The omap4_ctrl_pad_readl and omap4_ctrl_pad_writel functions have been
removed by commit efde234674d9 but are still used by the OMAP4 ISS
driver, resulting in a compilation breakage:

drivers/staging/media/omap4iss/iss_csiphy.c: In function 'omap4iss_csiphy_config':
drivers/staging/media/omap4iss/iss_csiphy.c:167:2: error: implicit declaration of function 'omap4_ctrl_pad_writel' [-Werror=implicit-function-declaration]
  omap4_ctrl_pad_writel(cam_rx_ctrl,

Fix the problem by using the syscon API to replace the control pad API.
Lookup the syscon instance by compatible name for now as the OMAP4 ISS
driver doesn't support DT yet.

Fixes: efde234674d9 ("ARM: OMAP4+: control: remove support for legacy pad read/write")

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Sakari Alius <sakari.ailus@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/staging/media/omap4iss/Kconfig      |  1 +
 drivers/staging/media/omap4iss/iss.c        | 11 +++++++++++
 drivers/staging/media/omap4iss/iss.h        |  4 ++++
 drivers/staging/media/omap4iss/iss_csiphy.c | 12 +++++++-----
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/media/omap4iss/Kconfig b/drivers/staging/media/omap4iss/Kconfig
index b78643f907e7..072dac04a750 100644
--- a/drivers/staging/media/omap4iss/Kconfig
+++ b/drivers/staging/media/omap4iss/Kconfig
@@ -2,6 +2,7 @@ config VIDEO_OMAP4
 	bool "OMAP 4 Camera support"
 	depends on VIDEO_V4L2=y && VIDEO_V4L2_SUBDEV_API && I2C=y && ARCH_OMAP4
 	depends on HAS_DMA
+	select MFD_SYSCON
 	select VIDEOBUF2_DMA_CONTIG
 	---help---
 	  Driver for an OMAP 4 ISS controller.
diff --git a/drivers/staging/media/omap4iss/iss.c b/drivers/staging/media/omap4iss/iss.c
index e0ad5e520e2d..7ced940bd807 100644
--- a/drivers/staging/media/omap4iss/iss.c
+++ b/drivers/staging/media/omap4iss/iss.c
@@ -17,6 +17,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -1386,6 +1387,16 @@ static int iss_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, iss);
 
+	/*
+	 * TODO: When implementing DT support switch to syscon regmap lookup by
+	 * phandle.
+	 */
+	iss->syscon = syscon_regmap_lookup_by_compatible("syscon");
+	if (IS_ERR(iss->syscon)) {
+		ret = PTR_ERR(iss->syscon);
+		goto error;
+	}
+
 	/* Clocks */
 	ret = iss_map_mem_resource(pdev, iss, OMAP4_ISS_MEM_TOP);
 	if (ret < 0)
diff --git a/drivers/staging/media/omap4iss/iss.h b/drivers/staging/media/omap4iss/iss.h
index 734cfeeb0314..35df8b4709e6 100644
--- a/drivers/staging/media/omap4iss/iss.h
+++ b/drivers/staging/media/omap4iss/iss.h
@@ -29,6 +29,8 @@
 #include "iss_ipipe.h"
 #include "iss_resizer.h"
 
+struct regmap;
+
 #define to_iss_device(ptr_module)				\
 	container_of(ptr_module, struct iss_device, ptr_module)
 #define to_device(ptr_module)						\
@@ -79,6 +81,7 @@ struct iss_reg {
 
 /*
  * struct iss_device - ISS device structure.
+ * @syscon: Regmap for the syscon register space
  * @crashed: Bitmask of crashed entities (indexed by entity ID)
  */
 struct iss_device {
@@ -93,6 +96,7 @@ struct iss_device {
 
 	struct resource *res[OMAP4_ISS_MEM_LAST];
 	void __iomem *regs[OMAP4_ISS_MEM_LAST];
+	struct regmap *syscon;
 
 	u64 raw_dmamask;
 
diff --git a/drivers/staging/media/omap4iss/iss_csiphy.c b/drivers/staging/media/omap4iss/iss_csiphy.c
index 7c3d55d811ef..748607f8918f 100644
--- a/drivers/staging/media/omap4iss/iss_csiphy.c
+++ b/drivers/staging/media/omap4iss/iss_csiphy.c
@@ -13,6 +13,7 @@
 
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/regmap.h>
 
 #include "../../../../arch/arm/mach-omap2/control.h"
 
@@ -140,9 +141,11 @@ int omap4iss_csiphy_config(struct iss_device *iss,
 	 * - bit [18] : CSIPHY1 CTRLCLK enable
 	 * - bit [17:16] : CSIPHY1 config: 00 d-phy, 01/10 ccp2
 	 */
-	cam_rx_ctrl = omap4_ctrl_pad_readl(
-			OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_CAMERA_RX);
-
+	/*
+	 * TODO: When implementing DT support specify the CONTROL_CAMERA_RX
+	 * register offset in the syscon property instead of hardcoding it.
+	 */
+	regmap_read(iss->syscon, 0x68, &cam_rx_ctrl);
 
 	if (subdevs->interface == ISS_INTERFACE_CSI2A_PHY1) {
 		cam_rx_ctrl &= ~(OMAP4_CAMERARX_CSI21_LANEENABLE_MASK |
@@ -166,8 +169,7 @@ int omap4iss_csiphy_config(struct iss_device *iss,
 		cam_rx_ctrl |= OMAP4_CAMERARX_CSI22_CTRLCLKEN_MASK;
 	}
 
-	omap4_ctrl_pad_writel(cam_rx_ctrl,
-		 OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_CAMERA_RX);
+	regmap_write(iss->syscon, 0x68, cam_rx_ctrl);
 
 	/* Reset used lane count */
 	csi2->phy->used_data_lanes = 0;

From 4e637ac212b63f4b5dd1da626aca34ffcbfd5daa Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <ivan.ivanov@linaro.org>
Date: Thu, 9 Apr 2015 18:18:37 +0300
Subject: [PATCH 026/101] pinctrl: qcom-spmi-mpp: Fix input value report

Fix interpretation of the pmic_mpp_read() return code: a negative
value means an error.

Signed-off-by: Ivan T. Ivanov <ivan.ivanov@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
index 890df16353b3..211b942ad6d5 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
@@ -577,10 +577,11 @@ static void pmic_mpp_config_dbg_show(struct pinctrl_dev *pctldev,
 
 		if (pad->input_enabled) {
 			ret = pmic_mpp_read(state, pad, PMIC_MPP_REG_RT_STS);
-			if (!ret) {
-				ret &= PMIC_MPP_REG_RT_STS_VAL_MASK;
-				pad->out_value = ret;
-			}
+			if (ret < 0)
+				return;
+
+			ret &= PMIC_MPP_REG_RT_STS_VAL_MASK;
+			pad->out_value = ret;
 		}
 
 		seq_printf(s, " %-4s", pad->output_enabled ? "out" : "in");

From 2b953a5e994ce279904ec70220f7d4f31d380a0a Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Tue, 28 Apr 2015 18:46:20 -0400
Subject: [PATCH 027/101] xen: Suspend ticks on all CPUs during suspend

Commit 77e32c89a711 ("clockevents: Manage device's state separately for
the core") decouples clockevent device's modes from states. With this
change when a Xen guest tries to resume, it won't be calling its
set_mode op which needs to be done on each VCPU in order to make the
hypervisor aware that we are in oneshot mode.

This happens because clockevents_tick_resume() (which is an intermediate
step of resuming ticks on a processor) doesn't call clockevents_set_state()
anymore and because during suspend clockevent devices on all VCPUs (except
for the one doing the suspend) are left in ONESHOT state. As a result, during
resume the clockevents state machine will assume that device is already
where it should be and doesn't need to be updated.

To avoid this problem we should suspend ticks on all VCPUs during
suspend.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 arch/x86/xen/suspend.c | 10 ++++++++++
 drivers/xen/manage.c   |  9 ++++++---
 include/xen/xen-ops.h  |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9497698645a..53b4c0811f4f 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data)
 	tick_resume_local();
 }
 
+static void xen_vcpu_notify_suspend(void *data)
+{
+	tick_suspend_local();
+}
+
 void xen_arch_resume(void)
 {
 	on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
 }
+
+void xen_arch_suspend(void)
+{
+	on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
+}
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index bf1940706422..9e6a85104a20 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -131,6 +131,8 @@ static void do_suspend(void)
 		goto out_resume;
 	}
 
+	xen_arch_suspend();
+
 	si.cancelled = 1;
 
 	err = stop_machine(xen_suspend, &si, cpumask_of(0));
@@ -148,11 +150,12 @@ static void do_suspend(void)
 		si.cancelled = 1;
 	}
 
+	xen_arch_resume();
+
 out_resume:
-	if (!si.cancelled) {
-		xen_arch_resume();
+	if (!si.cancelled)
 		xs_resume();
-	} else
+	else
 		xs_suspend_cancel();
 
 	dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index c643e6a94c9a..0ce4f32017ea 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -13,6 +13,7 @@ void xen_arch_post_suspend(int suspend_cancelled);
 
 void xen_timer_resume(void);
 void xen_arch_resume(void);
+void xen_arch_suspend(void);
 
 void xen_resume_notifier_register(struct notifier_block *nb);
 void xen_resume_notifier_unregister(struct notifier_block *nb);

From 8014bcc86ef112eab9ee1db312dba4e6b608cf89 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Apr 2015 00:26:35 +0100
Subject: [PATCH 028/101] xen-pciback: Add name prefix to global 'permissive'
 variable

The variable for the 'permissive' module parameter used to be static
but was recently changed to be extern.  This puts it in the kernel
global namespace if the driver is built-in, so its name should begin
with a prefix identifying the driver.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: af6fc858a35b ("xen-pciback: limit guest control of command register")
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/xen/xen-pciback/conf_space.c        | 6 +++---
 drivers/xen/xen-pciback/conf_space.h        | 2 +-
 drivers/xen/xen-pciback/conf_space_header.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 75fe3d466515..9c234209d8b5 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -16,8 +16,8 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-bool permissive;
-module_param(permissive, bool, 0644);
+bool xen_pcibk_permissive;
+module_param_named(permissive, xen_pcibk_permissive, bool, 0644);
 
 /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
  * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */
@@ -262,7 +262,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
 		 * This means that some fields may still be read-only because
 		 * they have entries in the config_field list that intercept
 		 * the write and do nothing. */
-		if (dev_data->permissive || permissive) {
+		if (dev_data->permissive || xen_pcibk_permissive) {
 			switch (size) {
 			case 1:
 				err = pci_write_config_byte(dev, offset,
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
index 2e1d73d1d5d0..62461a8ba1d6 100644
--- a/drivers/xen/xen-pciback/conf_space.h
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -64,7 +64,7 @@ struct config_field_entry {
 	void *data;
 };
 
-extern bool permissive;
+extern bool xen_pcibk_permissive;
 
 #define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
 
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index c2260a0456c9..ad3d17d29c81 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -118,7 +118,7 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
 
 	cmd->val = value;
 
-	if (!permissive && (!dev_data || !dev_data->permissive))
+	if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive))
 		return 0;
 
 	/* Only allow the guest to control certain bits. */

From 2c62e8492ed7358bbe7da51666c7e0f6da9474ee Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Thu, 30 Apr 2015 12:41:28 +0800
Subject: [PATCH 029/101] x86/PCI/ACPI: Make all resources except [io
 0xcf8-0xcff] available on PCI bus

An IO port or MMIO resource assigned to a PCI host bridge may be
consumed by the host bridge itself or available to its child
bus/devices. The ACPI specification defines a bit (Producer/Consumer)
to tell whether the resource is consumed by the host bridge itself,
but firmware hasn't used that bit consistently, so we can't rely on it.

Before commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource
interfaces to simplify implementation"), arch/x86/pci/acpi.c ignored
all IO port resources defined by acpi_resource_io and
acpi_resource_fixed_io to filter out IO ports consumed by the host
bridge itself.

Commit 593669c2ac0f ("x86/PCI/ACPI: Use common ACPI resource interfaces
to simplify implementation") started accepting all IO port and MMIO
resources, which caused a regression that IO port resources consumed
by the host bridge itself became available to its child devices.

Then commit 63f1789ec716 ("x86/PCI/ACPI: Ignore resources consumed by
host bridge itself") ignored resources consumed by the host bridge
itself by checking the IORESOURCE_WINDOW flag, which accidentally removed
MMIO resources defined by acpi_resource_memory24, acpi_resource_memory32
and acpi_resource_fixed_memory32.

On x86 and IA64 platforms, all IO port and MMIO resources are assumed
to be available to child bus/devices except one special case:
    IO port [0xCF8-0xCFF] is consumed by the host bridge itself
    to access PCI configuration space.

So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. This solution
will also ease the way to consolidate ACPI PCI host bridge common code
from x86, ia64 and ARM64.

Related ACPI table are archived at:
https://bugzilla.kernel.org/show_bug.cgi?id=94221

Related discussions at:
http://patchwork.ozlabs.org/patch/461633/
https://lkml.org/lkml/2015/3/29/304

Fixes: 63f1789ec716 (Ignore resources consumed by host bridge itself)
Reported-by: Bernhard Thaler <bernhard.thaler@wvnet.at>
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Cc: 4.0+ <stable@vger.kernel.org> # 4.0+
Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/pci/acpi.c     | 24 ++++++++++++++++++++++--
 drivers/acpi/resource.c |  2 +-
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index e4695985f9de..d93963340c3c 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge)
 	kfree(info);
 }
 
+/*
+ * An IO port or MMIO resource assigned to a PCI host bridge may be
+ * consumed by the host bridge itself or available to its child
+ * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
+ * to tell whether the resource is consumed by the host bridge itself,
+ * but firmware hasn't used that bit consistently, so we can't rely on it.
+ *
+ * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
+ * to be available to child bus/devices except one special case:
+ *     IO port [0xCF8-0xCFF] is consumed by the host bridge itself
+ *     to access PCI configuration space.
+ *
+ * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
+ */
+static bool resource_is_pcicfg_ioport(struct resource *res)
+{
+	return (res->flags & IORESOURCE_IO) &&
+		res->start == 0xCF8 && res->end == 0xCFF;
+}
+
 static void probe_pci_root_info(struct pci_root_info *info,
 				struct acpi_device *device,
 				int busnum, int domain,
@@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info,
 			"no IO and memory resources present in _CRS\n");
 	else
 		resource_list_for_each_entry_safe(entry, tmp, list) {
-			if ((entry->res->flags & IORESOURCE_WINDOW) == 0 ||
-			    (entry->res->flags & IORESOURCE_DISABLED))
+			if ((entry->res->flags & IORESOURCE_DISABLED) ||
+			    resource_is_pcicfg_ioport(entry->res))
 				resource_list_destroy_entry(entry);
 			else
 				entry->res->name = info->name;
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 5589a6e2a023..8244f013f210 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -573,7 +573,7 @@ EXPORT_SYMBOL_GPL(acpi_dev_get_resources);
  * @ares: Input ACPI resource object.
  * @types: Valid resource types of IORESOURCE_XXX
  *
- * This is a hepler function to support acpi_dev_get_resources(), which filters
+ * This is a helper function to support acpi_dev_get_resources(), which filters
  * ACPI resource objects according to resource types.
  */
 int acpi_dev_filter_resource_type(struct acpi_resource *ares,

From 3349fb64b2927407017d970dd5c4daf3c5ad69f8 Mon Sep 17 00:00:00 2001
From: Chris Bainbridge <chris.bainbridge@gmail.com>
Date: Wed, 29 Apr 2015 21:21:40 +0100
Subject: [PATCH 030/101] ACPI / SBS: Add 5 us delay to fix SBS hangs on
 MacBook

Commit 7bc5a2bad0b8 'ACPI: Support _OSI("Darwin") correctly' caused
the MacBook firmware to expose the SBS, resulting in intermittent
hangs of several minutes on boot, and failure to detect or report
the battery.  Fix this by adding a 5 us delay to the start of each
SMBUS transaction.  This timing is the result of experimentation -
hangs were observed with 3 us but never with 5 us.

Fixes: 7bc5a2bad0b8 'ACPI: Support _OSI("Darwin") correctly'
Link: https://bugzilla.kernel.org/show_bug.cgi?id=94651
Signed-off-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Cc: 3.18+ <stable@vger.kernel.org> # 3.18+
[ rjw: Subject and changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sbshc.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index 26e5b5060523..bf034f8b7c1a 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/dmi.h>
 #include "sbshc.h"
 
 #define PREFIX "ACPI: "
@@ -87,6 +88,8 @@ enum acpi_smb_offset {
 	ACPI_SMB_ALARM_DATA = 0x26,	/* 2 bytes alarm data */
 };
 
+static bool macbook;
+
 static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data)
 {
 	return ec_read(hc->offset + address, data);
@@ -132,6 +135,8 @@ static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol,
 	}
 
 	mutex_lock(&hc->lock);
+	if (macbook)
+		udelay(5);
 	if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp))
 		goto end;
 	if (temp) {
@@ -257,12 +262,29 @@ extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
 			      acpi_handle handle, acpi_ec_query_func func,
 			      void *data);
 
+static int macbook_dmi_match(const struct dmi_system_id *d)
+{
+	pr_debug("Detected MacBook, enabling workaround\n");
+	macbook = true;
+	return 0;
+}
+
+static struct dmi_system_id acpi_smbus_dmi_table[] = {
+	{ macbook_dmi_match, "Apple MacBook", {
+	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
+	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBook") },
+	},
+	{ },
+};
+
 static int acpi_smbus_hc_add(struct acpi_device *device)
 {
 	int status;
 	unsigned long long val;
 	struct acpi_smb_hc *hc;
 
+	dmi_check_system(acpi_smbus_dmi_table);
+
 	if (!device)
 		return -EINVAL;
 

From 2375a212ca06d35f90841bc511b3e9ae8a95a82e Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ao2@ao2.it>
Date: Wed, 29 Apr 2015 10:37:24 +0200
Subject: [PATCH 031/101] ACPI / documentation: fix a sentence about GPIO
 resources

The sentence "These resources are used be used to pass ..." contains
a suspicious repetition, likely the author meant "These resources can
be used to pass ...".

Simplify the wording.

Signed-off-by: Antonio Ospite <ao2@ao2.it>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/acpi/enumeration.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt
index 750401f91341..15dfce708ebf 100644
--- a/Documentation/acpi/enumeration.txt
+++ b/Documentation/acpi/enumeration.txt
@@ -253,7 +253,7 @@ input driver:
 GPIO support
 ~~~~~~~~~~~~
 ACPI 5 introduced two new resources to describe GPIO connections: GpioIo
-and GpioInt. These resources are used be used to pass GPIO numbers used by
+and GpioInt. These resources can be used to pass GPIO numbers used by
 the device to the driver. ACPI 5.1 extended this with _DSD (Device
 Specific Data) which made it possible to name the GPIOs among other things.
 

From e944ec2ca00fb0170ba9d7f2aeec32c22dc0d4ec Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 29 Apr 2015 21:08:48 +0900
Subject: [PATCH 032/101] perf report: Fix -T/--threads option to work again

The commit 512ae1bd6acb ("perf tools: Consolidate management of default
sort orders") changed default value of the 'sort_order' variable to NULL
indicating that users don't set any sort keys on the command line.

However, it failed to update a check in perf_evlist__tty_browse_hists()
so that 'perf report -T' cannot show the per-thread values after the
normal output.  This patch fixes it to work again.

Note that the -T option only works with --stdio and when neither the --sort
nor the --parent option is given.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430309328-28317-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 476cdf7afcca..b63aeda719be 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -329,7 +329,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 		fprintf(stdout, "\n\n");
 	}
 
-	if (sort_order == default_sort_order &&
+	if (sort_order == NULL &&
 	    parent_pattern == default_parent_pattern) {
 		fprintf(stdout, "#\n# (%s)\n#\n", help);
 

From 5f55d2ae699d1756ad6132786c7f9c27dc456b66 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 28 Apr 2015 10:23:30 -0600
Subject: [PATCH 033/101] vfio-pci: Log device requests more verbosely

Log some clues indicating whether the user is receiving device
request interfaces or not listening.  This can help indicate why a
driver unbind is blocked or explain why QEMU automatically unplugged
a device from the VM.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 69fab0fd15ae..e9851add6f4e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -907,8 +907,14 @@ static void vfio_pci_request(void *device_data, unsigned int count)
 	mutex_lock(&vdev->igate);
 
 	if (vdev->req_trigger) {
-		dev_dbg(&vdev->pdev->dev, "Requesting device from user\n");
+		if (!(count % 10))
+			dev_notice_ratelimited(&vdev->pdev->dev,
+				"Relaying device request to user (#%u)\n",
+				count);
 		eventfd_signal(vdev->req_trigger, 1);
+	} else if (count == 0) {
+		dev_warn(&vdev->pdev->dev,
+			"No device request channel registered, blocked until released by user\n");
 	}
 
 	mutex_unlock(&vdev->igate);

From db7d4d7f40215843000cb9d441c9149fd42ea36b Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 1 May 2015 16:31:41 -0600
Subject: [PATCH 034/101] vfio: Fix runaway interruptible timeout

Commit 13060b64b819 ("vfio: Add and use device request op for vfio
bus drivers") incorrectly makes use of an interruptible timeout.
When interrupted, the signal remains pending resulting in subsequent
timeouts occurring instantly.  This makes the loop spin at a much
higher rate than intended.

Instead of making this completely non-interruptible, we can change
this into a sort of interruptible-once behavior and use the "once"
to log debug information.  The driver API doesn't allow us to abort
and return an error code.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Fixes: 13060b64b819
Cc: stable@vger.kernel.org # v4.0
---
 drivers/vfio/vfio.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 0d336625ac71..e1278fe04b1e 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -710,6 +710,8 @@ void *vfio_del_group_dev(struct device *dev)
 	void *device_data = device->device_data;
 	struct vfio_unbound_dev *unbound;
 	unsigned int i = 0;
+	long ret;
+	bool interrupted = false;
 
 	/*
 	 * The group exists so long as we have a device reference.  Get
@@ -755,9 +757,22 @@ void *vfio_del_group_dev(struct device *dev)
 
 		vfio_device_put(device);
 
-	} while (wait_event_interruptible_timeout(vfio.release_q,
-						  !vfio_dev_present(group, dev),
-						  HZ * 10) <= 0);
+		if (interrupted) {
+			ret = wait_event_timeout(vfio.release_q,
+					!vfio_dev_present(group, dev), HZ * 10);
+		} else {
+			ret = wait_event_interruptible_timeout(vfio.release_q,
+					!vfio_dev_present(group, dev), HZ * 10);
+			if (ret == -ERESTARTSYS) {
+				interrupted = true;
+				dev_warn(dev,
+					 "Device is currently in use, task"
+					 " \"%s\" (%d) "
+					 "blocked until device is released",
+					 current->comm, task_pid_nr(current));
+			}
+		}
+	} while (ret <= 0);
 
 	vfio_group_put(group);
 

From 9b071a43553d6b2df4364951639f61076a8dd676 Mon Sep 17 00:00:00 2001
From: Philippe Coval <philippe.coval@open.eurogiciel.org>
Date: Sat, 2 May 2015 15:14:08 +0200
Subject: [PATCH 035/101] ideapad_laptop: Add Lenovo G40-30 to devices without
 radio switch

Lenovo G40-30 does not provide any physical radio switch to user.
Therefore disable the rfkill switch identically to the Yoga 2 approach.
(Note for later, models ids are sorted alphabetically).

Benefit is to make wireless available again without unloading module.

It was tested successfully on 4.1.0-rc1 base with this model:
(LENOVO_MT_80FY_BU_idea_FM_Lenovo G40-30).

BugLink: https://bugs.launchpad.net/ideapad-laptop/+bug/1450946
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Philippe Coval <rzr@gna.org>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
---
 drivers/platform/x86/ideapad-laptop.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index b3d419a84723..b496db87bc05 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -829,6 +829,13 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data)
  * report all radios as hardware-blocked.
  */
 static const struct dmi_system_id no_hw_rfkill_list[] = {
+	{
+		.ident = "Lenovo G40-30",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G40-30"),
+		},
+	},
 	{
 		.ident = "Lenovo Yoga 2 11 / 13 / Pro",
 		.matches = {

From 7829fb09a2b4268b30dd9bc782fa5ebee278b137 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 30 Apr 2015 04:13:52 +0200
Subject: [PATCH 036/101] lib: make memzero_explicit more robust against dead
 store elimination

In commit 0b053c951829 ("lib: memzero_explicit: use barrier instead
of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in
case LTO would decide to inline memzero_explicit() and eventually
find out it could be eliminated as dead store.

While using barrier() works well for the case of gcc, recent efforts
from LLVMLinux people suggest to use llvm as an alternative to gcc,
and there, Stephan found in a simple stand-alone user space example
that llvm could nevertheless optimize and thus eliminate the memset().
A similar issue has been observed in the referenced llvm bug report,
which is regarded as not-a-bug.

Based on some experiments, icc is a bit special on its own, while it
doesn't seem to eliminate the memset(), it could do so with an own
implementation, and then result in similar findings as with llvm.

The fix in this patch now works for all three compilers (also tested
with more aggressive optimization levels). Arguably, in the current
kernel tree it's more of a theoretical issue, but imho, it's better
to be pedantic about it.

It's clearly visible with gcc/llvm though, with the below code: if we
would have used barrier() only here, llvm would have omitted clearing,
not so with barrier_data() variant:

  static inline void memzero_explicit(void *s, size_t count)
  {
    memset(s, 0, count);
    barrier_data(s);
  }

  int main(void)
  {
    char buff[20];
    memzero_explicit(buff, sizeof(buff));
    return 0;
  }

  $ gcc -O2 test.c
  $ gdb a.out
  (gdb) disassemble main
  Dump of assembler code for function main:
   0x0000000000400400  <+0>: lea   -0x28(%rsp),%rax
   0x0000000000400405  <+5>: movq  $0x0,-0x28(%rsp)
   0x000000000040040e <+14>: movq  $0x0,-0x20(%rsp)
   0x0000000000400417 <+23>: movl  $0x0,-0x18(%rsp)
   0x000000000040041f <+31>: xor   %eax,%eax
   0x0000000000400421 <+33>: retq
  End of assembler dump.

  $ clang -O2 test.c
  $ gdb a.out
  (gdb) disassemble main
  Dump of assembler code for function main:
   0x00000000004004f0  <+0>: xorps  %xmm0,%xmm0
   0x00000000004004f3  <+3>: movaps %xmm0,-0x18(%rsp)
   0x00000000004004f8  <+8>: movl   $0x0,-0x8(%rsp)
   0x0000000000400500 <+16>: lea    -0x18(%rsp),%rax
   0x0000000000400505 <+21>: xor    %eax,%eax
   0x0000000000400507 <+23>: retq
  End of assembler dump.

As gcc, clang, but also icc defines __GNUC__, it's sufficient to define
this in compiler-gcc.h only to be picked up. For a fallback or otherwise
unsupported compiler, we define it as a barrier. Similarly, for ecc which
does not support gcc inline asm.

Reference: https://llvm.org/bugs/show_bug.cgi?id=15495
Reported-by: Stephan Mueller <smueller@chronox.de>
Tested-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Stephan Mueller <smueller@chronox.de>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: mancha security <mancha1@zoho.com>
Cc: Mark Charlebois <charlebm@gmail.com>
Cc: Behan Webster <behanw@converseincode.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/compiler-gcc.h   | 16 +++++++++++++++-
 include/linux/compiler-intel.h |  3 +++
 include/linux/compiler.h       |  4 ++++
 lib/string.c                   |  2 +-
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cdf13ca7cac3..371e560d13cf 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -9,10 +9,24 @@
 		   + __GNUC_MINOR__ * 100 \
 		   + __GNUC_PATCHLEVEL__)
 
-
 /* Optimization barrier */
+
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")
+/*
+ * This version is i.e. to prevent dead stores elimination on @ptr
+ * where gcc and llvm may behave differently when otherwise using
+ * normal barrier(): while gcc behavior gets along with a normal
+ * barrier(), llvm needs an explicit input variable to be assumed
+ * clobbered. The issue is as follows: while the inline asm might
+ * access any memory it wants, the compiler could have fit all of
+ * @ptr into memory registers instead, and since @ptr never escaped
+ * from that, it proofed that the inline asm wasn't touching any of
+ * it. This version works well with both compilers, i.e. we're telling
+ * the compiler that the inline asm absolutely may see the contents
+ * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
+ */
+#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
 
 /*
  * This macro obfuscates arithmetic on a variable address so that gcc
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index ba147a1727e6..0c9a2f2c2802 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -13,9 +13,12 @@
 /* Intel ECC compiler doesn't support gcc specific asm stmts.
  * It uses intrinsics to do the equivalent things.
  */
+#undef barrier_data
 #undef RELOC_HIDE
 #undef OPTIMIZER_HIDE_VAR
 
+#define barrier_data(ptr) barrier()
+
 #define RELOC_HIDE(ptr, off)					\
   ({ unsigned long __ptr;					\
      __ptr = (unsigned long) (ptr);				\
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 0e41ca0e5927..867722591be2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -169,6 +169,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define barrier() __memory_barrier()
 #endif
 
+#ifndef barrier_data
+# define barrier_data(ptr) barrier()
+#endif
+
 /* Unreachable code */
 #ifndef unreachable
 # define unreachable() do { } while (1)
diff --git a/lib/string.c b/lib/string.c
index a5792019193c..bb3d4b6993c4 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset);
 void memzero_explicit(void *s, size_t count)
 {
 	memset(s, 0, count);
-	barrier();
+	barrier_data(s);
 }
 EXPORT_SYMBOL(memzero_explicit);
 

From f440c4ee3e53f767974fe60bcbc0b6687a5fb53f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= <noltari@gmail.com>
Date: Sat, 2 May 2015 12:08:42 +0200
Subject: [PATCH 037/101] hwrng: bcm63xx - Fix driver compilation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- s/clk_didsable_unprepare/clk_disable_unprepare
- s/prov/priv
- s/error/ret (bcm63xx_rng_probe)

Fixes: 6229c16060fe ("hwrng: bcm63xx - make use of devm_hwrng_register")
Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/bcm63xx-rng.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c
index d1494ecd9e11..4b31f1387f37 100644
--- a/drivers/char/hw_random/bcm63xx-rng.c
+++ b/drivers/char/hw_random/bcm63xx-rng.c
@@ -57,7 +57,7 @@ static void bcm63xx_rng_cleanup(struct hwrng *rng)
 	val &= ~RNG_EN;
 	__raw_writel(val, priv->regs + RNG_CTRL);
 
-	clk_didsable_unprepare(prov->clk);
+	clk_disable_unprepare(priv->clk);
 }
 
 static int bcm63xx_rng_data_present(struct hwrng *rng, int wait)
@@ -97,14 +97,14 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
 	priv->rng.name = pdev->name;
 	priv->rng.init = bcm63xx_rng_init;
 	priv->rng.cleanup = bcm63xx_rng_cleanup;
-	prov->rng.data_present = bcm63xx_rng_data_present;
+	priv->rng.data_present = bcm63xx_rng_data_present;
 	priv->rng.data_read = bcm63xx_rng_data_read;
 
 	priv->clk = devm_clk_get(&pdev->dev, "ipsec");
 	if (IS_ERR(priv->clk)) {
-		error = PTR_ERR(priv->clk);
-		dev_err(&pdev->dev, "no clock for device: %d\n", error);
-		return error;
+		ret = PTR_ERR(priv->clk);
+		dev_err(&pdev->dev, "no clock for device: %d\n", ret);
+		return ret;
 	}
 
 	if (!devm_request_mem_region(&pdev->dev, r->start,
@@ -120,11 +120,11 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	error = devm_hwrng_register(&pdev->dev, &priv->rng);
-	if (error) {
+	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
+	if (ret) {
 		dev_err(&pdev->dev, "failed to register rng device: %d\n",
-			error);
-		return error;
+			ret);
+		return ret;
 	}
 
 	dev_info(&pdev->dev, "registered RNG driver\n");

From a00212e21928640486d3cc939cf4d908e8522016 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 4 May 2015 01:58:27 +0200
Subject: [PATCH 038/101] ACPI / documentation: Fix ambiguity in the GPIO
 properties document

The first paragraph in Documentation/acpi/gpio-properties.txt is
ambiguous, so make it more clear.

Reported-by: Antonio Ospite <ao2@ao2.it>
Acked-by: Antonio Ospite <ao2@ao2.it>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/acpi/gpio-properties.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/acpi/gpio-properties.txt b/Documentation/acpi/gpio-properties.txt
index ae36fcf86dc7..f35dad11f0de 100644
--- a/Documentation/acpi/gpio-properties.txt
+++ b/Documentation/acpi/gpio-properties.txt
@@ -1,9 +1,9 @@
 _DSD Device Properties Related to GPIO
 --------------------------------------
 
-With the release of ACPI 5.1 and the _DSD configuration objecte names
-can finally be given to GPIOs (and other things as well) returned by
-_CRS.  Previously, we were only able to use an integer index to find
+With the release of ACPI 5.1, the _DSD configuration object finally
+allows names to be given to GPIOs (and other things as well) returned
+by _CRS.  Previously, we were only able to use an integer index to find
 the corresponding GPIO, which is pretty error prone (it depends on
 the _CRS output ordering, for example).
 

From 74d77e50f23123938fbb7987eba71310864e6a7c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 20 Apr 2015 10:59:17 -0500
Subject: [PATCH 039/101] pinctrl: mediatek: mtk-common: initialize unmask

cppcheck detected an uninitialized variable:

[drivers/pinctrl/mediatek/pinctrl-mtk-common.c:897]:
  (error) Uninitialized variable: unmask

unmask should be initialized to zero to ensure unmasking
only occurs if a previous mask occurred. The current situation
is that the unmask variable could contain any random garbage
causing random unexpected unmasking.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index 493294c0ebe6..474812e2b0cb 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -881,6 +881,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
 	if (!mtk_eint_get_mask(pctl, eint_num)) {
 		mtk_eint_mask(d);
 		unmask = 1;
+	} else {
+		unmask = 0;
 	}
 
 	clr_bit = 0xff << eint_offset;

From 622532bb2fad8fe342fb685727ae0be566f6be5d Mon Sep 17 00:00:00 2001
From: Witold Szczeponik <Witold.Szczeponik@gmx.net>
Date: Fri, 1 May 2015 19:05:20 +0200
Subject: [PATCH 040/101] ACPI / PNP: add two IDs to list for PNPACPI device
 enumeration

Commit eec15edbb0e1 (ACPI / PNP: use device ID list for PNPACPI device
enumeration) changed the way how ACPI devices are enumerated and when
they are added to the PNP bus.

However, it broke the sound card support on (at least) a vintage
IBM ThinkPad 600E: with said commit applied, two of the necessary
"CSC01xx" devices are not added to the PNP bus and hence can not be
found during the initialization of the "snd-cs4236" module.  As a
consequence, loading "snd-cs4236" causes null pointer exceptions.
The attached patch fixes the problem and re-enables sound on the
IBM ThinkPad 600E.

Fixes: eec15edbb0e1 (ACPI / PNP: use device ID list for PNPACPI device enumeration)
Signed-off-by: Witold Szczeponik <Witold.Szczeponik@gmx.net>
Cc: 3.16+ <stable@vger.kernel.org> # 3.16+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_pnp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c
index b193f8425999..ff6d8adc9cda 100644
--- a/drivers/acpi/acpi_pnp.c
+++ b/drivers/acpi/acpi_pnp.c
@@ -304,6 +304,8 @@ static const struct acpi_device_id acpi_pnp_device_ids[] = {
 	{"PNPb006"},
 	/* cs423x-pnpbios */
 	{"CSC0100"},
+	{"CSC0103"},
+	{"CSC0110"},
 	{"CSC0000"},
 	{"GIM0100"},		/* Guillemot Turtlebeach something appears to be cs4232 compatible */
 	/* es18xx-pnpbios */

From 5463e7c18e51152104aba9614e6abfc039a8b710 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 21 Apr 2015 10:40:54 -0700
Subject: [PATCH 041/101] Revert "f2fs: enhance multi-threads performance"

A performance regression was reported by Yuanhan Liu.
The basic idea was to reduce the single-point mutex, but it turns out this
causes another kind of contention, such as context switches.

https://lkml.org/lkml/2015/4/21/11

Until finishing the analysis on this issue, I'd like to revert this for a while.

This reverts commit 78373b7319abdf15050af5b1632c4c8b8b398f33.
---
 fs/f2fs/data.c  | 7 +++++++
 fs/f2fs/f2fs.h  | 1 +
 fs/f2fs/super.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b91b0e10678e..1e1aae669fa8 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	bool locked = false;
 	int ret;
 	long diff;
 
@@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 
 	diff = nr_pages_to_write(sbi, DATA, wbc);
 
+	if (!S_ISDIR(inode->i_mode)) {
+		mutex_lock(&sbi->writepages);
+		locked = true;
+	}
 	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
+	if (locked)
+		mutex_unlock(&sbi->writepages);
 
 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d8921cf2ba9a..8de34ab6d5b1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -625,6 +625,7 @@ struct f2fs_sb_info {
 	struct mutex cp_mutex;			/* checkpoint procedure lock */
 	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
 	struct rw_semaphore node_write;		/* locking node writes */
+	struct mutex writepages;		/* mutex for writepages() */
 	wait_queue_head_t cp_wait;
 
 	struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 160b88346b24..b2dd1b01f076 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1035,6 +1035,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->raw_super = raw_super;
 	sbi->raw_super_buf = raw_super_buf;
 	mutex_init(&sbi->gc_mutex);
+	mutex_init(&sbi->writepages);
 	mutex_init(&sbi->cp_mutex);
 	init_rwsem(&sbi->node_write);
 	clear_sbi_flag(sbi, SBI_POR_DOING);

From 7263b1bd0490fca68ee7eedb0b6973cb86d4701c Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 22 Apr 2015 11:03:48 -0700
Subject: [PATCH 042/101] f2fs: fix wrong error handler in f2fs_follow_link

page_follow_link_light() may return NULL, in which case its error pointer
remains in nd->path.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7e3794edae42..658e8079aaf9 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -298,16 +298,14 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 
 static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	struct page *page;
+	struct page *page = page_follow_link_light(dentry, nd);
 
-	page = page_follow_link_light(dentry, nd);
-	if (IS_ERR(page))
+	if (IS_ERR_OR_NULL(page))
 		return page;
 
 	/* this is broken symlink case */
 	if (*nd_get_link(nd) == 0) {
-		kunmap(page);
-		page_cache_release(page);
+		page_put_link(dentry, nd, page);
 		return ERR_PTR(-ENOENT);
 	}
 	return page;

From e8a4a2696fecb398b0288c43c0e0dbb91e265bb2 Mon Sep 17 00:00:00 2001
From: Tahsin Erdogan <tahsin@google.com>
Date: Mon, 4 May 2015 21:15:31 -0700
Subject: [PATCH 043/101] x86/spinlocks: Fix regression in spinlock contention
 detection

A spinlock is regarded as contended when there is at least one waiter.
Currently, the code that checks whether there are any waiters relies on
tail value being greater than head. However, this is not true if tail
reaches the max value and wraps back to zero, so arch_spin_is_contended()
incorrectly returns 0 (not contended) when tail is smaller than head.

The original code (before regression) handled this case by casting the
(tail - head) to an unsigned value. This change simply restores that
behavior.

Fixes: d6abfdb20223 ("x86/spinlocks/paravirt: Fix memory corruption on unlock")
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Cc: peterz@infradead.org
Cc: Waiman.Long@hp.com
Cc: borntraeger@de.ibm.com
Cc: oleg@redhat.com
Cc: raghavendra.kt@linux.vnet.ibm.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1430799331-20445-1-git-send-email-tahsin@google.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/spinlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index cf87de3fc390..64b611782ef0 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -169,7 +169,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
 	tmp.head &= ~TICKET_SLOWPATH_FLAG;
-	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 

From 285214409a9e5fceba2215461b4682b6069d8e77 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Mon, 20 Apr 2015 14:01:11 -0600
Subject: [PATCH 044/101] RDMA/CMA: Canonize IPv4 on IPV6 sockets properly

When accepting a new IPv4 connect to an IPv6 socket, the CMA tries to
canonize the address family to IPv4, but does not properly process
the listening sockaddr to get the listening port, and does not properly
set the address family of the canonized sockaddr.

Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager")

Cc: <stable@vger.kernel.org>
Reported-By: Yotam Kenneth <yotamke@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Tested-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/cma.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d570030d899c..06441a43c3aa 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -859,19 +859,27 @@ static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id
 	memcpy(&ib->sib_addr, &path->dgid, 16);
 }
 
+static __be16 ss_get_port(const struct sockaddr_storage *ss)
+{
+	if (ss->ss_family == AF_INET)
+		return ((struct sockaddr_in *)ss)->sin_port;
+	else if (ss->ss_family == AF_INET6)
+		return ((struct sockaddr_in6 *)ss)->sin6_port;
+	BUG();
+}
+
 static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
 			      struct cma_hdr *hdr)
 {
-	struct sockaddr_in *listen4, *ip4;
+	struct sockaddr_in *ip4;
 
-	listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
 	ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
-	ip4->sin_family = listen4->sin_family;
+	ip4->sin_family = AF_INET;
 	ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
-	ip4->sin_port = listen4->sin_port;
+	ip4->sin_port = ss_get_port(&listen_id->route.addr.src_addr);
 
 	ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
-	ip4->sin_family = listen4->sin_family;
+	ip4->sin_family = AF_INET;
 	ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
 	ip4->sin_port = hdr->port;
 }
@@ -879,16 +887,15 @@ static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_i
 static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
 			      struct cma_hdr *hdr)
 {
-	struct sockaddr_in6 *listen6, *ip6;
+	struct sockaddr_in6 *ip6;
 
-	listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr;
 	ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
-	ip6->sin6_family = listen6->sin6_family;
+	ip6->sin6_family = AF_INET6;
 	ip6->sin6_addr = hdr->dst_addr.ip6;
-	ip6->sin6_port = listen6->sin6_port;
+	ip6->sin6_port = ss_get_port(&listen_id->route.addr.src_addr);
 
 	ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
-	ip6->sin6_family = listen6->sin6_family;
+	ip6->sin6_family = AF_INET6;
 	ip6->sin6_addr = hdr->src_addr.ip6;
 	ip6->sin6_port = hdr->port;
 }

From 0b7410471d59ce2ea30453e68c03bdb941d5951e Mon Sep 17 00:00:00 2001
From: Hariprasad S <hariprasad@chelsio.com>
Date: Wed, 22 Apr 2015 01:44:58 +0530
Subject: [PATCH 045/101] iw_cxgb4: Cleanup register defines/MACROS

Cleanup macros and register defines for consistency

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c          | 4 ++--
 drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 57176ddd4c50..0493cca3ec15 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -675,7 +675,7 @@ static int send_connect(struct c4iw_ep *ep)
 	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 		opt2 |= T5_OPT_2_VALID_F;
 		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
-		opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+		opt2 |= T5_ISS_F;
 	}
 	t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
 
@@ -2214,7 +2214,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
 		u32 isn = (prandom_u32() & ~7UL) - 1;
 		opt2 |= T5_OPT_2_VALID_F;
 		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
-		opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+		opt2 |= T5_ISS_F;
 		rpl5 = (void *)rpl;
 		memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
 		if (peer2peer)
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 5e53327fc647..343e8daf2270 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -848,6 +848,8 @@ enum {                     /* TCP congestion control algorithms */
 #define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S)
 #define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M)
 
-#define CONG_CNTRL_VALID   (1 << 18)
+#define T5_ISS_S    18
+#define T5_ISS_V(x) ((x) << T5_ISS_S)
+#define T5_ISS_F    T5_ISS_V(1U)
 
 #endif /* _T4FW_RI_API_H_ */

From 6198dd8d7a6a7f40dc4599cb0676101d9cb82776 Mon Sep 17 00:00:00 2001
From: Hariprasad S <hariprasad@chelsio.com>
Date: Wed, 22 Apr 2015 01:44:59 +0530
Subject: [PATCH 046/101] iw_cxgb4: 32b platform fixes

- get_dma_mr() was using ~0UL, which should be ~0ULL.  This causes the
  DMA MR to get set up incorrectly in hardware.

- wr_log_show() needed a 64b divide function div64_u64() instead of
  doing division directly.

- fixed warnings about recasting a pointer to a u64

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c     |  2 +-
 drivers/infiniband/hw/cxgb4/cq.c     |  7 +++----
 drivers/infiniband/hw/cxgb4/device.c |  6 +++---
 drivers/infiniband/hw/cxgb4/mem.c    |  6 +++---
 drivers/infiniband/hw/cxgb4/qp.c     | 10 +++++-----
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0493cca3ec15..6fb31bacd5b4 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3571,7 +3571,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
 	 * TP will ignore any value > 0 for MSS index.
 	 */
 	req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
-	req->cookie = (unsigned long)skb;
+	req->cookie = (uintptr_t)skb;
 
 	set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
 	ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index ab7692ac2044..25dbd6986301 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -55,7 +55,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			FW_RI_RES_WR_NRES_V(1) |
 			FW_WR_COMPL_F);
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-	res_wr->cookie = (unsigned long) &wr_wait;
+	res_wr->cookie = (uintptr_t)&wr_wait;
 	res = res_wr->res;
 	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
 	res->u.cq.op = FW_RI_RES_OP_RESET;
@@ -125,7 +125,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			FW_RI_RES_WR_NRES_V(1) |
 			FW_WR_COMPL_F);
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-	res_wr->cookie = (unsigned long) &wr_wait;
+	res_wr->cookie = (uintptr_t)&wr_wait;
 	res = res_wr->res;
 	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
 	res->u.cq.op = FW_RI_RES_OP_WRITE;
@@ -970,8 +970,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
 	}
 	PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
 	     __func__, chp->cq.cqid, chp, chp->cq.size,
-	     chp->cq.memsize,
-	     (unsigned long long) chp->cq.dma_addr);
+	     chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
 	return &chp->ibcq;
 err5:
 	kfree(mm2);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 8fb295e4a9ab..7ed32537eb59 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -151,7 +151,7 @@ static int wr_log_show(struct seq_file *seq, void *v)
 	int prev_ts_set = 0;
 	int idx, end;
 
-#define ts2ns(ts) div64_ul((ts) * dev->rdev.lldi.cclk_ps, 1000)
+#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
 
 	idx = atomic_read(&dev->rdev.wr_log_idx) &
 		(dev->rdev.wr_log_size - 1);
@@ -784,10 +784,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	     rdev->lldi.vr->qp.size,
 	     rdev->lldi.vr->cq.start,
 	     rdev->lldi.vr->cq.size);
-	PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
+	PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
 	     "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
 	     (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
-	     (u64)pci_resource_start(rdev->lldi.pdev, 2),
+	     (void *)pci_resource_start(rdev->lldi.pdev, 2),
 	     rdev->lldi.db_reg,
 	     rdev->lldi.gts_reg,
 	     rdev->qpshift, rdev->qpmask,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 3ef0cf9f5c44..cff815b91707 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
 		if (i == (num_wqe-1)) {
 			req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
 						    FW_WR_COMPL_F);
-			req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait;
+			req->wr.wr_lo = (__force __be64)&wr_wait;
 		} else
 			req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
 		req->wr.wr_mid = cpu_to_be32(
@@ -676,12 +676,12 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
 	mhp->attr.zbva = 0;
 	mhp->attr.va_fbo = 0;
 	mhp->attr.page_size = 0;
-	mhp->attr.len = ~0UL;
+	mhp->attr.len = ~0ULL;
 	mhp->attr.pbl_size = 0;
 
 	ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
 			      FW_RI_STAG_NSMR, mhp->attr.perms,
-			      mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0);
+			      mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0);
 	if (ret)
 		goto err1;
 
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 15cae5a31018..389ced335bc5 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -275,7 +275,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 			FW_RI_RES_WR_NRES_V(2) |
 			FW_WR_COMPL_F);
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
-	res_wr->cookie = (unsigned long) &wr_wait;
+	res_wr->cookie = (uintptr_t)&wr_wait;
 	res = res_wr->res;
 	res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
 	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
@@ -1209,7 +1209,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 	wqe->flowid_len16 = cpu_to_be32(
 		FW_WR_FLOWID_V(ep->hwtid) |
 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
-	wqe->cookie = (unsigned long) &ep->com.wr_wait;
+	wqe->cookie = (uintptr_t)&ep->com.wr_wait;
 
 	wqe->u.fini.type = FW_RI_TYPE_FINI;
 	ret = c4iw_ofld_send(&rhp->rdev, skb);
@@ -1279,7 +1279,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 		FW_WR_FLOWID_V(qhp->ep->hwtid) |
 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
 
-	wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait;
+	wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait;
 
 	wqe->u.init.type = FW_RI_TYPE_INIT;
 	wqe->u.init.mpareqbit_p2ptype =
@@ -1766,11 +1766,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
 		insert_mmap(ucontext, mm2);
 		mm3->key = uresp.sq_db_gts_key;
-		mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
+		mm3->addr = (__force unsigned long)qhp->wq.sq.udb;
 		mm3->len = PAGE_SIZE;
 		insert_mmap(ucontext, mm3);
 		mm4->key = uresp.rq_db_gts_key;
-		mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
+		mm4->addr = (__force unsigned long)qhp->wq.rq.udb;
 		mm4->len = PAGE_SIZE;
 		insert_mmap(ucontext, mm4);
 		if (mm5) {

From 09ece8b9e983fe858de6eab7a386d58d194227b6 Mon Sep 17 00:00:00 2001
From: Hariprasad S <hariprasad@chelsio.com>
Date: Wed, 22 Apr 2015 01:45:00 +0530
Subject: [PATCH 047/101] iw_cxgb4: use BAR2 GTS register for T5 kernel mode
 CQs

For T5, we must not use the kdb/kgts registers, in order to avoid db drops
under extreme loads.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cq.c | 15 +++++++++++----
 drivers/infiniband/hw/cxgb4/t4.h |  7 ++++---
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 25dbd6986301..68ddb3710215 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -156,12 +156,19 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 		goto err4;
 
 	cq->gen = 1;
-	cq->gts = rdev->lldi.gts_reg;
 	cq->rdev = rdev;
 	if (user) {
-		cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
-					(cq->cqid << rdev->cqshift);
-		cq->ugts &= PAGE_MASK;
+		u32 off = (cq->cqid << rdev->cqshift) & PAGE_MASK;
+
+		cq->ugts = (u64)rdev->bar2_pa + off;
+	} else if (is_t4(rdev->lldi.adapter_type)) {
+		cq->gts = rdev->lldi.gts_reg;
+		cq->qid_mask = -1U;
+	} else {
+		u32 off = ((cq->cqid << rdev->cqshift) & PAGE_MASK) + 12;
+
+		cq->gts = rdev->bar2_kva + off;
+		cq->qid_mask = rdev->qpmask;
 	}
 	return 0;
 err4:
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 871cdcac7be2..7f2a6c244d25 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -539,6 +539,7 @@ struct t4_cq {
 	size_t memsize;
 	__be64 bits_type_ts;
 	u32 cqid;
+	u32 qid_mask;
 	int vector;
 	u16 size; /* including status page */
 	u16 cidx;
@@ -563,12 +564,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se)
 	set_bit(CQ_ARMED, &cq->flags);
 	while (cq->cidx_inc > CIDXINC_M) {
 		val = SEINTARM_V(0) | CIDXINC_V(CIDXINC_M) | TIMERREG_V(7) |
-		      INGRESSQID_V(cq->cqid);
+		      INGRESSQID_V(cq->cqid & cq->qid_mask);
 		writel(val, cq->gts);
 		cq->cidx_inc -= CIDXINC_M;
 	}
 	val = SEINTARM_V(se) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(6) |
-	      INGRESSQID_V(cq->cqid);
+	      INGRESSQID_V(cq->cqid & cq->qid_mask);
 	writel(val, cq->gts);
 	cq->cidx_inc = 0;
 	return 0;
@@ -601,7 +602,7 @@ static inline void t4_hwcq_consume(struct t4_cq *cq)
 		u32 val;
 
 		val = SEINTARM_V(0) | CIDXINC_V(cq->cidx_inc) | TIMERREG_V(7) |
-		      INGRESSQID_V(cq->cqid);
+		      INGRESSQID_V(cq->cqid & cq->qid_mask);
 		writel(val, cq->gts);
 		cq->cidx_inc = 0;
 	}

From 4a75a86c8d04390f268d7237cc49fe9a8e36efe7 Mon Sep 17 00:00:00 2001
From: Hariprasad S <hariprasad@chelsio.com>
Date: Wed, 22 Apr 2015 01:45:01 +0530
Subject: [PATCH 048/101] iw_cxgb4: enforce qp/cq id requirements

Currently the iw_cxgb4 implementation requires the qp and cq qid densities
to match as well as the qp and cq id ranges.  So fail a device open if
the device configuration doesn't meet the requirements.

The reason for these restrictions has to do with the fact that IQ qid X
has a UGTS register in the same bar2 page as EQ qid X.  Thus both qids
need to be allocated to the same user process for security reasons.
The logic that does this (the qpid allocator in iw_cxgb4/resource.c)
handles this but requires the above restrictions.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/device.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 7ed32537eb59..83209bb38285 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -764,6 +764,29 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 
 	c4iw_init_dev_ucontext(rdev, &rdev->uctx);
 
+	/*
+	 * This implementation assumes udb_density == ucq_density!  Eventually
+	 * we might need to support this but for now fail the open. Also the
+	 * cqid and qpid range must match for now.
+	 */
+	if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
+		pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
+		       pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
+		       rdev->lldi.ucq_density);
+		err = -EINVAL;
+		goto err1;
+	}
+	if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
+	    rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
+		pr_err(MOD "%s: unsupported qp and cq id ranges "
+		       "qp start %u size %u cq start %u size %u\n",
+		       pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
+		       rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
+		       rdev->lldi.vr->cq.size);
+		err = -EINVAL;
+		goto err1;
+	}
+
 	/*
 	 * qpshift is the number of bits to shift the qpid left in order
 	 * to get the correct address of the doorbell for that qp.

From 6eec177461751f0fe191cf9977cde692b9481d0a Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <Tatyana.E.Nikolova@intel.com>
Date: Tue, 21 Apr 2015 16:28:10 -0400
Subject: [PATCH 049/101] RDMA/core: Enable the iWarp Port Mapper to provide
 the actual address of the connecting peer to its clients

Add functionality to enable the port mapper on the passive side to provide to its
clients the actual (non-mapped) ip/tcp address information of the connecting peer

1) Adding remote_info_cb() to process the address info of the connecting peer
   The address info is provided by the user space port mapper service when
   the connection is initiated by the peer
2) Adding a hash list to store the remote address info
3) Adding functionality to add/remove the remote address info
   After the info has been provided to the port mapper client,
   it is removed from the hash list

Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/iwpm_msg.c  |  73 +++++++++-
 drivers/infiniband/core/iwpm_util.c | 208 +++++++++++++++++++++++-----
 drivers/infiniband/core/iwpm_util.h |  15 ++
 include/rdma/iw_portmap.h           |  25 ++++
 include/uapi/rdma/rdma_netlink.h    |   1 +
 5 files changed, 288 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index b85ddbc979e0..ab081702566f 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -468,7 +468,8 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
 }
 EXPORT_SYMBOL(iwpm_add_mapping_cb);
 
-/* netlink attribute policy for the response to add and query mapping request */
+/* netlink attribute policy for the response to add and query mapping request
+ * and response with remote address info */
 static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
 	[IWPM_NLA_QUERY_MAPPING_SEQ]      = { .type = NLA_U32 },
 	[IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) },
@@ -559,6 +560,76 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
 
+/*
+ * iwpm_remote_info_cb - Process a port mapper message, containing
+ *			  the remote connecting peer address info
+ *
+ * Copies the addresses into a new iwpm_remote_info and hands it to
+ * iwpm_add_remote_info(). Returns 0 on success, -EINVAL for a message
+ * that fails parsing or validation, and -ENOMEM if allocation fails.
+ */
+int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
+	struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
+	struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
+	struct iwpm_remote_info *rem_info;
+	const char *msg_type;
+	u8 nl_client;
+	int ret = -EINVAL;
+
+	msg_type = "Remote Mapping info";
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
+				resp_query_policy, nltb, msg_type))
+		return ret;
+
+	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+	if (!iwpm_valid_client(nl_client)) {
+		pr_info("%s: Invalid port mapper client = %d\n",
+				__func__, nl_client);
+		return ret;
+	}
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+	/* nla_data() returns void *, so no casts are needed */
+	local_sockaddr = nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+	remote_sockaddr = nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+	mapped_loc_sockaddr = nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
+	mapped_rem_sockaddr = nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
+
+	if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
+		mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
+		pr_info("%s: Sockaddr family doesn't match the requested one\n",
+				__func__);
+		return ret;
+	}
+	rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC);
+	if (!rem_info) {
+		pr_err("%s: Unable to allocate a remote info\n", __func__);
+		return -ENOMEM;
+	}
+	memcpy(&rem_info->mapped_loc_sockaddr, mapped_loc_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	memcpy(&rem_info->remote_sockaddr, remote_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	memcpy(&rem_info->mapped_rem_sockaddr, mapped_rem_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	rem_info->nl_client = nl_client;
+
+	iwpm_add_remote_info(rem_info);
+
+	iwpm_print_sockaddr(local_sockaddr,
+			"remote_info: Local sockaddr:");
+	iwpm_print_sockaddr(mapped_loc_sockaddr,
+			"remote_info: Mapped local sockaddr:");
+	iwpm_print_sockaddr(remote_sockaddr,
+			"remote_info: Remote sockaddr:");
+	iwpm_print_sockaddr(mapped_rem_sockaddr,
+			"remote_info: Mapped remote sockaddr:");
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_remote_info_cb);
+
 /* netlink attribute policy for the received request for mapping info */
 static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
 	[IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 69e9f84c1605..a626795bf9c7 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -33,8 +33,10 @@
 
 #include "iwpm_util.h"
 
-#define IWPM_HASH_BUCKET_SIZE	512
-#define IWPM_HASH_BUCKET_MASK	(IWPM_HASH_BUCKET_SIZE - 1)
+#define IWPM_MAPINFO_HASH_SIZE	512
+#define IWPM_MAPINFO_HASH_MASK	(IWPM_MAPINFO_HASH_SIZE - 1)
+#define IWPM_REMINFO_HASH_SIZE	64
+#define IWPM_REMINFO_HASH_MASK	(IWPM_REMINFO_HASH_SIZE - 1)
 
 static LIST_HEAD(iwpm_nlmsg_req_list);
 static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
@@ -42,31 +44,49 @@ static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
 static struct hlist_head *iwpm_hash_bucket;
 static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
 
+static struct hlist_head *iwpm_reminfo_bucket;
+static DEFINE_SPINLOCK(iwpm_reminfo_lock);
+
 static DEFINE_MUTEX(iwpm_admin_lock);
 static struct iwpm_admin_data iwpm_admin;
 
 int iwpm_init(u8 nl_client)
 {
+	int ret = 0;
 	if (iwpm_valid_client(nl_client))
 		return -EINVAL;
 	mutex_lock(&iwpm_admin_lock);
 	if (atomic_read(&iwpm_admin.refcount) == 0) {
-		iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE *
+		iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE *
 					sizeof(struct hlist_head), GFP_KERNEL);
 		if (!iwpm_hash_bucket) {
-			mutex_unlock(&iwpm_admin_lock);
+			ret = -ENOMEM;
 			pr_err("%s Unable to create mapinfo hash table\n", __func__);
-			return -ENOMEM;
+			goto init_exit;
+		}
+		iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE *
+					sizeof(struct hlist_head), GFP_KERNEL);
+		if (!iwpm_reminfo_bucket) {
+			kfree(iwpm_hash_bucket);
+			ret = -ENOMEM;
+			pr_err("%s Unable to create reminfo hash table\n", __func__);
+			goto init_exit;
 		}
 	}
 	atomic_inc(&iwpm_admin.refcount);
+init_exit:
 	mutex_unlock(&iwpm_admin_lock);
-	iwpm_set_valid(nl_client, 1);
-	return 0;
+	if (!ret) {
+		iwpm_set_valid(nl_client, 1);
+		pr_debug("%s: Mapinfo and reminfo tables are created\n",
+				__func__);
+	}
+	return ret;
 }
 EXPORT_SYMBOL(iwpm_init);
 
 static void free_hash_bucket(void);
+static void free_reminfo_bucket(void);
 
 int iwpm_exit(u8 nl_client)
 {
@@ -81,7 +101,8 @@ int iwpm_exit(u8 nl_client)
 	}
 	if (atomic_dec_and_test(&iwpm_admin.refcount)) {
 		free_hash_bucket();
-		pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
+		free_reminfo_bucket();
+		pr_debug("%s: Resources are destroyed\n", __func__);
 	}
 	mutex_unlock(&iwpm_admin_lock);
 	iwpm_set_valid(nl_client, 0);
@@ -89,7 +110,7 @@ int iwpm_exit(u8 nl_client)
 }
 EXPORT_SYMBOL(iwpm_exit);
 
-static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
+static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
 					       struct sockaddr_storage *);
 
 int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
@@ -99,9 +120,10 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 	struct hlist_head *hash_bucket_head;
 	struct iwpm_mapping_info *map_info;
 	unsigned long flags;
+	int ret = -EINVAL;
 
 	if (!iwpm_valid_client(nl_client))
-		return -EINVAL;
+		return ret;
 	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
 	if (!map_info) {
 		pr_err("%s: Unable to allocate a mapping info\n", __func__);
@@ -115,13 +137,16 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 
 	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
 	if (iwpm_hash_bucket) {
-		hash_bucket_head = get_hash_bucket_head(
+		hash_bucket_head = get_mapinfo_hash_bucket(
 					&map_info->local_sockaddr,
 					&map_info->mapped_sockaddr);
-		hlist_add_head(&map_info->hlist_node, hash_bucket_head);
+		if (hash_bucket_head) {
+			hlist_add_head(&map_info->hlist_node, hash_bucket_head);
+			ret = 0;
+		}
 	}
 	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(iwpm_create_mapinfo);
 
@@ -136,9 +161,12 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
 
 	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
 	if (iwpm_hash_bucket) {
-		hash_bucket_head = get_hash_bucket_head(
+		hash_bucket_head = get_mapinfo_hash_bucket(
 					local_sockaddr,
 					mapped_local_addr);
+		if (!hash_bucket_head)
+			goto remove_mapinfo_exit;
+
 		hlist_for_each_entry_safe(map_info, tmp_hlist_node,
 					hash_bucket_head, hlist_node) {
 
@@ -152,6 +180,7 @@ int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
 			}
 		}
 	}
+remove_mapinfo_exit:
 	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
 	return ret;
 }
@@ -166,7 +195,7 @@ static void free_hash_bucket(void)
 
 	/* remove all the mapinfo data from the list */
 	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
-	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+	for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
 		hlist_for_each_entry_safe(map_info, tmp_hlist_node,
 			&iwpm_hash_bucket[i], hlist_node) {
 
@@ -180,6 +209,96 @@ static void free_hash_bucket(void)
 	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
 }
 
+/* Free every remote info entry, then the reminfo hash table itself */
+static void free_reminfo_bucket(void)
+{
+	struct iwpm_remote_info *entry;
+	struct hlist_node *next;
+	unsigned long irq_flags;
+	int bucket;
+
+	spin_lock_irqsave(&iwpm_reminfo_lock, irq_flags);
+	/* unlink and free the entries, one bucket at a time */
+	for (bucket = 0; bucket < IWPM_REMINFO_HASH_SIZE; bucket++) {
+		hlist_for_each_entry_safe(entry, next,
+				&iwpm_reminfo_bucket[bucket], hlist_node) {
+			hlist_del_init(&entry->hlist_node);
+			kfree(entry);
+		}
+	}
+	/* drop the table and clear the pointer that other paths test */
+	kfree(iwpm_reminfo_bucket);
+	iwpm_reminfo_bucket = NULL;
+	spin_unlock_irqrestore(&iwpm_reminfo_lock, irq_flags);
+}
+
+static struct hlist_head *get_reminfo_hash_bucket(struct sockaddr_storage *,
+						struct sockaddr_storage *);
+
+void iwpm_add_remote_info(struct iwpm_remote_info *rem_info)
+{
+	struct hlist_head *hash_bucket_head = NULL; /* stays NULL if not linked */
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwpm_reminfo_lock, flags);
+	if (iwpm_reminfo_bucket)
+		hash_bucket_head = get_reminfo_hash_bucket(
+			&rem_info->mapped_loc_sockaddr, &rem_info->mapped_rem_sockaddr);
+	if (hash_bucket_head)
+		hlist_add_head(&rem_info->hlist_node, hash_bucket_head);
+	spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
+	if (!hash_bucket_head)
+		kfree(rem_info); /* no table or unhashable family: free, don't leak */
+}
+
+/* Look up, copy out and consume the remote info stored for a mapped address pair */
+int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
+				struct sockaddr_storage *mapped_rem_addr,
+				struct sockaddr_storage *remote_addr,
+				u8 nl_client)
+{
+	struct iwpm_remote_info *entry = NULL;
+	struct hlist_head *bucket;
+	struct hlist_node *next;
+	unsigned long irq_flags;
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		pr_info("%s: Invalid client = %d\n", __func__, nl_client);
+		return ret;
+	}
+	spin_lock_irqsave(&iwpm_reminfo_lock, irq_flags);
+	if (!iwpm_reminfo_bucket)
+		goto get_remote_info_exit;
+	bucket = get_reminfo_hash_bucket(mapped_loc_addr, mapped_rem_addr);
+	if (!bucket)
+		goto get_remote_info_exit;
+
+	hlist_for_each_entry_safe(entry, next, bucket, hlist_node) {
+		/* take the entry only if both address comparisons return 0 */
+		if (iwpm_compare_sockaddr(&entry->mapped_loc_sockaddr,
+					  mapped_loc_addr) ||
+		    iwpm_compare_sockaddr(&entry->mapped_rem_sockaddr,
+					  mapped_rem_addr))
+			continue;
+
+		memcpy(remote_addr, &entry->remote_sockaddr,
+		       sizeof(struct sockaddr_storage));
+		iwpm_print_sockaddr(remote_addr,
+				"get_remote_info: Remote sockaddr:");
+
+		/* one-shot lookup: the entry is consumed once reported */
+		hlist_del_init(&entry->hlist_node);
+		kfree(entry);
+		ret = 0;
+		break;
+	}
+get_remote_info_exit:
+	spin_unlock_irqrestore(&iwpm_reminfo_lock, irq_flags);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_get_remote_info);
+
 struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
 					u8 nl_client, gfp_t gfp)
 {
@@ -409,31 +528,54 @@ static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
 	return hash;
 }
 
-static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
-					       *local_sockaddr,
-					       struct sockaddr_storage
-					       *mapped_sockaddr)
+static int get_hash_bucket(struct sockaddr_storage *a_sockaddr,
+				struct sockaddr_storage *b_sockaddr, u32 *hash)
 {
-	u32 local_hash, mapped_hash, hash;
+	u32 a_hash, b_hash;
 
-	if (local_sockaddr->ss_family == AF_INET) {
-		local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr);
-		mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr);
+	if (a_sockaddr->ss_family == AF_INET) {
+		a_hash = iwpm_ipv4_jhash((struct sockaddr_in *) a_sockaddr);
+		b_hash = iwpm_ipv4_jhash((struct sockaddr_in *) b_sockaddr);
 
-	} else if (local_sockaddr->ss_family == AF_INET6) {
-		local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr);
-		mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr);
+	} else if (a_sockaddr->ss_family == AF_INET6) {
+		a_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) a_sockaddr);
+		b_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) b_sockaddr);
 	} else {
 		pr_err("%s: Invalid sockaddr family\n", __func__);
-		return NULL;
+		return -EINVAL;
 	}
 
-	if (local_hash == mapped_hash) /* if port mapper isn't available */
-		hash = local_hash;
+	if (a_hash == b_hash) /* if port mapper isn't available */
+		*hash = a_hash;
 	else
-		hash = jhash_2words(local_hash, mapped_hash, 0);
+		*hash = jhash_2words(a_hash, b_hash, 0);
+	return 0;
+}
 
-	return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK];
+/* Pick the mapinfo bucket for a (local, mapped) address pair; NULL if hashing fails */
+static struct hlist_head *
+get_mapinfo_hash_bucket(struct sockaddr_storage *local_sockaddr,
+			struct sockaddr_storage *mapped_sockaddr)
+{
+	u32 bucket_hash;
+
+	if (get_hash_bucket(local_sockaddr, mapped_sockaddr, &bucket_hash))
+		return NULL;
+
+	return &iwpm_hash_bucket[bucket_hash & IWPM_MAPINFO_HASH_MASK];
+}
+
+/* Pick the reminfo bucket for a mapped address pair; NULL if hashing fails */
+static struct hlist_head *
+get_reminfo_hash_bucket(struct sockaddr_storage *mapped_loc_sockaddr,
+			struct sockaddr_storage *mapped_rem_sockaddr)
+{
+	u32 bucket_hash;
+
+	if (get_hash_bucket(mapped_loc_sockaddr, mapped_rem_sockaddr,
+			    &bucket_hash))
+		return NULL;
+	return &iwpm_reminfo_bucket[bucket_hash & IWPM_REMINFO_HASH_MASK];
 }
 
 static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
@@ -512,7 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
 	}
 	skb_num++;
 	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
-	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+	for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
 		hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
 				     hlist_node) {
 			if (map_info->nl_client != nl_client)
@@ -595,7 +737,7 @@ int iwpm_mapinfo_available(void)
 
 	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
 	if (iwpm_hash_bucket) {
-		for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+		for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
 			if (!hlist_empty(&iwpm_hash_bucket[i])) {
 				full_bucket = 1;
 				break;
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 9777c869a140..ee2d9ff095be 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -76,6 +76,14 @@ struct iwpm_mapping_info {
 	u8     nl_client;
 };
 
+struct iwpm_remote_info {
+	struct hlist_node hlist_node;	/* link in a reminfo hash bucket chain */
+	struct sockaddr_storage remote_sockaddr;	/* peer address handed back by iwpm_get_remote_info() */
+	struct sockaddr_storage mapped_loc_sockaddr;	/* hash/match key, with mapped_rem_sockaddr */
+	struct sockaddr_storage mapped_rem_sockaddr;	/* hash/match key, with mapped_loc_sockaddr */
+	u8     nl_client;	/* client index taken from the netlink message type */
+};
+
 struct iwpm_admin_data {
 	atomic_t refcount;
 	atomic_t nlmsg_seq;
@@ -127,6 +135,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
  */
 int iwpm_get_nlmsg_seq(void);
 
+/**
+ * iwpm_add_remote_info - Add remote address info of the connecting peer
+ *                        to the remote info hash table
+ * @reminfo: The remote info to be added
+ */
+void iwpm_add_remote_info(struct iwpm_remote_info *reminfo);
+
 /**
  * iwpm_valid_client - Check if the port mapper client is valid
  * @nl_client: The index of the netlink client
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
index 928b2775e992..fda31673a562 100644
--- a/include/rdma/iw_portmap.h
+++ b/include/rdma/iw_portmap.h
@@ -147,6 +147,16 @@ int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
  */
 int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
 
+/**
+ * iwpm_remote_info_cb - Process remote connecting peer address info, which
+ *                       the port mapper has received from the connecting peer
+ *
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Stores the IPv4/IPv6 address info in a hash table
+ */
+int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *);
+
 /**
  * iwpm_mapping_error_cb - Process port mapper notification for error
  *
@@ -174,6 +184,21 @@ int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
  */
 int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
 
+/**
+ * iwpm_get_remote_info - Get the remote connecting peer address info
+ *
+ * @mapped_loc_addr: Mapped local address of the listening peer
+ * @mapped_rem_addr: Mapped remote address of the connecting peer
+ * @remote_addr: To store the remote address of the connecting peer
+ * @nl_client: The index of the netlink client
+ *
+ * The remote address info is retrieved and provided to the client in
+ * the remote_addr. After that it is removed from the hash table
+ */
+int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
+			struct sockaddr_storage *mapped_rem_addr,
+			struct sockaddr_storage *remote_addr, u8 nl_client);
+
 /**
  * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
  *                       info in a hash table
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index de69170a30ce..6e4bb4270ca2 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -37,6 +37,7 @@ enum {
 	RDMA_NL_IWPM_ADD_MAPPING,
 	RDMA_NL_IWPM_QUERY_MAPPING,
 	RDMA_NL_IWPM_REMOVE_MAPPING,
+	RDMA_NL_IWPM_REMOTE_INFO,
 	RDMA_NL_IWPM_HANDLE_ERR,
 	RDMA_NL_IWPM_MAPINFO,
 	RDMA_NL_IWPM_MAPINFO_NUM,

From 230da36ae919e690dbcc44d1be8a2154214c6e36 Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <Tatyana.E.Nikolova@intel.com>
Date: Tue, 21 Apr 2015 16:28:25 -0400
Subject: [PATCH 050/101] RDMA/nes: Report the actual address of the remote
 connecting peer

Get the actual (non-mapped) ip/tcp address of the connecting peer from
the port mapper and report the address info to the user space application
at the time of connection establishment

Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/nes/nes.c    |  1 +
 drivers/infiniband/hw/nes/nes_cm.c | 63 ++++++++++++++++++++++--------
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 3b2a6dc8ea99..9f9d5c563a61 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -116,6 +116,7 @@ static struct ibnl_client_cbs nes_nl_cb_table[] = {
 	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
 	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
 	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
 	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
 	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
 	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6f09a72e78d7..72b43417cbe3 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -596,27 +596,52 @@ static void nes_form_reg_msg(struct nes_vnic *nesvnic,
 	memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE);
 }
 
+static void record_sockaddr_info(struct sockaddr_storage *addr_info,
+					nes_addr_t *ip_addr, u16 *port_num)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)addr_info;
+
+	if (sin->sin_family != AF_INET)
+		return;	/* only AF_INET is recorded; outputs stay untouched */
+	*ip_addr = ntohl(sin->sin_addr.s_addr);	/* stored in host byte order */
+	*port_num = ntohs(sin->sin_port);
+}
+
 /*
  * nes_record_pm_msg - Save the received mapping info
  */
 static void nes_record_pm_msg(struct nes_cm_info *cm_info,
 			struct iwpm_sa_data *pm_msg)
 {
-	struct sockaddr_in *mapped_loc_addr =
-			(struct sockaddr_in *)&pm_msg->mapped_loc_addr;
-	struct sockaddr_in *mapped_rem_addr =
-			(struct sockaddr_in *)&pm_msg->mapped_rem_addr;
+	record_sockaddr_info(&pm_msg->mapped_loc_addr,
+		&cm_info->mapped_loc_addr, &cm_info->mapped_loc_port);
 
-	if (mapped_loc_addr->sin_family == AF_INET) {
-		cm_info->mapped_loc_addr =
-			ntohl(mapped_loc_addr->sin_addr.s_addr);
-		cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port);
-	}
-	if (mapped_rem_addr->sin_family == AF_INET) {
-		cm_info->mapped_rem_addr =
-			ntohl(mapped_rem_addr->sin_addr.s_addr);
-		cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port);
-	}
+	record_sockaddr_info(&pm_msg->mapped_rem_addr,
+		&cm_info->mapped_rem_addr, &cm_info->mapped_rem_port);
+}
+
+/*
+ * nes_get_remote_addr - Get the address info of the remote connecting peer
+ */
+static int nes_get_remote_addr(struct nes_cm_node *cm_node)
+{
+	struct sockaddr_storage loc_key, rem_key, peer_addr;
+	int ret;
+
+	nes_create_sockaddr(htonl(cm_node->mapped_loc_addr),
+			htons(cm_node->mapped_loc_port), &loc_key);
+	nes_create_sockaddr(htonl(cm_node->mapped_rem_addr),
+			htons(cm_node->mapped_rem_port), &rem_key);
+
+	ret = iwpm_get_remote_info(&loc_key, &rem_key,
+				&peer_addr, RDMA_NL_NES);
+	if (ret) {
+		nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n");
+		return ret;
+	}
+	record_sockaddr_info(&peer_addr, &cm_node->rem_addr,
+			&cm_node->rem_port);
+	return 0;
 }
 
 /**
@@ -1566,9 +1591,14 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 		return NULL;
 
 	/* set our node specific transport info */
-	cm_node->loc_addr = cm_info->loc_addr;
+	if (listener) {
+		cm_node->loc_addr = listener->loc_addr;
+		cm_node->loc_port = listener->loc_port;
+	} else {
+		cm_node->loc_addr = cm_info->loc_addr;
+		cm_node->loc_port = cm_info->loc_port;
+	}
 	cm_node->rem_addr = cm_info->rem_addr;
-	cm_node->loc_port = cm_info->loc_port;
 	cm_node->rem_port = cm_info->rem_port;
 
 	cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
@@ -2151,6 +2181,7 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		cm_node->state = NES_CM_STATE_ESTABLISHED;
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+			nes_get_remote_addr(cm_node);
 			handle_rcv_mpa(cm_node, skb);
 		} else { /* rcvd ACK only */
 			dev_kfree_skb_any(skb);

From 5b6b8fe64053b2649660ded2f3c5be25ebddbfdb Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 21 Apr 2015 16:28:41 -0400
Subject: [PATCH 051/101] RDMA/cxgb4: Report the actual address of the remote
 connecting peer

Get the actual (non-mapped) ip/tcp address of the connecting peer from
the port mapper

Also setup the passive side endpoint to correctly display the actual
and mapped addresses for the new connection.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c     | 54 +++++++++++++++++++++++++---
 drivers/infiniband/hw/cxgb4/device.c |  1 +
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 6fb31bacd5b4..3c3b00e4e7af 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -583,6 +583,22 @@ static void c4iw_record_pm_msg(struct c4iw_ep *ep,
 		sizeof(ep->com.mapped_remote_addr));
 }
 
+/* Query the port mapper for the peer address; the result goes to com.remote_addr */
+static int get_remote_addr(struct c4iw_ep *ep)
+{
+	int err;
+
+	print_addr(&ep->com, __func__, "get_remote_addr");
+
+	err = iwpm_get_remote_info(&ep->com.mapped_local_addr,
+				   &ep->com.mapped_remote_addr,
+				   &ep->com.remote_addr, RDMA_NL_C4IW);
+	if (!err)
+		return 0;
+	pr_info(MOD "Unable to find remote peer addr info - err %d\n", err);
+	return err;
+}
+
 static void best_mtu(const unsigned short *mtus, unsigned short mtu,
 		     unsigned int *idx, int use_ts, int ipv6)
 {
@@ -2352,27 +2368,57 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	state_set(&child_ep->com, CONNECTING);
 	child_ep->com.dev = dev;
 	child_ep->com.cm_id = NULL;
+
+	/*
+	 * The mapped_local and mapped_remote addresses get setup with
+	 * the actual 4-tuple.  The local address will be based on the
+	 * actual local address of the connection, but on the port number
+	 * of the parent listening endpoint.  The remote address is
+	 * setup based on a query to the IWPM since we don't know what it
+	 * originally was before mapping.  If no mapping was done, then
+	 * mapped_remote == remote, and mapped_local == local.
+	 */
 	if (iptype == 4) {
 		struct sockaddr_in *sin = (struct sockaddr_in *)
-			&child_ep->com.local_addr;
+			&child_ep->com.mapped_local_addr;
+
 		sin->sin_family = PF_INET;
 		sin->sin_port = local_port;
 		sin->sin_addr.s_addr = *(__be32 *)local_ip;
-		sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
+
+		sin = (struct sockaddr_in *)&child_ep->com.local_addr;
+		sin->sin_family = PF_INET;
+		sin->sin_port = ((struct sockaddr_in *)
+				 &parent_ep->com.local_addr)->sin_port;
+		sin->sin_addr.s_addr = *(__be32 *)local_ip;
+
+		sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr;
 		sin->sin_family = PF_INET;
 		sin->sin_port = peer_port;
 		sin->sin_addr.s_addr = *(__be32 *)peer_ip;
 	} else {
 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
-			&child_ep->com.local_addr;
+			&child_ep->com.mapped_local_addr;
+
 		sin6->sin6_family = PF_INET6;
 		sin6->sin6_port = local_port;
 		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
-		sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
+
+		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
+		sin6->sin6_family = PF_INET6;
+		sin6->sin6_port = ((struct sockaddr_in6 *)
+				   &parent_ep->com.local_addr)->sin6_port;
+		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
+
+		sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr;
 		sin6->sin6_family = PF_INET6;
 		sin6->sin6_port = peer_port;
 		memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
 	}
+	memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr,
+	       sizeof(child_ep->com.remote_addr));
+	get_remote_addr(child_ep);
+
 	c4iw_get_ep(&parent_ep->com);
 	child_ep->parent_ep = parent_ep;
 	child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 83209bb38285..1ffbd038c0ae 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -93,6 +93,7 @@ static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
 	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
 	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
 	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
 	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
 	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
 };

From c1d383b5785b1e0fb5fb862864712a7208219e6a Mon Sep 17 00:00:00 2001
From: Guy Shapiro <guysh@mellanox.com>
Date: Wed, 15 Apr 2015 18:17:56 +0300
Subject: [PATCH 052/101] IB/core: dma map/unmap locking optimizations

Currently, while mapping or unmapping pages for ODP, the umem mutex is locked
and unlocked once for each page. Each such lock/unlock pair takes a few tens to
hundreds of nanoseconds, which adds up to a significant cost when mapping or
unmapping a few MBs of memory.

To avoid this, the mutex should be locked only once per operation, and not per
page.

Signed-off-by: Guy Shapiro <guysh@mellanox.com>
Acked-by: Shachar Raindel <raindel@mellanox.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/umem_odp.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 8b8cc6fa0ab0..aba47398880d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -446,7 +446,6 @@ static int ib_umem_odp_map_dma_single_page(
 	int remove_existing_mapping = 0;
 	int ret = 0;
 
-	mutex_lock(&umem->odp_data->umem_mutex);
 	/*
 	 * Note: we avoid writing if seq is different from the initial seq, to
 	 * handle case of a racing notifier. This check also allows us to bail
@@ -479,8 +478,6 @@ static int ib_umem_odp_map_dma_single_page(
 	}
 
 out:
-	mutex_unlock(&umem->odp_data->umem_mutex);
-
 	/* On Demand Paging - avoid pinning the page */
 	if (umem->context->invalidate_range || !stored_page)
 		put_page(page);
@@ -586,6 +583,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 
 		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
 		user_virt += npages << PAGE_SHIFT;
+		mutex_lock(&umem->odp_data->umem_mutex);
 		for (j = 0; j < npages; ++j) {
 			ret = ib_umem_odp_map_dma_single_page(
 				umem, k, base_virt_addr, local_page_list[j],
@@ -594,6 +592,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 				break;
 			k++;
 		}
+		mutex_unlock(&umem->odp_data->umem_mutex);
 
 		if (ret < 0) {
 			/* Release left over pages when handling errors. */
@@ -633,9 +632,9 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 	 * faults from completion. We might be racing with other
 	 * invalidations, so we must make sure we free each page only
 	 * once. */
+	mutex_lock(&umem->odp_data->umem_mutex);
 	for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
 		idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
-		mutex_lock(&umem->odp_data->umem_mutex);
 		if (umem->odp_data->page_list[idx]) {
 			struct page *page = umem->odp_data->page_list[idx];
 			struct page *head_page = compound_head(page);
@@ -663,7 +662,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 			umem->odp_data->page_list[idx] = NULL;
 			umem->odp_data->dma_list[idx] = 0;
 		}
-		mutex_unlock(&umem->odp_data->umem_mutex);
 	}
+	mutex_unlock(&umem->odp_data->umem_mutex);
 }
 EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);

From 325ad0617adaf163e32dd2d857b90baf65a25b5b Mon Sep 17 00:00:00 2001
From: Guy Shapiro <guysh@mellanox.com>
Date: Wed, 15 Apr 2015 18:17:57 +0300
Subject: [PATCH 053/101] IB/core: dma unmap optimizations

While unmapping an ODP writable page, the dirty bit of the page is set. In
order to do so, the head of the compound page is found.
Currently, the compound head is looked up even for non-writable pages, where it
is never used, leading to an unnecessary CPU barrier that hurts performance.

This patch moves the search for the compound head to be done only when needed.

Signed-off-by: Guy Shapiro <guysh@mellanox.com>
Acked-by: Shachar Raindel <raindel@mellanox.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/umem_odp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index aba47398880d..40becdb3196e 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -637,7 +637,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 		idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
 		if (umem->odp_data->page_list[idx]) {
 			struct page *page = umem->odp_data->page_list[idx];
-			struct page *head_page = compound_head(page);
 			dma_addr_t dma = umem->odp_data->dma_list[idx];
 			dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
 
@@ -645,7 +644,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 
 			ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
 					  DMA_BIDIRECTIONAL);
-			if (dma & ODP_WRITE_ALLOWED_BIT)
+			if (dma & ODP_WRITE_ALLOWED_BIT) {
+				struct page *head_page = compound_head(page);
 				/*
 				 * set_page_dirty prefers being called with
 				 * the page lock. However, MMU notifiers are
@@ -656,6 +656,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
 				 * be removed.
 				 */
 				set_page_dirty(head_page);
+			}
 			/* on demand pinning support */
 			if (!umem->context->invalidate_range)
 				put_page(page);

From 87a26e976cb93e26742224bdd39f51f7861aa9b7 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Tue, 21 Apr 2015 14:50:34 -0700
Subject: [PATCH 054/101] IB/qib: add accounting for MTRR

There is no good reason not to; we eventually delete it as well.

Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Suresh Siddha <sbsiddha@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Juergen Gross <jgross@suse.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Antonino Daplas <adaplas@gmail.com>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Mike Marciniszyn <infinipath@intel.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Cc: linux-rdma@vger.kernel.org
Cc: linux-fbdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/qib/qib_wc_x86_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
index 81b225f2300a..fe0850ac6883 100644
--- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -118,7 +118,7 @@ int qib_enable_wc(struct qib_devdata *dd)
 	if (!ret) {
 		int cookie;
 
-		cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
+		cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1);
 		if (cookie < 0) {
 			{
 				qib_devinfo(dd->pcidev,

From d4988623cc605131bed8c77f007082c3555c39ee Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Wed, 22 Apr 2015 11:38:24 -0700
Subject: [PATCH 055/101] IB/qib: use arch_phys_wc_add()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver already makes use of ioremap_wc() on PIO buffers,
so convert it to use arch_phys_wc_add().

The qib driver uses a mmap() special case for when PAT is
not used. This behaviour used to be determined with a
module parameter, but since we have been asked to just
remove that module parameter, this checks for the WC cookie:
if it is not set we can assume PAT was used. If it's set we do
what we used to do for the mmap for when MTRR was enabled.

The removal of the module parameter is OK given that Andy
notes that even if users of module parameter are still around
it will not prevent loading of the module on recent kernels.

Cc: Doug Ledford <dledford@redhat.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Rickard Strandqvist <rickard_strandqvist@spectrumdigital.se>
Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
Cc: Roland Dreier <roland@purestorage.com>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Cc: Dennis Dalessandro <dennis.dalessandro@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Suresh Siddha <sbsiddha@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Juergen Gross <jgross@suse.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Antonino Daplas <adaplas@gmail.com>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Stefan Bader <stefan.bader@canonical.com>
Cc: konrad.wilk@oracle.com
Cc: ville.syrjala@linux.intel.com
Cc: david.vrabel@citrix.com
Cc: jbeulich@suse.com
Cc: Roger Pau Monné <roger.pau@citrix.com>
Cc: infinipath@intel.com
Cc: linux-rdma@vger.kernel.org
Cc: linux-fbdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: xen-devel@lists.xensource.com
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/qib/qib.h           |  1 -
 drivers/infiniband/hw/qib/qib_file_ops.c  |  3 +-
 drivers/infiniband/hw/qib/qib_iba6120.c   |  8 ++---
 drivers/infiniband/hw/qib/qib_iba7220.c   |  8 ++---
 drivers/infiniband/hw/qib/qib_iba7322.c   | 41 +++++++++++------------
 drivers/infiniband/hw/qib/qib_init.c      | 26 ++++----------
 drivers/infiniband/hw/qib/qib_wc_x86_64.c | 31 +++--------------
 7 files changed, 39 insertions(+), 79 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index ffd48bfc4923..ba5173e24973 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1136,7 +1136,6 @@ extern struct qib_devdata *qib_lookup(int unit);
 extern u32 qib_cpulist_count;
 extern unsigned long *qib_cpulist;
 
-extern unsigned qib_wc_pat;
 extern unsigned qib_cc_table_size;
 int qib_init(struct qib_devdata *, int);
 int init_chip_wc_pat(struct qib_devdata *dd, u32);
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 9ea6c440a00c..725881890c4a 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -835,7 +835,8 @@ static int mmap_piobufs(struct vm_area_struct *vma,
 	vma->vm_flags &= ~VM_MAYREAD;
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 
-	if (qib_wc_pat)
+	/* We used PAT if wc_cookie == 0 */
+	if (!dd->wc_cookie)
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 
 	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 0d2ba59af30a..4b927809d1a1 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3315,11 +3315,9 @@ static int init_6120_variables(struct qib_devdata *dd)
 	qib_6120_config_ctxts(dd);
 	qib_set_ctxtcnt(dd);
 
-	if (qib_wc_pat) {
-		ret = init_chip_wc_pat(dd, 0);
-		if (ret)
-			goto bail;
-	}
+	ret = init_chip_wc_pat(dd, 0);
+	if (ret)
+		goto bail;
 	set_6120_baseaddrs(dd); /* set chip access pointers now */
 
 	ret = 0;
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 22affda8af88..00b2af211157 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4126,11 +4126,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
 	qib_7220_config_ctxts(dd);
 	qib_set_ctxtcnt(dd);  /* needed for PAT setup */
 
-	if (qib_wc_pat) {
-		ret = init_chip_wc_pat(dd, 0);
-		if (ret)
-			goto bail;
-	}
+	ret = init_chip_wc_pat(dd, 0);
+	if (ret)
+		goto bail;
 	set_7220_baseaddrs(dd); /* set chip access pointers now */
 
 	ret = 0;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ef97b71c8f7d..f32b4628e991 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -6429,6 +6429,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
 	unsigned features, pidx, sbufcnt;
 	int ret, mtu;
 	u32 sbufs, updthresh;
+	resource_size_t vl15off;
 
 	/* pport structs are contiguous, allocated after devdata */
 	ppd = (struct qib_pportdata *)(dd + 1);
@@ -6677,29 +6678,27 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
 	qib_7322_config_ctxts(dd);
 	qib_set_ctxtcnt(dd);
 
-	if (qib_wc_pat) {
-		resource_size_t vl15off;
-		/*
-		 * We do not set WC on the VL15 buffers to avoid
-		 * a rare problem with unaligned writes from
-		 * interrupt-flushed store buffers, so we need
-		 * to map those separately here.  We can't solve
-		 * this for the rarely used mtrr case.
-		 */
-		ret = init_chip_wc_pat(dd, 0);
-		if (ret)
-			goto bail;
+	/*
+	 * We do not set WC on the VL15 buffers to avoid
+	 * a rare problem with unaligned writes from
+	 * interrupt-flushed store buffers, so we need
+	 * to map those separately here.  We can't solve
+	 * this for the rarely used mtrr case.
+	 */
+	ret = init_chip_wc_pat(dd, 0);
+	if (ret)
+		goto bail;
 
-		/* vl15 buffers start just after the 4k buffers */
-		vl15off = dd->physaddr + (dd->piobufbase >> 32) +
-			dd->piobcnt4k * dd->align4k;
-		dd->piovl15base	= ioremap_nocache(vl15off,
-						  NUM_VL15_BUFS * dd->align4k);
-		if (!dd->piovl15base) {
-			ret = -ENOMEM;
-			goto bail;
-		}
+	/* vl15 buffers start just after the 4k buffers */
+	vl15off = dd->physaddr + (dd->piobufbase >> 32) +
+		  dd->piobcnt4k * dd->align4k;
+	dd->piovl15base	= ioremap_nocache(vl15off,
+					  NUM_VL15_BUFS * dd->align4k);
+	if (!dd->piovl15base) {
+		ret = -ENOMEM;
+		goto bail;
 	}
+
 	qib_7322_set_baseaddrs(dd); /* set chip access pointers now */
 
 	ret = 0;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 2ee36953e234..7e00470adc30 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -91,15 +91,6 @@ MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
 unsigned qib_cc_table_size;
 module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
-/*
- * qib_wc_pat parameter:
- *      0 is WC via MTRR
- *      1 is WC via PAT
- *      If PAT initialization fails, code reverts back to MTRR
- */
-unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
-module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
-MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
 
 static void verify_interrupt(unsigned long);
 
@@ -1377,8 +1368,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
 		spin_unlock(&dd->pport[pidx].cc_shadow_lock);
 	}
 
-	if (!qib_wc_pat)
-		qib_disable_wc(dd);
+	qib_disable_wc(dd);
 
 	if (dd->pioavailregs_dma) {
 		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
@@ -1547,14 +1537,12 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto bail;
 	}
 
-	if (!qib_wc_pat) {
-		ret = qib_enable_wc(dd);
-		if (ret) {
-			qib_dev_err(dd,
-				"Write combining not enabled (err %d): performance may be poor\n",
-				-ret);
-			ret = 0;
-		}
+	ret = qib_enable_wc(dd);
+	if (ret) {
+		qib_dev_err(dd,
+			"Write combining not enabled (err %d): performance may be poor\n",
+			-ret);
+		ret = 0;
 	}
 
 	qib_verify_pioperf(dd);
diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
index fe0850ac6883..6d61ef98721c 100644
--- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -116,21 +116,9 @@ int qib_enable_wc(struct qib_devdata *dd)
 	}
 
 	if (!ret) {
-		int cookie;
-
-		cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 1);
-		if (cookie < 0) {
-			{
-				qib_devinfo(dd->pcidev,
-					 "mtrr_add()  WC for PIO bufs failed (%d)\n",
-					 cookie);
-				ret = -EINVAL;
-			}
-		} else {
-			dd->wc_cookie = cookie;
-			dd->wc_base = (unsigned long) pioaddr;
-			dd->wc_len = (unsigned long) piolen;
-		}
+		dd->wc_cookie = arch_phys_wc_add(pioaddr, piolen);
+		if (dd->wc_cookie < 0)
+			ret = -EINVAL;
 	}
 
 	return ret;
@@ -142,18 +130,7 @@ int qib_enable_wc(struct qib_devdata *dd)
  */
 void qib_disable_wc(struct qib_devdata *dd)
 {
-	if (dd->wc_cookie) {
-		int r;
-
-		r = mtrr_del(dd->wc_cookie, dd->wc_base,
-			     dd->wc_len);
-		if (r < 0)
-			qib_devinfo(dd->pcidev,
-				 "mtrr_del(%lx, %lx, %lx) failed: %d\n",
-				 dd->wc_cookie, dd->wc_base,
-				 dd->wc_len, r);
-		dd->wc_cookie = 0; /* even on failure */
-	}
+	arch_phys_wc_del(dd->wc_cookie);
 }
 
 /**

From d67e199611b986b345ea3087ee2e4a15da1c98b3 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 21 Apr 2015 16:46:28 +0300
Subject: [PATCH 056/101] efi: Fix error handling in
 add_sysfs_runtime_map_entry()

I spotted two (difficult to hit) bugs while reviewing this.

1)  There is a double free bug because we unregister "map_kset" in
    add_sysfs_runtime_map_entry() and also efi_runtime_map_init().
2)  If we fail to allocate "entry" then we should return
    ERR_PTR(-ENOMEM) instead of NULL.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Guangyu Sun <guangyu.sun@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efi/runtime-map.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 87b8e3b900d2..5c55227a34c8 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -120,7 +120,8 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry) {
 		kset_unregister(map_kset);
-		return entry;
+		map_kset = NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size,
@@ -132,6 +133,7 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
 	if (ret) {
 		kobject_put(&entry->kobj);
 		kset_unregister(map_kset);
+		map_kset = NULL;
 		return ERR_PTR(ret);
 	}
 
@@ -195,8 +197,6 @@ int __init efi_runtime_map_init(struct kobject *efi_kobj)
 		entry = *(map_entries + j);
 		kobject_put(&entry->kobj);
 	}
-	if (map_kset)
-		kset_unregister(map_kset);
 out:
 	return ret;
 }

From e59d29e88f7b7e3d1231202b0203d0af6f15a440 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Tue, 28 Apr 2015 08:46:09 +0000
Subject: [PATCH 057/101] perf probe: Fix segfault if passed with ''.

Since parse_perf_probe_point() deals with a user passed argument, we
should not assume it to be a valid string.

Without this patch, if '' is passed to perf probe, a segfault occurs:

 $ perf probe -a ''
 Segmentation fault

This patch checks the argument of parse_perf_probe_point() before
string processing.

After this patch:

 $ perf probe -a ''

  usage: perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]
     or: perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]
     ...

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1430210769-94177-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d8bb616ff57c..d05b77cf35f7 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1084,6 +1084,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 	 *
 	 * TODO:Group name support
 	 */
+	if (!arg)
+		return -EINVAL;
 
 	ptr = strpbrk(arg, ";=@+%");
 	if (ptr && *ptr == '=') {	/* Event name */

From 471e70583217728955436a3fa6e5201e5c8c296a Mon Sep 17 00:00:00 2001
From: Honggang LI <honli@redhat.com>
Date: Wed, 29 Apr 2015 17:40:44 +0800
Subject: [PATCH 058/101] IB/core: change rdma_gid2ip into void function as it
 always returns zero

Signed-off-by: Honggang Li <honli@redhat.com>
Acked-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/addr.c | 13 +++----------
 include/rdma/ib_addr.h         |  3 +--
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index f80da50d84a5..38339d220d7f 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -472,13 +472,8 @@ int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
 	} sgid_addr, dgid_addr;
 
 
-	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
-	if (ret)
-		return ret;
-
-	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
-	if (ret)
-		return ret;
+	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
 	memset(&dev_addr, 0, sizeof(dev_addr));
 
@@ -512,10 +507,8 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 		struct sockaddr_in6 _sockaddr_in6;
 	} gid_addr;
 
-	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+	rdma_gid2ip(&gid_addr._sockaddr, sgid);
 
-	if (ret)
-		return ret;
 	memset(&dev_addr, 0, sizeof(dev_addr));
 	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
 	if (ret)
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index ce55906b54a0..ac54c27a2bfd 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -160,7 +160,7 @@ static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
 }
 
 /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
-static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
+static inline void rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
 {
 	if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
 		struct sockaddr_in *out_in = (struct sockaddr_in *)out;
@@ -173,7 +173,6 @@ static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
 		out_in->sin6_family = AF_INET6;
 		memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
 	}
-	return 0;
 }
 
 static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,

From 0d0f738f6a11856a704dcd8fd3a008b200f17625 Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Sun, 3 May 2015 09:48:26 -0400
Subject: [PATCH 059/101] IB/core: Fix unaligned accesses

Addresses the following kernel logs seen during boot of sparc systems:

Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm]
Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm]
Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm]
Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm]
Kernel unaligned access at TPC[103bce50] cm_find_listen+0x34/0xf8 [ib_cm]

Signed-off-by: David Ahern <david.ahern@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/cm.c      | 23 +++++++++++------------
 drivers/infiniband/core/cm_msgs.h |  4 ++--
 include/rdma/ib_cm.h              |  7 ++++---
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index e28a494e2a3a..0c1419105ff0 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -437,39 +437,38 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
 	return cm_id_priv;
 }
 
-static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
+static void cm_mask_copy(u32 *dst, const u32 *src, const u32 *mask)
 {
 	int i;
 
-	for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
-		((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
-					     ((unsigned long *) mask)[i];
+	for (i = 0; i < IB_CM_COMPARE_SIZE; i++)
+		dst[i] = src[i] & mask[i];
 }
 
 static int cm_compare_data(struct ib_cm_compare_data *src_data,
 			   struct ib_cm_compare_data *dst_data)
 {
-	u8 src[IB_CM_COMPARE_SIZE];
-	u8 dst[IB_CM_COMPARE_SIZE];
+	u32 src[IB_CM_COMPARE_SIZE];
+	u32 dst[IB_CM_COMPARE_SIZE];
 
 	if (!src_data || !dst_data)
 		return 0;
 
 	cm_mask_copy(src, src_data->data, dst_data->mask);
 	cm_mask_copy(dst, dst_data->data, src_data->mask);
-	return memcmp(src, dst, IB_CM_COMPARE_SIZE);
+	return memcmp(src, dst, sizeof(src));
 }
 
-static int cm_compare_private_data(u8 *private_data,
+static int cm_compare_private_data(u32 *private_data,
 				   struct ib_cm_compare_data *dst_data)
 {
-	u8 src[IB_CM_COMPARE_SIZE];
+	u32 src[IB_CM_COMPARE_SIZE];
 
 	if (!dst_data)
 		return 0;
 
 	cm_mask_copy(src, private_data, dst_data->mask);
-	return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
+	return memcmp(src, dst_data->data, sizeof(src));
 }
 
 /*
@@ -538,7 +537,7 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
 
 static struct cm_id_private * cm_find_listen(struct ib_device *device,
 					     __be64 service_id,
-					     u8 *private_data)
+					     u32 *private_data)
 {
 	struct rb_node *node = cm.listen_service_table.rb_node;
 	struct cm_id_private *cm_id_priv;
@@ -953,7 +952,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
 		cm_mask_copy(cm_id_priv->compare_data->data,
 			     compare_data->data, compare_data->mask);
 		memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
-		       IB_CM_COMPARE_SIZE);
+		       sizeof(compare_data->mask));
 	}
 
 	cm_id->state = IB_CM_LISTEN;
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index be068f47e47e..8b76f0ef965e 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -103,7 +103,7 @@ struct cm_req_msg {
 	/* local ACK timeout:5, rsvd:3 */
 	u8 alt_offset139;
 
-	u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
+	u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
 
 } __attribute__ ((packed));
 
@@ -801,7 +801,7 @@ struct cm_sidr_req_msg {
 	__be16 rsvd;
 	__be64 service_id;
 
-	u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE];
+	u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
 } __attribute__ ((packed));
 
 struct cm_sidr_rep_msg {
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 0e3ff30647d5..39ed2d2fbd51 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -105,7 +105,8 @@ enum ib_cm_data_size {
 	IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
 	IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
 	IB_CM_SIDR_REP_INFO_LENGTH	 = 72,
-	IB_CM_COMPARE_SIZE		 = 64
+	/* compare done u32 at a time */
+	IB_CM_COMPARE_SIZE		 = (64 / sizeof(u32))
 };
 
 struct ib_cm_id;
@@ -337,8 +338,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id);
 #define IB_SDP_SERVICE_ID_MASK	cpu_to_be64(0xFFFFFFFFFFFF0000ULL)
 
 struct ib_cm_compare_data {
-	u8  data[IB_CM_COMPARE_SIZE];
-	u8  mask[IB_CM_COMPARE_SIZE];
+	u32  data[IB_CM_COMPARE_SIZE];
+	u32  mask[IB_CM_COMPARE_SIZE];
 };
 
 /**

From 179d03bbfd2ebc63934753a696467d28bf9f5b64 Mon Sep 17 00:00:00 2001
From: Hariprasad S <hariprasad@chelsio.com>
Date: Tue, 5 May 2015 03:55:24 +0530
Subject: [PATCH 060/101] iw_cxgb4: Remove negative advice dmesg warnings

Remove these log messages in favor of per-endpoint counters as well as
device-global counters that can be inspected via debugfs.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c       | 27 +++++++++++++++++---------
 drivers/infiniband/hw/cxgb4/device.c   |  7 +++++++
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  7 +++++++
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 3c3b00e4e7af..bb95a6c0477b 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2058,9 +2058,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	     status, status2errno(status));
 
 	if (is_neg_adv(status)) {
-		dev_warn(&dev->rdev.lldi.pdev->dev,
-			 "Connection problems for atid %u status %u (%s)\n",
-			 atid, status, neg_adv_str(status));
+		PDBG("%s Connection problems for atid %u status %u (%s)\n",
+		     __func__, atid, status, neg_adv_str(status));
+		ep->stats.connect_neg_adv++;
+		mutex_lock(&dev->rdev.stats.lock);
+		dev->rdev.stats.neg_adv++;
+		mutex_unlock(&dev->rdev.stats.lock);
 		return 0;
 	}
 
@@ -2566,9 +2569,13 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	ep = lookup_tid(t, tid);
 	if (is_neg_adv(req->status)) {
-		dev_warn(&dev->rdev.lldi.pdev->dev,
-			 "Negative advice on abort - tid %u status %d (%s)\n",
-			 ep->hwtid, req->status, neg_adv_str(req->status));
+		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
+		     __func__, ep->hwtid, req->status,
+		     neg_adv_str(req->status));
+		ep->stats.abort_neg_adv++;
+		mutex_lock(&dev->rdev.stats.lock);
+		dev->rdev.stats.neg_adv++;
+		mutex_unlock(&dev->rdev.stats.lock);
 		return 0;
 	}
 	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
@@ -3977,9 +3984,11 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
 		return 0;
 	}
 	if (is_neg_adv(req->status)) {
-		dev_warn(&dev->rdev.lldi.pdev->dev,
-			 "Negative advice on abort - tid %u status %d (%s)\n",
-			 ep->hwtid, req->status, neg_adv_str(req->status));
+		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
+		     __func__, ep->hwtid, req->status,
+		     neg_adv_str(req->status));
+		ep->stats.abort_neg_adv++;
+		dev->rdev.stats.neg_adv++;
 		kfree_skb(skb);
 		return 0;
 	}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 1ffbd038c0ae..cf54d6922dc4 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -490,6 +490,7 @@ static int stats_show(struct seq_file *seq, void *v)
 		   dev->rdev.stats.act_ofld_conn_fails);
 	seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
 		   dev->rdev.stats.pas_ofld_conn_fails);
+	seq_printf(seq, "NEG_ADV_RCVD: %10llu\n", dev->rdev.stats.neg_adv);
 	seq_printf(seq, "AVAILABLE IRD: %10u\n", dev->avail_ird);
 	return 0;
 }
@@ -561,10 +562,13 @@ static int dump_ep(int id, void *p, void *data)
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
 			      "history 0x%lx hwtid %d atid %d "
+			      "conn_na %u abort_na %u "
 			      "%pI4:%d/%d <-> %pI4:%d/%d\n",
 			      ep, ep->com.cm_id, ep->com.qp,
 			      (int)ep->com.state, ep->com.flags,
 			      ep->com.history, ep->hwtid, ep->atid,
+			      ep->stats.connect_neg_adv,
+			      ep->stats.abort_neg_adv,
 			      &lsin->sin_addr, ntohs(lsin->sin_port),
 			      ntohs(mapped_lsin->sin_port),
 			      &rsin->sin_addr, ntohs(rsin->sin_port),
@@ -582,10 +586,13 @@ static int dump_ep(int id, void *p, void *data)
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
 			      "history 0x%lx hwtid %d atid %d "
+			      "conn_na %u abort_na %u "
 			      "%pI6:%d/%d <-> %pI6:%d/%d\n",
 			      ep, ep->com.cm_id, ep->com.qp,
 			      (int)ep->com.state, ep->com.flags,
 			      ep->com.history, ep->hwtid, ep->atid,
+			      ep->stats.connect_neg_adv,
+			      ep->stats.abort_neg_adv,
 			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
 			      ntohs(mapped_lsin6->sin6_port),
 			      &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index d87e1650f643..97bb5550a6cf 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -137,6 +137,7 @@ struct c4iw_stats {
 	u64  tcam_full;
 	u64  act_ofld_conn_fails;
 	u64  pas_ofld_conn_fails;
+	u64  neg_adv;
 };
 
 struct c4iw_hw_queue {
@@ -814,6 +815,11 @@ struct c4iw_listen_ep {
 	int backlog;
 };
 
+struct c4iw_ep_stats {
+	unsigned connect_neg_adv;
+	unsigned abort_neg_adv;
+};
+
 struct c4iw_ep {
 	struct c4iw_ep_common com;
 	struct c4iw_ep *parent_ep;
@@ -846,6 +852,7 @@ struct c4iw_ep {
 	unsigned int retry_count;
 	int snd_win;
 	int rcv_win;
+	struct c4iw_ep_stats stats;
 };
 
 static inline void print_addr(struct c4iw_ep_common *epc, const char *func,

From 8f71c1a27b84948720be17fffba71a67a1f0942d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 5 May 2015 13:01:39 +0200
Subject: [PATCH 061/101] IPoIB/CM: Fix indentation level

See also patch "IPoIB/cm: Add connected mode support for devices
without SRQs" (commit ID 68e995a29572). Detected by smatch.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Pradeep Satyanarayana <pradeeps@linux.vnet.ibm.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 56959adb6c7d..cf32a778e7d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -386,8 +386,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 					   rx->rx_ring[i].mapping,
 					   GFP_KERNEL)) {
 			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
-				ret = -ENOMEM;
-				goto err_count;
+			ret = -ENOMEM;
+			goto err_count;
 		}
 		ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
 		if (ret) {

From 954138dc25dca0263f315c5a3450059df05a4ea1 Mon Sep 17 00:00:00 2001
From: Yann Droneaud <ydroneaud@opteya.com>
Date: Mon, 4 May 2015 14:31:03 +0200
Subject: [PATCH 062/101] MAINTAINERS: add include/rdma/ to InfiniBand
 subsystem

Most headers for InfiniBand/RDMA are located under
include/rdma/ and include/uapi/rdma.

Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 781e099495d3..c80714ad3114 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5053,6 +5053,8 @@ S:	Supported
 F:	Documentation/infiniband/
 F:	drivers/infiniband/
 F:	include/uapi/linux/if_infiniband.h
+F:	include/uapi/rdma/
+F:	include/rdma/
 
 INOTIFY
 M:	John McCutchan <john@johnmccutchan.com>

From b6b2bbe65b2117afd9766faa7cffea4d8f681455 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Tue, 5 May 2015 12:57:09 -0400
Subject: [PATCH 063/101] MAINTAINERS: Update InfiniBand subsystem maintainer

Since Roland stepped down, the community asked me to take his place, and
the nomination was followed by sufficient votes and no dissensions that
we can move forward with the change.

Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c80714ad3114..40c14f14204d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5042,13 +5042,13 @@ S:	Orphan
 F:	drivers/video/fbdev/imsttfb.c
 
 INFINIBAND SUBSYSTEM
-M:	Roland Dreier <roland@kernel.org>
+M:	Doug Ledford <dledford@redhat.com>
 M:	Sean Hefty <sean.hefty@intel.com>
 M:	Hal Rosenstock <hal.rosenstock@gmail.com>
 L:	linux-rdma@vger.kernel.org
 W:	http://www.openfabrics.org/
 Q:	http://patchwork.kernel.org/project/linux-rdma/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
+T:	git git://github.com/dledford/linux.git
 S:	Supported
 F:	Documentation/infiniband/
 F:	drivers/infiniband/

From 5cec98834989a014a9560b1841649eaca95cf00e Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Wed, 29 Apr 2015 17:10:12 -0400
Subject: [PATCH 064/101] xen/events: Clear cpu_evtchn_mask before resuming

When a guest is resumed, the hypervisor may change event channel
assignments. If this happens and the guest uses 2-level events it
is possible for the interrupt to be claimed by wrong VCPU since
cpu_evtchn_mask bits may be stale. This can happen even though
evtchn_2l_bind_to_cpu() attempts to clear old bits: irq_info that
is passed in is not necessarily the original one (from pre-migration
times) but instead is freshly allocated during resume and so any
information about which CPU the channel was bound to is lost.

Thus we should clear the mask during resume.

We also need to make sure that bits for xenstore and console channels
are set when these two subsystems are resumed. While rebind_evtchn_irq()
(which is invoked for both of them on a resume) calls irq_set_affinity(),
the latter will in fact postpone setting affinity until handling the
interrupt. But because cpu_evtchn_mask will have bits for these two
cleared we won't be able to take the interrupt.

With that in mind, we need to bind those two channels explicitly in
rebind_evtchn_irq(). We will keep irq_set_affinity() so that we have a
pass through generic irq affinity code later, in case something needs
to be updated there as well.

(Also replace cpumask_of(0) with cpumask_of(info->cpu) in
rebind_evtchn_irq(): it should be set to zero in preceding
xen_irq_info_evtchn_setup().)

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reported-by: Annie Li <annie.li@oracle.com>
Cc: <stable@vger.kernel.org> # 3.14+
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/xen/events/events_2l.c   | 10 ++++++++++
 drivers/xen/events/events_base.c |  5 +++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 5db43fc100a4..7dd46312c180 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -345,6 +345,15 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static void evtchn_2l_resume(void)
+{
+	int i;
+
+	for_each_online_cpu(i)
+		memset(per_cpu(cpu_evtchn_mask, i), 0, sizeof(xen_ulong_t) *
+				EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
+}
+
 static const struct evtchn_ops evtchn_ops_2l = {
 	.max_channels      = evtchn_2l_max_channels,
 	.nr_channels       = evtchn_2l_max_channels,
@@ -356,6 +365,7 @@ static const struct evtchn_ops evtchn_ops_2l = {
 	.mask              = evtchn_2l_mask,
 	.unmask            = evtchn_2l_unmask,
 	.handle_events     = evtchn_2l_handle_events,
+	.resume	           = evtchn_2l_resume,
 };
 
 void __init xen_evtchn_2l_init(void)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 70fba973a107..a1ec564d791c 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1279,8 +1279,9 @@ void rebind_evtchn_irq(int evtchn, int irq)
 
 	mutex_unlock(&irq_mapping_update_lock);
 
-	/* new event channels are always bound to cpu 0 */
-	irq_set_affinity(irq, cpumask_of(0));
+        bind_evtchn_to_cpu(evtchn, info->cpu);
+	/* This will be deferred until interrupt is processed */
+	irq_set_affinity(irq, cpumask_of(info->cpu));
 
 	/* Unmask the event channel. */
 	enable_irq(irq);

From 16f1cf3ba7303228372d3756677bf7d10e79cf9f Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Wed, 29 Apr 2015 17:10:13 -0400
Subject: [PATCH 065/101] xen/xenbus: Update xenbus event channel on resume

After a resume the hypervisor/tools may change xenbus event
channel number. We should re-query it.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/xen/xenbus/xenbus_probe.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 564b31584860..5390a674b5e3 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -57,6 +57,7 @@
 #include <xen/xen.h>
 #include <xen/xenbus.h>
 #include <xen/events.h>
+#include <xen/xen-ops.h>
 #include <xen/page.h>
 
 #include <xen/hvm.h>
@@ -735,6 +736,30 @@ static int __init xenstored_local_init(void)
 	return err;
 }
 
+static int xenbus_resume_cb(struct notifier_block *nb,
+			    unsigned long action, void *data)
+{
+	int err = 0;
+
+	if (xen_hvm_domain()) {
+		uint64_t v;
+
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (!err && v)
+			xen_store_evtchn = v;
+		else
+			pr_warn("Cannot update xenstore event channel: %d\n",
+				err);
+	} else
+		xen_store_evtchn = xen_start_info->store_evtchn;
+
+	return err;
+}
+
+static struct notifier_block xenbus_resume_nb = {
+	.notifier_call = xenbus_resume_cb,
+};
+
 static int __init xenbus_init(void)
 {
 	int err = 0;
@@ -793,6 +818,10 @@ static int __init xenbus_init(void)
 		goto out_error;
 	}
 
+	if ((xen_store_domain_type != XS_LOCAL) &&
+	    (xen_store_domain_type != XS_UNKNOWN))
+		xen_resume_notifier_register(&xenbus_resume_nb);
+
 #ifdef CONFIG_XEN_COMPAT_XENFS
 	/*
 	 * Create xenfs mountpoint in /proc for compatibility with

From b9d934f27c91b878c4b2e64299d6e419a4022f8d Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Wed, 29 Apr 2015 17:10:14 -0400
Subject: [PATCH 066/101] xen/console: Update console event channel on resume

After a resume the hypervisor/tools may change console event
channel number. We should re-query it.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/tty/hvc/hvc_xen.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index f1e57425e39f..5bab1c684bb1 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -299,11 +299,27 @@ static int xen_initial_domain_console_init(void)
 	return 0;
 }
 
+static void xen_console_update_evtchn(struct xencons_info *info)
+{
+	if (xen_hvm_domain()) {
+		uint64_t v;
+		int err;
+
+		err = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v);
+		if (!err && v)
+			info->evtchn = v;
+	} else
+		info->evtchn = xen_start_info->console.domU.evtchn;
+}
+
 void xen_console_resume(void)
 {
 	struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE);
-	if (info != NULL && info->irq)
+	if (info != NULL && info->irq) {
+		if (!xen_initial_domain())
+			xen_console_update_evtchn(info);
 		rebind_evtchn_irq(info->evtchn, info->irq);
+	}
 }
 
 static void xencons_disconnect_backend(struct xencons_info *info)

From 16e6bd5970c88a2ac018b84a5f1dd5c2ff1fdf2c Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Wed, 29 Apr 2015 17:10:15 -0400
Subject: [PATCH 067/101] xen/events: Set irq_info->evtchn before binding the
 channel to CPU in __startup_pirq()

.. because bind_evtchn_to_cpu(evtchn, cpu) will map evtchn to
'info' and pass 'info' down to xen_evtchn_port_bind_to_cpu().

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Tested-by: Annie Li <annie.li@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/xen/events/events_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index a1ec564d791c..2b8553bd8715 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -529,8 +529,8 @@ static unsigned int __startup_pirq(unsigned int irq)
 	if (rc)
 		goto err;
 
-	bind_evtchn_to_cpu(evtchn, 0);
 	info->evtchn = evtchn;
+	bind_evtchn_to_cpu(evtchn, 0);
 
 	rc = xen_evtchn_port_setup(info);
 	if (rc)

From a71dbdaa8ca2933391b08e0ae5567083e3af0892 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Mon, 4 May 2015 11:02:15 -0400
Subject: [PATCH 068/101] hypervisor/x86/xen: Unset X86_BUG_SYSRET_SS_ATTRS on
 Xen PV guests

Commit 61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor
attribute issue") makes AMD processors set SS to __KERNEL_DS in
__switch_to() to deal with cases when SS is NULL.

This breaks Xen PV guests who do not want to load SS with __KERNEL_DS.

Since the problem that the commit is trying to address would have to be
fixed in the hypervisor (if it in fact exists under Xen) there is no
reason to set X86_BUG_SYSRET_SS_ATTRS flag for PV VCPUs here.

This can be easily achieved by adding x86_hyper_xen_hvm.set_cpu_features
op which will clear this flag. (And since this structure is no longer
HVM-specific we should do some renaming).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 arch/x86/include/asm/hypervisor.h |  2 +-
 arch/x86/kernel/cpu/hypervisor.c  |  4 ++--
 arch/x86/xen/enlighten.c          | 29 +++++++++++++++++++----------
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index e42f758a0fbd..055ea9941dd5 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper;
 /* Recognized hypervisors */
 extern const struct hypervisor_x86 x86_hyper_vmware;
 extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_xen;
 extern const struct hypervisor_x86 x86_hyper_kvm;
 
 extern void init_hypervisor(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 36ce402a3fa5..d820d8eae96b 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -27,8 +27,8 @@
 
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-#ifdef CONFIG_XEN_PVHVM
-	&x86_hyper_xen_hvm,
+#ifdef CONFIG_XEN
+	&x86_hyper_xen,
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 94578efd3067..46957ead3060 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = {
 
 static void __init xen_hvm_guest_init(void)
 {
+	if (xen_pv_domain())
+		return;
+
 	init_hvm_pv_info();
 
 	xen_hvm_init_shared_info();
@@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void)
 	xen_hvm_init_time_ops();
 	xen_hvm_init_mmu_ops();
 }
+#endif
 
 static bool xen_nopv = false;
 static __init int xen_parse_nopv(char *arg)
@@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg)
 }
 early_param("xen_nopv", xen_parse_nopv);
 
-static uint32_t __init xen_hvm_platform(void)
+static uint32_t __init xen_platform(void)
 {
 	if (xen_nopv)
 		return 0;
 
-	if (xen_pv_domain())
-		return 0;
-
 	return xen_cpuid_base();
 }
 
@@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
-	.name			= "Xen HVM",
-	.detect			= xen_hvm_platform,
+static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+{
+	if (xen_pv_domain())
+		clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+}
+
+const struct hypervisor_x86 x86_hyper_xen = {
+	.name			= "Xen",
+	.detect			= xen_platform,
+#ifdef CONFIG_XEN_PVHVM
 	.init_platform		= xen_hvm_guest_init,
-	.x2apic_available	= xen_x2apic_para_available,
-};
-EXPORT_SYMBOL(x86_hyper_xen_hvm);
 #endif
+	.x2apic_available	= xen_x2apic_para_available,
+	.set_cpu_features       = xen_set_cpu_features,
+};
+EXPORT_SYMBOL(x86_hyper_xen);

From de71ad2c97862eae1516aa36528cc3b317c17b2f Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.c.dionne@gmail.com>
Date: Mon, 4 May 2015 15:16:44 -0300
Subject: [PATCH 069/101] x86: Make cpu_tss available to external modules

Commit 75182b1632 ("x86/asm/entry: Switch all C consumers of
kernel_stack to this_cpu_sp0()") changed current_thread_info
to use this_cpu_sp0, and indirectly made it rely on init_tss
which was exported with EXPORT_PER_CPU_SYMBOL_GPL.
As a result some macros and inline functions such as set/get_fs,
test_thread_flag and variants have been made unusable for
external modules.

Make cpu_tss exported with EXPORT_PER_CPU_SYMBOL so that these
functions are accessible again, as they were previously.

Signed-off-by: Marc Dionne <marc.dionne@your-file-system.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/1430763404-21221-1-git-send-email-marc.dionne@your-file-system.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8213da62b1b7..bfc99b3b6522 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
 };
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);

From d467f7a405cf0e7f06ed8d3175607ebb4ed06671 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Thu, 26 Mar 2015 13:35:18 -0500
Subject: [PATCH 070/101] ipmi_ssif: Fix the logic on user-supplied addresses

Returning zero is success.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index f40e3bd2c69c..1de1914f5f89 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1832,7 +1832,7 @@ static int init_ipmi_ssif(void)
 		rv = new_ssif_client(addr[i], adapter_name[i],
 				     dbg[i], slave_addrs[i],
 				     SI_HARDCODED);
-		if (!rv)
+		if (rv)
 			pr_err(PFX
 			       "Couldn't add hardcoded device at addr 0x%x\n",
 			       addr[i]);

From b0e9aaa99dfb3036829e91d4f0aae449639e221a Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Tue, 31 Mar 2015 12:48:53 -0500
Subject: [PATCH 071/101] ipmi:ssif: Ignore spaces when comparing I2C adapter
 names

Some of the adapters have spaces in their names, but that's really
hard to pass in as a module or kernel parameters.  So ignore the
spaces.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 Documentation/IPMI.txt        |  5 ++++-
 drivers/char/ipmi/ipmi_ssif.c | 25 ++++++++++++++++++++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
index 653d5d739d7f..31d1d658827f 100644
--- a/Documentation/IPMI.txt
+++ b/Documentation/IPMI.txt
@@ -505,7 +505,10 @@ at module load time (for a module) with:
 
 The addresses are normal I2C addresses.  The adapter is the string
 name of the adapter, as shown in /sys/class/i2c-adapter/i2c-<n>/name.
-It is *NOT* i2c-<n> itself.
+It is *NOT* i2c-<n> itself.  Also, the comparison is done ignoring
+spaces, so if the name is "This is an I2C chip" you can say
+adapter_name=ThisisanI2Cchip.  This is because it's hard to pass in
+spaces in kernel parameters.
 
 The debug flags are bit flags for each BMC found, they are:
 IPMI messages: 1, driver state: 2, timing: 4, I2C probe: 8
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 1de1914f5f89..3c3b7257867b 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1258,6 +1258,23 @@ static const struct file_operations smi_stats_proc_ops = {
 	.release	= single_release,
 };
 
+static int strcmp_nospace(char *s1, char *s2)
+{
+	while (*s1 && *s2) {
+		while (isspace(*s1))
+			s1++;
+		while (isspace(*s2))
+			s2++;
+		if (*s1 > *s2)
+			return 1;
+		if (*s1 < *s2)
+			return -1;
+		s1++;
+		s2++;
+	}
+	return 0;
+}
+
 static struct ssif_addr_info *ssif_info_find(unsigned short addr,
 					     char *adapter_name,
 					     bool match_null_name)
@@ -1272,8 +1289,10 @@ static struct ssif_addr_info *ssif_info_find(unsigned short addr,
 					/* One is NULL and one is not */
 					continue;
 				}
-				if (strcmp(info->adapter_name, adapter_name))
-					/* Names to not match */
+				if (adapter_name &&
+				    strcmp_nospace(info->adapter_name,
+						   adapter_name))
+					/* Names do not match */
 					continue;
 			}
 			found = info;
@@ -1407,7 +1426,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	} else {
  no_support:
 		/* Assume no multi-part or PEC support */
-		pr_info(PFX "Error fetching SSIF: %d %d %2.2x, your system probably doesn't support this command so  using defaults\n",
+		pr_info(PFX "Error fetching SSIF: %d %d %2.2x, your system probably doesn't support this command so using defaults\n",
 		       rv, len, resp[2]);
 
 		ssif_info->max_xmit_msg_size = 32;

From 5e33cd0c5a299772b5ec1a493f0a77548664ae06 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 22 Feb 2015 10:21:07 -0800
Subject: [PATCH 072/101] ipmi: Remove incorrect use of seq_has_overflowed

commit d6c5dc18d863 ("ipmi: Remove uses of return value of seq_printf")
incorrectly changed the return value of various proc_show functions
to use seq_has_overflowed().

These functions should return 0 on completion rather than 1/true
on overflow.  1 is the same as #define SEQ_SKIP which would cause
the output to not be emitted (skipped) instead.

This is a logical defect only as the lengths of these outputs are
all smaller than the initial allocation done by the seq filesystem.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 4 ++--
 drivers/char/ipmi/ipmi_si_intf.c    | 4 ++--
 drivers/char/ipmi/ipmi_ssif.c       | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 9bb592872532..bf75f6361773 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -2000,7 +2000,7 @@ static int smi_ipmb_proc_show(struct seq_file *m, void *v)
 		seq_printf(m, " %x", intf->channels[i].address);
 	seq_putc(m, '\n');
 
-	return seq_has_overflowed(m);
+	return 0;
 }
 
 static int smi_ipmb_proc_open(struct inode *inode, struct file *file)
@@ -2023,7 +2023,7 @@ static int smi_version_proc_show(struct seq_file *m, void *v)
 		   ipmi_version_major(&intf->bmc->id),
 		   ipmi_version_minor(&intf->bmc->id));
 
-	return seq_has_overflowed(m);
+	return 0;
 }
 
 static int smi_version_proc_open(struct inode *inode, struct file *file)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 5e90a18afbaf..468c75e10330 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -3080,7 +3080,7 @@ static int smi_type_proc_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "%s\n", si_to_str[smi->si_type]);
 
-	return seq_has_overflowed(m);
+	return 0;
 }
 
 static int smi_type_proc_open(struct inode *inode, struct file *file)
@@ -3153,7 +3153,7 @@ static int smi_params_proc_show(struct seq_file *m, void *v)
 		   smi->irq,
 		   smi->slave_addr);
 
-	return seq_has_overflowed(m);
+	return 0;
 }
 
 static int smi_params_proc_open(struct inode *inode, struct file *file)
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 3c3b7257867b..ee3b8c5e7e21 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1200,7 +1200,7 @@ static int smi_type_proc_show(struct seq_file *m, void *v)
 {
 	seq_puts(m, "ssif\n");
 
-	return seq_has_overflowed(m);
+	return 0;
 }
 
 static int smi_type_proc_open(struct inode *inode, struct file *file)

From b1e65e71535aa128089d4cb1b6d90db7551fcb05 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Fri, 10 Apr 2015 20:19:18 -0500
Subject: [PATCH 073/101] ipmi: Don't report err in the SI driver for SSIF
 devices

Really ignore them by returning -ENODEV from the probe, but not
doing anything.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_si_intf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 468c75e10330..461274168d0f 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2244,7 +2244,7 @@ static int ipmi_pnp_probe(struct pnp_dev *dev,
 	acpi_handle handle;
 	acpi_status status;
 	unsigned long long tmp;
-	int rv;
+	int rv = -EINVAL;
 
 	acpi_dev = pnp_acpi_device(dev);
 	if (!acpi_dev)
@@ -2276,6 +2276,7 @@ static int ipmi_pnp_probe(struct pnp_dev *dev,
 		info->si_type = SI_BT;
 		break;
 	case 4: /* SSIF, just ignore */
+		rv = -ENODEV;
 		goto err_free;
 	default:
 		dev_info(&dev->dev, "unknown IPMI type %lld\n", tmp);
@@ -2336,7 +2337,7 @@ static int ipmi_pnp_probe(struct pnp_dev *dev,
 
 err_free:
 	kfree(info);
-	return -EINVAL;
+	return rv;
 }
 
 static void ipmi_pnp_remove(struct pnp_dev *dev)

From df6dd1b35b0ec0ac6a5298378ceaf487091f448c Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 27 Apr 2015 09:45:06 +0200
Subject: [PATCH 074/101] thinkpad_acpi: Fix warning for static not at
 beginning

Fix the following warning:

warning: "static" is not at beginning of declaration
 void static hotkey_mask_warn_incomplete_mask(void)
 ^

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Cc: Henrique de Moraes Holschuh <ibm-acpi@hmh.eng.br>
Cc: Darren Hart <dvhart@infradead.org>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 7769575345d8..9bb9ad6d4a1b 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2115,7 +2115,7 @@ static int hotkey_mask_get(void)
 	return 0;
 }
 
-void static hotkey_mask_warn_incomplete_mask(void)
+static void hotkey_mask_warn_incomplete_mask(void)
 {
 	/* log only what the user can fix... */
 	const u32 wantedmask = hotkey_driver_mask &

From 99ebbd30e3640f6addb37f222b4d6ad4b807d9ea Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 5 May 2015 16:23:25 -0700
Subject: [PATCH 075/101] revert "zram: move compact_store() to sysfs functions
 area"

Revert commit c72c6160d967ed26a0b136dbab337f821d233509

It was intended to be a cosmetic change w/o any functional change
and was part of a bigger change:

  http://lkml.iu.edu/hypermail/linux/kernel/1503.1/01818.html

Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c94386aa563d..8dcbced0eafd 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -74,6 +74,27 @@ static inline struct zram *dev_to_zram(struct device *dev)
 	return (struct zram *)dev_to_disk(dev)->private_data;
 }
 
+static ssize_t compact_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long nr_migrated;
+	struct zram *zram = dev_to_zram(dev);
+	struct zram_meta *meta;
+
+	down_read(&zram->init_lock);
+	if (!init_done(zram)) {
+		up_read(&zram->init_lock);
+		return -EINVAL;
+	}
+
+	meta = zram->meta;
+	nr_migrated = zs_compact(meta->mem_pool);
+	atomic64_add(nr_migrated, &zram->stats.num_migrated);
+	up_read(&zram->init_lock);
+
+	return len;
+}
+
 static ssize_t disksize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -1038,6 +1059,7 @@ static const struct block_device_operations zram_devops = {
 	.owner = THIS_MODULE
 };
 
+static DEVICE_ATTR_WO(compact);
 static DEVICE_ATTR_RW(disksize);
 static DEVICE_ATTR_RO(initstate);
 static DEVICE_ATTR_WO(reset);
@@ -1114,6 +1136,7 @@ static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_num_writes.attr,
 	&dev_attr_failed_reads.attr,
 	&dev_attr_failed_writes.attr,
+	&dev_attr_compact.attr,
 	&dev_attr_invalid_io.attr,
 	&dev_attr_notify_free.attr,
 	&dev_attr_zero_pages.attr,

From 74f3037c4015f3a440dc4cb4e31477875fa9791c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 5 May 2015 16:23:28 -0700
Subject: [PATCH 076/101] zram: add Designated Reviewer for zram in MAINTAINERS

Sergey Senozhatsky has contributed/reviewed to zram for a long time.  He
is really helpful for maintaining zram so I want for him to continue
helping me as Designated Reviewer unless he hates it.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 781e099495d3..1c7fd3c85ba7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11037,6 +11037,7 @@ F:	drivers/media/pci/zoran/
 ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER
 M:	Minchan Kim <minchan@kernel.org>
 M:	Nitin Gupta <ngupta@vflare.org>
+R:	Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	drivers/block/zram/

From 48b945a19cf6e7e548b2ce545ec88f93284ab276 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 5 May 2015 16:23:30 -0700
Subject: [PATCH 077/101] MAINTAINERS: add co-maintainer for LED subsystem

Add myself (Jacek Anaszewski) as a co-maintainer for the LED subsystem.

Signed-off-by: Jacek Anaszewski <j.anaszewski@samsung.com>
Acked-by: Bryan Wu <cooloney@gmail.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1c7fd3c85ba7..b399b34a2496 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5805,6 +5805,7 @@ F:	drivers/scsi/53c700*
 LED SUBSYSTEM
 M:	Bryan Wu <cooloney@gmail.com>
 M:	Richard Purdie <rpurdie@rpsys.net>
+M:	Jacek Anaszewski <j.anaszewski@samsung.com>
 L:	linux-leds@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds.git
 S:	Maintained

From 7d616e4ddb9c0754ed6245a43332d5b867e4db11 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Tue, 5 May 2015 16:23:33 -0700
Subject: [PATCH 078/101] lib: delete lib/find_last_bit.c

The file lib/find_last_bit.c was no longer used and supposed to be
deleted by commit 8f6f19dd51 ("lib: move find_last_bit to
lib/find_next_bit.c") but that delete didn't happen.  This gets rid of
it.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/find_last_bit.c | 41 -----------------------------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 lib/find_last_bit.c

diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
deleted file mode 100644
index 3e3be40c6a6e..000000000000
--- a/lib/find_last_bit.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* find_last_bit.c: fallback find next bit implementation
- *
- * Copyright (C) 2008 IBM Corporation
- * Written by Rusty Russell <rusty@rustcorp.com.au>
- * (Inspired by David Howell's find_next_bit implementation)
- *
- * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
- * size and improve performance, 2015.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/bitops.h>
-#include <linux/bitmap.h>
-#include <linux/export.h>
-#include <linux/kernel.h>
-
-#ifndef find_last_bit
-
-unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
-{
-	if (size) {
-		unsigned long val = BITMAP_LAST_WORD_MASK(size);
-		unsigned long idx = (size-1) / BITS_PER_LONG;
-
-		do {
-			val &= addr[idx];
-			if (val)
-				return idx * BITS_PER_LONG + __fls(val);
-
-			val = ~0ul;
-		} while (idx--);
-	}
-	return size;
-}
-EXPORT_SYMBOL(find_last_bit);
-
-#endif

From 09789e5de18e4e442870b2d700831f5cb802eb05 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 5 May 2015 16:23:35 -0700
Subject: [PATCH 079/101] mm/memory-failure: call shake_page() when error hits
 thp tail page

Currently memory_failure() calls shake_page() to sweep pages out from
pcplists only when the victim page is 4kB LRU page or thp head page.
But we should do this for a thp tail page too.

Consider that a memory error hits a thp tail page whose head page is on
a pcplist when memory_failure() runs.  Then, the current kernel skips
shake_pages() part, so hwpoison_user_mappings() returns without calling
split_huge_page() nor try_to_unmap() because PageLRU of the thp head is
still cleared due to the skip of shake_page().

As a result, me_huge_page() runs for the thp, which is broken behavior.

One effect is a leak of the thp.  And another is to fail to isolate the
memory error, so later access to the error address causes another MCE,
which kills the processes which used the thp.

This patch fixes this problem by calling shake_page() for thp tail case.

Fixes: 385de35722c9 ("thp: allow a hwpoisoned head page to be put back to LRU")
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Dean Nelson <dnelson@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Jin Dongming <jin.dongming@np.css.fujitsu.com>
Cc: <stable@vger.kernel.org>	[3.4+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d9359b770cd9..22e0f270e4f7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1187,10 +1187,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 * The check (unnecessarily) ignores LRU pages being isolated and
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
-	if (!PageHuge(p) && !PageTransTail(p)) {
-		if (!PageLRU(p))
-			shake_page(p, 0);
-		if (!PageLRU(p)) {
+	if (!PageHuge(p)) {
+		if (!PageLRU(hpage))
+			shake_page(hpage, 0);
+		if (!PageLRU(hpage)) {
 			/*
 			 * shake_page could have turned it free.
 			 */

From 01e76903f655a4d88c2e09d3182436c65f6e1213 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 5 May 2015 16:23:38 -0700
Subject: [PATCH 080/101] kasan: show gcc version requirements in Kconfig and
 Documentation

The documentation shows a need for gcc > 4.9.2, but it's really >=.  The
Kconfig entries don't show the required versions, so add them.  Correct a
latter/later typo too.  Also mention that gcc 5 is required to catch out of
bounds accesses to global and stack variables.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kasan.txt | 8 +++++---
 lib/Kconfig.kasan       | 8 ++++++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt
index 092fc10961fe..4692241789b1 100644
--- a/Documentation/kasan.txt
+++ b/Documentation/kasan.txt
@@ -9,7 +9,9 @@ a fast and comprehensive solution for finding use-after-free and out-of-bounds
 bugs.
 
 KASan uses compile-time instrumentation for checking every memory access,
-therefore you will need a certain version of GCC > 4.9.2
+therefore you will need a gcc version of 4.9.2 or later. KASan could detect out
+of bounds accesses to stack or global variables, but only if gcc 5.0 or later was
+used to build the kernel.
 
 Currently KASan is supported only for x86_64 architecture and requires that the
 kernel be built with the SLUB allocator.
@@ -23,8 +25,8 @@ To enable KASAN configure kernel with:
 
 and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline
 is compiler instrumentation types. The former produces smaller binary the
-latter is 1.1 - 2 times faster. Inline instrumentation requires GCC 5.0 or
-latter.
+latter is 1.1 - 2 times faster. Inline instrumentation requires a gcc version
+of 5.0 or later.
 
 Currently KASAN works only with the SLUB memory allocator.
 For better bug detection and nicer report, enable CONFIG_STACKTRACE and put
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 4fecaedc80a2..777eda7d1ab4 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -10,8 +10,11 @@ config KASAN
 	help
 	  Enables kernel address sanitizer - runtime memory debugger,
 	  designed to find out-of-bounds accesses and use-after-free bugs.
-	  This is strictly debugging feature. It consumes about 1/8
-	  of available memory and brings about ~x3 performance slowdown.
+	  This is strictly a debugging feature and it requires a gcc version
+	  of 4.9.2 or later. Detection of out of bounds accesses to stack or
+	  global variables requires gcc 5.0 or later.
+	  This feature consumes about 1/8 of available memory and brings about
+	  ~x3 performance slowdown.
 	  For better error detection enable CONFIG_STACKTRACE,
 	  and add slub_debug=U to boot cmdline.
 
@@ -40,6 +43,7 @@ config KASAN_INLINE
 	  memory accesses. This is faster than outline (in some workloads
 	  it gives about x2 boost over outline instrumentation), but
 	  make kernel's .text size much bigger.
+	  This requires a gcc version of 5.0 or later.
 
 endchoice
 

From c71f1e05e62f3eb843b6458eeb7298f269b34b1e Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 5 May 2015 16:23:41 -0700
Subject: [PATCH 081/101] Documentation: bindings: add abracon,abx80x

Document the bindings for abracon,abx80x and related compatibles.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Cc: Philippe De Muyter <phdm@macqel.be>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../bindings/rtc/abracon,abx80x.txt           | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/rtc/abracon,abx80x.txt

diff --git a/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt
new file mode 100644
index 000000000000..be789685a1c2
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt
@@ -0,0 +1,30 @@
+Abracon ABX80X I2C ultra low power RTC/Alarm chip
+
+The Abracon ABX80X family consists of the ab0801, ab0803, ab0804, ab0805,
+ab1801, ab1803, ab1804 and ab1805. The ab0805 is the superset of ab080x and
+the ab1805 is the superset of ab180x.
+
+Required properties:
+
+ - "compatible": should be one of:
+        "abracon,abx80x"
+        "abracon,ab0801"
+        "abracon,ab0803"
+        "abracon,ab0804"
+        "abracon,ab0805"
+        "abracon,ab1801"
+        "abracon,ab1803"
+        "abracon,ab1804"
+        "abracon,ab1805"
+	Using "abracon,abx80x" will enable chip autodetection.
+ - "reg": I2C bus address of the device
+
+Optional properties:
+
+The abx804 and abx805 have a trickle charger that is able to charge the
+connected battery or supercap. Both the following properties have to be defined
+and valid to enable charging:
+
+ - "abracon,tc-diode": should be "standard" (0.6V) or "schottky" (0.3V)
+ - "abracon,tc-resistor": should be <0>, <3>, <6> or <11>. 0 disables the output
+                          resistor, the other values are in kOhm.

From 4d61ff6b9960cb00cf2c12abd5769aa2dd475415 Mon Sep 17 00:00:00 2001
From: Philippe De Muyter <phdm@macqel.be>
Date: Tue, 5 May 2015 16:23:44 -0700
Subject: [PATCH 082/101] rtc: add rtc-abx80x, a driver for the Abracon AB x80x
 i2c rtc

This is a basic driver for the ultra-low-power Abracon AB x80x series of RTC
chips. It supports in particular, the supersets AB0805 and AB1805.
It allows reading and writing the time, and enables the supercapacitor/
battery charger.

[arnd@arndb.de: abx805 depends on i2c]
[alexandre.belloni@free-electrons.com: rename buffer from date to buf in abx80x_rtc_read_time()]
Signed-off-by: Philippe De Muyter <phdm@macqel.be>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Paul Bolle <pebolle@tiscali.nl>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/Kconfig      |  10 ++
 drivers/rtc/Makefile     |   1 +
 drivers/rtc/rtc-abx80x.c | 307 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 318 insertions(+)
 create mode 100644 drivers/rtc/rtc-abx80x.c

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 6149ae01e11f..0fe4ad8826b2 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -164,6 +164,16 @@ config RTC_DRV_ABB5ZES3
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ab-b5ze-s3.
 
+config RTC_DRV_ABX80X
+	tristate "Abracon ABx80x"
+	help
+	  If you say yes here you get support for Abracon AB080X and AB180X
+	  families of ultra-low-power battery- and capacitor-backed real-time
+	  clock chips.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-abx80x.
+
 config RTC_DRV_AS3722
 	tristate "ams AS3722 RTC driver"
 	depends on MFD_AS3722
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index c31731c29762..2b82e2b0311b 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_RTC_DRV_88PM80X)	+= rtc-88pm80x.o
 obj-$(CONFIG_RTC_DRV_AB3100)	+= rtc-ab3100.o
 obj-$(CONFIG_RTC_DRV_AB8500)	+= rtc-ab8500.o
 obj-$(CONFIG_RTC_DRV_ABB5ZES3)	+= rtc-ab-b5ze-s3.o
+obj-$(CONFIG_RTC_DRV_ABX80X)	+= rtc-abx80x.o
 obj-$(CONFIG_RTC_DRV_ARMADA38X)	+= rtc-armada38x.o
 obj-$(CONFIG_RTC_DRV_AS3722)	+= rtc-as3722.o
 obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o
diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c
new file mode 100644
index 000000000000..4337c3bc6ace
--- /dev/null
+++ b/drivers/rtc/rtc-abx80x.c
@@ -0,0 +1,307 @@
+/*
+ * A driver for the I2C members of the Abracon AB x8xx RTC family,
+ * and compatible: AB 1805 and AB 0805
+ *
+ * Copyright 2014-2015 Macq S.A.
+ *
+ * Author: Philippe De Muyter <phdm@macqel.be>
+ * Author: Alexandre Belloni <alexandre.belloni@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/bcd.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/rtc.h>
+
+#define ABX8XX_REG_HTH		0x00
+#define ABX8XX_REG_SC		0x01
+#define ABX8XX_REG_MN		0x02
+#define ABX8XX_REG_HR		0x03
+#define ABX8XX_REG_DA		0x04
+#define ABX8XX_REG_MO		0x05
+#define ABX8XX_REG_YR		0x06
+#define ABX8XX_REG_WD		0x07
+
+#define ABX8XX_REG_CTRL1	0x10
+#define ABX8XX_CTRL_WRITE	BIT(1)
+#define ABX8XX_CTRL_12_24	BIT(6)
+
+#define ABX8XX_REG_CFG_KEY	0x1f
+#define ABX8XX_CFG_KEY_MISC	0x9d
+
+#define ABX8XX_REG_ID0		0x28
+
+#define ABX8XX_REG_TRICKLE	0x20
+#define ABX8XX_TRICKLE_CHARGE_ENABLE	0xa0
+#define ABX8XX_TRICKLE_STANDARD_DIODE	0x8
+#define ABX8XX_TRICKLE_SCHOTTKY_DIODE	0x4
+
+static u8 trickle_resistors[] = {0, 3, 6, 11};
+
+enum abx80x_chip {AB0801, AB0803, AB0804, AB0805,
+	AB1801, AB1803, AB1804, AB1805, ABX80X};
+
+struct abx80x_cap {
+	u16 pn;
+	bool has_tc;
+};
+
+static struct abx80x_cap abx80x_caps[] = {
+	[AB0801] = {.pn = 0x0801},
+	[AB0803] = {.pn = 0x0803},
+	[AB0804] = {.pn = 0x0804, .has_tc = true},
+	[AB0805] = {.pn = 0x0805, .has_tc = true},
+	[AB1801] = {.pn = 0x1801},
+	[AB1803] = {.pn = 0x1803},
+	[AB1804] = {.pn = 0x1804, .has_tc = true},
+	[AB1805] = {.pn = 0x1805, .has_tc = true},
+	[ABX80X] = {.pn = 0}
+};
+
+static struct i2c_driver abx80x_driver;
+
+static int abx80x_enable_trickle_charger(struct i2c_client *client,
+					 u8 trickle_cfg)
+{
+	int err;
+
+	/*
+	 * Write the configuration key register to enable access to the Trickle
+	 * register
+	 */
+	err = i2c_smbus_write_byte_data(client, ABX8XX_REG_CFG_KEY,
+					ABX8XX_CFG_KEY_MISC);
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to write configuration key\n");
+		return -EIO;
+	}
+
+	err = i2c_smbus_write_byte_data(client, ABX8XX_REG_TRICKLE,
+					ABX8XX_TRICKLE_CHARGE_ENABLE |
+					trickle_cfg);
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to write trickle register\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int abx80x_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned char buf[8];
+	int err;
+
+	err = i2c_smbus_read_i2c_block_data(client, ABX8XX_REG_HTH,
+					    sizeof(buf), buf);
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to read date\n");
+		return -EIO;
+	}
+
+	tm->tm_sec = bcd2bin(buf[ABX8XX_REG_SC] & 0x7F);
+	tm->tm_min = bcd2bin(buf[ABX8XX_REG_MN] & 0x7F);
+	tm->tm_hour = bcd2bin(buf[ABX8XX_REG_HR] & 0x3F);
+	tm->tm_wday = buf[ABX8XX_REG_WD] & 0x7;
+	tm->tm_mday = bcd2bin(buf[ABX8XX_REG_DA] & 0x3F);
+	tm->tm_mon = bcd2bin(buf[ABX8XX_REG_MO] & 0x1F) - 1;
+	tm->tm_year = bcd2bin(buf[ABX8XX_REG_YR]) + 100;
+
+	err = rtc_valid_tm(tm);
+	if (err < 0)
+		dev_err(&client->dev, "retrieved date/time is not valid.\n");
+
+	return err;
+}
+
+static int abx80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	unsigned char buf[8];
+	int err;
+
+	if (tm->tm_year < 100)
+		return -EINVAL;
+
+	buf[ABX8XX_REG_HTH] = 0;
+	buf[ABX8XX_REG_SC] = bin2bcd(tm->tm_sec);
+	buf[ABX8XX_REG_MN] = bin2bcd(tm->tm_min);
+	buf[ABX8XX_REG_HR] = bin2bcd(tm->tm_hour);
+	buf[ABX8XX_REG_DA] = bin2bcd(tm->tm_mday);
+	buf[ABX8XX_REG_MO] = bin2bcd(tm->tm_mon + 1);
+	buf[ABX8XX_REG_YR] = bin2bcd(tm->tm_year - 100);
+	buf[ABX8XX_REG_WD] = tm->tm_wday;
+
+	err = i2c_smbus_write_i2c_block_data(client, ABX8XX_REG_HTH,
+					     sizeof(buf), buf);
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to write to date registers\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static const struct rtc_class_ops abx80x_rtc_ops = {
+	.read_time	= abx80x_rtc_read_time,
+	.set_time	= abx80x_rtc_set_time,
+};
+
+/*
+ * Returns trickle register diode/resistor bits read from DT, or an error.
+ */
+static int abx80x_dt_trickle_cfg(struct device_node *np)
+{
+	const char *diode;
+	int i, ret, trickle_cfg = 0;
+	u32 tmp;
+
+	ret = of_property_read_string(np, "abracon,tc-diode", &diode);
+	if (ret)
+		return ret;
+
+	if (!strcmp(diode, "standard"))
+		trickle_cfg |= ABX8XX_TRICKLE_STANDARD_DIODE;
+	else if (!strcmp(diode, "schottky"))
+		trickle_cfg |= ABX8XX_TRICKLE_SCHOTTKY_DIODE;
+	else
+		return -EINVAL;
+
+	ret = of_property_read_u32(np, "abracon,tc-resistor", &tmp);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(trickle_resistors); i++)
+		if (trickle_resistors[i] == tmp)
+			break;
+	if (i == ARRAY_SIZE(trickle_resistors))	/* value not in the table */
+		return -EINVAL;
+	return (trickle_cfg | i);	/* table index goes in the low bits */
+}
+
+/*
+ * Probe: log the chip ID, clear the 12/24 bit and set the WRITE bit in CTRL1,
+ * resolve the part (autodetected for ABX80X), set up charging, register RTC.
+ */
+static int abx80x_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct device_node *np = client->dev.of_node;
+	struct rtc_device *rtc;
+	int i, data, err, trickle_cfg = -EINVAL;
+	u8 buf[7];	/* unsigned: bytes are shifted/OR-ed below, no sign-ext */
+	unsigned int part = id->driver_data;
+	unsigned int partnumber, majrev, minrev;
+	unsigned int lot, wafer, uid;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -ENODEV;
+
+	err = i2c_smbus_read_i2c_block_data(client, ABX8XX_REG_ID0,
+					    sizeof(buf), buf);
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to read partnumber\n");
+		return -EIO;
+	}
+
+	partnumber = (buf[0] << 8) | buf[1];
+	majrev = buf[2] >> 3;
+	minrev = buf[2] & 0x7;
+	lot = ((buf[4] & 0x80) << 2) | ((buf[6] & 0x80) << 1) | buf[3];
+	uid = ((buf[4] & 0x7f) << 8) | buf[5];
+	wafer = (buf[6] & 0x7c) >> 2;
+	dev_info(&client->dev, "model %04x, revision %u.%u, lot %x, wafer %x, uid %x\n",
+		 partnumber, majrev, minrev, lot, wafer, uid);
+
+	data = i2c_smbus_read_byte_data(client, ABX8XX_REG_CTRL1);
+	if (data < 0) {
+		dev_err(&client->dev, "Unable to read control register\n");
+		return -EIO;
+	}
+
+	err = i2c_smbus_write_byte_data(client, ABX8XX_REG_CTRL1,
+					((data & ~ABX8XX_CTRL_12_24) |
+					 ABX8XX_CTRL_WRITE));
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to write control register\n");
+		return -EIO;
+	}
+
+	/* part autodetection: scan the table up to its pn == 0 terminator */
+	if (part == ABX80X) {
+		for (i = 0; abx80x_caps[i].pn; i++)
+			if (partnumber == abx80x_caps[i].pn)
+				break;
+		if (abx80x_caps[i].pn == 0) {
+			dev_err(&client->dev, "Unknown part: %04x\n",
+				partnumber);
+			return -EINVAL;
+		}
+		part = i;
+	}
+
+	if (partnumber != abx80x_caps[part].pn) {
+		dev_err(&client->dev, "partnumber mismatch %04x != %04x\n",
+			partnumber, abx80x_caps[part].pn);
+		return -EINVAL;
+	}
+
+	if (np && abx80x_caps[part].has_tc)
+		trickle_cfg = abx80x_dt_trickle_cfg(np);
+
+	if (trickle_cfg > 0) {
+		dev_info(&client->dev, "Enabling trickle charger: %02x\n",
+			 trickle_cfg);
+		abx80x_enable_trickle_charger(client, trickle_cfg);
+	}
+
+	rtc = devm_rtc_device_register(&client->dev, abx80x_driver.driver.name,
+				       &abx80x_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+
+	i2c_set_clientdata(client, rtc);
+
+	return 0;
+}
+
+static int abx80x_remove(struct i2c_client *client)
+{
+	return 0;
+}
+
+static const struct i2c_device_id abx80x_id[] = {
+	{ "abx80x", ABX80X },
+	{ "ab0801", AB0801 },
+	{ "ab0803", AB0803 },
+	{ "ab0804", AB0804 },
+	{ "ab0805", AB0805 },
+	{ "ab1801", AB1801 },
+	{ "ab1803", AB1803 },
+	{ "ab1804", AB1804 },
+	{ "ab1805", AB1805 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, abx80x_id);
+
+static struct i2c_driver abx80x_driver = {
+	.driver		= {
+		.name	= "rtc-abx80x",
+	},
+	.probe		= abx80x_probe,
+	.remove		= abx80x_remove,
+	.id_table	= abx80x_id,
+};
+
+module_i2c_driver(abx80x_driver);
+
+MODULE_AUTHOR("Philippe De Muyter <phdm@macqel.be>");
+MODULE_AUTHOR("Alexandre Belloni <alexandre.belloni@free-electrons.com>");
+MODULE_DESCRIPTION("Abracon ABX80X RTC driver");
+MODULE_LICENSE("GPL v2");

From 602498f9aa43d4951eece3fd6ad95a6d0a78d537 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 5 May 2015 16:23:46 -0700
Subject: [PATCH 083/101] mm: soft-offline: fix num_poisoned_pages counting on
 concurrent events

If multiple soft offline events hit one free page/hugepage concurrently,
soft_offline_page() can handle the free page/hugepage multiple times,
which makes num_poisoned_pages counter increased more than once.  This
patch fixes this wrong counting by checking TestSetPageHWPoison for normal
pages and by checking the return value of dequeue_hwpoisoned_huge_page()
for hugepages.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Dean Nelson <dnelson@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: <stable@vger.kernel.org>	[3.14+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 22e0f270e4f7..501820c815b3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1777,12 +1777,12 @@ int soft_offline_page(struct page *page, int flags)
 	} else if (ret == 0) { /* for free pages */
 		if (PageHuge(page)) {
 			set_page_hwpoison_huge_page(hpage);
-			dequeue_hwpoisoned_huge_page(hpage);
-			atomic_long_add(1 << compound_order(hpage),
+			if (!dequeue_hwpoisoned_huge_page(hpage))
+				atomic_long_add(1 << compound_order(hpage),
 					&num_poisoned_pages);
 		} else {
-			SetPageHWPoison(page);
-			atomic_long_inc(&num_poisoned_pages);
+			if (!TestSetPageHWPoison(page))
+				atomic_long_inc(&num_poisoned_pages);
 		}
 	}
 	unset_migratetype_isolate(page, MIGRATE_MOVABLE);

From 7ea434a4eb49db83d17cc076f2267704c52938ae Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 5 May 2015 16:23:49 -0700
Subject: [PATCH 084/101] mm/hwpoison-inject: fix refcounting in no-injection
 case

Hwpoison injection via debugfs:hwpoison/corrupt-pfn takes a refcount of
the target page.  But current code doesn't release it if the target page
is not supposed to be injected, which results in memory leak.  This patch
simply adds the refcount releasing code.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Dean Nelson <dnelson@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hwpoison-inject.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 329caf56df22..2b3f933e3282 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -40,7 +40,7 @@ static int hwpoison_inject(void *data, u64 val)
 	 * This implies unable to support non-LRU pages.
 	 */
 	if (!PageLRU(p) && !PageHuge(p))
-		return 0;
+		goto put_out;
 
 	/*
 	 * do a racy check with elevated page count, to make sure PG_hwpoison
@@ -52,11 +52,14 @@ static int hwpoison_inject(void *data, u64 val)
 	err = hwpoison_filter(hpage);
 	unlock_page(hpage);
 	if (err)
-		return 0;
+		goto put_out;
 
 inject:
 	pr_info("Injecting memory failure at pfn %#lx\n", pfn);
 	return memory_failure(pfn, 18, MF_COUNT_INCREASED);
+put_out:
+	put_page(hpage);
+	return 0;
 }
 
 static int hwpoison_unpoison(void *data, u64 val)

From e386eed89c764f102fcc3c0d4c78c65a357f7399 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 5 May 2015 16:23:52 -0700
Subject: [PATCH 085/101] mm/hwpoison-inject: check PageLRU of hpage

Hwpoison injector checks PageLRU of the raw target page to find out
whether the page is an appropriate target, but current code now filters
out thp tail pages, which prevents us from testing for such cases via this
interface.  So let's check hpage instead of p.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Dean Nelson <dnelson@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hwpoison-inject.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 2b3f933e3282..4ca5fe0042e1 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -34,12 +34,12 @@ static int hwpoison_inject(void *data, u64 val)
 	if (!hwpoison_filter_enable)
 		goto inject;
 
-	if (!PageLRU(p) && !PageHuge(p))
-		shake_page(p, 0);
+	if (!PageLRU(hpage) && !PageHuge(p))
+		shake_page(hpage, 0);
 	/*
 	 * This implies unable to support non-LRU pages.
 	 */
-	if (!PageLRU(p) && !PageHuge(p))
+	if (!PageLRU(hpage) && !PageHuge(p))
 		goto put_out;
 
 	/*

From f5b697700c86d7d01489202bfd37d86665754afd Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@intel.com>
Date: Tue, 5 May 2015 16:23:54 -0700
Subject: [PATCH 086/101] configfs: init configfs module earlier at boot time

We need this earlier in the boot process to allow various subsystems to
use configfs (e.g. IIO, the Industrial I/O subsystem).

Also, debugfs is at core_initcall level and configfs should be on the same
level from infrastructure point of view.

Signed-off-by: Daniel Baluta <daniel.baluta@intel.com>
Suggested-by: Lars-Peter Clausen <lars@metafoo.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/configfs/mount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index da94e41bdbf6..537356742091 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -173,5 +173,5 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION("0.0.2");
 MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
 
-module_init(configfs_init);
+core_initcall(configfs_init);
 module_exit(configfs_exit);

From 05836c378c7af9527b98a83746f32c7289a5f3c8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 5 May 2015 16:23:57 -0700
Subject: [PATCH 087/101] util_macros.h: have array pointer point to array of
 constants

Using the new find_closest() macro can result in the following sparse
warnings.

  drivers/hwmon/lm85.c:194:16: warning:
  		incorrect type in initializer (different modifiers)
  drivers/hwmon/lm85.c:194:16:    expected int *__fc_a
  drivers/hwmon/lm85.c:194:16:    got int static const [toplevel] *<noident>
  drivers/hwmon/lm85.c:210:16: warning:
  		incorrect type in initializer (different modifiers)
  drivers/hwmon/lm85.c:210:16:    expected int *__fc_a
  drivers/hwmon/lm85.c:210:16:    got int const *map

This is because the array passed to find_closest() will typically be
declared as array of constants, but the macro declares a non-constant
pointer to it.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/util_macros.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index d5f4fb69dba3..f9b2ce58039b 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -5,7 +5,7 @@
 ({									\
 	typeof(as) __fc_i, __fc_as = (as) - 1;				\
 	typeof(x) __fc_x = (x);						\
-	typeof(*a) *__fc_a = (a);					\
+	typeof(*a) const *__fc_a = (a);					\
 	for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) {			\
 		if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] +	\
 						__fc_a[__fc_i + 1], 2))	\

From d8fd150fe3935e1692bf57c66691e17409ebb9c1 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Tue, 5 May 2015 16:24:00 -0700
Subject: [PATCH 088/101] nilfs2: fix sanity check of btree level in
 nilfs_btree_root_broken()

The range check for b-tree level parameter in nilfs_btree_root_broken()
is wrong; it accepts the case of "level == NILFS_BTREE_LEVEL_MAX" even
though the level is limited to values in the range of 0 to
(NILFS_BTREE_LEVEL_MAX - 1).

Since the level parameter is read from storage device and used to index
nilfs_btree_path array whose element count is NILFS_BTREE_LEVEL_MAX, it
can cause memory overrun during btree operations if the boundary value
is set to the level parameter on device.

This fixes the broken sanity check and adds a comment to clarify that
the upper bound NILFS_BTREE_LEVEL_MAX is exclusive.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/btree.c         | 2 +-
 include/linux/nilfs2_fs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 059f37137f9a..919fd5bb14a8 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 	nchildren = nilfs_btree_node_get_nchildren(node);
 
 	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
-		     level > NILFS_BTREE_LEVEL_MAX ||
+		     level >= NILFS_BTREE_LEVEL_MAX ||
 		     nchildren < 0 ||
 		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
 		pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index ff3fea3194c6..9abb763e4b86 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -460,7 +460,7 @@ struct nilfs_btree_node {
 /* level */
 #define NILFS_BTREE_LEVEL_DATA          0
 #define NILFS_BTREE_LEVEL_NODE_MIN      (NILFS_BTREE_LEVEL_DATA + 1)
-#define NILFS_BTREE_LEVEL_MAX           14
+#define NILFS_BTREE_LEVEL_MAX           14	/* Max level (exclusive) */
 
 /**
  * struct nilfs_palloc_group_desc - block group descriptor

From b1432a2a35565f538586774a03bf277c27fc267d Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Tue, 5 May 2015 16:24:02 -0700
Subject: [PATCH 089/101] ocfs2: dlm: fix race between purge and get lock
 resource

There is a race window in dlm_get_lock_resource(), which may return a
lock resource which has been purged.  This will cause the process to
hang forever in dlmlock() as the ast msg can't be handled due to its
lock resource not existing.

    dlm_get_lock_resource {
        ...
        spin_lock(&dlm->spinlock);
        tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash);
        if (tmpres) {
             spin_unlock(&dlm->spinlock);
             >>>>>>>> race window, dlm_run_purge_list() may run and purge
                              the lock resource
             spin_lock(&tmpres->spinlock);
             ...
             spin_unlock(&tmpres->spinlock);
        }
    }

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <joseph.qi@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a6944b25fd5b..fdf4b41d0609 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -757,6 +757,19 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 	if (tmpres) {
 		spin_unlock(&dlm->spinlock);
 		spin_lock(&tmpres->spinlock);
+
+		/*
+		 * Right after dlm spinlock was released, dlm_thread could have
+		 * purged the lockres. Check if lockres got unhashed. If so
+		 * start over.
+		 */
+		if (hlist_unhashed(&tmpres->hash_node)) {
+			spin_unlock(&tmpres->spinlock);
+			dlm_lockres_put(tmpres);
+			tmpres = NULL;
+			goto lookup;
+		}
+
 		/* Wait on the thread that is mastering the resource */
 		if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
 			__dlm_wait_on_lockres(tmpres);

From 489405fe5ed38e65f6f82f131a39c67f3bae6045 Mon Sep 17 00:00:00 2001
From: Gregory CLEMENT <gregory.clement@free-electrons.com>
Date: Tue, 5 May 2015 16:24:05 -0700
Subject: [PATCH 090/101] rtc: armada38x: fix concurrency access in
 armada38x_rtc_set_time

While setting the time, the RTC TIME register should not be accessed.
However due to hardware constraints, setting the RTC time involves
sleeping during 100ms.  This sleep was done outside the critical section
protected by the spinlock, so it was possible to read the RTC TIME
register and get an incorrect value.  This patch introduces a mutex for
protecting the RTC TIME access, unlike the spinlock it is allowed to
sleep in a critical section protected by a mutex.

The RTC STATUS register can still be used from the interrupt handler but
it has no effect on setting the time.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Andrew Lunn <andrew@lunn.ch>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: <stable@vger.kernel.org>	[4.0]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-armada38x.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c
index 43e04af39e09..cb70ced7e0db 100644
--- a/drivers/rtc/rtc-armada38x.c
+++ b/drivers/rtc/rtc-armada38x.c
@@ -40,6 +40,13 @@ struct armada38x_rtc {
 	void __iomem	    *regs;
 	void __iomem	    *regs_soc;
 	spinlock_t	    lock;
+	/*
+	 * While setting the time, the RTC TIME register should not be
+	 * accessed. Setting the RTC time involves sleeping during
+	 * 100ms, so a mutex instead of a spinlock is used to protect
+	 * it
+	 */
+	struct mutex	    mutex_time;
 	int		    irq;
 };
 
@@ -59,8 +66,7 @@ static int armada38x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	struct armada38x_rtc *rtc = dev_get_drvdata(dev);
 	unsigned long time, time_check, flags;
 
-	spin_lock_irqsave(&rtc->lock, flags);
-
+	mutex_lock(&rtc->mutex_time);
 	time = readl(rtc->regs + RTC_TIME);
 	/*
 	 * WA for failing time set attempts. As stated in HW ERRATA if
@@ -71,7 +77,7 @@ static int armada38x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	if ((time_check - time) > 1)
 		time_check = readl(rtc->regs + RTC_TIME);
 
-	spin_unlock_irqrestore(&rtc->lock, flags);
+	mutex_unlock(&rtc->mutex_time);
 
 	rtc_time_to_tm(time_check, tm);
 
@@ -94,19 +100,12 @@ static int armada38x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	 * then wait for 100ms before writing to the time register to be
 	 * sure that the data will be taken into account.
 	 */
-	spin_lock_irqsave(&rtc->lock, flags);
-
+	mutex_lock(&rtc->mutex_time);
 	rtc_delayed_write(0, rtc, RTC_STATUS);
-
-	spin_unlock_irqrestore(&rtc->lock, flags);
-
 	msleep(100);
-
-	spin_lock_irqsave(&rtc->lock, flags);
-
 	rtc_delayed_write(time, rtc, RTC_TIME);
+	mutex_unlock(&rtc->mutex_time);
 
-	spin_unlock_irqrestore(&rtc->lock, flags);
 out:
 	return ret;
 }
@@ -230,6 +229,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	spin_lock_init(&rtc->lock);
+	mutex_init(&rtc->mutex_time);
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rtc");
 	rtc->regs = devm_ioremap_resource(&pdev->dev, res);

From 15c5725e6b86cb8dfc4ca655a22005cc678a6f6f Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Thu, 16 Apr 2015 21:09:53 +0800
Subject: [PATCH 091/101] ipmi: Remove unused including <linux/version.h>

Remove the include of <linux/version.h>, which is not needed.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index ee3b8c5e7e21..f6ea4fa444b3 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -31,7 +31,6 @@
  * interface into the I2C driver, I believe.
  */
 
-#include <linux/version.h>
 #if defined(MODVERSIONS)
 #include <linux/modversions.h>
 #endif

From a182a4b2b3e85a559ea2cd3545f4311db41325f2 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 22 Apr 2015 13:25:40 -0500
Subject: [PATCH 092/101] ipmi: Report an error if ACPI _IFT doesn't exist

When probing an ACPI table, report a specific error, instead of just
returning an error, if _IFT doesn't exist.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_si_intf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 461274168d0f..b5a1b450471f 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2262,8 +2262,10 @@ static int ipmi_pnp_probe(struct pnp_dev *dev,
 
 	/* _IFT tells us the interface type: KCS, BT, etc */
 	status = acpi_evaluate_integer(handle, "_IFT", NULL, &tmp);
-	if (ACPI_FAILURE(status))
+	if (ACPI_FAILURE(status)) {
+		dev_err(&dev->dev, "Could not find ACPI IPMI interface type\n");
 		goto err_free;
+	}
 
 	switch (tmp) {
 	case 1:

From 9f8127048ab8b47b43f8aeaaec9fec2da44be9a1 Mon Sep 17 00:00:00 2001
From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Date: Thu, 23 Apr 2015 11:16:44 +0900
Subject: [PATCH 093/101] ipmi: Fix a problem that messages are not issued in
 run_to_completion mode

start_next_msg() issues a message placed in smi_info->waiting_msg
if it is non-NULL.  However, sender() sets a message to
smi_info->curr_msg and NULL to smi_info->waiting_msg in the context
of run_to_completion mode.  As a result, the kernel loops forever
waiting for the completion of a message that was never issued when it
tries to leave a dying message after a kernel panic.

sender() should set the message to smi_info->waiting_msg not
curr_msg.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_si_intf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index b5a1b450471f..8a45e92ff60c 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -942,8 +942,7 @@ static void sender(void                *send_info,
 		 * If we are running to completion, start it and run
 		 * transactions until everything is clear.
 		 */
-		smi_info->curr_msg = msg;
-		smi_info->waiting_msg = NULL;
+		smi_info->waiting_msg = msg;
 
 		/*
 		 * Run to completion means we are single-threaded, no

From 9162052173d2381e2bbabc224c3c1457acb4c54c Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Fri, 24 Apr 2015 07:46:06 -0500
Subject: [PATCH 094/101] ipmi: Add alert handling to SSIF

The SSIF interface can optionally have an SMBus alert come in when
data is ready.  Unfortunately, the IPMI spec gives wiggle room to
the implementer to allow them to always have the alert enabled,
even if the driver doesn't enable it.  So implement alerts.
If you don't in this situation, the SMBus alert handling will
constantly complain.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 132 +++++++++++++++++++++++++++++-----
 1 file changed, 116 insertions(+), 16 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index f6ea4fa444b3..5b82d9947ec5 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -165,6 +165,9 @@ enum ssif_stat_indexes {
 	/* Number of watchdog pretimeouts. */
 	SSIF_STAT_watchdog_pretimeouts,
 
+	/* Number of alerts received. */
+	SSIF_STAT_alerts,
+
 	/* Always add statistics before this value, it must be last. */
 	SSIF_NUM_STATS
 };
@@ -213,7 +216,16 @@ struct ssif_info {
 #define WDT_PRE_TIMEOUT_INT	0x08
 	unsigned char       msg_flags;
 
+	u8		    global_enables;
 	bool		    has_event_buffer;
+	bool		    supports_alert;
+
+	/*
+	 * Used to tell what we should do with alerts.  If we are
+	 * waiting on a response, read the data immediately.
+	 */
+	bool		    got_alert;
+	bool		    waiting_alert;
 
 	/*
 	 * If set to true, this will request events the next time the
@@ -517,14 +529,10 @@ static int ssif_i2c_send(struct ssif_info *ssif_info,
 static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			     unsigned char *data, unsigned int len);
 
-static void retry_timeout(unsigned long data)
+static void start_get(struct ssif_info *ssif_info)
 {
-	struct ssif_info *ssif_info = (void *) data;
 	int rv;
 
-	if (ssif_info->stopping)
-		return;
-
 	ssif_info->rtc_us_timer = 0;
 
 	rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ,
@@ -539,6 +547,46 @@ static void retry_timeout(unsigned long data)
 	}
 }
 
+static void retry_timeout(unsigned long data)
+{
+	struct ssif_info *ssif_info = (void *) data;
+	unsigned long oflags, *flags;
+	bool waiting;
+
+	if (ssif_info->stopping)
+		return;
+
+	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+	waiting = ssif_info->waiting_alert;
+	ssif_info->waiting_alert = false;
+	ipmi_ssif_unlock_cond(ssif_info, flags);
+
+	if (waiting)
+		start_get(ssif_info);
+}
+
+
+static void ssif_alert(struct i2c_client *client, unsigned int data)
+{
+	struct ssif_info *ssif_info = i2c_get_clientdata(client);
+	unsigned long oflags, *flags;
+	bool do_get = false;
+
+	ssif_inc_stat(ssif_info, alerts);
+
+	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+	if (ssif_info->waiting_alert) {
+		ssif_info->waiting_alert = false;
+		del_timer(&ssif_info->retry_timer);
+		do_get = true;
+	} else if (ssif_info->curr_msg) {
+		ssif_info->got_alert = true;
+	}
+	ipmi_ssif_unlock_cond(ssif_info, flags);
+	if (do_get)
+		start_get(ssif_info);
+}
+
 static int start_resend(struct ssif_info *ssif_info);
 
 static void msg_done_handler(struct ssif_info *ssif_info, int result,
@@ -558,9 +606,12 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 		if (ssif_info->retries_left > 0) {
 			ssif_inc_stat(ssif_info, receive_retries);
 
+			flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+			ssif_info->waiting_alert = true;
+			ssif_info->rtc_us_timer = SSIF_MSG_USEC;
 			mod_timer(&ssif_info->retry_timer,
 				  jiffies + SSIF_MSG_JIFFIES);
-			ssif_info->rtc_us_timer = SSIF_MSG_USEC;
+			ipmi_ssif_unlock_cond(ssif_info, flags);
 			return;
 		}
 
@@ -649,7 +700,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			if (rv < 0) {
 				if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
 					pr_info(PFX
-						"Error from i2c_non_blocking_op(2)\n");
+						"Error from ssif_i2c_send\n");
 
 				result = -EIO;
 			} else
@@ -863,15 +914,32 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
 			msg_done_handler(ssif_info, -EIO, NULL, 0);
 		}
 	} else {
+		unsigned long oflags, *flags;
+		bool got_alert;
+
 		ssif_inc_stat(ssif_info, sent_messages);
 		ssif_inc_stat(ssif_info, sent_messages_parts);
 
-		/* Wait a jiffie then request the next message */
-		ssif_info->retries_left = SSIF_RECV_RETRIES;
-		ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC;
-		mod_timer(&ssif_info->retry_timer,
-			  jiffies + SSIF_MSG_PART_JIFFIES);
-		return;
+		flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+		got_alert = ssif_info->got_alert;
+		if (got_alert) {
+			ssif_info->got_alert = false;
+			ssif_info->waiting_alert = false;
+		}
+
+		if (got_alert) {
+			ipmi_ssif_unlock_cond(ssif_info, flags);
+			/* The alert already happened, try now. */
+			retry_timeout((unsigned long) ssif_info);
+		} else {
+			/* Wait a jiffie then request the next message */
+			ssif_info->waiting_alert = true;
+			ssif_info->retries_left = SSIF_RECV_RETRIES;
+			ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC;
+			mod_timer(&ssif_info->retry_timer,
+				  jiffies + SSIF_MSG_PART_JIFFIES);
+			ipmi_ssif_unlock_cond(ssif_info, flags);
+		}
 	}
 }
 
@@ -880,6 +948,8 @@ static int start_resend(struct ssif_info *ssif_info)
 	int rv;
 	int command;
 
+	ssif_info->got_alert = false;
+
 	if (ssif_info->data_len > 32) {
 		command = SSIF_IPMI_MULTI_PART_REQUEST_START;
 		ssif_info->multi_data = ssif_info->data;
@@ -1242,6 +1312,8 @@ static int smi_stats_proc_show(struct seq_file *m, void *v)
 		   ssif_get_stat(ssif_info, events));
 	seq_printf(m, "watchdog_pretimeouts:   %u\n",
 		   ssif_get_stat(ssif_info, watchdog_pretimeouts));
+	seq_printf(m, "alerts:                 %u\n",
+		   ssif_get_stat(ssif_info, alerts));
 	return 0;
 }
 
@@ -1324,6 +1396,12 @@ static bool check_acpi(struct ssif_info *ssif_info, struct device *dev)
 	return false;
 }
 
+/*
+ * Global enables we care about.
+ */
+#define GLOBAL_ENABLES_MASK (IPMI_BMC_EVT_MSG_BUFF | IPMI_BMC_RCV_MSG_INTR | \
+			     IPMI_BMC_EVT_MSG_INTR)
+
 static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	unsigned char     msg[3];
@@ -1454,6 +1532,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		goto found;
 	}
 
+	ssif_info->global_enables = resp[3];
+
 	if (resp[3] & IPMI_BMC_EVT_MSG_BUFF) {
 		ssif_info->has_event_buffer = true;
 		/* buffer is already enabled, nothing to do. */
@@ -1462,18 +1542,37 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
 	msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
-	msg[2] = resp[3] | IPMI_BMC_EVT_MSG_BUFF;
+	msg[2] = ssif_info->global_enables | IPMI_BMC_EVT_MSG_BUFF;
 	rv = do_cmd(client, 3, msg, &len, resp);
 	if (rv || (len < 2)) {
-		pr_warn(PFX "Error getting global enables: %d %d %2.2x\n",
+		pr_warn(PFX "Error setting global enables: %d %d %2.2x\n",
 			rv, len, resp[2]);
 		rv = 0; /* Not fatal */
 		goto found;
 	}
 
-	if (resp[2] == 0)
+	if (resp[2] == 0) {
 		/* A successful return means the event buffer is supported. */
 		ssif_info->has_event_buffer = true;
+		ssif_info->global_enables |= IPMI_BMC_EVT_MSG_BUFF;
+	}
+
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
+	msg[2] = ssif_info->global_enables | IPMI_BMC_RCV_MSG_INTR;
+	rv = do_cmd(client, 3, msg, &len, resp);
+	if (rv || (len < 2)) {
+		pr_warn(PFX "Error setting global enables: %d %d %2.2x\n",
+			rv, len, resp[2]);
+		rv = 0; /* Not fatal */
+		goto found;
+	}
+
+	if (resp[2] == 0) {
+		/* A successful return means the alert is supported. */
+		ssif_info->supports_alert = true;
+		ssif_info->global_enables |= IPMI_BMC_RCV_MSG_INTR;
+	}
 
  found:
 	ssif_info->intf_num = atomic_inc_return(&next_intf);
@@ -1831,6 +1930,7 @@ static struct i2c_driver ssif_i2c_driver = {
 	},
 	.probe		= ssif_probe,
 	.remove		= ssif_remove,
+	.alert		= ssif_alert,
 	.id_table	= ssif_id,
 	.detect		= ssif_detect
 };

From 3d69d43baa2749c3d187ce70940d7aebe609e149 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 29 Apr 2015 17:59:21 -0500
Subject: [PATCH 095/101] ipmi: Fix multi-part message handling

Lots of little fixes for multi-part messages:

The values were not being re-initialized; if something went wrong
handling a multi-part message and it got left in a bad state, that
could be an issue.

The commands were not correct when issuing multi-part reads, the
code was not passing in the proper value for commands.  Also clean
up some minor formatting issues.

Get the block number from the right location, limit the maximum send
message size to 63 bytes and explain why, and fix some minor stylistic
issues.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 51 ++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 5b82d9947ec5..207689c444a8 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -489,13 +489,13 @@ static int ipmi_ssif_thread(void *data)
 
 		if (ssif_info->i2c_read_write == I2C_SMBUS_WRITE) {
 			result = i2c_smbus_write_block_data(
-				ssif_info->client, SSIF_IPMI_REQUEST,
+				ssif_info->client, ssif_info->i2c_command,
 				ssif_info->i2c_data[0],
 				ssif_info->i2c_data + 1);
 			ssif_info->done_handler(ssif_info, result, NULL, 0);
 		} else {
 			result = i2c_smbus_read_block_data(
-				ssif_info->client, SSIF_IPMI_RESPONSE,
+				ssif_info->client, ssif_info->i2c_command,
 				ssif_info->i2c_data);
 			if (result < 0)
 				ssif_info->done_handler(ssif_info, result,
@@ -534,6 +534,7 @@ static void start_get(struct ssif_info *ssif_info)
 	int rv;
 
 	ssif_info->rtc_us_timer = 0;
+	ssif_info->multi_pos = 0;
 
 	rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ,
 			  SSIF_IPMI_RESPONSE,
@@ -631,9 +632,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 		ssif_inc_stat(ssif_info, received_message_parts);
 
 		/* Remove the multi-part read marker. */
-		for (i = 0; i < (len-2); i++)
-			ssif_info->data[i] = data[i+2];
 		len -= 2;
+		for (i = 0; i < len; i++)
+			ssif_info->data[i] = data[i+2];
 		ssif_info->multi_len = len;
 		ssif_info->multi_pos = 1;
 
@@ -660,9 +661,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			goto continue_op;
 		}
 
-		blocknum = data[ssif_info->multi_len];
+		blocknum = data[0];
 
-		if (ssif_info->multi_len+len-1 > IPMI_MAX_MSG_LENGTH) {
+		if (ssif_info->multi_len + len - 1 > IPMI_MAX_MSG_LENGTH) {
 			/* Received message too big, abort the operation. */
 			result = -E2BIG;
 			if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
@@ -672,15 +673,15 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 		}
 
 		/* Remove the blocknum from the data. */
-		for (i = 0; i < (len-1); i++)
-			ssif_info->data[i+ssif_info->multi_len] = data[i+1];
 		len--;
+		for (i = 0; i < len; i++)
+			ssif_info->data[i + ssif_info->multi_len] = data[i + 1];
 		ssif_info->multi_len += len;
 		if (blocknum == 0xff) {
 			/* End of read */
 			len = ssif_info->multi_len;
 			data = ssif_info->data;
-		} else if ((blocknum+1) != ssif_info->multi_pos) {
+		} else if (blocknum + 1 != ssif_info->multi_pos) {
 			/*
 			 * Out of sequence block, just abort.  Block
 			 * numbers start at zero for the second block,
@@ -880,7 +881,11 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
 	}
 
 	if (ssif_info->multi_data) {
-		/* In the middle of a multi-data write. */
+		/*
+		 * In the middle of a multi-data write.  See the comment
+		 * in the SSIF_MULTI_n_PART case in the probe function
+		 * for details on the intricacies of this.
+		 */
 		int left;
 
 		ssif_inc_stat(ssif_info, sent_messages_parts);
@@ -984,7 +989,7 @@ static int start_send(struct ssif_info *ssif_info,
 		return -E2BIG;
 
 	ssif_info->retries_left = SSIF_SEND_RETRIES;
-	memcpy(ssif_info->data+1, data, len);
+	memcpy(ssif_info->data + 1, data, len);
 	ssif_info->data_len = len;
 	return start_resend(ssif_info);
 }
@@ -1487,13 +1492,33 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 			break;
 
 		case SSIF_MULTI_2_PART:
-			if (ssif_info->max_xmit_msg_size > 64)
-				ssif_info->max_xmit_msg_size = 64;
+			if (ssif_info->max_xmit_msg_size > 63)
+				ssif_info->max_xmit_msg_size = 63;
 			if (ssif_info->max_recv_msg_size > 62)
 				ssif_info->max_recv_msg_size = 62;
 			break;
 
 		case SSIF_MULTI_n_PART:
+			/*
+			 * The specification is rather confusing at
+			 * this point, but I think I understand what
+			 * is meant.  At least I have a workable
+			 * solution.  With multi-part messages, you
+			 * cannot send a message that is a multiple of
+			 * 32-bytes in length, because the start and
+			 * middle messages are 32-bytes and the end
+			 * message must be at least one byte.  You
+			 * can't fudge on an extra byte, that would
+			 * screw up things like fru data writes.  So
+			 * we limit the length to 63 bytes.  That way
+			 * a 32-byte message gets sent as a single
+			 * part.  A larger message will be a 32-byte
+			 * start and the next message is always going
+			 * to be 1-31 bytes in length.  Not ideal, but
+			 * it should work.
+			 */
+			if (ssif_info->max_xmit_msg_size > 63)
+				ssif_info->max_xmit_msg_size = 63;
 			break;
 
 		default:

From c88d47480d300eaad80c213d50c9bf6077fc49bc Mon Sep 17 00:00:00 2001
From: Bobby Powers <bobbypowers@gmail.com>
Date: Mon, 27 Apr 2015 08:10:41 -0700
Subject: [PATCH 096/101] x86/fpu: Always restore_init_xstate() when
 use_eager_fpu()

The following commit:

  f893959b0898 ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()")

removed drop_init_fpu() usage from flush_thread(). This seems to break
things for me - the Go 1.4 test suite fails all over the place with
floating point comparison errors (offending commit found through
bisection).

The functional change was that flush_thread() after this commit
only calls restore_init_xstate() when both use_eager_fpu() and
!used_math() are true. drop_init_fpu() (now fpu_reset_state()) calls
restore_init_xstate() regardless of whether current used_math() - apply
the same logic here.

Switch used_math() -> tsk_used_math(tsk) to consistently use the grabbed
tsk instead of current, like in the rest of flush_thread().

Tested-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Bobby Powers <bobbypowers@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pekka Riikonen <priikone@iki.fi>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suresh Siddha <sbsiddha@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: f893959b ("x86/fpu: Don't abuse drop_init_fpu() in flush_thread()")
Link: http://lkml.kernel.org/r/1430147441-9820-1-git-send-email-bobbypowers@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/process.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index bfc99b3b6522..6e338e3b1dc0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -156,11 +156,13 @@ void flush_thread(void)
 		/* FPU state will be reallocated lazily at the first use. */
 		drop_fpu(tsk);
 		free_thread_xstate(tsk);
-	} else if (!used_math()) {
-		/* kthread execs. TODO: cleanup this horror. */
-		if (WARN_ON(init_fpu(tsk)))
-			force_sig(SIGKILL, tsk);
-		user_fpu_begin();
+	} else {
+		if (!tsk_used_math(tsk)) {
+			/* kthread execs. TODO: cleanup this horror. */
+			if (WARN_ON(init_fpu(tsk)))
+				force_sig(SIGKILL, tsk);
+			user_fpu_begin();
+		}
 		restore_init_xstate();
 	}
 }

From 8746515d7f04c9ea94cf43e2db1fd2cfca93276d Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 24 Apr 2015 10:16:40 +0100
Subject: [PATCH 097/101] xen: Add __GFP_DMA flag when xen_swiotlb_init gets
 free pages on ARM

Make sure that xen_swiotlb_init allocates buffers that are DMA capable
when at least one memblock is available below 4G. Otherwise we assume
that all devices on the SoC can cope with >4G addresses. We do this on
ARM and ARM64, where dom0 is mapped 1:1, so pfn == mfn in this case.

No functional changes on x86.

From: Chen Baozi <baozich@gmail.com>

Signed-off-by: Chen Baozi <baozich@gmail.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Tested-by: Chen Baozi <baozich@gmail.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 arch/arm/include/asm/xen/page.h |  1 +
 arch/arm/xen/mm.c               | 15 +++++++++++++++
 arch/x86/include/asm/xen/page.h |  5 +++++
 drivers/xen/swiotlb-xen.c       |  2 +-
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 2f7e6ff67d51..0b579b2f4e0e 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 bool xen_arch_need_swiotlb(struct device *dev,
 			   unsigned long pfn,
 			   unsigned long mfn);
+unsigned long xen_get_swiotlb_free_pages(unsigned int order);
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 793551d15f1d..498325074a06 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -4,6 +4,7 @@
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
+#include <linux/memblock.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -21,6 +22,20 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/interface.h>
 
+unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+	struct memblock_region *reg;
+	gfp_t flags = __GFP_NOWARN;
+
+	for_each_memblock(memory, reg) {
+		if (reg->base < (phys_addr_t)0xffffffff) {
+			flags |= __GFP_DMA;
+			break;
+		}
+	}
+	return __get_free_pages(flags, order);
+}
+
 enum dma_cache_op {
        DMA_UNMAP,
        DMA_MAP,
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 358dcd338915..c44a5d53e464 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
 	return false;
 }
 
+static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+	return __get_free_pages(__GFP_NOWARN, order);
+}
+
 #endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 810ad419e34c..4c549323c605 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -235,7 +235,7 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-			xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order);
+			xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
 			if (xen_io_tlb_start)
 				break;
 			order--;

From c5272a28566b00cce79127ad382406e0a8650690 Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Fri, 1 May 2015 09:01:27 -0700
Subject: [PATCH 098/101] pinctrl: Don't just pretend to protect pinctrl_maps,
 do it for real

Way back, when the world was a simpler place and there was no war, no
evil, and no kernel bugs, there was just a single pinctrl lock.  That
was how the world was when (57291ce pinctrl: core device tree mapping
table parsing support) was written.  In that case, there were
instances where the pinctrl mutex was already held when
pinctrl_register_map() was called, hence a "locked" parameter was
passed to the function to indicate that the mutex was already locked
(so we shouldn't lock it again).

A few years ago in (42fed7b pinctrl: move subsystem mutex to
pinctrl_dev struct), we switched to a separate pinctrl_maps_mutex.
...but (oops) we forgot to re-think about the whole "locked" parameter
for pinctrl_register_map().  Basically the "locked" parameter appears
to still refer to whether the bigger pinctrl_dev mutex is locked, but
we're using it to skip locks of our (now separate) pinctrl_maps_mutex.

That's kind of a bad thing(TM).  Probably nobody noticed because most
of the calls to pinctrl_register_map happen at boot time and we've got
synchronous device probing.  ...and even cases where we're
asynchronous don't end up actually hitting the race too often.  ...but
after banging my head against the wall for a bug that reproduced 1 out
of 1000 reboots and lots of looking through kgdb, I finally noticed
this.

Anyway, we can now safely remove the "locked" parameter and go back to
a war-free, evil-free, and kernel-bug-free world.

Fixes: 42fed7ba44e4 ("pinctrl: move subsystem mutex to pinctrl_dev struct")
Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/core.c       | 10 ++++------
 drivers/pinctrl/core.h       |  2 +-
 drivers/pinctrl/devicetree.c |  2 +-
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 89dca77ca038..18ee2089df4a 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1110,7 +1110,7 @@ void devm_pinctrl_put(struct pinctrl *p)
 EXPORT_SYMBOL_GPL(devm_pinctrl_put);
 
 int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
-			 bool dup, bool locked)
+			 bool dup)
 {
 	int i, ret;
 	struct pinctrl_maps *maps_node;
@@ -1178,11 +1178,9 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
 		maps_node->maps = maps;
 	}
 
-	if (!locked)
-		mutex_lock(&pinctrl_maps_mutex);
+	mutex_lock(&pinctrl_maps_mutex);
 	list_add_tail(&maps_node->node, &pinctrl_maps);
-	if (!locked)
-		mutex_unlock(&pinctrl_maps_mutex);
+	mutex_unlock(&pinctrl_maps_mutex);
 
 	return 0;
 }
@@ -1197,7 +1195,7 @@ int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
 int pinctrl_register_mappings(struct pinctrl_map const *maps,
 			      unsigned num_maps)
 {
-	return pinctrl_register_map(maps, num_maps, true, false);
+	return pinctrl_register_map(maps, num_maps, true);
 }
 
 void pinctrl_unregister_map(struct pinctrl_map const *map)
diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
index 75476b3d87da..b24ea846c867 100644
--- a/drivers/pinctrl/core.h
+++ b/drivers/pinctrl/core.h
@@ -183,7 +183,7 @@ static inline struct pin_desc *pin_desc_get(struct pinctrl_dev *pctldev,
 }
 
 int pinctrl_register_map(struct pinctrl_map const *maps, unsigned num_maps,
-			 bool dup, bool locked);
+			 bool dup);
 void pinctrl_unregister_map(struct pinctrl_map const *map);
 
 extern int pinctrl_force_sleep(struct pinctrl_dev *pctldev);
diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c
index eda13de2e7c0..0bbf7d71b281 100644
--- a/drivers/pinctrl/devicetree.c
+++ b/drivers/pinctrl/devicetree.c
@@ -92,7 +92,7 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename,
 	dt_map->num_maps = num_maps;
 	list_add_tail(&dt_map->node, &p->dt_maps);
 
-	return pinctrl_register_map(map, num_maps, false, true);
+	return pinctrl_register_map(map, num_maps, false);
 }
 
 struct pinctrl_dev *of_pinctrl_get(struct device_node *np)

From 8b10c5e2b59ef2a80a07ab594a3b4987a4676211 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 1 May 2015 16:08:46 +0200
Subject: [PATCH 099/101] perf: Annotate inherited event ctx->mutex recursion

While fuzzing Sasha tripped over another ctx->mutex recursion lockdep
splat. Annotate this.

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 41 +++++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81aa3a4ece9f..1a3bf48743ce 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -913,10 +913,30 @@ static void put_ctx(struct perf_event_context *ctx)
  * Those places that change perf_event::ctx will hold both
  * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
  *
- * Lock ordering is by mutex address. There is one other site where
- * perf_event_context::mutex nests and that is put_event(). But remember that
- * that is a parent<->child context relation, and migration does not affect
- * children, therefore these two orderings should not interact.
+ * Lock ordering is by mutex address. There are two other sites where
+ * perf_event_context::mutex nests and those are:
+ *
+ *  - perf_event_exit_task_context()	[ child , 0 ]
+ *      __perf_event_exit_task()
+ *        sync_child_event()
+ *          put_event()			[ parent, 1 ]
+ *
+ *  - perf_event_init_context()		[ parent, 0 ]
+ *      inherit_task_group()
+ *        inherit_group()
+ *          inherit_event()
+ *            perf_event_alloc()
+ *              perf_init_event()
+ *                perf_try_init_event()	[ child , 1 ]
+ *
+ * While it appears there is an obvious deadlock here -- the parent and child
+ * nesting levels are inverted between the two. This is in fact safe because
+ * life-time rules separate them. That is an exiting task cannot fork, and a
+ * spawning task cannot (yet) exit.
+ *
+ * But remember that these are parent<->child context relations, and
+ * migration does not affect children, therefore these two orderings should not
+ * interact.
  *
  * The change in perf_event::ctx does not affect children (as claimed above)
  * because the sys_perf_event_open() case will install a new event and break
@@ -3657,9 +3677,6 @@ static void perf_remove_from_owner(struct perf_event *event)
 	}
 }
 
-/*
- * Called when the last reference to the file is gone.
- */
 static void put_event(struct perf_event *event)
 {
 	struct perf_event_context *ctx;
@@ -3697,6 +3714,9 @@ int perf_event_release_kernel(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 
+/*
+ * Called when the last reference to the file is gone.
+ */
 static int perf_release(struct inode *inode, struct file *file)
 {
 	put_event(file->private_data);
@@ -7364,7 +7384,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 		return -ENODEV;
 
 	if (event->group_leader != event) {
-		ctx = perf_event_ctx_lock(event->group_leader);
+		/*
+		 * This ctx->mutex can nest when we're called through
+		 * inheritance. See the perf_event_ctx_lock_nested() comment.
+		 */
+		ctx = perf_event_ctx_lock_nested(event->group_leader,
+						 SINGLE_DEPTH_NESTING);
 		BUG_ON(!ctx);
 	}
 

From 6d374056354a742eed4d0050498101e56e794c4b Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Tue, 21 Apr 2015 05:34:41 -0400
Subject: [PATCH 100/101] perf/x86/intel: Fix SLM cache event list

iTLB-load-misses and LLC-load-misses count incorrectly on SLM.

There is no ITLB.MISSES support on SLM. Event PAGE_WALKS.I_SIDE_WALKS
should be used to count iTLB-load-misses. This event counts when an
instruction (I) page walk is completed or started. Since a page walk
implies a TLB miss, the number of TLB misses can be counted by counting
the number of pagewalks.

DMND_DATA_RD counts both demand and DCU prefetch data reads. However,
LLC-load-misses should only count demand reads. There is no way to not
include prefetches with a single counter on SLM. So the LLC-load-misses
support should be removed on SLM.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1429608881-5055-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 960e85de13fb..3998131d1a68 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
-		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
+		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
@@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids
 	[ C(OP_READ) ] = {
 		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
@@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids
  [ C(ITLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
-		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
+		[ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,

From 44b11fee51711ca85aa2b121a49bf029d18a3722 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 23 Apr 2015 09:07:09 +0200
Subject: [PATCH 101/101] perf/x86/rapl: Enable Broadwell-U RAPL support

This patch enables support for the RAPL counters (energy consumption
counters) on Intel Broadwell-U processors (Model 61).

To use:

  $ perf stat -a -I 1000 -e power/energy-cores/,power/energy-pkg/,power/energy-ram/ sleep 10

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: <stable@vger.kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jacob.jun.pan@linux.intel.com
Cc: kan.liang@intel.com
Cc: peterz@infradead.org
Cc: sonnyrao@chromium.org
Link: http://lkml.kernel.org/r/20150423070709.GA4970@thinkpad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_rapl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 999289b94025..358c54ad20d4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void)
 		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
+	case 61: /* Broadwell */
 		rapl_cntr_mask = RAPL_IDX_HSW;
 		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
 		break;