From bf31c5e07db81073aa7fedc5401afa967b177fe2 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Mon, 18 Jul 2016 13:09:36 +0100
Subject: [PATCH 01/35] ARM: 8587/1: dma-mapping: Use %zu for printing a size_t
 variable

According to Documentation/printk-formats.txt when printing
a size_t variable we should use %zu or %zx format specifiers.

As we are printing a memory size value, we should better use %zu
in this case.

Reported-by: Frank Mori Hess <fmh6jj@gmail.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/dma-mapping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c6834c0cfd1c..a2302aba5df2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -436,7 +436,7 @@ static int __init atomic_pool_init(void)
 		gen_pool_set_algo(atomic_pool,
 				gen_pool_first_fit_order_align,
 				(void *)PAGE_SHIFT);
-		pr_info("DMA: preallocated %zd KiB pool for atomic coherent allocations\n",
+		pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n",
 		       atomic_pool_size / 1024);
 		return 0;
 	}
@@ -445,7 +445,7 @@ static int __init atomic_pool_init(void)
 	gen_pool_destroy(atomic_pool);
 	atomic_pool = NULL;
 out:
-	pr_err("DMA: failed to allocate %zx KiB pool for atomic coherent allocation\n",
+	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
 	       atomic_pool_size / 1024);
 	return -ENOMEM;
 }

From 3cb59581a860f5f542b37aed214f74b3f2ccfa32 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 19 Jul 2016 11:23:37 +0100
Subject: [PATCH 02/35] ARM: 8588/1: localise objcopy flags

We currently define OBJCOPYFLAGS in the top-level arm Makefile, and thus
these flags will be passed to all uses of objcopy, kernel-wide, for
which they are not explicitly overridden. The flags we set are intended
for converting a few ELF files into raw binaries, and thus the flags
chosen are problematic for some other uses which do not expect a raw
binary result, e.g. the upcoming lkdtm rodata test:

  http://www.openwall.com/lists/kernel-hardening/2016/06/08/2

This patch localises the objcopy flags such that they are only used for
the cases we require them for today.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Kees Cook <keescook@chromium.org>
Tested-by: Laura Abbott <labbott@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Makefile      | 1 -
 arch/arm/boot/Makefile | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 56ea5c60b318..6e30b8bc21b9 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -23,7 +23,6 @@ ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
 LDFLAGS_MODULE	+= -T $(srctree)/arch/arm/kernel/module.lds
 endif
 
-OBJCOPYFLAGS	:=-O binary -R .comment -S
 GZFLAGS		:=-9
 #KBUILD_CFLAGS	+=-pipe
 
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index bdc1d5af03d2..50f8d1be7fcb 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -11,6 +11,8 @@
 # Copyright (C) 1995-2002 Russell King
 #
 
+OBJCOPYFLAGS	:=-O binary -R .comment -S
+
 ifneq ($(MACHINE),)
 include $(MACHINE)/Makefile.boot
 endif

From 7d281b620d229486429d851b10a05da871d22e79 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 28 Jul 2016 19:37:21 +0100
Subject: [PATCH 03/35] ARM: 8589/1: asm/memory.h: remove dead definitions

The last ad-hoc __phys_to_virt definition was removed in commit fd0053c9
("ARM: realview: remove sparsemem hack"). Therefore we can remove the
unneeded definitions and unduplicate the virt_to_pfn macro from
asm/memory.h.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/memory.h | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 31c07a2cc100..76cbd9c674df 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -159,13 +159,8 @@
  * PFNs are used to describe any physical page; this means
  * PFN 0 == physical address 0.
  */
-#if defined(__virt_to_phys)
-#define PHYS_OFFSET	PLAT_PHYS_OFFSET
-#define PHYS_PFN_OFFSET	((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
 
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
-
-#elif defined(CONFIG_ARM_PATCH_PHYS_VIRT)
+#if defined(CONFIG_ARM_PATCH_PHYS_VIRT)
 
 /*
  * Constants used to force the right instruction encodings and shifts
@@ -182,10 +177,6 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
 
-#define virt_to_pfn(kaddr) \
-	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
-	 PHYS_PFN_OFFSET)
-
 #define __pv_stub(from,to,instr,type)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
@@ -257,12 +248,12 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
 	return x - PHYS_OFFSET + PAGE_OFFSET;
 }
 
+#endif
+
 #define virt_to_pfn(kaddr) \
 	((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
 	 PHYS_PFN_OFFSET)
 
-#endif
-
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
  * Note: Drivers should NOT use these.  They are the wrong

From fc1473103cfa0b785dd3ff8de2430fec42cfc8ad Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Wed, 3 Aug 2016 20:29:38 +0100
Subject: [PATCH 04/35] ARM: 8592/1: cache-l2x0.c: Replace magic numbers

Replace magic numbers used for L310 Prefetch Control Register

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index cc12905ae6f8..7e624872bd6f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -710,8 +710,10 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
 	    revision < L310_CACHE_ID_RTL_R3P2) {
 		u32 val = l2x0_saved_regs.prefetch_ctrl;
 		/* I don't think bit23 is required here... but iMX6 does so */
-		if (val & (BIT(30) | BIT(23))) {
-			val &= ~(BIT(30) | BIT(23));
+		if (val & (L310_PREFETCH_CTRL_DBL_LINEFILL |
+			   L310_PREFETCH_CTRL_DBL_LINEFILL_INCR)) {
+			val &= ~(L310_PREFETCH_CTRL_DBL_LINEFILL |
+				 L310_PREFETCH_CTRL_DBL_LINEFILL_INCR);
 			l2x0_saved_regs.prefetch_ctrl = val;
 			errata[n++] = "752271";
 		}

From 55604b7ab1b5b8f560721e69b1ac059bd8d2078e Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Wed, 3 Aug 2016 20:33:34 +0100
Subject: [PATCH 05/35] ARM: 8593/1: cache-l2x0.c: Do not clear bit 23 in
 prefetch control register

As per L2C-310 TRM[1]:

"... You can control this feature using bits 30,27 and 23 of the
Prefetch Control Register. Bit 23 and 27 are only used if you set bit 30
HIGH..."

which means there is no need to clear bit 23 if bit 30 is being cleared.

[1] http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0246e/CJAJACBJ.html

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 7e624872bd6f..ca5595fa072a 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -709,11 +709,8 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
 	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
 	    revision < L310_CACHE_ID_RTL_R3P2) {
 		u32 val = l2x0_saved_regs.prefetch_ctrl;
-		/* I don't think bit23 is required here... but iMX6 does so */
-		if (val & (L310_PREFETCH_CTRL_DBL_LINEFILL |
-			   L310_PREFETCH_CTRL_DBL_LINEFILL_INCR)) {
-			val &= ~(L310_PREFETCH_CTRL_DBL_LINEFILL |
-				 L310_PREFETCH_CTRL_DBL_LINEFILL_INCR);
+		if (val & L310_PREFETCH_CTRL_DBL_LINEFILL) {
+			val &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
 			l2x0_saved_regs.prefetch_ctrl = val;
 			errata[n++] = "752271";
 		}

From d782e426b835bd2e79d868eb4af8510ed79e0aee Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Fri, 5 Aug 2016 06:03:41 +0100
Subject: [PATCH 06/35] ARM: 8594/1: enable binfmt_flat on systems with an MMU

Now that the generic changes are in place, this can be enabled on ARM
with the use of proper user space accessors in the flat_get_addr_from_rp()
and flat_put_addr_at_rp() handlers as rp actually holds a user space
address.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/flat.h | 5 +++--
 fs/Kconfig.binfmt           | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h
index e847d23351ed..acf1d14b89a6 100644
--- a/arch/arm/include/asm/flat.h
+++ b/arch/arm/include/asm/flat.h
@@ -8,8 +8,9 @@
 #define	flat_argvp_envp_on_stack()		1
 #define	flat_old_ram_flag(flags)		(flags)
 #define	flat_reloc_valid(reloc, size)		((reloc) <= (size))
-#define	flat_get_addr_from_rp(rp, relval, flags, persistent) ((void)persistent,get_unaligned(rp))
-#define	flat_put_addr_at_rp(rp, val, relval)	put_unaligned(val,rp)
+#define	flat_get_addr_from_rp(rp, relval, flags, persistent) \
+	({ unsigned long __val; __get_user_unaligned(__val, rp); __val; })
+#define	flat_put_addr_at_rp(rp, val, relval)	__put_user_unaligned(val, rp)
 #define	flat_get_relocate_addr(rel)		(rel)
 #define	flat_set_persistent(relval, p)		0
 
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index c7efddf6e038..4c09d93d9569 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -89,7 +89,7 @@ config BINFMT_SCRIPT
 
 config BINFMT_FLAT
 	bool "Kernel support for flat binaries"
-	depends on !MMU || M68K
+	depends on !MMU || ARM || M68K
 	depends on !FRV || BROKEN
 	help
 	  Support uClinux FLAT format binaries.

From 7619751f8c900fa5fdd76db06f4caf095c56de8e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 10 Aug 2016 22:46:49 +0100
Subject: [PATCH 07/35] ARM: 8595/2: apply more __ro_after_init

Guided by grsecurity's analogous __read_only markings in arch/arm,
this applies several uses of __ro_after_init to structures that are
only updated during __init.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/cpuidle.c |  2 +-
 arch/arm/kernel/setup.c   | 10 +++++-----
 arch/arm/kernel/smp.c     |  2 +-
 arch/arm/lib/delay.c      |  2 +-
 arch/arm/mm/mmu.c         |  2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
index 7dccc964d75f..a3308ad1a024 100644
--- a/arch/arm/kernel/cpuidle.c
+++ b/arch/arm/kernel/cpuidle.c
@@ -19,7 +19,7 @@ extern struct of_cpuidle_method __cpuidle_method_of_table[];
 static const struct of_cpuidle_method __cpuidle_method_of_table_sentinel
 	__used __section(__cpuidle_method_of_table_end);
 
-static struct cpuidle_ops cpuidle_ops[NR_CPUS];
+static struct cpuidle_ops cpuidle_ops[NR_CPUS] __ro_after_init;
 
 /**
  * arm_cpuidle_simple_enter() - a wrapper to cpu_do_idle()
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index df7f2a75e769..aca999e17184 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -114,19 +114,19 @@ EXPORT_SYMBOL(elf_hwcap2);
 
 
 #ifdef MULTI_CPU
-struct processor processor __read_mostly;
+struct processor processor __ro_after_init;
 #endif
 #ifdef MULTI_TLB
-struct cpu_tlb_fns cpu_tlb __read_mostly;
+struct cpu_tlb_fns cpu_tlb __ro_after_init;
 #endif
 #ifdef MULTI_USER
-struct cpu_user_fns cpu_user __read_mostly;
+struct cpu_user_fns cpu_user __ro_after_init;
 #endif
 #ifdef MULTI_CACHE
-struct cpu_cache_fns cpu_cache __read_mostly;
+struct cpu_cache_fns cpu_cache __ro_after_init;
 #endif
 #ifdef CONFIG_OUTER_CACHE
-struct outer_cache_fns outer_cache __read_mostly;
+struct outer_cache_fns outer_cache __ro_after_init;
 EXPORT_SYMBOL(outer_cache);
 #endif
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 861521606c6d..937c8920d741 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -82,7 +82,7 @@ enum ipi_msg_type {
 
 static DECLARE_COMPLETION(cpu_running);
 
-static struct smp_operations smp_ops;
+static struct smp_operations smp_ops __ro_after_init;
 
 void __init smp_set_ops(const struct smp_operations *ops)
 {
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
index 8044591dca72..2cef11884857 100644
--- a/arch/arm/lib/delay.c
+++ b/arch/arm/lib/delay.c
@@ -29,7 +29,7 @@
 /*
  * Default to the loop-based delay implementation.
  */
-struct arm_delay_ops arm_delay_ops = {
+struct arm_delay_ops arm_delay_ops __ro_after_init = {
 	.delay		= __loop_delay,
 	.const_udelay	= __loop_const_udelay,
 	.udelay		= __loop_udelay,
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 62f4d01941f7..2396935e715c 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -243,7 +243,7 @@ __setup("noalign", noalign_setup);
 #define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE
 #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 
-static struct mem_type mem_types[] = {
+static struct mem_type mem_types[] __ro_after_init = {
 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
 		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
 				  L_PTE_SHARED,

From bcd3006f07ef9c80ac225854c1518c22f2f56d71 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 10 Aug 2016 20:17:34 +0100
Subject: [PATCH 08/35] ARM: 8596/1: amba: Support clk parents and rates
 assigned in DT

Add the call to of_clk_set_defaults() into the amba probe path so
that devices on the amba bus can use the assigned rates and
parents feature of the common clock framework.

Cc: Michael Turquette <mturquette@baylibre.com>
Tested-by: Jorge Ramirez Ortiz <jorge.ramirez-ortiz@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/amba/bus.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index a5b5c87e2114..a56fa2a1e9aa 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -19,6 +19,7 @@
 #include <linux/amba/bus.h>
 #include <linux/sizes.h>
 #include <linux/limits.h>
+#include <linux/clk/clk-conf.h>
 
 #include <asm/irq.h>
 
@@ -237,6 +238,10 @@ static int amba_probe(struct device *dev)
 	int ret;
 
 	do {
+		ret = of_clk_set_defaults(dev->of_node, false);
+		if (ret < 0)
+			break;
+
 		ret = dev_pm_domain_attach(dev, true);
 		if (ret == -EPROBE_DEFER)
 			break;

From 92bb8d5d55f7fe1a7a1201c42120c1611840807c Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Mon, 15 Aug 2016 09:20:21 +0100
Subject: [PATCH 09/35] ARM: 8597/1: VDSO: put RO and RO after init objects
 into proper sections

vdso_data_mapping is never modified, so mark it as const.

vdso_total_pages, vdso_data_page, vdso_text_mapping and cntvct_ok are
initialized by vdso_init(), thereafter are read only.

The fact that they are read only after init makes them candidates for
__ro_after_init declarations.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Nathan Lynch <nathan_lynch@mentor.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/vdso.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 994e971a8538..bbbffe946122 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/cache.h>
 #include <linux/elf.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -39,7 +40,7 @@
 static struct page **vdso_text_pagelist;
 
 /* Total number of pages needed for the data and text portions of the VDSO. */
-unsigned int vdso_total_pages __read_mostly;
+unsigned int vdso_total_pages __ro_after_init;
 
 /*
  * The VDSO data page.
@@ -47,13 +48,13 @@ unsigned int vdso_total_pages __read_mostly;
 static union vdso_data_store vdso_data_store __page_aligned_data;
 static struct vdso_data *vdso_data = &vdso_data_store.data;
 
-static struct page *vdso_data_page;
-static struct vm_special_mapping vdso_data_mapping = {
+static struct page *vdso_data_page __ro_after_init;
+static const struct vm_special_mapping vdso_data_mapping = {
 	.name = "[vvar]",
 	.pages = &vdso_data_page,
 };
 
-static struct vm_special_mapping vdso_text_mapping = {
+static struct vm_special_mapping vdso_text_mapping __ro_after_init = {
 	.name = "[vdso]",
 };
 
@@ -67,7 +68,7 @@ struct elfinfo {
 /* Cached result of boot-time check for whether the arch timer exists,
  * and if so, whether the virtual counter is useable.
  */
-static bool cntvct_ok __read_mostly;
+static bool cntvct_ok __ro_after_init;
 
 static bool __init cntvct_functional(void)
 {

From 10b52a49ac62cd8cedc08c50178dc6fbb8925b87 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 18 Aug 2016 11:45:18 +0100
Subject: [PATCH 10/35] ARM: 8598/1: remove traces of perf_ops_bp

Even though perf_ops_bp was removed/renamed back in commit
b0a873ebbf87bf38 ("perf: Register PMU implementations"), as part of
v2.6.37, its definition still lives on in some arch headers.

This patch removes the vestigal definition from arm.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/hw_breakpoint.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index 8e427c7b4425..afcaf8bf971b 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -114,7 +114,6 @@ struct notifier_block;
 struct perf_event;
 struct pmu;
 
-extern struct pmu perf_ops_bp;
 extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
 				  int *gen_len, int *gen_type);
 extern int arch_check_bp_in_kernelspace(struct perf_event *bp);

From 83809b90a6dbedbcd94fcb99c9cde9477534f3d3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Fri, 19 Aug 2016 15:15:03 +0100
Subject: [PATCH 11/35] ARM: sa1100: move StrongARM CPU ID checks to cputype.h

Move the StrongARM CPU ID checks out of the platform's hardware.h
file into asm/cputype.h

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/cputype.h               | 21 ++++++++++++++++++++
 arch/arm/mach-sa1100/include/mach/hardware.h | 18 -----------------
 drivers/cpufreq/sa1110-cpufreq.c             |  2 +-
 3 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 1ee94c716a7f..d6a4902a75d7 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -60,6 +60,7 @@
 	((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK)
 
 #define ARM_CPU_IMP_ARM			0x41
+#define ARM_CPU_IMP_DEC			0x44
 #define ARM_CPU_IMP_INTEL		0x69
 
 /* ARM implemented processors */
@@ -76,6 +77,17 @@
 #define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
 #define ARM_CPU_PART_MASK		0xff00fff0
 
+/* DEC implemented cores */
+#define ARM_CPU_PART_SA1100		0x4400a110
+
+/* Intel implemented cores */
+#define ARM_CPU_PART_SA1110		0x6900b110
+#define ARM_CPU_REV_SA1110_A0		0
+#define ARM_CPU_REV_SA1110_B0		4
+#define ARM_CPU_REV_SA1110_B1		5
+#define ARM_CPU_REV_SA1110_B2		6
+#define ARM_CPU_REV_SA1110_B4		8
+
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
 #define ARM_CPU_XSCALE_ARCH_V2		0x4000
@@ -173,6 +185,11 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
 	return (read_cpuid_id() & 0xFF000000) >> 24;
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_revision(void)
+{
+	return read_cpuid_id() & 0x0000000f;
+}
+
 /*
  * The CPU part number is meaningless without referring to the CPU
  * implementer: implementers are free to define their own part numbers
@@ -208,6 +225,10 @@ static inline unsigned int __attribute_const__ read_cpuid_mpidr(void)
 	return read_cpuid(CPUID_MPIDR);
 }
 
+/* StrongARM-11x0 CPUs */
+#define cpu_is_sa1100() (read_cpuid_part() == ARM_CPU_PART_SA1100)
+#define cpu_is_sa1110() (read_cpuid_part() == ARM_CPU_PART_SA1110)
+
 /*
  * Intel's XScale3 core supports some v6 features (supersections, L2)
  * but advertises itself as v5 as it does not support the v6 ISA.  For
diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h
index cbedd75a9d65..55c85ce7010d 100644
--- a/arch/arm/mach-sa1100/include/mach/hardware.h
+++ b/arch/arm/mach-sa1100/include/mach/hardware.h
@@ -36,28 +36,10 @@
 #define io_v2p( x )             \
    ( (((x)&0x00ffffff) | (((x)&(0x30000000>>VIO_SHIFT))<<VIO_SHIFT)) + PIO_START )
 
-#define CPU_SA1110_A0	(0)
-#define CPU_SA1110_B0	(4)
-#define CPU_SA1110_B1	(5)
-#define CPU_SA1110_B2	(6)
-#define CPU_SA1110_B4	(8)
-
-#define CPU_SA1100_ID	(0x4401a110)
-#define CPU_SA1100_MASK	(0xfffffff0)
-#define CPU_SA1110_ID	(0x6901b110)
-#define CPU_SA1110_MASK	(0xfffffff0)
-
 #define __MREG(x)	IOMEM(io_p2v(x))
 
 #ifndef __ASSEMBLY__
 
-#include <asm/cputype.h>
-
-#define CPU_REVISION	(read_cpuid_id() & 15)
-
-#define cpu_is_sa1100()	((read_cpuid_id() & CPU_SA1100_MASK) == CPU_SA1100_ID)
-#define cpu_is_sa1110()	((read_cpuid_id() & CPU_SA1110_MASK) == CPU_SA1110_ID)
-
 # define __REG(x)	(*((volatile unsigned long __iomem *)io_p2v(x)))
 # define __PREG(x)	(io_v2p((unsigned long)&(x)))
 
diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c
index b5befc211172..2bac9b6cfeea 100644
--- a/drivers/cpufreq/sa1110-cpufreq.c
+++ b/drivers/cpufreq/sa1110-cpufreq.c
@@ -159,7 +159,7 @@ sdram_calculate_timing(struct sdram_info *sd, u_int cpu_khz,
 	 * half speed or use delayed read latching (errata 13).
 	 */
 	if ((ns_to_cycles(sdram->tck, sd_khz) > 1) ||
-	    (CPU_REVISION < CPU_SA1110_B2 && sd_khz < 62000))
+	    (read_cpuid_revision() < ARM_CPU_REV_SA1110_B2 && sd_khz < 62000))
 		sd_khz /= 2;
 
 	sd->mdcnfg = MDCNFG & 0x007f007f;

From c94e4ad2d5e0e5f4036cccaa0e6e773841e8f859 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Fri, 19 Aug 2016 16:24:36 +0100
Subject: [PATCH 12/35] ARM: document and update UNCACHEABLE_ADDR definitions

Document the UNCACHEABLE_ADDR definitions for footbridge and SA1100
so that we know where they're located and/or what they're accessing.
Change RiscPC to calculate the UNCACHEABLE_ADDR value from FLUSH_BASE
as that's where we locate that.

UNCACHEABLE_ADDR is used to perform an uncached access (ARMv4
terminology) necessary to force a CPU clock-switch to the memory-
speed clock, as required for entering WFI.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mach-footbridge/include/mach/hardware.h | 2 +-
 arch/arm/mach-rpc/include/mach/hardware.h        | 2 +-
 arch/arm/mach-sa1100/include/mach/hardware.h     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h
index 02f6d7a706b1..20d5ad781fe2 100644
--- a/arch/arm/mach-footbridge/include/mach/hardware.h
+++ b/arch/arm/mach-footbridge/include/mach/hardware.h
@@ -59,7 +59,7 @@
 #define XBUS_SWITCH_J17_11	((*XBUS_SWITCH) & (1 << 5))
 #define XBUS_SWITCH_J17_9	((*XBUS_SWITCH) & (1 << 6))
 
-#define UNCACHEABLE_ADDR	(ARMCSR_BASE + 0x108)
+#define UNCACHEABLE_ADDR	(ARMCSR_BASE + 0x108)	/* CSR_ROMBASEMASK */
 
 
 /* PIC irq control */
diff --git a/arch/arm/mach-rpc/include/mach/hardware.h b/arch/arm/mach-rpc/include/mach/hardware.h
index 257166b21f3d..aa79fa47373a 100644
--- a/arch/arm/mach-rpc/include/mach/hardware.h
+++ b/arch/arm/mach-rpc/include/mach/hardware.h
@@ -40,7 +40,7 @@
 #define SCREEN_END		0xdfc00000
 #define SCREEN_BASE		0xdf800000
 
-#define UNCACHEABLE_ADDR	0xdf010000
+#define UNCACHEABLE_ADDR	(FLUSH_BASE + 0x10000)
 
 /*
  * IO Addresses
diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h
index 55c85ce7010d..d944fd7e464f 100644
--- a/arch/arm/mach-sa1100/include/mach/hardware.h
+++ b/arch/arm/mach-sa1100/include/mach/hardware.h
@@ -13,7 +13,7 @@
 #define __ASM_ARCH_HARDWARE_H
 
 
-#define UNCACHEABLE_ADDR	0xfa050000
+#define UNCACHEABLE_ADDR	0xfa050000	/* ICIP */
 
 
 /*

From 5a0e0691147a7f841ddb54d067b165071dff592f Mon Sep 17 00:00:00 2001
From: Andy Gross <andy.gross@linaro.org>
Date: Tue, 23 Aug 2016 14:25:32 +0100
Subject: [PATCH 13/35] ARM: 8601/1: Remove unused secure_flush_area API

This patch removes the unused secure_flush_area function.  The only
consumer of this function has moved to using the streaming DMA APIs.

Signed-off-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cacheflush.h | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 9156fc303afd..bdd283bc5842 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -501,21 +501,4 @@ static inline void set_kernel_text_ro(void) { }
 void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
 			     void *kaddr, unsigned long len);
 
-/**
- * secure_flush_area - ensure coherency across the secure boundary
- * @addr: virtual address
- * @size: size of region
- *
- * Ensure that the specified area of memory is coherent across the secure
- * boundary from the non-secure side.  This is used when calling secure
- * firmware where the secure firmware does not ensure coherency.
- */
-static inline void secure_flush_area(const void *addr, size_t size)
-{
-	phys_addr_t phys = __pa(addr);
-
-	__cpuc_flush_dcache_area((void *)addr, size);
-	outer_flush_range(phys, phys + size);
-}
-
 #endif

From 35fa91eed817d2c65c59ef5a9737011313be6ac0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Aug 2016 17:21:02 +0200
Subject: [PATCH 14/35] ARM: kernel: merge core and init PLTs

The PLT code uses a separate .init.plt section to allocate PLT entries
for jump and call instructions in __init code. However, even for fairly
sizable modules like mac80211.ko, we only end up with a couple of PLT
entries in the .init section, and so we can simplify the code
significantly by emitting all PLT entries into the same section.

Tested-by: Jongsung Kim <neidhard.kim@lge.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/include/asm/module.h |  6 ++--
 arch/arm/kernel/module-plts.c | 64 +++++++++++------------------------
 arch/arm/kernel/module.lds    |  3 +-
 3 files changed, 23 insertions(+), 50 deletions(-)

diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index e358b7966c06..464748b9fd7d 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -23,10 +23,8 @@ struct mod_arch_specific {
 	struct unwind_table *unwind[ARM_SEC_MAX];
 #endif
 #ifdef CONFIG_ARM_MODULE_PLTS
-	struct elf32_shdr   *core_plt;
-	struct elf32_shdr   *init_plt;
-	int		    core_plt_count;
-	int		    init_plt_count;
+	struct elf32_shdr   *plt;
+	int		    plt_count;
 #endif
 };
 
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 0c7efc3446c0..6832d1d6444e 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -30,28 +30,16 @@ struct plt_entries {
 	u32	lit[PLT_ENT_COUNT];
 };
 
-static bool in_init(const struct module *mod, u32 addr)
-{
-	return addr - (u32)mod->init_layout.base < mod->init_layout.size;
-}
-
 u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 {
 	struct plt_entries *plt, *plt_end;
-	int c, *count;
+	int c;
 
-	if (in_init(mod, loc)) {
-		plt = (void *)mod->arch.init_plt->sh_addr;
-		plt_end = (void *)plt + mod->arch.init_plt->sh_size;
-		count = &mod->arch.init_plt_count;
-	} else {
-		plt = (void *)mod->arch.core_plt->sh_addr;
-		plt_end = (void *)plt + mod->arch.core_plt->sh_size;
-		count = &mod->arch.core_plt_count;
-	}
+	plt = (void *)mod->arch.plt->sh_addr;
+	plt_end = (void *)plt + mod->arch.plt->sh_size;
 
 	/* Look for an existing entry pointing to 'val' */
-	for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
+	for (c = mod->arch.plt_count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
 		int i;
 
 		if (!c) {
@@ -60,13 +48,13 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 				{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
 				{ val, }
 			};
-			++*count;
+			mod->arch.plt_count++;
 			return (u32)plt->ldr;
 		}
 		for (i = 0; i < PLT_ENT_COUNT; i++) {
 			if (!plt->lit[i]) {
 				plt->lit[i] = val;
-				++*count;
+				mod->arch.plt_count++;
 			}
 			if (plt->lit[i] == val)
 				return (u32)&plt->ldr[i];
@@ -132,21 +120,19 @@ static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
-	unsigned long core_plts = 0, init_plts = 0;
+	unsigned long plts = 0;
 	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
 
 	/*
 	 * To store the PLTs, we expand the .text section for core module code
-	 * and the .init.text section for initialization code.
+	 * and for initialization code.
 	 */
 	for (s = sechdrs; s < sechdrs_end; ++s)
-		if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
-			mod->arch.core_plt = s;
-		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
-			mod->arch.init_plt = s;
+		if (strcmp(".plt", secstrings + s->sh_name) == 0)
+			mod->arch.plt = s;
 
-	if (!mod->arch.core_plt || !mod->arch.init_plt) {
-		pr_err("%s: sections missing\n", mod->name);
+	if (!mod->arch.plt) {
+		pr_err("%s: module PLT section missing\n", mod->name);
 		return -ENOEXEC;
 	}
 
@@ -158,26 +144,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		if (s->sh_type != SHT_REL)
 			continue;
 
-		if (strstr(secstrings + s->sh_name, ".init"))
-			init_plts += count_plts(dstsec->sh_addr, rels, numrels);
-		else
-			core_plts += count_plts(dstsec->sh_addr, rels, numrels);
+		plts += count_plts(dstsec->sh_addr, rels, numrels);
 	}
 
-	mod->arch.core_plt->sh_type = SHT_NOBITS;
-	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
-					       sizeof(struct plt_entries));
-	mod->arch.core_plt_count = 0;
+	mod->arch.plt->sh_type = SHT_NOBITS;
+	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
+					  sizeof(struct plt_entries));
+	mod->arch.plt_count = 0;
 
-	mod->arch.init_plt->sh_type = SHT_NOBITS;
-	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
-					       sizeof(struct plt_entries));
-	mod->arch.init_plt_count = 0;
-	pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
-		 mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
+	pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
 	return 0;
 }
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
index 3682fa107918..05881e2b414c 100644
--- a/arch/arm/kernel/module.lds
+++ b/arch/arm/kernel/module.lds
@@ -1,4 +1,3 @@
 SECTIONS {
-        .core.plt : { BYTE(0) }
-        .init.plt : { BYTE(0) }
+	.plt : { BYTE(0) }
 }

From 05123fef098220323e60834d5520b15d277e0415 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Aug 2016 16:49:56 +0200
Subject: [PATCH 15/35] ARM: kernel: allocate PLT entries only for external
 symbols

When CONFIG_ARM_MODULE_PLTS is enabled, jump and call instructions in
modules no longer need to be within 16 MB (8 MB for Thumb2) of their
targets. If they are further away, a PLT entry will be generated on the
fly for each of them, which extends the range to the entire 32-bit
address space.

However, since these PLT entries will become the branch targets of the
original jump and call instructions, the PLT itself needs to be in
range, or we end up in the same situation we started in. Since the PLT
is in a separate section, this essentially means that all jumps and calls
inside the same module must be resolvable without PLT entries.

The PLT allocation code executes before the module itself is loaded in
its final location, and so it has to use a worst-case estimate for
which jumps and calls will require an entry in the PLT at relocation
time. As an optimization, this code deduplicates entries pointing to
the same symbol, using a O(n^2) algorithm. However, it does not take
the above into account, i.e., that PLT entries will only be needed for
jump and call relocations against symbols that are not defined in the
module.

So disregard relocations against symbols that are defined in the module
itself.

As an additional minor optimization, ignore input sections that lack
the SHF_EXECINSTR flag. Since jump and call relocations operate on
executable instructions only, there is no need to look in sections that
do not contain executable code.

Tested-by: Jongsung Kim <neidhard.kim@lge.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/module-plts.c | 49 ++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 6832d1d6444e..6f93a905eeee 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -88,32 +88,45 @@ static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
 }
 
 /* Count how many PLT entries we may need */
-static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
+static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
+			       const Elf32_Rel *rel, int num)
 {
 	unsigned int ret = 0;
+	const Elf32_Sym *s;
+	u32 mask;
 	int i;
 
+	if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
+		mask = __opcode_to_mem_thumb32(0x07ff2fff);
+	else
+		mask = __opcode_to_mem_arm(0x00ffffff);
+
 	/*
 	 * Sure, this is order(n^2), but it's usually short, and not
 	 * time critical
 	 */
-	for (i = 0; i < num; i++)
+	for (i = 0; i < num; i++) {
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 		case R_ARM_CALL:
 		case R_ARM_PC24:
 		case R_ARM_JUMP24:
-			if (!duplicate_rel(base, rel, i,
-					   __opcode_to_mem_arm(0x00ffffff)))
-				ret++;
-			break;
-#ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
-			if (!duplicate_rel(base, rel, i,
-					   __opcode_to_mem_thumb32(0x07ff2fff)))
+			/*
+			 * We only have to consider branch targets that resolve
+			 * to undefined symbols. This is not simply a heuristic,
+			 * it is a fundamental limitation, since the PLT itself
+			 * is part of the module, and needs to be within range
+			 * as well, so modules can never grow beyond that limit.
+			 */
+			s = syms + ELF32_R_SYM(rel[i].r_info);
+			if (s->st_shndx != SHN_UNDEF)
+				break;
+
+			if (!duplicate_rel(base, rel, i, mask))
 				ret++;
-#endif
 		}
+	}
 	return ret;
 }
 
@@ -122,19 +135,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 {
 	unsigned long plts = 0;
 	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+	Elf32_Sym *syms = NULL;
 
 	/*
 	 * To store the PLTs, we expand the .text section for core module code
 	 * and for initialization code.
 	 */
-	for (s = sechdrs; s < sechdrs_end; ++s)
+	for (s = sechdrs; s < sechdrs_end; ++s) {
 		if (strcmp(".plt", secstrings + s->sh_name) == 0)
 			mod->arch.plt = s;
+		else if (s->sh_type == SHT_SYMTAB)
+			syms = (Elf32_Sym *)s->sh_addr;
+	}
 
 	if (!mod->arch.plt) {
 		pr_err("%s: module PLT section missing\n", mod->name);
 		return -ENOEXEC;
 	}
+	if (!syms) {
+		pr_err("%s: module symtab section missing\n", mod->name);
+		return -ENOEXEC;
+	}
 
 	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
 		const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
@@ -144,7 +165,11 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		if (s->sh_type != SHT_REL)
 			continue;
 
-		plts += count_plts(dstsec->sh_addr, rels, numrels);
+		/* ignore relocations that operate on non-exec sections */
+		if (!(dstsec->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
 	}
 
 	mod->arch.plt->sh_type = SHT_NOBITS;

From 1031a7e674d1de481d641c3723d5f53b776f621f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 17 Aug 2016 13:45:21 +0200
Subject: [PATCH 16/35] ARM: kernel: sort relocation sections before allocating
 PLTs

The PLT allocation routines try to establish an upper bound on the
number of PLT entries that will be required at relocation time, and
optimize this by disregarding duplicates (i.e., PLT entries that will
end up pointing to the same function). This is currently a O(n^2)
algorithm, but we can greatly simplify this by
- sorting the relocation section so that relocations that can use the
  same PLT entry will be listed adjacently,
- disregard jump/call relocations with addends; these are highly unusual,
  for relocations against SHN_UNDEF symbols, and so we can simply allocate
  a PLT entry for each one we encounter, without trying to optimize away
  duplicates.

Tested-by: Jongsung Kim <neidhard.kim@lge.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/module-plts.c | 98 ++++++++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 29 deletions(-)

diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 6f93a905eeee..ad1b98fbcd98 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -9,6 +9,7 @@
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/sort.h>
 
 #include <asm/cache.h>
 #include <asm/opcodes.h>
@@ -63,28 +64,63 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 	BUG();
 }
 
-static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
-			   u32 mask)
+#define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rel(const void *a, const void *b)
 {
-	u32 *loc1, *loc2;
+	const Elf32_Rel *x = a, *y = b;
 	int i;
 
-	for (i = 0; i < num; i++) {
-		if (rel[i].r_info != rel[num].r_info)
-			continue;
+	/* sort by type and symbol index */
+	i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info));
+	if (i == 0)
+		i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info));
+	return i;
+}
 
-		/*
-		 * Identical relocation types against identical symbols can
-		 * still result in different PLT entries if the addend in the
-		 * place is different. So resolve the target of the relocation
-		 * to compare the values.
-		 */
-		loc1 = (u32 *)(base + rel[i].r_offset);
-		loc2 = (u32 *)(base + rel[num].r_offset);
-		if (((*loc1 ^ *loc2) & mask) == 0)
-			return 1;
+static bool is_zero_addend_relocation(Elf32_Addr base, const Elf32_Rel *rel)
+{
+	u32 *tval = (u32 *)(base + rel->r_offset);
+
+	/*
+	 * Do a bitwise compare on the raw addend rather than fully decoding
+	 * the offset and doing an arithmetic comparison.
+	 * Note that a zero-addend jump/call relocation is encoded taking the
+	 * PC bias into account, i.e., -8 for ARM and -4 for Thumb2.
+	 */
+	switch (ELF32_R_TYPE(rel->r_info)) {
+		u16 upper, lower;
+
+	case R_ARM_THM_CALL:
+	case R_ARM_THM_JUMP24:
+		upper = __mem_to_opcode_thumb16(((u16 *)tval)[0]);
+		lower = __mem_to_opcode_thumb16(((u16 *)tval)[1]);
+
+		return (upper & 0x7ff) == 0x7ff && (lower & 0x2fff) == 0x2ffe;
+
+	case R_ARM_CALL:
+	case R_ARM_PC24:
+	case R_ARM_JUMP24:
+		return (__mem_to_opcode_arm(*tval) & 0xffffff) == 0xfffffe;
 	}
-	return 0;
+	BUG();
+}
+
+static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
+{
+	const Elf32_Rel *prev;
+
+	/*
+	 * Entries are sorted by type and symbol index. That means that,
+	 * if a duplicate entry exists, it must be in the preceding
+	 * slot.
+	 */
+	if (!num)
+		return false;
+
+	prev = rel + num - 1;
+	return cmp_rel(rel + num, prev) == 0 &&
+	       is_zero_addend_relocation(base, prev);
 }
 
 /* Count how many PLT entries we may need */
@@ -93,18 +129,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 {
 	unsigned int ret = 0;
 	const Elf32_Sym *s;
-	u32 mask;
 	int i;
 
-	if (IS_ENABLED(CONFIG_THUMB2_KERNEL))
-		mask = __opcode_to_mem_thumb32(0x07ff2fff);
-	else
-		mask = __opcode_to_mem_arm(0x00ffffff);
-
-	/*
-	 * Sure, this is order(n^2), but it's usually short, and not
-	 * time critical
-	 */
 	for (i = 0; i < num; i++) {
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 		case R_ARM_CALL:
@@ -123,7 +149,18 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 			if (s->st_shndx != SHN_UNDEF)
 				break;
 
-			if (!duplicate_rel(base, rel, i, mask))
+			/*
+			 * Jump relocations with non-zero addends against
+			 * undefined symbols are supported by the ELF spec, but
+			 * do not occur in practice (e.g., 'jump n bytes past
+			 * the entry point of undefined function symbol f').
+			 * So we need to support them, but there is no need to
+			 * take them into consideration when trying to optimize
+			 * this code. So let's only check for duplicates when
+			 * the addend is zero.
+			 */
+			if (!is_zero_addend_relocation(base, rel + i) ||
+			    !duplicate_rel(base, rel, i))
 				ret++;
 		}
 	}
@@ -158,7 +195,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	}
 
 	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
-		const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
+		Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
 		int numrels = s->sh_size / sizeof(Elf32_Rel);
 		Elf32_Shdr *dstsec = sechdrs + s->sh_info;
 
@@ -169,6 +206,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		if (!(dstsec->sh_flags & SHF_EXECINSTR))
 			continue;
 
+		/* sort by type and symbol index */
+		sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
+
 		plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
 	}
 

From 66e94ba3c8ea5ff5f1443a50441f953ef44010b1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 18 Aug 2016 10:58:49 +0200
Subject: [PATCH 17/35] ARM: kernel: avoid brute force search on PLT generation

Given that we now sort the relocation sections in a way that guarantees
that entries that can share a single PLT entry end up adjacently, there
is no a longer a need to go over the entire list to look for an existing
entry that matches our jump target. If such a match exists, it was the
last one to be emitted, so we can simply check the preceding slot.

Note that this will still work correctly in the [theoretical] presence of
call/jump relocations against SHN_UNDEF symbols with non-zero addends,
although not optimally. Since the relocations are presented in the same
order that we checked them for duplicates, any duplicates that we failed
to spot the first time around will be accounted for in the PLT allocation
so there is guaranteed to be sufficient space for them when actually
emitting the PLT.

For instance, the following sequence of relocations:

  000004d8  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  000004fc  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  0000050e  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  00000520  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  00000532  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  00000544  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  00000556  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  00000568  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  0000057a  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  0000058c  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  0000059e  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  000005b0  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  000005c2  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null
  000005d4  00058b0a R_ARM_THM_CALL    00000000   warn_slowpath_null

may result in several PLT entries to be allocated, and also emitted, if
any of the entries in the middle refer to a Place that contains a non-zero
addend (i.e., one for all the preceding zero-addend relocations, one for
all the following zero-addend relocations, and one for the non-zero addend
relocation itself)

Tested-by: Jongsung Kim <neidhard.kim@lge.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/kernel/module-plts.c | 54 +++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index ad1b98fbcd98..3a5cba90c971 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -33,35 +33,39 @@ struct plt_entries {
 
 u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 {
-	struct plt_entries *plt, *plt_end;
-	int c;
+	struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
+	int idx = 0;
 
-	plt = (void *)mod->arch.plt->sh_addr;
-	plt_end = (void *)plt + mod->arch.plt->sh_size;
+	/*
+	 * Look for an existing entry pointing to 'val'. Given that the
+	 * relocations are sorted, this will be the last entry we allocated.
+	 * (if one exists).
+	 */
+	if (mod->arch.plt_count > 0) {
+		plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
+		idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
 
-	/* Look for an existing entry pointing to 'val' */
-	for (c = mod->arch.plt_count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
-		int i;
+		if (plt->lit[idx] == val)
+			return (u32)&plt->ldr[idx];
 
-		if (!c) {
-			/* Populate a new set of entries */
-			*plt = (struct plt_entries){
-				{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
-				{ val, }
-			};
-			mod->arch.plt_count++;
-			return (u32)plt->ldr;
-		}
-		for (i = 0; i < PLT_ENT_COUNT; i++) {
-			if (!plt->lit[i]) {
-				plt->lit[i] = val;
-				mod->arch.plt_count++;
-			}
-			if (plt->lit[i] == val)
-				return (u32)&plt->ldr[i];
-		}
+		idx = (idx + 1) % PLT_ENT_COUNT;
+		if (!idx)
+			plt++;
 	}
-	BUG();
+
+	mod->arch.plt_count++;
+	BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
+
+	if (!idx)
+		/* Populate a new set of entries */
+		*plt = (struct plt_entries){
+			{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
+			{ val, }
+		};
+	else
+		plt->lit[idx] = val;
+
+	return (u32)&plt->ldr[idx];
 }
 
 #define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))

From 26150aa96d60298de669e3d81a5b7e132b653ce7 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Tue, 30 Aug 2016 17:24:34 +0100
Subject: [PATCH 18/35] ARM: 8602/1: factor out CSSELR/CCSIDR operations that
 use cp15 directly

Currently we use raw cp15 operations to access the cache setup data.

This patch abstracts the CSSELR and CCSIDR accessors out to a header so
that the implementation for them can be switched out as we do with other
cpu/cachetype operations.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cachetype.h | 24 ++++++++++++++++++++++++
 arch/arm/kernel/setup.c          |  7 ++-----
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h
index 7ea78144ae22..8609de8e313a 100644
--- a/arch/arm/include/asm/cachetype.h
+++ b/arch/arm/include/asm/cachetype.h
@@ -56,4 +56,28 @@ static inline unsigned int __attribute__((pure)) cacheid_is(unsigned int mask)
 	       (~__CACHEID_NEVER & __CACHEID_ARCH_MIN & mask & cacheid);
 }
 
+#define CSSELR_ICACHE	1
+#define CSSELR_DCACHE	0
+
+#define CSSELR_L1	(0 << 1)
+#define CSSELR_L2	(1 << 1)
+#define CSSELR_L3	(2 << 1)
+#define CSSELR_L4	(3 << 1)
+#define CSSELR_L5	(4 << 1)
+#define CSSELR_L6	(5 << 1)
+#define CSSELR_L7	(6 << 1)
+
+static inline void set_csselr(unsigned int cache_selector)
+{
+	asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (cache_selector));
+}
+
+static inline unsigned int read_ccsidr(void)
+{
+	unsigned int val;
+
+	asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (val));
+	return val;
+}
+
 #endif
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index aca999e17184..d7568808eb7b 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -290,12 +290,9 @@ static int cpu_has_aliasing_icache(unsigned int arch)
 	/* arch specifies the register format */
 	switch (arch) {
 	case CPU_ARCH_ARMv7:
-		asm("mcr	p15, 2, %0, c0, c0, 0 @ set CSSELR"
-		    : /* No output operands */
-		    : "r" (1));
+		set_csselr(CSSELR_ICACHE | CSSELR_L1);
 		isb();
-		asm("mrc	p15, 1, %0, c0, c0, 0 @ read CCSIDR"
-		    : "=r" (id_reg));
+		id_reg = read_ccsidr();
 		line_size = 4 << ((id_reg & 0x7) + 2);
 		num_sets = ((id_reg >> 13) & 0x7fff) + 1;
 		aliasing_icache = (line_size * num_sets) > PAGE_SIZE;

From 296909ee6d9cf98f68a61d5e9774ff5435df2c6a Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Tue, 30 Aug 2016 17:25:59 +0100
Subject: [PATCH 19/35] ARM: 8603/1: V7M: Add addresses for mem-mapped V7M
 cache operations

V7M implements cache operations similarly to V7A/R, however all operations
are performed via memory-mapped IO instead of co-processor operations.

This patch adds register definitions relevant to the V7M ARM architecture's
cache architecture.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/v7m.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h
index 615781c61627..1fd775c1bc5d 100644
--- a/arch/arm/include/asm/v7m.h
+++ b/arch/arm/include/asm/v7m.h
@@ -24,6 +24,9 @@
 
 #define V7M_SCB_CCR			0x14
 #define V7M_SCB_CCR_STKALIGN			(1 << 9)
+#define V7M_SCB_CCR_DC				(1 << 16)
+#define V7M_SCB_CCR_IC				(1 << 17)
+#define V7M_SCB_CCR_BP				(1 << 18)
 
 #define V7M_SCB_SHPR2			0x1c
 #define V7M_SCB_SHPR3			0x20
@@ -47,6 +50,25 @@
 #define EXC_RET_STACK_MASK			0x00000004
 #define EXC_RET_THREADMODE_PROCESSSTACK		0xfffffffd
 
+/* Cache related definitions */
+
+#define	V7M_SCB_CLIDR		0x78	/* Cache Level ID register */
+#define	V7M_SCB_CTR		0x7c	/* Cache Type register */
+#define	V7M_SCB_CCSIDR		0x80	/* Cache size ID register */
+#define	V7M_SCB_CSSELR		0x84	/* Cache size selection register */
+
+/* Cache opeartions */
+#define	V7M_SCB_ICIALLU		0x250	/* I-cache invalidate all to PoU */
+#define	V7M_SCB_ICIMVAU		0x258	/* I-cache invalidate by MVA to PoU */
+#define	V7M_SCB_DCIMVAC		0x25c	/* D-cache invalidate by MVA to PoC */
+#define	V7M_SCB_DCISW		0x260	/* D-cache invalidate by set-way */
+#define	V7M_SCB_DCCMVAU		0x264	/* D-cache clean by MVA to PoU */
+#define	V7M_SCB_DCCMVAC		0x268	/* D-cache clean by MVA to PoC */
+#define	V7M_SCB_DCCSW		0x26c	/* D-cache clean by set-way */
+#define	V7M_SCB_DCCIMVAC	0x270	/* D-cache clean and invalidate by MVA to PoC */
+#define	V7M_SCB_DCCISW		0x274	/* D-cache clean and invalidate by set-way */
+#define	V7M_SCB_BPIALL		0x278	/* D-cache clean and invalidate by set-way */
+
 #ifndef __ASSEMBLY__
 
 enum reboot_mode;

From f5a5c89e36d0897b65e4e6bc2f646f75f8074263 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Tue, 30 Aug 2016 17:27:19 +0100
Subject: [PATCH 20/35] ARM: 8604/1: V7M: Add support for reading the CTR with
 read_cpuid_cachetype()

With the addition of caches to the V7M Architecture a new Cache Type
Register (CTR) is defined at 0xE000ED7C. This register serves the same
purpose as the V7A/R version and accessed via the read_cpuid_cachetype.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cachetype.h | 15 +++++++++++++++
 arch/arm/include/asm/cputype.h   | 15 ++++++++++-----
 arch/arm/kernel/setup.c          |  9 +++++----
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h
index 8609de8e313a..01509ae0bbec 100644
--- a/arch/arm/include/asm/cachetype.h
+++ b/arch/arm/include/asm/cachetype.h
@@ -67,6 +67,7 @@ static inline unsigned int __attribute__((pure)) cacheid_is(unsigned int mask)
 #define CSSELR_L6	(5 << 1)
 #define CSSELR_L7	(6 << 1)
 
+#ifndef CONFIG_CPU_V7M
 static inline void set_csselr(unsigned int cache_selector)
 {
 	asm volatile("mcr p15, 2, %0, c0, c0, 0" : : "r" (cache_selector));
@@ -79,5 +80,19 @@ static inline unsigned int read_ccsidr(void)
 	asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (val));
 	return val;
 }
+#else /* CONFIG_CPU_V7M */
+#include <linux/io.h>
+#include "asm/v7m.h"
+
+static inline void set_csselr(unsigned int cache_selector)
+{
+	writel(cache_selector, BASEADDR_V7M_SCB + V7M_SCB_CTR);
+}
+
+static inline unsigned int read_ccsidr(void)
+{
+	return readl(BASEADDR_V7M_SCB + V7M_SCB_CCSIDR);
+}
+#endif
 
 #endif
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index d6a4902a75d7..754f86f667d4 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -164,6 +164,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void)
 	return read_cpuid(CPUID_ID);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
+{
+	return read_cpuid(CPUID_CACHETYPE);
+}
+
 #elif defined(CONFIG_CPU_V7M)
 
 static inline unsigned int __attribute_const__ read_cpuid_id(void)
@@ -171,6 +176,11 @@ static inline unsigned int __attribute_const__ read_cpuid_id(void)
 	return readl(BASEADDR_V7M_SCB + V7M_SCB_CPUID);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
+{
+	return readl(BASEADDR_V7M_SCB + V7M_SCB_CTR);
+}
+
 #else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */
 
 static inline unsigned int __attribute_const__ read_cpuid_id(void)
@@ -210,11 +220,6 @@ static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
 	return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
 }
 
-static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
-{
-	return read_cpuid(CPUID_CACHETYPE);
-}
-
 static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void)
 {
 	return read_cpuid(CPUID_TCM);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index d7568808eb7b..34e3f3c45634 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -312,11 +312,12 @@ static void __init cacheid_init(void)
 {
 	unsigned int arch = cpu_architecture();
 
-	if (arch == CPU_ARCH_ARMv7M) {
-		cacheid = 0;
-	} else if (arch >= CPU_ARCH_ARMv6) {
+	if (arch >= CPU_ARCH_ARMv6) {
 		unsigned int cachetype = read_cpuid_cachetype();
-		if ((cachetype & (7 << 29)) == 4 << 29) {
+
+		if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+			cacheid = 0;
+		} else if ((cachetype & (7 << 29)) == 4 << 29) {
 			/* ARMv7 register format */
 			arch = CPU_ARCH_ARMv7;
 			cacheid = CACHEID_VIPT_NONALIASING;

From b2bf482a5099264fb75936b5b552cdf3c247c93a Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Tue, 30 Aug 2016 17:28:43 +0100
Subject: [PATCH 21/35] ARM: 8605/1: V7M: fix notrace variant of
 save_and_disable_irqs

Commit 8e43a905 "ARM: 7325/1: fix v7 boot with lockdep enabled"
introduced notrace variant of save_and_disable_irqs to balance notrace
variant of restore_irqs; however V7M case has been missed. It was not
noticed because cache-v7.S the only place where notrace variant is used.
So fix it, since we are going to extend V7 cache routines to handle V7M
case too.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/assembler.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 4eaea2173bf8..68b06f9c65de 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -159,7 +159,11 @@
 	.endm
 
 	.macro	save_and_disable_irqs_notrace, oldcpsr
+#ifdef CONFIG_CPU_V7M
+	mrs	\oldcpsr, primask
+#else
 	mrs	\oldcpsr, cpsr
+#endif
 	disable_irq_notrace
 	.endm
 

From 9a1af5f2206bd303ed201c6895c42ac3ac120a20 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Tue, 30 Aug 2016 17:30:02 +0100
Subject: [PATCH 22/35] ARM: 8606/1: V7M: introduce cache operations

This commit implements the cache operation for V7M.

It is based on V7 counterpart and differs as follows:
- cache operations are memory mapped
- only Thumb instruction set is supported
- we don't handle user access faults

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/cache-v7m.S   | 453 ++++++++++++++++++++++++++++++++++++++
 arch/arm/mm/proc-macros.S |  16 ++
 2 files changed, 469 insertions(+)
 create mode 100644 arch/arm/mm/cache-v7m.S

diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S
new file mode 100644
index 000000000000..816a7e44e6f1
--- /dev/null
+++ b/arch/arm/mm/cache-v7m.S
@@ -0,0 +1,453 @@
+/*
+ *  linux/arch/arm/mm/cache-v7m.S
+ *
+ *  Based on linux/arch/arm/mm/cache-v7.S
+ *
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2005 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv7M processor support.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/unwind.h>
+#include <asm/v7m.h>
+
+#include "proc-macros.S"
+
+/* Generic V7M read/write macros for memory mapped cache operations */
+.macro v7m_cache_read, rt, reg
+	movw	\rt, #:lower16:BASEADDR_V7M_SCB + \reg
+	movt	\rt, #:upper16:BASEADDR_V7M_SCB + \reg
+	ldr     \rt, [\rt]
+.endm
+
+.macro v7m_cacheop, rt, tmp, op, c = al
+	movw\c	\tmp, #:lower16:BASEADDR_V7M_SCB + \op
+	movt\c	\tmp, #:upper16:BASEADDR_V7M_SCB + \op
+	str\c	\rt, [\tmp]
+.endm
+
+
+.macro	read_ccsidr, rt
+	v7m_cache_read \rt, V7M_SCB_CCSIDR
+.endm
+
+.macro read_clidr, rt
+	v7m_cache_read \rt, V7M_SCB_CLIDR
+.endm
+
+.macro	write_csselr, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_CSSELR
+.endm
+
+/*
+ * dcisw: Invalidate data cache by set/way
+ */
+.macro dcisw, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCISW
+.endm
+
+/*
+ * dccisw: Clean and invalidate data cache by set/way
+ */
+.macro dccisw, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCISW
+.endm
+
+/*
+ * dccimvac: Clean and invalidate data cache line by MVA to PoC.
+ */
+.irp    c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+.macro dccimvac\c, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCIMVAC, \c
+.endm
+.endr
+
+/*
+ * dcimvac: Invalidate data cache line by MVA to PoC
+ */
+.macro dcimvac, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC
+.endm
+
+/*
+ * dccmvau: Clean data cache line by MVA to PoU
+ */
+.macro dccmvau, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAU
+.endm
+
+/*
+ * dccmvac: Clean data cache line by MVA to PoC
+ */
+.macro dccmvac,  rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAC
+.endm
+
+/*
+ * icimvau: Invalidate instruction caches by MVA to PoU
+ */
+.macro icimvau, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_ICIMVAU
+.endm
+
+/*
+ * Invalidate the icache, inner shareable if SMP, invalidate BTB for UP.
+ * rt data ignored by ICIALLU(IS), so can be used for the address
+ */
+.macro invalidate_icache, rt
+	v7m_cacheop \rt, \rt, V7M_SCB_ICIALLU
+	mov \rt, #0
+.endm
+
+/*
+ * Invalidate the BTB, inner shareable if SMP.
+ * rt data ignored by BPIALL, so it can be used for the address
+ */
+.macro invalidate_bp, rt
+	v7m_cacheop \rt, \rt, V7M_SCB_BPIALL
+	mov \rt, #0
+.endm
+
+ENTRY(v7m_invalidate_l1)
+	mov	r0, #0
+
+	write_csselr r0, r1
+	read_ccsidr r0
+
+	movw	r1, #0x7fff
+	and	r2, r1, r0, lsr #13
+
+	movw	r1, #0x3ff
+
+	and	r3, r1, r0, lsr #3      @ NumWays - 1
+	add	r2, r2, #1              @ NumSets
+
+	and	r0, r0, #0x7
+	add	r0, r0, #4      @ SetShift
+
+	clz	r1, r3          @ WayShift
+	add	r4, r3, #1      @ NumWays
+1:	sub	r2, r2, #1      @ NumSets--
+	mov	r3, r4          @ Temp = NumWays
+2:	subs	r3, r3, #1      @ Temp--
+	mov	r5, r3, lsl r1
+	mov	r6, r2, lsl r0
+	orr	r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+	dcisw	r5, r6
+	bgt	2b
+	cmp	r2, #0
+	bgt	1b
+	dsb	st
+	isb
+	ret	lr
+ENDPROC(v7m_invalidate_l1)
+
+/*
+ *	v7m_flush_icache_all()
+ *
+ *	Flush the whole I-cache.
+ *
+ *	Registers:
+ *	r0 - set to 0
+ */
+ENTRY(v7m_flush_icache_all)
+	invalidate_icache r0
+	ret	lr
+ENDPROC(v7m_flush_icache_all)
+
+/*
+ *	v7m_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: r0-r7, r9-r11
+ */
+ENTRY(v7m_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	read_clidr r0
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
+	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
+	mov	r10, #0				@ start clean at cache level 0
+flush_levels:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask of the bits for current cache only
+	cmp	r1, #2				@ see what cache we have at this level
+	blt	skip				@ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
+#endif
+	write_csselr r10, r1			@ set current cache level
+	isb					@ isb to sych the new cssr&csidr
+	read_ccsidr r1				@ read the new csidr
+#ifdef CONFIG_PREEMPT
+	restore_irqs_notrace r9
+#endif
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	movw	r4, #0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
+	clz	r5, r4				@ find bit position of way size increment
+	movw	r7, #0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
+loop1:
+	mov	r9, r7				@ create working copy of max index
+loop2:
+	lsl	r6, r4, r5
+	orr	r11, r10, r6			@ factor way and cache number into r11
+	lsl	r6, r9, r2
+	orr	r11, r11, r6			@ factor index number into r11
+	dccisw	r11, r6				@ clean/invalidate by set/way
+	subs	r9, r9, #1			@ decrement the index
+	bge	loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	loop1
+skip:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	flush_levels
+finished:
+	mov	r10, #0				@ swith back to cache level 0
+	write_csselr r10, r3			@ select current cache level in cssr
+	dsb	st
+	isb
+	ret	lr
+ENDPROC(v7m_flush_dcache_all)
+
+/*
+ *	v7m_flush_cache_all()
+ *
+ *	Flush the entire cache system.
+ *  The data cache flush is now achieved using atomic clean / invalidates
+ *  working outwards from L1 cache. This is done using Set/Way based cache
+ *  maintenance instructions.
+ *  The instruction cache can still be invalidated back to the point of
+ *  unification in a single instruction.
+ *
+ */
+ENTRY(v7m_flush_kern_cache_all)
+	stmfd	sp!, {r4-r7, r9-r11, lr}
+	bl	v7m_flush_dcache_all
+	invalidate_icache r0
+	ldmfd	sp!, {r4-r7, r9-r11, lr}
+	ret	lr
+ENDPROC(v7m_flush_kern_cache_all)
+
+/*
+ *	v7m_flush_cache_all()
+ *
+ *	Flush all TLB entries in a particular address space
+ *
+ *	- mm    - mm_struct describing address space
+ */
+ENTRY(v7m_flush_user_cache_all)
+	/*FALLTHROUGH*/
+
+/*
+ *	v7m_flush_cache_range(start, end, flags)
+ *
+ *	Flush a range of TLB entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ *
+ *	It is assumed that:
+ *	- we have a VIPT cache.
+ */
+ENTRY(v7m_flush_user_cache_range)
+	ret	lr
+ENDPROC(v7m_flush_user_cache_all)
+ENDPROC(v7m_flush_user_cache_range)
+
+/*
+ *	v7m_coherent_kern_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7m_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	v7m_coherent_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7m_coherent_user_range)
+ UNWIND(.fnstart		)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+1:
+/*
+ * We use open coded version of dccmvau otherwise USER() would
+ * point at movw instruction.
+ */
+	dccmvau	r12, r3
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	1b
+	dsb	ishst
+	icache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+2:
+	icimvau r12, r3
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	2b
+	invalidate_bp r0
+	dsb	ishst
+	isb
+	ret	lr
+ UNWIND(.fnend		)
+ENDPROC(v7m_coherent_kern_range)
+ENDPROC(v7m_coherent_user_range)
+
+/*
+ *	v7m_flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v7m_flush_kern_dcache_area)
+	dcache_line_size r2, r3
+	add	r1, r0, r1
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccimvac r0, r3		@ clean & invalidate D line / unified line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_flush_kern_dcache_area)
+
+/*
+ *	v7m_dma_inv_range(start,end)
+ *
+ *	Invalidate the data cache within the specified region; we will
+ *	be performing a DMA operation in this region and we want to
+ *	purge old data in the cache.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7m_dma_inv_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	tst	r0, r3
+	bic	r0, r0, r3
+	dccimvacne r0, r3
+	subne	r3, r2, #1	@ restore r3, corrupted by v7m's dccimvac
+	tst	r1, r3
+	bic	r1, r1, r3
+	dccimvacne r1, r3
+1:
+	dcimvac r0, r3
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_inv_range)
+
+/*
+ *	v7m_dma_clean_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7m_dma_clean_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccmvac r0, r3			@ clean D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_clean_range)
+
+/*
+ *	v7m_dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(v7m_dma_flush_range)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccimvac r0, r3			 @ clean & invalidate D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_flush_range)
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7m_dma_map_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_FROM_DEVICE
+	beq	v7m_dma_inv_range
+	b	v7m_dma_clean_range
+ENDPROC(v7m_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7m_dma_unmap_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_TO_DEVICE
+	bne	v7m_dma_inv_range
+	ret	lr
+ENDPROC(v7m_dma_unmap_area)
+
+	.globl	v7m_flush_kern_cache_louis
+	.equ	v7m_flush_kern_cache_louis, v7m_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v7m
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index c671f345266a..0d40c285bd86 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -7,6 +7,10 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
+#ifdef CONFIG_CPU_V7M
+#include <asm/v7m.h>
+#endif
+
 /*
  * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
  */
@@ -70,7 +74,13 @@
  * on ARMv7.
  */
 	.macro	dcache_line_size, reg, tmp
+#ifdef CONFIG_CPU_V7M
+	movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	ldr     \tmp, [\tmp]
+#else
 	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+#endif
 	lsr	\tmp, \tmp, #16
 	and	\tmp, \tmp, #0xf		@ cache line size encoding
 	mov	\reg, #4			@ bytes per word
@@ -82,7 +92,13 @@
  * on ARMv7.
  */
 	.macro	icache_line_size, reg, tmp
+#ifdef CONFIG_CPU_V7M
+	movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	ldr     \tmp, [\tmp]
+#else
 	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+#endif
 	and	\tmp, \tmp, #0xf		@ cache line size encoding
 	mov	\reg, #4			@ bytes per word
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size

From bc0ee9d24ad21a5c2b5944f66623a02e9c8831aa Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Tue, 30 Aug 2016 17:31:22 +0100
Subject: [PATCH 23/35] ARM: 8607/1: V7M: Wire up caches for V7M processors
 with cache support.

This patch does the plumbing required to invoke the V7M cache code added
in earlier patches in this series, although there is no users for that
yet.

In order to honour the I/D cache disable config options, this patch changes
the mechanism by which the CCR is set on boot, to be more like V7A/R.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/glue-cache.h |  4 ----
 arch/arm/kernel/head-nommu.S      | 16 +++++++++++++++-
 arch/arm/mm/Kconfig               | 10 +++++++---
 arch/arm/mm/Makefile              |  2 ++
 arch/arm/mm/proc-v7m.S            |  5 ++---
 5 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index cab07f69382d..01c3d92624e5 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -118,11 +118,7 @@
 #endif
 
 #if defined(CONFIG_CPU_V7M)
-# ifdef _CACHE
 #  define MULTI_CACHE 1
-# else
-#  define _CACHE nop
-# endif
 #endif
 
 #if !defined(_CACHE) && !defined(MULTI_CACHE)
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index fb1a69eb49c1..6b4eb27b8758 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -158,7 +158,21 @@ __after_proc_init:
 	bic	r0, r0, #CR_V
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
-#endif /* CONFIG_CPU_CP15 */
+#elif defined (CONFIG_CPU_V7M)
+	/* For V7M systems we want to modify the CCR similarly to the SCTLR */
+#ifdef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #V7M_SCB_CCR_DC
+#endif
+#ifdef CONFIG_CPU_BPREDICT_DISABLE
+	bic	r0, r0, #V7M_SCB_CCR_BP
+#endif
+#ifdef CONFIG_CPU_ICACHE_DISABLE
+	bic	r0, r0, #V7M_SCB_CCR_IC
+#endif
+	movw	r3, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	movt	r3, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	str	r0, [r3]
+#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
 	ret	lr
 ENDPROC(__after_proc_init)
 	.ltorg
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index d15a7fe51618..e613122e5e1b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -403,6 +403,7 @@ config CPU_V7M
 	bool
 	select CPU_32v7M
 	select CPU_ABRT_NOMMU
+	select CPU_CACHE_V7M
 	select CPU_CACHE_NOP
 	select CPU_PABRT_LEGACY
 	select CPU_THUMBONLY
@@ -518,6 +519,9 @@ config CPU_CACHE_VIPT
 config CPU_CACHE_FA
 	bool
 
+config CPU_CACHE_V7M
+	bool
+
 if MMU
 # The copy-page model
 config CPU_COPY_V4WT
@@ -750,14 +754,14 @@ config CPU_HIGH_VECTOR
 
 config CPU_ICACHE_DISABLE
 	bool "Disable I-Cache (I-bit)"
-	depends on CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)
+	depends on (CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)) || CPU_V7M
 	help
 	  Say Y here to disable the processor instruction cache. Unless
 	  you have a reason not to or are unsure, say N.
 
 config CPU_DCACHE_DISABLE
 	bool "Disable D-Cache (C-bit)"
-	depends on CPU_CP15 && !SMP
+	depends on (CPU_CP15 && !SMP) || CPU_V7M
 	help
 	  Say Y here to disable the processor data cache. Unless
 	  you have a reason not to or are unsure, say N.
@@ -792,7 +796,7 @@ config CPU_CACHE_ROUND_ROBIN
 
 config CPU_BPREDICT_DISABLE
 	bool "Disable branch prediction"
-	depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526
+	depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 || CPU_V7M
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 7f76d96ce546..e75abaeb16db 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -43,9 +43,11 @@ obj-$(CONFIG_CPU_CACHE_V6)	+= cache-v6.o
 obj-$(CONFIG_CPU_CACHE_V7)	+= cache-v7.o
 obj-$(CONFIG_CPU_CACHE_FA)	+= cache-fa.o
 obj-$(CONFIG_CPU_CACHE_NOP)	+= cache-nop.o
+obj-$(CONFIG_CPU_CACHE_V7M)	+= cache-v7m.o
 
 AFLAGS_cache-v6.o	:=-Wa,-march=armv6
 AFLAGS_cache-v7.o	:=-Wa,-march=armv7-a
+AFLAGS_cache-v7m.o	:=-Wa,-march=armv7-m
 
 obj-$(CONFIG_CPU_COPY_V4WT)	+= copypage-v4wt.o
 obj-$(CONFIG_CPU_COPY_V4WB)	+= copypage-v4wb.o
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 7229d8d0be1a..11f5816e2680 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -118,9 +118,8 @@ __v7m_setup:
 
 	@ Configure the System Control Register to ensure 8-byte stack alignment
 	@ Note the STKALIGN bit is either RW or RAO.
-	ldr	r12, [r0, V7M_SCB_CCR]	@ system control register
-	orr	r12, #V7M_SCB_CCR_STKALIGN
-	str	r12, [r0, V7M_SCB_CCR]
+	ldr	r0, [r0, V7M_SCB_CCR]   @ system control register
+	orr	r0, #V7M_SCB_CCR_STKALIGN
 	ret	lr
 ENDPROC(__v7m_setup)
 

From c3a6bcbe6a9e7e0d66c279e4a47aa07327040b38 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Tue, 30 Aug 2016 17:32:42 +0100
Subject: [PATCH 24/35] ARM: 8608/1: V7M: Indirect proc_info construction for
 V7M CPUs

This patch copies the method used for V7A/R CPUs to specify differing
processor info for different cores.

This patch differentiates Cortex-M3 and Cortex-M4 and leaves a fallback case
for any other V7M processors.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7m.S | 46 ++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 11f5816e2680..796a9836cfdb 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -132,6 +132,40 @@ ENDPROC(__v7m_setup)
 
 	.section ".proc.info.init", #alloc
 
+.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0,  proc_fns = v7m_processor_functions
+	.long	0			/* proc_info_list.__cpu_mm_mmu_flags */
+	.long	0			/* proc_info_list.__cpu_io_mmu_flags */
+	initfn	\initfunc, \name
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \hwcaps
+	.long	cpu_v7m_name
+	.long   \proc_fns
+	.long	0			/* proc_info_list.tlb */
+	.long	0			/* proc_info_list.user */
+	.long	\cache_fns
+.endm
+
+	/*
+	 * Match ARM Cortex-M4 processor.
+	 */
+	.type	__v7m_cm4_proc_info, #object
+__v7m_cm4_proc_info:
+	.long	0x410fc240		/* ARM Cortex-M4 0xC24 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm4_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP
+	.size	__v7m_cm4_proc_info, . - __v7m_cm4_proc_info
+
+	/*
+	 * Match ARM Cortex-M3 processor.
+	 */
+	.type	__v7m_cm3_proc_info, #object
+__v7m_cm3_proc_info:
+	.long	0x410fc230		/* ARM Cortex-M3 0xC23 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm3_proc_info, __v7m_setup
+	.size	__v7m_cm3_proc_info, . - __v7m_cm3_proc_info
+
 	/*
 	 * Match any ARMv7-M processor core.
 	 */
@@ -139,16 +173,6 @@ ENDPROC(__v7m_setup)
 __v7m_proc_info:
 	.long	0x000f0000		@ Required ID value
 	.long	0x000f0000		@ Mask for ID
-	.long   0			@ proc_info_list.__cpu_mm_mmu_flags
-	.long   0			@ proc_info_list.__cpu_io_mmu_flags
-	initfn	__v7m_setup, __v7m_proc_info	@ proc_info_list.__cpu_flush
-	.long	cpu_arch_name
-	.long	cpu_elf_name
-	.long	HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT
-	.long	cpu_v7m_name
-	.long	v7m_processor_functions	@ proc_info_list.proc
-	.long	0			@ proc_info_list.tlb
-	.long	0			@ proc_info_list.user
-	.long	nop_cache_fns		@ proc_info_list.cache
+	__v7m_proc __v7m_proc_info, __v7m_setup
 	.size	__v7m_proc_info, . - __v7m_proc_info
 

From 6a8146f420be2e59cf511c5a046b762142ff201d Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Tue, 30 Aug 2016 17:34:00 +0100
Subject: [PATCH 25/35] ARM: 8609/1: V7M: Add support for the Cortex-M7
 processor

Cortex-M7 is a new member of the V7M processor family that adds, among
other things, caches over the features available in Cortex-M4.

This patch adds support for recognising the processor at boot time, and
make use of recently introduced cache functions.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Tested-by: Joachim Eastwood <manabian@gmail.com>
Tested-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7m.S | 56 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 796a9836cfdb..e6786f007a59 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -74,14 +74,42 @@ ENTRY(cpu_v7m_do_resume)
 ENDPROC(cpu_v7m_do_resume)
 #endif
 
+ENTRY(cpu_cm7_dcache_clean_area)
+	dcache_line_size r2, r3
+	movw	r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
+	movt	r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
+
+1:	str	r0, [r3]		@ clean D entry
+	add	r0, r0, r2
+	subs	r1, r1, r2
+	bhi	1b
+	dsb
+	ret	lr
+ENDPROC(cpu_cm7_dcache_clean_area)
+
+ENTRY(cpu_cm7_proc_fin)
+	movw	r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	movt	r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	ldr	r0, [r2]
+	bic	r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC)
+	str	r0, [r2]
+	ret	lr
+ENDPROC(cpu_cm7_proc_fin)
+
 	.section ".text.init", #alloc, #execinstr
 
+__v7m_cm7_setup:
+	mov	r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP)
+	b	__v7m_setup_cont
 /*
  *	__v7m_setup
  *
  *	This should be able to cover all ARMv7-M cores.
  */
 __v7m_setup:
+	mov	r8, 0
+
+__v7m_setup_cont:
 	@ Configure the vector table base address
 	ldr	r0, =BASEADDR_V7M_SCB
 	ldr	r12, =vector_table
@@ -116,14 +144,32 @@ __v7m_setup:
 	mov	r1, #1
 	msr	control, r1		@ Thread mode has unpriviledged access
 
+	@ Configure caches (if implemented)
+	teq     r8, #0
+	stmneia	r12, {r0-r6, lr}	@ v7m_invalidate_l1 touches r0-r6
+	blne	v7m_invalidate_l1
+	teq     r8, #0			@ re-evalutae condition
+	ldmneia	r12, {r0-r6, lr}
+
 	@ Configure the System Control Register to ensure 8-byte stack alignment
 	@ Note the STKALIGN bit is either RW or RAO.
 	ldr	r0, [r0, V7M_SCB_CCR]   @ system control register
 	orr	r0, #V7M_SCB_CCR_STKALIGN
+	orr	r0, r0, r8
+
 	ret	lr
 ENDPROC(__v7m_setup)
 
+/*
+ * Cortex-M7 processor functions
+ */
+	globl_equ	cpu_cm7_proc_init,	cpu_v7m_proc_init
+	globl_equ	cpu_cm7_reset,		cpu_v7m_reset
+	globl_equ	cpu_cm7_do_idle,	cpu_v7m_do_idle
+	globl_equ	cpu_cm7_switch_mm,	cpu_v7m_switch_mm
+
 	define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
+	define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 
 	.section ".rodata"
 	string cpu_arch_name, "armv7m"
@@ -146,6 +192,16 @@ ENDPROC(__v7m_setup)
 	.long	\cache_fns
 .endm
 
+	/*
+	 * Match ARM Cortex-M7 processor.
+	 */
+	.type	__v7m_cm7_proc_info, #object
+__v7m_cm7_proc_info:
+	.long	0x410fc270		/* ARM Cortex-M7 0xC27 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm7_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions
+	.size	__v7m_cm7_proc_info, . - __v7m_cm7_proc_info
+
 	/*
 	 * Match ARM Cortex-M4 processor.
 	 */

From 8e02676ffa6906a97de7f90772e9cdcb75ea6743 Mon Sep 17 00:00:00 2001
From: Torgue Alexandre <alexandre.torgue@st.com>
Date: Wed, 31 Aug 2016 09:14:14 +0100
Subject: [PATCH 26/35] ARM: 8610/1: V7M: Add dsb before jumping in handler
 mode

According to ARM AN321 (section 4.12):

"If the vector table is in writable memory such as SRAM, either relocated
by VTOR or a device dependent memory remapping mechanism, then
architecturally a memory barrier instruction is required after the vector
table entry is updated, and if the exception is to be activated
immediately"

Reviewed-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Signed-off-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/proc-v7m.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index e6786f007a59..f6d333f09bfe 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -132,6 +132,7 @@ __v7m_setup_cont:
 	badr	r1, 1f
 	ldr	r5, [r12, #11 * 4]	@ read the SVC vector entry
 	str	r1, [r12, #11 * 4]	@ write the temporary SVC vector entry
+	dsb
 	mov	r6, lr			@ save LR
 	ldr	sp, =init_thread_union + THREAD_START_SP
 	cpsie	i

From b828f960215f02e5d2c88bbd27565c694254a15a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 2 Sep 2016 10:35:18 +0100
Subject: [PATCH 27/35] ARM: 8611/1: l2x0: add PMU support

The L2C-220 (AKA L220) and L2C-310 (AKA PL310) cache controllers feature
a Performance Monitoring Unit (PMU), which can be useful for tuning
and/or debugging. This hardware is always present and the relevant
registers are accessible to non-secure accesses. Thus, no special
firmware interface is necessary.

This patch adds support for the PMU, plugging into the usual perf
infrastructure. The overflow interrupt is not always available (e.g. on
RealView PBX A9 it is not wired up at all), and the hardware counters
saturate, so the driver does not make use of this. Instead, the driver
periodically polls and reset counters as required to avoid losing
events due to saturation.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Pawel Moll <pawel.moll@arm.com>
Tested-by: Kim Phillips <kim.phillips@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/hardware/cache-l2x0.h |  19 +
 arch/arm/mm/Kconfig                        |   7 +
 arch/arm/mm/Makefile                       |   1 +
 arch/arm/mm/cache-l2x0-pmu.c               | 584 +++++++++++++++++++++
 arch/arm/mm/cache-l2x0.c                   |   6 +
 include/linux/cpuhotplug.h                 |   1 +
 6 files changed, 618 insertions(+)
 create mode 100644 arch/arm/mm/cache-l2x0-pmu.c

diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 3a5ec1c25659..736292b42fca 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -87,6 +87,15 @@
 #define L310_CACHE_ID_RTL_R3P2		0x08
 #define L310_CACHE_ID_RTL_R3P3		0x09
 
+#define L2X0_EVENT_CNT_CTRL_ENABLE	BIT(0)
+
+#define L2X0_EVENT_CNT_CFG_SRC_SHIFT	2
+#define L2X0_EVENT_CNT_CFG_SRC_MASK	0xf
+#define L2X0_EVENT_CNT_CFG_SRC_DISABLED	0
+#define L2X0_EVENT_CNT_CFG_INT_DISABLED	0
+#define L2X0_EVENT_CNT_CFG_INT_INCR	1
+#define L2X0_EVENT_CNT_CFG_INT_OVERFLOW	2
+
 /* L2C auxiliary control register - bits common to L2C-210/220/310 */
 #define L2C_AUX_CTRL_WAY_SIZE_SHIFT		17
 #define L2C_AUX_CTRL_WAY_SIZE_MASK		(7 << 17)
@@ -157,6 +166,16 @@ static inline int l2x0_of_init(u32 aux_val, u32 aux_mask)
 }
 #endif
 
+#ifdef CONFIG_CACHE_L2X0_PMU
+void l2x0_pmu_register(void __iomem *base, u32 part);
+void l2x0_pmu_suspend(void);
+void l2x0_pmu_resume(void);
+#else
+static inline void l2x0_pmu_register(void __iomem *base, u32 part) {}
+static inline void l2x0_pmu_suspend(void) {}
+static inline void l2x0_pmu_resume(void) {}
+#endif
+
 struct l2x0_regs {
 	unsigned long phy_base;
 	unsigned long aux_ctrl;
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index e613122e5e1b..c1799dd1d0d9 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -920,6 +920,13 @@ config CACHE_L2X0
 	help
 	  This option enables the L2x0 PrimeCell.
 
+config CACHE_L2X0_PMU
+	bool "L2x0 performance monitor support" if CACHE_L2X0
+	depends on PERF_EVENTS
+	help
+	  This option enables support for the performance monitoring features
+	  of the L220 and PL310 outer cache controllers.
+
 if CACHE_L2X0
 
 config PL310_ERRATA_588369
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index e75abaeb16db..e8698241ece9 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -103,6 +103,7 @@ AFLAGS_proc-v7.o	:=-Wa,-march=armv7-a
 obj-$(CONFIG_OUTER_CACHE)	+= l2c-common.o
 obj-$(CONFIG_CACHE_FEROCEON_L2)	+= cache-feroceon-l2.o
 obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o l2c-l2x0-resume.o
+obj-$(CONFIG_CACHE_L2X0_PMU)	+= cache-l2x0-pmu.o
 obj-$(CONFIG_CACHE_XSC3L2)	+= cache-xsc3l2.o
 obj-$(CONFIG_CACHE_TAUROS2)	+= cache-tauros2.o
 obj-$(CONFIG_CACHE_UNIPHIER)	+= cache-uniphier.o
diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c
new file mode 100644
index 000000000000..976d3057272e
--- /dev/null
+++ b/arch/arm/mm/cache-l2x0-pmu.c
@@ -0,0 +1,584 @@
+/*
+ * L220/L310 cache controller support
+ *
+ * Copyright (C) 2016 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/errno.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/hardware/cache-l2x0.h>
+
+#define PMU_NR_COUNTERS 2
+
+static void __iomem *l2x0_base;
+static struct pmu *l2x0_pmu;
+static cpumask_t pmu_cpu;
+
+static const char *l2x0_name;
+
+static ktime_t l2x0_pmu_poll_period;
+static struct hrtimer l2x0_pmu_hrtimer;
+
+/*
+ * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
+ * Registers controlling these are laid out in pairs, in descending order, i.e.
+ * the register for Counter1 comes first, followed by the register for
+ * Counter0.
+ * We ensure that idx 0 -> Counter0, and idx1 -> Counter1.
+ */
+static struct perf_event *events[PMU_NR_COUNTERS];
+
+/* Find an unused counter */
+static int l2x0_pmu_find_idx(void)
+{
+	int i;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (!events[i])
+			return i;
+	}
+
+	return -1;
+}
+
+/* How many counters are allocated? */
+static int l2x0_pmu_num_active_counters(void)
+{
+	int i, cnt = 0;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			cnt++;
+	}
+
+	return cnt;
+}
+
+static void l2x0_pmu_counter_config_write(int idx, u32 val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
+}
+
+static u32 l2x0_pmu_counter_read(int idx)
+{
+	return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
+}
+
+static void l2x0_pmu_counter_write(int idx, u32 val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
+}
+
+static void __l2x0_pmu_enable(void)
+{
+	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
+	val |= L2X0_EVENT_CNT_CTRL_ENABLE;
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void __l2x0_pmu_disable(void)
+{
+	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
+	val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void l2x0_pmu_enable(struct pmu *pmu)
+{
+	if (l2x0_pmu_num_active_counters() == 0)
+		return;
+
+	__l2x0_pmu_enable();
+}
+
+static void l2x0_pmu_disable(struct pmu *pmu)
+{
+	if (l2x0_pmu_num_active_counters() == 0)
+		return;
+
+	__l2x0_pmu_disable();
+}
+
+static void warn_if_saturated(u32 count)
+{
+	if (count != 0xffffffff)
+		return;
+
+	pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
+}
+
+static void l2x0_pmu_event_read(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+	u64 prev_count, new_count, mask;
+
+	do {
+		 prev_count = local64_read(&hw->prev_count);
+		 new_count = l2x0_pmu_counter_read(hw->idx);
+	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);
+
+	mask = GENMASK_ULL(31, 0);
+	local64_add((new_count - prev_count) & mask, &event->count);
+
+	warn_if_saturated(new_count);
+}
+
+static void l2x0_pmu_event_configure(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	/*
+	 * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
+	 * will *always* lose some number of events when a counter saturates,
+	 * and have no way of detecting how many were lost.
+	 *
+	 * To minimize the impact of this, we try to maximize the period by
+	 * always starting counters at zero. To ensure that group ratios are
+	 * representative, we poll periodically to avoid counters saturating.
+	 * See l2x0_pmu_poll().
+	 */
+	local64_set(&hw->prev_count, 0);
+	l2x0_pmu_counter_write(hw->idx, 0);
+}
+
+static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
+{
+	unsigned long flags;
+	int i;
+
+	local_irq_save(flags);
+	__l2x0_pmu_disable();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		struct perf_event *event = events[i];
+
+		if (!event)
+			continue;
+
+		l2x0_pmu_event_read(event);
+		l2x0_pmu_event_configure(event);
+	}
+
+	__l2x0_pmu_enable();
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
+	return HRTIMER_RESTART;
+}
+
+
+static void __l2x0_pmu_event_enable(int idx, u32 event)
+{
+	u32 val;
+
+	val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
+	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
+	l2x0_pmu_counter_config_write(idx, val);
+}
+
+static void l2x0_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
+		l2x0_pmu_event_configure(event);
+	}
+
+	hw->state = 0;
+
+	__l2x0_pmu_event_enable(hw->idx, hw->config_base);
+}
+
+static void __l2x0_pmu_event_disable(int idx)
+{
+	u32 val;
+
+	val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
+	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
+	l2x0_pmu_counter_config_write(idx, val);
+}
+
+static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	__l2x0_pmu_event_disable(hw->idx);
+
+	hw->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_UPDATE) {
+		l2x0_pmu_event_read(event);
+		hw->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int l2x0_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+	int idx = l2x0_pmu_find_idx();
+
+	if (idx == -1)
+		return -EAGAIN;
+
+	/*
+	 * Pin the timer, so that the overflows are handled by the chosen
+	 * event->cpu (this is the same one as presented in "cpumask"
+	 * attribute).
+	 */
+	if (l2x0_pmu_num_active_counters() == 0)
+		hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
+			      HRTIMER_MODE_REL_PINNED);
+
+	events[idx] = event;
+	hw->idx = idx;
+
+	l2x0_pmu_event_configure(event);
+
+	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		l2x0_pmu_event_start(event, 0);
+
+	return 0;
+}
+
+static void l2x0_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	l2x0_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	events[hw->idx] = NULL;
+	hw->idx = -1;
+
+	if (l2x0_pmu_num_active_counters() == 0)
+		hrtimer_cancel(&l2x0_pmu_hrtimer);
+}
+
+static bool l2x0_pmu_group_is_valid(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int num_hw = 0;
+
+	if (leader->pmu == pmu)
+		num_hw++;
+	else if (!is_software_event(leader))
+		return false;
+
+	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+		if (sibling->pmu == pmu)
+			num_hw++;
+		else if (!is_software_event(sibling))
+			return false;
+	}
+
+	return num_hw <= PMU_NR_COUNTERS;
+}
+
+static int l2x0_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (event->attr.type != l2x0_pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) ||
+	    event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
+		return -EINVAL;
+
+	hw->config_base = event->attr.config;
+
+	if (!l2x0_pmu_group_is_valid(event))
+		return -EINVAL;
+
+	event->cpu = cpumask_first(&pmu_cpu);
+
+	return 0;
+}
+
+struct l2x0_event_attribute {
+	struct device_attribute attr;
+	unsigned int config;
+	bool pl310_only;
+};
+
+#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)				\
+	(&((struct l2x0_event_attribute[]) {{					\
+		.attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),	\
+		.config = _config,						\
+		.pl310_only = _pl310_only,					\
+	}})[0].attr.attr)
+
+#define L220_PLUS_EVENT_ATTR(_name, _config)					\
+	L2X0_EVENT_ATTR(_name, _config, false)
+
+#define PL310_EVENT_ATTR(_name, _config)					\
+	L2X0_EVENT_ATTR(_name, _config, true)
+
+static ssize_t l2x0_pmu_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct l2x0_event_attribute *lattr;
+
+	lattr = container_of(attr, typeof(*lattr), attr);
+	return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
+}
+
+static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
+					      struct attribute *attr,
+					      int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct l2x0_event_attribute *lattr;
+
+	lattr = container_of(attr, typeof(*lattr), attr.attr);
+
+	if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
+		return attr->mode;
+
+	return 0;
+}
+
+static struct attribute *l2x0_pmu_event_attrs[] = {
+	L220_PLUS_EVENT_ATTR(co,	0x1),
+	L220_PLUS_EVENT_ATTR(drhit,	0x2),
+	L220_PLUS_EVENT_ATTR(drreq,	0x3),
+	L220_PLUS_EVENT_ATTR(dwhit,	0x4),
+	L220_PLUS_EVENT_ATTR(dwreq,	0x5),
+	L220_PLUS_EVENT_ATTR(dwtreq,	0x6),
+	L220_PLUS_EVENT_ATTR(irhit,	0x7),
+	L220_PLUS_EVENT_ATTR(irreq,	0x8),
+	L220_PLUS_EVENT_ATTR(wa,	0x9),
+	PL310_EVENT_ATTR(ipfalloc,	0xa),
+	PL310_EVENT_ATTR(epfhit,	0xb),
+	PL310_EVENT_ATTR(epfalloc,	0xc),
+	PL310_EVENT_ATTR(srrcvd,	0xd),
+	PL310_EVENT_ATTR(srconf,	0xe),
+	PL310_EVENT_ATTR(epfrcvd,	0xf),
+	NULL
+};
+
+static struct attribute_group l2x0_pmu_event_attrs_group = {
+	.name = "events",
+	.attrs = l2x0_pmu_event_attrs,
+	.is_visible = l2x0_pmu_event_attr_is_visible,
+};
+
+static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
+}
+
+static struct device_attribute l2x0_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);
+
+static struct attribute *l2x0_pmu_cpumask_attrs[] = {
+	&l2x0_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group l2x0_pmu_cpumask_attr_group = {
+	.attrs = l2x0_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *l2x0_pmu_attr_groups[] = {
+	&l2x0_pmu_event_attrs_group,
+	&l2x0_pmu_cpumask_attr_group,
+	NULL,
+};
+
+static void l2x0_pmu_reset(void)
+{
+	int i;
+
+	__l2x0_pmu_disable();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++)
+		__l2x0_pmu_event_disable(i);
+}
+
+static int l2x0_pmu_offline_cpu(unsigned int cpu)
+{
+	unsigned int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(l2x0_pmu, cpu, target);
+	cpumask_set_cpu(target, &pmu_cpu);
+
+	return 0;
+}
+
+void l2x0_pmu_suspend(void)
+{
+	int i;
+
+	if (!l2x0_pmu)
+		return;
+
+	l2x0_pmu_disable(l2x0_pmu);
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
+	}
+
+}
+
+void l2x0_pmu_resume(void)
+{
+	int i;
+
+	if (!l2x0_pmu)
+		return;
+
+	l2x0_pmu_reset();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
+	}
+
+	l2x0_pmu_enable(l2x0_pmu);
+}
+
+void __init l2x0_pmu_register(void __iomem *base, u32 part)
+{
+	/*
+	 * Determine whether we support the PMU, and choose the name for sysfs.
+	 * This is also used by l2x0_pmu_event_attr_is_visible to determine
+	 * which events to display, as the PL310 PMU supports a superset of
+	 * L220 events.
+	 *
+	 * The L210 PMU has a different programmer's interface, and is not
+	 * supported by this driver.
+	 *
+	 * We must defer registering the PMU until the perf subsystem is up and
+	 * running, so just stash the name and base, and leave that to another
+	 * initcall.
+	 */
+	switch (part & L2X0_CACHE_ID_PART_MASK) {
+	case L2X0_CACHE_ID_PART_L220:
+		l2x0_name = "l2c_220";
+		break;
+	case L2X0_CACHE_ID_PART_L310:
+		l2x0_name = "l2c_310";
+		break;
+	default:
+		return;
+	}
+
+	l2x0_base = base;
+}
+
+static __init int l2x0_pmu_init(void)
+{
+	int ret;
+
+	if (!l2x0_base)
+		return 0;
+
+	l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
+	if (!l2x0_pmu) {
+		pr_warn("Unable to allocate L2x0 PMU\n");
+		return -ENOMEM;
+	}
+
+	*l2x0_pmu = (struct pmu) {
+		.task_ctx_nr = perf_invalid_context,
+		.pmu_enable = l2x0_pmu_enable,
+		.pmu_disable = l2x0_pmu_disable,
+		.read = l2x0_pmu_event_read,
+		.start = l2x0_pmu_event_start,
+		.stop = l2x0_pmu_event_stop,
+		.add = l2x0_pmu_event_add,
+		.del = l2x0_pmu_event_del,
+		.event_init = l2x0_pmu_event_init,
+		.attr_groups = l2x0_pmu_attr_groups,
+	};
+
+	l2x0_pmu_reset();
+
+	/*
+	 * We always use a hrtimer rather than an interrupt.
+	 * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
+	 *
+	 * Polling once a second allows the counters to fill up to 1/128th on a
+	 * quad-core test chip with cores clocked at 400MHz. Hopefully this
+	 * leaves sufficient headroom to avoid overflow on production silicon
+	 * at higher frequencies.
+	 */
+	l2x0_pmu_poll_period = ms_to_ktime(1000);
+	hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	l2x0_pmu_hrtimer.function = l2x0_pmu_poll;
+
+	cpumask_set_cpu(0, &pmu_cpu);
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
+					"AP_PERF_ARM_L2X0_ONLINE", NULL,
+					l2x0_pmu_offline_cpu);
+	if (ret)
+		goto out_pmu;
+
+	ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
+	if (ret)
+		goto out_cpuhp;
+
+	return 0;
+
+out_cpuhp:
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
+out_pmu:
+	kfree(l2x0_pmu);
+	l2x0_pmu = NULL;
+	return ret;
+}
+device_initcall(l2x0_pmu_init);
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index ca5595fa072a..d1870c777c6e 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -142,6 +142,8 @@ static void l2c_disable(void)
 {
 	void __iomem *base = l2x0_base;
 
+	l2x0_pmu_suspend();
+
 	outer_cache.flush_all();
 	l2c_write_sec(0, base, L2X0_CTRL);
 	dsb(st);
@@ -159,6 +161,8 @@ static void l2c_resume(void)
 	/* Do not touch the controller if already enabled. */
 	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
 		l2c_enable(base, l2x0_data->num_lock);
+
+	l2x0_pmu_resume();
 }
 
 /*
@@ -891,6 +895,8 @@ static int __init __l2c_init(const struct l2c_init_data *data,
 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
 		data->type, cache_id, aux);
 
+	l2x0_pmu_register(l2x0_base, cache_id);
+
 	return 0;
 }
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 242bf530edfc..7e1ba14a3d78 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -86,6 +86,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
+	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_NOTIFY_ONLINE,

From eac8dbf74f30479b70b9a53f3b24285bd6229d09 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Sat, 3 Sep 2016 09:14:58 +0100
Subject: [PATCH 28/35] ARM: sa1111: ensure we only touch RAB bus type devices
 when removing

When removing a SA1111 device, we try to remove all child devices.
However, we must only remove our own RAB bus typed devices from the
tree, there may be other devices present which should not be touched.

This is necessary before we introduce gpiochip to SA1111 to avoid
incorrectly trying to remove the gpiochip device, leading to an oops
in __release_resource().

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 2e076c492005..303c62861d84 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -815,6 +815,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 static int sa1111_remove_one(struct device *dev, void *data)
 {
 	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	if (dev->bus != &sa1111_bus_type)
+		return 0;
 	device_del(&sadev->dev);
 	release_resource(&sadev->res);
 	put_device(&sadev->dev);

From 7d53c1f01210aa79838f1fe34b2a89f8e900d720 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 6 Sep 2016 15:29:26 +0100
Subject: [PATCH 29/35] ARM: sa1111: use devm_kzalloc()

Use devm_kzalloc() to allocate our driver data, so we can eliminate its
kfree() from the device removal and error cleanup paths.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 303c62861d84..f2ce21c03646 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -696,7 +696,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 	if (!pd)
 		return -EINVAL;
 
-	sachip = kzalloc(sizeof(struct sa1111), GFP_KERNEL);
+	sachip = devm_kzalloc(me, sizeof(struct sa1111), GFP_KERNEL);
 	if (!sachip)
 		return -ENOMEM;
 
@@ -808,7 +808,6 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
  err_clkput:
 	clk_put(sachip->clk);
  err_free:
-	kfree(sachip);
 	return ret;
 }
 
@@ -847,7 +846,6 @@ static void __sa1111_remove(struct sa1111 *sachip)
 
 	iounmap(sachip->base);
 	clk_put(sachip->clk);
-	kfree(sachip);
 }
 
 struct sa1111_save_data {

From deee856a5c6293c50ad6589c9f8a1cca5ed08ae9 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 30 Aug 2016 22:47:35 +0100
Subject: [PATCH 30/35] ARM: sa1111: use devm_clk_get()

Convert sa1111 to use devm_clk_get() to get its clock resource, and
strip out the clk_put() calls.  This simplifies the error handling
a little.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index f2ce21c03646..09a4f932c2ec 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -700,15 +700,13 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 	if (!sachip)
 		return -ENOMEM;
 
-	sachip->clk = clk_get(me, "SA1111_CLK");
-	if (IS_ERR(sachip->clk)) {
-		ret = PTR_ERR(sachip->clk);
-		goto err_free;
-	}
+	sachip->clk = devm_clk_get(me, "SA1111_CLK");
+	if (IS_ERR(sachip->clk))
+		return PTR_ERR(sachip->clk);
 
 	ret = clk_prepare(sachip->clk);
 	if (ret)
-		goto err_clkput;
+		return ret;
 
 	spin_lock_init(&sachip->lock);
 
@@ -805,9 +803,6 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 	iounmap(sachip->base);
  err_clk_unprep:
 	clk_unprepare(sachip->clk);
- err_clkput:
-	clk_put(sachip->clk);
- err_free:
 	return ret;
 }
 
@@ -845,7 +840,6 @@ static void __sa1111_remove(struct sa1111 *sachip)
 	}
 
 	iounmap(sachip->base);
-	clk_put(sachip->clk);
 }
 
 struct sa1111_save_data {

From ccb7d854d65a09ffe4b6c8342b8d1c6bee6740bb Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 6 Sep 2016 16:01:32 +0100
Subject: [PATCH 31/35] ARM: sa1111: move irq cleanup to separate function

Move the SA1111 interrupt cleanup to a separate function, so it can be
re-used in the probe error cleanup path.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 09a4f932c2ec..eaa818d921a9 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -509,6 +509,24 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
 	return 0;
 }
 
+static void sa1111_remove_irq(struct sa1111 *sachip)
+{
+	void __iomem *irqbase = sachip->base + SA1111_INTC;
+
+	/* disable all IRQs */
+	sa1111_writel(0, irqbase + SA1111_INTEN0);
+	sa1111_writel(0, irqbase + SA1111_INTEN1);
+	sa1111_writel(0, irqbase + SA1111_WAKEEN0);
+	sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+
+	if (sachip->irq != NO_IRQ) {
+		irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
+		irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+
+		release_mem_region(sachip->phys + SA1111_INTC, 512);
+	}
+}
+
 /*
  * Bring the SA1111 out of reset.  This requires a set procedure:
  *  1. nRESET asserted (by hardware)
@@ -819,26 +837,13 @@ static int sa1111_remove_one(struct device *dev, void *data)
 
 static void __sa1111_remove(struct sa1111 *sachip)
 {
-	void __iomem *irqbase = sachip->base + SA1111_INTC;
-
 	device_for_each_child(sachip->dev, NULL, sa1111_remove_one);
 
-	/* disable all IRQs */
-	sa1111_writel(0, irqbase + SA1111_INTEN0);
-	sa1111_writel(0, irqbase + SA1111_INTEN1);
-	sa1111_writel(0, irqbase + SA1111_WAKEEN0);
-	sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+	sa1111_remove_irq(sachip);
 
 	clk_disable(sachip->clk);
 	clk_unprepare(sachip->clk);
 
-	if (sachip->irq != NO_IRQ) {
-		irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
-		irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
-
-		release_mem_region(sachip->phys + SA1111_INTC, 512);
-	}
-
 	iounmap(sachip->base);
 }
 

From 17cf50116e10affaadfee9af2253c841f7ac0623 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Wed, 31 Aug 2016 08:49:45 +0100
Subject: [PATCH 32/35] ARM: sa1111: implement a gpio_chip for SA1111 GPIOs

Add a gpio_chip instance for SA1111 GPIOs.  This allows us to use
gpiolib to lookup and manipulate SA1111 GPIOs.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c | 166 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index eaa818d921a9..16cc3b5122ad 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -15,6 +15,7 @@
  * from machine specific code with proper arguments when required.
  */
 #include <linux/module.h>
+#include <linux/gpio/driver.h>
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
@@ -107,6 +108,7 @@ struct sa1111 {
 	spinlock_t	lock;
 	void __iomem	*base;
 	struct sa1111_platform_data *pdata;
+	struct gpio_chip gc;
 #ifdef CONFIG_PM
 	void		*saved_state;
 #endif
@@ -527,6 +529,163 @@ static void sa1111_remove_irq(struct sa1111 *sachip)
 	}
 }
 
+enum {
+	SA1111_GPIO_PXDDR = (SA1111_GPIO_PADDR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXDRR = (SA1111_GPIO_PADRR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXDWR = (SA1111_GPIO_PADWR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXSDR = (SA1111_GPIO_PASDR - SA1111_GPIO_PADDR),
+	SA1111_GPIO_PXSSR = (SA1111_GPIO_PASSR - SA1111_GPIO_PADDR),
+};
+
+static struct sa1111 *gc_to_sa1111(struct gpio_chip *gc)
+{
+	return container_of(gc, struct sa1111, gc);
+}
+
+static void __iomem *sa1111_gpio_map_reg(struct sa1111 *sachip, unsigned offset)
+{
+	void __iomem *reg = sachip->base + SA1111_GPIO;
+
+	if (offset < 4)
+		return reg + SA1111_GPIO_PADDR;
+	if (offset < 10)
+		return reg + SA1111_GPIO_PBDDR;
+	if (offset < 18)
+		return reg + SA1111_GPIO_PCDDR;
+	return NULL;
+}
+
+static u32 sa1111_gpio_map_bit(unsigned offset)
+{
+	if (offset < 4)
+		return BIT(offset);
+	if (offset < 10)
+		return BIT(offset - 4);
+	if (offset < 18)
+		return BIT(offset - 10);
+	return 0;
+}
+
+static void sa1111_gpio_modify(void __iomem *reg, u32 mask, u32 set)
+{
+	u32 val;
+
+	val = readl_relaxed(reg);
+	val &= ~mask;
+	val |= mask & set;
+	writel_relaxed(val, reg);
+}
+
+static int sa1111_gpio_get_direction(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	return !!(readl_relaxed(reg + SA1111_GPIO_PXDDR) & mask);
+}
+
+static int sa1111_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDDR, mask, mask);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSDR, mask, mask);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+
+	return 0;
+}
+
+static int sa1111_gpio_direction_output(struct gpio_chip *gc, unsigned offset,
+	int value)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDWR, mask, value ? mask : 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSSR, mask, value ? mask : 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDDR, mask, 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSDR, mask, 0);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+
+	return 0;
+}
+
+static int sa1111_gpio_get(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	return !!(readl_relaxed(reg + SA1111_GPIO_PXDRR) & mask);
+}
+
+static void sa1111_gpio_set(struct gpio_chip *gc, unsigned offset, int value)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sa1111_gpio_map_reg(sachip, offset);
+	u32 mask = sa1111_gpio_map_bit(offset);
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXDWR, mask, value ? mask : 0);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PXSSR, mask, value ? mask : 0);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+}
+
+static void sa1111_gpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
+	unsigned long *bits)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+	unsigned long flags;
+	void __iomem *reg = sachip->base + SA1111_GPIO;
+	u32 msk, val;
+
+	msk = *mask;
+	val = *bits;
+
+	spin_lock_irqsave(&sachip->lock, flags);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PADWR, msk & 15, val);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PASSR, msk & 15, val);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PBDWR, (msk >> 4) & 255, val >> 4);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PBSSR, (msk >> 4) & 255, val >> 4);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PCDWR, (msk >> 12) & 255, val >> 12);
+	sa1111_gpio_modify(reg + SA1111_GPIO_PCSSR, (msk >> 12) & 255, val >> 12);
+	spin_unlock_irqrestore(&sachip->lock, flags);
+}
+
+static int sa1111_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
+{
+	struct sa1111 *sachip = gc_to_sa1111(gc);
+
+	return sachip->irq_base + offset;
+}
+
+static int sa1111_setup_gpios(struct sa1111 *sachip)
+{
+	sachip->gc.label = "sa1111";
+	sachip->gc.parent = sachip->dev;
+	sachip->gc.owner = THIS_MODULE;
+	sachip->gc.get_direction = sa1111_gpio_get_direction;
+	sachip->gc.direction_input = sa1111_gpio_direction_input;
+	sachip->gc.direction_output = sa1111_gpio_direction_output;
+	sachip->gc.get = sa1111_gpio_get;
+	sachip->gc.set = sa1111_gpio_set;
+	sachip->gc.set_multiple = sa1111_gpio_set_multiple;
+	sachip->gc.to_irq = sa1111_gpio_to_irq;
+	sachip->gc.base = -1;
+	sachip->gc.ngpio = 18;
+
+	return devm_gpiochip_add_data(sachip->dev, &sachip->gc, sachip);
+}
+
 /*
  * Bring the SA1111 out of reset.  This requires a set procedure:
  *  1. nRESET asserted (by hardware)
@@ -773,6 +932,11 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 			goto err_clk;
 	}
 
+	/* Setup the GPIOs - should really be done after the IRQ setup */
+	ret = sa1111_setup_gpios(sachip);
+	if (ret)
+		goto err_irq;
+
 #ifdef CONFIG_ARCH_SA1100
 	{
 	unsigned int val;
@@ -815,6 +979,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 
 	return 0;
 
+ err_irq:
+	sa1111_remove_irq(sachip);
  err_clk:
 	clk_disable(sachip->clk);
  err_unmap:

From 1629c9ab3679908d2e18ca306df1f07f48713f6b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 6 Sep 2016 18:43:28 +0100
Subject: [PATCH 33/35] ARM: sa1111: clean up duplication in IRQ chip
 implementation

Clean up the duplication in the IRQ chip implementation - we can compute
the register address from the interrupt number rather than duplicating
the code for each register.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c | 205 +++++++++++----------------------------
 1 file changed, 56 insertions(+), 149 deletions(-)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 16cc3b5122ad..7f232ce50944 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -233,30 +233,44 @@ static void sa1111_irq_handler(struct irq_desc *desc)
 #define SA1111_IRQMASK_LO(x)	(1 << (x - sachip->irq_base))
 #define SA1111_IRQMASK_HI(x)	(1 << (x - sachip->irq_base - 32))
 
+static u32 sa1111_irqmask(struct irq_data *d)
+{
+	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
+
+	return BIT((d->irq - sachip->irq_base) & 31);
+}
+
+static int sa1111_irqbank(struct irq_data *d)
+{
+	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
+
+	return ((d->irq - sachip->irq_base) / 32) * 4;
+}
+
 static void sa1111_ack_irq(struct irq_data *d)
 {
 }
 
-static void sa1111_mask_lowirq(struct irq_data *d)
+static void sa1111_mask_irq(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie0;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ie;
 
-	ie0 = sa1111_readl(mapbase + SA1111_INTEN0);
-	ie0 &= ~SA1111_IRQMASK_LO(d->irq);
-	writel(ie0, mapbase + SA1111_INTEN0);
+	ie = sa1111_readl(mapbase + SA1111_INTEN0);
+	ie &= ~sa1111_irqmask(d);
+	sa1111_writel(ie, mapbase + SA1111_INTEN0);
 }
 
-static void sa1111_unmask_lowirq(struct irq_data *d)
+static void sa1111_unmask_irq(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie0;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ie;
 
-	ie0 = sa1111_readl(mapbase + SA1111_INTEN0);
-	ie0 |= SA1111_IRQMASK_LO(d->irq);
-	sa1111_writel(ie0, mapbase + SA1111_INTEN0);
+	ie = sa1111_readl(mapbase + SA1111_INTEN0);
+	ie |= sa1111_irqmask(d);
+	sa1111_writel(ie, mapbase + SA1111_INTEN0);
 }
 
 /*
@@ -266,18 +280,17 @@ static void sa1111_unmask_lowirq(struct irq_data *d)
  * be triggered.  In fact, its very difficult, if not impossible to get
  * INTSET to re-trigger the interrupt.
  */
-static int sa1111_retrigger_lowirq(struct irq_data *d)
+static int sa1111_retrigger_irq(struct irq_data *d)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_LO(d->irq);
-	unsigned long ip0;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ip, mask = sa1111_irqmask(d);
 	int i;
 
-	ip0 = sa1111_readl(mapbase + SA1111_INTPOL0);
+	ip = sa1111_readl(mapbase + SA1111_INTPOL0);
 	for (i = 0; i < 8; i++) {
-		sa1111_writel(ip0 ^ mask, mapbase + SA1111_INTPOL0);
-		sa1111_writel(ip0, mapbase + SA1111_INTPOL0);
+		sa1111_writel(ip ^ mask, mapbase + SA1111_INTPOL0);
+		sa1111_writel(ip, mapbase + SA1111_INTPOL0);
 		if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask)
 			break;
 	}
@@ -288,12 +301,11 @@ static int sa1111_retrigger_lowirq(struct irq_data *d)
 	return i == 8 ? -1 : 0;
 }
 
-static int sa1111_type_lowirq(struct irq_data *d, unsigned int flags)
+static int sa1111_type_irq(struct irq_data *d, unsigned int flags)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_LO(d->irq);
-	unsigned long ip0;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 ip, mask = sa1111_irqmask(d);
 
 	if (flags == IRQ_TYPE_PROBE)
 		return 0;
@@ -301,144 +313,41 @@ static int sa1111_type_lowirq(struct irq_data *d, unsigned int flags)
 	if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0)
 		return -EINVAL;
 
-	ip0 = sa1111_readl(mapbase + SA1111_INTPOL0);
+	ip = sa1111_readl(mapbase + SA1111_INTPOL0);
 	if (flags & IRQ_TYPE_EDGE_RISING)
-		ip0 &= ~mask;
+		ip &= ~mask;
 	else
-		ip0 |= mask;
-	sa1111_writel(ip0, mapbase + SA1111_INTPOL0);
-	sa1111_writel(ip0, mapbase + SA1111_WAKEPOL0);
+		ip |= mask;
+	sa1111_writel(ip, mapbase + SA1111_INTPOL0);
+	sa1111_writel(ip, mapbase + SA1111_WAKEPOL0);
 
 	return 0;
 }
 
-static int sa1111_wake_lowirq(struct irq_data *d, unsigned int on)
+static int sa1111_wake_irq(struct irq_data *d, unsigned int on)
 {
 	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_LO(d->irq);
-	unsigned long we0;
+	void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
+	u32 we, mask = sa1111_irqmask(d);
 
-	we0 = sa1111_readl(mapbase + SA1111_WAKEEN0);
+	we = sa1111_readl(mapbase + SA1111_WAKEEN0);
 	if (on)
-		we0 |= mask;
+		we |= mask;
 	else
-		we0 &= ~mask;
-	sa1111_writel(we0, mapbase + SA1111_WAKEEN0);
+		we &= ~mask;
+	sa1111_writel(we, mapbase + SA1111_WAKEEN0);
 
 	return 0;
 }
 
-static struct irq_chip sa1111_low_chip = {
-	.name		= "SA1111-l",
+static struct irq_chip sa1111_irq_chip = {
+	.name		= "SA1111",
 	.irq_ack	= sa1111_ack_irq,
-	.irq_mask	= sa1111_mask_lowirq,
-	.irq_unmask	= sa1111_unmask_lowirq,
-	.irq_retrigger	= sa1111_retrigger_lowirq,
-	.irq_set_type	= sa1111_type_lowirq,
-	.irq_set_wake	= sa1111_wake_lowirq,
-};
-
-static void sa1111_mask_highirq(struct irq_data *d)
-{
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie1;
-
-	ie1 = sa1111_readl(mapbase + SA1111_INTEN1);
-	ie1 &= ~SA1111_IRQMASK_HI(d->irq);
-	sa1111_writel(ie1, mapbase + SA1111_INTEN1);
-}
-
-static void sa1111_unmask_highirq(struct irq_data *d)
-{
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned long ie1;
-
-	ie1 = sa1111_readl(mapbase + SA1111_INTEN1);
-	ie1 |= SA1111_IRQMASK_HI(d->irq);
-	sa1111_writel(ie1, mapbase + SA1111_INTEN1);
-}
-
-/*
- * Attempt to re-trigger the interrupt.  The SA1111 contains a register
- * (INTSET) which claims to do this.  However, in practice no amount of
- * manipulation of INTEN and INTSET guarantees that the interrupt will
- * be triggered.  In fact, its very difficult, if not impossible to get
- * INTSET to re-trigger the interrupt.
- */
-static int sa1111_retrigger_highirq(struct irq_data *d)
-{
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_HI(d->irq);
-	unsigned long ip1;
-	int i;
-
-	ip1 = sa1111_readl(mapbase + SA1111_INTPOL1);
-	for (i = 0; i < 8; i++) {
-		sa1111_writel(ip1 ^ mask, mapbase + SA1111_INTPOL1);
-		sa1111_writel(ip1, mapbase + SA1111_INTPOL1);
-		if (sa1111_readl(mapbase + SA1111_INTSTATCLR1) & mask)
-			break;
-	}
-
-	if (i == 8)
-		pr_err("Danger Will Robinson: failed to re-trigger IRQ%d\n",
-		       d->irq);
-	return i == 8 ? -1 : 0;
-}
-
-static int sa1111_type_highirq(struct irq_data *d, unsigned int flags)
-{
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_HI(d->irq);
-	unsigned long ip1;
-
-	if (flags == IRQ_TYPE_PROBE)
-		return 0;
-
-	if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0)
-		return -EINVAL;
-
-	ip1 = sa1111_readl(mapbase + SA1111_INTPOL1);
-	if (flags & IRQ_TYPE_EDGE_RISING)
-		ip1 &= ~mask;
-	else
-		ip1 |= mask;
-	sa1111_writel(ip1, mapbase + SA1111_INTPOL1);
-	sa1111_writel(ip1, mapbase + SA1111_WAKEPOL1);
-
-	return 0;
-}
-
-static int sa1111_wake_highirq(struct irq_data *d, unsigned int on)
-{
-	struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-	void __iomem *mapbase = sachip->base + SA1111_INTC;
-	unsigned int mask = SA1111_IRQMASK_HI(d->irq);
-	unsigned long we1;
-
-	we1 = sa1111_readl(mapbase + SA1111_WAKEEN1);
-	if (on)
-		we1 |= mask;
-	else
-		we1 &= ~mask;
-	sa1111_writel(we1, mapbase + SA1111_WAKEEN1);
-
-	return 0;
-}
-
-static struct irq_chip sa1111_high_chip = {
-	.name		= "SA1111-h",
-	.irq_ack	= sa1111_ack_irq,
-	.irq_mask	= sa1111_mask_highirq,
-	.irq_unmask	= sa1111_unmask_highirq,
-	.irq_retrigger	= sa1111_retrigger_highirq,
-	.irq_set_type	= sa1111_type_highirq,
-	.irq_set_wake	= sa1111_wake_highirq,
+	.irq_mask	= sa1111_mask_irq,
+	.irq_unmask	= sa1111_unmask_irq,
+	.irq_retrigger	= sa1111_retrigger_irq,
+	.irq_set_type	= sa1111_type_irq,
+	.irq_set_wake	= sa1111_wake_irq,
 };
 
 static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
@@ -484,16 +393,14 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
 
 	for (i = IRQ_GPAIN0; i <= SSPROR; i++) {
 		irq = sachip->irq_base + i;
-		irq_set_chip_and_handler(irq, &sa1111_low_chip,
-					 handle_edge_irq);
+		irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
 		irq_set_chip_data(irq, sachip);
 		irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
 	}
 
 	for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) {
 		irq = sachip->irq_base + i;
-		irq_set_chip_and_handler(irq, &sa1111_high_chip,
-					 handle_edge_irq);
+		irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
 		irq_set_chip_data(irq, sachip);
 		irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
 	}

From cf6e4ca3e53f734f8fcb253a686c1fdbb6a50e85 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 6 Sep 2016 00:45:33 +0100
Subject: [PATCH 34/35] ARM: sa1111: add sa1111_get_irq()

Add a helper function to get the irq number for a device.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c               | 8 ++++++++
 arch/arm/include/asm/hardware/sa1111.h | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 7f232ce50944..3c722a4143fe 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -1357,6 +1357,14 @@ void sa1111_disable_device(struct sa1111_dev *sadev)
 }
 EXPORT_SYMBOL(sa1111_disable_device);
 
+int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num)
+{
+	if (num >= ARRAY_SIZE(sadev->irq))
+		return -EINVAL;
+	return sadev->irq[num];
+}
+EXPORT_SYMBOL_GPL(sa1111_get_irq);
+
 /*
  *	SA1111 "Register Access Bus."
  *
diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h
index 7c2bbc7f0be1..267e43b5225f 100644
--- a/arch/arm/include/asm/hardware/sa1111.h
+++ b/arch/arm/include/asm/hardware/sa1111.h
@@ -446,6 +446,8 @@ struct sa1111_driver {
 int sa1111_enable_device(struct sa1111_dev *);
 void sa1111_disable_device(struct sa1111_dev *);
 
+int sa1111_get_irq(struct sa1111_dev *, unsigned num);
+
 unsigned int sa1111_pll_clock(struct sa1111_dev *);
 
 #define SA1111_AUDIO_ACLINK	0

From b60752f2b20c167859943e001727f0d4da419b23 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 6 Sep 2016 13:46:46 +0100
Subject: [PATCH 35/35] ARM: sa1111: provide to_sa1111_device() macro

Provide a nicer to_sa1111_device macro to convert a struct device to a
sa1111_dev.  We will need this for drivers when converting them to
dev_pm_ops, or removing shutdown methods.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/common/sa1111.c               | 18 +++++++++---------
 arch/arm/include/asm/hardware/sa1111.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 3c722a4143fe..4ecd5120fce7 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -691,7 +691,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac,
 
 static void sa1111_dev_release(struct device *_dev)
 {
-	struct sa1111_dev *dev = SA1111_DEV(_dev);
+	struct sa1111_dev *dev = to_sa1111_device(_dev);
 
 	kfree(dev);
 }
@@ -899,7 +899,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 
 static int sa1111_remove_one(struct device *dev, void *data)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	if (dev->bus != &sa1111_bus_type)
 		return 0;
 	device_del(&sadev->dev);
@@ -1373,7 +1373,7 @@ EXPORT_SYMBOL_GPL(sa1111_get_irq);
  */
 static int sa1111_match(struct device *_dev, struct device_driver *_drv)
 {
-	struct sa1111_dev *dev = SA1111_DEV(_dev);
+	struct sa1111_dev *dev = to_sa1111_device(_dev);
 	struct sa1111_driver *drv = SA1111_DRV(_drv);
 
 	return !!(dev->devid & drv->devid);
@@ -1381,7 +1381,7 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv)
 
 static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = 0;
 
@@ -1392,7 +1392,7 @@ static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
 
 static int sa1111_bus_resume(struct device *dev)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = 0;
 
@@ -1406,12 +1406,12 @@ static void sa1111_bus_shutdown(struct device *dev)
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 
 	if (drv && drv->shutdown)
-		drv->shutdown(SA1111_DEV(dev));
+		drv->shutdown(to_sa1111_device(dev));
 }
 
 static int sa1111_bus_probe(struct device *dev)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = -ENODEV;
 
@@ -1422,7 +1422,7 @@ static int sa1111_bus_probe(struct device *dev)
 
 static int sa1111_bus_remove(struct device *dev)
 {
-	struct sa1111_dev *sadev = SA1111_DEV(dev);
+	struct sa1111_dev *sadev = to_sa1111_device(dev);
 	struct sa1111_driver *drv = SA1111_DRV(dev->driver);
 	int ret = 0;
 
@@ -1487,7 +1487,7 @@ static int sa1111_needs_bounce(struct device *dev, dma_addr_t addr, size_t size)
 static int sa1111_notifier_call(struct notifier_block *n, unsigned long action,
 	void *data)
 {
-	struct sa1111_dev *dev = SA1111_DEV(data);
+	struct sa1111_dev *dev = to_sa1111_device(data);
 
 	switch (action) {
 	case BUS_NOTIFY_ADD_DEVICE:
diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h
index 267e43b5225f..8979fa3bbf2d 100644
--- a/arch/arm/include/asm/hardware/sa1111.h
+++ b/arch/arm/include/asm/hardware/sa1111.h
@@ -420,7 +420,7 @@ struct sa1111_dev {
 	u64		dma_mask;
 };
 
-#define SA1111_DEV(_d)	container_of((_d), struct sa1111_dev, dev)
+#define to_sa1111_device(x)	container_of(x, struct sa1111_dev, dev)
 
 #define sa1111_get_drvdata(d)	dev_get_drvdata(&(d)->dev)
 #define sa1111_set_drvdata(d,p)	dev_set_drvdata(&(d)->dev, p)