From 7c88f2bf7840c0ea67ac2de2e293a653f62ea134 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 20 Jul 2017 07:05:28 +0200
Subject: [PATCH 001/391] tile: defconfig: Cleanup from old Kconfig options

Remove old, dead Kconfig options (in order appearing in this commit):
 - CRYPTO_ZLIB: commit 110492183c4b ("crypto: compress - remove unused
   pcomp interface");
 - IP_NF_TARGET_ULOG: commit d4da843e6fad ("netfilter: kill remnants of
   ulog targets");

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
---
 arch/tile/configs/tilegx_defconfig  | 1 -
 arch/tile/configs/tilepro_defconfig | 2 --
 2 files changed, 3 deletions(-)

diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
index 0d925fa0f0c1..9f94435cc44f 100644
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -409,5 +409,4 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
index 149d8e8eacb8..1c5bd4f8ffca 100644
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -189,7 +189,6 @@ CONFIG_IP_NF_MATCH_ECN=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_ULOG=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
@@ -521,7 +520,6 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRC_CCITT=m
 CONFIG_CRC7=m

From 637f23abca87d26e091e0d6647ec878d97d2c6cd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 22 Jul 2017 10:33:02 +0300
Subject: [PATCH 002/391] tile: array underflow in setup_maxnodemem()

My static checker correctly complains that we should have a lower bound
on "node" to prevent an array underflow.

Fixes: 867e359b97c9 ("arch/tile: core support for Tilera 32-bit chips.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com>
---
 arch/tile/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 443a70bccc1c..b1474e7d9afb 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -140,7 +140,7 @@ static int __init setup_maxnodemem(char *str)
 {
 	char *endp;
 	unsigned long long maxnodemem;
-	long node;
+	unsigned long node;
 
 	node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;
 	if (node >= MAX_NUMNODES || *endp != ':')

From 472b46c352c9ff0b6fa57dbf85d77c51901a3368 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 6 Aug 2017 18:44:27 +0200
Subject: [PATCH 003/391] uapi linux/kfd_ioctl.h: only use __u32 and __u64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include <drm/drm.h> instead of <linux/types.h> which on Linux includes
<linux/types.h> and on non-Linux platforms defines __u32 etc types.

Fixes user space compilation errors like:

linux/kfd_ioctl.h:33:2: error: unknown type name ‘uint32_t’
  uint32_t major_version; /* from KFD */
  ^~~~~~~~

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 168 ++++++++++++++++-----------------
 1 file changed, 84 insertions(+), 84 deletions(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 7b4567bacfc2..26283fefdf5f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -23,15 +23,15 @@
 #ifndef KFD_IOCTL_H_INCLUDED
 #define KFD_IOCTL_H_INCLUDED
 
-#include <linux/types.h>
+#include <drm/drm.h>
 #include <linux/ioctl.h>
 
 #define KFD_IOCTL_MAJOR_VERSION 1
 #define KFD_IOCTL_MINOR_VERSION 1
 
 struct kfd_ioctl_get_version_args {
-	uint32_t major_version;	/* from KFD */
-	uint32_t minor_version;	/* from KFD */
+	__u32 major_version;	/* from KFD */
+	__u32 minor_version;	/* from KFD */
 };
 
 /* For kfd_ioctl_create_queue_args.queue_type. */
@@ -43,36 +43,36 @@ struct kfd_ioctl_get_version_args {
 #define KFD_MAX_QUEUE_PRIORITY		15
 
 struct kfd_ioctl_create_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
-	uint64_t write_pointer_address;	/* from KFD */
-	uint64_t read_pointer_address;	/* from KFD */
-	uint64_t doorbell_offset;	/* from KFD */
+	__u64 ring_base_address;	/* to KFD */
+	__u64 write_pointer_address;	/* from KFD */
+	__u64 read_pointer_address;	/* from KFD */
+	__u64 doorbell_offset;	/* from KFD */
 
-	uint32_t ring_size;		/* to KFD */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t queue_type;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
-	uint32_t queue_id;		/* from KFD */
+	__u32 ring_size;		/* to KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 queue_type;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
+	__u32 queue_id;		/* from KFD */
 
-	uint64_t eop_buffer_address;	/* to KFD */
-	uint64_t eop_buffer_size;	/* to KFD */
-	uint64_t ctx_save_restore_address; /* to KFD */
-	uint64_t ctx_save_restore_size;	/* to KFD */
+	__u64 eop_buffer_address;	/* to KFD */
+	__u64 eop_buffer_size;	/* to KFD */
+	__u64 ctx_save_restore_address; /* to KFD */
+	__u64 ctx_save_restore_size;	/* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
-	uint32_t queue_id;		/* to KFD */
-	uint32_t pad;
+	__u32 queue_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_update_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
+	__u64 ring_base_address;	/* to KFD */
 
-	uint32_t queue_id;		/* to KFD */
-	uint32_t ring_size;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
+	__u32 queue_id;		/* to KFD */
+	__u32 ring_size;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
 };
 
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
@@ -80,13 +80,13 @@ struct kfd_ioctl_update_queue_args {
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
 
 struct kfd_ioctl_set_memory_policy_args {
-	uint64_t alternate_aperture_base;	/* to KFD */
-	uint64_t alternate_aperture_size;	/* to KFD */
+	__u64 alternate_aperture_base;	/* to KFD */
+	__u64 alternate_aperture_size;	/* to KFD */
 
-	uint32_t gpu_id;			/* to KFD */
-	uint32_t default_policy;		/* to KFD */
-	uint32_t alternate_policy;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;			/* to KFD */
+	__u32 default_policy;		/* to KFD */
+	__u32 alternate_policy;		/* to KFD */
+	__u32 pad;
 };
 
 /*
@@ -97,26 +97,26 @@ struct kfd_ioctl_set_memory_policy_args {
  */
 
 struct kfd_ioctl_get_clock_counters_args {
-	uint64_t gpu_clock_counter;	/* from KFD */
-	uint64_t cpu_clock_counter;	/* from KFD */
-	uint64_t system_clock_counter;	/* from KFD */
-	uint64_t system_clock_freq;	/* from KFD */
+	__u64 gpu_clock_counter;	/* from KFD */
+	__u64 cpu_clock_counter;	/* from KFD */
+	__u64 system_clock_counter;	/* from KFD */
+	__u64 system_clock_freq;	/* from KFD */
 
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 #define NUM_OF_SUPPORTED_GPUS 7
 
 struct kfd_process_device_apertures {
-	uint64_t lds_base;		/* from KFD */
-	uint64_t lds_limit;		/* from KFD */
-	uint64_t scratch_base;		/* from KFD */
-	uint64_t scratch_limit;		/* from KFD */
-	uint64_t gpuvm_base;		/* from KFD */
-	uint64_t gpuvm_limit;		/* from KFD */
-	uint32_t gpu_id;		/* from KFD */
-	uint32_t pad;
+	__u64 lds_base;		/* from KFD */
+	__u64 lds_limit;		/* from KFD */
+	__u64 scratch_base;		/* from KFD */
+	__u64 scratch_limit;		/* from KFD */
+	__u64 gpuvm_base;		/* from KFD */
+	__u64 gpuvm_limit;		/* from KFD */
+	__u32 gpu_id;		/* from KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_get_process_apertures_args {
@@ -124,8 +124,8 @@ struct kfd_ioctl_get_process_apertures_args {
 			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
 
 	/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
-	uint32_t num_of_nodes;
-	uint32_t pad;
+	__u32 num_of_nodes;
+	__u32 pad;
 };
 
 #define MAX_ALLOWED_NUM_POINTS    100
@@ -133,25 +133,25 @@ struct kfd_ioctl_get_process_apertures_args {
 #define MAX_ALLOWED_WAC_BUFF_SIZE  128
 
 struct kfd_ioctl_dbg_register_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_unregister_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_address_watch_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 struct kfd_ioctl_dbg_wave_control_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 /* Matching HSA_EVENTTYPE */
@@ -172,44 +172,44 @@ struct kfd_ioctl_dbg_wave_control_args {
 #define KFD_SIGNAL_EVENT_LIMIT			256
 
 struct kfd_ioctl_create_event_args {
-	uint64_t event_page_offset;	/* from KFD */
-	uint32_t event_trigger_data;	/* from KFD - signal events only */
-	uint32_t event_type;		/* to KFD */
-	uint32_t auto_reset;		/* to KFD */
-	uint32_t node_id;		/* to KFD - only valid for certain
+	__u64 event_page_offset;	/* from KFD */
+	__u32 event_trigger_data;	/* from KFD - signal events only */
+	__u32 event_type;		/* to KFD */
+	__u32 auto_reset;		/* to KFD */
+	__u32 node_id;		/* to KFD - only valid for certain
 							event types */
-	uint32_t event_id;		/* from KFD */
-	uint32_t event_slot_index;	/* from KFD */
+	__u32 event_id;		/* from KFD */
+	__u32 event_slot_index;	/* from KFD */
 };
 
 struct kfd_ioctl_destroy_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_set_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_reset_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_memory_exception_failure {
-	uint32_t NotPresent;	/* Page not present or supervisor privilege */
-	uint32_t ReadOnly;	/* Write access to a read-only page */
-	uint32_t NoExecute;	/* Execute access to a page marked NX */
-	uint32_t pad;
+	__u32 NotPresent;	/* Page not present or supervisor privilege */
+	__u32 ReadOnly;	/* Write access to a read-only page */
+	__u32 NoExecute;	/* Execute access to a page marked NX */
+	__u32 pad;
 };
 
 /* memory exception data*/
 struct kfd_hsa_memory_exception_data {
 	struct kfd_memory_exception_failure failure;
-	uint64_t va;
-	uint32_t gpu_id;
-	uint32_t pad;
+	__u64 va;
+	__u32 gpu_id;
+	__u32 pad;
 };
 
 /* Event data*/
@@ -217,19 +217,19 @@ struct kfd_event_data {
 	union {
 		struct kfd_hsa_memory_exception_data memory_exception_data;
 	};				/* From KFD */
-	uint64_t kfd_event_data_ext;	/* pointer to an extension structure
+	__u64 kfd_event_data_ext;	/* pointer to an extension structure
 					   for future exception types */
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_wait_events_args {
-	uint64_t events_ptr;		/* pointed to struct
+	__u64 events_ptr;		/* pointed to struct
 					   kfd_event_data array, to KFD */
-	uint32_t num_events;		/* to KFD */
-	uint32_t wait_for_all;		/* to KFD */
-	uint32_t timeout;		/* to KFD */
-	uint32_t wait_result;		/* from KFD */
+	__u32 num_events;		/* to KFD */
+	__u32 wait_for_all;		/* to KFD */
+	__u32 timeout;		/* to KFD */
+	__u32 wait_result;		/* from KFD */
 };
 
 struct kfd_ioctl_set_scratch_backing_va_args {

From a33b2d0359a0aae25d5ac7a26b85a5682485ebbb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 8 Aug 2016 17:46:23 -0700
Subject: [PATCH 004/391] selftests/seccomp: Add tests for basic ptrace actions

This adds tests for using only ptrace to perform syscall changes, just
to validate matching behavior between seccomp events and ptrace events.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 41 +++++++++++++++----
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 73f5ea6778ce..e61b963f011b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1262,6 +1262,13 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
 
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(val, action)
+#endif
+
 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  */
@@ -1357,7 +1364,7 @@ void change_syscall(struct __test_metadata *_metadata,
 #ifdef SYSCALL_NUM_RET_SHARE_REG
 		TH_LOG("Can't modify syscall return on this architecture");
 #else
-		regs.SYSCALL_RET = 1;
+		regs.SYSCALL_RET = EPERM;
 #endif
 
 #ifdef HAVE_GETREGS
@@ -1426,6 +1433,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 
 	if (nr == __NR_getpid)
 		change_syscall(_metadata, tracee, __NR_getppid);
+	if (nr == __NR_open)
+		change_syscall(_metadata, tracee, -1);
 }
 
 FIXTURE_DATA(TRACE_syscall) {
@@ -1480,6 +1489,28 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 		free(self->prog.filter);
 }
 
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer will redirect getpid to getppid. */
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_dropped)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer should skip the open syscall, resulting in EPERM. */
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
+}
+
 TEST_F(TRACE_syscall, syscall_allowed)
 {
 	long ret;
@@ -1520,13 +1551,8 @@ TEST_F(TRACE_syscall, syscall_dropped)
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
 	ASSERT_EQ(0, ret);
 
-#ifdef SYSCALL_NUM_RET_SHARE_REG
-	/* gettid has been skipped */
-	EXPECT_EQ(-1, syscall(__NR_gettid));
-#else
 	/* gettid has been skipped and an altered return value stored. */
-	EXPECT_EQ(1, syscall(__NR_gettid));
-#endif
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
 }
 
@@ -1557,6 +1583,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
 	ASSERT_EQ(0, ret);
 
 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+	errno = 0;
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 	EXPECT_EQ(EPERM, errno);
 }

From 967d7ba8415139107033770a40c7ec7dd17fbcb1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Dec 2015 11:39:36 -0800
Subject: [PATCH 005/391] selftests/seccomp: Add simple seccomp overhead
 benchmark

This attempts to produce a comparison between native getpid() and a
RET_ALLOW-filtered getpid(), to measure the overhead cost of using
seccomp().

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/Makefile      | 18 +++-
 .../selftests/seccomp/seccomp_benchmark.c     | 99 +++++++++++++++++++
 2 files changed, 112 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/seccomp/seccomp_benchmark.c

diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index aeb0c805f3ca..553d870b4ca9 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,8 +1,16 @@
-TEST_GEN_PROGS := seccomp_bpf
-CFLAGS += -Wl,-no-as-needed -Wall
-LDFLAGS += -lpthread
+all:
 
 include ../lib.mk
 
-$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h
-	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+	$(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
new file mode 100644
index 000000000000..5838c8697ec3
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -0,0 +1,99 @@
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performace can be measured.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a)    (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+	pid_t pid, ret;
+	unsigned long long i;
+	struct timespec start, finish;
+
+	pid = getpid();
+	assert(clock_gettime(clk_id, &start) == 0);
+	for (i = 0; i < samples; i++) {
+		ret = syscall(__NR_getpid);
+		assert(pid == ret);
+	}
+	assert(clock_gettime(clk_id, &finish) == 0);
+
+	i = finish.tv_sec - start.tv_sec;
+	i *= 1000000000;
+	i += finish.tv_nsec - start.tv_nsec;
+
+	printf("%lu.%09lu - %lu.%09lu = %llu\n",
+		finish.tv_sec, finish.tv_nsec,
+		start.tv_sec, start.tv_nsec,
+		i);
+
+	return i;
+}
+
+unsigned long long calibrate(void)
+{
+	unsigned long long i;
+
+	printf("Calibrating reasonable sample size...\n");
+
+	for (i = 5; ; i++) {
+		unsigned long long samples = 1 << i;
+
+		/* Find something that takes more than 5 seconds to run. */
+		if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+			return samples;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	unsigned long long samples;
+	unsigned long long native, filtered;
+
+	if (argc > 1)
+		samples = strtoull(argv[1], NULL, 0);
+	else
+		samples = calibrate();
+
+	printf("Benchmarking %llu samples...\n", samples);
+
+	native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid native: %llu ns\n", native);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	assert(ret == 0);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	assert(ret == 0);
+
+	filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+	printf("Estimated seccomp overhead per syscall: %llu ns\n",
+		filtered - native);
+
+	if (filtered == native)
+		printf("Trying running again with more samples.\n");
+
+	return 0;
+}

From f3f6e30669c048f47d51ea59df9946a91f551c4c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Aug 2017 14:08:39 -0700
Subject: [PATCH 006/391] selftests/seccomp: Refactor RET_ERRNO tests

This refactors the errno tests (since they all use the same pattern for
their filter) and adds a RET_DATA field ordering test.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 95 +++++++++++--------
 1 file changed, 58 insertions(+), 37 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e61b963f011b..2fb49d99588d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -136,7 +136,7 @@ TEST(no_new_privs_support)
 	}
 }
 
-/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 TEST(mode_filter_support)
 {
 	long ret;
@@ -541,26 +541,30 @@ TEST(arg_out_of_range)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+#define ERRNO_FILTER(name, errno)					\
+	struct sock_filter _read_filter_##name[] = {			\
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
+			offsetof(struct seccomp_data, nr)),		\
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
+	};								\
+	struct sock_fprog prog_##name = {				\
+		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
+		.filter = _read_filter_##name,				\
+	}
+
+/* Make sure basic errno values are correctly passed through a filter. */
 TEST(ERRNO_valid)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(valid, E2BIG);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -568,26 +572,17 @@ TEST(ERRNO_valid)
 	EXPECT_EQ(E2BIG, errno);
 }
 
+/* Make sure an errno of zero is correctly handled by the arch code. */
 TEST(ERRNO_zero)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(zero, 0);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -595,26 +590,21 @@ TEST(ERRNO_zero)
 	EXPECT_EQ(0, read(0, NULL, 0));
 }
 
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
 TEST(ERRNO_capped)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(capped, 4096);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -622,6 +612,37 @@ TEST(ERRNO_capped)
 	EXPECT_EQ(4095, errno);
 }
 
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+	ERRNO_FILTER(first,  11);
+	ERRNO_FILTER(second, 13);
+	ERRNO_FILTER(third,  12);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(12, errno);
+}
+
 FIXTURE_DATA(TRAP) {
 	struct sock_fprog prog;
 };

From deb4de8b31bc5bf21efb6ac31150a01a631cd647 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Aug 2017 15:00:40 -0700
Subject: [PATCH 007/391] seccomp: Provide matching filter for introspection

Both the upcoming logging improvements and changes to RET_KILL will need
to know which filter a given seccomp return value originated from. In
order to delay logic processing of result until after the seccomp loop,
this adds a single pointer assignment on matches. This will allow both
log and RET_KILL logic to work off the filter rather than doing more
expensive tests inside the time-critical run_filters loop.

Running tight cycles of getpid() with filters attached shows no measurable
difference in speed.

Suggested-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
---
 kernel/seccomp.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 98b59b5db90b..1f3347fc2605 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -171,10 +171,14 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 /**
  * seccomp_run_filters - evaluates all seccomp filters against @sd
  * @sd: optional seccomp data to be passed to filters
+ * @match: stores struct seccomp_filter that resulted in the return value,
+ *         unless filter returned SECCOMP_RET_ALLOW, in which case it will
+ *         be unchanged.
  *
  * Returns valid seccomp BPF response codes.
  */
-static u32 seccomp_run_filters(const struct seccomp_data *sd)
+static u32 seccomp_run_filters(const struct seccomp_data *sd,
+			       struct seccomp_filter **match)
 {
 	struct seccomp_data sd_local;
 	u32 ret = SECCOMP_RET_ALLOW;
@@ -198,8 +202,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
 	for (; f; f = f->prev) {
 		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
-		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
+		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) {
 			ret = cur_ret;
+			*match = f;
+		}
 	}
 	return ret;
 }
@@ -566,6 +572,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 			    const bool recheck_after_trace)
 {
 	u32 filter_ret, action;
+	struct seccomp_filter *match = NULL;
 	int data;
 
 	/*
@@ -574,7 +581,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	 */
 	rmb();
 
-	filter_ret = seccomp_run_filters(sd);
+	filter_ret = seccomp_run_filters(sd, &match);
 	data = filter_ret & SECCOMP_RET_DATA;
 	action = filter_ret & SECCOMP_RET_ACTION;
 
@@ -638,6 +645,11 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		return 0;
 
 	case SECCOMP_RET_ALLOW:
+		/*
+		 * Note that the "match" filter will always be NULL for
+		 * this action since SECCOMP_RET_ALLOW is the starting
+		 * state in seccomp_run_filters().
+		 */
 		return 0;
 
 	case SECCOMP_RET_KILL:

From 8e5f1ad116df6b0de65eac458d5e7c318d1c05af Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:52 +0000
Subject: [PATCH 008/391] seccomp: Sysctl to display available actions

This patch creates a read-only sysctl containing an ordered list of
seccomp actions that the kernel supports. The ordering, from left to
right, is the lowest action value (kill) to the highest action value
(allow). Currently, a read of the sysctl file would return "kill trap
errno trace allow". The contents of this sysctl file can be useful for
userspace code as well as the system administrator.

The path to the sysctl is:

  /proc/sys/kernel/seccomp/actions_avail

libseccomp and other userspace code can easily determine which actions
the current kernel supports. The set of actions supported by the current
kernel may be different than the set of action macros found in kernel
headers that were installed where the userspace code was built.

In addition, this sysctl will allow system administrators to know which
actions are supported by the kernel and make it easier to configure
exactly what seccomp logs through the audit subsystem. Support for this
level of logging configuration will come in a future patch.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/sysctl/kernel.txt               |  1 +
 .../userspace-api/seccomp_filter.rst          | 16 ++++++
 kernel/seccomp.c                              | 51 +++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index bac23c198360..995c42cf86ba 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -74,6 +74,7 @@ show up in /proc/sys/kernel:
 - reboot-cmd                  [ SPARC only ]
 - rtsig-max
 - rtsig-nr
+- seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
 - sem
 - sem_next_id		      [ sysv ipc ]
 - sg-big-buff                 [ generic SCSI device (sg) ]
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index f71eb5ef1f2d..35fc7cbf1d95 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -169,7 +169,23 @@ The ``samples/seccomp/`` directory contains both an x86-specific example
 and a more generic example of a higher level macro interface for BPF
 program generation.
 
+Sysctls
+=======
 
+Seccomp's sysctl files can be found in the ``/proc/sys/kernel/seccomp/``
+directory. Here's a description of each file in that directory:
+
+``actions_avail``:
+	A read-only ordered list of seccomp return values (refer to the
+	``SECCOMP_RET_*`` macros above) in string form. The ordering, from
+	left-to-right, is the least permissive return value to the most
+	permissive return value.
+
+	The list represents the set of seccomp return values supported
+	by the kernel. A userspace program may use this list to
+	determine if the actions found in the ``seccomp.h``, when the
+	program was built, differs from the set of actions actually
+	supported in the current running kernel.
 
 Adding architecture support
 ===========================
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1f3347fc2605..5f19f41e4e50 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -17,11 +17,13 @@
 #include <linux/audit.h>
 #include <linux/compat.h>
 #include <linux/coredump.h>
+#include <linux/kmemleak.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <linux/seccomp.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
+#include <linux/sysctl.h>
 
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
 #include <asm/syscall.h>
@@ -934,3 +936,52 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
 	return ret;
 }
 #endif
+
+#ifdef CONFIG_SYSCTL
+
+/* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_NAME		"kill"
+#define SECCOMP_RET_TRAP_NAME		"trap"
+#define SECCOMP_RET_ERRNO_NAME		"errno"
+#define SECCOMP_RET_TRACE_NAME		"trace"
+#define SECCOMP_RET_ALLOW_NAME		"allow"
+
+static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
+					    SECCOMP_RET_TRAP_NAME	" "
+					    SECCOMP_RET_ERRNO_NAME	" "
+					    SECCOMP_RET_TRACE_NAME	" "
+					    SECCOMP_RET_ALLOW_NAME;
+
+static struct ctl_path seccomp_sysctl_path[] = {
+	{ .procname = "kernel", },
+	{ .procname = "seccomp", },
+	{ }
+};
+
+static struct ctl_table seccomp_sysctl_table[] = {
+	{
+		.procname	= "actions_avail",
+		.data		= (void *) &seccomp_actions_avail,
+		.maxlen		= sizeof(seccomp_actions_avail),
+		.mode		= 0444,
+		.proc_handler	= proc_dostring,
+	},
+	{ }
+};
+
+static int __init seccomp_sysctl_init(void)
+{
+	struct ctl_table_header *hdr;
+
+	hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
+	if (!hdr)
+		pr_warn("seccomp: sysctl registration failed\n");
+	else
+		kmemleak_not_leak(hdr);
+
+	return 0;
+}
+
+device_initcall(seccomp_sysctl_init)
+
+#endif /* CONFIG_SYSCTL */

From d612b1fd8010d0d67b5287fe146b8b55bcbb8655 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:53 +0000
Subject: [PATCH 009/391] seccomp: Operation for checking if an action is
 available

Userspace code that needs to check if the kernel supports a given action
may not be able to use the /proc/sys/kernel/seccomp/actions_avail
sysctl. The process may be running in a sandbox and, therefore,
sufficient filesystem access may not be available. This patch adds an
operation to the seccomp(2) syscall that allows userspace code to ask
the kernel if a given action is available.

If the action is supported by the kernel, 0 is returned. If the action
is not supported by the kernel, -1 is returned with errno set to
-EOPNOTSUPP. If this check is attempted on a kernel that doesn't support
this new operation, -1 is returned with errno set to -EINVAL meaning
that userspace code will have the ability to differentiate between the
two error cases.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h                  |  5 +--
 kernel/seccomp.c                              | 26 ++++++++++++++
 tools/testing/selftests/seccomp/seccomp_bpf.c | 36 +++++++++++++++++++
 3 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..aaad61cc46bc 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -11,8 +11,9 @@
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
 /* Valid operations for seccomp syscall. */
-#define SECCOMP_SET_MODE_STRICT	0
-#define SECCOMP_SET_MODE_FILTER	1
+#define SECCOMP_SET_MODE_STRICT		0
+#define SECCOMP_SET_MODE_FILTER		1
+#define SECCOMP_GET_ACTION_AVAIL	2
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5f19f41e4e50..7a6089f66fed 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -808,6 +808,27 @@ static inline long seccomp_set_mode_filter(unsigned int flags,
 }
 #endif
 
+static long seccomp_get_action_avail(const char __user *uaction)
+{
+	u32 action;
+
+	if (copy_from_user(&action, uaction, sizeof(action)))
+		return -EFAULT;
+
+	switch (action) {
+	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_TRAP:
+	case SECCOMP_RET_ERRNO:
+	case SECCOMP_RET_TRACE:
+	case SECCOMP_RET_ALLOW:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 /* Common entry point for both prctl and syscall. */
 static long do_seccomp(unsigned int op, unsigned int flags,
 		       const char __user *uargs)
@@ -819,6 +840,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
 		return seccomp_set_mode_strict();
 	case SECCOMP_SET_MODE_FILTER:
 		return seccomp_set_mode_filter(flags, uargs);
+	case SECCOMP_GET_ACTION_AVAIL:
+		if (flags != 0)
+			return -EINVAL;
+
+		return seccomp_get_action_avail(uargs);
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 2fb49d99588d..1f2888f6678b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1731,6 +1731,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 #define SECCOMP_SET_MODE_FILTER 1
 #endif
 
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
 #ifndef SECCOMP_FILTER_FLAG_TSYNC
 #define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
@@ -2469,6 +2473,38 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST(get_action_avail)
+{
+	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
+			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+			    SECCOMP_RET_ALLOW };
+	__u32 unknown_action = 0x10000000U;
+	int i;
+	long ret;
+
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+	}
+	EXPECT_EQ(ret, 0);
+
+	for (i = 0; i < ARRAY_SIZE(actions); i++) {
+		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+		EXPECT_EQ(ret, 0) {
+			TH_LOG("Expected action (0x%X) not available!",
+			       actions[i]);
+		}
+	}
+
+	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+	EXPECT_EQ(ret, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
 /*
  * TODO:
  * - add microbenchmarks

From 0ddec0fc8900201c0897b87b762b7c420436662f Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:54 +0000
Subject: [PATCH 010/391] seccomp: Sysctl to configure actions that are allowed
 to be logged

Adminstrators can write to this sysctl to set the seccomp actions that
are allowed to be logged. Any actions not found in this sysctl will not
be logged.

For example, all SECCOMP_RET_KILL, SECCOMP_RET_TRAP, and
SECCOMP_RET_ERRNO actions would be loggable if "kill trap errno" were
written to the sysctl. SECCOMP_RET_TRACE actions would not be logged
since its string representation ("trace") wasn't present in the sysctl
value.

The path to the sysctl is:

 /proc/sys/kernel/seccomp/actions_logged

The actions_avail sysctl can be read to discover the valid action names
that can be written to the actions_logged sysctl with the exception of
"allow". SECCOMP_RET_ALLOW actions cannot be configured for logging.

The default setting for the sysctl is to allow all actions to be logged
except SECCOMP_RET_ALLOW. While only SECCOMP_RET_KILL actions are
currently logged, an upcoming patch will allow applications to request
additional actions to be logged.

There's one important exception to this sysctl. If a task is
specifically being audited, meaning that an audit context has been
allocated for the task, seccomp will log all actions other than
SECCOMP_RET_ALLOW despite the value of actions_logged. This exception
preserves the existing auditing behavior of tasks with an allocated
audit context.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if audit_enabled && task-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 .../userspace-api/seccomp_filter.rst          |  18 ++
 include/linux/audit.h                         |   6 +-
 kernel/seccomp.c                              | 171 +++++++++++++++++-
 3 files changed, 187 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 35fc7cbf1d95..2d1d8ab04ac5 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -187,6 +187,24 @@ directory. Here's a description of each file in that directory:
 	program was built, differs from the set of actions actually
 	supported in the current running kernel.
 
+``actions_logged``:
+	A read-write ordered list of seccomp return values (refer to the
+	``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes
+	to the file do not need to be in ordered form but reads from the file
+	will be ordered in the same way as the actions_avail sysctl.
+
+	It is important to note that the value of ``actions_logged`` does not
+	prevent certain actions from being logged when the audit subsystem is
+	configured to audit a task. If the action is not found in
+	``actions_logged`` list, the final decision on whether to audit the
+	action for that task is ultimately left up to the audit subsystem to
+	decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``.
+
+	The ``allow`` string is not accepted in the ``actions_logged`` sysctl
+	as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting
+	to write ``allow`` to the sysctl will result in an EINVAL being
+	returned.
+
 Adding architecture support
 ===========================
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 2150bdccfbab..8c30f06d639d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -314,11 +314,7 @@ void audit_core_dumps(long signr);
 
 static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 {
-	if (!audit_enabled)
-		return;
-
-	/* Force a record to be reported if a signal was delivered. */
-	if (signr || unlikely(!audit_dummy_context()))
+	if (audit_enabled && unlikely(!audit_dummy_context()))
 		__audit_seccomp(syscall, signr, code);
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7a6089f66fed..54357e361aea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -522,6 +522,45 @@ static void seccomp_send_sigsys(int syscall, int reason)
 }
 #endif	/* CONFIG_SECCOMP_FILTER */
 
+/* For use with seccomp_actions_logged */
+#define SECCOMP_LOG_KILL		(1 << 0)
+#define SECCOMP_LOG_TRAP		(1 << 2)
+#define SECCOMP_LOG_ERRNO		(1 << 3)
+#define SECCOMP_LOG_TRACE		(1 << 4)
+#define SECCOMP_LOG_ALLOW		(1 << 5)
+
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
+				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
+
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
+{
+	bool log = false;
+
+	switch (action) {
+	case SECCOMP_RET_ALLOW:
+	case SECCOMP_RET_TRAP:
+	case SECCOMP_RET_ERRNO:
+	case SECCOMP_RET_TRACE:
+		break;
+	case SECCOMP_RET_KILL:
+	default:
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
+	}
+
+	/*
+	 * Force an audit message to be emitted when the action is RET_KILL and
+	 * the action is allowed to be logged by the admin.
+	 */
+	if (log)
+		return __audit_seccomp(syscall, signr, action);
+
+	/*
+	 * Let the audit subsystem decide if the action should be audited based
+	 * on whether the current task itself is being audited.
+	 */
+	return audit_seccomp(syscall, signr, action);
+}
+
 /*
  * Secure computing mode 1 allows only read/write/exit/sigreturn.
  * To be fully secure this must be combined with rlimit
@@ -547,7 +586,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL);
 	do_exit(SIGKILL);
 }
 
@@ -656,7 +695,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	case SECCOMP_RET_KILL:
 	default:
-		audit_seccomp(this_syscall, SIGSYS, action);
+		seccomp_log(this_syscall, SIGSYS, action);
 		/* Dump core only if this is the last remaining thread. */
 		if (get_nr_threads(current) == 1) {
 			siginfo_t info;
@@ -673,7 +712,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	unreachable();
 
 skip:
-	audit_seccomp(this_syscall, 0, action);
+	seccomp_log(this_syscall, 0, action);
 	return -1;
 }
 #else
@@ -978,6 +1017,127 @@ static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
 					    SECCOMP_RET_TRACE_NAME	" "
 					    SECCOMP_RET_ALLOW_NAME;
 
+struct seccomp_log_name {
+	u32		log;
+	const char	*name;
+};
+
+static const struct seccomp_log_name seccomp_log_names[] = {
+	{ SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME },
+	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
+	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
+	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
+	{ }
+};
+
+static bool seccomp_names_from_actions_logged(char *names, size_t size,
+					      u32 actions_logged)
+{
+	const struct seccomp_log_name *cur;
+	bool append_space = false;
+
+	for (cur = seccomp_log_names; cur->name && size; cur++) {
+		ssize_t ret;
+
+		if (!(actions_logged & cur->log))
+			continue;
+
+		if (append_space) {
+			ret = strscpy(names, " ", size);
+			if (ret < 0)
+				return false;
+
+			names += ret;
+			size -= ret;
+		} else
+			append_space = true;
+
+		ret = strscpy(names, cur->name, size);
+		if (ret < 0)
+			return false;
+
+		names += ret;
+		size -= ret;
+	}
+
+	return true;
+}
+
+static bool seccomp_action_logged_from_name(u32 *action_logged,
+					    const char *name)
+{
+	const struct seccomp_log_name *cur;
+
+	for (cur = seccomp_log_names; cur->name; cur++) {
+		if (!strcmp(cur->name, name)) {
+			*action_logged = cur->log;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
+{
+	char *name;
+
+	*actions_logged = 0;
+	while ((name = strsep(&names, " ")) && *name) {
+		u32 action_logged = 0;
+
+		if (!seccomp_action_logged_from_name(&action_logged, name))
+			return false;
+
+		*actions_logged |= action_logged;
+	}
+
+	return true;
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	char names[sizeof(seccomp_actions_avail)];
+	struct ctl_table table;
+	int ret;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	memset(names, 0, sizeof(names));
+
+	if (!write) {
+		if (!seccomp_names_from_actions_logged(names, sizeof(names),
+						       seccomp_actions_logged))
+			return -EINVAL;
+	}
+
+	table = *ro_table;
+	table.data = names;
+	table.maxlen = sizeof(names);
+	ret = proc_dostring(&table, write, buffer, lenp, ppos);
+	if (ret)
+		return ret;
+
+	if (write) {
+		u32 actions_logged;
+
+		if (!seccomp_actions_logged_from_names(&actions_logged,
+						       table.data))
+			return -EINVAL;
+
+		if (actions_logged & SECCOMP_LOG_ALLOW)
+			return -EINVAL;
+
+		seccomp_actions_logged = actions_logged;
+	}
+
+	return 0;
+}
+
 static struct ctl_path seccomp_sysctl_path[] = {
 	{ .procname = "kernel", },
 	{ .procname = "seccomp", },
@@ -992,6 +1152,11 @@ static struct ctl_table seccomp_sysctl_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dostring,
 	},
+	{
+		.procname	= "actions_logged",
+		.mode		= 0644,
+		.proc_handler	= seccomp_actions_logged_handler,
+	},
 	{ }
 };
 

From 2b7ea5b5b5799f2878ed454bb48032bed6d101d3 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:55 +0000
Subject: [PATCH 011/391] seccomp: Selftest for detection of filter flag
 support

Userspace needs to be able to reliably detect the support of a filter
flag. A good way of doing that is by attempting to enter filter mode,
with the flag bit(s) in question set, and a NULL pointer for the args
parameter of seccomp(2). EFAULT indicates that the flag is valid and
EINVAL indicates that the flag is invalid.

This patch adds a selftest that can be used to test this method of
detection in userspace.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 1f2888f6678b..abf708e09892 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1835,6 +1835,66 @@ TEST(seccomp_syscall_mode_lock)
 	}
 }
 
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC };
+	unsigned int flag, all_flags;
+	int i;
+	long ret;
+
+	/* Test detection of known-good filter flags */
+	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		flag = flags[i];
+		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+		ASSERT_NE(ENOSYS, errno) {
+			TH_LOG("Kernel does not support seccomp syscall!");
+		}
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EFAULT, errno) {
+			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+			       flag);
+		}
+
+		all_flags |= flag;
+	}
+
+	/* Test detection of all known-good filter flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+		       all_flags);
+	}
+
+	/* Test detection of an unknown filter flag */
+	flag = -1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+		       flag);
+	}
+
+	/*
+	 * Test detection of an unknown filter flag that may simply need to be
+	 * added to this test
+	 */
+	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+		       flag);
+	}
+}
+
 TEST(TSYNC_first)
 {
 	struct sock_filter filter[] = {

From e66a39977985b1e69e17c4042cb290768eca9b02 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:56 +0000
Subject: [PATCH 012/391] seccomp: Filter flag to log all actions except
 SECCOMP_RET_ALLOW

Add a new filter flag, SECCOMP_FILTER_FLAG_LOG, that enables logging for
all actions except for SECCOMP_RET_ALLOW for the given filter.

SECCOMP_RET_KILL actions are always logged, when "kill" is in the
actions_logged sysctl, and SECCOMP_RET_ALLOW actions are never logged,
regardless of this flag.

This flag can be used to create noisy filters that result in all
non-allowed actions to be logged. A process may have one noisy filter,
which is loaded with this flag, as well as a quiet filter that's not
loaded with this flag. This allows for the actions in a set of filters
to be selectively conveyed to the admin.

Since a system could have a large number of allocated seccomp_filter
structs, struct packing was taken in consideration. On 64 bit x86, the
new log member takes up one byte of an existing four byte hole in the
struct. On 32 bit x86, the new log member creates a new four byte hole
(unavoidable) and consumes one of those bytes.

Unfortunately, the tests added for SECCOMP_FILTER_FLAG_LOG are not
capable of inspecting the audit log to verify that the actions taken in
the filter were logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/seccomp.h                       |  3 +-
 include/uapi/linux/seccomp.h                  |  1 +
 kernel/seccomp.c                              | 26 +++++--
 tools/testing/selftests/seccomp/seccomp_bpf.c | 69 ++++++++++++++++++-
 4 files changed, 91 insertions(+), 8 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index ecc296c137cd..c8bef436b61d 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,7 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
-#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC | \
+					 SECCOMP_FILTER_FLAG_LOG)
 
 #ifdef CONFIG_SECCOMP
 
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index aaad61cc46bc..19a611d0712e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -17,6 +17,7 @@
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
+#define SECCOMP_FILTER_FLAG_LOG		2
 
 /*
  * All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54357e361aea..ed9fde418fc4 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -44,6 +44,7 @@
  *         get/put helpers should be used when accessing an instance
  *         outside of a lifetime-guarded section.  In general, this
  *         is only needed for handling filters shared across tasks.
+ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
  * @prev: points to a previously installed, or inherited, filter
  * @prog: the BPF program to evaluate
  *
@@ -59,6 +60,7 @@
  */
 struct seccomp_filter {
 	refcount_t usage;
+	bool log;
 	struct seccomp_filter *prev;
 	struct bpf_prog *prog;
 };
@@ -452,6 +454,10 @@ static long seccomp_attach_filter(unsigned int flags,
 			return ret;
 	}
 
+	/* Set log flag, if present. */
+	if (flags & SECCOMP_FILTER_FLAG_LOG)
+		filter->log = true;
+
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
 	 * task reference.
@@ -532,15 +538,22 @@ static void seccomp_send_sigsys(int syscall, int reason)
 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
 				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
 
-static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
+			       bool requested)
 {
 	bool log = false;
 
 	switch (action) {
 	case SECCOMP_RET_ALLOW:
+		break;
 	case SECCOMP_RET_TRAP:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
+		break;
 	case SECCOMP_RET_ERRNO:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
+		break;
 	case SECCOMP_RET_TRACE:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
 		break;
 	case SECCOMP_RET_KILL:
 	default:
@@ -548,8 +561,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL and
-	 * the action is allowed to be logged by the admin.
+	 * Force an audit message to be emitted when the action is RET_KILL or
+	 * the FILTER_FLAG_LOG bit was set and the action is allowed to be
+	 * logged by the admin.
 	 */
 	if (log)
 		return __audit_seccomp(syscall, signr, action);
@@ -586,7 +600,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true);
 	do_exit(SIGKILL);
 }
 
@@ -695,7 +709,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	case SECCOMP_RET_KILL:
 	default:
-		seccomp_log(this_syscall, SIGSYS, action);
+		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
 		if (get_nr_threads(current) == 1) {
 			siginfo_t info;
@@ -712,7 +726,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	unreachable();
 
 skip:
-	seccomp_log(this_syscall, 0, action);
+	seccomp_log(this_syscall, 0, action, match ? match->log : false);
 	return -1;
 }
 #else
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index abf708e09892..1c8c22ce7740 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1739,6 +1739,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 #define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
 
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -1844,7 +1848,8 @@ TEST(seccomp_syscall_mode_lock)
  */
 TEST(detect_seccomp_filter_flags)
 {
-	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC };
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_LOG };
 	unsigned int flag, all_flags;
 	int i;
 	long ret;
@@ -2533,6 +2538,67 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+	struct sock_filter allow_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter kill_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog allow_prog = {
+		.len = (unsigned short)ARRAY_SIZE(allow_filter),
+		.filter = allow_filter,
+	};
+	struct sock_fprog kill_prog = {
+		.len = (unsigned short)ARRAY_SIZE(kill_filter),
+		.filter = kill_filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_NE(0, ret) {
+		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+	}
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+	}
+
+	/* Verify that a simple, permissive filter can be added with no flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+	EXPECT_EQ(0, ret);
+
+	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+	}
+	EXPECT_EQ(0, ret);
+
+	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &kill_prog);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
 TEST(get_action_avail)
 {
 	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
@@ -2573,6 +2639,7 @@ TEST(get_action_avail)
  * - endianness checking when appropriate
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
  * - ...
  */
 

From 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:57 +0000
Subject: [PATCH 013/391] seccomp: Action to log before allowing

Add a new action, SECCOMP_RET_LOG, that logs a syscall before allowing
the syscall. At the implementation level, this action is identical to
the existing SECCOMP_RET_ALLOW action. However, it can be very useful when
initially developing a seccomp filter for an application. The developer
can set the default action to be SECCOMP_RET_LOG, maybe mark any
obviously needed syscalls with SECCOMP_RET_ALLOW, and then put the
application through its paces. A list of syscalls that triggered the
default action (SECCOMP_RET_LOG) can be easily gleaned from the logs and
that list can be used to build the syscall whitelist. Finally, the
developer can change the default action to the desired value.

This provides a more friendly experience than seeing the application get
killed, then updating the filter and rebuilding the app, seeing the
application get killed due to a different syscall, then updating the
filter and rebuilding the app, etc.

The functionality is similar to what's supported by the various LSMs.
SELinux has permissive mode, AppArmor has complain mode, SMACK has
bring-up mode, etc.

SECCOMP_RET_LOG is given a lower value than SECCOMP_RET_ALLOW as allow
while logging is slightly more restrictive than quietly allowing.

Unfortunately, the tests added for SECCOMP_RET_LOG are not capable of
inspecting the audit log to verify that the syscall was logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if action == RET_LOG && RET_LOG in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 .../userspace-api/seccomp_filter.rst          |  9 ++
 include/uapi/linux/seccomp.h                  |  1 +
 kernel/seccomp.c                              | 23 ++++-
 tools/testing/selftests/seccomp/seccomp_bpf.c | 98 ++++++++++++++++++-
 4 files changed, 125 insertions(+), 6 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 2d1d8ab04ac5..f4977357daf2 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -141,6 +141,15 @@ In precedence order, they are:
 	allow use of ptrace, even of other sandboxed processes, without
 	extreme care; ptracers can use this mechanism to escape.)
 
+``SECCOMP_RET_LOG``:
+	Results in the system call being executed after it is logged. This
+	should be used by application developers to learn which syscalls their
+	application needs without having to iterate through multiple test and
+	development cycles to build the list.
+
+	This action will only be logged if "log" is present in the
+	actions_logged sysctl string.
+
 ``SECCOMP_RET_ALLOW``:
 	Results in the system call being executed.
 
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 19a611d0712e..f94433263e4b 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -31,6 +31,7 @@
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
 #define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ed9fde418fc4..59cde2ed3b92 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -533,10 +533,12 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
-#define SECCOMP_LOG_ALLOW		(1 << 5)
+#define SECCOMP_LOG_LOG			(1 << 5)
+#define SECCOMP_LOG_ALLOW		(1 << 6)
 
 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
-				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
+				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE |
+				    SECCOMP_LOG_LOG;
 
 static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 			       bool requested)
@@ -555,15 +557,18 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 	case SECCOMP_RET_TRACE:
 		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
 		break;
+	case SECCOMP_RET_LOG:
+		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
+		break;
 	case SECCOMP_RET_KILL:
 	default:
 		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL or
-	 * the FILTER_FLAG_LOG bit was set and the action is allowed to be
-	 * logged by the admin.
+	 * Force an audit message to be emitted when the action is RET_KILL,
+	 * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
+	 * allowed to be logged by the admin.
 	 */
 	if (log)
 		return __audit_seccomp(syscall, signr, action);
@@ -699,6 +704,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 		return 0;
 
+	case SECCOMP_RET_LOG:
+		seccomp_log(this_syscall, 0, action, true);
+		return 0;
+
 	case SECCOMP_RET_ALLOW:
 		/*
 		 * Note that the "match" filter will always be NULL for
@@ -873,6 +882,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
 	case SECCOMP_RET_TRACE:
+	case SECCOMP_RET_LOG:
 	case SECCOMP_RET_ALLOW:
 		break;
 	default:
@@ -1023,12 +1033,14 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
 #define SECCOMP_RET_TRACE_NAME		"trace"
+#define SECCOMP_RET_LOG_NAME		"log"
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
 static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
 					    SECCOMP_RET_TRAP_NAME	" "
 					    SECCOMP_RET_ERRNO_NAME	" "
 					    SECCOMP_RET_TRACE_NAME	" "
+					    SECCOMP_RET_LOG_NAME	" "
 					    SECCOMP_RET_ALLOW_NAME;
 
 struct seccomp_log_name {
@@ -1041,6 +1053,7 @@ static const struct seccomp_log_name seccomp_log_names[] = {
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
 	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+	{ SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
 	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
 	{ }
 };
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 1c8c22ce7740..7372958eccb5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -74,7 +74,12 @@
 #define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
 #define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG       0x7ffc0000U /* allow after logging */
+#endif
 
+#ifndef SECCOMP_RET_ACTION
 /* Masks for the return value sections. */
 #define SECCOMP_RET_ACTION      0x7fff0000U
 #define SECCOMP_RET_DATA        0x0000ffffU
@@ -342,6 +347,28 @@ TEST(empty_prog)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+TEST(log_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	/* getppid() should succeed and be logged (no check for logging) */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 {
 	struct sock_filter filter[] = {
@@ -756,6 +783,7 @@ TEST_F(TRAP, handler)
 
 FIXTURE_DATA(precedence) {
 	struct sock_fprog allow;
+	struct sock_fprog log;
 	struct sock_fprog trace;
 	struct sock_fprog error;
 	struct sock_fprog trap;
@@ -767,6 +795,13 @@ FIXTURE_SETUP(precedence)
 	struct sock_filter allow_insns[] = {
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
+	struct sock_filter log_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
 	struct sock_filter trace_insns[] = {
 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
@@ -803,6 +838,7 @@ FIXTURE_SETUP(precedence)
 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
 	FILTER_ALLOC(allow);
+	FILTER_ALLOC(log);
 	FILTER_ALLOC(trace);
 	FILTER_ALLOC(error);
 	FILTER_ALLOC(trap);
@@ -813,6 +849,7 @@ FIXTURE_TEARDOWN(precedence)
 {
 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
 	FILTER_FREE(allow);
+	FILTER_FREE(log);
 	FILTER_FREE(trace);
 	FILTER_FREE(error);
 	FILTER_FREE(trap);
@@ -830,6 +867,8 @@ TEST_F(precedence, allow_ok)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -854,6 +893,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -885,6 +926,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
@@ -906,6 +949,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -931,6 +976,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -952,6 +999,8 @@ TEST_F(precedence, errno_is_third)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -970,6 +1019,8 @@ TEST_F(precedence, errno_is_third_in_any_order)
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
@@ -992,6 +1043,8 @@ TEST_F(precedence, trace_is_fourth)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
@@ -1013,12 +1066,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
 	EXPECT_EQ(parent, syscall(__NR_getppid));
 	/* No ptracer */
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 }
 
+TEST_F(precedence, log_is_fifth)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
 #ifndef PTRACE_O_TRACESECCOMP
 #define PTRACE_O_TRACESECCOMP	0x00000080
 #endif
@@ -2603,7 +2698,7 @@ TEST(get_action_avail)
 {
 	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
-			    SECCOMP_RET_ALLOW };
+			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
 	__u32 unknown_action = 0x10000000U;
 	int i;
 	long ret;
@@ -2640,6 +2735,7 @@ TEST(get_action_avail)
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
  * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
  * - ...
  */
 

From fd76875ca289a3d4722f266fd2d5532a27083903 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 12:53:18 -0700
Subject: [PATCH 014/391] seccomp: Rename SECCOMP_RET_KILL to
 SECCOMP_RET_KILL_THREAD

In preparation for adding SECCOMP_RET_KILL_PROCESS, rename SECCOMP_RET_KILL
to the more accurate SECCOMP_RET_KILL_THREAD.

The existing selftest values are intentionally left as SECCOMP_RET_KILL
just to be sure we're exercising the alias.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/networking/filter.txt           |  2 +-
 .../userspace-api/seccomp_filter.rst          |  4 +-
 include/uapi/linux/seccomp.h                  |  3 +-
 kernel/seccomp.c                              | 39 ++++++++++---------
 samples/seccomp/bpf-direct.c                  |  4 +-
 samples/seccomp/bpf-helper.h                  |  2 +-
 tools/testing/selftests/seccomp/seccomp_bpf.c | 17 ++++----
 7 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index b69b205501de..73aa0f12156d 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -337,7 +337,7 @@ Examples for low-level BPF:
   jeq #14, good           /* __NR_rt_sigprocmask */
   jeq #13, good           /* __NR_rt_sigaction */
   jeq #35, good           /* __NR_nanosleep */
-  bad: ret #0             /* SECCOMP_RET_KILL */
+  bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
   good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
 
 The above example code can be placed into a file (here called "foo"), and
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index f4977357daf2..d76396f2d8ed 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -87,11 +87,11 @@ Return values
 A seccomp filter may return any of the following values. If multiple
 filters exist, the return value for the evaluation of a given system
 call will always use the highest precedent value. (For example,
-``SECCOMP_RET_KILL`` will always take precedence.)
+``SECCOMP_RET_KILL_THREAD`` will always take precedence.)
 
 In precedence order, they are:
 
-``SECCOMP_RET_KILL``:
+``SECCOMP_RET_KILL_THREAD``:
 	Results in the task exiting immediately without executing the
 	system call.  The exit status of the task (``status & 0x7f``) will
 	be ``SIGSYS``, not ``SIGKILL``.
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index f94433263e4b..5a03f699eb17 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -27,7 +27,8 @@
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 59cde2ed3b92..95ac54cff00f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
 	if (unlikely(WARN_ON(f == NULL)))
-		return SECCOMP_RET_KILL;
+		return SECCOMP_RET_KILL_THREAD;
 
 	if (!sd) {
 		populate_seccomp_data(&sd_local);
@@ -529,15 +529,17 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #endif	/* CONFIG_SECCOMP_FILTER */
 
 /* For use with seccomp_actions_logged */
-#define SECCOMP_LOG_KILL		(1 << 0)
+#define SECCOMP_LOG_KILL_THREAD		(1 << 0)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
 #define SECCOMP_LOG_LOG			(1 << 5)
 #define SECCOMP_LOG_ALLOW		(1 << 6)
 
-static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
-				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE |
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD |
+				    SECCOMP_LOG_TRAP  |
+				    SECCOMP_LOG_ERRNO |
+				    SECCOMP_LOG_TRACE |
 				    SECCOMP_LOG_LOG;
 
 static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
@@ -560,13 +562,13 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 	case SECCOMP_RET_LOG:
 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
 		break;
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	default:
-		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL,
+	 * Force an audit message to be emitted when the action is RET_KILL_*,
 	 * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
 	 * allowed to be logged by the admin.
 	 */
@@ -605,7 +607,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
 	do_exit(SIGKILL);
 }
 
@@ -716,7 +718,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		 */
 		return 0;
 
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	default:
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
@@ -878,7 +880,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 		return -EFAULT;
 
 	switch (action) {
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
 	case SECCOMP_RET_TRACE:
@@ -1029,19 +1031,20 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
 #ifdef CONFIG_SYSCTL
 
 /* Human readable action names for friendly sysctl interaction */
-#define SECCOMP_RET_KILL_NAME		"kill"
+#define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
 #define SECCOMP_RET_TRACE_NAME		"trace"
 #define SECCOMP_RET_LOG_NAME		"log"
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
-static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
-					    SECCOMP_RET_TRAP_NAME	" "
-					    SECCOMP_RET_ERRNO_NAME	" "
-					    SECCOMP_RET_TRACE_NAME	" "
-					    SECCOMP_RET_LOG_NAME	" "
-					    SECCOMP_RET_ALLOW_NAME;
+static const char seccomp_actions_avail[] =
+				SECCOMP_RET_KILL_THREAD_NAME	" "
+				SECCOMP_RET_TRAP_NAME		" "
+				SECCOMP_RET_ERRNO_NAME		" "
+				SECCOMP_RET_TRACE_NAME		" "
+				SECCOMP_RET_LOG_NAME		" "
+				SECCOMP_RET_ALLOW_NAME;
 
 struct seccomp_log_name {
 	u32		log;
@@ -1049,7 +1052,7 @@ struct seccomp_log_name {
 };
 
 static const struct seccomp_log_name seccomp_log_names[] = {
-	{ SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME },
+	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
 	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c
index 151ec3f52189..235ce3c49ee9 100644
--- a/samples/seccomp/bpf-direct.c
+++ b/samples/seccomp/bpf-direct.c
@@ -129,7 +129,7 @@ static int install_filter(void)
 		/* Check that read is only using stdin. */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
 
 		/* Check that write is only using stdout */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
@@ -139,7 +139,7 @@ static int install_filter(void)
 
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
 	};
 	struct sock_fprog prog = {
 		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h
index 1d8de9edd858..83dbe79cbe2c 100644
--- a/samples/seccomp/bpf-helper.h
+++ b/samples/seccomp/bpf-helper.h
@@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count);
 #define ALLOW \
 	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
 #define DENY \
-	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
+	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD)
 #define JUMP(labels, label) \
 	BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \
 		 JUMP_JT, JUMP_JF)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 7372958eccb5..a3ba39a32449 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -68,15 +68,18 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
+#ifndef SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#endif
 #ifndef SECCOMP_RET_KILL
-#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
-#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 #endif
 #ifndef SECCOMP_RET_LOG
-#define SECCOMP_RET_LOG       0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
 #endif
 
 #ifndef SECCOMP_RET_ACTION
@@ -2696,7 +2699,7 @@ TEST_SIGNAL(filter_flag_log, SIGSYS)
 
 TEST(get_action_avail)
 {
-	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
+	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
 	__u32 unknown_action = 0x10000000U;

From 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:01:39 -0700
Subject: [PATCH 015/391] seccomp: Introduce SECCOMP_RET_KILL_PROCESS

This introduces the BPF return value for SECCOMP_RET_KILL_PROCESS to kill
an entire process. This cannot yet be reached by seccomp, but it changes
the default-kill behavior (for unknown return values) from kill-thread to
kill-process.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 18 ++++++++++--------
 kernel/seccomp.c             | 22 ++++++++++++++++------
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 5a03f699eb17..7e77c92df78a 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -22,18 +22,20 @@
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
- * The upper 16-bits are ordered from least permissive values to most.
+ * The upper 16-bits are ordered from least permissive values to most,
+ * as a signed value (so 0x8000000 is negative).
  *
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
-#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
-#define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
-#define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
 #define SECCOMP_RET_ACTION	0x7fff0000U
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 95ac54cff00f..5c7299b9d953 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
 	if (unlikely(WARN_ON(f == NULL)))
-		return SECCOMP_RET_KILL_THREAD;
+		return SECCOMP_RET_KILL_PROCESS;
 
 	if (!sd) {
 		populate_seccomp_data(&sd_local);
@@ -529,14 +529,16 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #endif	/* CONFIG_SECCOMP_FILTER */
 
 /* For use with seccomp_actions_logged */
-#define SECCOMP_LOG_KILL_THREAD		(1 << 0)
+#define SECCOMP_LOG_KILL_PROCESS	(1 << 0)
+#define SECCOMP_LOG_KILL_THREAD		(1 << 1)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
 #define SECCOMP_LOG_LOG			(1 << 5)
 #define SECCOMP_LOG_ALLOW		(1 << 6)
 
-static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD |
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
+				    SECCOMP_LOG_KILL_THREAD  |
 				    SECCOMP_LOG_TRAP  |
 				    SECCOMP_LOG_ERRNO |
 				    SECCOMP_LOG_TRACE |
@@ -563,8 +565,11 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
 		break;
 	case SECCOMP_RET_KILL_THREAD:
-	default:
 		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
+		break;
+	case SECCOMP_RET_KILL_PROCESS:
+	default:
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
 	}
 
 	/*
@@ -719,10 +724,12 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		return 0;
 
 	case SECCOMP_RET_KILL_THREAD:
+	case SECCOMP_RET_KILL_PROCESS:
 	default:
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
-		if (get_nr_threads(current) == 1) {
+		if (action == SECCOMP_RET_KILL_PROCESS ||
+		    get_nr_threads(current) == 1) {
 			siginfo_t info;
 
 			/* Show the original registers in the dump. */
@@ -731,7 +738,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 			seccomp_init_siginfo(&info, this_syscall, data);
 			do_coredump(&info);
 		}
-		do_exit(SIGSYS);
+		if (action == SECCOMP_RET_KILL_PROCESS)
+			do_group_exit(SIGSYS);
+		else
+			do_exit(SIGSYS);
 	}
 
 	unreachable();

From 0466bdb99e8744bc9befa8d62a317f0fd7fd7421 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:12:11 -0700
Subject: [PATCH 016/391] seccomp: Implement SECCOMP_RET_KILL_PROCESS action
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right now, SECCOMP_RET_KILL_THREAD (neé SECCOMP_RET_KILL) kills the
current thread. There have been a few requests for this to kill the entire
process (the thread group). This cannot be just changed (discovered when
adding coredump support since coredumping kills the entire process)
because there are userspace programs depending on the thread-kill
behavior.

Instead, implement SECCOMP_RET_KILL_PROCESS, which is 0x80000000, and can
be processed as "-1" by the kernel, below the existing RET_KILL that is
ABI-set to "0". For userspace, SECCOMP_RET_ACTION_FULL is added to expand
the mask to the signed bit. Old userspace using the SECCOMP_RET_ACTION
mask will see SECCOMP_RET_KILL_PROCESS as 0 still, but this would only
be visible when examining the siginfo in a core dump from a RET_KILL_*,
where it will think it was thread-killed instead of process-killed.

Attempts to introduce this behavior via other ways (filter flags,
seccomp struct flags, masked RET_DATA bits) all come with weird
side-effects and baggage. This change preserves the central behavioral
expectations of the seccomp filter engine without putting too great
a burden on changes needed in userspace to use the new action.

The new action is discoverable by userspace through either the new
actions_avail sysctl or through the SECCOMP_GET_ACTION_AVAIL seccomp
operation. If used without checking for availability, old kernels
will treat RET_KILL_PROCESS as RET_KILL_THREAD (since the old mask
will produce RET_KILL_THREAD).

Cc: Paul Moore <paul@paul-moore.com>
Cc: Fabricio Voznika <fvoznika@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/userspace-api/seccomp_filter.rst | 7 ++++++-
 include/uapi/linux/seccomp.h                   | 1 +
 kernel/seccomp.c                               | 9 +++++++--
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index d76396f2d8ed..099c412951d6 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -87,10 +87,15 @@ Return values
 A seccomp filter may return any of the following values. If multiple
 filters exist, the return value for the evaluation of a given system
 call will always use the highest precedent value. (For example,
-``SECCOMP_RET_KILL_THREAD`` will always take precedence.)
+``SECCOMP_RET_KILL_PROCESS`` will always take precedence.)
 
 In precedence order, they are:
 
+``SECCOMP_RET_KILL_PROCESS``:
+	Results in the entire process exiting immediately without executing
+	the system call.  The exit status of the task (``status & 0x7f``)
+	will be ``SIGSYS``, not ``SIGKILL``.
+
 ``SECCOMP_RET_KILL_THREAD``:
 	Results in the task exiting immediately without executing the
 	system call.  The exit status of the task (``status & 0x7f``) will
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 7e77c92df78a..f6bc1dea3247 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -38,6 +38,7 @@
 #define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION_FULL	0xffff0000U
 #define SECCOMP_RET_ACTION	0x7fff0000U
 #define SECCOMP_RET_DATA	0x0000ffffU
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5c7299b9d953..c24579dfa7a1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -181,6 +181,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
+#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
 static u32 seccomp_run_filters(const struct seccomp_data *sd,
 			       struct seccomp_filter **match)
 {
@@ -206,7 +207,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 	for (; f; f = f->prev) {
 		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
-		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) {
+		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
 			ret = cur_ret;
 			*match = f;
 		}
@@ -650,7 +651,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	filter_ret = seccomp_run_filters(sd, &match);
 	data = filter_ret & SECCOMP_RET_DATA;
-	action = filter_ret & SECCOMP_RET_ACTION;
+	action = filter_ret & SECCOMP_RET_ACTION_FULL;
 
 	switch (action) {
 	case SECCOMP_RET_ERRNO:
@@ -890,6 +891,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 		return -EFAULT;
 
 	switch (action) {
+	case SECCOMP_RET_KILL_PROCESS:
 	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
@@ -1041,6 +1043,7 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
 #ifdef CONFIG_SYSCTL
 
 /* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
 #define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
@@ -1049,6 +1052,7 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
 static const char seccomp_actions_avail[] =
+				SECCOMP_RET_KILL_PROCESS_NAME	" "
 				SECCOMP_RET_KILL_THREAD_NAME	" "
 				SECCOMP_RET_TRAP_NAME		" "
 				SECCOMP_RET_ERRNO_NAME		" "
@@ -1062,6 +1066,7 @@ struct seccomp_log_name {
 };
 
 static const struct seccomp_log_name seccomp_log_names[] = {
+	{ SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
 	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },

From f3e1821d9e1cc3fb434d7763001791dcd6720c90 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:20:33 -0700
Subject: [PATCH 017/391] selftests/seccomp: Test thread vs process killing

This verifies that SECCOMP_RET_KILL_PROCESS is higher priority than
SECCOMP_RET_KILL_THREAD. (This also moves a bunch of defines up earlier
in the file to use them earlier.)

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 228 +++++++++++++-----
 1 file changed, 168 insertions(+), 60 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index a3ba39a32449..0683cd543cd5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -68,7 +68,17 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
-#ifndef SECCOMP_RET_KILL_THREAD
+#ifndef SECCOMP_RET_ALLOW
+struct seccomp_data {
+	int nr;
+	__u32 arch;
+	__u64 instruction_pointer;
+	__u64 args[6];
+};
+#endif
+
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
 #define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
 #endif
 #ifndef SECCOMP_RET_KILL
@@ -82,17 +92,53 @@
 #define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
 #endif
 
-#ifndef SECCOMP_RET_ACTION
-/* Masks for the return value sections. */
-#define SECCOMP_RET_ACTION      0x7fff0000U
-#define SECCOMP_RET_DATA        0x0000ffffU
+#ifndef __NR_seccomp
+# if defined(__i386__)
+#  define __NR_seccomp 354
+# elif defined(__x86_64__)
+#  define __NR_seccomp 317
+# elif defined(__arm__)
+#  define __NR_seccomp 383
+# elif defined(__aarch64__)
+#  define __NR_seccomp 277
+# elif defined(__hppa__)
+#  define __NR_seccomp 338
+# elif defined(__powerpc__)
+#  define __NR_seccomp 358
+# elif defined(__s390__)
+#  define __NR_seccomp 348
+# else
+#  warning "seccomp syscall number unknown for this architecture"
+#  define __NR_seccomp 0xffff
+# endif
+#endif
 
-struct seccomp_data {
-	int nr;
-	__u32 arch;
-	__u64 instruction_pointer;
-	__u64 args[6];
-};
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
 #endif
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
@@ -550,6 +596,117 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 	close(fd);
 }
 
+/* This is a thread task to die via seccomp filter violation. */
+void *kill_thread(void *data)
+{
+	bool die = (bool)data;
+
+	if (die) {
+		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+		return (void *)SIBLING_EXIT_FAILURE;
+	}
+
+	return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+	pthread_t thread;
+	void *status;
+	/* Kill only when calling __NR_prctl. */
+	struct sock_filter filter_thread[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_thread = {
+		.len = (unsigned short)ARRAY_SIZE(filter_thread),
+		.filter = filter_thread,
+	};
+	struct sock_filter filter_process[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_process = {
+		.len = (unsigned short)ARRAY_SIZE(filter_process),
+		.filter = filter_process,
+	};
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+			     kill_process ? &prog_process : &prog_thread));
+
+	/*
+	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+	 * flag cannot be downgraded by a new filter.
+	 */
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+	/* Start a thread that will exit immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+	/* Start a thread that will die immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+	/*
+	 * If we get here, only the spawned thread died. Let the parent know
+	 * the whole process didn't die (i.e. this thread, the spawner,
+	 * stayed running).
+	 */
+	exit(42);
+}
+
+TEST(KILL_thread)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, false);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If only the thread was killed, we'll see exit 42. */
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, true);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If the entire process was killed, we'll see SIGSYS. */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
 TEST(arg_out_of_range)
 {
@@ -1800,55 +1957,6 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-#ifndef __NR_seccomp
-# if defined(__i386__)
-#  define __NR_seccomp 354
-# elif defined(__x86_64__)
-#  define __NR_seccomp 317
-# elif defined(__arm__)
-#  define __NR_seccomp 383
-# elif defined(__aarch64__)
-#  define __NR_seccomp 277
-# elif defined(__hppa__)
-#  define __NR_seccomp 338
-# elif defined(__powerpc__)
-#  define __NR_seccomp 358
-# elif defined(__s390__)
-#  define __NR_seccomp 348
-# else
-#  warning "seccomp syscall number unknown for this architecture"
-#  define __NR_seccomp 0xffff
-# endif
-#endif
-
-#ifndef SECCOMP_SET_MODE_STRICT
-#define SECCOMP_SET_MODE_STRICT 0
-#endif
-
-#ifndef SECCOMP_SET_MODE_FILTER
-#define SECCOMP_SET_MODE_FILTER 1
-#endif
-
-#ifndef SECCOMP_GET_ACTION_AVAIL
-#define SECCOMP_GET_ACTION_AVAIL 2
-#endif
-
-#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
-#endif
-
-#ifndef SECCOMP_FILTER_FLAG_LOG
-#define SECCOMP_FILTER_FLAG_LOG 2
-#endif
-
-#ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, void *args)
-{
-	errno = 0;
-	return syscall(__NR_seccomp, op, flags, args);
-}
-#endif
-
 TEST(seccomp_syscall)
 {
 	struct sock_filter filter[] = {

From 6849243bf4c6155151b294e9f0e0dc9540d6f083 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Aug 2017 20:26:57 -0700
Subject: [PATCH 018/391] samples: Unrename SECCOMP_RET_KILL

Since samples can still be built before header installs, avoid the
cosmetic renaming of SECCOMP_RET_KILL to avoid build failures in -next.

Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 samples/seccomp/bpf-direct.c | 4 ++--
 samples/seccomp/bpf-helper.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c
index 235ce3c49ee9..151ec3f52189 100644
--- a/samples/seccomp/bpf-direct.c
+++ b/samples/seccomp/bpf-direct.c
@@ -129,7 +129,7 @@ static int install_filter(void)
 		/* Check that read is only using stdin. */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
 
 		/* Check that write is only using stdout */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
@@ -139,7 +139,7 @@ static int install_filter(void)
 
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
 	};
 	struct sock_fprog prog = {
 		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h
index 83dbe79cbe2c..1d8de9edd858 100644
--- a/samples/seccomp/bpf-helper.h
+++ b/samples/seccomp/bpf-helper.h
@@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count);
 #define ALLOW \
 	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
 #define DENY \
-	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD)
+	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
 #define JUMP(labels, label) \
 	BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \
 		 JUMP_JT, JUMP_JF)

From a4e89ffb59235fd11d27107dea3efa4562ac0a12 Mon Sep 17 00:00:00 2001
From: Matt Weber <matthew.weber@rockwellcollins.com>
Date: Wed, 28 Jun 2017 11:14:29 -0500
Subject: [PATCH 019/391] powerpc/e6500: Update machine check for L1D cache err

This patch updates the machine check handler of Linux kernel to
handle the e6500 architecture case. In e6500 core, L1 Data Cache Write
Shadow Mode (DCWS) register is not implemented but L1 data cache always
runs in write shadow mode. So, on L1 data cache parity errors, hardware
will automatically invalidate the data cache but will still log a
machine check interrupt.

Signed-off-by: Ronak Desai <ronak.desai@rockwellcollins.com>
Signed-off-by: Matthew Weber <matthew.weber@rockwellcollins.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 arch/powerpc/kernel/traps.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 675d5d2bfcde..410352acfa38 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -393,6 +393,7 @@ static inline int check_io_access(struct pt_regs *regs)
 int machine_check_e500mc(struct pt_regs *regs)
 {
 	unsigned long mcsr = mfspr(SPRN_MCSR);
+	unsigned long pvr = mfspr(SPRN_PVR);
 	unsigned long reason = mcsr;
 	int recoverable = 1;
 
@@ -434,8 +435,15 @@ int machine_check_e500mc(struct pt_regs *regs)
 		 * may still get logged and cause a machine check.  We should
 		 * only treat the non-write shadow case as non-recoverable.
 		 */
-		if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
-			recoverable = 0;
+		/* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
+		 * is not implemented but L1 data cache always runs in write
+		 * shadow mode. Hence on data cache parity errors HW will
+		 * automatically invalidate the L1 Data Cache.
+		 */
+		if (PVR_VER(pvr) != PVR_VER_E6500) {
+			if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+				recoverable = 0;
+		}
 	}
 
 	if (reason & MCSR_L2MMU_MHIT) {

From 50dad5fb59d12dc9a5a0e96073ee1e5857ac45a2 Mon Sep 17 00:00:00 2001
From: Himanshu Jha <himanshujha199640@gmail.com>
Date: Wed, 30 Aug 2017 00:33:35 +0530
Subject: [PATCH 020/391] drm/amdkfd: remove memset before memcpy

calling memcpy immediately after memset with the same region of memory
makes memset redundant.

Signed-off-by: Himanshu Jha <himanshujha199640@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 1cae95e2b13a..03bec765b03d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -143,7 +143,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	int num_queues = 0;
 	struct queue *cur;
 
-	memset(&q_properties, 0, sizeof(struct queue_properties));
 	memcpy(&q_properties, properties, sizeof(struct queue_properties));
 	q = NULL;
 	kq = NULL;

From 1fabbf781167052f4480bdcc627378a27e78a559 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 2 Sep 2017 15:00:25 +0300
Subject: [PATCH 021/391] drm/amdkfd: pass queue's mqd when destroying mqd

In VI, the destroy mqd function needs to inquire fields present in the mqd
structure. That's why we need to pass it to that function instead of NULL.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 681b639f5133..0649dd43e780 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -183,7 +183,7 @@ static void uninitialize(struct kernel_queue *kq)
 {
 	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
 		kq->mqd->destroy_mqd(kq->mqd,
-					NULL,
+					kq->queue->mqd,
 					false,
 					QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
 					kq->queue->pipe,

From fc3100d64f0ae383ae8d845989103da06d62763b Mon Sep 17 00:00:00 2001
From: Pu Hou <bjhoupu@linux.vnet.ibm.com>
Date: Tue, 5 Sep 2017 05:17:24 +0200
Subject: [PATCH 022/391] s390/perf: fix bug when creating per-thread event

A per-thread event could not be created correctly like below:

    perf record --per-thread -e rB0000 -- sleep 1
    Error:
    The sys_perf_event_open() syscall returned with 19 (No such device) for event (rB0000).
    /bin/dmesg may provide additional information.
    No CONFIG_PERF_EVENTS=y kernel support configured?

This bug was introduced by:

    commit c311c797998c1e70eade463dd60b843da4f1a203
    Author: Alexey Dobriyan <adobriyan@gmail.com>
    Date:   Mon May 8 15:56:15 2017 -0700

    cpumask: make "nr_cpumask_bits" unsigned

If a per-thread event is not attached to any CPU, the cpu field
in struct perf_event is -1. The above commit converts the CPU number
to unsigned int, which result in an illegal CPU number.

Fixes: c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned")
Cc: <stable@vger.kernel.org> # v4.12+
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Pu Hou <bjhoupu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c1bf75ffb875..7e1e40323b78 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -823,9 +823,12 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
 	}
 
 	/* Check online status of the CPU to which the event is pinned */
-	if ((unsigned int)event->cpu >= nr_cpumask_bits ||
-	    (event->cpu >= 0 && !cpu_online(event->cpu)))
-		return -ENODEV;
+	if (event->cpu >= 0) {
+		if ((unsigned int)event->cpu >= nr_cpumask_bits)
+			return -ENODEV;
+		if (!cpu_online(event->cpu))
+			return -ENODEV;
+	}
 
 	/* Force reset of idle/hv excludes regardless of what the
 	 * user requested.

From 673514aff5d797b0cdb3f09097ae0e253b5667c3 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.vnet.ibm.com>
Date: Tue, 12 Sep 2017 17:22:42 +0200
Subject: [PATCH 023/391] s390/dasd: fix race during dasd initialization

Fix a panic in blk_mq_hctx_has_pending() that is caused by a racy call to
blk_mq_run_hw_queues in a dasd function that might get called with the
request queue not yet initialized during initialization.

Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index ea19b4ff87a2..29f35e29d480 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1644,7 +1644,9 @@ void dasd_generic_handle_state_change(struct dasd_device *device)
 	dasd_schedule_device_bh(device);
 	if (device->block) {
 		dasd_schedule_block_bh(device->block);
-		blk_mq_run_hw_queues(device->block->request_queue, true);
+		if (device->block->request_queue)
+			blk_mq_run_hw_queues(device->block->request_queue,
+					     true);
 	}
 }
 EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
@@ -3759,7 +3761,9 @@ int dasd_generic_path_operational(struct dasd_device *device)
 	dasd_schedule_device_bh(device);
 	if (device->block) {
 		dasd_schedule_block_bh(device->block);
-		blk_mq_run_hw_queues(device->block->request_queue, true);
+		if (device->block->request_queue)
+			blk_mq_run_hw_queues(device->block->request_queue,
+					     true);
 		}
 
 	if (!device->stopped)
@@ -4025,7 +4029,9 @@ int dasd_generic_restore_device(struct ccw_device *cdev)
 
 	if (device->block) {
 		dasd_schedule_block_bh(device->block);
-		blk_mq_run_hw_queues(device->block->request_queue, true);
+		if (device->block->request_queue)
+			blk_mq_run_hw_queues(device->block->request_queue,
+					     true);
 	}
 
 	clear_bit(DASD_FLAG_SUSPENDED, &device->flags);

From e785fa0a164aa11001cba931367c7f94ffaff888 Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Wed, 13 Sep 2017 00:21:21 +0200
Subject: [PATCH 024/391] nl80211: check for the required netlink attributes
 presence

nl80211_set_rekey_data() does not check if the required attributes
NL80211_REKEY_DATA_{REPLAY_CTR,KEK,KCK} are present when processing
NL80211_CMD_SET_REKEY_OFFLOAD request. This request can be issued by
users with CAP_NET_ADMIN privilege and may result in NULL dereference
and a system crash. Add a check for the required attributes presence.
This patch is based on the patch by bo Zhang.

This fixes CVE-2017-12153.

References: https://bugzilla.redhat.com/show_bug.cgi?id=1491046
Fixes: e5497d766ad ("cfg80211/nl80211: support GTK rekey offload")
Cc: <stable@vger.kernel.org> # v3.1-rc1
Reported-by: bo Zhang <zhangbo5891001@gmail.com>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0df8023f480b..fbd5593e88cb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10903,6 +10903,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
+	if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
+	    !tb[NL80211_REKEY_DATA_KCK])
+		return -EINVAL;
 	if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
 		return -ERANGE;
 	if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)

From 51ae253870f55e14ba2854ce9577ac2920efef0c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 15 Sep 2017 21:29:13 +0200
Subject: [PATCH 025/391] xen: x86: mark xen_find_pt_base as __init

gcc-4.6 causes a harmless link-time warning:

WARNING: vmlinux.o(.text.unlikely+0x48e): Section mismatch in reference from the function xen_find_pt_base() to the function .init.text:m2p()
The function xen_find_pt_base() references
the function __init m2p().
This is often because xen_find_pt_base lacks a __init
annotation or the annotation of m2p is wrong.

Newer compilers inline this function, so it never shows up, but marking
it __init is the right way to avoid the warning.

Fixes: 70e61199559a ("xen: move p2m list if conflicting with e820 map")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/xen/mmu_pv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 0422ee7e70b3..ddfeebc2b125 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2221,7 +2221,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
  * not the first page table in the page table pool.
  * Iterate through the initial page tables to find the real page table base.
  */
-static phys_addr_t xen_find_pt_base(pmd_t *pmd)
+static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
 {
 	phys_addr_t pt_base, paddr;
 	unsigned pmdidx;

From 6d57339890c9a9dfcd4ba4f8931b911b962968e3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 15 Sep 2017 17:08:16 +0200
Subject: [PATCH 026/391] dma-coherent: fix rmem_dma_device_init regression

My recent bug fix introduced another bug, which caused rmem_dma_device_init
to always fail, as rmem->priv is never set to anything.

This restores the previous behavior, calling dma_init_coherent_memory()
whenever ->priv is NULL.

Fixes: d35b0996fef3 ("dma-coherent: fix dma_declare_coherent_memory() logic error")
Reported-by: Roy Pledge <roy.pledge@nxp.com>
Tested-by: Roy Pledge <roy.pledge@nxp.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/base/dma-coherent.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index a39b2166b145..744f64f43454 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -348,16 +348,15 @@ static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
 	struct dma_coherent_mem *mem = rmem->priv;
 	int ret;
 
-	if (!mem)
-		return -ENODEV;
-
-	ret = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size,
-				       DMA_MEMORY_EXCLUSIVE, &mem);
-
-	if (ret) {
-		pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
-			&rmem->base, (unsigned long)rmem->size / SZ_1M);
-		return ret;
+	if (!mem) {
+		ret = dma_init_coherent_memory(rmem->base, rmem->base,
+					       rmem->size,
+					       DMA_MEMORY_EXCLUSIVE, &mem);
+		if (ret) {
+			pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
+				&rmem->base, (unsigned long)rmem->size / SZ_1M);
+			return ret;
+		}
 	}
 	mem->use_dev_dma_pfn_offset = true;
 	rmem->priv = mem;

From 47061a24e2ee5bd8a40d473d47a5bd823fa0081f Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:48 -0700
Subject: [PATCH 027/391] x86/mm: Factor out CR3-building code

Current, the code that assembles a value to load into CR3 is
open-coded everywhere.  Factor it out into helpers build_cr3() and
build_cr3_noflush().

This makes one semantic change: __get_current_cr3_fast() was wrong
on SME systems.  No one noticed because the only caller is in the
VMX code, and there are no CPUs with both SME and VMX.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 15 +++++++++++----
 arch/x86/mm/tlb.c                  | 11 +++++------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..a999ba6b721f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid;
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+}
 
 /*
  * This can be used from process context to figure out what the value of
@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
-	if (static_cpu_has(X86_FEATURE_PCID))
-		cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1ab3821f9e26..93fe97cce581 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -126,8 +126,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() !=
-			 (__sme_pa(real_prev->pgd) | prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -172,7 +171,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			 */
 			this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
 				       next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | prev_asid);
+			write_cr3(build_cr3(next, prev_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		}
@@ -216,12 +215,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-			write_cr3(__sme_pa(next->pgd) | new_asid);
+			write_cr3(build_cr3(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 					TLB_FLUSH_ALL);
 		} else {
 			/* The new ASID is already up to date. */
-			write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+			write_cr3(build_cr3_noflush(next, new_asid));
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 		}
 
@@ -265,7 +264,7 @@ void initialize_tlbstate_and_flush(void)
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
 	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(cr3 & ~CR3_PCID_MASK);
+	write_cr3(build_cr3(mm, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);

From 52a2af400c1075219b3f0ce5c96fc961da44018a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:49 -0700
Subject: [PATCH 028/391] x86/mm/64: Stop using CR3.PCID == 0 in ASID-aware
 code

Putting the logical ASID into CR3's PCID bits directly means that we
have two cases to consider separately: ASID == 0 and ASID != 0.
This means that bugs that only hit in one of these cases trigger
nondeterministically.

There were some bugs like this in the past, and I think there's
still one in current kernels.  In particular, we have a number of
ASID-unware code paths that save CR3, write some special value, and
then restore CR3.  This includes suspend/resume, hibernate, kexec,
EFI, and maybe other things I've missed.  This is currently
dangerous: if ASID != 0, then this code sequence will leave garbage
in the TLB tagged for ASID 0.  We could potentially see corruption
when switching back to ASID 0.  In principle, an
initialize_tlbstate_and_flush() call after these sequences would
solve the problem, but EFI, at least, does not call this.  (And it
probably shouldn't -- initialize_tlbstate_and_flush() is rather
expensive.)

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/cdc14bbe5d3c3ef2a562be09a6368ffe9bd947a6.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index a999ba6b721f..c120b5db178a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,14 +286,31 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits.  This serves two purposes.  It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero.  It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
 static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
 {
-	return __sme_pa(mm->pgd) | asid;
+	if (static_cpu_has(X86_FEATURE_PCID)) {
+		VM_WARN_ON_ONCE(asid > 4094);
+		return __sme_pa(mm->pgd) | (asid + 1);
+	} else {
+		VM_WARN_ON_ONCE(asid != 0);
+		return __sme_pa(mm->pgd);
+	}
 }
 
 static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
 {
-	return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+	VM_WARN_ON_ONCE(asid > 4094);
+	return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
 }
 
 /*

From b8b7abaed7a49b350f8ba659ddc264b04931d581 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:50 -0700
Subject: [PATCH 029/391] x86/mm/32: Move setup_clear_cpu_cap(X86_FEATURE_PCID)
 earlier

Otherwise we might have the PCID feature bit set during cpu_init().

This is just for robustness.  I haven't seen any actual bugs here.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: cba4671af755 ("x86/mm: Disable PCID on 32-bit kernels")
Link: http://lkml.kernel.org/r/b16dae9d6b0db5d9801ddbebbfd83384097c61f3.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/bugs.c   | 8 --------
 arch/x86/kernel/cpu/common.c | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index db684880d74a..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,14 +21,6 @@
 
 void __init check_bugs(void)
 {
-#ifdef CONFIG_X86_32
-	/*
-	 * Regardless of whether PCID is enumerated, the SDM says
-	 * that it can't be enabled in 32-bit mode.
-	 */
-	setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
 	identify_boot_cpu();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 775f10100d7f..c9176bae7fd8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 	fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Regardless of whether PCID is enumerated, the SDM says
+	 * that it can't be enabled in 32-bit mode.
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
 }
 
 void __init early_cpu_init(void)

From 4ba55e65f471d011d3ba2ac2022180ea0877d68e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:51 -0700
Subject: [PATCH 030/391] x86/mm/32: Load a sane CR3 before cpu_init() on
 secondary CPUs

For unknown historical reasons (i.e. Borislav doesn't recall),
32-bit kernels invoke cpu_init() on secondary CPUs with
initial_page_table loaded into CR3.  Then they set
current->active_mm to &init_mm and call enter_lazy_tlb() before
fixing CR3.  This means that the x86 TLB code gets invoked while CR3
is inconsistent, and, with the improved PCID sanity checks I added,
we warn.

Fix it by loading swapper_pg_dir (i.e. init_mm.pgd) earlier.

Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 72c0098d92ce ("x86/mm: Reinitialize TLB state on hotplug and resume")
Link: http://lkml.kernel.org/r/30cdfea504682ba3b9012e77717800a91c22097f.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/smpboot.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0854ff169274..ad59edd84de7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -232,12 +232,6 @@ static void notrace start_secondary(void *unused)
 	 */
 	if (boot_cpu_has(X86_FEATURE_PCID))
 		__write_cr4(__read_cr4() | X86_CR4_PCIDE);
-	cpu_init();
-	x86_cpuinit.early_percpu_clock_init();
-	preempt_disable();
-	smp_callin();
-
-	enable_start_cpu0 = 0;
 
 #ifdef CONFIG_X86_32
 	/* switch away from the initial page table */
@@ -245,6 +239,13 @@ static void notrace start_secondary(void *unused)
 	__flush_tlb_all();
 #endif
 
+	cpu_init();
+	x86_cpuinit.early_percpu_clock_init();
+	preempt_disable();
+	smp_callin();
+
+	enable_start_cpu0 = 0;
+
 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
 	barrier();
 	/*

From bf29ed1567b67854dc13504f685c45a2ea9b2081 Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:44 -0700
Subject: [PATCH 031/391] syscalls: Use CHECK_DATA_CORRUPTION for
 addr_limit_user_check

Use CHECK_DATA_CORRUPTION instead of BUG_ON to provide more flexibility
on address limit failures. By default, send a SIGKILL signal to kill the
current process preventing exploitation of a bad address limit.

Make the TIF_FSCHECK flag optional so ARM can use this function.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-2-git-send-email-keescook@chromium.org
---
 include/linux/syscalls.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 95606a2d556f..a78186d826d7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -221,22 +221,26 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 	}								\
 	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
-#ifdef TIF_FSCHECK
 /*
  * Called before coming back to user-mode. Returning to user-mode with an
  * address limit different than USER_DS can allow to overwrite kernel memory.
  */
 static inline void addr_limit_user_check(void)
 {
-
+#ifdef TIF_FSCHECK
 	if (!test_thread_flag(TIF_FSCHECK))
 		return;
-
-	BUG_ON(!segment_eq(get_fs(), USER_DS));
-	clear_thread_flag(TIF_FSCHECK);
-}
 #endif
 
+	if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
+				  "Invalid address limit on user-mode return"))
+		force_sig(SIGKILL, current);
+
+#ifdef TIF_FSCHECK
+	clear_thread_flag(TIF_FSCHECK);
+#endif
+}
+
 asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
 			       qid_t id, void __user *addr);
 asmlinkage long sys_time(time_t __user *tloc);

From 2404269bc4e77a67875c8db6667be34c9913c96e Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:45 -0700
Subject: [PATCH 032/391] Revert "arm/syscalls: Check address limit on
 user-mode return"

This reverts commit 73ac5d6a2b6ac3ae8d1e1818f3e9946f97489bc9.

The work pending loop can call set_fs after addr_limit_user_check
removed the _TIF_FSCHECK flag. This may happen at anytime based on how
ARM handles alignment exceptions. It leads to an infinite loop condition.

After discussion, it has been agreed that the generic approach is not
tailored to the ARM architecture and any fix might not be complete. This
patch will be replaced by an architecture specific implementation. The
work flag approach will be kept for other architectures.

Reported-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-3-git-send-email-keescook@chromium.org
---
 arch/arm/include/asm/thread_info.h | 15 ++++++---------
 arch/arm/include/asm/uaccess.h     |  2 --
 arch/arm/kernel/entry-common.S     |  9 ++-------
 arch/arm/kernel/signal.c           |  5 -----
 4 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 1d468b527b7b..776757d1604a 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -139,11 +139,10 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define TIF_NEED_RESCHED	1	/* rescheduling necessary */
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_UPROBE		3	/* breakpointed or singlestepping */
-#define TIF_FSCHECK		4	/* Check FS is USER_DS on return */
-#define TIF_SYSCALL_TRACE	5	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	6	/* syscall auditing active */
-#define TIF_SYSCALL_TRACEPOINT	7	/* syscall tracepoint instrumentation */
-#define TIF_SECCOMP		8	/* seccomp syscall filtering active */
+#define TIF_SYSCALL_TRACE	4	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
+#define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
+#define TIF_SECCOMP		7	/* seccomp syscall filtering active */
 
 #define TIF_NOHZ		12	/* in adaptive nohz mode */
 #define TIF_USING_IWMMXT	17
@@ -154,7 +153,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
-#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
@@ -168,9 +166,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *,
 /*
  * Change these and you break ASM code in entry-common.S
  */
-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING |	\
-				 _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
-				 _TIF_FSCHECK)
+#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+				 _TIF_NOTIFY_RESUME | _TIF_UPROBE)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_ARM_THREAD_INFO_H */
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 87936dd5d151..0bf2347495f1 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -70,8 +70,6 @@ static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
 	modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
-	/* On user-mode return, check fs is correct */
-	set_thread_flag(TIF_FSCHECK);
 }
 
 #define segment_eq(a, b)	((a) == (b))
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index ca3614dc6938..0b60adf4a5d9 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -49,9 +49,7 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK
-	bne	fast_work_pending
-	tst	r1, #_TIF_WORK_MASK
+	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
 
 	/* perform architecture specific actions before user return */
@@ -77,15 +75,12 @@ ret_fast_syscall:
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
-	tst	r1, #_TIF_SYSCALL_WORK
-	bne	fast_work_pending
-	tst	r1, #_TIF_WORK_MASK
+	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
  UNWIND(.fnend		)
 ENDPROC(ret_fast_syscall)
 
 	/* Slower path - fall through to work_pending */
-fast_work_pending:
 #endif
 
 	tst	r1, #_TIF_SYSCALL_WORK
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index e2de50bf8742..5814298ef0b7 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,7 +14,6 @@
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/uprobes.h>
-#include <linux/syscalls.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -614,10 +613,6 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 	 * Update the trace code with the current status.
 	 */
 	trace_hardirqs_off();
-
-	/* Check valid user FS if needed */
-	addr_limit_user_check();
-
 	do {
 		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
 			schedule();

From e33f8d32677fa4f4f8996ef46748f86aac81ccff Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:46 -0700
Subject: [PATCH 033/391] arm/syscalls: Optimize address limit check

Disable the generic address limit check in favor of an architecture
specific optimized implementation. The generic implementation using
pending work flags did not work well with ARM and alignment faults.

The address limit is checked on each syscall return path to user-mode
path as well as the irq user-mode return function. If the address limit
was changed, a function is called to report data corruption (stopping
the kernel or process based on configuration).

The address limit check has to be done before any pending work because
they can reset the address limit and the process is killed using a
SIGKILL signal. For example the lkdtm address limit check does not work
because the signal to kill the process will reset the user-mode address
limit.

Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>
Tested-by: Leonard Crestez <leonard.crestez@nxp.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-4-git-send-email-keescook@chromium.org
---
 arch/arm/kernel/entry-common.S | 11 +++++++++++
 arch/arm/kernel/signal.c       |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 0b60adf4a5d9..99c908226065 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -12,6 +12,7 @@
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/unwind.h>
+#include <asm/memory.h>
 #ifdef CONFIG_AEABI
 #include <asm/unistd-oabi.h>
 #endif
@@ -48,10 +49,14 @@ ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	bne	fast_work_pending
 
+
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
 
@@ -74,6 +79,9 @@ ret_fast_syscall:
  UNWIND(.cantunwind	)
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
 	disable_irq_notrace			@ disable interrupts
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
 	beq	no_work_pending
@@ -106,6 +114,9 @@ ENTRY(ret_to_user)
 ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
+	ldr	r2, [tsk, #TI_ADDR_LIMIT]
+	cmp	r2, #TASK_SIZE
+	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
 	bne	slow_work_pending
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 5814298ef0b7..b67ae12503f3 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -14,6 +14,7 @@
 #include <linux/uaccess.h>
 #include <linux/tracehook.h>
 #include <linux/uprobes.h>
+#include <linux/syscalls.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -673,3 +674,9 @@ struct page *get_signal_page(void)
 
 	return page;
 }
+
+/* Defer to generic check */
+asmlinkage void addr_limit_check_failed(void)
+{
+	addr_limit_user_check();
+}

From a2048e34d4655c06d31400646ae495bbfeb16b27 Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 7 Sep 2017 08:30:47 -0700
Subject: [PATCH 034/391] arm64/syscalls: Move address limit check in loop

A bug was reported on ARM where set_fs might be called after it was
checked on the work pending function. ARM64 is not affected by this bug
but has a similar construct. In order to avoid any similar problems in
the future, the addr_limit_user_check function is moved at the beginning
of the loop.

Fixes: cf7de27ab351 ("arm64/syscalls: Check address limit on user-mode return")
Reported-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Thomas Garnier <thgarnie@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pratyush Anand <panand@redhat.com>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Will Drewry <wad@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-api@vger.kernel.org
Cc: Yonghong Song <yhs@fb.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1504798247-48833-5-git-send-email-keescook@chromium.org
---
 arch/arm64/kernel/signal.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c45214f8fb54..0bdc96c61bc0 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -751,10 +751,10 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 	 */
 	trace_hardirqs_off();
 
-	/* Check valid user FS if needed */
-	addr_limit_user_check();
-
 	do {
+		/* Check valid user FS if needed */
+		addr_limit_user_check();
+
 		if (thread_flags & _TIF_NEED_RESCHED) {
 			schedule();
 		} else {

From 8fce3dc5c5d6f6301f67311fa79f333902b58cea Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 15 Sep 2017 21:42:59 +0200
Subject: [PATCH 035/391] clocksource/integrator: Fix section mismatch warning

gcc-4.6 and older fail to inline integrator_clocksource_init, so they
end up showing a harmless warning:

WARNING: vmlinux.o(.text+0x4aa94c): Section mismatch in reference from the function integrator_clocksource_init() to the function .init.text:clocksource_mmio_init()
The function integrator_clocksource_init() references
the function __init clocksource_mmio_init().
This is often because integrator_clocksource_init lacks a __init
annotation or the annotation of clocksource_mmio_init is wrong.

Add the missing __init annotation that makes it build cleanly with all
compilers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: http://lkml.kernel.org/r/20170915194310.1170514-1-arnd@arndb.de
---
 drivers/clocksource/timer-integrator-ap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c
index 2ff64d9d4fb3..62d24690ba02 100644
--- a/drivers/clocksource/timer-integrator-ap.c
+++ b/drivers/clocksource/timer-integrator-ap.c
@@ -36,8 +36,8 @@ static u64 notrace integrator_read_sched_clock(void)
 	return -readl(sched_clk_base + TIMER_VALUE);
 }
 
-static int integrator_clocksource_init(unsigned long inrate,
-				       void __iomem *base)
+static int __init integrator_clocksource_init(unsigned long inrate,
+					      void __iomem *base)
 {
 	u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC;
 	unsigned long rate = inrate;

From b8f3911610529ba531b99d1e109c7a40fb071f0a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Tue, 12 Sep 2017 15:10:35 +0200
Subject: [PATCH 036/391] mtd: spi-nor: Check consistency of the memory size
 extracted from the SFDP

One field of the flash parameter table contains information about the
flash device size.
Most of the time the data extracted from this field is valid, but
sometimes the BFPT section of the SFDP table is corrupted or invalid and
this field is set to 0xffffffff, thus resulting in an integer overflow
when setting params->size.

Since NOR devices are anayway always smaller than 2^64 bytes, we can
easily stop the BFPT parsing if the size reported in this table is
invalid.

Fixes: f384b352cbf0 ("mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index cf1d4a15e10a..4425b0283725 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2127,6 +2127,15 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 	params->size = bfpt.dwords[BFPT_DWORD(2)];
 	if (params->size & BIT(31)) {
 		params->size &= ~BIT(31);
+
+		/*
+		 * Prevent overflows on params->size. Anyway, a NOR of 2^64
+		 * bits is unlikely to exist so this error probably means
+		 * the BFPT we are reading is corrupted/wrong.
+		 */
+		if (params->size > 63)
+			return -EINVAL;
+
 		params->size = 1ULL << params->size;
 	} else {
 		params->size++;

From bfa4133795e5a0badd402dd3f58b13b3cec64a4b Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Date: Wed, 6 Sep 2017 23:45:02 +0200
Subject: [PATCH 037/391] mtd: spi-nor: fix DMA unsafe buffer issue in
 spi_nor_read_sfdp()

spi_nor_read_sfdp() calls nor->read() to read the SFDP data.
When the m25p80 driver is used (pretty common case), nor->read() is then
implemented by the m25p80_read() function, which is likely to initialize a
'struct spi_transfer' from its buf argument before appending this
structure inside the 'struct spi_message' argument of spi_sync().

Besides the SPI sub-system states that both .tx_buf and .rx_buf members of
'struct spi_transfer' must point into dma-safe memory. However, two of the
three calls of spi_nor_read_sfdp() were given pointers to stack allocated
memory as buf argument, hence not in a dma-safe area.
Hopefully, the third and last call of spi_nor_read_sfdp() was already
given a kmalloc'ed buffer argument, hence dma-safe.

So this patch fixes this issue by introducing a
spi_nor_read_sfdp_dma_unsafe() function which simply wraps the existing
spi_nor_read_sfdp() function and uses some kmalloc'ed memory as a bounce
buffer.

Fixes: f384b352cbf0 ("mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 36 ++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 4425b0283725..19c000722cbc 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1784,7 +1784,7 @@ spi_nor_set_pp_settings(struct spi_nor_pp_command *pp,
  * @nor:	pointer to a 'struct spi_nor'
  * @addr:	offset in the SFDP area to start reading data from
  * @len:	number of bytes to read
- * @buf:	buffer where the SFDP data are copied into
+ * @buf:	buffer where the SFDP data are copied into (dma-safe memory)
  *
  * Whatever the actual numbers of bytes for address and dummy cycles are
  * for (Fast) Read commands, the Read SFDP (5Ah) instruction is always
@@ -1829,6 +1829,36 @@ static int spi_nor_read_sfdp(struct spi_nor *nor, u32 addr,
 	return ret;
 }
 
+/**
+ * spi_nor_read_sfdp_dma_unsafe() - read Serial Flash Discoverable Parameters.
+ * @nor:	pointer to a 'struct spi_nor'
+ * @addr:	offset in the SFDP area to start reading data from
+ * @len:	number of bytes to read
+ * @buf:	buffer where the SFDP data are copied into
+ *
+ * Wrap spi_nor_read_sfdp() using a kmalloc'ed bounce buffer as @buf is now not
+ * guaranteed to be dma-safe.
+ *
+ * Return: -ENOMEM if kmalloc() fails, the return code of spi_nor_read_sfdp()
+ *          otherwise.
+ */
+static int spi_nor_read_sfdp_dma_unsafe(struct spi_nor *nor, u32 addr,
+					size_t len, void *buf)
+{
+	void *dma_safe_buf;
+	int ret;
+
+	dma_safe_buf = kmalloc(len, GFP_KERNEL);
+	if (!dma_safe_buf)
+		return -ENOMEM;
+
+	ret = spi_nor_read_sfdp(nor, addr, len, dma_safe_buf);
+	memcpy(buf, dma_safe_buf, len);
+	kfree(dma_safe_buf);
+
+	return ret;
+}
+
 struct sfdp_parameter_header {
 	u8		id_lsb;
 	u8		minor;
@@ -2101,7 +2131,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 		    bfpt_header->length * sizeof(u32));
 	addr = SFDP_PARAM_HEADER_PTP(bfpt_header);
 	memset(&bfpt, 0, sizeof(bfpt));
-	err = spi_nor_read_sfdp(nor,  addr, len, &bfpt);
+	err = spi_nor_read_sfdp_dma_unsafe(nor,  addr, len, &bfpt);
 	if (err < 0)
 		return err;
 
@@ -2252,7 +2282,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
 	int i, err;
 
 	/* Get the SFDP header. */
-	err = spi_nor_read_sfdp(nor, 0, sizeof(header), &header);
+	err = spi_nor_read_sfdp_dma_unsafe(nor, 0, sizeof(header), &header);
 	if (err < 0)
 		return err;
 

From 17b694a5476de09e119c517dcc3b71eff6835bdf Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 3 Sep 2017 13:58:31 +0200
Subject: [PATCH 038/391] mtd: nand: lpc32xx_mlc: Fix an error handling path in
 lpc32xx_nand_probe()

If 'clk_prepare_enable()' fails, we must 'put' the corresponding clock.

Fixes: 4d26f012ab59 ("mtd: nand: lpc32xx_mlc: Handle return value of clk_prepare_enable.")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/lpc32xx_mlc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c
index c3bb358ef01e..5796468db653 100644
--- a/drivers/mtd/nand/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/lpc32xx_mlc.c
@@ -707,7 +707,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	}
 	res = clk_prepare_enable(host->clk);
 	if (res)
-		goto err_exit1;
+		goto err_put_clk;
 
 	nand_chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	nand_chip->dev_ready = lpc32xx_nand_device_ready;
@@ -814,6 +814,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 		dma_release_channel(host->dma_chan);
 err_exit2:
 	clk_disable_unprepare(host->clk);
+err_put_clk:
 	clk_put(host->clk);
 err_exit1:
 	lpc32xx_wp_enable(host);

From e580b8bc4316cbb8bbffb5ed7bf1e477064755ed Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Mon, 18 Sep 2017 09:40:12 +0100
Subject: [PATCH 039/391] arm64: efi: Don't include EFI fpsimd save/restore
 code in non-EFI kernels

__efi_fpsimd_begin()/__efi_fpsimd_end() are for use when making EFI
calls only, so using them in non-EFI kernels is not allowed.

This patch compiles them out if CONFIG_EFI is not set.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 3a68cf38a6b3..f444f374bd7b 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -321,6 +321,8 @@ void kernel_neon_end(void)
 }
 EXPORT_SYMBOL(kernel_neon_end);
 
+#ifdef CONFIG_EFI
+
 static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
 static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
 
@@ -370,6 +372,8 @@ void __efi_fpsimd_end(void)
 		kernel_neon_end();
 }
 
+#endif /* CONFIG_EFI */
+
 #endif /* CONFIG_KERNEL_MODE_NEON */
 
 #ifdef CONFIG_CPU_PM

From c73cc120a33e12e4e254b4b42bc613204ccb923b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 18 Sep 2017 11:20:19 +0100
Subject: [PATCH 040/391] arm64: relax assembly code alignment from 16 byte to
 4 byte

Aarch64 instructions must be word aligned.  The current 16 byte
alignment is more than enough.  Relax it into 4 byte alignment.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/linkage.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index 636c1bced7d4..1b266292f0be 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_LINKAGE_H
 #define __ASM_LINKAGE_H
 
-#define __ALIGN		.align 4
-#define __ALIGN_STR	".align 4"
+#define __ALIGN		.align 2
+#define __ALIGN_STR	".align 2"
 
 #endif

From 3d6a7b99e3fa29b92d6288487e057e0a596bd2b0 Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@cavium.com>
Date: Mon, 18 Sep 2017 11:20:20 +0100
Subject: [PATCH 041/391] arm64: ensure the kernel is compiled for LP64

The kernel needs to be compiled as a LP64 binary for ARM64, even when
using a compiler that defaults to code-generation for the ILP32 ABI.
Consequently, we need to explicitly pass '-mabi=lp64' (supported on
gcc-4.9 and newer).

Signed-off-by: Andrew Pinski <Andrew.Pinski@caviumnetworks.com>
Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Reviewed-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 9b41f1e3b1a0..939b310913cf 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -50,17 +50,22 @@ KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 
+KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
+KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
+
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
 AS		+= -EB
 LD		+= -EB
+LDFLAGS		+= -maarch64linuxb
 UTS_MACHINE	:= aarch64_be
 else
 KBUILD_CPPFLAGS	+= -mlittle-endian
 CHECKFLAGS	+= -D__AARCH64EL__
 AS		+= -EL
 LD		+= -EL
+LDFLAGS		+= -maarch64linux
 UTS_MACHINE	:= aarch64
 endif
 

From 0a51fb7174f2b7866b4d7a4a5c23b685b674beb6 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Sat, 9 Sep 2017 12:19:22 +0300
Subject: [PATCH 042/391] quota: add missing lock into __dquot_transfer()

Lock dq_dqb_lock around dquot_decr_inodes()

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 7b9ca4c61bc2 ("quota: Reduce contention on dq_data_lock")
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8381db9db6d9..50b0556a124f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1980,7 +1980,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
 				      &warn_to[cnt]);
 		if (ret) {
+			spin_lock(&transfer_to[cnt]->dq_dqb_lock);
 			dquot_decr_inodes(transfer_to[cnt], inode_usage);
+			spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
 			goto over_quota;
 		}
 	}

From 0ab0b271bf75073cb254b5ea0593aceae5a42bd3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 16 Sep 2017 22:32:12 +0200
Subject: [PATCH 043/391] isofs: fix build regression

The new isofs_show_options() function fails to build when CONFIG_NLS
is disabled:

fs/isofs/inode.c: In function 'isofs_show_options':
fs/isofs/inode.c:518:44: error: 'CONFIG_NLS_DEFAULT' undeclared (first use in this function)
fs/isofs/inode.c:518:44: note: each undeclared identifier is reported only once for each function it appears in

This adds a check for CONFIG_JOLIET (which selects NLS), matching
the other uses of the iocharset handling in this file.

Fixes: 6fecb86a44f5 ("isofs: Implement show_options")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/isofs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index db692f554158..447a24d77b89 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
 	if (sbi->s_fmode != ISOFS_INVALID_MODE)
 		seq_printf(m, ",fmode=%o", sbi->s_fmode);
 
+#ifdef CONFIG_JOLIET
 	if (sbi->s_nls_iocharset &&
 	    strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
 		seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+#endif
 	return 0;
 }
 

From 74378c5c8cdaf0ce9f65e67cbd0613286f2c3bad Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 5 Sep 2017 20:16:27 +0200
Subject: [PATCH 044/391] driver core: Fix link to device power management
 documentation

Correct location as of commit 2728b2d2e5be4b82 (PM / core / docs:
Convert sleep states API document to reST).

Fixes: 2728b2d2e5be4b82 (PM / core / docs: Convert sleep states API document to reST)
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index c6f27207dbe8..1d2607923a24 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -838,7 +838,7 @@ struct dev_links_info {
  * @driver_data: Private pointer for driver specific info.
  * @links:	Links to suppliers and consumers of this device.
  * @power:	For device power management.
- * 		See Documentation/power/admin-guide/devices.rst for details.
+ *		See Documentation/driver-api/pm/devices.rst for details.
  * @pm_domain:	Provide callbacks that are executed during system suspend,
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.

From 096a2c610df279d001a8f61faa3eb7f98ca6196b Mon Sep 17 00:00:00 2001
From: Rafael Wysocki <rjw@rjwysocki.net>
Date: Fri, 8 Sep 2017 01:20:54 +0200
Subject: [PATCH 045/391] ACPI / PMIC: Add code reviewers to MAINTAINERS

Andy and Mika review code changes under drivers/acpi/pmic/ on
a regular basis and I rely on their help with that, so add them
as code reviwewers for that part of the kernel.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 MAINTAINERS | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2281af4b41b6..fa4e916b72e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -352,6 +352,18 @@ L:	linux-acpi@vger.kernel.org
 S:	Maintained
 F:	drivers/acpi/arm64
 
+ACPI PMIC DRIVERS
+M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:	Len Brown <lenb@kernel.org>
+R:	Andy Shevchenko <andy@infradead.org>
+R:	Mika Westerberg <mika.westerberg@linux.intel.com>
+L:	linux-acpi@vger.kernel.org
+Q:	https://patchwork.kernel.org/project/linux-acpi/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+B:	https://bugzilla.kernel.org
+S:	Supported
+F:	drivers/acpi/pmic/
+
 ACPI THERMAL DRIVER
 M:	Zhang Rui <rui.zhang@intel.com>
 L:	linux-acpi@vger.kernel.org

From 41ba8bd0829f5c210715ece8b0f699bed0da8eb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= <jschoenh@amazon.de>
Date: Tue, 5 Sep 2017 23:14:29 +0200
Subject: [PATCH 046/391] PM / QoS: Use the correct variable to check the QoS
 request type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the actual function argument for the validation of the request type,
instead of the type field in a fresh (supposedly zero-initialized)
request structure.

Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/qos.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index f850daeffba4..277d43a83f53 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -277,11 +277,11 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
 	mutex_unlock(&dev_pm_qos_sysfs_mtx);
 }
 
-static bool dev_pm_qos_invalid_request(struct device *dev,
-				       struct dev_pm_qos_request *req)
+static bool dev_pm_qos_invalid_req_type(struct device *dev,
+					enum dev_pm_qos_req_type type)
 {
-	return !req || (req->type == DEV_PM_QOS_LATENCY_TOLERANCE
-			&& !dev->power.set_latency_tolerance);
+	return type == DEV_PM_QOS_LATENCY_TOLERANCE &&
+	       !dev->power.set_latency_tolerance;
 }
 
 static int __dev_pm_qos_add_request(struct device *dev,
@@ -290,7 +290,7 @@ static int __dev_pm_qos_add_request(struct device *dev,
 {
 	int ret = 0;
 
-	if (!dev || dev_pm_qos_invalid_request(dev, req))
+	if (!dev || !req || dev_pm_qos_invalid_req_type(dev, type))
 		return -EINVAL;
 
 	if (WARN(dev_pm_qos_request_active(req),

From 73600b619bf8b9803a0610624a0e11f5ed8f761e Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe.montjoie@gmail.com>
Date: Sat, 2 Sep 2017 10:49:38 +0200
Subject: [PATCH 047/391] mtd: nand: remove unused blockmask variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fix the following build warning:
drivers/mtd/nand/nand_base.c:2671:30: attention : variable ‘blockmask’ set but not used [-Wunused-but-set-variable]

Fixes: 0b4773fd1649 ("mtd: nand: Drop unused cached programming support")
Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_base.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index bcc8cef1c615..12edaae17d81 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2668,7 +2668,7 @@ static uint8_t *nand_fill_oob(struct mtd_info *mtd, uint8_t *oob, size_t len,
 static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 			     struct mtd_oob_ops *ops)
 {
-	int chipnr, realpage, page, blockmask, column;
+	int chipnr, realpage, page, column;
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	uint32_t writelen = ops->len;
 
@@ -2704,7 +2704,6 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 
 	realpage = (int)(to >> chip->page_shift);
 	page = realpage & chip->pagemask;
-	blockmask = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1;
 
 	/* Invalidate the page cache, when we write to the cached page */
 	if (to <= ((loff_t)chip->pagebuf << chip->page_shift) &&

From fe9c1c9555a529bf678148f719c1b9f1730f68a3 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 14 Sep 2017 14:38:58 +0200
Subject: [PATCH 048/391] xen: don't compile pv-specific parts if XEN_PV isn't
 configured

xenbus_client.c contains some functions specific for pv guests.
Enclose them with #ifdef CONFIG_XEN_PV to avoid compiling them when
they are not needed (e.g. on ARM).

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/xenbus/xenbus_client.c | 130 +++++++++++++++--------------
 1 file changed, 67 insertions(+), 63 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 82a8866758ee..a1c17000129b 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -519,64 +519,6 @@ static int __xenbus_map_ring(struct xenbus_device *dev,
 	return err;
 }
 
-static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
-				     grant_ref_t *gnt_refs,
-				     unsigned int nr_grefs,
-				     void **vaddr)
-{
-	struct xenbus_map_node *node;
-	struct vm_struct *area;
-	pte_t *ptes[XENBUS_MAX_RING_GRANTS];
-	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
-	int err = GNTST_okay;
-	int i;
-	bool leaked;
-
-	*vaddr = NULL;
-
-	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
-		return -EINVAL;
-
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
-	if (!node)
-		return -ENOMEM;
-
-	area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
-	if (!area) {
-		kfree(node);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < nr_grefs; i++)
-		phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
-
-	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
-				phys_addrs,
-				GNTMAP_host_map | GNTMAP_contains_pte,
-				&leaked);
-	if (err)
-		goto failed;
-
-	node->nr_handles = nr_grefs;
-	node->pv.area = area;
-
-	spin_lock(&xenbus_valloc_lock);
-	list_add(&node->next, &xenbus_valloc_pages);
-	spin_unlock(&xenbus_valloc_lock);
-
-	*vaddr = area->addr;
-	return 0;
-
-failed:
-	if (!leaked)
-		free_vm_area(area);
-	else
-		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
-
-	kfree(node);
-	return err;
-}
-
 struct map_ring_valloc_hvm
 {
 	unsigned int idx;
@@ -725,6 +667,65 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 }
 EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
 
+#ifdef CONFIG_XEN_PV
+static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
+				     grant_ref_t *gnt_refs,
+				     unsigned int nr_grefs,
+				     void **vaddr)
+{
+	struct xenbus_map_node *node;
+	struct vm_struct *area;
+	pte_t *ptes[XENBUS_MAX_RING_GRANTS];
+	phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS];
+	int err = GNTST_okay;
+	int i;
+	bool leaked;
+
+	*vaddr = NULL;
+
+	if (nr_grefs > XENBUS_MAX_RING_GRANTS)
+		return -EINVAL;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes);
+	if (!area) {
+		kfree(node);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nr_grefs; i++)
+		phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
+
+	err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
+				phys_addrs,
+				GNTMAP_host_map | GNTMAP_contains_pte,
+				&leaked);
+	if (err)
+		goto failed;
+
+	node->nr_handles = nr_grefs;
+	node->pv.area = area;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
+
+	*vaddr = area->addr;
+	return 0;
+
+failed:
+	if (!leaked)
+		free_vm_area(area);
+	else
+		pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
+
+	kfree(node);
+	return err;
+}
+
 static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 {
 	struct xenbus_map_node *node;
@@ -788,6 +789,12 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
 	return err;
 }
 
+static const struct xenbus_ring_ops ring_ops_pv = {
+	.map = xenbus_map_ring_valloc_pv,
+	.unmap = xenbus_unmap_ring_vfree_pv,
+};
+#endif
+
 struct unmap_ring_vfree_hvm
 {
 	unsigned int idx;
@@ -916,11 +923,6 @@ enum xenbus_state xenbus_read_driver_state(const char *path)
 }
 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
 
-static const struct xenbus_ring_ops ring_ops_pv = {
-	.map = xenbus_map_ring_valloc_pv,
-	.unmap = xenbus_unmap_ring_vfree_pv,
-};
-
 static const struct xenbus_ring_ops ring_ops_hvm = {
 	.map = xenbus_map_ring_valloc_hvm,
 	.unmap = xenbus_unmap_ring_vfree_hvm,
@@ -928,8 +930,10 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
 
 void __init xenbus_ring_ops_init(void)
 {
+#ifdef CONFIG_XEN_PV
 	if (!xen_feature(XENFEAT_auto_translated_physmap))
 		ring_ops = &ring_ops_pv;
 	else
+#endif
 		ring_ops = &ring_ops_hvm;
 }

From b0ade85165b3caeb0cd908cffe5921a39f25c243 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 10 Sep 2017 13:41:41 +0200
Subject: [PATCH 049/391] netfilter: nat: Do not use ARRAY_SIZE() on spinlocks
 to fix zero div
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If no spinlock debugging options (CONFIG_GENERIC_LOCKBREAK,
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_LOCK_ALLOC) are enabled on a UP
platform (e.g. m68k defconfig), arch_spinlock_t is an empty struct,
hence using ARRAY_SIZE(nf_nat_locks) causes a division by zero:

    net/netfilter/nf_nat_core.c: In function ‘nf_nat_setup_info’:
    net/netfilter/nf_nat_core.c:432: warning: division by zero
    net/netfilter/nf_nat_core.c: In function ‘__nf_nat_cleanup_conntrack’:
    net/netfilter/nf_nat_core.c:535: warning: division by zero
    net/netfilter/nf_nat_core.c:537: warning: division by zero
    net/netfilter/nf_nat_core.c: In function ‘nf_nat_init’:
    net/netfilter/nf_nat_core.c:810: warning: division by zero
    net/netfilter/nf_nat_core.c:811: warning: division by zero
    net/netfilter/nf_nat_core.c:824: warning: division by zero

Fix this by using the CONNTRACK_LOCKS definition instead.

Suggested-by: Florian Westphal <fw@strlen.de>
Fixes: 8073e960a03bf7b5 ("netfilter: nat: use keyed locks")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index f393a7086025..af8345fc4fbd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -429,7 +429,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 
 		srchash = hash_by_src(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-		lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)];
+		lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
 		spin_lock_bh(lock);
 		hlist_add_head_rcu(&ct->nat_bysource,
 				   &nf_nat_bysource[srchash]);
@@ -532,9 +532,9 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
 	unsigned int h;
 
 	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+	spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
 	hlist_del_rcu(&ct->nat_bysource);
-	spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
+	spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
 }
 
 static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
@@ -807,8 +807,8 @@ static int __init nf_nat_init(void)
 
 	/* Leave them the same for the moment. */
 	nf_nat_htable_size = nf_conntrack_htable_size;
-	if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks))
-		nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks);
+	if (nf_nat_htable_size < CONNTRACK_LOCKS)
+		nf_nat_htable_size = CONNTRACK_LOCKS;
 
 	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
 	if (!nf_nat_bysource)
@@ -821,7 +821,7 @@ static int __init nf_nat_init(void)
 		return ret;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++)
+	for (i = 0; i < CONNTRACK_LOCKS; i++)
 		spin_lock_init(&nf_nat_locks[i]);
 
 	nf_ct_helper_expectfn_register(&follow_master_nat);

From 7f4f7dd4417d9efd038b14d39c70170db2e0baa0 Mon Sep 17 00:00:00 2001
From: Vishwanath Pai <vpai@akamai.com>
Date: Mon, 11 Sep 2017 21:52:40 +0200
Subject: [PATCH 050/391] netfilter: ipset: ipset list may return wrong member
 count for set with timeout

Simple testcase:

$ ipset create test hash:ip timeout 5
$ ipset add test 1.2.3.4
$ ipset add test 1.2.2.2
$ sleep 5

$ ipset l
Name: test
Type: hash:ip
Revision: 5
Header: family inet hashsize 1024 maxelem 65536 timeout 5
Size in memory: 296
References: 0
Number of entries: 2
Members:

We return "Number of entries: 2" but no members are listed. That is
because mtype_list runs "ip_set_timeout_expired" and does not list the
expired entries, but set->elements is never upated (until mtype_gc
cleans it up later).

Reviewed-by: Joshua Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index f236c0bc7b3f..51063d9ed0f7 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1041,12 +1041,24 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 static int
 mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
-	const struct htype *h = set->data;
+	struct htype *h = set->data;
 	const struct htable *t;
 	struct nlattr *nested;
 	size_t memsize;
 	u8 htable_bits;
 
+	/* If any members have expired, set->elements will be wrong
+	 * mytype_expire function will update it with the right count.
+	 * we do not hold set->lock here, so grab it first.
+	 * set->elements can still be incorrect in the case of a huge set,
+	 * because elements might time out during the listing.
+	 */
+	if (SET_WITH_TIMEOUT(set)) {
+		spin_lock_bh(&set->lock);
+		mtype_expire(set, h);
+		spin_unlock_bh(&set->lock);
+	}
+
 	rcu_read_lock_bh();
 	t = rcu_dereference_bh_nfnl(h->table);
 	memsize = mtype_ahash_memsize(h, t) + set->ext_size;

From 64cfcaed7b25f69d8b7a091a23961f50c6788a66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20D=C3=ADaz?= <daniel.diaz@linaro.org>
Date: Fri, 7 Jul 2017 10:27:06 -0500
Subject: [PATCH 051/391] selftests: net: More graceful finding of `ip'.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ip tool might be provided by another package (such as
Busybox), not necessarily implementing the -Version switch.
Trying an actual usage (`ip link show') might be a better
test that would work with all implementations of `ip'.

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/netdevice.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 4e00568d70c2..90cb903c3381 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -178,7 +178,7 @@ if [ "$(id -u)" -ne 0 ];then
 	exit 0
 fi
 
-ip -Version 2>/dev/null >/dev/null
+ip link show 2>/dev/null >/dev/null
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not run test without the ip tool"
 	exit 0

From 96e5335859e30fa2bb2da8befc86292c3c1a73dd Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.vnet.ibm.com>
Date: Mon, 11 Sep 2017 12:49:01 +0200
Subject: [PATCH 052/391] tools: fix testing/selftests/sigaltstack for s390x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On s390x the compilation of the file sas.c in directory
tools/testing/selftests/sigaltstack fails with this error message:

   root@s35lp76 testing]# make selftests/sigaltstack/sas
   cc     selftests/sigaltstack/sas.c   -o selftests/sigaltstack/sas
   selftests/sigaltstack/sas.c: In function ‘my_usr1’:
   selftests/sigaltstack/sas.c:42:25: error: invalid register name for ‘sp’
     register unsigned long sp asm("sp");
                         ^~
   <builtin>: recipe for target 'selftests/sigaltstack/sas' failed
   make: *** [selftests/sigaltstack/sas] Error 1
   [root@s35lp76 testing]#

On s390x the stack pointer is register r15, the register name "sp"
is unknown.
Make this line platform dependend and use register r15.

With this patch the compilation and test succeeds:

   [root@s35lp76 testing]# ./selftests/sigaltstack/sas
   TAP version 13
   ok 1 Initial sigaltstack state was SS_DISABLE
   # [RUN]	signal USR1
   ok 2 sigaltstack is disabled in sighandler
   # [RUN]	switched to user ctx
   # [RUN]	signal USR2
   # [OK]	Stack preserved
   ok 3 sigaltstack is still SS_AUTODISARM after signal
   Pass 3 Fail 0 Xfail 0 Xpass 0 Skip 0 Error 0
   1..3
   [root@s35lp76 testing]#

Signed-off-by: Thomas Richter <tmricht@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/sigaltstack/sas.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 7d406c3973ba..97bb150837df 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -39,7 +39,11 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 	stack_t stk;
 	struct stk_data *p;
 
+#if __s390x__
+	register unsigned long sp asm("%15");
+#else
 	register unsigned long sp asm("sp");
+#endif
 
 	if (sp < (unsigned long)sstack ||
 			sp >= (unsigned long)sstack + SIGSTKSZ) {

From 172a8ca880f1c8b01bc4d1e0239bcb293ef65e0b Mon Sep 17 00:00:00 2001
From: Fathi Boudra <fathi.boudra@linaro.org>
Date: Thu, 29 Jun 2017 12:39:53 +0300
Subject: [PATCH 053/391] selftests: breakpoints: re-order TEST_GEN_PROGS
 targets

breakpoint_test can fail on arm64 with older/unpatched glibc:

 breakpoint_test_arm64.c: In function 'run_test':
 breakpoint_test_arm64.c:170:25: error: 'TRAP_HWBKPT' undeclared (first use
 in this function)

due to glibc missing several of the TRAP_* constants in the userspace
definitions. Specifically TRAP_BRANCH and TRAP_HWBKPT.
See https://sourceware.org/bugzilla/show_bug.cgi?id=21286

It prevents to build step_after_suspend_test afterward, since make won't
continue.

We still want to be able to build and run the test, independently of
breakpoint_test_arm64 build failure. Re-order TEST_GEN_PROGS to be able to
build step_after_suspend_test first.

Signed-off-by: Fathi Boudra <fathi.boudra@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/breakpoints/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index 6b214b7b10fb..247b0a1899d7 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -2,14 +2,14 @@
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
+TEST_GEN_PROGS := step_after_suspend_test
+
 ifeq ($(ARCH),x86)
-TEST_GEN_PROGS := breakpoint_test
+TEST_GEN_PROGS += breakpoint_test
 endif
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-TEST_GEN_PROGS := breakpoint_test_arm64
+TEST_GEN_PROGS += breakpoint_test_arm64
 endif
 
-TEST_GEN_PROGS += step_after_suspend_test
-
 include ../lib.mk
 

From 67b2e30eb7b346db60afe91b0927738fb604d0a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20D=C3=ADaz?= <daniel.diaz@linaro.org>
Date: Thu, 17 Aug 2017 10:55:30 -0500
Subject: [PATCH 054/391] selftests: intel_pstate: build only on x86
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These tests are only for x86, so don't try to build or run
them on other architectures.

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/intel_pstate/Makefile | 2 ++
 tools/testing/selftests/intel_pstate/run.sh   | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 849a90ffe8dd..a97e24edde39 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,7 +1,9 @@
 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
 LDLIBS := $(LDLIBS) -lm
 
+ifeq (,$(filter $(ARCH),x86))
 TEST_GEN_FILES := msr aperf
+endif
 
 TEST_PROGS := run.sh
 
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index 7868c106b8b1..1b4b8302dfc2 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -29,6 +29,11 @@
 
 EVALUATE_ONLY=0
 
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+	echo "$0 # Skipped: Test can only run on x86 architectures."
+	exit 0
+fi
+
 max_cpus=$(($(nproc)-1))
 
 # compile programs

From 6f0003363a13be699ba945cfa4074193e04fbea5 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 14:01:18 +0200
Subject: [PATCH 055/391] selftests/intel_pstate: No need to compile test progs
 in the run script

Both test programs are being compiled by make, so no need to compile both
programs in the runner script.
This resolves an error when installing all selftests via make install
and run them in a different environemnt.

Running tests in intel_pstate
========================================
./run.sh: line 35: gcc: command not found
Problem compiling aperf.c.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/intel_pstate/run.sh | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index 1b4b8302dfc2..d3ab48f91cd6 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -36,12 +36,6 @@ fi
 
 max_cpus=$(($(nproc)-1))
 
-# compile programs
-gcc aperf.c -Wall -D_GNU_SOURCE -o aperf  -lm
-[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1
-gcc -o msr msr.c -lm
-[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1
-
 function run_test () {
 
 	file_ext=$1

From 56a268cd4a410081d477913d51e044039d3a8834 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 13:19:23 +0200
Subject: [PATCH 056/391] selftests/bpf: Make bpf_util work on uniprocessor
 systems

The current implementation fails to work on uniprocessor systems.
Fix the parser to also handle the uniprocessor case.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/bpf/bpf_util.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 20ecbaa0d85d..6c53a8906eff 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -12,6 +12,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	unsigned int start, end, possible_cpus = 0;
 	char buff[128];
 	FILE *fp;
+	int n;
 
 	fp = fopen(fcpu, "r");
 	if (!fp) {
@@ -20,17 +21,17 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	}
 
 	while (fgets(buff, sizeof(buff), fp)) {
-		if (sscanf(buff, "%u-%u", &start, &end) == 2) {
-			possible_cpus = start == 0 ? end + 1 : 0;
-			break;
+		n = sscanf(buff, "%u-%u", &start, &end);
+		if (n == 0) {
+			printf("Failed to retrieve # possible CPUs!\n");
+			exit(1);
+		} else if (n == 1) {
+			end = start;
 		}
+		possible_cpus = start == 0 ? end + 1 : 0;
+		break;
 	}
-
 	fclose(fp);
-	if (!possible_cpus) {
-		printf("Failed to retrieve # possible CPUs!\n");
-		exit(1);
-	}
 
 	return possible_cpus;
 }

From 84c06566cfb84e5f6aed9582d4570df8406700c9 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 14:01:17 +0200
Subject: [PATCH 057/391] selftests/ftrace: multiple_kprobes: Also check for
 support

The multiple_kprobes test case fails to check for KPROBE_EVENT support.
Add the check to prevent a false test result.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 .../testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 2a1cb9908746..a4fd4c851a5b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -1,6 +1,8 @@
 #!/bin/sh
 # description: Register/unregister many kprobe events
 
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
 # ftrace fentry skip size depends on the machine architecture.
 # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
 case `uname -m` in

From 63ecc3d9436f8012e49dc846d6cb0a85a3433517 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 13 Sep 2017 19:30:51 -0600
Subject: [PATCH 058/391] udpv6: Fix the checksum computation when HW checksum
 does not apply

While trying an ESP transport mode encryption for UDPv6 packets of
datagram size 1436 with MTU 1500, checksum error was observed in
the secondary fragment.

This error occurs due to the UDP payload checksum being missed out
when computing the full checksum for these packets in
udp6_hwcsum_outgoing().

Fixes: d39d938c8228 ("ipv6: Introduce udpv6_send_skb()")
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e2ecfb137297..40d7234c27b9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1015,6 +1015,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 		 */
 		offset = skb_transport_offset(skb);
 		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		csum = skb->csum;
 
 		skb->ip_summed = CHECKSUM_NONE;
 

From 265698d7e6132a2d41471135534f4f36ad15b09c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 18 Sep 2017 22:46:36 +0200
Subject: [PATCH 059/391] nl80211: fix null-ptr dereference on invalid mesh
 configuration

If TX rates are specified during mesh join, the channel must
also be specified. Check the channel pointer to avoid a null
pointer dereference if it isn't.

Reported-by: Jouni Malinen <j@w1.fi>
Fixes: 8564e38206de ("cfg80211: add checks for beacon rate, extend to mesh")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fbd5593e88cb..690874293cfc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9987,6 +9987,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 		if (err)
 			return err;
 
+		if (!setup.chandef.chan)
+			return -EINVAL;
+
 		err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
 					      &setup.beacon_rate);
 		if (err)

From 76cc0d3282d4b933fa144fa41fbc5318e0fdca24 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 15 Sep 2017 12:00:07 +0800
Subject: [PATCH 060/391] ip6_gre: skb_push ipv6hdr before packing the header
 in ip6gre_header

Now in ip6gre_header before packing the ipv6 header, it skb_push t->hlen
which only includes encap_hlen + tun_hlen. It means greh and inner header
would be over written by ipv6 stuff and ipv6h might have no chance to set
up.

Jianlin found this issue when using remote any on ip6_gre, the packets he
captured on gre dev are truncated:

22:50:26.210866 Out ethertype IPv6 (0x86dd), length 120: truncated-ip6 -\
8128 bytes missing!(flowlabel 0x92f40, hlim 0, next-header Options (0)  \
payload length: 8192) ::1:2000:0 > ::1:0:86dd: HBH [trunc] ip-proto-128 \
8184

It should also skb_push ipv6hdr so that ipv6h points to the right position
to set ipv6 stuff up.

This patch is to skb_push hlen + sizeof(*ipv6h) and also fix some indents
in ip6gre_header.

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b7a72d409334..20f66f4c9460 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -940,24 +940,25 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
 }
 
 static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
-			unsigned short type,
-			const void *daddr, const void *saddr, unsigned int len)
+			 unsigned short type, const void *daddr,
+			 const void *saddr, unsigned int len)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct ipv6hdr *ipv6h = skb_push(skb, t->hlen);
-	__be16 *p = (__be16 *)(ipv6h+1);
+	struct ipv6hdr *ipv6h;
+	__be16 *p;
 
-	ip6_flow_hdr(ipv6h, 0,
-		     ip6_make_flowlabel(dev_net(dev), skb,
-					t->fl.u.ip6.flowlabel, true,
-					&t->fl.u.ip6));
+	ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h));
+	ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
+						  t->fl.u.ip6.flowlabel,
+						  true, &t->fl.u.ip6));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = NEXTHDR_GRE;
 	ipv6h->saddr = t->parms.laddr;
 	ipv6h->daddr = t->parms.raddr;
 
-	p[0]		= t->parms.o_flags;
-	p[1]		= htons(type);
+	p = (__be16 *)(ipv6h + 1);
+	p[0] = t->parms.o_flags;
+	p[1] = htons(type);
 
 	/*
 	 *	Set the source hardware address.

From 7b4dc3c0da0d66e7b20a826c537d41bb73e4df54 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Fri, 18 Aug 2017 17:49:58 +0800
Subject: [PATCH 061/391] drm/i915/gvt: Fix incorrect PCI BARs reporting

Looking at our virtual PCI device, we can see surprising Region 4 and Region 5.
00:10.0 VGA compatible controller: Intel Corporation Sky Lake Integrated Graphics (rev 06) (prog-if 00 [VGA controller])
        ....
        Region 0: Memory at 140000000 (64-bit, non-prefetchable) [size=16M]
        Region 2: Memory at 180000000 (64-bit, prefetchable) [size=1G]
        Region 4: Memory at <ignored> (32-bit, non-prefetchable)
        Region 5: Memory at <ignored> (32-bit, non-prefetchable)
        Expansion ROM at febd6000 [disabled] [size=2K]

The fact is that we only implemented BAR0 and BAR2. Surprising Region 4 and
Region 5 are shown because we report their size as 0xffffffff. They should
report size 0 instead.

BTW, the physical GPU has a PIO BAR. GVTg hasn't implemented PIO access, so
we ignored this BAR for vGPU device.

v2: fix BAR size value calculation.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1458032
Signed-off-by: Changbin Du <changbin.du@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
(cherry picked from commit f1751362d6357a90bc6e53176cec715ff2dbed74)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/gvt/cfg_space.c | 113 ++++++++++++---------------
 1 file changed, 48 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index 40af17ec6312..ff3154fe6588 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -197,78 +197,65 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu,
 static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
 	void *p_data, unsigned int bytes)
 {
-	unsigned int bar_index =
-		(rounddown(offset, 8) % PCI_BASE_ADDRESS_0) / 8;
 	u32 new = *(u32 *)(p_data);
 	bool lo = IS_ALIGNED(offset, 8);
 	u64 size;
 	int ret = 0;
 	bool mmio_enabled =
 		vgpu_cfg_space(vgpu)[PCI_COMMAND] & PCI_COMMAND_MEMORY;
+	struct intel_vgpu_pci_bar *bars = vgpu->cfg_space.bar;
 
-	if (WARN_ON(bar_index >= INTEL_GVT_PCI_BAR_MAX))
-		return -EINVAL;
-
+	/*
+	 * Power-up software can determine how much address
+	 * space the device requires by writing a value of
+	 * all 1's to the register and then reading the value
+	 * back. The device will return 0's in all don't-care
+	 * address bits.
+	 */
 	if (new == 0xffffffff) {
-		/*
-		 * Power-up software can determine how much address
-		 * space the device requires by writing a value of
-		 * all 1's to the register and then reading the value
-		 * back. The device will return 0's in all don't-care
-		 * address bits.
-		 */
-		size = vgpu->cfg_space.bar[bar_index].size;
-		if (lo) {
-			new = rounddown(new, size);
-		} else {
-			u32 val = vgpu_cfg_space(vgpu)[rounddown(offset, 8)];
-			/* for 32bit mode bar it returns all-0 in upper 32
-			 * bit, for 64bit mode bar it will calculate the
-			 * size with lower 32bit and return the corresponding
-			 * value
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			size = ~(bars[INTEL_GVT_PCI_BAR_GTTMMIO].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
+			/*
+			 * Untrap the BAR, since guest hasn't configured a
+			 * valid GPA
 			 */
-			if (val & PCI_BASE_ADDRESS_MEM_TYPE_64)
-				new &= (~(size-1)) >> 32;
-			else
-				new = 0;
-		}
-		/*
-		 * Unmapp & untrap the BAR, since guest hasn't configured a
-		 * valid GPA
-		 */
-		switch (bar_index) {
-		case INTEL_GVT_PCI_BAR_GTTMMIO:
 			ret = trap_gttmmio(vgpu, false);
 			break;
-		case INTEL_GVT_PCI_BAR_APERTURE:
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			size = ~(bars[INTEL_GVT_PCI_BAR_APERTURE].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
 			ret = map_aperture(vgpu, false);
 			break;
+		default:
+			/* Unimplemented BARs */
+			intel_vgpu_write_pci_bar(vgpu, offset, 0x0, false);
 		}
-		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
 	} else {
-		/*
-		 * Unmapp & untrap the old BAR first, since guest has
-		 * re-configured the BAR
-		 */
-		switch (bar_index) {
-		case INTEL_GVT_PCI_BAR_GTTMMIO:
-			ret = trap_gttmmio(vgpu, false);
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			/*
+			 * Untrap the old BAR first, since guest has
+			 * re-configured the BAR
+			 */
+			trap_gttmmio(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			ret = trap_gttmmio(vgpu, mmio_enabled);
 			break;
-		case INTEL_GVT_PCI_BAR_APERTURE:
-			ret = map_aperture(vgpu, false);
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			map_aperture(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			ret = map_aperture(vgpu, mmio_enabled);
 			break;
-		}
-		intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
-		/* Track the new BAR */
-		if (mmio_enabled) {
-			switch (bar_index) {
-			case INTEL_GVT_PCI_BAR_GTTMMIO:
-				ret = trap_gttmmio(vgpu, true);
-				break;
-			case INTEL_GVT_PCI_BAR_APERTURE:
-				ret = map_aperture(vgpu, true);
-				break;
-			}
+		default:
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
 		}
 	}
 	return ret;
@@ -299,10 +286,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 	}
 
 	switch (rounddown(offset, 4)) {
-	case PCI_BASE_ADDRESS_0:
-	case PCI_BASE_ADDRESS_1:
-	case PCI_BASE_ADDRESS_2:
-	case PCI_BASE_ADDRESS_3:
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5:
 		if (WARN_ON(!IS_ALIGNED(offset, 4)))
 			return -EINVAL;
 		return emulate_pci_bar_write(vgpu, offset, p_data, bytes);
@@ -344,7 +328,6 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
 	struct intel_gvt *gvt = vgpu->gvt;
 	const struct intel_gvt_device_info *info = &gvt->device_info;
 	u16 *gmch_ctl;
-	int i;
 
 	memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space,
 	       info->cfg_space_size);
@@ -371,13 +354,13 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
 	 */
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4);
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4);
+	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_4, 0, 8);
 	memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
 
-	for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) {
-		vgpu->cfg_space.bar[i].size = pci_resource_len(
-					      gvt->dev_priv->drm.pdev, i * 2);
-		vgpu->cfg_space.bar[i].tracked = false;
-	}
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
+				pci_resource_len(gvt->dev_priv->drm.pdev, 0);
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
+				pci_resource_len(gvt->dev_priv->drm.pdev, 2);
 }
 
 /**

From 814feed316e9d15b0ae869ca729370e7630b8d13 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 10 Sep 2017 10:56:42 +0200
Subject: [PATCH 062/391] drm/i915: Fix an error handling in
 'intel_framebuffer_init()'

We should go through the error handling path to decrease the
'framebuffer_references' as done everywhere else in this function.

Fixes: 2e2adb05736c ("drm/i915: Add render decompression support")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170910085642.13673-1-christophe.jaillet@wanadoo.fr
(cherry picked from commit 37875d6b3af702425ce292de81b77faf94766616)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f17275519484..00cd17c76fdc 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14030,7 +14030,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 
 		if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
 			DRM_DEBUG_KMS("bad plane %d handle\n", i);
-			return -EINVAL;
+			goto err;
 		}
 
 		stride_alignment = intel_fb_stride_alignment(fb, i);

From ac73661c6222faef1a97ec44f424234f165e4710 Mon Sep 17 00:00:00 2001
From: "Lee, Shawn C" <shawn.c.lee@intel.com>
Date: Tue, 12 Sep 2017 11:36:30 +0800
Subject: [PATCH 063/391] drm/i915/bxt: set min brightness from VBT

Min brightness value from vbt was missing for BXT platform.
This setting have to refer backlight ic spec to restrict
min backlight output. Without this restriction, driver would
allow to configure lower brightness value and violate
backlight ic requirement.

Fixes: 0fb890c01349 ("drm/i915/bxt: BLC implementation")
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Cooper Chiou <cooper.chiou@intel.com>
Cc: Gary C Wang <gary.c.wang@intel.com>
Signed-off-by: Shawn Lee <shawn.c.lee@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1505187390-7039-1-git-send-email-shawn.c.lee@intel.com
(cherry picked from commit c3881128cb672cf484a52fbb36b5daa9044d9168)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_panel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index a17b1de7d7e0..d4dd248ac9a8 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1699,6 +1699,8 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused)
 	if (!panel->backlight.max)
 		return -ENODEV;
 
+	panel->backlight.min = get_backlight_min_vbt(connector);
+
 	val = bxt_get_backlight(connector);
 	val = intel_panel_compute_brightness(connector, val);
 	panel->backlight.level = clamp(val, panel->backlight.min,

From abeae421b03d800d33894df7fbca6d00c70c358e Mon Sep 17 00:00:00 2001
From: Uma Shankar <uma.shankar@intel.com>
Date: Tue, 5 Sep 2017 15:14:31 +0530
Subject: [PATCH 064/391] Revert "drm/i915/bxt: Disable device ready before
 shutdown command"

This reverts commit bbdf0b2ff32a ("drm/i915/bxt: Disable device ready
before shutdown command").

Disable device ready before shutdown command was added previously to
avoid a split screen issue seen on dual link DSI panels. As of now, dual
link is not supported and will need some rework in the upstream
code. For single link DSI panels, the change is not required. This will
cause failure in sending SHUTDOWN packet during disable. Hence reverting
the change. Will handle the change as part of dual link enabling in
upstream.

Fixes: bbdf0b2ff32a ("drm/i915/bxt: Disable device ready before shutdown command")
Cc: <stable@vger.kernel.org> # v4.12+
Signed-off-by: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: Vidya Srinivas <vidya.srinivas@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1504604671-17237-1-git-send-email-vidya.srinivas@intel.com
(cherry picked from commit 33c8d8870c67faf3161898a56af98ac3c1c71450)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_dsi.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index f0c11aec5ea5..7442891762be 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -892,8 +892,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder,
 			      struct intel_crtc_state *old_crtc_state,
 			      struct drm_connector_state *old_conn_state)
 {
-	struct drm_device *dev = encoder->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
 
@@ -902,15 +900,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder,
 	intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF);
 	intel_panel_disable_backlight(old_conn_state);
 
-	/*
-	 * Disable Device ready before the port shutdown in order
-	 * to avoid split screen
-	 */
-	if (IS_BROXTON(dev_priv)) {
-		for_each_dsi_port(port, intel_dsi->ports)
-			I915_WRITE(MIPI_DEVICE_READY(port), 0);
-	}
-
 	/*
 	 * According to the spec we should send SHUTDOWN before
 	 * MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing

From 8c7a758873577f0d1bb3f879584338f73e6c6bc3 Mon Sep 17 00:00:00 2001
From: "Lee, Shawn C" <shawn.c.lee@intel.com>
Date: Wed, 13 Sep 2017 13:19:20 +0800
Subject: [PATCH 065/391] drm/i915/cnp: set min brightness from VBT

Min brightness value from vbt was missing for CNP platform.
This setting have to refer backlight ic spec to restrict
min backlight output. Without this restriction, driver would
allow to configure lower brightness value and violate
backlight ic requirement.

Fixes: 4c9f7086ac6d ("drm/i915/cnp: Backlight support for CNP.")
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Shawn Lee <shawn.c.lee@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1505279961-16140-1-git-send-email-shawn.c.lee@intel.com
(cherry picked from commit f44e354f857f207cd361269c5e38e1f96e0b616c)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_panel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index d4dd248ac9a8..3b1c5d783ee7 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1737,6 +1737,8 @@ cnp_setup_backlight(struct intel_connector *connector, enum pipe unused)
 	if (!panel->backlight.max)
 		return -ENODEV;
 
+	panel->backlight.min = get_backlight_min_vbt(connector);
+
 	val = bxt_get_backlight(connector);
 	val = intel_panel_compute_brightness(connector, val);
 	panel->backlight.level = clamp(val, panel->backlight.min,

From 99df13b6ea811a63eeacb278d05a5b914ce28073 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 14 Sep 2017 17:42:13 +0100
Subject: [PATCH 066/391] drm/i915: Remove unused 'in_vbl' from
 i915_get_crtc_scanoutpos()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 1bf6ad622b9b ("drm/vblank: drop the mode argument from
drm_calc_vbltimestamp_from_scanoutpos") removed the use of in_vbl, but
did not remove the local variable. Do so now.

Fixes: 1bf6ad622b9b ("drm/vblank: drop the mode argument from drm_calc_vbltimestamp_from_scanoutpos")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170914164213.18461-1-chris@chris-wilson.co.uk
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit e01e71fc49d4c95090a04f898a3fe788c652a04b)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e21ce9c18b6e..b63893eeca73 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -839,7 +839,6 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 								pipe);
 	int position;
 	int vbl_start, vbl_end, hsync_start, htotal, vtotal;
-	bool in_vbl = true;
 	unsigned long irqflags;
 
 	if (WARN_ON(!mode->crtc_clock)) {
@@ -922,8 +921,6 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
 
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 
-	in_vbl = position >= vbl_start && position < vbl_end;
-
 	/*
 	 * While in vblank, position will be negative
 	 * counting up towards 0 at vbl_end. And outside

From f2654a4781318dc7ab8d6cde66f1fa39eab980a9 Mon Sep 17 00:00:00 2001
From: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
Date: Fri, 15 Sep 2017 12:01:58 +0530
Subject: [PATCH 067/391] net: phy: Fix mask value write on gmii2rgmii
 converter speed register

To clear Speed Selection in MDIO control register(0x10),
ie, clear bits 6 and 13 to zero while keeping other bits same.
Before AND operation,The Mask value has to be perform with bitwise NOT
operation (ie, ~ operator)

This patch clears current speed selection before writing the
new speed settings to gmii2rgmii converter

Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support")

Signed-off-by: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/xilinx_gmii2rgmii.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
index d15dd3938ba8..2e5150b0b8d5 100644
--- a/drivers/net/phy/xilinx_gmii2rgmii.c
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -44,7 +44,7 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
 	priv->phy_drv->read_status(phydev);
 
 	val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
-	val &= XILINX_GMII2RGMII_SPEED_MASK;
+	val &= ~XILINX_GMII2RGMII_SPEED_MASK;
 
 	if (phydev->speed == SPEED_1000)
 		val |= BMCR_SPEED1000;

From 8c22dab03ad072e45060c299c70d02a4f6fc4aab Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 15 Sep 2017 15:58:33 +0800
Subject: [PATCH 068/391] ip6_tunnel: do not allow loading ip6_tunnel if ipv6
 is disabled in cmdline

If ipv6 has been disabled from cmdline since kernel started, it makes
no sense to allow users to create any ip6 tunnel. Otherwise, it could
some potential problem.

Jianlin found a kernel crash caused by this in ip6_gre when he set
ipv6.disable=1 in grub:

[  209.588865] Unable to handle kernel paging request for data at address 0x00000080
[  209.588872] Faulting instruction address: 0xc000000000a3aa6c
[  209.588879] Oops: Kernel access of bad area, sig: 11 [#1]
[  209.589062] NIP [c000000000a3aa6c] fib_rules_lookup+0x4c/0x260
[  209.589071] LR [c000000000b9ad90] fib6_rule_lookup+0x50/0xb0
[  209.589076] Call Trace:
[  209.589097] fib6_rule_lookup+0x50/0xb0
[  209.589106] rt6_lookup+0xc4/0x110
[  209.589116] ip6gre_tnl_link_config+0x214/0x2f0 [ip6_gre]
[  209.589125] ip6gre_newlink+0x138/0x3a0 [ip6_gre]
[  209.589134] rtnl_newlink+0x798/0xb80
[  209.589142] rtnetlink_rcv_msg+0xec/0x390
[  209.589151] netlink_rcv_skb+0x138/0x150
[  209.589159] rtnetlink_rcv+0x48/0x70
[  209.589169] netlink_unicast+0x538/0x640
[  209.589175] netlink_sendmsg+0x40c/0x480
[  209.589184] ___sys_sendmsg+0x384/0x4e0
[  209.589194] SyS_sendmsg+0xd4/0x140
[  209.589201] SyS_socketcall+0x3e0/0x4f0
[  209.589209] system_call+0x38/0xe0

This patch is to return -EOPNOTSUPP in ip6_tunnel_init if ipv6 has been
disabled from cmdline.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ae73164559d5..f2f21c24915f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2259,6 +2259,9 @@ static int __init ip6_tunnel_init(void)
 {
 	int  err;
 
+	if (!ipv6_mod_enabled())
+		return -EOPNOTSUPP;
+
 	err = register_pernet_device(&ip6_tnl_net_ops);
 	if (err < 0)
 		goto out_pernet;

From 3ff4cbec87da48b0ec1f7b6196607b034de0c680 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Sat, 16 Sep 2017 14:02:21 +0200
Subject: [PATCH 069/391] net/sched: cls_matchall: fix crash when used with
 classful qdisc

this script, edited from Linux Advanced Routing and Traffic Control guide

tc q a dev en0 root handle 1: htb default a
tc c a dev en0 parent 1:  classid 1:1 htb rate 6mbit burst 15k
tc c a dev en0 parent 1:1 classid 1:a htb rate 5mbit ceil 6mbit burst 15k
tc c a dev en0 parent 1:1 classid 1:b htb rate 1mbit ceil 6mbit burst 15k
tc f a dev en0 parent 1:0 prio 1 $clsname $clsargs classid 1:b
ping $address -c1
tc -s c s dev en0

classifies traffic to 1:b or 1:a, depending on whether the packet matches
or not the pattern $clsargs of filter $clsname. However, when $clsname is
'matchall', a systematic crash can be observed in htb_classify(). HTB and
classful qdiscs don't assign initial value to struct tcf_result, but then
they expect it to contain valid values after filters have been run. Thus,
current 'matchall' ignores the TCA_MATCHALL_CLASSID attribute, configured
by user, and makes HTB (and classful qdiscs) dereference random pointers.

By assigning head->res to *res in mall_classify(), before the actions are
invoked, we fix this crash and enable TCA_MATCHALL_CLASSID functionality,
that had no effect on 'matchall' classifier since its first introduction.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1460213
Reported-by: Jiri Benc <jbenc@redhat.com>
Fixes: b87f7936a932 ("net/sched: introduce Match-all classifier")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Yotam Gigi <yotamg@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_matchall.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 21cc45caf842..eeac606c95ab 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (tc_skip_sw(head->flags))
 		return -1;
 
+	*res = head->res;
 	return tcf_exts_exec(skb, &head->exts, res);
 }
 

From 51513748ddfe7a5d2812158a1e0570499b0c511c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 16 Sep 2017 13:10:06 -0700
Subject: [PATCH 070/391] Documentation: networking: fix ASCII art in
 switchdev.txt

Fix ASCII art in Documentation/networking/switchdev.txt:

Change non-ASCII "spaces" to ASCII spaces.

Change 2 erroneous '+' characters in ASCII art to '-' (at the '*'
characters below):

line 32:
                     +--+----+----+----+-*--+----+---+  +-----+-----+
line 41:
                     +--------------+---*------------+

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.txt | 64 +++++++++++++-------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 5e40e1f68873..82236a17b5e6 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -13,42 +13,42 @@ an example setup using a data-center-class switch ASIC chip.  Other setups
 with SR-IOV or soft switches, such as OVS, are possible.
 
 
-                             User-space tools
+                             User-space tools
 
-       user space                   |
-      +-------------------------------------------------------------------+
-       kernel                       | Netlink
-                                    |
-                     +--------------+-------------------------------+
-                     |         Network stack                        |
-                     |           (Linux)                            |
-                     |                                              |
-                     +----------------------------------------------+
+       user space                   |
+      +-------------------------------------------------------------------+
+       kernel                       | Netlink
+                                    |
+                     +--------------+-------------------------------+
+                     |         Network stack                        |
+                     |           (Linux)                            |
+                     |                                              |
+                     +----------------------------------------------+
 
                            sw1p2     sw1p4     sw1p6
-                      sw1p1  +  sw1p3  +  sw1p5  +          eth1
-                        +    |    +    |    +    |            +
-                        |    |    |    |    |    |            |
-                     +--+----+----+----+-+--+----+---+  +-----+-----+
-                     |         Switch driver         |  |    mgmt   |
-                     |        (this document)        |  |   driver  |
-                     |                               |  |           |
-                     +--------------+----------------+  +-----------+
-                                    |
-       kernel                       | HW bus (eg PCI)
-      +-------------------------------------------------------------------+
-       hardware                     |
-                     +--------------+---+------------+
-                     |         Switch device (sw1)   |
-                     |  +----+                       +--------+
-                     |  |    v offloaded data path   | mgmt port
-                     |  |    |                       |
-                     +--|----|----+----+----+----+---+
-                        |    |    |    |    |    |
-                        +    +    +    +    +    +
-                       p1   p2   p3   p4   p5   p6
+                      sw1p1  +  sw1p3  +  sw1p5  +          eth1
+                        +    |    +    |    +    |            +
+                        |    |    |    |    |    |            |
+                     +--+----+----+----+----+----+---+  +-----+-----+
+                     |         Switch driver         |  |    mgmt   |
+                     |        (this document)        |  |   driver  |
+                     |                               |  |           |
+                     +--------------+----------------+  +-----------+
+                                    |
+       kernel                       | HW bus (eg PCI)
+      +-------------------------------------------------------------------+
+       hardware                     |
+                     +--------------+----------------+
+                     |         Switch device (sw1)   |
+                     |  +----+                       +--------+
+                     |  |    v offloaded data path   | mgmt port
+                     |  |    |                       |
+                     +--|----|----+----+----+----+---+
+                        |    |    |    |    |    |
+                        +    +    +    +    +    +
+                       p1   p2   p3   p4   p5   p6
 
-                             front-panel ports
+                             front-panel ports
 
 
                                     Fig 1.

From 6ce14f6416c84bd9c81777edf899b57ac5000c87 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 19 Sep 2017 01:49:02 +0200
Subject: [PATCH 071/391] ACPI / watchdog: properly initialize resources

We copy a local resource structure into a list, but only
initialize some of its members, as pointed out by gcc-4.4:

drivers/acpi/acpi_watchdog.c: In function 'acpi_watchdog_init':
drivers/acpi/acpi_watchdog.c:105: error: 'res.child' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.sibling' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.parent' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.desc' may be used uninitialized in this function
drivers/acpi/acpi_watchdog.c:105: error: 'res.name' may be used uninitialized in this function

Newer compilers can presumably optimize the uninitialized access
away entirely and don't warn at all, but rely on the kzalloc()
to zero the structure first. This adds an explicit initialization
to force consistent behavior.

Fixes: 058dfc767008 (ACPI / watchdog: Add support for WDAT hardware watchdog)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_watchdog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
index bf22c29d2517..11b113f8e367 100644
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -66,7 +66,7 @@ void __init acpi_watchdog_init(void)
 	for (i = 0; i < wdat->entries; i++) {
 		const struct acpi_generic_address *gas;
 		struct resource_entry *rentry;
-		struct resource res;
+		struct resource res = {};
 		bool found;
 
 		gas = &entries[i].register_region;

From 1e3c5ec66119783440ed211ae527674651affa9b Mon Sep 17 00:00:00 2001
From: Sathya Perla <sathya.perla@broadcom.com>
Date: Mon, 18 Sep 2017 17:05:37 +0530
Subject: [PATCH 072/391] bnxt_en: check for ingress qdisc in flower offload

Check for ingress-only qdisc for flower offload, as other qdiscs
are not supported for flower offload.

Suggested-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index ccd699fb2d70..7dd3d131043a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -750,6 +750,10 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
 {
 	int rc = 0;
 
+	if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
+	    cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
 	switch (cls_flower->command) {
 	case TC_CLSFLOWER_REPLACE:
 		rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);

From 582db7e0c4c2fc5bb4f932f268035883385e3692 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 18 Sep 2017 15:03:46 +0200
Subject: [PATCH 073/391] bpf: devmap: pass on return value of
 bpf_map_precharge_memlock

If bpf_map_precharge_memlock in dev_map_alloc, -ENOMEM is returned
regardless of the actual error produced by bpf_map_precharge_memlock.
Fix it by passing on the error returned by bpf_map_precharge_memlock.

Also return -EINVAL instead of -ENOMEM if the page count overflow check
fails.

This makes dev_map_alloc match the behavior of other bpf maps' alloc
functions wrt. return values.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/devmap.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 959c9a07f318..e093d9a2c4dd 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,8 +75,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_dtab *dtab;
+	int err = -EINVAL;
 	u64 cost;
-	int err;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -108,6 +108,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto free_dtab;
 
+	err = -ENOMEM;
+
 	/* A per cpu bitfield with a bit per possible net device */
 	dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
 					    __alignof__(unsigned long));
@@ -128,7 +130,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 free_dtab:
 	free_percpu(dtab->flush_needed);
 	kfree(dtab);
-	return ERR_PTR(-ENOMEM);
+	return ERR_PTR(err);
 }
 
 static void dev_map_free(struct bpf_map *map)

From 5e75fe3927559505682b0053b5745e3f42d66e8c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 18 Sep 2017 17:19:10 -0700
Subject: [PATCH 074/391] tools/testing/nvdimm: disable labels for nfit_test.1

Improve coverage of NVDIMM-N test scenarios by providing a test bus
incapable of label operations.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index d20791c3f499..bef419d4266d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1527,9 +1527,6 @@ static void nfit_test1_setup(struct nfit_test *t)
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
-	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
-	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
-	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,

From 4c7124413aa759b8ea0b90cd39177e525396e662 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 18 Sep 2017 11:05:16 -0700
Subject: [PATCH 075/391] tcp: remove two unused functions

remove tcp_may_send_now and tcp_snd_test that are no longer used

Fixes: 840a3cbe8969 ("tcp: remove forward retransmit feature")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  1 -
 net/ipv4/tcp_output.c | 34 ----------------------------------
 2 files changed, 35 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f284427a..3bc910a9bfc6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -544,7 +544,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
-bool tcp_may_send_now(struct sock *sk);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 void tcp_retransmit_timer(struct sock *sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1c839c99114c..517d737059d1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1806,40 +1806,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
 	return !after(end_seq, tcp_wnd_end(tp));
 }
 
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now.  If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int cwnd_quota;
-
-	tcp_init_tso_segs(skb, cur_mss);
-
-	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
-		return 0;
-
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
-		cwnd_quota = 0;
-
-	return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_send_head(sk);
-
-	return skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk),
-			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH));
-}
-
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
  * which is put after SKB on the list.  It is very much like
  * tcp_fragment() except that it may make several kinds of assumptions

From 33a56086712561b8b9cdc881e0317f4c36861f72 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 18 Sep 2017 14:48:58 -0700
Subject: [PATCH 076/391] libnvdimm, namespace: fix btt claim class crash

Maurice reports:

    BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
    IP: holder_class_store+0x253/0x2b0 [libnvdimm]

...while trying to reconfigure an NVDIMM-N namespace into 'sector' /
'btt' mode. The crash points to this line:

    (gdb) li *(holder_class_store+0x253)
    0x7773 is in holder_class_store (drivers/nvdimm/namespace_devs.c:1420).
    1415            for (i = 0; i < nd_region->ndr_mappings; i++) {
    1416                    struct nd_mapping *nd_mapping = &nd_region->mapping[i];
    1417                    struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
    1418                    struct nd_namespace_index *nsindex;
    1419
    1420                    nsindex = to_namespace_index(ndd, ndd->ns_current);

...where we are failing because ndd is NULL due to NVDIMM-N dimms not
supporting labels.

Long story short, default to the BTTv1 format in the label-less /
NVDIMM-N case.

Fixes: 14e494542636 ("libnvdimm, btt: BTT updates for UEFI 2.7 format")
Cc: <stable@vger.kernel.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Reported-by: Maurice A. Saldivar <maurice.a.saldivar@hpe.com>
Tested-by: Maurice A. Saldivar <maurice.a.saldivar@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/namespace_devs.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 1427a386a033..3e4d1e7998da 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1417,6 +1417,15 @@ static int btt_claim_class(struct device *dev)
 		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 		struct nd_namespace_index *nsindex;
 
+		/*
+		 * If any of the DIMMs do not support labels the only
+		 * possible BTT format is v1.
+		 */
+		if (!ndd) {
+			loop_bitmask = 0;
+			break;
+		}
+
 		nsindex = to_namespace_index(ndd, ndd->ns_current);
 		if (nsindex == NULL)
 			loop_bitmask |= 1;

From 129c6cda2de2a8ac44fab096152469999b727faf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Sep 2017 13:03:43 -0700
Subject: [PATCH 077/391] 8139too: revisit napi_complete_done() usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It seems we have to be more careful in napi_complete_done()
use. This patch is not a revert, as it seems we can
avoid bug that Ville reported by moving the napi_complete_done()
test in the spinlock section.

Many thanks to Ville for detective work and all tests.

Fixes: 617f01211baf ("8139too: use napi_complete_done()")
Reported-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/8139too.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index ca22f2898664..d24b47b8e0b2 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2135,11 +2135,12 @@ static int rtl8139_poll(struct napi_struct *napi, int budget)
 	if (likely(RTL_R16(IntrStatus) & RxAckBits))
 		work_done += rtl8139_rx(dev, tp, budget);
 
-	if (work_done < budget && napi_complete_done(napi, work_done)) {
+	if (work_done < budget) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&tp->lock, flags);
-		RTL_W16_F(IntrMask, rtl8139_intr_mask);
+		if (napi_complete_done(napi, work_done))
+			RTL_W16_F(IntrMask, rtl8139_intr_mask);
 		spin_unlock_irqrestore(&tp->lock, flags);
 	}
 	spin_unlock(&tp->rx_lock);

From 8ecb1a29e11e24c458ee4ee59447d0ddf8274589 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 18 Sep 2017 16:31:30 -0700
Subject: [PATCH 078/391] net: systemport: Fix 64-bit statistics dependency

There are several problems with commit 10377ba7673d ("net: systemport:
Support 64bit statistics", first one got fixed in 7095c973453e ("net:
systemport: Fix 64-bit stats deadlock").

The second problem is that this specific code updates the
stats64.tx_{packets,bytes} from ndo_get_stats64() and that is what we
are returning to ethtool -S. If we are not running a tool that involves
calling ndo_get_stats64(), then we won't get updated ethtool stats.

The solution to this is to update the stats from both call sites,
factoring that into a specific function, While at it, don't just check
the sizeof() but also the type of the statistics in order to use the
64-bit stats seqlock.

Fixes: 10377ba7673d ("net: systemport: Support 64bit statistics")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 52 +++++++++++++---------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index c3c53f6cd9e6..83eec9a8c275 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -432,6 +432,27 @@ static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv)
 	netif_dbg(priv, hw, priv->netdev, "updated MIB counters\n");
 }
 
+static void bcm_sysport_update_tx_stats(struct bcm_sysport_priv *priv,
+					u64 *tx_bytes, u64 *tx_packets)
+{
+	struct bcm_sysport_tx_ring *ring;
+	u64 bytes = 0, packets = 0;
+	unsigned int start;
+	unsigned int q;
+
+	for (q = 0; q < priv->netdev->num_tx_queues; q++) {
+		ring = &priv->tx_rings[q];
+		do {
+			start = u64_stats_fetch_begin_irq(&priv->syncp);
+			bytes = ring->bytes;
+			packets = ring->packets;
+		} while (u64_stats_fetch_retry_irq(&priv->syncp, start));
+
+		*tx_bytes += bytes;
+		*tx_packets += packets;
+	}
+}
+
 static void bcm_sysport_get_stats(struct net_device *dev,
 				  struct ethtool_stats *stats, u64 *data)
 {
@@ -439,11 +460,16 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
 	struct u64_stats_sync *syncp = &priv->syncp;
 	struct bcm_sysport_tx_ring *ring;
+	u64 tx_bytes = 0, tx_packets = 0;
 	unsigned int start;
 	int i, j;
 
-	if (netif_running(dev))
+	if (netif_running(dev)) {
 		bcm_sysport_update_mib_counters(priv);
+		bcm_sysport_update_tx_stats(priv, &tx_bytes, &tx_packets);
+		stats64->tx_bytes = tx_bytes;
+		stats64->tx_packets = tx_packets;
+	}
 
 	for (i =  0, j = 0; i < BCM_SYSPORT_STATS_LEN; i++) {
 		const struct bcm_sysport_stats *s;
@@ -461,12 +487,13 @@ static void bcm_sysport_get_stats(struct net_device *dev,
 			continue;
 		p += s->stat_offset;
 
-		if (s->stat_sizeof == sizeof(u64))
+		if (s->stat_sizeof == sizeof(u64) &&
+		    s->type == BCM_SYSPORT_STAT_NETDEV64) {
 			do {
 				start = u64_stats_fetch_begin_irq(syncp);
 				data[i] = *(u64 *)p;
 			} while (u64_stats_fetch_retry_irq(syncp, start));
-		else
+		} else
 			data[i] = *(u32 *)p;
 		j++;
 	}
@@ -1716,27 +1743,12 @@ static void bcm_sysport_get_stats64(struct net_device *dev,
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
-	struct bcm_sysport_tx_ring *ring;
-	u64 tx_packets = 0, tx_bytes = 0;
 	unsigned int start;
-	unsigned int q;
 
 	netdev_stats_to_stats64(stats, &dev->stats);
 
-	for (q = 0; q < dev->num_tx_queues; q++) {
-		ring = &priv->tx_rings[q];
-		do {
-			start = u64_stats_fetch_begin_irq(&priv->syncp);
-			tx_bytes = ring->bytes;
-			tx_packets = ring->packets;
-		} while (u64_stats_fetch_retry_irq(&priv->syncp, start));
-
-		stats->tx_bytes += tx_bytes;
-		stats->tx_packets += tx_packets;
-	}
-
-	stats64->tx_bytes = stats->tx_bytes;
-	stats64->tx_packets = stats->tx_packets;
+	bcm_sysport_update_tx_stats(priv, &stats->tx_bytes,
+				    &stats->tx_packets);
 
 	do {
 		start = u64_stats_fetch_begin_irq(&priv->syncp);

From c8b850241559d6bad7e640075e282a83a4ad7ead Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 18 Sep 2017 12:25:22 +0200
Subject: [PATCH 079/391] s390/scm_blk: consistently use blk_status_t as error
 type

Fix these warnings found by sparse:
drivers/s390/block/scm_blk.c:257:24: warning: incorrect type in assignment (different base types)
drivers/s390/block/scm_blk.c:257:24:    expected int [signed] <noident>
drivers/s390/block/scm_blk.c:257:24:    got restricted blk_status_t [usertype] error
drivers/s390/block/scm_blk.c:420:33: warning: incorrect type in argument 2 (different base types)
drivers/s390/block/scm_blk.c:420:33:    expected restricted blk_status_t [usertype] error
drivers/s390/block/scm_blk.c:420:33:    got int [signed] <noident>

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 2e7fd966c515..eb51893c74a4 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -249,7 +249,7 @@ static void scm_request_requeue(struct scm_request *scmrq)
 static void scm_request_finish(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
-	int *error;
+	blk_status_t *error;
 	int i;
 
 	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
@@ -415,7 +415,7 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
 
 static void scm_blk_request_done(struct request *req)
 {
-	int *error = blk_mq_rq_to_pdu(req);
+	blk_status_t *error = blk_mq_rq_to_pdu(req);
 
 	blk_mq_end_request(req, *error);
 }
@@ -450,7 +450,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 	atomic_set(&bdev->queued_reqs, 0);
 
 	bdev->tag_set.ops = &scm_mq_ops;
-	bdev->tag_set.cmd_size = sizeof(int);
+	bdev->tag_set.cmd_size = sizeof(blk_status_t);
 	bdev->tag_set.nr_hw_queues = nr_requests;
 	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
 	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;

From 55fb7347579017bdb09550b4e8019447340b7004 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 14 Sep 2017 13:55:22 +0200
Subject: [PATCH 080/391] s390/cio: recover from bad paths

In some situations we don't receive notification from firmware that
a previously unusable channelpath is usable again.

Schedule recovery for devices that return from path verification
without using all potentially usable paths. The recovery thread will
periodically trigger a path verification on the affected devices.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Suggested-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/cio/device.c     | 12 +++++++++---
 drivers/s390/cio/device.h     |  1 +
 drivers/s390/cio/device_fsm.c | 12 ++++++++++++
 drivers/s390/cio/io_sch.h     |  2 ++
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 489b583f263d..e5c32f4b5287 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1225,10 +1225,16 @@ static int device_is_disconnected(struct ccw_device *cdev)
 static int recovery_check(struct device *dev, void *data)
 {
 	struct ccw_device *cdev = to_ccwdev(dev);
+	struct subchannel *sch;
 	int *redo = data;
 
 	spin_lock_irq(cdev->ccwlock);
 	switch (cdev->private->state) {
+	case DEV_STATE_ONLINE:
+		sch = to_subchannel(cdev->dev.parent);
+		if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm)
+			break;
+		/* fall through */
 	case DEV_STATE_DISCONNECTED:
 		CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n",
 			      cdev->private->dev_id.ssid,
@@ -1260,7 +1266,7 @@ static void recovery_work_func(struct work_struct *unused)
 		}
 		spin_unlock_irq(&recovery_lock);
 	} else
-		CIO_MSG_EVENT(4, "recovery: end\n");
+		CIO_MSG_EVENT(3, "recovery: end\n");
 }
 
 static DECLARE_WORK(recovery_work, recovery_work_func);
@@ -1274,11 +1280,11 @@ static void recovery_func(unsigned long data)
 	schedule_work(&recovery_work);
 }
 
-static void ccw_device_schedule_recovery(void)
+void ccw_device_schedule_recovery(void)
 {
 	unsigned long flags;
 
-	CIO_MSG_EVENT(4, "recovery: schedule\n");
+	CIO_MSG_EVENT(3, "recovery: schedule\n");
 	spin_lock_irqsave(&recovery_lock, flags);
 	if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) {
 		recovery_phase = 0;
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index ec497af99dd8..69cb70f080a5 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -134,6 +134,7 @@ void ccw_device_set_disconnected(struct ccw_device *cdev);
 void ccw_device_set_notoper(struct ccw_device *cdev);
 
 void ccw_device_set_timeout(struct ccw_device *, int);
+void ccw_device_schedule_recovery(void);
 
 /* Channel measurement facility related */
 void retry_set_schib(struct ccw_device *cdev);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 12016e32e519..f98ea674c3d8 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -476,6 +476,17 @@ static void create_fake_irb(struct irb *irb, int type)
 	}
 }
 
+static void ccw_device_handle_broken_paths(struct ccw_device *cdev)
+{
+	struct subchannel *sch = to_subchannel(cdev->dev.parent);
+	u8 broken_paths = (sch->schib.pmcw.pam & sch->opm) ^ sch->vpm;
+
+	if (broken_paths && (cdev->private->path_broken_mask != broken_paths))
+		ccw_device_schedule_recovery();
+
+	cdev->private->path_broken_mask = broken_paths;
+}
+
 void ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
 	struct subchannel *sch;
@@ -508,6 +519,7 @@ void ccw_device_verify_done(struct ccw_device *cdev, int err)
 			memset(&cdev->private->irb, 0, sizeof(struct irb));
 		}
 		ccw_device_report_path_events(cdev);
+		ccw_device_handle_broken_paths(cdev);
 		break;
 	case -ETIME:
 	case -EUSERS:
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 220f49145b2f..9a1b56b2df3e 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -131,6 +131,8 @@ struct ccw_device_private {
 				   not operable */
 	u8 path_gone_mask;	/* mask of paths, that became unavailable */
 	u8 path_new_mask;	/* mask of paths, that became available */
+	u8 path_broken_mask;	/* mask of paths, which were found to be
+				   unusable */
 	struct {
 		unsigned int fast:1;	/* post with "channel end" */
 		unsigned int repall:1;	/* report every interrupt status */

From 91c575b335766effa6103eba42a82aea560c365f Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Mon, 18 Sep 2017 16:10:35 +0200
Subject: [PATCH 081/391] s390/mm: make pmdp_invalidate() do invalidation only

Commit 227be799c39a ("s390/mm: uninline pmdp_xxx functions from pgtable.h")
inadvertently changed the behavior of pmdp_invalidate(), so that it now
clears the pmd instead of just marking it as invalid. Fix this by restoring
the original behavior.

A possible impact of the misbehaving pmdp_invalidate() would be the
MADV_DONTNEED races (see commits ced10803 and 58ceeb6b), although we
should not have any negative impact on the related dirty/young flags,
since those flags are not set by the hardware on s390.

Fixes: 227be799c39a ("s390/mm: uninline pmdp_xxx functions from pgtable.h")
Cc: <stable@vger.kernel.org> # v4.6+
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index dce708e061ea..20e75a2ca93a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1507,7 +1507,9 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
 static inline void pmdp_invalidate(struct vm_area_struct *vma,
 				   unsigned long addr, pmd_t *pmdp)
 {
-	pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+
+	pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
 }
 
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT

From ba385c0594e723d41790ecfb12c610e6f90c7785 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Mon, 18 Sep 2017 16:51:51 +0200
Subject: [PATCH 082/391] s390/mm: fix write access check in gup_huge_pmd()

The check for the _SEGMENT_ENTRY_PROTECT bit in gup_huge_pmd() is the
wrong way around. It must not be set for write==1, and not be checked for
write==0. Fix this similar to how it was fixed for ptes long time ago in
commit 25591b070336 ("[S390] fix get_user_pages_fast").

One impact of this bug would be unnecessarily using the gup slow path for
write==0 on r/w mappings. A potentially more severe impact would be that
gup_huge_pmd() will succeed for write==1 on r/o mappings.

Cc: <stable@vger.kernel.org>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/gup.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 8ecc25e760fa..98ffe3ee9411 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
-	unsigned long mask, result;
 	struct page *head, *page;
+	unsigned long mask;
 	int refs;
 
-	result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
-	mask = result | _SEGMENT_ENTRY_INVALID;
-	if ((pmd_val(pmd) & mask) != result)
+	mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+	if ((pmd_val(pmd) & mask) != 0)
 		return 0;
 	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
 

From a4aaeccc7a91e24cc17f72a7eb2f586f5c3d811d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 10 Sep 2017 13:43:37 -0700
Subject: [PATCH 083/391] iommu: Add missing dependencies

parisc:allmodconfig, xtensa:allmodconfig, and possibly others generate
the following Kconfig warning.

warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects
IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT &&
HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64))

IOMMU_IO_PGTABLE_LPAE depends on (COMPILE_TEST && !GENERIC_ATOMIC64),
so any configuration option selecting it needs to have the same dependencies.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 49bd2ab8c507..8530ef78925d 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -278,7 +278,7 @@ config EXYNOS_IOMMU_DEBUG
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
 	depends on ARM || IOMMU_DMA
-	depends on ARCH_RENESAS || COMPILE_TEST
+	depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64)
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU
@@ -373,7 +373,7 @@ config MTK_IOMMU_V1
 config QCOM_IOMMU
 	# Note: iommu drivers cannot (yet?) be built as modules
 	bool "Qualcomm IOMMU Support"
-	depends on ARCH_QCOM || COMPILE_TEST
+	depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU

From 3bd71e18c5803c23014df962bdd74af1b34697fe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Sep 2017 22:10:21 +0200
Subject: [PATCH 084/391] iommu/vt-d: Fix harmless section mismatch warning

Building with gcc-4.6 results in this warning due to
dmar_table_print_dmar_entry being inlined as in newer compiler versions:

WARNING: vmlinux.o(.text+0x5c8bee): Section mismatch in reference from the function dmar_walk_remapping_entries() to the function .init.text:dmar_table_print_dmar_entry()
The function dmar_walk_remapping_entries() references
the function __init dmar_table_print_dmar_entry().
This is often because dmar_walk_remapping_entries lacks a __init
annotation or the annotation of dmar_table_print_dmar_entry is wrong.

This removes the __init annotation to avoid the warning. On compilers
that don't show the warning today, this should have no impact since the
function gets inlined anyway.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index ca5ebaeafd6a..57c920c1372d 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -497,7 +497,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
 #define	dmar_parse_one_rhsa		dmar_res_noop
 #endif
 
-static void __init
+static void
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 {
 	struct acpi_dmar_hardware_unit *drhd;

From 5baf6bb0fd2388742a0846cc7bcacee6dec78235 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 14 Sep 2017 14:01:00 +0200
Subject: [PATCH 085/391] drm/exynos: Fix locking in the suspend/resume paths

Commit 48a92916729b ("drm/exynos: use drm_for_each_connector_iter()")
replaced unsafe drm_for_each_connector() with drm_for_each_connector_iter()
and removed surrounding drm_modeset_lock calls. However, that lock was
there not only to protect unsafe drm_for_each_connector(), but it was also
required to be held by the dpms code which was called from the loop body.
This patch restores those drm_modeset_lock calls to fix broken suspend
and resume of Exynos DRM subsystem in v4.13 kernel.

Fixes: 48a92916729b ("drm/exynos: use drm_for_each_connector_iter()")
CC: stable@vger.kernel.org # v4.13
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index b1f7299600f0..7f3cfc5dd320 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -174,6 +174,7 @@ static int exynos_drm_suspend(struct device *dev)
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
+	drm_modeset_lock_all(drm_dev);
 	drm_connector_list_iter_begin(drm_dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
 		int old_dpms = connector->dpms;
@@ -185,6 +186,7 @@ static int exynos_drm_suspend(struct device *dev)
 		connector->dpms = old_dpms;
 	}
 	drm_connector_list_iter_end(&conn_iter);
+	drm_modeset_unlock_all(drm_dev);
 
 	return 0;
 }
@@ -198,6 +200,7 @@ static int exynos_drm_resume(struct device *dev)
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
+	drm_modeset_lock_all(drm_dev);
 	drm_connector_list_iter_begin(drm_dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
 		if (connector->funcs->dpms) {
@@ -208,6 +211,7 @@ static int exynos_drm_resume(struct device *dev)
 		}
 	}
 	drm_connector_list_iter_end(&conn_iter);
+	drm_modeset_unlock_all(drm_dev);
 
 	return 0;
 }

From 6e8edf8a7d8d98e1734ff41e5c69439419319402 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 14 Sep 2017 14:01:01 +0200
Subject: [PATCH 086/391] drm/exynos: Fix suspend/resume support

Commit 7d902c05b480 ("drm: Nuke drm_atomic_helper_connector_dpms")
removed drm_atomic_helper_connector_dpms() helper saying that it was a dead
code. It was however indirectly used by Exynos DRM driver for implementing
suspend/resume support. To fix this regression (after that patch Exynos DRM
suspend/resume functions became no-ops and hardware fails to suspend),
this patch rewrites them with drm_atomic_helper_suspend/resume() helpers.

Fixes: 7d902c05b480 ("drm: Nuke drm_atomic_helper_connector_dpms")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c   | 40 +++++++----------------
 drivers/gpu/drm/exynos/exynos_drm_drv.h   |  1 +
 drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 20 ++++++++++++
 drivers/gpu/drm/exynos/exynos_drm_fbdev.h | 10 ++++++
 4 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 7f3cfc5dd320..e651a58c18cf 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -168,25 +168,19 @@ static struct drm_driver exynos_drm_driver = {
 static int exynos_drm_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
-	struct drm_connector *connector;
-	struct drm_connector_list_iter conn_iter;
+	struct exynos_drm_private *private = drm_dev->dev_private;
 
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
-	drm_modeset_lock_all(drm_dev);
-	drm_connector_list_iter_begin(drm_dev, &conn_iter);
-	drm_for_each_connector_iter(connector, &conn_iter) {
-		int old_dpms = connector->dpms;
-
-		if (connector->funcs->dpms)
-			connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF);
-
-		/* Set the old mode back to the connector for resume */
-		connector->dpms = old_dpms;
+	drm_kms_helper_poll_disable(drm_dev);
+	exynos_drm_fbdev_suspend(drm_dev);
+	private->suspend_state = drm_atomic_helper_suspend(drm_dev);
+	if (IS_ERR(private->suspend_state)) {
+		exynos_drm_fbdev_resume(drm_dev);
+		drm_kms_helper_poll_enable(drm_dev);
+		return PTR_ERR(private->suspend_state);
 	}
-	drm_connector_list_iter_end(&conn_iter);
-	drm_modeset_unlock_all(drm_dev);
 
 	return 0;
 }
@@ -194,24 +188,14 @@ static int exynos_drm_suspend(struct device *dev)
 static int exynos_drm_resume(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
-	struct drm_connector *connector;
-	struct drm_connector_list_iter conn_iter;
+	struct exynos_drm_private *private = drm_dev->dev_private;
 
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
-	drm_modeset_lock_all(drm_dev);
-	drm_connector_list_iter_begin(drm_dev, &conn_iter);
-	drm_for_each_connector_iter(connector, &conn_iter) {
-		if (connector->funcs->dpms) {
-			int dpms = connector->dpms;
-
-			connector->dpms = DRM_MODE_DPMS_OFF;
-			connector->funcs->dpms(connector, dpms);
-		}
-	}
-	drm_connector_list_iter_end(&conn_iter);
-	drm_modeset_unlock_all(drm_dev);
+	drm_atomic_helper_resume(drm_dev, private->suspend_state);
+	exynos_drm_fbdev_resume(drm_dev);
+	drm_kms_helper_poll_enable(drm_dev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index cf131c2aa23e..f8bae4cb4823 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -202,6 +202,7 @@ struct drm_exynos_file_private {
  */
 struct exynos_drm_private {
 	struct drm_fb_helper *fb_helper;
+	struct drm_atomic_state *suspend_state;
 
 	struct device *dma_dev;
 	void *mapping;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index c3a068409b48..dfb66ecf417b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -18,6 +18,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/exynos_drm.h>
 
+#include <linux/console.h>
+
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
 #include "exynos_drm_fbdev.h"
@@ -285,3 +287,21 @@ void exynos_drm_output_poll_changed(struct drm_device *dev)
 
 	drm_fb_helper_hotplug_event(fb_helper);
 }
+
+void exynos_drm_fbdev_suspend(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	console_lock();
+	drm_fb_helper_set_suspend(private->fb_helper, 1);
+	console_unlock();
+}
+
+void exynos_drm_fbdev_resume(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	console_lock();
+	drm_fb_helper_set_suspend(private->fb_helper, 0);
+	console_unlock();
+}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
index 330eef87f718..645d1bb7f665 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
@@ -21,6 +21,8 @@ int exynos_drm_fbdev_init(struct drm_device *dev);
 void exynos_drm_fbdev_fini(struct drm_device *dev);
 void exynos_drm_fbdev_restore_mode(struct drm_device *dev);
 void exynos_drm_output_poll_changed(struct drm_device *dev);
+void exynos_drm_fbdev_suspend(struct drm_device *drm);
+void exynos_drm_fbdev_resume(struct drm_device *drm);
 
 #else
 
@@ -39,6 +41,14 @@ static inline void exynos_drm_fbdev_restore_mode(struct drm_device *dev)
 
 #define exynos_drm_output_poll_changed (NULL)
 
+static inline void exynos_drm_fbdev_suspend(struct drm_device *drm)
+{
+}
+
+static inline void exynos_drm_fbdev_resume(struct drm_device *drm)
+{
+}
+
 #endif
 
 #endif

From 9ac30ef6d8ec3533d0feec53e268c4fa32ea700c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 Sep 2017 10:19:55 +0200
Subject: [PATCH 087/391] drm: exynos: include linux/irq.h

I ran into a build error on x86:

drivers/gpu/drm/exynos/exynos5433_drm_decon.c: In function 'decon_conf_irq':
drivers/gpu/drm/exynos/exynos5433_drm_decon.c:706:2: error: implicit declaration of function 'irq_set_status_flags'; did you mean 'dquot_state_flag'? [-Werror=implicit-function-declaration]
  irq_set_status_flags(irq, IRQ_NOAUTOEN);

Adding the missing include fixes the error.

Fixes: b37d53a0382c ("drm/exynos/decon5433: move TE handling to DECON")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 730b8d9db187..6be5b53c3b27 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -14,6 +14,7 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/iopoll.h>
+#include <linux/irq.h>
 #include <linux/mfd/syscon.h>
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>

From 0555ac4333d7da7cff83d5b06bd3679a3e1ef584 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho@docker.com>
Date: Mon, 18 Sep 2017 16:35:32 -0600
Subject: [PATCH 088/391] xen, arm64: drop dummy lookup_address()

This is unused, and conflicts with the definition that we'll add for XPFO.

Signed-off-by: Tycho Andersen <tycho@docker.com>
Reviewed-by: Julien Grall <julien.grall@arm.com>
CC: Boris Ostrovsky <boris.ostrovsky@oracle.com>
CC: Juergen Gross <jgross@suse.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 include/xen/arm/page.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h
index 415dbc6e43fd..6adc2a955340 100644
--- a/include/xen/arm/page.h
+++ b/include/xen/arm/page.h
@@ -84,16 +84,6 @@ static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 	BUG();
 }
 
-/* TODO: this shouldn't be here but it is because the frontend drivers
- * are using it (its rolled in headers) even though we won't hit the code path.
- * So for right now just punt with this.
- */
-static inline pte_t *lookup_address(unsigned long address, unsigned int *level)
-{
-	BUG();
-	return NULL;
-}
-
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);

From 986a5f70173626eea9863fee6d6e029f0f2bc361 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 11 Sep 2017 14:34:34 +0200
Subject: [PATCH 089/391] iommu/qcom: Depend on HAS_DMA to fix compile error

If NO_DMA=y:

    warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT && HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64))

and

    drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_sync_pte':
    io-pgtable-arm.c:(.text+0x206): undefined reference to `bad_dma_ops'
    drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_free_pages':
    io-pgtable-arm.c:(.text+0x6a6): undefined reference to `bad_dma_ops'
    drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_alloc_pages':
    io-pgtable-arm.c:(.text+0x812): undefined reference to `bad_dma_ops'
    io-pgtable-arm.c:(.text+0x81c): undefined reference to `bad_dma_ops'
    io-pgtable-arm.c:(.text+0x862): undefined reference to `bad_dma_ops'
    drivers/iommu/io-pgtable-arm.o: In function `arm_lpae_run_tests':
    io-pgtable-arm.c:(.init.text+0x86): undefined reference to `alloc_io_pgtable_ops'
    io-pgtable-arm.c:(.init.text+0x47c): undefined reference to `free_io_pgtable_ops'
    drivers/iommu/qcom_iommu.o: In function `qcom_iommu_init_domain':
    qcom_iommu.c:(.text+0x1ce): undefined reference to `alloc_io_pgtable_ops'
    drivers/iommu/qcom_iommu.o: In function `qcom_iommu_domain_free':
    qcom_iommu.c:(.text+0x754): undefined reference to `free_io_pgtable_ops'

QCOM_IOMMU selects IOMMU_IO_PGTABLE_LPAE, which bypasses its dependency
on HAS_DMA.  Make QCOM_IOMMU depend on HAS_DMA to fix this.

Fixes: 0ae349a0f33fb040 ("iommu/qcom: Add qcom_iommu")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 8530ef78925d..f3a21343e636 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -374,6 +374,7 @@ config QCOM_IOMMU
 	# Note: iommu drivers cannot (yet?) be built as modules
 	bool "Qualcomm IOMMU Support"
 	depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
+	depends on HAS_DMA
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU

From aa767cfb75341a6b93742f4d7d1589ac2532c29e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 11 Sep 2017 22:07:50 +0200
Subject: [PATCH 090/391] of: provide inline helper for of_find_device_by_node

The ipmmu-vmsa driver fails in compile-testing on non-OF platforms:

drivers/iommu/ipmmu-vmsa.o: In function `ipmmu_of_xlate':
ipmmu-vmsa.c:(.text+0x740): undefined reference to `of_find_device_by_node'

It would be reasonable to assume that this interface works but
returns failure on non-OF builds, like it does on machines that
have been booted in another way, so this adds another inline
function helper.

Fixes: 7b2d59611fef ("iommu/ipmmu-vmsa: Replace local utlb code with fwspec ids")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_platform.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index e0d1946270f3..fb908e598348 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -57,7 +57,14 @@ extern const struct of_device_id of_default_bus_match_table[];
 extern struct platform_device *of_device_alloc(struct device_node *np,
 					 const char *bus_id,
 					 struct device *parent);
+#ifdef CONFIG_OF
 extern struct platform_device *of_find_device_by_node(struct device_node *np);
+#else
+static inline struct platform_device *of_find_device_by_node(struct device_node *np)
+{
+	return NULL;
+}
+#endif
 
 /* Platform devices and busses creation */
 extern struct platform_device *of_platform_device_create(struct device_node *np,

From a6899e900509bb2442239d6198dc8bc64f8437ec Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 14 Sep 2017 23:39:38 +0200
Subject: [PATCH 091/391] dt-bindings: fix vendor prefix for Abracon

Commit 446810f2dd41 ("of: add vendor prefix for Abracon Corporation")
claimed that "abcn" was used as the vendor prefix while in fact "abracon"
was used in the subsequent commits. It is also the only prefix used in the
tree.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
[robh: fix alphabetical order]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 1ea1fd4232ab..1afd298eddd7 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -3,8 +3,8 @@ Device tree binding vendor prefix registry.  Keep list in alphabetical order.
 This isn't an exhaustive list, but you should add new prefixes to it before
 using them to avoid name-space collisions.
 
-abcn	Abracon Corporation
 abilis	Abilis Systems
+abracon	Abracon Corporation
 actions	Actions Semiconductor Co., Ltd.
 active-semi	Active-Semi International Inc
 ad	Avionic Design GmbH

From 3993491bf27117782bee05debc6a6afa51d61760 Mon Sep 17 00:00:00 2001
From: Ariel Elior <aelior@cavium.com>
Date: Tue, 19 Sep 2017 12:54:34 +0300
Subject: [PATCH 092/391] MAINTAINERS: Remove Yuval Mintz from maintainers list

Remove Yuval from maintaining the bnx2x & qed* modules as he is no longer
working for the company. Thanks Yuval for your huge contributions and
tireless efforts over the many years and various companies.

Ariel
Signed-off-by: Ariel Elior <aelior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2281af4b41b6..955f034fd523 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2853,7 +2853,6 @@ S:	Supported
 F:	drivers/scsi/bnx2i/
 
 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@cavium.com>
 M:	Ariel Elior <ariel.elior@cavium.com>
 M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org
@@ -11047,7 +11046,6 @@ S:	Supported
 F:	drivers/scsi/qedi/
 
 QLOGIC QL4xxx ETHERNET DRIVER
-M:	Yuval Mintz <Yuval.Mintz@cavium.com>
 M:	Ariel Elior <Ariel.Elior@cavium.com>
 M:	everest-linux-l2@cavium.com
 L:	netdev@vger.kernel.org

From 29a0cfbf91ba997591535a4f7246835ce8328141 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 18 Sep 2017 12:21:37 +0200
Subject: [PATCH 093/391] libceph: don't allow bidirectional swap of
 pg-upmap-items

This reverts most of commit f53b7665c8ce ("libceph: upmap semantic
changes").

We need to prevent duplicates in the final result.  For example, we
can currently take

  [1,2,3] and apply [(1,2)] and get [2,2,3]

or

  [1,2,3] and apply [(3,2)] and get [1,2,2]

The rest of the system is not prepared to handle duplicates in the
result set like this.

The reverted piece was intended to allow

  [1,2,3] and [(1,2),(2,1)] to get [2,1,3]

to reorder primaries.  First, this bidirectional swap is hard to
implement in a way that also prevents dups.  For example, [1,2,3] and
[(1,4),(2,3),(3,4)] would give [4,3,4] but would we just drop the last
step we'd have [4,3,3] which is also invalid, etc.  Simpler to just not
handle bidirectional swaps.  In practice, they are not needed: if you
just want to choose a different primary then use primary_affinity, or
pg_upmap (not pg_upmap_items).

Cc: stable@vger.kernel.org # 4.13
Link: http://tracker.ceph.com/issues/21410
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 net/ceph/osdmap.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f358d0bfa76b..79d14d70b7ea 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2445,19 +2445,34 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
 
 	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
 	if (pg) {
-		for (i = 0; i < raw->size; i++) {
-			for (j = 0; j < pg->pg_upmap_items.len; j++) {
-				int from = pg->pg_upmap_items.from_to[j][0];
-				int to = pg->pg_upmap_items.from_to[j][1];
+		/*
+		 * Note: this approach does not allow a bidirectional swap,
+		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
+		 */
+		for (i = 0; i < pg->pg_upmap_items.len; i++) {
+			int from = pg->pg_upmap_items.from_to[i][0];
+			int to = pg->pg_upmap_items.from_to[i][1];
+			int pos = -1;
+			bool exists = false;
 
-				if (from == raw->osds[i]) {
-					if (!(to != CRUSH_ITEM_NONE &&
-					      to < osdmap->max_osd &&
-					      osdmap->osd_weight[to] == 0))
-						raw->osds[i] = to;
+			/* make sure replacement doesn't already appear */
+			for (j = 0; j < raw->size; j++) {
+				int osd = raw->osds[j];
+
+				if (osd == to) {
+					exists = true;
 					break;
 				}
+				/* ignore mapping if target is marked out */
+				if (osd == from && pos < 0 &&
+				    !(to != CRUSH_ITEM_NONE &&
+				      to < osdmap->max_osd &&
+				      osdmap->osd_weight[to] == 0)) {
+					pos = j;
+				}
 			}
+			if (!exists && pos >= 0)
+				raw->osds[pos] = to;
 		}
 	}
 }

From 3fad4cdac235c5b13227d0c09854c689ae62c70b Mon Sep 17 00:00:00 2001
From: zijun_hu <zijun_hu@htc.com>
Date: Sat, 16 Sep 2017 01:59:41 +0800
Subject: [PATCH 094/391] irqchip/gic-v3: Iterate over possible CPUs by
 for_each_possible_cpu()

get_cpu_number() doesn't use existing helper to iterate over possible
CPUs, It will cause an error in case of discontinuous @cpu_possible_mask
such as 0b11110001, which can result from a core having failed to come
up on a SMP machine.

Fixed by using existing helper for_each_possible_cpu().

Signed-off-by: zijun_hu <zijun_hu@htc.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 519149ec9053..b5df99c6f680 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1042,7 +1042,7 @@ static int get_cpu_number(struct device_node *dn)
 {
 	const __be32 *cell;
 	u64 hwid;
-	int i;
+	int cpu;
 
 	cell = of_get_property(dn, "reg", NULL);
 	if (!cell)
@@ -1056,9 +1056,9 @@ static int get_cpu_number(struct device_node *dn)
 	if (hwid & ~MPIDR_HWID_BITMASK)
 		return -1;
 
-	for (i = 0; i < num_possible_cpus(); i++)
-		if (cpu_logical_map(i) == hwid)
-			return i;
+	for_each_possible_cpu(cpu)
+		if (cpu_logical_map(cpu) == hwid)
+			return cpu;
 
 	return -1;
 }

From 6c09ffd027255d97c536aa4ac51a90973adfedc6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Sep 2017 22:08:23 +0200
Subject: [PATCH 095/391] irqchip/gic-v4: Fix building with ancient gcc

gcc-4.5 and earlier don't like named initializers for anonymous
union members:

drivers/irqchip/irq-gic-v4.c: In function 'its_map_vlpi':
drivers/irqchip/irq-gic-v4.c:176:3: error: unknown field 'map' specified in initializer
drivers/irqchip/irq-gic-v4.c:176:3: error: missing braces around initializer
drivers/irqchip/irq-gic-v4.c:176:3: error: (near initialization for 'info.<anonymous>')
drivers/irqchip/irq-gic-v4.c: In function 'its_get_vlpi':
drivers/irqchip/irq-gic-v4.c:192:3: error: unknown field 'map' specified in initializer
drivers/irqchip/irq-gic-v4.c:192:3: error: missing braces around initializer
drivers/irqchip/irq-gic-v4.c:192:3: error: (near initialization for 'info.<anonymous>')
drivers/irqchip/irq-gic-v4.c: In function 'its_prop_update_vlpi':
drivers/irqchip/irq-gic-v4.c:208:3: error: unknown field 'config' specified in initializer
drivers/irqchip/irq-gic-v4.c:208:3: error: missing braces around initializer
drivers/irqchip/irq-gic-v4.c:208:3: error: (near initialization for 'info.<anonymous>')
drivers/irqchip/irq-gic-v4.c:208:3: error: initialization makes pointer from integer without a cast

This is fairly easy to work around, by using extra curly braces.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v4.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 2370e6d9e603..cd0bcc3b7e33 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -173,7 +173,9 @@ int its_map_vlpi(int irq, struct its_vlpi_map *map)
 {
 	struct its_cmd_info info = {
 		.cmd_type = MAP_VLPI,
-		.map      = map,
+		{
+			.map      = map,
+		},
 	};
 
 	/*
@@ -189,7 +191,9 @@ int its_get_vlpi(int irq, struct its_vlpi_map *map)
 {
 	struct its_cmd_info info = {
 		.cmd_type = GET_VLPI,
-		.map      = map,
+		{
+			.map      = map,
+		},
 	};
 
 	return irq_set_vcpu_affinity(irq, &info);
@@ -205,7 +209,9 @@ int its_prop_update_vlpi(int irq, u8 config, bool inv)
 {
 	struct its_cmd_info info = {
 		.cmd_type = inv ? PROP_UPDATE_AND_INV_VLPI : PROP_UPDATE_VLPI,
-		.config   = config,
+		{
+			.config   = config,
+		},
 	};
 
 	return irq_set_vcpu_affinity(irq, &info);

From 90019f8fcd71bf71653690329e32f41489e96122 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 5 Sep 2017 11:28:46 -0700
Subject: [PATCH 096/391] irqchip.mips-gic: Fix shared interrupt mask writes

The write_gic_smask() & write_gic_rmask() functions take a shared
interrupt number as a parameter, but we're incorrectly providing them a
bitmask with the shared interrupt's bit set. This effectively means that
we mask or unmask the shared interrupt 1<<n rather than shared interrupt
n, and as a result likely drop interrupts.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: 68898c8765f4 ("irqchip: mips-gic: Drop gic_(re)set_mask() functions")
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mips@linux-mips.org
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-mips-gic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 6e52a88bbd9e..40159ac12ac8 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -169,7 +169,7 @@ static void gic_mask_irq(struct irq_data *d)
 {
 	unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
 
-	write_gic_rmask(BIT(intr));
+	write_gic_rmask(intr);
 	gic_clear_pcpu_masks(intr);
 }
 
@@ -179,7 +179,7 @@ static void gic_unmask_irq(struct irq_data *d)
 	unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
 	unsigned int cpu;
 
-	write_gic_smask(BIT(intr));
+	write_gic_smask(intr);
 
 	gic_clear_pcpu_masks(intr);
 	cpu = cpumask_first_and(affinity, cpu_online_mask);
@@ -767,7 +767,7 @@ static int __init gic_of_init(struct device_node *node,
 	for (i = 0; i < gic_shared_intrs; i++) {
 		change_gic_pol(i, GIC_POL_ACTIVE_HIGH);
 		change_gic_trig(i, GIC_TRIG_LEVEL);
-		write_gic_rmask(BIT(i));
+		write_gic_rmask(i);
 	}
 
 	for (i = 0; i < gic_vpes; i++) {

From 717e6f2893eb35ce6728c3cacdc297b78d371b31 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Mon, 11 Sep 2017 12:10:08 +0800
Subject: [PATCH 097/391] ceph: avoid panic in create_session_open_msg() if
 utsname() returns NULL

utsname() can return NULL while process is exiting. Kernel releases
file locks during process exits. We send request to mds when releasing
file lock. So it's possible that we open mds session while process is
exiting. utsname() is called in create_session_open_msg().

Link: http://tracker.ceph.com/issues/21275
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
[idryomov@gmail.com: drop utsname.h include from mds_client.c]
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c | 7 ++++---
 fs/ceph/mds_client.h | 3 +++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9dd6b836ac9e..84edfc60d87a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -7,7 +7,6 @@
 #include <linux/sched.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/utsname.h>
 #include <linux/ratelimit.h>
 
 #include "super.h"
@@ -884,8 +883,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	void *p;
 
 	const char* metadata[][2] = {
-		{"hostname", utsname()->nodename},
-		{"kernel_version", utsname()->release},
+		{"hostname", mdsc->nodename},
+		{"kernel_version", init_utsname()->release},
 		{"entity_id", opt->name ? : ""},
 		{"root", fsopt->server_path ? : "/"},
 		{NULL, NULL}
@@ -3539,6 +3538,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	init_rwsem(&mdsc->pool_perm_rwsem);
 	mdsc->pool_perm_tree = RB_ROOT;
 
+	strncpy(mdsc->nodename, utsname()->nodename,
+		sizeof(mdsc->nodename) - 1);
 	return 0;
 }
 
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index db57ae98ed34..636d6b2ec49c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -8,6 +8,7 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
+#include <linux/utsname.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -368,6 +369,8 @@ struct ceph_mds_client {
 
 	struct rw_semaphore     pool_perm_rwsem;
 	struct rb_root		pool_perm_tree;
+
+	char nodename[__NEW_UTS_LEN + 1];
 };
 
 extern const char *ceph_mds_op_name(int op);

From 19a8d6b7604df85402deecae01d7861cb1d40c89 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 19 Sep 2017 15:50:42 +0100
Subject: [PATCH 098/391] MIPS: PCI: Move map_irq() hooks out of initdata

04c81c7293df ("MIPS: PCI: Replace pci_fixup_irqs() call with host bridge
IRQ mapping hooks") moved the PCI IRQ fixup to the new host bridge
map/swizzle_irq() hooks mechanism. Those hooks can also be called after
boot, when all the __init/__initdata/__initconst sections have been freed.
Therefore, functions called by them (and the data they refer to) must not
be marked as __init/__initdata/__initconst lest compilation trigger section
mismatch warnings.

Fix all the board files map_irq() hooks by simply removing the respective
__init/__initdata/__initconst section markers and by adding another
persistent hook IRQ map for the txx9 board files.

Fixes: 04c81c7293df ("MIPS: PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks")
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Steve French <smfrench@gmail.com>
---
 arch/mips/ath79/pci.c           | 12 ++++++------
 arch/mips/pci/fixup-capcella.c  |  4 ++--
 arch/mips/pci/fixup-cobalt.c    |  8 ++++----
 arch/mips/pci/fixup-emma2rh.c   |  4 ++--
 arch/mips/pci/fixup-fuloong2e.c |  2 +-
 arch/mips/pci/fixup-ip32.c      |  4 ++--
 arch/mips/pci/fixup-jmr3927.c   |  2 +-
 arch/mips/pci/fixup-lantiq.c    |  2 +-
 arch/mips/pci/fixup-lemote2f.c  |  4 ++--
 arch/mips/pci/fixup-loongson3.c |  2 +-
 arch/mips/pci/fixup-malta.c     |  4 ++--
 arch/mips/pci/fixup-mpc30x.c    |  6 +++---
 arch/mips/pci/fixup-pmcmsp.c    |  8 ++++----
 arch/mips/pci/fixup-rbtx4927.c  |  2 +-
 arch/mips/pci/fixup-rbtx4938.c  |  2 +-
 arch/mips/pci/fixup-sni.c       | 12 ++++++------
 arch/mips/pci/fixup-tb0219.c    |  2 +-
 arch/mips/pci/fixup-tb0226.c    |  2 +-
 arch/mips/pci/fixup-tb0287.c    |  2 +-
 arch/mips/pci/pci-alchemy.c     |  2 +-
 arch/mips/pci/pci-bcm47xx.c     |  2 +-
 arch/mips/pci/pci-lasat.c       |  2 +-
 arch/mips/pci/pci-mt7620.c      |  2 +-
 arch/mips/pci/pci-octeon.c      |  5 ++---
 arch/mips/pci/pci-rt2880.c      |  2 +-
 arch/mips/pci/pci-rt3883.c      |  2 +-
 arch/mips/pci/pci-tx4938.c      |  2 +-
 arch/mips/pci/pci-tx4939.c      |  4 ++--
 arch/mips/pci/pci-xlp.c         |  2 +-
 arch/mips/pci/pci-xlr.c         |  2 +-
 arch/mips/pci/pcie-octeon.c     |  3 +--
 arch/mips/txx9/generic/pci.c    |  8 ++++++--
 32 files changed, 62 insertions(+), 60 deletions(-)

diff --git a/arch/mips/ath79/pci.c b/arch/mips/ath79/pci.c
index 730c0b03060d..b816cb4a25ff 100644
--- a/arch/mips/ath79/pci.c
+++ b/arch/mips/ath79/pci.c
@@ -22,10 +22,10 @@
 #include "pci.h"
 
 static int (*ath79_pci_plat_dev_init)(struct pci_dev *dev);
-static const struct ath79_pci_irq *ath79_pci_irq_map __initdata;
-static unsigned ath79_pci_nr_irqs __initdata;
+static const struct ath79_pci_irq *ath79_pci_irq_map;
+static unsigned ath79_pci_nr_irqs;
 
-static const struct ath79_pci_irq ar71xx_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq ar71xx_pci_irq_map[] = {
 	{
 		.slot	= 17,
 		.pin	= 1,
@@ -41,7 +41,7 @@ static const struct ath79_pci_irq ar71xx_pci_irq_map[] __initconst = {
 	}
 };
 
-static const struct ath79_pci_irq ar724x_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq ar724x_pci_irq_map[] = {
 	{
 		.slot	= 0,
 		.pin	= 1,
@@ -49,7 +49,7 @@ static const struct ath79_pci_irq ar724x_pci_irq_map[] __initconst = {
 	}
 };
 
-static const struct ath79_pci_irq qca955x_pci_irq_map[] __initconst = {
+static const struct ath79_pci_irq qca955x_pci_irq_map[] = {
 	{
 		.bus	= 0,
 		.slot	= 0,
@@ -64,7 +64,7 @@ static const struct ath79_pci_irq qca955x_pci_irq_map[] __initconst = {
 	},
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin)
+int pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin)
 {
 	int irq = -1;
 	int i;
diff --git a/arch/mips/pci/fixup-capcella.c b/arch/mips/pci/fixup-capcella.c
index 1c02f5737367..b4c263f16b15 100644
--- a/arch/mips/pci/fixup-capcella.c
+++ b/arch/mips/pci/fixup-capcella.c
@@ -32,13 +32,13 @@
 #define INTC	PC104PLUS_INTC_IRQ
 #define INTD	PC104PLUS_INTD_IRQ
 
-static char irq_tab_capcella[][5] __initdata = {
+static char irq_tab_capcella[][5] = {
  [11] = { -1, INT1, INT1, INT1, INT1 },
  [12] = { -1, INT2, INT2, INT2, INT2 },
  [14] = { -1, INTA, INTB, INTC, INTD }
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_capcella[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c
index b3ab59318d91..44be65c3e6bb 100644
--- a/arch/mips/pci/fixup-cobalt.c
+++ b/arch/mips/pci/fixup-cobalt.c
@@ -147,7 +147,7 @@ static void qube_raq_via_board_id_fixup(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0,
 	 qube_raq_via_board_id_fixup);
 
-static char irq_tab_qube1[] __initdata = {
+static char irq_tab_qube1[] = {
   [COBALT_PCICONF_CPU]	   = 0,
   [COBALT_PCICONF_ETH0]	   = QUBE1_ETH0_IRQ,
   [COBALT_PCICONF_RAQSCSI] = SCSI_IRQ,
@@ -156,7 +156,7 @@ static char irq_tab_qube1[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = 0
 };
 
-static char irq_tab_cobalt[] __initdata = {
+static char irq_tab_cobalt[] = {
   [COBALT_PCICONF_CPU]	   = 0,
   [COBALT_PCICONF_ETH0]	   = ETH0_IRQ,
   [COBALT_PCICONF_RAQSCSI] = SCSI_IRQ,
@@ -165,7 +165,7 @@ static char irq_tab_cobalt[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = ETH1_IRQ
 };
 
-static char irq_tab_raq2[] __initdata = {
+static char irq_tab_raq2[] = {
   [COBALT_PCICONF_CPU]	   = 0,
   [COBALT_PCICONF_ETH0]	   = ETH0_IRQ,
   [COBALT_PCICONF_RAQSCSI] = RAQ2_SCSI_IRQ,
@@ -174,7 +174,7 @@ static char irq_tab_raq2[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = ETH1_IRQ
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (cobalt_board_id <= COBALT_BRD_ID_QUBE1)
 		return irq_tab_qube1[slot];
diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c
index 19caf775c206..c31cb6af1cd0 100644
--- a/arch/mips/pci/fixup-emma2rh.c
+++ b/arch/mips/pci/fixup-emma2rh.c
@@ -43,7 +43,7 @@
  */
 
 #define MAX_SLOT_NUM 10
-static unsigned char irq_map[][5] __initdata = {
+static unsigned char irq_map[][5] = {
 	[3] = {0, MARKEINS_PCI_IRQ_INTB, MARKEINS_PCI_IRQ_INTC,
 	       MARKEINS_PCI_IRQ_INTD, 0,},
 	[4] = {0, MARKEINS_PCI_IRQ_INTA, 0, 0, 0,},
@@ -85,7 +85,7 @@ static void emma2rh_pci_host_fixup(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH,
 			 emma2rh_pci_host_fixup);
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_map[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c
index 50da773faede..b47c2771dc99 100644
--- a/arch/mips/pci/fixup-fuloong2e.c
+++ b/arch/mips/pci/fixup-fuloong2e.c
@@ -19,7 +19,7 @@
 /* South bridge slot number is set by the pci probe process */
 static u8 sb_slot = 5;
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = 0;
 
diff --git a/arch/mips/pci/fixup-ip32.c b/arch/mips/pci/fixup-ip32.c
index 133685e215ee..c6ec18a07e63 100644
--- a/arch/mips/pci/fixup-ip32.c
+++ b/arch/mips/pci/fixup-ip32.c
@@ -21,7 +21,7 @@
 #define INTB   MACEPCI_SHARED0_IRQ
 #define INTC   MACEPCI_SHARED1_IRQ
 #define INTD   MACEPCI_SHARED2_IRQ
-static char irq_tab_mace[][5] __initdata = {
+static char irq_tab_mace[][5] = {
       /* Dummy	INT#A  INT#B  INT#C  INT#D */
 	{0,	    0,	   0,	  0,	 0}, /* This is placeholder row - never used */
 	{0,	SCSI0, SCSI0, SCSI0, SCSI0},
@@ -39,7 +39,7 @@ static char irq_tab_mace[][5] __initdata = {
  * irqs.  I suppose a device without a pin A will thank us for doing it
  * right if there exists such a broken piece of crap.
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_mace[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-jmr3927.c b/arch/mips/pci/fixup-jmr3927.c
index 0f1069527cba..d3102eeea898 100644
--- a/arch/mips/pci/fixup-jmr3927.c
+++ b/arch/mips/pci/fixup-jmr3927.c
@@ -31,7 +31,7 @@
 #include <asm/txx9/pci.h>
 #include <asm/txx9/jmr3927.h>
 
-int __init jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char irq = pin;
 
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index 2b5427d3f35c..81530a13b349 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -23,7 +23,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c
index 95ab9a1bd010..20cdfdc08938 100644
--- a/arch/mips/pci/fixup-lemote2f.c
+++ b/arch/mips/pci/fixup-lemote2f.c
@@ -30,7 +30,7 @@
 #define PCID		7
 
 /* all the pci device has the PCIA pin, check the datasheet. */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/*	INTA	INTB	INTC	INTD */
 	{0, 0, 0, 0, 0},	/*  11: Unused */
 	{0, 0, 0, 0, 0},	/*  12: Unused */
@@ -51,7 +51,7 @@ static char irq_tab[][5] __initdata = {
 	{0, 0, 0, 0, 0},	/*  27: Unused */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 
diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c
index 2b6d5e196f99..8a741c2c6685 100644
--- a/arch/mips/pci/fixup-loongson3.c
+++ b/arch/mips/pci/fixup-loongson3.c
@@ -32,7 +32,7 @@ static void print_fixup_info(const struct pci_dev *pdev)
 			pdev->vendor, pdev->device, pdev->irq);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	print_fixup_info(dev);
 	return dev->irq;
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 40e920c653cc..3ec85331795e 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -12,7 +12,7 @@
 static char pci_irq[5] = {
 };
 
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/*	INTA	INTB	INTC	INTD */
 	{0,	0,	0,	0,	0 },	/*  0: GT64120 PCI bridge */
 	{0,	0,	0,	0,	0 },	/*  1: Unused */
@@ -38,7 +38,7 @@ static char irq_tab[][5] __initdata = {
 	{0,	PCID,	PCIA,	PCIB,	PCIC }	/* 21: PCI Slot 4 */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 	virq = irq_tab[slot][pin];
diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c
index 8e4f8288eca2..66eaf456bc89 100644
--- a/arch/mips/pci/fixup-mpc30x.c
+++ b/arch/mips/pci/fixup-mpc30x.c
@@ -22,19 +22,19 @@
 
 #include <asm/vr41xx/mpc30x.h>
 
-static const int internal_func_irqs[] __initconst = {
+static const int internal_func_irqs[] = {
 	VRC4173_CASCADE_IRQ,
 	VRC4173_AC97_IRQ,
 	VRC4173_USB_IRQ,
 };
 
-static const int irq_tab_mpc30x[] __initconst = {
+static const int irq_tab_mpc30x[] = {
  [12] = VRC4173_PCMCIA1_IRQ,
  [13] = VRC4173_PCMCIA2_IRQ,
  [29] = MQ200_IRQ,
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (slot == 30)
 		return internal_func_irqs[PCI_FUNC(dev->devfn)];
diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c
index fab405c21c2f..4ad2ef02087b 100644
--- a/arch/mips/pci/fixup-pmcmsp.c
+++ b/arch/mips/pci/fixup-pmcmsp.c
@@ -47,7 +47,7 @@
 
 #if defined(CONFIG_PMC_MSP7120_GW)
 /* Garibaldi Board IRQ wiring to PCI slots */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/* INTA	   INTB	   INTC	   INTD */
 	{0,	0,	0,	0,	0 },	/*    (AD[0]): Unused */
 	{0,	0,	0,	0,	0 },	/*    (AD[1]): Unused */
@@ -86,7 +86,7 @@ static char irq_tab[][5] __initdata = {
 #elif defined(CONFIG_PMC_MSP7120_EVAL)
 
 /* MSP7120 Eval Board IRQ wiring to PCI slots */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/* INTA	   INTB	   INTC	   INTD */
 	{0,	0,	0,	0,	0 },	/*    (AD[0]): Unused */
 	{0,	0,	0,	0,	0 },	/*    (AD[1]): Unused */
@@ -125,7 +125,7 @@ static char irq_tab[][5] __initdata = {
 #else
 
 /* Unknown board -- don't assign any IRQs */
-static char irq_tab[][5] __initdata = {
+static char irq_tab[][5] = {
 	/* INTA	   INTB	   INTC	   INTD */
 	{0,	0,	0,	0,	0 },	/*    (AD[0]): Unused */
 	{0,	0,	0,	0,	0 },	/*    (AD[1]): Unused */
@@ -202,7 +202,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
  *  RETURNS:	 IRQ number
  *
  ****************************************************************************/
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 #if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL)
 	printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n");
diff --git a/arch/mips/pci/fixup-rbtx4927.c b/arch/mips/pci/fixup-rbtx4927.c
index 321db265829c..d6aaed1d6be9 100644
--- a/arch/mips/pci/fixup-rbtx4927.c
+++ b/arch/mips/pci/fixup-rbtx4927.c
@@ -36,7 +36,7 @@
 #include <asm/txx9/pci.h>
 #include <asm/txx9/rbtx4927.h>
 
-int __init rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char irq = pin;
 
diff --git a/arch/mips/pci/fixup-rbtx4938.c b/arch/mips/pci/fixup-rbtx4938.c
index a80579af609b..ff22a22db73e 100644
--- a/arch/mips/pci/fixup-rbtx4938.c
+++ b/arch/mips/pci/fixup-rbtx4938.c
@@ -13,7 +13,7 @@
 #include <asm/txx9/pci.h>
 #include <asm/txx9/rbtx4938.h>
 
-int __init rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = tx4938_pcic1_map_irq(dev, slot);
 
diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c
index f67ebeeb4200..adb9a58641e8 100644
--- a/arch/mips/pci/fixup-sni.c
+++ b/arch/mips/pci/fixup-sni.c
@@ -40,7 +40,7 @@
  * seem to be a documentation error.  At least on my RM200C the Cirrus
  * Logic CL-GD5434 VGA is device 3.
  */
-static char irq_tab_rm200[8][5] __initdata = {
+static char irq_tab_rm200[8][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,    0,	  0,	0,    0 },	/* EISA bridge */
 	{  SCSI, SCSI, SCSI, SCSI, SCSI },	/* SCSI */
@@ -57,7 +57,7 @@ static char irq_tab_rm200[8][5] __initdata = {
  *
  * The VGA card is optional for RM300 systems.
  */
-static char irq_tab_rm300d[8][5] __initdata = {
+static char irq_tab_rm300d[8][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,    0,	  0,	0,    0 },	/* EISA bridge */
 	{  SCSI, SCSI, SCSI, SCSI, SCSI },	/* SCSI */
@@ -69,7 +69,7 @@ static char irq_tab_rm300d[8][5] __initdata = {
 	{     0, INTD, INTA, INTB, INTC },	/* Slot 4 */
 };
 
-static char irq_tab_rm300e[5][5] __initdata = {
+static char irq_tab_rm300e[5][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,    0,	  0,	0,    0 },	/* HOST bridge */
 	{  SCSI, SCSI, SCSI, SCSI, SCSI },	/* SCSI */
@@ -96,7 +96,7 @@ static char irq_tab_rm300e[5][5] __initdata = {
 #define INTC	PCIT_IRQ_INTC
 #define INTD	PCIT_IRQ_INTD
 
-static char irq_tab_pcit[13][5] __initdata = {
+static char irq_tab_pcit[13][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,     0,	    0,	   0,	  0 },	/* HOST bridge */
 	{ SCSI0, SCSI0, SCSI0, SCSI0, SCSI0 },	/* SCSI */
@@ -113,7 +113,7 @@ static char irq_tab_pcit[13][5] __initdata = {
 	{     0,  INTA,	 INTB,	INTC,  INTD },	/* Slot 5 */
 };
 
-static char irq_tab_pcit_cplus[13][5] __initdata = {
+static char irq_tab_pcit_cplus[13][5] = {
 	/*	 INTA  INTB  INTC  INTD */
 	{     0,     0,	    0,	   0,	  0 },	/* HOST bridge */
 	{     0,  INTB,	 INTC,	INTD,  INTA },	/* PCI Slot 9 */
@@ -130,7 +130,7 @@ static inline int is_rm300_revd(void)
 	return (csmsr & 0xa0) == 0x20;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (sni_brd_type) {
 	case SNI_BRD_PCI_TOWER_CPLUS:
diff --git a/arch/mips/pci/fixup-tb0219.c b/arch/mips/pci/fixup-tb0219.c
index d0b0083fbd27..cc581535f257 100644
--- a/arch/mips/pci/fixup-tb0219.c
+++ b/arch/mips/pci/fixup-tb0219.c
@@ -23,7 +23,7 @@
 
 #include <asm/vr41xx/tb0219.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c
index 4196ccf3ea3d..b827b5cad5fd 100644
--- a/arch/mips/pci/fixup-tb0226.c
+++ b/arch/mips/pci/fixup-tb0226.c
@@ -23,7 +23,7 @@
 #include <asm/vr41xx/giu.h>
 #include <asm/vr41xx/tb0226.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0287.c b/arch/mips/pci/fixup-tb0287.c
index 8c5039ed75d7..98f26285f2e3 100644
--- a/arch/mips/pci/fixup-tb0287.c
+++ b/arch/mips/pci/fixup-tb0287.c
@@ -22,7 +22,7 @@
 
 #include <asm/vr41xx/tb0287.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char bus;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index e99ca7702d8a..f15ec98de2de 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -522,7 +522,7 @@ static int __init alchemy_pci_init(void)
 arch_initcall(alchemy_pci_init);
 
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct alchemy_pci_context *ctx = dev->sysdata;
 	if (ctx && ctx->board_map_irq)
diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c
index 76f16eaed0ad..230d7dd273e2 100644
--- a/arch/mips/pci/pci-bcm47xx.c
+++ b/arch/mips/pci/pci-bcm47xx.c
@@ -28,7 +28,7 @@
 #include <linux/bcma/bcma.h>
 #include <bcm47xx.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return 0;
 }
diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c
index 40d2797d2bc4..47f4ee6bbb3b 100644
--- a/arch/mips/pci/pci-lasat.c
+++ b/arch/mips/pci/pci-lasat.c
@@ -61,7 +61,7 @@ arch_initcall(lasat_pci_setup);
 #define LASAT_IRQ_PCIC	 (LASAT_IRQ_BASE + 7)
 #define LASAT_IRQ_PCID	 (LASAT_IRQ_BASE + 8)
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (slot) {
 	case 1:
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 4e633c1e7ff3..90fba9bf98da 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -361,7 +361,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	u32 val;
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index 9ee01936862e..3e92a06fa772 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -59,8 +59,7 @@ union octeon_pci_address {
 	} s;
 };
 
-int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
-					 u8 slot, u8 pin);
+int (*octeon_pcibios_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
 enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
 
 /**
@@ -74,7 +73,7 @@ enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
  *		 as it goes through each bridge.
  * Returns Interrupt number for the device
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (octeon_pcibios_map_irq)
 		return octeon_pcibios_map_irq(dev, slot, pin);
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index d6360fe73d05..711cdccdf65b 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -181,7 +181,7 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val)
 	spin_unlock_irqrestore(&rt2880_pci_lock, flags);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 04f8ea953297..958899ffe99c 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -564,7 +564,7 @@ static int rt3883_pci_probe(struct platform_device *pdev)
 	return err;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/pci-tx4938.c b/arch/mips/pci/pci-tx4938.c
index 000c0e1f9ef8..a6418460e3c4 100644
--- a/arch/mips/pci/pci-tx4938.c
+++ b/arch/mips/pci/pci-tx4938.c
@@ -112,7 +112,7 @@ int __init tx4938_pciclk66_setup(void)
 	return pciclk;
 }
 
-int __init tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
+int tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
 {
 	if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4938_pcic1ptr) {
 		switch (slot) {
diff --git a/arch/mips/pci/pci-tx4939.c b/arch/mips/pci/pci-tx4939.c
index 9d6acc00f348..09a65f7dbe7c 100644
--- a/arch/mips/pci/pci-tx4939.c
+++ b/arch/mips/pci/pci-tx4939.c
@@ -48,7 +48,7 @@ void __init tx4939_report_pci1clk(void)
 		((pciclk + 50000) / 100000) % 10);
 }
 
-int __init tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
+int tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
 {
 	if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4939_pcic1ptr) {
 		switch (slot) {
@@ -68,7 +68,7 @@ int __init tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot)
 	return -1;
 }
 
-int __init tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = tx4939_pcic1_map_irq(dev, slot);
 
diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c
index 7babf01600cb..9eff9137f78e 100644
--- a/arch/mips/pci/pci-xlp.c
+++ b/arch/mips/pci/pci-xlp.c
@@ -205,7 +205,7 @@ int xlp_socdev_to_node(const struct pci_dev *lnkdev)
 		return PCI_SLOT(lnkdev->devfn) / 8;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct pci_dev *lnkdev;
 	int lnkfunc, node;
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 26d2dabef281..2a1c81a129ba 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -315,7 +315,7 @@ static void xls_pcie_ack_b(struct irq_data *d)
 	}
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return get_irq_vector(dev);
 }
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index ad3584dbc9d7..fd2887415bc8 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -1464,8 +1464,7 @@ static int cvmx_pcie_rc_initialize(int pcie_port)
  *		 as it goes through each bridge.
  * Returns Interrupt number for the device
  */
-int __init octeon_pcie_pcibios_map_irq(const struct pci_dev *dev,
-				       u8 slot, u8 pin)
+int octeon_pcie_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	/*
 	 * The EBH5600 board with the PCI to PCIe bridge mistakenly
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index 0bd2a1e1ff9a..fb998726bd5d 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -386,9 +386,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+static int (*txx9_pci_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
-	return txx9_board_vec->pci_map_irq(dev, slot, pin);
+	return txx9_pci_map_irq(dev, slot, pin);
 }
 
 char * (*txx9_board_pcibios_setup)(char *str) __initdata;
@@ -424,5 +425,8 @@ char *__init txx9_pcibios_setup(char *str)
 			txx9_pci_err_action = TXX9_PCI_ERR_IGNORE;
 		return NULL;
 	}
+
+	txx9_pci_map_irq = txx9_board_vec->pci_map_irq;
+
 	return str;
 }

From fbcab13d2e2511a858590846ac2e2d7cbd830591 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 19 Sep 2017 09:51:28 -0400
Subject: [PATCH 099/391] selftests: silence test output by default

Some of the networking tests are very noisy and make it impossible to
see if we actually passed the tests as they run.  Default to suppressing
the output from any tests run in order to make it easier to track what
failed.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 693616651da5..4665463779f5 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -24,7 +24,7 @@ define RUN_TESTS
 			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
 			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
 		else					\
-			cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
+			cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
 		fi;					\
 	done;
 endef
@@ -55,7 +55,7 @@ endif
 define EMIT_TESTS
 	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
-		echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+		echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
 	done;
 endef
 

From 422d8dc6fd3afecd66bd6acfcd73a2d53e338ff3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 19 Sep 2017 09:51:26 -0400
Subject: [PATCH 100/391] selftest: add a reuseaddr test

This is to test for a regression introduced by

b9470c27607b ("inet: kill smallest_size and smallest_port")

which introduced a problem with reuseaddr and bind conflicts.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/.gitignore        |   1 +
 tools/testing/selftests/net/Makefile          |   2 +-
 .../selftests/net/reuseaddr_conflict.c        | 114 ++++++++++++++++++
 3 files changed, 116 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/reuseaddr_conflict.c

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 9801253e4802..c612d6e38c62 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -6,3 +6,4 @@ reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
+reuseaddr_conflict
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index de1f5772b878..3df542c84610 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -7,7 +7,7 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetl
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket
 TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack msg_zerocopy
+TEST_GEN_FILES += reuseport_dualstack msg_zerocopy reuseaddr_conflict
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644
index 000000000000..7c5b12664b03
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -0,0 +1,114 @@
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb
+ * when we open the ipv6 conterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+	int fd = -1;
+	int reuseaddr = 1;
+	int v6only = 1;
+	int addrlen;
+	int ret = -1;
+	struct sockaddr *addr;
+	int family = ipv6 ? AF_INET6 : AF_INET;
+
+	struct sockaddr_in6 addr6 = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(PORT),
+		.sin6_addr = in6addr_any
+	};
+	struct sockaddr_in addr4 = {
+		.sin_family = AF_INET,
+		.sin_port = htons(PORT),
+		.sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+	};
+
+
+	if (ipv6) {
+		addr = (struct sockaddr*)&addr6;
+		addrlen = sizeof(addr6);
+	} else {
+		addr = (struct sockaddr*)&addr4;
+		addrlen = sizeof(addr4);
+	}
+
+	if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+		perror("socket");
+		goto out;
+	}
+
+	if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+			       sizeof(v6only)) < 0) {
+		perror("setsockopt IPV6_V6ONLY");
+		goto out;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+		       sizeof(reuseaddr)) < 0) {
+		perror("setsockopt SO_REUSEADDR");
+		goto out;
+	}
+
+	if (bind(fd, addr, addrlen) < 0) {
+		perror("bind");
+		goto out;
+	}
+
+	if (any)
+		return fd;
+
+	if (listen(fd, 1) < 0) {
+		perror("listen");
+		goto out;
+	}
+	return fd;
+out:
+	close(fd);
+	return ret;
+}
+
+int main(void)
+{
+	int listenfd;
+	int fd1, fd2;
+
+	fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+	listenfd = open_port(0, 0);
+	if (listenfd < 0)
+		error(1, errno, "Couldn't open listen socket");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+	fd1 = open_port(1, 1);
+	if (fd1 < 0)
+		error(1, errno, "Couldn't open ipv6 reuseport");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd2 = open_port(0, 1);
+	if (fd2 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	close(fd1);
+	fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+	fprintf(stderr, "Success");
+	return 0;
+}

From e06d79fbc338935ef27befc84f8b8b2e5f878a10 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 19 Sep 2017 09:51:27 -0400
Subject: [PATCH 101/391] selftests: actually run the various net selftests

These self tests are just self contained binaries, they are not run by
any of the scripts in the directory.  This means they need to be marked
with TEST_GEN_PROGS to actually be run, not TEST_GEN_FILES.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3df542c84610..d86bca991f45 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,9 +5,9 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_GEN_FILES =  socket
-TEST_GEN_FILES += psock_fanout psock_tpacket
-TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack msg_zerocopy reuseaddr_conflict
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
 
 include ../lib.mk
 

From 06e8852ceecccdeff6841a6c5cd78a947a75d5bc Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Fri, 8 Sep 2017 14:01:19 +0200
Subject: [PATCH 102/391] selftests/net: msg_zerocopy enable build with older
 kernel headers

Explicitly define SO_EE_ORIGIN_ZEROCOPY.
This makes the test program build with older kernel headers,
e.g. from Debian 9.

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/net/msg_zerocopy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 40232af5b023..3ab6ec403905 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -55,7 +55,7 @@
 #include <unistd.h>
 
 #ifndef SO_EE_ORIGIN_ZEROCOPY
-#define SO_EE_ORIGIN_ZEROCOPY		SO_EE_ORIGIN_UPAGE
+#define SO_EE_ORIGIN_ZEROCOPY		5
 #endif
 
 #ifndef SO_ZEROCOPY

From 9e987b70ada27554c5d176421de1d167218c49b5 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Fri, 15 Sep 2017 17:35:27 -0700
Subject: [PATCH 103/391] ACPI / bus: Make ACPI_HANDLE() work for non-GPL code
 again

Due to commit db3e50f3234b (device property: Get rid of struct
fwnode_handle type field), ACPI_HANDLE() inadvertently became
a GPL-only call. The call path that led to that was:

ACPI_HANDLE()
    ACPI_COMPANION()
        to_acpi_device_node()
            is_acpi_device_node()
                acpi_device_fwnode_ops
                    DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops);

...and the new DECLARE_ACPI_FWNODE_OPS() includes
EXPORT_SYMBOL_GPL, whereas previously it was a static struct.

In order to avoid changing any of that, let's instead provide ever
so slightly better encapsulation of those struct fwnode_operations
instances. Those do not really need to be directly used in
inline function calls in header files. Simply moving two small
functions (is_acpi_device_node and is_acpi_data_node) out of
acpi_bus.h, and into a .c file, does that.

That leaves the internals of struct fwnode_operations as GPL-only
(which I think was the intent all along), but un-breaks any driver
code out there that relies on the ACPI subsystem's being (historically)
an EXPORT_SYMBOL-usable system. By that, I mean, ACPI_HANDLE() and
other basic ACPI calls were non-GPL-protected.

Also, while I'm there, remove a tiny bit of redundancy that was missed
in the earlier commit, by having is_acpi_node() use the other two
routines, instead of checking fwnode directly.

Fixes: db3e50f3234b (device property: Get rid of struct fwnode_handle type field)
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 13 +++++++++++++
 include/acpi/acpi_bus.h | 18 ++++--------------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index c1c216163de3..1e3c2517a1ac 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1293,3 +1293,16 @@ static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops);
 DECLARE_ACPI_FWNODE_OPS(acpi_data_fwnode_ops);
 const struct fwnode_operations acpi_static_fwnode_ops;
+
+bool is_acpi_device_node(const struct fwnode_handle *fwnode)
+{
+	return !IS_ERR_OR_NULL(fwnode) &&
+		fwnode->ops == &acpi_device_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_device_node);
+
+bool is_acpi_data_node(const struct fwnode_handle *fwnode)
+{
+	return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_data_node);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index dedf9d789166..fa1505292f6c 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -399,17 +399,12 @@ extern const struct fwnode_operations acpi_device_fwnode_ops;
 extern const struct fwnode_operations acpi_data_fwnode_ops;
 extern const struct fwnode_operations acpi_static_fwnode_ops;
 
+bool is_acpi_device_node(const struct fwnode_handle *fwnode);
+bool is_acpi_data_node(const struct fwnode_handle *fwnode);
+
 static inline bool is_acpi_node(const struct fwnode_handle *fwnode)
 {
-	return !IS_ERR_OR_NULL(fwnode) &&
-		(fwnode->ops == &acpi_device_fwnode_ops
-		 || fwnode->ops == &acpi_data_fwnode_ops);
-}
-
-static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
-{
-	return !IS_ERR_OR_NULL(fwnode) &&
-		fwnode->ops == &acpi_device_fwnode_ops;
+	return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode));
 }
 
 #define to_acpi_device_node(__fwnode)					\
@@ -422,11 +417,6 @@ static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
 			NULL;						\
 	})
 
-static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
-{
-	return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
-}
-
 #define to_acpi_data_node(__fwnode)					\
 	({								\
 		typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode;	\

From 6073512cc8e2c48bed5c6625c02c5e4ae50cec34 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 18 Sep 2017 14:59:20 +0200
Subject: [PATCH 104/391] net: phy: Kconfig: Fix PHY infrastructure menu in
 menuconfig

Since the integration of PHYLINK, the configuration option which
used to be under the PHY infrastructure menu in menuconfig ended
up one level up (the network device driver section)

By placing PHYLINK option right after PHYLIB entry, it broke the
way Kconfig used to build the menu. See kconfig-language.txt, section
"Menu structure", 2nd method.

This is fixed by placing the PHYLINK option just before PHYLIB.

Fixes: 9525ae83959b ("phylink: add phylink infrastructure")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index a9d16a3af514..cd931cf9dcc2 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -160,15 +160,6 @@ config MDIO_XGENE
 
 endif
 
-menuconfig PHYLIB
-	tristate "PHY Device support and infrastructure"
-	depends on NETDEVICES
-	select MDIO_DEVICE
-	help
-	  Ethernet controllers are usually attached to PHY
-	  devices.  This option provides infrastructure for
-	  managing PHY devices.
-
 config PHYLINK
 	tristate
 	depends on NETDEVICES
@@ -179,6 +170,15 @@ config PHYLINK
 	  configuration links, PHYs, and Serdes links with MAC level
 	  autonegotiation modes.
 
+menuconfig PHYLIB
+	tristate "PHY Device support and infrastructure"
+	depends on NETDEVICES
+	select MDIO_DEVICE
+	help
+	  Ethernet controllers are usually attached to PHY
+	  devices.  This option provides infrastructure for
+	  managing PHY devices.
+
 if PHYLIB
 
 config SWPHY

From b247c211ee367cfe975dd54e381094083adfd8d3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 19 Sep 2017 02:43:13 +0200
Subject: [PATCH 105/391] PM: docs: Drop an excess character from devices.rst

Drop an excess "`" from Documentation/driver-api/pm/devices.rst.

Fixes: 2728b2d2e5be (PM / core / docs: Convert sleep states API document to reST)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/pm/devices.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index bedd32388dac..a0dc2879a152 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -675,7 +675,7 @@ sub-domain of the parent domain.
 
 Support for power domains is provided through the :c:member:`pm_domain` field of
 |struct device|.  This field is a pointer to an object of type
-|struct dev_pm_domain|, defined in :file:`include/linux/pm.h``, providing a set
+|struct dev_pm_domain|, defined in :file:`include/linux/pm.h`, providing a set
 of power management callbacks analogous to the subsystem-level and device driver
 callbacks that are executed for the given device during all power transitions,
 instead of the respective subsystem-level callbacks.  Specifically, if a

From 157c460e10cb6eca29ccbd0f023db159d0c55ec7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 19 Sep 2017 02:22:39 +0200
Subject: [PATCH 106/391] PM: core: Fix device_pm_check_callbacks()

The device_pm_check_callbacks() function doesn't check legacy
->suspend and ->resume callback pointers under the device's
bus type, class and driver, so in some cases it may set the
no_pm_callbacks flag for the device incorrectly and then the
callbacks may be skipped during system suspend/resume, which
shouldn't happen.

Fixes: aa8e54b55947 (PM / sleep: Go direct_complete if driver has no callbacks)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 4.5+ <stable@vger.kernel.org> # 4.5+
---
 drivers/base/power/main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index ea1732ed7a9d..770b1539a083 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1860,10 +1860,13 @@ void device_pm_check_callbacks(struct device *dev)
 {
 	spin_lock_irq(&dev->power.lock);
 	dev->power.no_pm_callbacks =
-		(!dev->bus || pm_ops_is_empty(dev->bus->pm)) &&
-		(!dev->class || pm_ops_is_empty(dev->class->pm)) &&
+		(!dev->bus || (pm_ops_is_empty(dev->bus->pm) &&
+		 !dev->bus->suspend && !dev->bus->resume)) &&
+		(!dev->class || (pm_ops_is_empty(dev->class->pm) &&
+		 !dev->class->suspend && !dev->class->resume)) &&
 		(!dev->type || pm_ops_is_empty(dev->type->pm)) &&
 		(!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) &&
-		(!dev->driver || pm_ops_is_empty(dev->driver->pm));
+		(!dev->driver || (pm_ops_is_empty(dev->driver->pm) &&
+		 !dev->driver->suspend && !dev->driver->resume));
 	spin_unlock_irq(&dev->power.lock);
 }

From ff76898c0a14a43f71bfe63dd1903087e96ce238 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 19 Sep 2017 08:23:22 -0700
Subject: [PATCH 107/391] cpufreq: dt-platdev: Add some missing platforms to
 the blacklist

Commit edeec420de24 (cpufreq: dt-platdev: Automatically create cpufreq
device with OPP v2) missed adding few platforms to the blacklist which
create the cpufreq-dt device from their own drivers, after some
dependencies are sorted out.

And for those platforms, both the platform specific driver and the
cpufreq-dt-platdev driver try to create the cpufreq-dt device now.

Fix that by including those platforms in the blacklist. This doesn't include
the TI platforms, for which there is a separate patch.

Fixes: edeec420de24 (cpufreq: dt-cpufreq: platdev Automatically create device with OPP v2)
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index a020da7940d6..430edadca527 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -106,6 +106,18 @@ static const struct of_device_id whitelist[] __initconst = {
  * platforms using "operating-points-v2" property.
  */
 static const struct of_device_id blacklist[] __initconst = {
+	{ .compatible = "calxeda,highbank", },
+	{ .compatible = "calxeda,ecx-2000", },
+
+	{ .compatible = "marvell,armadaxp", },
+
+	{ .compatible = "nvidia,tegra124", },
+
+	{ .compatible = "st,stih407", },
+	{ .compatible = "st,stih410", },
+
+	{ .compatible = "sigma,tango4", },
+
 	{ }
 };
 

From ed40fad9a568efe83f5e80897ded71117b6911b3 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Thu, 31 Aug 2017 22:24:36 +0200
Subject: [PATCH 108/391] ARM: cpuidle: Avoid memleak if init fail

In case there are no DT idle states defined or
cpuidle_register_driver() fails, the copy of the idle driver is leaked:

    unreferenced object 0xede0dc00 (size 1024):
    comm "swapper/0", pid 1, jiffies 4294937431 (age 744.510s)
    hex dump (first 32 bytes):
    94 9e 0b c1 00 00 00 00 00 00 00 00 00 00 00 00 ................
    57 46 49 00 00 00 00 00 00 00 00 00 00 00 00 00 WFI.............
    backtrace:
    [<c1295f04>] arm_idle_init+0x44/0x1ac
    [<c0301e6c>] do_one_initcall+0x3c/0x16c
    [<c1200d70>] kernel_init_freeable+0x110/0x1d0
    [<c0cb3624>] kernel_init+0x8/0x114
    [<c0307a98>] ret_from_fork+0x14/0x3c

So fix this by freeing the unregistered copy in error case.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Fixes: d50a7d8acd78 (ARM: cpuidle: Support asymmetric idle definition)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-arm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 7080c384ad5d..52a75053ee03 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -104,13 +104,13 @@ static int __init arm_idle_init(void)
 		ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
 		if (ret <= 0) {
 			ret = ret ? : -ENODEV;
-			goto out_fail;
+			goto init_fail;
 		}
 
 		ret = cpuidle_register_driver(drv);
 		if (ret) {
 			pr_err("Failed to register cpuidle driver\n");
-			goto out_fail;
+			goto init_fail;
 		}
 
 		/*
@@ -149,6 +149,8 @@ static int __init arm_idle_init(void)
 	}
 
 	return 0;
+init_fail:
+	kfree(drv);
 out_fail:
 	while (--cpu >= 0) {
 		dev = per_cpu(cpuidle_devices, cpu);

From 0c0bceb796878a5baea1f47033215fac0774e498 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 8 Sep 2017 12:24:41 +0300
Subject: [PATCH 109/391] ACPI: properties: Return _DSD hierarchical extension
 (data) sub-nodes correctly

The recently merged patch "ACPI: Prepare for constifying
acpi_get_next_subnode() fwnode argument" was part of a patchset
constifying the fwnode arguments across the fwnode property API. The
purpose of the patch was to allow returning non-const fwnodes from a data
structure the root of which is const.

Unfortunately the patch introduced the functionality, in particular when
starting parsed from an ACPI device node, the hierarchical data extension
nodes would not be enumerated.

Restore the old behaviour while still retaining constness properties of
the patch.

Fixes: 01c1da289791 "ACPI: Prepare for constifying acpi_get_next_subnode() fwnode argument"
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index c1c216163de3..265b74f440b6 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -908,11 +908,12 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 					    struct fwnode_handle *child)
 {
 	const struct acpi_device *adev = to_acpi_device_node(fwnode);
-	struct acpi_device *child_adev = NULL;
 	const struct list_head *head;
 	struct list_head *next;
 
 	if (!child || is_acpi_device_node(child)) {
+		struct acpi_device *child_adev;
+
 		if (adev)
 			head = &adev->children;
 		else
@@ -922,8 +923,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 			goto nondev;
 
 		if (child) {
-			child_adev = to_acpi_device_node(child);
-			next = child_adev->node.next;
+			adev = to_acpi_device_node(child);
+			next = adev->node.next;
 			if (next == head) {
 				child = NULL;
 				goto nondev;
@@ -941,8 +942,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 		const struct acpi_data_node *data = to_acpi_data_node(fwnode);
 		struct acpi_data_node *dn;
 
-		if (child_adev)
-			head = &child_adev->data.subnodes;
+		if (adev)
+			head = &adev->data.subnodes;
 		else if (data)
 			head = &data->data.subnodes;
 		else

From 0647169cf9aa441700eb8f23ea49be060626534b Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Sep 2017 12:41:37 +0200
Subject: [PATCH 110/391] rhashtable: Documentation tweak

Clarify that rhashtable_walk_{stop,start} will not reset the iterator to
the beginning of the hash table.  Confusion between rhashtable_walk_enter
and rhashtable_walk_start has already lead to a bug.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 707ca5d677c6..ddd7dde87c3c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -735,9 +735,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  * rhashtable_walk_start - Start a hash table walk
  * @iter:	Hash table iterator
  *
- * Start a hash table walk.  Note that we take the RCU lock in all
- * cases including when we return an error.  So you must always call
- * rhashtable_walk_stop to clean up.
+ * Start a hash table walk at the current iterator position.  Note that we take
+ * the RCU lock in all cases including when we return an error.  So you must
+ * always call rhashtable_walk_stop to clean up.
  *
  * Returns zero if successful.
  *
@@ -846,7 +846,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
  * rhashtable_walk_stop - Finish a hash table walk
  * @iter:	Hash table iterator
  *
- * Finish a hash table walk.
+ * Finish a hash table walk.  Does not reset the iterator to the start of the
+ * hash table.
  */
 void rhashtable_walk_stop(struct rhashtable_iter *iter)
 	__releases(RCU)

From 930651a75bf1ba6893a8b8475270664ebdb6cf4a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 Sep 2017 09:15:59 -0700
Subject: [PATCH 111/391] bpf: do not disable/enable BH in bpf_map_free_id()

syzkaller reported following splat [1]

Since hard irq are disabled by the caller, bpf_map_free_id()
should not try to enable/disable BH.

Another solution would be to change htab_map_delete_elem() to
defer the free_htab_elem() call after
raw_spin_unlock_irqrestore(&b->lock, flags), but this might be not
enough to cover other code paths.

[1]
WARNING: CPU: 1 PID: 8052 at kernel/softirq.c:161 __local_bh_enable_ip
+0x1e/0x160 kernel/softirq.c:161
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 8052 Comm: syz-executor1 Not tainted 4.13.0-next-20170915+
#23
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x194/0x257 lib/dump_stack.c:52
 panic+0x1e4/0x417 kernel/panic.c:181
 __warn+0x1c4/0x1d9 kernel/panic.c:542
 report_bug+0x211/0x2d0 lib/bug.c:183
 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178
 do_trap_no_signal arch/x86/kernel/traps.c:212 [inline]
 do_trap+0x260/0x390 arch/x86/kernel/traps.c:261
 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311
 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905
RIP: 0010:__local_bh_enable_ip+0x1e/0x160 kernel/softirq.c:161
RSP: 0018:ffff8801cdcd7748 EFLAGS: 00010046
RAX: 0000000000000082 RBX: 0000000000000201 RCX: 0000000000000000
RDX: 1ffffffff0b5933c RSI: 0000000000000201 RDI: ffffffff85ac99e0
RBP: ffff8801cdcd7758 R08: ffffffff85b87158 R09: 1ffff10039b9aec6
R10: ffff8801c99f24c0 R11: 0000000000000002 R12: ffffffff817b0b47
R13: dffffc0000000000 R14: ffff8801cdcd77e8 R15: 0000000000000001
 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:176 [inline]
 _raw_spin_unlock_bh+0x30/0x40 kernel/locking/spinlock.c:207
 spin_unlock_bh include/linux/spinlock.h:361 [inline]
 bpf_map_free_id kernel/bpf/syscall.c:197 [inline]
 __bpf_map_put+0x267/0x320 kernel/bpf/syscall.c:227
 bpf_map_put+0x1a/0x20 kernel/bpf/syscall.c:235
 bpf_map_fd_put_ptr+0x15/0x20 kernel/bpf/map_in_map.c:96
 free_htab_elem+0xc3/0x1b0 kernel/bpf/hashtab.c:658
 htab_map_delete_elem+0x74d/0x970 kernel/bpf/hashtab.c:1063
 map_delete_elem kernel/bpf/syscall.c:633 [inline]
 SYSC_bpf kernel/bpf/syscall.c:1479 [inline]
 SyS_bpf+0x2188/0x46a0 kernel/bpf/syscall.c:1451
 entry_SYSCALL_64_fastpath+0x1f/0xbe

Fixes: f3f1c054c288 ("bpf: Introduce bpf_map ID")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cb17e1cd1d43..25d074920a00 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -186,15 +186,17 @@ static int bpf_map_alloc_id(struct bpf_map *map)
 
 static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
 {
+	unsigned long flags;
+
 	if (do_idr_lock)
-		spin_lock_bh(&map_idr_lock);
+		spin_lock_irqsave(&map_idr_lock, flags);
 	else
 		__acquire(&map_idr_lock);
 
 	idr_remove(&map_idr, map->id);
 
 	if (do_idr_lock)
-		spin_unlock_bh(&map_idr_lock);
+		spin_unlock_irqrestore(&map_idr_lock, flags);
 	else
 		__release(&map_idr_lock);
 }

From 039cc1c1eb971ffe1973fddb8769a80fd4950a6f Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Tue, 19 Sep 2017 15:06:13 -0500
Subject: [PATCH 112/391] cpufreq: ti-cpufreq: Support additional am43xx
 platforms

Rather than letting the ti-cpufreq driver match against 'ti,am4372'
machine compatible during probe let's match against 'ti,am43' so that we
can support both 'ti,am4372' and 'ti,am438x' platforms which both match
to this compatible.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/ti-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index b29cd3398463..4bf47de6101f 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -190,7 +190,7 @@ static int ti_cpufreq_setup_syscon_register(struct ti_cpufreq_data *opp_data)
 
 static const struct of_device_id ti_cpufreq_of_match[] = {
 	{ .compatible = "ti,am33xx", .data = &am3x_soc_data, },
-	{ .compatible = "ti,am4372", .data = &am4x_soc_data, },
+	{ .compatible = "ti,am43", .data = &am4x_soc_data, },
 	{ .compatible = "ti,dra7", .data = &dra7_soc_data },
 	{},
 };

From 2a4776e14f7da3d48fffb4edbb82355742f23478 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:10 +0100
Subject: [PATCH 113/391] net: hns3: Fixes initialization of phy address from
 firmware

Default phy address of every port is 0. Therefore, phy address for
each port need to be fetched from firmware and device initialized
with fetched non-default phy address.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index bb45365fb817..db4e07dac29a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1066,6 +1066,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 	for (i = 0; i < ETH_ALEN; i++)
 		hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i];
 	hdev->hw.mac.media_type = cfg.media_type;
+	hdev->hw.mac.phy_addr = cfg.phy_addr;
 	hdev->num_desc = cfg.tqp_desc_num;
 	hdev->tm_info.num_pg = 1;
 	hdev->tm_info.num_tc = cfg.tc_num;

From c5b1b97522ef32d2170c9aa1a0c1eec179acbb3a Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:11 +0100
Subject: [PATCH 114/391] net: hns3: Fixes the command used to unmap ring from
 vector

This patch fixes the IMP command being used to unmap the vector
from the corresponding ring.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index db4e07dac29a..e324bc6e9f4f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2779,7 +2779,7 @@ static int hclge_unmap_ring_from_vector(
 			}
 			i = 0;
 			hclge_cmd_setup_basic_desc(&desc,
-						   HCLGE_OPC_ADD_RING_TO_VECTOR,
+						   HCLGE_OPC_DEL_RING_TO_VECTOR,
 						   false);
 			req->int_vector_id = vector_id;
 		}

From 0305b443a3ba5b84aa474786026df04a70460135 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:12 +0100
Subject: [PATCH 115/391] net: hns3: Fixes ring-to-vector map-and-unmap command

This patch fixes the vector-to-ring map and unmap command and adds
INT_GL(for, Gap Limiting Interrupts) and VF id to it as required
by the hardware interface.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Mingguang Qu <qumingguang@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 8 ++++++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 ++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 91ae0135ee50..c2b613b40509 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -238,7 +238,7 @@ struct hclge_tqp_map {
 	u8 rsv[18];
 };
 
-#define HCLGE_VECTOR_ELEMENTS_PER_CMD	11
+#define HCLGE_VECTOR_ELEMENTS_PER_CMD	10
 
 enum hclge_int_type {
 	HCLGE_INT_TX,
@@ -252,8 +252,12 @@ struct hclge_ctrl_vector_chain {
 #define HCLGE_INT_TYPE_S	0
 #define HCLGE_INT_TYPE_M	0x3
 #define HCLGE_TQP_ID_S		2
-#define HCLGE_TQP_ID_M		(0x3fff << HCLGE_TQP_ID_S)
+#define HCLGE_TQP_ID_M		(0x7ff << HCLGE_TQP_ID_S)
+#define HCLGE_INT_GL_IDX_S	13
+#define HCLGE_INT_GL_IDX_M	(0x3 << HCLGE_INT_GL_IDX_S)
 	__le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD];
+	u8 vfid;
+	u8 rsv;
 };
 
 #define HCLGE_TC_NUM		8
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e324bc6e9f4f..eafd9c678162 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2680,7 +2680,11 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
 			       HCLGE_TQP_ID_S,	node->tqp_index);
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+			       HCLGE_INT_GL_IDX_S,
+			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+		req->vfid = vport->vport_id;
 
 		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
 			req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
@@ -2764,8 +2768,12 @@ static int hclge_unmap_ring_from_vector(
 			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 		hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
 			       HCLGE_TQP_ID_S,	node->tqp_index);
+		hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+			       HCLGE_INT_GL_IDX_S,
+			       hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
 
 		req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+		req->vfid = vport->vport_id;
 
 		if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
 			req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;

From 139e8792537b327252a9676591a78e6408d50a85 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:13 +0100
Subject: [PATCH 116/391] net: hns3: Fixes the initialization of MAC address in
 hardware

This patch fixes the initialization of MAC address, fetched from HNS3
firmware i.e. when it is not randomly generated, to the HNS3 hardware.

Fixes: ca60906d2795 ("net: hns3: Add support of HNS3 Ethernet Driver for
hip08 SoC")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 1c3e29447891..4d68d6ea5143 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -2705,10 +2705,11 @@ static void hns3_init_mac_addr(struct net_device *netdev)
 		eth_hw_addr_random(netdev);
 		dev_warn(priv->dev, "using random MAC address %pM\n",
 			 netdev->dev_addr);
-		/* Also copy this new MAC address into hdev */
-		if (h->ae_algo->ops->set_mac_addr)
-			h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
 	}
+
+	if (h->ae_algo->ops->set_mac_addr)
+		h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+
 }
 
 static void hns3_nic_set_priv_ops(struct net_device *netdev)

From fbbb1536b220eb4f4f95cbceae6579489a8adad5 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Tue, 19 Sep 2017 17:17:14 +0100
Subject: [PATCH 117/391] net: hns3: Fixes the ether address copy with
 appropriate API

This patch replaces the ethernet address copy instance with more
appropriate ether_addr_copy() function.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index eafd9c678162..8e172afd4876 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1063,8 +1063,7 @@ static int hclge_configure(struct hclge_dev *hdev)
 	hdev->base_tqp_pid = 0;
 	hdev->rss_size_max = 1;
 	hdev->rx_buf_len = cfg.rx_buf_len;
-	for (i = 0; i < ETH_ALEN; i++)
-		hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i];
+	ether_addr_copy(hdev->hw.mac.mac_addr, cfg.mac_addr);
 	hdev->hw.mac.media_type = cfg.media_type;
 	hdev->hw.mac.phy_addr = cfg.phy_addr;
 	hdev->num_desc = cfg.tqp_desc_num;

From 5e43aef8491ae3b5feb79cd15260faf39303ef33 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:15 +0100
Subject: [PATCH 118/391] net: hns3: Fixes the default VLAN-id of PF

When there is no vlan id in the packets, hardware will treat the vlan id
as 0 and look for the mac_vlan table. This patch set the default vlan id
of PF as 0. Without this config, it will fail when look for mac_vlan
table, and hardware will drop packets.

Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine &
Compatibility Layer Support")
Signed-off-by: Mingguang Qu <qumingguang@huawei.com>
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 8e172afd4876..74008ef23169 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3673,6 +3673,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 {
 #define HCLGE_VLAN_TYPE_VF_TABLE   0
 #define HCLGE_VLAN_TYPE_PORT_TABLE 1
+	struct hnae3_handle *handle;
 	int ret;
 
 	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_VF_TABLE,
@@ -3682,8 +3683,11 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
 
 	ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_PORT_TABLE,
 					 true);
+	if (ret)
+		return ret;
 
-	return ret;
+	handle = &hdev->vport[0].nic;
+	return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false);
 }
 
 static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)

From 90f7b11a5a0081feb7041fcc795c9a131a62a725 Mon Sep 17 00:00:00 2001
From: Lipeng <lipeng321@huawei.com>
Date: Tue, 19 Sep 2017 17:17:16 +0100
Subject: [PATCH 119/391] net: hns3: Fixes the premature exit of loop when
 matching clients

When register/unregister ae_dev, ae_dev should match all client
in the client_list. Enet and roce can co-exists together so we
should continue checking for enet and roce presence together.
So break should not be there.

Above caused problems in loading and unloading of modules.

Fixes: 38eddd126772 ("net: hns3: Add support of the HNAE3 framework")
Signed-off-by: Lipeng <lipeng321@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.c | 43 +++++----------------
 1 file changed, 9 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
index 59efbd605416..5bcb2238acb2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -37,20 +37,15 @@ static bool hnae3_client_match(enum hnae3_client_type client_type,
 }
 
 static int hnae3_match_n_instantiate(struct hnae3_client *client,
-				     struct hnae3_ae_dev *ae_dev,
-				     bool is_reg, bool *matched)
+				     struct hnae3_ae_dev *ae_dev, bool is_reg)
 {
 	int ret;
 
-	*matched = false;
-
 	/* check if this client matches the type of ae_dev */
 	if (!(hnae3_client_match(client->type, ae_dev->dev_type) &&
 	      hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
 		return 0;
 	}
-	/* there is a match of client and dev */
-	*matched = true;
 
 	/* now, (un-)instantiate client by calling lower layer */
 	if (is_reg) {
@@ -69,7 +64,6 @@ int hnae3_register_client(struct hnae3_client *client)
 {
 	struct hnae3_client *client_tmp;
 	struct hnae3_ae_dev *ae_dev;
-	bool matched;
 	int ret = 0;
 
 	mutex_lock(&hnae3_common_lock);
@@ -86,7 +80,7 @@ int hnae3_register_client(struct hnae3_client *client)
 		/* if the client could not be initialized on current port, for
 		 * any error reasons, move on to next available port
 		 */
-		ret = hnae3_match_n_instantiate(client, ae_dev, true, &matched);
+		ret = hnae3_match_n_instantiate(client, ae_dev, true);
 		if (ret)
 			dev_err(&ae_dev->pdev->dev,
 				"match and instantiation failed for port\n");
@@ -102,12 +96,11 @@ EXPORT_SYMBOL(hnae3_register_client);
 void hnae3_unregister_client(struct hnae3_client *client)
 {
 	struct hnae3_ae_dev *ae_dev;
-	bool matched;
 
 	mutex_lock(&hnae3_common_lock);
 	/* un-initialize the client on every matched port */
 	list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
-		hnae3_match_n_instantiate(client, ae_dev, false, &matched);
+		hnae3_match_n_instantiate(client, ae_dev, false);
 	}
 
 	list_del(&client->node);
@@ -124,7 +117,6 @@ int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
 	const struct pci_device_id *id;
 	struct hnae3_ae_dev *ae_dev;
 	struct hnae3_client *client;
-	bool matched;
 	int ret = 0;
 
 	mutex_lock(&hnae3_common_lock);
@@ -151,13 +143,10 @@ int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
 		 * initialize the figure out client instance
 		 */
 		list_for_each_entry(client, &hnae3_client_list, node) {
-			ret = hnae3_match_n_instantiate(client, ae_dev, true,
-							&matched);
+			ret = hnae3_match_n_instantiate(client, ae_dev, true);
 			if (ret)
 				dev_err(&ae_dev->pdev->dev,
 					"match and instantiation failed\n");
-			if (matched)
-				break;
 		}
 	}
 
@@ -175,7 +164,6 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
 	const struct pci_device_id *id;
 	struct hnae3_ae_dev *ae_dev;
 	struct hnae3_client *client;
-	bool matched;
 
 	mutex_lock(&hnae3_common_lock);
 	/* Check if there are matched ae_dev */
@@ -187,12 +175,8 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
 		/* check the client list for the match with this ae_dev type and
 		 * un-initialize the figure out client instance
 		 */
-		list_for_each_entry(client, &hnae3_client_list, node) {
-			hnae3_match_n_instantiate(client, ae_dev, false,
-						  &matched);
-			if (matched)
-				break;
-		}
+		list_for_each_entry(client, &hnae3_client_list, node)
+			hnae3_match_n_instantiate(client, ae_dev, false);
 
 		ae_algo->ops->uninit_ae_dev(ae_dev);
 		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
@@ -212,7 +196,6 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 	const struct pci_device_id *id;
 	struct hnae3_ae_algo *ae_algo;
 	struct hnae3_client *client;
-	bool matched;
 	int ret = 0;
 
 	mutex_lock(&hnae3_common_lock);
@@ -246,13 +229,10 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
 	 * initialize the figure out client instance
 	 */
 	list_for_each_entry(client, &hnae3_client_list, node) {
-		ret = hnae3_match_n_instantiate(client, ae_dev, true,
-						&matched);
+		ret = hnae3_match_n_instantiate(client, ae_dev, true);
 		if (ret)
 			dev_err(&ae_dev->pdev->dev,
 				"match and instantiation failed\n");
-		if (matched)
-			break;
 	}
 
 out_err:
@@ -270,7 +250,6 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
 	const struct pci_device_id *id;
 	struct hnae3_ae_algo *ae_algo;
 	struct hnae3_client *client;
-	bool matched;
 
 	mutex_lock(&hnae3_common_lock);
 	/* Check if there are matched ae_algo */
@@ -279,12 +258,8 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
 		if (!id)
 			continue;
 
-		list_for_each_entry(client, &hnae3_client_list, node) {
-			hnae3_match_n_instantiate(client, ae_dev, false,
-						  &matched);
-			if (matched)
-				break;
-		}
+		list_for_each_entry(client, &hnae3_client_list, node)
+			hnae3_match_n_instantiate(client, ae_dev, false);
 
 		ae_algo->ops->uninit_ae_dev(ae_dev);
 		hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);

From b5b7db8d680464b1d631fd016f5e093419f0bfd9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 Sep 2017 10:05:57 -0700
Subject: [PATCH 120/391] tcp: fastopen: fix on syn-data transmit failure

Our recent change exposed a bug in TCP Fastopen Client that syzkaller
found right away [1]

When we prepare skb with SYN+DATA, we attempt to transmit it,
and we update socket state as if the transmit was a success.

In socket RTX queue we have two skbs, one with the SYN alone,
and a second one containing the DATA.

When (malicious) ACK comes in, we now complain that second one had no
skb_mstamp.

The proper fix is to make sure that if the transmit failed, we do not
pretend we sent the DATA skb, and make it our send_head.

When 3WHS completes, we can now send the DATA right away, without having
to wait for a timeout.

[1]
WARNING: CPU: 0 PID: 100189 at net/ipv4/tcp_input.c:3117 tcp_clean_rtx_queue+0x2057/0x2ab0 net/ipv4/tcp_input.c:3117()

 WARN_ON_ONCE(last_ackt == 0);

Modules linked in:
CPU: 0 PID: 100189 Comm: syz-executor1 Not tainted
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
 0000000000000000 ffff8800b35cb1d8 ffffffff81cad00d 0000000000000000
 ffffffff828a4347 ffff88009f86c080 ffffffff8316eb20 0000000000000d7f
 ffff8800b35cb220 ffffffff812c33c2 ffff8800baad2440 00000009d46575c0
Call Trace:
 [<ffffffff81cad00d>] __dump_stack
 [<ffffffff81cad00d>] dump_stack+0xc1/0x124
 [<ffffffff812c33c2>] warn_slowpath_common+0xe2/0x150
 [<ffffffff812c361e>] warn_slowpath_null+0x2e/0x40
 [<ffffffff828a4347>] tcp_clean_rtx_queue+0x2057/0x2ab0 n
 [<ffffffff828ae6fd>] tcp_ack+0x151d/0x3930
 [<ffffffff828baa09>] tcp_rcv_state_process+0x1c69/0x4fd0
 [<ffffffff828efb7f>] tcp_v4_do_rcv+0x54f/0x7c0
 [<ffffffff8258aacb>] sk_backlog_rcv
 [<ffffffff8258aacb>] __release_sock+0x12b/0x3a0
 [<ffffffff8258ad9e>] release_sock+0x5e/0x1c0
 [<ffffffff8294a785>] inet_wait_for_connect
 [<ffffffff8294a785>] __inet_stream_connect+0x545/0xc50
 [<ffffffff82886f08>] tcp_sendmsg_fastopen
 [<ffffffff82886f08>] tcp_sendmsg+0x2298/0x35a0
 [<ffffffff82952515>] inet_sendmsg+0xe5/0x520
 [<ffffffff8257152f>] sock_sendmsg_nosec
 [<ffffffff8257152f>] sock_sendmsg+0xcf/0x110

Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully")
Fixes: 783237e8daf1 ("net-tcp: Fast Open client - sending SYN-data")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 517d737059d1..0bc9e46a5369 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3389,6 +3389,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 		goto done;
 	}
 
+	/* data was not sent, this is our new send_head */
+	sk->sk_send_head = syn_data;
+	tp->packets_out -= tcp_skb_pcount(syn_data);
+
 fallback:
 	/* Send a regular SYN with Fast Open cookie request option */
 	if (fo->cookie.len > 0)
@@ -3441,6 +3445,11 @@ int tcp_connect(struct sock *sk)
 	 */
 	tp->snd_nxt = tp->write_seq;
 	tp->pushed_seq = tp->write_seq;
+	buff = tcp_send_head(sk);
+	if (unlikely(buff)) {
+		tp->snd_nxt	= TCP_SKB_CB(buff)->seq;
+		tp->pushed_seq	= TCP_SKB_CB(buff)->seq;
+	}
 	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */

From f55956065ec94e3e9371463d693a1029c4cc3007 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Tue, 19 Sep 2017 19:35:18 +0200
Subject: [PATCH 121/391] net: emac: Fix napi poll list corruption

This patch is pretty much a carbon copy of
commit 3079c652141f ("caif: Fix napi poll list corruption")
with "caif" replaced by "emac".

The commit d75b1ade567f ("net: less interrupt masking in NAPI")
breaks emac.

It is now required that if the entire budget is consumed when poll
returns, the napi poll_list must remain empty.  However, like some
other drivers emac tries to do a last-ditch check and if there is
more work it will call napi_reschedule and then immediately process
some of this new work.  Should the entire budget be consumed while
processing such new work then we will violate the new caller
contract.

This patch fixes this by not touching any work when we reschedule
in emac.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/mal.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 2c74baa2398a..fff09dcf9e34 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -402,7 +402,7 @@ static int mal_poll(struct napi_struct *napi, int budget)
 	unsigned long flags;
 
 	MAL_DBG2(mal, "poll(%d)" NL, budget);
- again:
+
 	/* Process TX skbs */
 	list_for_each(l, &mal->poll_list) {
 		struct mal_commac *mc =
@@ -451,7 +451,6 @@ static int mal_poll(struct napi_struct *napi, int budget)
 			spin_lock_irqsave(&mal->lock, flags);
 			mal_disable_eob_irq(mal);
 			spin_unlock_irqrestore(&mal->lock, flags);
-			goto again;
 		}
 		mc->ops->poll_tx(mc->dev);
 	}

From 7c30013133964aaa2f45c17d6e9782ac6cfd7f5f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 20 Sep 2017 00:44:21 +0200
Subject: [PATCH 122/391] bpf: fix ri->map_owner pointer on bpf_prog_realloc

Commit 109980b894e9 ("bpf: don't select potentially stale
ri->map from buggy xdp progs") passed the pointer to the prog
itself to be loaded into r4 prior on bpf_redirect_map() helper
call, so that we can store the owner into ri->map_owner out of
the helper.

Issue with that is that the actual address of the prog is still
subject to change when subsequent rewrites occur that require
slow path in bpf_prog_realloc() to alloc more memory, e.g. from
patching inlining helper functions or constant blinding. Thus,
we really need to take prog->aux as the address we're holding,
which also works with prog clones as they share the same aux
object.

Instead of then fetching aux->prog during runtime, which could
potentially incur cache misses due to false sharing, we are
going to just use aux for comparison on the map owner. This
will also keep the patchlet of the same size, and later check
in xdp_map_invalid() only accesses read-only aux pointer from
the prog, it's also in the same cacheline already from prior
access when calling bpf_func.

Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c |  7 ++++++-
 net/core/filter.c     | 24 +++++++++++++++---------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 799b2451ef2d..b914fbe1383e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4205,7 +4205,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 		}
 
 		if (insn->imm == BPF_FUNC_redirect_map) {
-			u64 addr = (unsigned long)prog;
+			/* Note, we cannot use prog directly as imm as subsequent
+			 * rewrites would still change the prog pointer. The only
+			 * stable address we can use is aux, which also works with
+			 * prog clones during blinding.
+			 */
+			u64 addr = (unsigned long)prog->aux;
 			struct bpf_insn r4_ld[] = {
 				BPF_LD_IMM64(BPF_REG_4, addr),
 				*insn,
diff --git a/net/core/filter.c b/net/core/filter.c
index 24dd33dd9f04..82edad58d066 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1794,7 +1794,7 @@ struct redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
-	const struct bpf_prog *map_owner;
+	unsigned long   map_owner;
 };
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -2500,11 +2500,17 @@ void xdp_do_flush_map(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
 
+static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
+				   unsigned long aux)
+{
+	return (unsigned long)xdp_prog->aux != aux;
+}
+
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
-	const struct bpf_prog *map_owner = ri->map_owner;
+	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
@@ -2512,9 +2518,9 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 
 	ri->ifindex = 0;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
-	if (unlikely(map_owner != xdp_prog)) {
+	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 		err = -EFAULT;
 		map = NULL;
 		goto err;
@@ -2574,7 +2580,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
-	const struct bpf_prog *map_owner = ri->map_owner;
+	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	struct net_device *fwd = NULL;
 	u32 index = ri->ifindex;
@@ -2583,10 +2589,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 
 	ri->ifindex = 0;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
 	if (map) {
-		if (unlikely(map_owner != xdp_prog)) {
+		if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
 			err = -EFAULT;
 			map = NULL;
 			goto err;
@@ -2632,7 +2638,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 	ri->ifindex = ifindex;
 	ri->flags = flags;
 	ri->map = NULL;
-	ri->map_owner = NULL;
+	ri->map_owner = 0;
 
 	return XDP_REDIRECT;
 }
@@ -2646,7 +2652,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 };
 
 BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
-	   const struct bpf_prog *, map_owner)
+	   unsigned long, map_owner)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 

From 6819a14ecbe2e089e5c5bb74edecafdde2028a00 Mon Sep 17 00:00:00 2001
From: Mike Manning <mmanning@brocade.com>
Date: Mon, 4 Sep 2017 15:52:55 +0100
Subject: [PATCH 123/391] net: ipv6: fix regression of no RTM_DELADDR sent
 after DAD failure

Commit f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative
addresses") incorrectly assumes that no RTM_NEWADDR are sent for
addresses in tentative state, as this does happen for the standard
IPv6 use-case of DAD failure, see the call to ipv6_ifa_notify() in
addconf_dad_stop(). So as a result of this change, no RTM_DELADDR is
sent after DAD failure for a link-local when strict DAD (accept_dad=2)
is configured, or on the next admin down in other cases. The absence
of this notification breaks backwards compatibility and causes problems
after DAD failure if this notification was being relied on. The
solution is to allow RTM_DELADDR to still be sent after DAD failure.

Fixes: f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative addresses")
Signed-off-by: Mike Manning <mmanning@brocade.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c2e2a78787ec..d7dbcc8eda10 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4940,9 +4940,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
 
 	/* Don't send DELADDR notification for TENTATIVE address,
 	 * since NEWADDR notification is sent only after removing
-	 * TENTATIVE flag.
+	 * TENTATIVE flag, if DAD has not failed.
 	 */
-	if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR)
+	if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) &&
+	    event == RTM_DELADDR)
 		return;
 
 	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);

From 35e015e1f5773417952fe91ce8790baf9b4237a2 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Tue, 12 Sep 2017 17:46:37 +0200
Subject: [PATCH 124/391] ipv6: fix net.ipv6.conf.all interface DAD handlers

Currently, writing into
net.ipv6.conf.all.{accept_dad,use_optimistic,optimistic_dad} has no effect.
Fix handling of these flags by:

- using the maximum of global and per-interface values for the
  accept_dad flag. That is, if at least one of the two values is
  non-zero, enable DAD on the interface. If at least one value is
  set to 2, enable DAD and disable IPv6 operation on the interface if
  MAC-based link-local address was found

- using the logical OR of global and per-interface values for the
  optimistic_dad flag. If at least one of them is set to one, optimistic
  duplicate address detection (RFC 4429) is enabled on the interface

- using the logical OR of global and per-interface values for the
  use_optimistic flag. If at least one of them is set to one,
  optimistic addresses won't be marked as deprecated during source address
  selection on the interface.

While at it, as we're modifying the prototype for ipv6_use_optimistic_addr(),
drop inline, and let the compiler decide.

Fixes: 7fd2561e4ebd ("net: ipv6: Add a sysctl to make optimistic addresses useful candidates")
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 18 +++++++++++++----
 net/ipv6/addrconf.c                    | 27 +++++++++++++++++++-------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index b3345d0fe0a6..77f4de59dc9c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1680,6 +1680,9 @@ accept_dad - INTEGER
 	2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
 	   link-local address has been found.
 
+	DAD operation and mode on a given interface will be selected according
+	to the maximum value of conf/{all,interface}/accept_dad.
+
 force_tllao - BOOLEAN
 	Enable sending the target link-layer address option even when
 	responding to a unicast neighbor solicitation.
@@ -1727,16 +1730,23 @@ suppress_frag_ndisc - INTEGER
 
 optimistic_dad - BOOLEAN
 	Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
-		0: disabled (default)
-		1: enabled
+	0: disabled (default)
+	1: enabled
+
+	Optimistic Duplicate Address Detection for the interface will be enabled
+	if at least one of conf/{all,interface}/optimistic_dad is set to 1,
+	it will be disabled otherwise.
 
 use_optimistic - BOOLEAN
 	If enabled, do not classify optimistic addresses as deprecated during
 	source address selection.  Preferred addresses will still be chosen
 	before optimistic addresses, subject to other ranking in the source
 	address selection algorithm.
-		0: disabled (default)
-		1: enabled
+	0: disabled (default)
+	1: enabled
+
+	This will be enabled if at least one of
+	conf/{all,interface}/use_optimistic is set to 1, disabled otherwise.
 
 stable_secret - IPv6 address
 	This IPv6 address will be used as a secret to generate IPv6
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d7dbcc8eda10..96861c702c06 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1399,10 +1399,18 @@ static inline int ipv6_saddr_preferred(int type)
 	return 0;
 }
 
-static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(struct net *net,
+				     struct inet6_dev *idev)
 {
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-	return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+	if (!idev)
+		return false;
+	if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+		return false;
+	if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+		return false;
+
+	return true;
 #else
 	return false;
 #endif
@@ -1472,7 +1480,7 @@ static int ipv6_get_saddr_eval(struct net *net,
 		/* Rule 3: Avoid deprecated and optimistic addresses */
 		u8 avoid = IFA_F_DEPRECATED;
 
-		if (!ipv6_use_optimistic_addr(score->ifa->idev))
+		if (!ipv6_use_optimistic_addr(net, score->ifa->idev))
 			avoid |= IFA_F_OPTIMISTIC;
 		ret = ipv6_saddr_preferred(score->addr_type) ||
 		      !(score->ifa->flags & avoid);
@@ -2460,7 +2468,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		int max_addresses = in6_dev->cnf.max_addresses;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-		if (in6_dev->cnf.optimistic_dad &&
+		if ((net->ipv6.devconf_all->optimistic_dad ||
+		     in6_dev->cnf.optimistic_dad) &&
 		    !net->ipv6.devconf_all->forwarding && sllao)
 			addr_flags |= IFA_F_OPTIMISTIC;
 #endif
@@ -3051,7 +3060,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
 	u32 addr_flags = flags | IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-	if (idev->cnf.optimistic_dad &&
+	if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
+	     idev->cnf.optimistic_dad) &&
 	    !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
@@ -3810,6 +3820,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 		goto out;
 
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+	    dev_net(dev)->ipv6.devconf_all->accept_dad < 1 ||
 	    idev->cnf.accept_dad < 1 ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
@@ -3841,7 +3852,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 	 */
 	if (ifp->flags & IFA_F_OPTIMISTIC) {
 		ip6_ins_rt(ifp->rt);
-		if (ipv6_use_optimistic_addr(idev)) {
+		if (ipv6_use_optimistic_addr(dev_net(dev), idev)) {
 			/* Because optimistic nodes can use this address,
 			 * notify listeners. If DAD fails, RTM_DELADDR is sent.
 			 */
@@ -3897,7 +3908,9 @@ static void addrconf_dad_work(struct work_struct *w)
 		action = DAD_ABORT;
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
 
-		if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+		if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
+		     idev->cnf.accept_dad > 1) &&
+		    !idev->cnf.disable_ipv6 &&
 		    !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
 			struct in6_addr addr;
 

From 23586b66d84ba3184b8820277f3fc42761640f87 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Mon, 18 Sep 2017 18:18:45 -0500
Subject: [PATCH 125/391] Fix SMB3.1.1 guest authentication to Samba

Samba rejects SMB3.1.1 dialect (vers=3.1.1) negotiate requests from
the kernel client due to the two byte pad at the end of the negotiate
contexts.

CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2pdu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5c16591a128e..b0eaebe627e9 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -439,7 +439,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req)
 	build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
 	req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
 	req->NegotiateContextCount = cpu_to_le16(2);
-	inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+	inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
 			+ sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
 }
 #else

From 04fc52fb222d35e1f7a0d5d85b19a676ea1e10e8 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Tue, 5 Sep 2017 14:23:02 +0200
Subject: [PATCH 126/391] drm/exynos/hdmi: Fix unsafe list iteration

Function hdmi_mode_fixup() used bare list_for_each entry, which was
unsafe and caused memory corruption detected by kasan.
It now uses drm_for_each_connector_iter macro, which is now recommended
by the documentation and safe.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_hdmi.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 214fa5e51963..0109ff40b1db 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -944,22 +944,27 @@ static bool hdmi_mode_fixup(struct drm_encoder *encoder,
 	struct drm_device *dev = encoder->dev;
 	struct drm_connector *connector;
 	struct drm_display_mode *m;
+	struct drm_connector_list_iter conn_iter;
 	int mode_ok;
 
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+	drm_connector_list_iter_begin(dev, &conn_iter);
+	drm_for_each_connector_iter(connector, &conn_iter) {
 		if (connector->encoder == encoder)
 			break;
 	}
+	if (connector)
+		drm_connector_get(connector);
+	drm_connector_list_iter_end(&conn_iter);
 
-	if (connector->encoder != encoder)
+	if (!connector)
 		return true;
 
 	mode_ok = hdmi_mode_valid(connector, adjusted_mode);
 
 	if (mode_ok == MODE_OK)
-		return true;
+		goto cleanup;
 
 	/*
 	 * Find the most suitable mode and copy it to adjusted_mode.
@@ -979,6 +984,9 @@ static bool hdmi_mode_fixup(struct drm_encoder *encoder,
 		}
 	}
 
+cleanup:
+	drm_connector_put(connector);
+
 	return true;
 }
 

From 8632ec8cdcaead67c019f1ba36760d1bdd0c5d23 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 23 Aug 2017 15:37:53 +1000
Subject: [PATCH 127/391] powerpc/configs: Update for CONFIG_SND changes

Commit eb3b705aaed9 ("ALSA: Make CONFIG_SND_OSSEMUL user-selectable")
means we need to set CONFIG_SND_OSSEMUL in our configs, otherwise we
lose some of the SND symbols.

And commit 0181307abc1d ("ALSA: seq: Reorganize kconfig and build")
reorganised things, which causes the churn.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/configs/g5_defconfig       | 5 +++--
 arch/powerpc/configs/gamecube_defconfig | 5 +++--
 arch/powerpc/configs/pasemi_defconfig   | 3 ++-
 arch/powerpc/configs/pmac32_defconfig   | 7 ++++---
 arch/powerpc/configs/ppc64_defconfig    | 7 ++++---
 arch/powerpc/configs/ppc64e_defconfig   | 7 ++++---
 arch/powerpc/configs/ppc6xx_defconfig   | 7 ++++---
 arch/powerpc/configs/wii_defconfig      | 5 +++--
 8 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index e084fa548d73..063817fee61c 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -138,10 +138,11 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_POWERMAC=m
 CONFIG_SND_AOA=m
 CONFIG_SND_AOA_FABRIC_LAYOUT=m
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 79bbc8238b32..805b0f87653c 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -64,11 +64,12 @@ CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_CLUT224 is not set
 CONFIG_SOUND=y
 CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
 # CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 8cf4a46bef86..6daa56f8895c 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -115,9 +115,10 @@ CONFIG_VGACON_SOFT_SCROLLBACK=y
 CONFIG_LOGO=y
 CONFIG_SOUND=y
 CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER=y
 CONFIG_SND_SEQUENCER_OSS=y
 CONFIG_SND_USB_AUDIO=y
 CONFIG_SND_USB_USX2Y=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 8e798b1fbc99..1aab9a62a681 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -227,11 +227,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_DUMMY=m
 CONFIG_SND_POWERMAC=m
 CONFIG_SND_AOA=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 791db775a09c..6ddca80c52c3 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -222,11 +222,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_POWERMAC=m
 CONFIG_SND_AOA=m
 CONFIG_SND_AOA_FABRIC_LAYOUT=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index d0fe0f8f77c2..41d85cb3c9a2 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -141,11 +141,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_HID_DRAGONRISE=y
 CONFIG_HID_GYRATION=y
 CONFIG_HID_TWINHAN=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index ae6eba482d75..da0e8d535eb8 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -789,17 +789,18 @@ CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_VGA16 is not set
 CONFIG_SOUND=m
 CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=m
 CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
 CONFIG_SND_DYNAMIC_MINORS=y
 # CONFIG_SND_SUPPORT_OLD_API is not set
 CONFIG_SND_VERBOSE_PRINTK=y
 CONFIG_SND_DEBUG=y
 CONFIG_SND_DEBUG_VERBOSE=y
 CONFIG_SND_PCM_XRUN_DEBUG=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
 CONFIG_SND_DUMMY=m
 CONFIG_SND_VIRMIDI=m
 CONFIG_SND_MTPAV=m
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index aef41b17a8bc..9c7400a19e9d 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -79,11 +79,12 @@ CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_SOUND=y
 CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
 CONFIG_SND_MIXER_OSS=y
 CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
 # CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
 CONFIG_HID_APPLE=m
 CONFIG_HID_WACOM=m
 CONFIG_MMC=y

From 4917fcb58cc73f6b81455e3c5f960144809ddf1a Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 19 Sep 2017 11:47:06 +0530
Subject: [PATCH 128/391] powerpc/sysrq: Fix oops whem ppmu is not registered

Kernel crashes if power pmu is not registered and user tries to dump
regs with 'echo p > /proc/sysrq-trigger'. Sample log:

  Unable to handle kernel paging request for data at address 0x00000008
  Faulting instruction address: 0xc0000000000d52f0

  NIP [c0000000000d52f0] perf_event_print_debug+0x10/0x230
  LR [c00000000058a938] sysrq_handle_showregs+0x38/0x50
  Call Trace:
   printk+0x38/0x4c (unreliable)
   __handle_sysrq+0xe4/0x270
   write_sysrq_trigger+0x64/0x80
   proc_reg_write+0x80/0xd0
   __vfs_write+0x40/0x200
   vfs_write+0xc8/0x240
   SyS_write+0x60/0x110
   system_call+0x58/0x6c

Fixes: 5f6d0380c640 ("powerpc/perf: Define perf_event_print_debug() to print PMU register values")
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/perf/core-book3s.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 2e3eb7431571..9e3da168d54c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -793,6 +793,11 @@ void perf_event_print_debug(void)
 	u32 pmcs[MAX_HWEVENTS];
 	int i;
 
+	if (!ppmu) {
+		pr_info("Performance monitor hardware not registered.\n");
+		return;
+	}
+
 	if (!ppmu->n_counter)
 		return;
 

From c1fa0768a8713b135848f78fd43ffc208d8ded70 Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.vnet.ibm.com>
Date: Wed, 13 Sep 2017 22:13:48 -0400
Subject: [PATCH 129/391] powerpc/tm: Flush TM only if CPU has TM feature

Commit cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump")
added code to access TM SPRs in flush_tmregs_to_thread(). However
flush_tmregs_to_thread() does not check if TM feature is available on
CPU before trying to access TM SPRs in order to copy live state to
thread structures. flush_tmregs_to_thread() is indeed guarded by
CONFIG_PPC_TRANSACTIONAL_MEM but it might be the case that kernel
was compiled with CONFIG_PPC_TRANSACTIONAL_MEM enabled and ran on
a CPU without TM feature available, thus rendering the execution
of TM instructions that are treated by the CPU as illegal instructions.

The fix is just to add proper checking in flush_tmregs_to_thread()
if CPU has the TM feature before accessing any TM-specific resource,
returning immediately if TM is no available on the CPU. Adding
that checking in flush_tmregs_to_thread() instead of in places
where it is called, like in vsr_get() and vsr_set(), is better because
avoids the same problem cropping up elsewhere.

Cc: stable@vger.kernel.org # v4.13+
Fixes: cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump")
Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
Reviewed-by: Cyril Bur <cyrilbur@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 07cd22e35405..f52ad5bb7109 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -131,7 +131,7 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
 	 * in the appropriate thread structures from live.
 	 */
 
-	if (tsk != current)
+	if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
 		return;
 
 	if (MSR_TM_SUSPENDED(mfmsr())) {

From ad47ff3e33503e0969db2d4f9a40942aa6414598 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 19 Sep 2017 20:45:52 +1000
Subject: [PATCH 130/391] powerpc/sstep: Fix issues with set_cr0()

set_cr0() broke when we changed analyse_instr() to not modify the
register state. Instead of looking at regs->gpr[x] which has not
been updated yet, we need to look at op->val.

Fixes: 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs")
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index fb9f58b868e7..9d72e5900320 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -944,9 +944,9 @@ NOKPROBE_SYMBOL(emulate_dcbz);
 		: "r" (addr), "i" (-EFAULT), "0" (err))
 
 static nokprobe_inline void set_cr0(const struct pt_regs *regs,
-				    struct instruction_op *op, int rd)
+				    struct instruction_op *op)
 {
-	long val = regs->gpr[rd];
+	long val = op->val;
 
 	op->type |= SETCC;
 	op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
@@ -1326,7 +1326,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 	case 13:	/* addic. */
 		imm = (short) instr;
 		add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
-		set_cr0(regs, op, rd);
+		set_cr0(regs, op);
 		return 1;
 
 	case 14:	/* addi */
@@ -1397,13 +1397,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 
 	case 28:	/* andi. */
 		op->val = regs->gpr[rd] & (unsigned short) instr;
-		set_cr0(regs, op, ra);
+		set_cr0(regs, op);
 		goto logical_done_nocc;
 
 	case 29:	/* andis. */
 		imm = (unsigned short) instr;
 		op->val = regs->gpr[rd] & (imm << 16);
-		set_cr0(regs, op, ra);
+		set_cr0(regs, op);
 		goto logical_done_nocc;
 
 #ifdef __powerpc64__
@@ -2526,7 +2526,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 
  logical_done:
 	if (instr & 1)
-		set_cr0(regs, op, ra);
+		set_cr0(regs, op);
  logical_done_nocc:
 	op->reg = ra;
 	op->type |= SETREG;
@@ -2534,7 +2534,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 
  arith_done:
 	if (instr & 1)
-		set_cr0(regs, op, rd);
+		set_cr0(regs, op);
  compute_done:
 	op->reg = rd;
 	op->type |= SETREG;

From 5bcaa4cc41923871777c3d13906267e812775094 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 19 Sep 2017 20:45:53 +1000
Subject: [PATCH 131/391] powerpc/sstep: Fix issues with mcrf

mcrf broke when we changed analyse_instr() to not modify the register
state. The instruction writes to the CR, so we need to store the result
in op->ccval, not op->val.

Fixes: 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs")
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 9d72e5900320..c4cda1afb49d 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1513,10 +1513,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			op->type = COMPUTE + SETCC;
 			imm = 0xf0000000UL;
 			val = regs->gpr[rd];
-			op->val = regs->ccr;
+			op->ccval = regs->ccr;
 			for (sh = 0; sh < 8; ++sh) {
 				if (instr & (0x80000 >> sh))
-					op->val = (op->val & ~imm) |
+					op->ccval = (op->ccval & ~imm) |
 						(val & imm);
 				imm >>= 4;
 			}

From 1575fe06f6b1d156ed31fb22c8631d49d79751d8 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 20 Sep 2017 09:32:19 +1000
Subject: [PATCH 132/391] powerpc/sstep: mullw should calculate a 64 bit signed
 result

mullw should do a 32 bit signed multiply and create a 64 bit signed
result. It currently truncates the result to 32 bits.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/lib/sstep.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index c4cda1afb49d..5e8418c28bd8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1651,8 +1651,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			goto arith_done;
 
 		case 235:	/* mullw */
-			op->val = (unsigned int) regs->gpr[ra] *
-				(unsigned int) regs->gpr[rb];
+			op->val = (long)(int) regs->gpr[ra] *
+				(int) regs->gpr[rb];
+
 			goto arith_done;
 
 		case 266:	/* add */

From 5d298baa41883fc421acfd932799c0f4177249ae Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Thu, 31 Aug 2017 17:17:41 +0530
Subject: [PATCH 133/391] powerpc/powernv: Clear LPCR[PECE1] via stop-api only
 for deep state offline

Commit 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via
stop-api only on Hotplug") clears the PECE1 bit of the LPCR via
stop-api during CPU-Hotplug to prevent wakeup due to a decrementer on
an offlined CPU which is in a deep stop state.

In the case where the stop-api support is found to be lacking, the
commit 785a12afdb4a ("powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT
states when stop-api fails") disables deep states that lose hypervisor
context. Thus in this case, the offlined CPU will be put to some
shallow idle state.

However, we currently unconditionally clear the PECE1 in LPCR via
stop-api during CPU-Hotplug even when deep states are disabled due to
stop-api failure.

Fix this by clearing PECE1 of LPCR via stop-api during CPU-Hotplug
*only* when the offlined CPU will be put to a deep state that loses
hypervisor context.

Fixes: 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via stop-api only on Hotplug")
Reported-by: Pavithra Prakash <pavirampu@linux.vnet.ibm.com>
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Pavithra Prakash <pavrampu@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/idle.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 9f59041a172b..443d5ca71995 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -393,7 +393,13 @@ static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
 	u64 pir = get_hard_smp_processor_id(cpu);
 
 	mtspr(SPRN_LPCR, lpcr_val);
-	opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+
+	/*
+	 * Program the LPCR via stop-api only if the deepest stop state
+	 * can lose hypervisor context.
+	 */
+	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
+		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
 }
 
 /*

From 590d08d3da45e9fed423b08ab38d71886c07abc8 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Tue, 19 Sep 2017 11:43:47 -0500
Subject: [PATCH 134/391] SMB3: Fix endian warning

Multi-dialect negotiate patch had a minor endian error.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org> # 4.13+
---
 fs/cifs/smb2pdu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b0eaebe627e9..b4c58a1db1ae 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -570,10 +570,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 			/* ops set to 3.0 by default for default so update */
 			ses->server->ops = &smb21_operations;
 		}
-	} else if (rsp->DialectRevision != ses->server->vals->protocol_id) {
+	} else if (le16_to_cpu(rsp->DialectRevision) !=
+				ses->server->vals->protocol_id) {
 		/* if requested single dialect ensure returned dialect matched */
 		cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n",
-			cpu_to_le16(rsp->DialectRevision));
+			le16_to_cpu(rsp->DialectRevision));
 		return -EIO;
 	}
 

From c721c38957fb19982416f6be71aae7b30630d83b Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Tue, 19 Sep 2017 18:40:03 -0500
Subject: [PATCH 135/391] SMB3: Warn user if trying to sign connection that
 authenticated as guest

It can be confusing if user ends up authenticated as guest but they
requested signing (server will return error validating signed packets)
so add log message for this.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/smb2pdu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b4c58a1db1ae..d499ce265c3b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1176,6 +1176,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
 	while (sess_data->func)
 		sess_data->func(sess_data);
 
+	if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign))
+		cifs_dbg(VFS, "signing requested but authenticated as guest\n");
 	rc = sess_data->result;
 out:
 	kfree(sess_data);

From 6e82e929d98552ed5156919e0f532eeba2da704b Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 20 Sep 2017 13:09:23 +1000
Subject: [PATCH 136/391] cifs: show 'soft' in the mount options for hard
 mounts

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 180b3356ff86..a864e6575309 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -461,6 +461,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 		seq_puts(s, ",nocase");
 	if (tcon->retry)
 		seq_puts(s, ",hard");
+	else
+		seq_puts(s, ",soft");
 	if (tcon->use_persistent)
 		seq_puts(s, ",persistenthandles");
 	else if (tcon->use_resilient)

From fd0b19ed5389187829b854900511c9195875bb42 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Tue, 19 Sep 2017 22:07:18 -0700
Subject: [PATCH 137/391] MIPS: Fix perf event init

Commit c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned")
modified mipspmu_event_init() to cast the struct perf_event cpu field to
an unsigned integer before it is compared with nr_cpumask_bits (and
*ahem* did so without copying the linux-mips mailing list or any MIPS
developers...). This is broken because the cpu field may be -1 for
events which follow a process rather than being affine to a particular
CPU. When this is the case the cast to an unsigned int results in a
value equal to ULONG_MAX, which is always greater than nr_cpumask_bits
so we always fail mipspmu_event_init() and return -ENODEV.

The check against nr_cpumask_bits seems nonsensical anyway, so this
patch simply removes it. The cpu field is going to either be -1 or a
valid CPU number. Comparing it with nr_cpumask_bits is effectively
checking that it's a valid cpu number, but it seems safe to rely on the
core perf events code to ensure that's the case.

The end result is that this fixes use of perf on MIPS when not
constraining events to a particular CPU, and fixes the "perf list hw"
command which fails to list any events without this.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned")
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mips@linux-mips.org
Cc: stable <stable@vger.kernel.org> # v4.12+
Patchwork: https://patchwork.linux-mips.org/patch/17323/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/perf_event_mipsxx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 9e6c74bf66c4..6668f67a61c3 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -618,8 +618,7 @@ static int mipspmu_event_init(struct perf_event *event)
 		return -ENOENT;
 	}
 
-	if ((unsigned int)event->cpu >= nr_cpumask_bits ||
-	    (event->cpu >= 0 && !cpu_online(event->cpu)))
+	if (event->cpu >= 0 && !cpu_online(event->cpu))
 		return -ENODEV;
 
 	if (!atomic_inc_not_zero(&active_events)) {

From bd6227a150fdb56e7bb734976ef6e53a2c1cb334 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Thu, 14 Sep 2017 17:10:28 +0200
Subject: [PATCH 138/391] crypto: drbg - fix freeing of resources

During the change to use aligned buffers, the deallocation code path was
not updated correctly. The current code tries to free the aligned buffer
pointer and not the original buffer pointer as it is supposed to.

Thus, the code is updated to free the original buffer pointer and set
the aligned buffer pointer that is used throughout the code to NULL.

Fixes: 3cfc3b9721123 ("crypto: drbg - use aligned buffers")
CC: <stable@vger.kernel.org>
CC: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/drbg.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/drbg.c b/crypto/drbg.c
index 633a88e93ab0..70018397e59a 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
 {
 	if (!drbg)
 		return;
-	kzfree(drbg->V);
-	drbg->Vbuf = NULL;
-	kzfree(drbg->C);
-	drbg->Cbuf = NULL;
+	kzfree(drbg->Vbuf);
+	drbg->V = NULL;
+	kzfree(drbg->Cbuf);
+	drbg->C = NULL;
 	kzfree(drbg->scratchpadbuf);
 	drbg->scratchpadbuf = NULL;
 	drbg->reseed_ctr = 0;

From 569f11c9f788959b640116b5bbd6d8a1f07326da Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:00 -0500
Subject: [PATCH 139/391] crypto: x86/blowfish - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R12 instead of RBP.  R12 can't be used as the RT0 register because
of x86 instruction encoding limitations.  So use R12 for CTX and RDI for
CTX.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/blowfish-x86_64-asm_64.S | 48 +++++++++++++-----------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 246c67006ed0..8c1fcb6bad21 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -33,7 +33,7 @@
 #define s3	((16 + 2 + (3 * 256)) * 4)
 
 /* register macros */
-#define CTX %rdi
+#define CTX %r12
 #define RIO %rsi
 
 #define RX0 %rax
@@ -56,12 +56,12 @@
 #define RX2bh %ch
 #define RX3bh %dh
 
-#define RT0 %rbp
+#define RT0 %rdi
 #define RT1 %rsi
 #define RT2 %r8
 #define RT3 %r9
 
-#define RT0d %ebp
+#define RT0d %edi
 #define RT1d %esi
 #define RT2d %r8d
 #define RT3d %r9d
@@ -120,13 +120,14 @@
 
 ENTRY(__blowfish_enc_blk)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 *	%rcx: bool, if true: xor output
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
 
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
 
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk)
 	round_enc(14);
 	add_roundkey_enc(16);
 
-	movq %r11, %rbp;
+	movq %r11, %r12;
 
 	movq %r10, RIO;
 	test %cl, %cl;
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk)
 
 ENTRY(blowfish_dec_blk)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
 
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
 
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk)
 	movq %r10, RIO;
 	write_block();
 
-	movq %r11, %rbp;
+	movq %r11, %r12;
 
 	ret;
 ENDPROC(blowfish_dec_blk)
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk)
 
 ENTRY(__blowfish_enc_blk_4way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 *	%rcx: bool, if true: xor output
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
 	pushq %rcx;
 
-	preload_roundkey_enc(0);
-
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	movq %rdx, RIO;
 
+	preload_roundkey_enc(0);
+
 	read_block4();
 
 	round_enc4(0);
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way)
 	round_enc4(14);
 	add_preloaded_roundkey4();
 
-	popq %rbp;
+	popq %r12;
 	movq %r11, RIO;
 
-	test %bpl, %bpl;
+	test %r12b, %r12b;
 	jnz .L__enc_xor4;
 
 	write_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 
 .L__enc_xor4:
 	xor_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 ENDPROC(__blowfish_enc_blk_4way)
 
 ENTRY(blowfish_dec_blk_4way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
-	preload_roundkey_dec(17);
 
-	movq %rsi, %r11;
+	movq %rdi, CTX;
+	movq %rsi, %r11
 	movq %rdx, RIO;
 
+	preload_roundkey_dec(17);
 	read_block4();
 
 	round_dec4(17);
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way)
 	write_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 
 	ret;
 ENDPROC(blowfish_dec_blk_4way)

From b46c9d717645529417ca9045cfdbf59f84922573 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:01 -0500
Subject: [PATCH 140/391] crypto: x86/camellia - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R12 instead of RBP.  Both are callee-saved registers, so the
substitution is straightforward.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/camellia-x86_64-asm_64.S | 26 ++++++++++++------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 310319c601ed..95ba6956a7f6 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -75,17 +75,17 @@
 #define RCD1bh %dh
 
 #define RT0 %rsi
-#define RT1 %rbp
+#define RT1 %r12
 #define RT2 %r8
 
 #define RT0d %esi
-#define RT1d %ebp
+#define RT1d %r12d
 #define RT2d %r8d
 
 #define RT2bl %r8b
 
 #define RXOR %r9
-#define RRBP %r10
+#define RR12 %r10
 #define RDST %r11
 
 #define RXORd %r9d
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk)
 	 *	%rdx: src
 	 *	%rcx: bool xor
 	 */
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk)
 
 	enc_outunpack(mov, RT1);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 
 .L__enc_xor:
 	enc_outunpack(xor, RT1);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(__camellia_enc_blk)
 
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk)
 	movl $24, RXORd;
 	cmovel RXORd, RT2d; /* max */
 
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
 
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk)
 
 	dec_outunpack();
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(camellia_dec_blk)
 
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way)
 	 */
 	pushq %rbx;
 
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way)
 
 	enc_outunpack2(mov, RT2);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 
 .L__enc2_xor:
 	enc_outunpack2(xor, RT2);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 ENDPROC(__camellia_enc_blk_2way)
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way)
 	cmovel RXORd, RT2d; /* max */
 
 	movq %rbx, RXOR;
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
 
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way)
 
 	dec_outunpack2();
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	movq RXOR, %rbx;
 	ret;
 ENDPROC(camellia_dec_blk_2way)

From 4b15606664a2f8d7c4f0092fb0305fe1c7c65b7b Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:02 -0500
Subject: [PATCH 141/391] crypto: x86/cast5 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R15 instead of RBP.  R15 can't be used as the RID1 register because
of x86 instruction encoding limitations.  So use R15 for CTX and RDI for
CTX.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 47 +++++++++++++++--------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4a8806234ea..86107c961bb4 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
 /**********************************************************************
   16-way AVX cast5
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 
 #define RL1 %xmm0
 #define RR1 %xmm1
@@ -70,8 +70,8 @@
 
 #define RTMP %xmm15
 
-#define RID1  %rbp
-#define RID1d %ebp
+#define RID1  %rdi
+#define RID1d %edi
 #define RID2  %rsi
 #define RID2d %esi
 
@@ -226,7 +226,7 @@
 .align 16
 __cast5_enc_blk16:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RL1: blocks 1 and 2
 	 *	RR1: blocks 3 and 4
 	 *	RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
 	 *	RR4: encrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
 
 .L__skip_enc:
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
 .align 16
 __cast5_dec_blk16:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RL1: encrypted blocks 1 and 2
 	 *	RR1: encrypted blocks 3 and 4
 	 *	RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
 	 *	RR4: decrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
 
 	vmovdqa .Lbswap_mask, RKM;
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
 	outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
 
 ENTRY(cast5_ecb_enc_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_enc_16way)
 
 ENTRY(cast5_ecb_dec_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 
 	FRAME_BEGIN
+	pushq %r15;
+
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_dec_16way)
 
 ENTRY(cast5_cbc_dec_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_cbc_dec_16way)
 
 ENTRY(cast5_ctr_16way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 *	%rcx: iv (big endian, 64bit)
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_ctr_16way)

From c66cc3be2951fad4d7d7f799baf57c8c5cc8d655 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:03 -0500
Subject: [PATCH 142/391] crypto: x86/cast6 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R15 instead of RBP.  R15 can't be used as the RID1 register because
of x86 instruction encoding limitations.  So use R15 for CTX and RDI for
CTX.  This means that CTX is no longer an implicit function argument.
Instead it needs to be explicitly copied from RDI.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 50 +++++++++++++++--------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 952d3156a933..7f30b6f0d72c 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
 /**********************************************************************
   8-way AVX cast6
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 
 #define RA1 %xmm0
 #define RB1 %xmm1
@@ -70,8 +70,8 @@
 
 #define RTMP %xmm15
 
-#define RID1  %rbp
-#define RID1d %ebp
+#define RID1  %rdi
+#define RID1d %edi
 #define RID2  %rsi
 #define RID2d %esi
 
@@ -264,15 +264,17 @@
 .align 8
 __cast6_enc_blk8:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
 	 * output:
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
 	QBAR(11);
 
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
 .align 8
 __cast6_dec_blk8:
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 * output:
 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
 	QBAR(0);
 
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
 
 ENTRY(cast6_ecb_enc_8way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_enc_8way)
 
 ENTRY(cast6_ecb_dec_8way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_dec_8way)
 
 ENTRY(cast6_cbc_dec_8way)
 	/* input:
-	 *	%rdi: ctx, CTX
+	 *	%rdi: ctx
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
 
 	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
 	 *	%rcx: iv (little endian, 128bit)
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
 
 	store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
 	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX
 	movq %rsi, %r11;
 
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
 	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX
 	movq %rsi, %r11;
 
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_dec_8way)

From 3ed7b4d67c6745300c9b5c6baa55da1161b57f60 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:04 -0500
Subject: [PATCH 143/391] crypto: x86/des3_ede - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use RSI instead of RBP for RT1.  Since RSI is also used as a the 'dst'
function argument, it needs to be saved on the stack until the argument
is needed.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/des3_ede-asm_64.S | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index f3e91647ca27..8e49ce117494 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -64,12 +64,12 @@
 #define RW2bh %ch
 
 #define RT0 %r15
-#define RT1 %rbp
+#define RT1 %rsi
 #define RT2 %r14
 #define RT3 %rdx
 
 #define RT0d %r15d
-#define RT1d %ebp
+#define RT1d %esi
 #define RT2d %r14d
 #define RT3d %edx
 
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
 
+	pushq %rsi; /* dst */
+
 	read_block(%rdx, RL0, RR0);
 	initial_permutation(RL0, RR0);
 
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	round1(32+15, RL0, RR0, dummy2);
 
 	final_permutation(RR0, RL0);
+
+	popq %rsi /* dst */
 	write_block(%rsi, RR0, RL0);
 
 	popq %r15;
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk)
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	 *	%rdx: src (3 blocks)
 	 */
 
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
 
+	pushq %rsi /* dst */
+
 	/* load input */
 	movl 0 * 4(%rdx), RL0d;
 	movl 1 * 4(%rdx), RR0d;
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	bswapl RR2d;
 	bswapl RL2d;
 
+	popq %rsi /* dst */
 	movl RR0d, 0 * 4(%rsi);
 	movl RL0d, 1 * 4(%rsi);
 	movl RR1d, 2 * 4(%rsi);
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk_3way)

From d7b1722c72aa915283ada27709c6feeb392f6038 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:05 -0500
Subject: [PATCH 144/391] crypto: x86/sha1-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R11 instead of RBP.  Since R11 isn't a callee-saved register, it
doesn't need to be saved and restored on the stack.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha1_avx2_x86_64_asm.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1eab79c9ac48..9f712a7dfd79 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -89,7 +89,7 @@
 #define	REG_RE	%rdx
 #define	REG_RTA	%r12
 #define	REG_RTB	%rbx
-#define	REG_T1	%ebp
+#define	REG_T1	%r11d
 #define	xmm_mov	vmovups
 #define	avx2_zeroupper	vzeroupper
 #define	RND_F1	1
@@ -637,7 +637,6 @@ _loop3:
 	ENTRY(\name)
 
 	push	%rbx
-	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
@@ -673,7 +672,6 @@ _loop3:
 	pop	%r14
 	pop	%r13
 	pop	%r12
-	pop	%rbp
 	pop	%rbx
 
 	ret

From 6488bce756861b94810e54f83416d5e74c0f18bf Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:06 -0500
Subject: [PATCH 145/391] crypto: x86/sha1-ssse3 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Swap the usages of R12 and RBP.  Use R12 for the REG_D register, and use
RBP to store the pre-aligned stack pointer.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha1_ssse3_asm.S | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index a4109506a5e8..6204bd53528c 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -37,7 +37,7 @@
 #define REG_A	%ecx
 #define REG_B	%esi
 #define REG_C	%edi
-#define REG_D	%ebp
+#define REG_D	%r12d
 #define REG_E	%edx
 
 #define REG_T1	%eax
@@ -74,10 +74,10 @@
 	ENTRY(\name)
 
 	push	%rbx
-	push	%rbp
 	push	%r12
+	push	%rbp
+	mov	%rsp, %rbp
 
-	mov	%rsp, %r12
 	sub	$64, %rsp		# allocate workspace
 	and	$~15, %rsp		# align stack
 
@@ -99,10 +99,9 @@
 	xor	%rax, %rax
 	rep stosq
 
-	mov	%r12, %rsp		# deallocate workspace
-
-	pop	%r12
+	mov	%rbp, %rsp		# deallocate workspace
 	pop	%rbp
+	pop	%r12
 	pop	%rbx
 	ret
 

From 673ac6fbc74f835e2125df9ee39e8a2a423832e2 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:07 -0500
Subject: [PATCH 146/391] crypto: x86/sha256-avx - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Swap the usages of R12 and RBP.  Use R12 for the TBL register, and use
RBP to store the pre-aligned stack pointer.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha256-avx-asm.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index e08888a1a5f2..001bbcf93c79 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -103,7 +103,7 @@ SRND = %rsi       # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
 
@@ -350,13 +350,13 @@ a = TMP_
 ENTRY(sha256_transform_avx)
 .align 32
 	pushq   %rbx
-	pushq   %rbp
+	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
-	pushq   %r12
+	pushq	%rbp
+	movq	%rsp, %rbp
 
-	mov	%rsp, %r12
 	subq    $STACK_SIZE, %rsp	# allocate stack space
 	and	$~15, %rsp		# align stack pointer
 
@@ -452,13 +452,12 @@ loop2:
 
 done_hash:
 
-	mov	%r12, %rsp
-
-	popq	%r12
+	mov	%rbp, %rsp
+	popq	%rbp
 	popq    %r15
 	popq    %r14
 	popq    %r13
-	popq    %rbp
+	popq	%r12
 	popq    %rbx
 	ret
 ENDPROC(sha256_transform_avx)

From d3dfbfe2e6e7ecd620531d5201314ad14c4ed5b3 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:08 -0500
Subject: [PATCH 147/391] crypto: x86/sha256-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

There's no need to use RBP as a temporary register for the TBL value,
because it always stores the same value: the address of the K256 table.
Instead just reference the address of K256 directly.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha256-avx2-asm.S | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..1420db15dcdd 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -98,8 +98,6 @@ d	= %r8d
 e       = %edx	# clobbers NUM_BLKS
 y3	= %esi	# clobbers INP
 
-
-TBL	= %rbp
 SRND	= CTX	# SRND is same register as CTX
 
 a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE	= _RSP      + _RSP_SIZE
 ENTRY(sha256_transform_rorx)
 .align 32
 	pushq	%rbx
-	pushq	%rbp
 	pushq	%r12
 	pushq	%r13
 	pushq	%r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
 	mov	CTX, _CTX(%rsp)
 
 loop0:
-	lea     K256(%rip), TBL
-
 	## Load first 16 dwords from two blocks
 	VMOVDQ	0*32(INP),XTMP0
 	VMOVDQ	1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
 
 .align 16
 loop1:
-	vpaddd	0*32(TBL, SRND), X0, XFER
+	vpaddd	K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 0*32
 
-	vpaddd	1*32(TBL, SRND), X0, XFER
+	vpaddd	K256+1*32(SRND), X0, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 1*32
 
-	vpaddd	2*32(TBL, SRND), X0, XFER
+	vpaddd	K256+2*32(SRND), X0, XFER
 	vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 2*32
 
-	vpaddd	3*32(TBL, SRND), X0, XFER
+	vpaddd	K256+3*32(SRND), X0, XFER
 	vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED	_XFER + 3*32
 
@@ -619,10 +614,11 @@ loop1:
 
 loop2:
 	## Do last 16 rounds with no scheduling
-	vpaddd	0*32(TBL, SRND), X0, XFER
+	vpaddd	K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 0*32
-	vpaddd	1*32(TBL, SRND), X1, XFER
+
+	vpaddd	K256+1*32(SRND), X1, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS	_XFER + 1*32
 	add	$2*32, SRND
@@ -676,9 +672,6 @@ loop3:
 	ja	done_hash
 
 do_last_block:
-	#### do last block
-	lea	K256(%rip), TBL
-
 	VMOVDQ	0*16(INP),XWORD0
 	VMOVDQ	1*16(INP),XWORD1
 	VMOVDQ	2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
 	popq	%r14
 	popq	%r13
 	popq	%r12
-	popq	%rbp
 	popq	%rbx
 	ret
 ENDPROC(sha256_transform_rorx)

From 539012dcbdd1ff028268764385ed1f6d600812a7 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:09 -0500
Subject: [PATCH 148/391] crypto: x86/sha256-ssse3 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Swap the usages of R12 and RBP.  Use R12 for the TBL register, and use
RBP to store the pre-aligned stack pointer.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha256-ssse3-asm.S | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index 39b83c93e7fd..c6c05ed2c16a 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -95,7 +95,7 @@ SRND = %rsi       # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
 
@@ -356,13 +356,13 @@ a = TMP_
 ENTRY(sha256_transform_ssse3)
 .align 32
 	pushq   %rbx
-	pushq   %rbp
+	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
-	pushq   %r12
+	pushq   %rbp
+	mov	%rsp, %rbp
 
-	mov	%rsp, %r12
 	subq    $STACK_SIZE, %rsp
 	and	$~15, %rsp
 
@@ -462,13 +462,12 @@ loop2:
 
 done_hash:
 
-	mov	%r12, %rsp
-
-	popq    %r12
+	mov	%rbp, %rsp
+	popq	%rbp
 	popq    %r15
 	popq    %r14
 	popq    %r13
-	popq    %rbp
+	popq    %r12
 	popq    %rbx
 
 	ret

From ca04c823763e5b82c237cabe0c17f547ecdc6271 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:10 -0500
Subject: [PATCH 149/391] crypto: sha512-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Mix things up a little bit to get rid of the RBP usage, without hurting
performance too much.  Use RDI instead of RBP for the TBL pointer.  That
will clobber CTX, so spill CTX onto the stack and use R12 to read it in
the outer loop.  R12 is used as a non-persistent temporary variable
elsewhere, so it's safe to use.

Also remove the unused y4 variable.

Reported-by: Eric Biggers <ebiggers3@gmail.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha512-avx2-asm.S | 75 ++++++++++++++++---------------
 1 file changed, 39 insertions(+), 36 deletions(-)

diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 7f5f6c6ec72e..b16d56005162 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -69,8 +69,9 @@ XFER  = YTMP0
 
 BYTE_FLIP_MASK  = %ymm9
 
-# 1st arg
-CTX         = %rdi
+# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
+CTX1        = %rdi
+CTX2        = %r12
 # 2nd arg
 INP         = %rsi
 # 3rd arg
@@ -81,7 +82,7 @@ d           = %r8
 e           = %rdx
 y3          = %rsi
 
-TBL   = %rbp
+TBL   = %rdi # clobbers CTX1
 
 a     = %rax
 b     = %rbx
@@ -91,26 +92,26 @@ g     = %r10
 h     = %r11
 old_h = %r11
 
-T1    = %r12
+T1    = %r12 # clobbers CTX2
 y0    = %r13
 y1    = %r14
 y2    = %r15
 
-y4    = %r12
-
 # Local variables (stack frame)
 XFER_SIZE = 4*8
 SRND_SIZE = 1*8
 INP_SIZE = 1*8
 INPEND_SIZE = 1*8
+CTX_SIZE = 1*8
 RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 6*8
+GPRSAVE_SIZE = 5*8
 
 frame_XFER = 0
 frame_SRND = frame_XFER + XFER_SIZE
 frame_INP = frame_SRND + SRND_SIZE
 frame_INPEND = frame_INP + INP_SIZE
-frame_RSPSAVE = frame_INPEND + INPEND_SIZE
+frame_CTX = frame_INPEND + INPEND_SIZE
+frame_RSPSAVE = frame_CTX + CTX_SIZE
 frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
 frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
 	mov	%rax, frame_RSPSAVE(%rsp)
 
 	# Save GPRs
-	mov	%rbp, frame_GPRSAVE(%rsp)
-	mov	%rbx, 8*1+frame_GPRSAVE(%rsp)
-	mov	%r12, 8*2+frame_GPRSAVE(%rsp)
-	mov	%r13, 8*3+frame_GPRSAVE(%rsp)
-	mov	%r14, 8*4+frame_GPRSAVE(%rsp)
-	mov	%r15, 8*5+frame_GPRSAVE(%rsp)
+	mov	%rbx, 8*0+frame_GPRSAVE(%rsp)
+	mov	%r12, 8*1+frame_GPRSAVE(%rsp)
+	mov	%r13, 8*2+frame_GPRSAVE(%rsp)
+	mov	%r14, 8*3+frame_GPRSAVE(%rsp)
+	mov	%r15, 8*4+frame_GPRSAVE(%rsp)
 
 	shl	$7, NUM_BLKS	# convert to bytes
 	jz	done_hash
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
 	mov	NUM_BLKS, frame_INPEND(%rsp)
 
 	## load initial digest
-	mov	8*0(CTX),a
-	mov	8*1(CTX),b
-	mov	8*2(CTX),c
-	mov	8*3(CTX),d
-	mov	8*4(CTX),e
-	mov	8*5(CTX),f
-	mov	8*6(CTX),g
-	mov	8*7(CTX),h
+	mov	8*0(CTX1), a
+	mov	8*1(CTX1), b
+	mov	8*2(CTX1), c
+	mov	8*3(CTX1), d
+	mov	8*4(CTX1), e
+	mov	8*5(CTX1), f
+	mov	8*6(CTX1), g
+	mov	8*7(CTX1), h
+
+	# save %rdi (CTX) before it gets clobbered
+	mov	%rdi, frame_CTX(%rsp)
 
 	vmovdqa	PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
 
@@ -652,14 +655,15 @@ loop2:
 	subq	$1, frame_SRND(%rsp)
 	jne	loop2
 
-	addm	8*0(CTX),a
-	addm	8*1(CTX),b
-	addm	8*2(CTX),c
-	addm	8*3(CTX),d
-	addm	8*4(CTX),e
-	addm	8*5(CTX),f
-	addm	8*6(CTX),g
-	addm	8*7(CTX),h
+	mov	frame_CTX(%rsp), CTX2
+	addm	8*0(CTX2), a
+	addm	8*1(CTX2), b
+	addm	8*2(CTX2), c
+	addm	8*3(CTX2), d
+	addm	8*4(CTX2), e
+	addm	8*5(CTX2), f
+	addm	8*6(CTX2), g
+	addm	8*7(CTX2), h
 
 	mov	frame_INP(%rsp), INP
 	add	$128, INP
@@ -669,12 +673,11 @@ loop2:
 done_hash:
 
 # Restore GPRs
-	mov	frame_GPRSAVE(%rsp)     ,%rbp
-	mov	8*1+frame_GPRSAVE(%rsp) ,%rbx
-	mov	8*2+frame_GPRSAVE(%rsp) ,%r12
-	mov	8*3+frame_GPRSAVE(%rsp) ,%r13
-	mov	8*4+frame_GPRSAVE(%rsp) ,%r14
-	mov	8*5+frame_GPRSAVE(%rsp) ,%r15
+	mov	8*0+frame_GPRSAVE(%rsp), %rbx
+	mov	8*1+frame_GPRSAVE(%rsp), %r12
+	mov	8*2+frame_GPRSAVE(%rsp), %r13
+	mov	8*3+frame_GPRSAVE(%rsp), %r14
+	mov	8*4+frame_GPRSAVE(%rsp), %r15
 
 	# Restore Stack Pointer
 	mov	frame_RSPSAVE(%rsp), %rsp

From 8f182f845d0fa26ecc18d3bdcc3d1077a0ea3a31 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 18 Sep 2017 14:42:11 -0500
Subject: [PATCH 150/391] crypto: x86/twofish - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R13 instead of RBP.  Both are callee-saved registers, so the
substitution is straightforward.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index b3f49d286348..73b471da3622 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -76,8 +76,8 @@
 #define RT %xmm14
 #define RR %xmm15
 
-#define RID1  %rbp
-#define RID1d %ebp
+#define RID1  %r13
+#define RID1d %r13d
 #define RID2  %rsi
 #define RID2d %esi
 
@@ -259,7 +259,7 @@ __twofish_enc_blk8:
 
 	vmovdqu w(CTX), RK1;
 
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 	pushq %rcx;
 
@@ -282,7 +282,7 @@ __twofish_enc_blk8:
 
 	popq %rcx;
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 
 	outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
@@ -301,7 +301,7 @@ __twofish_dec_blk8:
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 
 	inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
@@ -322,7 +322,7 @@ __twofish_dec_blk8:
 	vmovdqu (w)(CTX), RK1;
 
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 
 	outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);

From afd62fa26343be6445479e75de9f07092a061459 Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Wed, 13 Sep 2017 12:44:51 +0200
Subject: [PATCH 151/391] crypto: talitos - fix sha224

Kernel crypto tests report the following error at startup

[    2.752626] alg: hash: Test 4 failed for sha224-talitos
[    2.757907] 00000000: 30 e2 86 e2 e7 8a dd 0d d7 eb 9f d5 83 fe f1 b0
00000010: 2d 5a 6c a5 f9 55 ea fd 0e 72 05 22

This patch fixes it

Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 79791c690858..6596761bc0c8 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 		req_ctx->swinit = 0;
 	} else {
 		desc->ptr[1] = zero_entry;
-		/* Indicate next op is not the first. */
-		req_ctx->first = 0;
 	}
+	/* Indicate next op is not the first. */
+	req_ctx->first = 0;
 
 	/* HMAC key */
 	if (ctx->keylen)

From 886a27c0fc8a34633aadb0986dba11d8c150ae2e Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Wed, 13 Sep 2017 12:44:57 +0200
Subject: [PATCH 152/391] crypto: talitos - fix hashing

md5sum on some files gives wrong result

Exemple:

With the md5sum from libkcapi:
c15115c05bad51113f81bdaee735dd09  test

With the original md5sum:
bbdf41d80ba7e8b2b7be3a0772be76cb  test

This patch fixes this issue

Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6596761bc0c8..8a48fc4f3642 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 
 	sg_count = edesc->src_nents ?: 1;
 	if (is_sec1 && sg_count > 1)
-		sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length);
+		sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
 	else
 		sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
 				      DMA_TO_DEVICE);

From 56136631573baa537a15e0012055ffe8cfec1a33 Mon Sep 17 00:00:00 2001
From: LEROY Christophe <christophe.leroy@c-s.fr>
Date: Tue, 12 Sep 2017 11:03:39 +0200
Subject: [PATCH 153/391] crypto: talitos - Don't provide setkey for non hmac
 hashing algs.

Today, md5sum fails with error -ENOKEY because a setkey
function is set for non hmac hashing algs, see strace output below:

mmap(NULL, 378880, PROT_READ, MAP_SHARED, 6, 0) = 0x77f50000
accept(3, 0, NULL)                      = 7
vmsplice(5, [{"bin/\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 378880}], 1, SPLICE_F_MORE|SPLICE_F_GIFT) = 262144
splice(4, NULL, 7, NULL, 262144, SPLICE_F_MORE) = -1 ENOKEY (Required key not available)
write(2, "Generation of hash for file kcap"..., 50) = 50
munmap(0x77f50000, 378880)              = 0

This patch ensures that setkey() function is set only
for hmac hashing.

Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 8a48fc4f3642..dff88838dce7 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -3057,7 +3057,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
 		t_alg->algt.alg.hash.final = ahash_final;
 		t_alg->algt.alg.hash.finup = ahash_finup;
 		t_alg->algt.alg.hash.digest = ahash_digest;
-		t_alg->algt.alg.hash.setkey = ahash_setkey;
+		if (!strncmp(alg->cra_name, "hmac", 4))
+			t_alg->algt.alg.hash.setkey = ahash_setkey;
 		t_alg->algt.alg.hash.import = ahash_import;
 		t_alg->algt.alg.hash.export = ahash_export;
 

From 3e1166b94e661ed51af1fe1fe5f74bd83450b50f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Sep 2017 12:12:16 +0200
Subject: [PATCH 154/391] crypto: inside-secure - fix gcc-4.9 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All older compiler versions up to gcc-4.9 produce these
harmless warnings:

drivers/crypto/inside-secure/safexcel_cipher.c:389:9: warning: missing braces around initializer [-Wmissing-braces]
drivers/crypto/inside-secure/safexcel_cipher.c:389:9: warning: (near initialization for ‘result.completion’) [-Wmissing-braces]
drivers/crypto/inside-secure/safexcel_hash.c:422:9: warning: missing braces around initializer [-Wmissing-braces]
drivers/crypto/inside-secure/safexcel_hash.c:422:9: warning: (near initialization for ‘result.completion’) [-Wmissing-braces]

This changes the syntax to something that works on all versions
without warnings.

Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel_cipher.c | 2 +-
 drivers/crypto/inside-secure/safexcel_hash.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index d2207ac5ba19..5438552bc6d7 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -386,7 +386,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct skcipher_request req;
-	struct safexcel_inv_result result = { 0 };
+	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
 	memset(&req, 0, sizeof(struct skcipher_request));
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 3f819399cd95..3980f946874f 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -419,7 +419,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct ahash_request req;
-	struct safexcel_inv_result result = { 0 };
+	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
 	memset(&req, 0, sizeof(struct ahash_request));

From c056d910f08029662080a01b4ce2110e2c9a27b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Fri, 1 Sep 2017 17:12:59 +0300
Subject: [PATCH 155/391] crypto: caam - fix LS1021A support on ARMv7
 multiplatform kernel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When built using multi_v7_defconfig, driver does not work on LS1021A:
[...]
caam 1700000.crypto: can't identify CAAM ipg clk: -2
caam: probe of 1700000.crypto failed with error -2
[...]

It turns out we have to detect at runtime whether driver is running
on an i.MX platform or not.

Cc: <stable@vger.kernel.org>
Fixes: 6c3af9559352 ("crypto: caam - add support for LS1021A")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/Kconfig |  5 +---
 drivers/crypto/caam/ctrl.c  | 19 ++++++------
 drivers/crypto/caam/regs.h  | 59 ++++++++++++++++++-------------------
 3 files changed, 39 insertions(+), 44 deletions(-)

diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index e36aeacd7635..1eb852765469 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -1,6 +1,7 @@
 config CRYPTO_DEV_FSL_CAAM
 	tristate "Freescale CAAM-Multicore driver backend"
 	depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
+	select SOC_BUS
 	help
 	  Enables the driver module for Freescale's Cryptographic Accelerator
 	  and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
@@ -141,10 +142,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
 	  To compile this as a module, choose M here: the module
 	  will be called caamrng.
 
-config CRYPTO_DEV_FSL_CAAM_IMX
-	def_bool SOC_IMX6 || SOC_IMX7D
-	depends on CRYPTO_DEV_FSL_CAAM
-
 config CRYPTO_DEV_FSL_CAAM_DEBUG
 	bool "Enable debug output in CAAM driver"
 	depends on CRYPTO_DEV_FSL_CAAM
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index dacb53fb690e..027e121c6f70 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -7,6 +7,7 @@
 #include <linux/device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/sys_soc.h>
 
 #include "compat.h"
 #include "regs.h"
@@ -19,6 +20,8 @@ bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
 bool caam_dpaa2;
 EXPORT_SYMBOL(caam_dpaa2);
+bool caam_imx;
+EXPORT_SYMBOL(caam_imx);
 
 #ifdef CONFIG_CAAM_QI
 #include "qi.h"
@@ -28,19 +31,11 @@ EXPORT_SYMBOL(caam_dpaa2);
  * i.MX targets tend to have clock control subsystems that can
  * enable/disable clocking to our device.
  */
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
 static inline struct clk *caam_drv_identify_clk(struct device *dev,
 						char *clk_name)
 {
-	return devm_clk_get(dev, clk_name);
+	return caam_imx ? devm_clk_get(dev, clk_name) : NULL;
 }
-#else
-static inline struct clk *caam_drv_identify_clk(struct device *dev,
-						char *clk_name)
-{
-	return NULL;
-}
-#endif
 
 /*
  * Descriptor to instantiate RNG State Handle 0 in normal mode and
@@ -430,6 +425,10 @@ static int caam_probe(struct platform_device *pdev)
 {
 	int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
 	u64 caam_id;
+	static const struct soc_device_attribute imx_soc[] = {
+		{.family = "Freescale i.MX"},
+		{},
+	};
 	struct device *dev;
 	struct device_node *nprop, *np;
 	struct caam_ctrl __iomem *ctrl;
@@ -451,6 +450,8 @@ static int caam_probe(struct platform_device *pdev)
 	dev_set_drvdata(dev, ctrlpriv);
 	nprop = pdev->dev.of_node;
 
+	caam_imx = (bool)soc_device_match(imx_soc);
+
 	/* Enable clocking */
 	clk = caam_drv_identify_clk(&pdev->dev, "ipg");
 	if (IS_ERR(clk)) {
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 2b5efff9ec3c..17cfd23a38fa 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -67,6 +67,7 @@
  */
 
 extern bool caam_little_end;
+extern bool caam_imx;
 
 #define caam_to_cpu(len)				\
 static inline u##len caam##len ## _to_cpu(u##len val)	\
@@ -154,13 +155,10 @@ static inline u64 rd_reg64(void __iomem *reg)
 #else /* CONFIG_64BIT */
 static inline void wr_reg64(void __iomem *reg, u64 data)
 {
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-	if (caam_little_end) {
+	if (!caam_imx && caam_little_end) {
 		wr_reg32((u32 __iomem *)(reg) + 1, data >> 32);
 		wr_reg32((u32 __iomem *)(reg), data);
-	} else
-#endif
-	{
+	} else {
 		wr_reg32((u32 __iomem *)(reg), data >> 32);
 		wr_reg32((u32 __iomem *)(reg) + 1, data);
 	}
@@ -168,41 +166,40 @@ static inline void wr_reg64(void __iomem *reg, u64 data)
 
 static inline u64 rd_reg64(void __iomem *reg)
 {
-#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-	if (caam_little_end)
+	if (!caam_imx && caam_little_end)
 		return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 |
 			(u64)rd_reg32((u32 __iomem *)(reg)));
-	else
-#endif
-		return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
-			(u64)rd_reg32((u32 __iomem *)(reg) + 1));
+
+	return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
+		(u64)rd_reg32((u32 __iomem *)(reg) + 1));
 }
 #endif /* CONFIG_64BIT  */
 
+static inline u64 cpu_to_caam_dma64(dma_addr_t value)
+{
+	if (caam_imx)
+		return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
+			 (u64)cpu_to_caam32(upper_32_bits(value)));
+
+	return cpu_to_caam64(value);
+}
+
+static inline u64 caam_dma64_to_cpu(u64 value)
+{
+	if (caam_imx)
+		return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
+			 (u64)caam32_to_cpu(upper_32_bits(value)));
+
+	return caam64_to_cpu(value);
+}
+
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-#ifdef CONFIG_SOC_IMX7D
-#define cpu_to_caam_dma(value) \
-		(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
-		  (u64)cpu_to_caam32(upper_32_bits(value)))
-#define caam_dma_to_cpu(value) \
-		(((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \
-		  (u64)caam32_to_cpu(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma(value) cpu_to_caam64(value)
-#define caam_dma_to_cpu(value) caam64_to_cpu(value)
-#endif /* CONFIG_SOC_IMX7D */
+#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
+#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
 #else
 #define cpu_to_caam_dma(value) cpu_to_caam32(value)
 #define caam_dma_to_cpu(value) caam32_to_cpu(value)
-#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT  */
-
-#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
-#define cpu_to_caam_dma64(value) \
-		(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
-		 (u64)cpu_to_caam32(upper_32_bits(value)))
-#else
-#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
-#endif
+#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
 
 /*
  * jr_outentry

From e117765a117da3ece15689cb8a759d16c415b08c Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Wed, 30 Aug 2017 09:17:39 +0200
Subject: [PATCH 156/391] crypto: af_alg - update correct dst SGL entry

When two adjacent TX SGL are processed and parts of both TX SGLs
are pulled into the per-request TX SGL, the wrong per-request
TX SGL entries were updated.

This fixes a NULL pointer dereference when a cipher implementation walks
the TX SGL where some of the SGL entries were NULL.

Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory...")
Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index ffa9f4ccd9b4..337cf382718e 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -619,14 +619,14 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 	struct af_alg_ctx *ctx = ask->private;
 	struct af_alg_tsgl *sgl;
 	struct scatterlist *sg;
-	unsigned int i, j;
+	unsigned int i, j = 0;
 
 	while (!list_empty(&ctx->tsgl_list)) {
 		sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
 				       list);
 		sg = sgl->sg;
 
-		for (i = 0, j = 0; i < sgl->cur; i++) {
+		for (i = 0; i < sgl->cur; i++) {
 			size_t plen = min_t(size_t, used, sg[i].length);
 			struct page *page = sg_page(sg + i);
 

From 8afafa6fba7809c0785018b77c95b19e58b35b94 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Fri, 15 Sep 2017 15:38:21 +0530
Subject: [PATCH 157/391] powerpc/kprobes: Update optprobes to use
 emulate_update_regs()

Optprobes depended on an updated regs->nip from analyse_instr() to
identify the location to branch back from the optprobes trampoline.
However, since commit 3cdfcbfd32b9d ("powerpc: Change analyse_instr so
it doesn't modify *regs"), analyse_instr() doesn't update the registers
anymore.  Due to this, we end up branching back from the optprobes
trampoline to the same branch into the trampoline resulting in a loop.

Fix this by calling out to emulate_update_regs() before using the nip.
Additionally, explicitly compare the return value from analyse_instr()
to 1, rather than just checking for !0 so as to guard against any
future changes to analyse_instr() that may result in -1 being returned
in more scenarios.

Fixes: 3cdfcbfd32b9d ("powerpc: Change analyse_instr so it doesn't modify *regs")
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/optprobes.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 6f8273f5e988..91e037ab20a1 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -104,8 +104,10 @@ static unsigned long can_optimize(struct kprobe *p)
 	 * and that can be emulated.
 	 */
 	if (!is_conditional_branch(*p->ainsn.insn) &&
-			analyse_instr(&op, &regs, *p->ainsn.insn))
+			analyse_instr(&op, &regs, *p->ainsn.insn) == 1) {
+		emulate_update_regs(&regs, &op);
 		nip = regs.nip;
+	}
 
 	return nip;
 }

From 1b25fda0533462c9cee3a22e8a7bea68fa670af2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 19 Sep 2017 12:52:22 +0200
Subject: [PATCH 158/391] s390/topology: alternative topology for topology-less
 machines

If running on machines that do not provide topology information we
currently generate a "fake" topology which defines the maximum
distance between each cpu: each cpu will be put into an own drawer.

Historically this used to be the best option for (virtual) machines in
overcommited hypervisors.

For some workloads however it is better to generate a different
topology where all cpus are siblings within a package (all cpus are
core siblings). This shows performance improvements of up to 10%,
depending on the workload.

In order to keep the current behaviour, but also allow to switch to
the different core sibling topology use the existing "topology="
kernel parameter:

Specifying "topology=on" on machines without topology information will
generate the core siblings (fake) topology information, instead of the
default topology information where all cpus have the maximum distance.

On machines which provide topology information specifying
"topology=on" does not have any effect.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/early.c    | 12 -------
 arch/s390/kernel/topology.c | 72 +++++++++++++++++++++++++++++++------
 2 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index ca8cd80e8feb..60181caf8e8a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -404,18 +404,6 @@ static inline void save_vector_registers(void)
 #endif
 }
 
-static int __init topology_setup(char *str)
-{
-	bool enabled;
-	int rc;
-
-	rc = kstrtobool(str, &enabled);
-	if (!rc && !enabled)
-		S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
-	return rc;
-}
-early_param("topology", topology_setup);
-
 static int __init disable_vector_extension(char *str)
 {
 	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index bb47c92476f0..a0ce9c83f589 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -29,12 +29,20 @@
 #define PTF_VERTICAL	(1UL)
 #define PTF_CHECK	(2UL)
 
+enum {
+	TOPOLOGY_MODE_HW,
+	TOPOLOGY_MODE_SINGLE,
+	TOPOLOGY_MODE_PACKAGE,
+	TOPOLOGY_MODE_UNINITIALIZED
+};
+
 struct mask_info {
 	struct mask_info *next;
 	unsigned char id;
 	cpumask_t mask;
 };
 
+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
 static void set_topology_timer(void);
 static void topology_work_fn(struct work_struct *work);
 static struct sysinfo_15_1_x *tl_info;
@@ -59,11 +67,26 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 	cpumask_t mask;
 
 	cpumask_copy(&mask, cpumask_of(cpu));
-	if (!MACHINE_HAS_TOPOLOGY)
-		return mask;
-	for (; info; info = info->next) {
-		if (cpumask_test_cpu(cpu, &info->mask))
-			return info->mask;
+	switch (topology_mode) {
+	case TOPOLOGY_MODE_HW:
+		while (info) {
+			if (cpumask_test_cpu(cpu, &info->mask)) {
+				mask = info->mask;
+				break;
+			}
+			info = info->next;
+		}
+		if (cpumask_empty(&mask))
+			cpumask_copy(&mask, cpumask_of(cpu));
+		break;
+	case TOPOLOGY_MODE_PACKAGE:
+		cpumask_copy(&mask, cpu_present_mask);
+		break;
+	default:
+		/* fallthrough */
+	case TOPOLOGY_MODE_SINGLE:
+		cpumask_copy(&mask, cpumask_of(cpu));
+		break;
 	}
 	return mask;
 }
@@ -74,7 +97,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 	int i;
 
 	cpumask_copy(&mask, cpumask_of(cpu));
-	if (!MACHINE_HAS_TOPOLOGY)
+	if (topology_mode != TOPOLOGY_MODE_HW)
 		return mask;
 	cpu -= cpu % (smp_cpu_mtid + 1);
 	for (i = 0; i <= smp_cpu_mtid; i++)
@@ -223,7 +246,7 @@ int topology_set_cpu_management(int fc)
 static void update_cpu_masks(void)
 {
 	struct cpu_topology_s390 *topo;
-	int cpu;
+	int cpu, id;
 
 	for_each_possible_cpu(cpu) {
 		topo = &cpu_topology[cpu];
@@ -231,12 +254,13 @@ static void update_cpu_masks(void)
 		topo->core_mask = cpu_group_map(&socket_info, cpu);
 		topo->book_mask = cpu_group_map(&book_info, cpu);
 		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
-		if (!MACHINE_HAS_TOPOLOGY) {
+		if (topology_mode != TOPOLOGY_MODE_HW) {
+			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
 			topo->thread_id = cpu;
 			topo->core_id = cpu;
-			topo->socket_id = cpu;
-			topo->book_id = cpu;
-			topo->drawer_id = cpu;
+			topo->socket_id = id;
+			topo->book_id = id;
+			topo->drawer_id = id;
 			if (cpu_present(cpu))
 				cpumask_set_cpu(cpu, &cpus_with_topology);
 		}
@@ -459,6 +483,12 @@ void __init topology_init_early(void)
 	struct sysinfo_15_1_x *info;
 
 	set_sched_topology(s390_topology);
+	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+		if (MACHINE_HAS_TOPOLOGY)
+			topology_mode = TOPOLOGY_MODE_HW;
+		else
+			topology_mode = TOPOLOGY_MODE_SINGLE;
+	}
 	if (!MACHINE_HAS_TOPOLOGY)
 		goto out;
 	tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
@@ -474,6 +504,26 @@ void __init topology_init_early(void)
 	__arch_update_cpu_topology();
 }
 
+static inline int topology_get_mode(int enabled)
+{
+	if (!enabled)
+		return TOPOLOGY_MODE_SINGLE;
+	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+}
+
+static int __init topology_setup(char *str)
+{
+	bool enabled;
+	int rc;
+
+	rc = kstrtobool(str, &enabled);
+	if (rc)
+		return rc;
+	topology_mode = topology_get_mode(enabled);
+	return 0;
+}
+early_param("topology", topology_setup);
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)

From 51dce3867c6c63c7500332e5448c2ba76808d6b5 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 14 Sep 2017 14:42:32 +0200
Subject: [PATCH 159/391] s390/topology: enable / disable topology dynamically

Add a new sysctl file /proc/sys/s390/topology which displays if
topology is on (1) or off (0) as specified by the "topology=" kernel
parameter.

This allows to change topology information during runtime and
configuring it via /etc/sysctl.conf instead of using the kernel line
parameter.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/topology.c | 76 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index a0ce9c83f589..ed0bdd220e1a 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -8,6 +8,8 @@
 
 #include <linux/workqueue.h>
 #include <linux/bootmem.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
 #include <linux/cpuset.h>
 #include <linux/device.h>
 #include <linux/export.h>
@@ -207,10 +209,8 @@ static void topology_update_polarization_simple(void)
 {
 	int cpu;
 
-	mutex_lock(&smp_cpu_state_mutex);
 	for_each_possible_cpu(cpu)
 		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
-	mutex_unlock(&smp_cpu_state_mutex);
 }
 
 static int ptf(unsigned long fc)
@@ -278,6 +278,7 @@ static int __arch_update_cpu_topology(void)
 	struct sysinfo_15_1_x *info = tl_info;
 	int rc = 0;
 
+	mutex_lock(&smp_cpu_state_mutex);
 	cpumask_clear(&cpus_with_topology);
 	if (MACHINE_HAS_TOPOLOGY) {
 		rc = 1;
@@ -287,6 +288,7 @@ static int __arch_update_cpu_topology(void)
 	update_cpu_masks();
 	if (!MACHINE_HAS_TOPOLOGY)
 		topology_update_polarization_simple();
+	mutex_unlock(&smp_cpu_state_mutex);
 	return rc;
 }
 
@@ -313,6 +315,11 @@ void topology_schedule_update(void)
 	schedule_work(&topology_work);
 }
 
+static void topology_flush_work(void)
+{
+	flush_work(&topology_work);
+}
+
 static void topology_timer_fn(unsigned long ignored)
 {
 	if (ptf(PTF_CHECK))
@@ -511,6 +518,11 @@ static inline int topology_get_mode(int enabled)
 	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
 }
 
+static inline int topology_is_enabled(void)
+{
+	return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
 static int __init topology_setup(char *str)
 {
 	bool enabled;
@@ -524,12 +536,72 @@ static int __init topology_setup(char *str)
 }
 early_param("topology", topology_setup);
 
+static int topology_ctl_handler(struct ctl_table *ctl, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int len;
+	int new_mode;
+	char buf[2];
+
+	if (!*lenp || *ppos) {
+		*lenp = 0;
+		return 0;
+	}
+	if (!write) {
+		strncpy(buf, topology_is_enabled() ? "1\n" : "0\n",
+			ARRAY_SIZE(buf));
+		len = strnlen(buf, ARRAY_SIZE(buf));
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len))
+			return -EFAULT;
+		goto out;
+	}
+	len = *lenp;
+	if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
+		return -EFAULT;
+	if (buf[0] != '0' && buf[0] != '1')
+		return -EINVAL;
+	mutex_lock(&smp_cpu_state_mutex);
+	new_mode = topology_get_mode(buf[0] == '1');
+	if (topology_mode != new_mode) {
+		topology_mode = new_mode;
+		topology_schedule_update();
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	topology_flush_work();
+out:
+	*lenp = len;
+	*ppos += len;
+	return 0;
+}
+
+static struct ctl_table topology_ctl_table[] = {
+	{
+		.procname	= "topology",
+		.mode		= 0644,
+		.proc_handler	= topology_ctl_handler,
+	},
+	{ },
+};
+
+static struct ctl_table topology_dir_table[] = {
+	{
+		.procname	= "s390",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= topology_ctl_table,
+	},
+	{ },
+};
+
 static int __init topology_init(void)
 {
 	if (MACHINE_HAS_TOPOLOGY)
 		set_topology_timer();
 	else
 		topology_update_polarization_simple();
+	register_sysctl_table(topology_dir_table);
 	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
 }
 device_initcall(topology_init);

From 9e09007486c883de4aa78a384e1d54c3fe6e42d5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 18 Sep 2017 18:12:36 +0900
Subject: [PATCH 160/391] kbuild: rpm-pkg: delete firmware_install to fix build
 error

Commit 5620a0d1aacd ("firmware: delete in-kernel firmware") deleted
in-kernel firmware support, including "make firmware_install".

Since then, "make rpm-pkg" / "make binrpm-pkg" fails to build with
the error:

  make[2]: *** No rule to make target `firmware_install'.  Stop.

Commit df85b2d767aa ("firmware: Restore support for built-in firmware")
restored the build infrastructure for CONFIG_EXTRA_FIRMWARE, but this
is out of the scope of "make firmware_install".  So, the right thing to
do is to kill the use of "make firmware_install".

Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/package/mkspec | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index bb43f153fd8e..bf3e9abb2846 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -88,11 +88,8 @@ echo 'mkdir -p $RPM_BUILD_ROOT/boot/efi $RPM_BUILD_ROOT/lib/modules'
 echo "%else"
 echo 'mkdir -p $RPM_BUILD_ROOT/boot $RPM_BUILD_ROOT/lib/modules'
 echo "%endif"
-echo 'mkdir -p $RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE"
 
-echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= mod-fw= modules_install'
-echo 'INSTALL_FW_PATH=$RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE"
-echo 'make INSTALL_FW_PATH=$INSTALL_FW_PATH' firmware_install
+echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= modules_install'
 echo "%ifarch ia64"
 echo 'cp $KBUILD_IMAGE $RPM_BUILD_ROOT'"/boot/efi/vmlinuz-$KERNELRELEASE"
 echo 'ln -s '"efi/vmlinuz-$KERNELRELEASE" '$RPM_BUILD_ROOT'"/boot/"
@@ -119,7 +116,7 @@ if ! $PREBUILT; then
 echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/build"
 echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/source"
 echo "mkdir -p "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE"
-echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude=firmware --exclude .config.old --exclude .missing-syscalls.d\""
+echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude .config.old --exclude .missing-syscalls.d\""
 echo "tar "'$EXCLUDES'" -cf- . | (cd "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE;tar xvf -)"
 echo 'cd $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE"
 echo "ln -sf /usr/src/kernels/$KERNELRELEASE build"
@@ -154,7 +151,6 @@ echo '%defattr (-, root, root)'
 echo "/lib/modules/$KERNELRELEASE"
 echo "%exclude /lib/modules/$KERNELRELEASE/build"
 echo "%exclude /lib/modules/$KERNELRELEASE/source"
-echo "/lib/firmware/$KERNELRELEASE"
 echo "/boot/*"
 echo ""
 echo "%files headers"

From cc18abbe449aafc013831a8e0440afc336ae1cba Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 18 Sep 2017 18:22:19 +0900
Subject: [PATCH 161/391] kbuild: deb-pkg: remove firmware package support

Commit 5620a0d1aacd ("firmware: delete in-kernel firmware") deleted
in-kernel firmware support, including the firmware install command.

So, the firmware package does not make sense any more.  Remove it.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Riku Voipio <riku.voipio@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/package/builddeb | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index aad67000e4dd..0bc87473f68f 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -92,12 +92,10 @@ else
 fi
 sourcename=$KDEB_SOURCENAME
 tmpdir="$objtree/debian/tmp"
-fwdir="$objtree/debian/fwtmp"
 kernel_headers_dir="$objtree/debian/hdrtmp"
 libc_headers_dir="$objtree/debian/headertmp"
 dbg_dir="$objtree/debian/dbgtmp"
 packagename=linux-image-$version
-fwpackagename=linux-firmware-image-$version
 kernel_headers_packagename=linux-headers-$version
 libc_headers_packagename=linux-libc-dev
 dbg_packagename=$packagename-dbg
@@ -126,10 +124,9 @@ esac
 BUILD_DEBUG="$(grep -s '^CONFIG_DEBUG_INFO=y' $KCONFIG_CONFIG || true)"
 
 # Setup the directory structure
-rm -rf "$tmpdir" "$fwdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files
+rm -rf "$tmpdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files
 mkdir -m 755 -p "$tmpdir/DEBIAN"
 mkdir -p "$tmpdir/lib" "$tmpdir/boot"
-mkdir -p "$fwdir/lib/firmware/$version/"
 mkdir -p "$kernel_headers_dir/lib/modules/$version/"
 
 # Build and install the kernel
@@ -306,7 +303,6 @@ else
 	cat <<EOF >> debian/control
 
 Package: $packagename
-Suggests: $fwpackagename
 Architecture: any
 Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
@@ -345,22 +341,6 @@ Description: Linux kernel headers for $KERNELRELEASE on \${kernel:debarch}
  This is useful for people who need to build external modules
 EOF
 
-# Do we have firmware? Move it out of the way and build it into a package.
-if [ -e "$tmpdir/lib/firmware" ]; then
-	mv "$tmpdir/lib/firmware"/* "$fwdir/lib/firmware/$version/"
-	rmdir "$tmpdir/lib/firmware"
-
-	cat <<EOF >> debian/control
-
-Package: $fwpackagename
-Architecture: all
-Description: Linux kernel firmware, version $version
- This package contains firmware from the Linux kernel, version $version.
-EOF
-
-	create_package "$fwpackagename" "$fwdir"
-fi
-
 cat <<EOF >> debian/control
 
 Package: $libc_headers_packagename

From 25b080bd53f2873785fa049f570ac1b361c11d72 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 20 Sep 2017 22:01:26 +0900
Subject: [PATCH 162/391] kbuild: rpm-pkg: fix version number handling

The "Release:" field of the spec file is determined based on the
.version file.

However, the .version file is not copied to the source tar file.
So, when we build the kernel from the source package, the UTS_VERSION
always indicates #1.  This does not match with "rpm -q".

The kernel UTS_VERSION and "rpm -q" do not agree for binrpm-pkg, either.
Please note the kernel has already been built before the spec file is
created.  Currently, mkspec invokes mkversion.  This script returns an
incremented version.  So, the "Release:" field of the spec file is
greater than the version in the kernel by one.

For the source package build (where .version file is missing), we can
give KBUILD_BUILD_VERSION=%{release} to the build command.

For the binary package build, we can simply read out the .version file
because it contains the version number that was used for building the
kernel image.

We can remove scripts/mkversion because scripts/package/Makefile need
not touch the .version file.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/mkversion        | 6 ------
 scripts/package/Makefile | 5 -----
 scripts/package/mkspec   | 6 ++----
 3 files changed, 2 insertions(+), 15 deletions(-)
 delete mode 100644 scripts/mkversion

diff --git a/scripts/mkversion b/scripts/mkversion
deleted file mode 100644
index c12addc9c7ef..000000000000
--- a/scripts/mkversion
+++ /dev/null
@@ -1,6 +0,0 @@
-if [ ! -f .version ]
-then
-    echo 1
-else
-    expr 0`cat .version` + 1
-fi
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 71b4a8af9d4d..73f9f3192b9f 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -50,8 +50,6 @@ rpm-pkg rpm: FORCE
 	$(MAKE) clean
 	$(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
 	$(call cmd,src_tar,$(KERNELPATH),kernel.spec)
-	$(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
-	mv -f $(objtree)/.tmp_version $(objtree)/.version
 	rpmbuild $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz
 	rm $(KERNELPATH).tar.gz kernel.spec
 
@@ -60,9 +58,6 @@ rpm-pkg rpm: FORCE
 binrpm-pkg: FORCE
 	$(MAKE) KBUILD_SRC=
 	$(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec
-	$(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version
-	mv -f $(objtree)/.tmp_version $(objtree)/.version
-
 	rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
 		$(UTS_MACHINE) -bb $(objtree)/binkernel.spec
 	rm binkernel.spec
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index bf3e9abb2846..f47f17aae135 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -27,9 +27,7 @@ __KERNELRELEASE=`echo $KERNELRELEASE | sed -e "s/-/_/g"`
 echo "Name: kernel"
 echo "Summary: The Linux Kernel"
 echo "Version: $__KERNELRELEASE"
-# we need to determine the NEXT version number so that uname and
-# rpm -q will agree
-echo "Release: `. $srctree/scripts/mkversion`"
+echo "Release: $(cat .version 2>/dev/null || echo 1)"
 echo "License: GPL"
 echo "Group: System Environment/Kernel"
 echo "Vendor: The Linux Community"
@@ -77,7 +75,7 @@ fi
 echo "%build"
 
 if ! $PREBUILT; then
-echo "make clean && make %{?_smp_mflags}"
+echo "make clean && make %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release}"
 echo ""
 fi
 

From 35f3c984548524be5eb4c3f5295cf58d909e8746 Mon Sep 17 00:00:00 2001
From: Frank Rowand <frank.rowand@sony.com>
Date: Mon, 18 Sep 2017 17:18:44 -0700
Subject: [PATCH 163/391] scripts/dtc: dtx_diff - 2nd update of include dts
 paths to match build

Update dtx_diff include paths in the same manner as:
commit b12869a8d519 ("of: remove drivers/of/testcase-data from
include search path for CPP"), commit 5ffa2aed389c ("of: remove
arch/$(SRCARCH)/boot/dts from include search path for CPP"), and
commit 50f9ddaf64e1 ("of: search scripts/dtc/include-prefixes path
for both CPP and DTC").

Remove proposed include path kernel/dts/, which was never implemented
for the dtb build.

For the diff case, each source file is compiled separately.  For
each of those compiles, provide the location of the source file
as an include path, not the location of both source files.

Signed-off-by: Frank Rowand <frank.rowand@sony.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 scripts/dtc/dtx_diff | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff
index f9a3d8d23c64..8c4fbad2055e 100755
--- a/scripts/dtc/dtx_diff
+++ b/scripts/dtc/dtx_diff
@@ -86,6 +86,7 @@ eod
 compile_to_dts() {
 
 	dtx="$1"
+	dtc_include="$2"
 
 	if [ -d "${dtx}" ] ; then
 
@@ -113,7 +114,7 @@ compile_to_dts() {
 		# -----  input is DTS (source)
 
 		if ( cpp ${cpp_flags} -x assembler-with-cpp ${dtx} \
-			| ${DTC} -I dts ) ; then
+			| ${DTC} ${dtc_include} -I dts ) ; then
 			return
 		fi
 
@@ -320,18 +321,13 @@ fi
 
 cpp_flags="\
 	-nostdinc                                  \
-	-I${srctree}/arch/${ARCH}/boot/dts         \
 	-I${srctree}/scripts/dtc/include-prefixes  \
-	-I${srctree}/drivers/of/testcase-data      \
 	-undef -D__DTS__"
 
-dtc_flags="\
-	-i ${srctree}/arch/${ARCH}/boot/dts/ \
-	-i ${srctree}/kernel/dts             \
-	${dtx_path_1_dtc_include}            \
-	${dtx_path_2_dtc_include}"
-
-DTC="${DTC} ${dtc_flags} -O dts -qq -f ${dtc_sort} -o -"
+DTC="\
+	${DTC}                                     \
+	-i ${srctree}/scripts/dtc/include-prefixes \
+	-O dts -qq -f ${dtc_sort} -o -"
 
 
 # -----  do the diff or decompile
@@ -339,11 +335,11 @@ DTC="${DTC} ${dtc_flags} -O dts -qq -f ${dtc_sort} -o -"
 if (( ${cmd_diff} )) ; then
 
 	diff ${diff_flags} --label "${dtx_file_1}" --label "${dtx_file_2}" \
-		<(compile_to_dts "${dtx_file_1}") \
-		<(compile_to_dts "${dtx_file_2}")
+		<(compile_to_dts "${dtx_file_1}" "${dtx_path_1_dtc_include}") \
+		<(compile_to_dts "${dtx_file_2}" "${dtx_path_2_dtc_include}")
 
 else
 
-	compile_to_dts "${dtx_file_1}"
+	compile_to_dts "${dtx_file_1}" "${dtx_path_1_dtc_include}"
 
 fi

From 749aaf3372b8b56b8743c3e27700d64c8bd06921 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Wed, 20 Sep 2017 13:56:06 -0500
Subject: [PATCH 164/391] PCI: endpoint: Use correct "end of test" interrupt

pci_epf_test_raise_irq() reads the interrupt to use for the response from
reg->command, but this has been cleared at the beginning of the command
handler so the value is always zero at this point.

Instead, extract the interrupt index before handling the command and then
pass the requested interrupt into pci_epf_test_raise_irq().  This allows us
to remove the specific code to extract the interrupt for
COMMAND_RAISE_MSI_IRQ since it is now handled in common code.

Fixes: 3ecf3232c54c ("PCI: endpoint: Do not reset *command* inadvertently")
Signed-off-by: John Keeping <john@metanate.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/pci/endpoint/functions/pci-epf-test.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 4ddc6e8f9fe7..f9308c2f22e6 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -251,9 +251,8 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test)
 	return ret;
 }
 
-static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test)
+static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq)
 {
-	u8 irq;
 	u8 msi_count;
 	struct pci_epf *epf = epf_test->epf;
 	struct pci_epc *epc = epf->epc;
@@ -262,7 +261,6 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test)
 
 	reg->status |= STATUS_IRQ_RAISED;
 	msi_count = pci_epc_get_msi(epc);
-	irq = (reg->command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
 	if (irq > msi_count || msi_count <= 0)
 		pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0);
 	else
@@ -289,6 +287,8 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 	reg->command = 0;
 	reg->status = 0;
 
+	irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
+
 	if (command & COMMAND_RAISE_LEGACY_IRQ) {
 		reg->status = STATUS_IRQ_RAISED;
 		pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0);
@@ -301,7 +301,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 			reg->status |= STATUS_WRITE_FAIL;
 		else
 			reg->status |= STATUS_WRITE_SUCCESS;
-		pci_epf_test_raise_irq(epf_test);
+		pci_epf_test_raise_irq(epf_test, irq);
 		goto reset_handler;
 	}
 
@@ -311,7 +311,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 			reg->status |= STATUS_READ_SUCCESS;
 		else
 			reg->status |= STATUS_READ_FAIL;
-		pci_epf_test_raise_irq(epf_test);
+		pci_epf_test_raise_irq(epf_test, irq);
 		goto reset_handler;
 	}
 
@@ -321,13 +321,12 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 			reg->status |= STATUS_COPY_SUCCESS;
 		else
 			reg->status |= STATUS_COPY_FAIL;
-		pci_epf_test_raise_irq(epf_test);
+		pci_epf_test_raise_irq(epf_test, irq);
 		goto reset_handler;
 	}
 
 	if (command & COMMAND_RAISE_MSI_IRQ) {
 		msi_count = pci_epc_get_msi(epc);
-		irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT;
 		if (irq > msi_count || msi_count <= 0)
 			goto reset_handler;
 		reg->status = STATUS_IRQ_RAISED;

From 008ba2a13f2d04c947adc536d19debb8fe66f110 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 14 Sep 2017 17:14:41 -0400
Subject: [PATCH 165/391] packet: hold bind lock when rebinding to fanout hook

Packet socket bind operations must hold the po->bind_lock. This keeps
po->running consistent with whether the socket is actually on a ptype
list to receive packets.

fanout_add unbinds a socket and its packet_rcv/tpacket_rcv call, then
binds the fanout object to receive through packet_rcv_fanout.

Make it hold the po->bind_lock when testing po->running and rebinding.
Else, it can race with other rebind operations, such as that in
packet_set_ring from packet_rcv to tpacket_rcv. Concurrent updates
can result in a socket being added to a fanout group twice, causing
use-after-free KASAN bug reports, among others.

Reported independently by both trinity and syzkaller.
Verified that the syzkaller reproducer passes after this patch.

Fixes: dc99f600698d ("packet: Add fanout support.")
Reported-by: nixioaming <nixiaoming@huawei.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c26172995511..d288f52c53f7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1684,10 +1684,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 
 	mutex_lock(&fanout_mutex);
 
-	err = -EINVAL;
-	if (!po->running)
-		goto out;
-
 	err = -EALREADY;
 	if (po->fanout)
 		goto out;
@@ -1749,7 +1745,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		list_add(&match->list, &fanout_list);
 	}
 	err = -EINVAL;
-	if (match->type == type &&
+
+	spin_lock(&po->bind_lock);
+	if (po->running &&
+	    match->type == type &&
 	    match->prot_hook.type == po->prot_hook.type &&
 	    match->prot_hook.dev == po->prot_hook.dev) {
 		err = -ENOSPC;
@@ -1761,6 +1760,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 			err = 0;
 		}
 	}
+	spin_unlock(&po->bind_lock);
+
+	if (err && !refcount_read(&match->sk_ref)) {
+		list_del(&match->list);
+		kfree(match);
+	}
+
 out:
 	if (err && rollover) {
 		kfree(rollover);

From ec9dd352d591f0c90402ec67a317c1ed4fb2e638 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 18 Sep 2017 16:38:36 -0700
Subject: [PATCH 166/391] bpf: one perf event close won't free bpf program
 attached by another perf event

This patch fixes a bug exhibited by the following scenario:
  1. fd1 = perf_event_open with attr.config = ID1
  2. attach bpf program prog1 to fd1
  3. fd2 = perf_event_open with attr.config = ID1
     <this will be successful>
  4. user program closes fd2 and prog1 is detached from the tracepoint.
  5. user program with fd1 does not work properly as tracepoint
     no output any more.

The issue happens at step 4. Multiple perf_event_open can be called
successfully, but only one bpf prog pointer in the tp_event. In the
current logic, any fd release for the same tp_event will free
the tp_event->prog.

The fix is to free tp_event->prog only when the closing fd
corresponds to the one which registered the program.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/trace_events.h | 1 +
 kernel/events/core.c         | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 7f11050746ae..2e0f22298fe9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -272,6 +272,7 @@ struct trace_event_call {
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
 	struct bpf_prog			*prog;
+	struct perf_event		*bpf_prog_owner;
 
 	int	(*perf_perm)(struct trace_event_call *,
 			     struct perf_event *);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3e691b75b2db..6bc21e202ae4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8171,6 +8171,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 		}
 	}
 	event->tp_event->prog = prog;
+	event->tp_event->bpf_prog_owner = event;
 
 	return 0;
 }
@@ -8185,7 +8186,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
 		return;
 
 	prog = event->tp_event->prog;
-	if (prog) {
+	if (prog && event->tp_event->bpf_prog_owner == event) {
 		event->tp_event->prog = NULL;
 		bpf_prog_put(prog);
 	}

From 1fa089ec6d68849bfe60abd141375dc9b21b0ee8 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 20 Sep 2017 16:46:49 -0500
Subject: [PATCH 167/391] [SMB3] Update session and share information displayed
 for debugging SMB2/SMB3

We were not displaying some key fields (session status and capabilities and
whether guest authenticated) for SMB2/SMB3 session in /proc/fs/cifs/DebugData.

This is needed for real world triage of problems with the (now much more
common) SMB3 mounts.

Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifs_debug.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9727e1dcacd5..cbb9534b89b4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -160,8 +160,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 			if ((ses->serverDomain == NULL) ||
 				(ses->serverOS == NULL) ||
 				(ses->serverNOS == NULL)) {
-				seq_printf(m, "\n%d) entry for %s not fully "
-					   "displayed\n\t", i, ses->serverName);
+				seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
+					i, ses->serverName, ses->ses_count,
+					ses->capabilities, ses->status);
+				if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
+					seq_printf(m, "Guest\t");
+				else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+					seq_printf(m, "Anonymous\t");
 			} else {
 				seq_printf(m,
 				    "\n%d) Name: %s  Domain: %s Uses: %d OS:"

From c2a64bb9fcd31c39feddf30748b4ee8d82e53c6a Mon Sep 17 00:00:00 2001
From: Meng Xu <mengxu.gatech@gmail.com>
Date: Tue, 19 Sep 2017 13:19:13 -0400
Subject: [PATCH 168/391] net: compat: assert the size of cmsg copied in is as
 expected

The actual length of cmsg fetched in during the second loop
(i.e., kcmsg - kcmsg_base) could be different from what we
get from the first loop (i.e., kcmlen).

The main reason is that the two get_user() calls in the two
loops (i.e., get_user(ucmlen, &ucmsg->cmsg_len) and
__get_user(ucmlen, &ucmsg->cmsg_len)) could cause ucmlen
to have different values even they fetch from the same userspace
address, as user can race to change the memory content in
&ucmsg->cmsg_len across fetches.

Although in the second loop, the sanity check
if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp))
is inplace, it only ensures that the cmsg fetched in during the
second loop does not exceed the length of kcmlen, but not
necessarily equal to kcmlen. But indicated by the assignment
kmsg->msg_controllen = kcmlen, we should enforce that.

This patch adds this additional sanity check and ensures that
what is recorded in kmsg->msg_controllen is the actual cmsg length.

Signed-off-by: Meng Xu <mengxu.gatech@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/compat.c b/net/compat.c
index 6ded6c821d7a..22381719718c 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -185,6 +185,13 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 		ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
 	}
 
+	/*
+	 * check the length of messages copied in is the same as the
+	 * what we get from the first loop
+	 */
+	if ((char *)kcmsg - (char *)kcmsg_base != kcmlen)
+		goto Einval;
+
 	/* Ok, looks like we made it.  Hook it up and return success. */
 	kmsg->msg_control = kcmsg_base;
 	kmsg->msg_controllen = kcmlen;

From 92dd5452c1be873a1193561f4f691763103d22ac Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 19 Sep 2017 18:45:56 +0100
Subject: [PATCH 169/391] net: change skb->mac_header when Generic XDP calls
 adjust_head

Since XDP's view of the packet includes the MAC header, moving the start-
 of-packet with bpf_xdp_adjust_head needs to also update the offset of the
 MAC header (which is relative to skb->head, not to the skb->data that was
 changed).
Without this, tcpdump sees packets starting from the old MAC header rather
 than the new one, at least in my tests on the loopback device.

Fixes: b5cdae3291f7 ("net: Generic XDP")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index fb766d906148..9a2254f9802f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3892,6 +3892,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		__skb_pull(skb, off);
 	else if (off < 0)
 		__skb_push(skb, -off);
+	skb->mac_header += off;
 
 	switch (act) {
 	case XDP_REDIRECT:

From 5e62d98c4bbc243f3dca18e73a754b629839fc5c Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Tue, 19 Sep 2017 17:33:07 -0700
Subject: [PATCH 170/391] net: fec: only check queue 0 if RXF_0/TXF_0 interrupt
 is set

Before queue 0 was always checked if any queue caused an interrupt.
It is better to just mark queue 0 if queue 0 has caused an interrupt.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Acked-by: Fugang Duan <Fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 56f56d6ada9c..464055fb33d5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1559,14 +1559,14 @@ fec_enet_collect_events(struct fec_enet_private *fep, uint int_events)
 	if (int_events == 0)
 		return false;
 
-	if (int_events & FEC_ENET_RXF)
+	if (int_events & FEC_ENET_RXF_0)
 		fep->work_rx |= (1 << 2);
 	if (int_events & FEC_ENET_RXF_1)
 		fep->work_rx |= (1 << 0);
 	if (int_events & FEC_ENET_RXF_2)
 		fep->work_rx |= (1 << 1);
 
-	if (int_events & FEC_ENET_TXF)
+	if (int_events & FEC_ENET_TXF_0)
 		fep->work_tx |= (1 << 2);
 	if (int_events & FEC_ENET_TXF_1)
 		fep->work_tx |= (1 << 0);

From 7063c163cd4a20184b3bbada503dab8f254a8c56 Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Tue, 19 Sep 2017 17:33:08 -0700
Subject: [PATCH 171/391] net: fec: remove unused interrupt FEC_ENET_TS_TIMER

FEC_ENET_TS_TIMER is not checked in the interrupt routine
so there is no need to enable it.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Acked-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 38c7b21e5d63..ede1876a9a19 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -374,8 +374,8 @@ struct bufdesc_ex {
 #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)
 #define FEC_ENET_TS_TIMER       ((uint)0x00008000)
 
-#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII | FEC_ENET_TS_TIMER)
-#define FEC_NAPI_IMASK	(FEC_ENET_MII | FEC_ENET_TS_TIMER)
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
+#define FEC_NAPI_IMASK	FEC_ENET_MII
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
 
 /* ENET interrupt coalescing macro define */

From e24ee2780a1d6786b94139ebf95b44c9ae62bf13 Mon Sep 17 00:00:00 2001
From: Troy Kisky <troy.kisky@boundarydevices.com>
Date: Tue, 19 Sep 2017 17:33:09 -0700
Subject: [PATCH 172/391] net: fec: return IRQ_HANDLED if
 fec_ptp_check_pps_event handled it

fec_ptp_check_pps_event will return 1 if FEC_T_TF_MASK caused
an interrupt. Don't return IRQ_NONE in this case.

Signed-off-by: Troy Kisky <troy.kisky@boundarydevices.com>
Acked-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 464055fb33d5..3dc2d771a222 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1604,8 +1604,8 @@ fec_enet_interrupt(int irq, void *dev_id)
 	}
 
 	if (fep->ptp_clock)
-		fec_ptp_check_pps_event(fep);
-
+		if (fec_ptp_check_pps_event(fep))
+			ret = IRQ_HANDLED;
 	return ret;
 }
 

From 02388bf87f72e1d47174cd8f81c34443920eb5a0 Mon Sep 17 00:00:00 2001
From: Meng Xu <mengxu.gatech@gmail.com>
Date: Tue, 19 Sep 2017 21:49:55 -0400
Subject: [PATCH 173/391] isdn/i4l: fetch the ppp_write buffer in one shot

In isdn_ppp_write(), the header (i.e., protobuf) of the buffer is
fetched twice from userspace. The first fetch is used to peek at the
protocol of the message and reset the huptimer if necessary; while the
second fetch copies in the whole buffer. However, given that buf resides
in userspace memory, a user process can race to change its memory content
across fetches. By doing so, we can either avoid resetting the huptimer
for any type of packets (by first setting proto to PPP_LCP and later
change to the actual type) or force resetting the huptimer for LCP
packets.

This patch changes this double-fetch behavior into two single fetches
decided by condition (lp->isdn_device < 0 || lp->isdn_channel <0).
A more detailed discussion can be found at
https://marc.info/?l=linux-kernel&m=150586376926123&w=2

Signed-off-by: Meng Xu <mengxu.gatech@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_ppp.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 6c44609fd83a..cd2b3c69771a 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -825,7 +825,6 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count)
 	isdn_net_local *lp;
 	struct ippp_struct *is;
 	int proto;
-	unsigned char protobuf[4];
 
 	is = file->private_data;
 
@@ -839,24 +838,28 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count)
 	if (!lp)
 		printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n");
 	else {
-		/*
-		 * Don't reset huptimer for
-		 * LCP packets. (Echo requests).
-		 */
-		if (copy_from_user(protobuf, buf, 4))
-			return -EFAULT;
-		proto = PPP_PROTOCOL(protobuf);
-		if (proto != PPP_LCP)
-			lp->huptimer = 0;
+		if (lp->isdn_device < 0 || lp->isdn_channel < 0) {
+			unsigned char protobuf[4];
+			/*
+			 * Don't reset huptimer for
+			 * LCP packets. (Echo requests).
+			 */
+			if (copy_from_user(protobuf, buf, 4))
+				return -EFAULT;
+
+			proto = PPP_PROTOCOL(protobuf);
+			if (proto != PPP_LCP)
+				lp->huptimer = 0;
 
-		if (lp->isdn_device < 0 || lp->isdn_channel < 0)
 			return 0;
+		}
 
 		if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) &&
 		    lp->dialstate == 0 &&
 		    (lp->flags & ISDN_NET_CONNECTED)) {
 			unsigned short hl;
 			struct sk_buff *skb;
+			unsigned char *cpy_buf;
 			/*
 			 * we need to reserve enough space in front of
 			 * sk_buff. old call to dev_alloc_skb only reserved
@@ -869,11 +872,21 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count)
 				return count;
 			}
 			skb_reserve(skb, hl);
-			if (copy_from_user(skb_put(skb, count), buf, count))
+			cpy_buf = skb_put(skb, count);
+			if (copy_from_user(cpy_buf, buf, count))
 			{
 				kfree_skb(skb);
 				return -EFAULT;
 			}
+
+			/*
+			 * Don't reset huptimer for
+			 * LCP packets. (Echo requests).
+			 */
+			proto = PPP_PROTOCOL(cpy_buf);
+			if (proto != PPP_LCP)
+				lp->huptimer = 0;
+
 			if (is->debug & 0x40) {
 				printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len);
 				isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot);

From e92a0843795779678397ac0790a76de20f79cc13 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:50 +0800
Subject: [PATCH 174/391] net: hns3: Cleanup for ROCE capability flag in ae_dev

This patch add the ROCE supported flag in the driver_data
field of pci_device_id, delete roce_pci_tbl and change
HNAE_DEV_SUPPORT_ROCE_B to HNAE3_DEV_SUPPORT_ROCE_B.
This cleanup is done in order to support adding capability
in pci_device_id and to fix initialization failure when
cmd is not supported.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  5 +++-
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 25 +++----------------
 .../hisilicon/hns3/hns3pf/hns3_enet.c         | 16 ++++++++----
 3 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index b2f28ae81273..0f7b61a92f44 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -49,7 +49,10 @@
 #define HNAE3_CLASS_NAME_SIZE 16
 
 #define HNAE3_DEV_INITED_B			0x0
-#define HNAE_DEV_SUPPORT_ROCE_B			0x1
+#define HNAE3_DEV_SUPPORT_ROCE_B		0x1
+
+#define hnae3_dev_roce_supported(hdev) \
+	hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
 
 #define ring_ptr_move_fw(ring, p) \
 	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 74008ef23169..6953d19c6475 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -46,17 +46,7 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
-	/* Required last entry */
-	{0, }
-};
-
-static const struct pci_device_id roce_pci_tbl[] = {
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
-	/* Required last entry */
+	/* required last entry */
 	{0, }
 };
 
@@ -894,7 +884,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
 	hdev->num_tqps = __le16_to_cpu(req->tqp_num);
 	hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
 
-	if (hnae_get_bit(hdev->ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B)) {
+	if (hnae3_dev_roce_supported(hdev)) {
 		hdev->num_roce_msix =
 		hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
 			       HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
@@ -3932,8 +3922,7 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 				goto err;
 
 			if (hdev->roce_client &&
-			    hnae_get_bit(hdev->ae_dev->flag,
-					 HNAE_DEV_SUPPORT_ROCE_B)) {
+			    hnae3_dev_roce_supported(hdev)) {
 				struct hnae3_client *rc = hdev->roce_client;
 
 				ret = hclge_init_roce_base_info(vport);
@@ -3956,8 +3945,7 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 
 			break;
 		case HNAE3_CLIENT_ROCE:
-			if (hnae_get_bit(hdev->ae_dev->flag,
-					 HNAE_DEV_SUPPORT_ROCE_B)) {
+			if (hnae3_dev_roce_supported(hdev)) {
 				hdev->roce_client = client;
 				vport->roce.client = client;
 			}
@@ -4069,7 +4057,6 @@ static void hclge_pci_uninit(struct hclge_dev *hdev)
 static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 {
 	struct pci_dev *pdev = ae_dev->pdev;
-	const struct pci_device_id *id;
 	struct hclge_dev *hdev;
 	int ret;
 
@@ -4084,10 +4071,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hdev->ae_dev = ae_dev;
 	ae_dev->priv = hdev;
 
-	id = pci_match_id(roce_pci_tbl, ae_dev->pdev);
-	if (id)
-		hnae_set_bit(ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B, 1);
-
 	ret = hclge_pci_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "PCI init failed\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 4d68d6ea5143..94d8bb5b92f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -41,11 +41,16 @@ static struct hnae3_client client;
 static const struct pci_device_id hns3_pci_tbl[] = {
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
-	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC),
+	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
 	/* required last entry */
 	{0, }
 };
@@ -1348,6 +1353,7 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	ae_dev->pdev = pdev;
+	ae_dev->flag = ent->driver_data;
 	ae_dev->dev_type = HNAE3_DEV_KNIC;
 	pci_set_drvdata(pdev, ae_dev);
 

From 2daf4a6536f3109ed0ed758cec14743e0e5c20ea Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:51 +0800
Subject: [PATCH 175/391] net: hns3: Fix initialization when cmd is not
 supported

When ae_dev doesn't support DCB, rx_priv_wl_config,
common_thrd_config and tm_qs_bp_cfg can't be called, otherwise
cmd return fail, which causes the hclge module initialization
process to fail.
This patch fix it by adding a DCB capability flag to check if
the ae_dev support DCB.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  7 +++++
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 26 +++++++++++--------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c |  4 +++
 .../hisilicon/hns3/hns3pf/hns3_enet.c         | 10 +++----
 4 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 0f7b61a92f44..ad685f5aa6d1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -50,10 +50,17 @@
 
 #define HNAE3_DEV_INITED_B			0x0
 #define HNAE3_DEV_SUPPORT_ROCE_B		0x1
+#define HNAE3_DEV_SUPPORT_DCB_B			0x2
+
+#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\
+		BIT(HNAE3_DEV_SUPPORT_ROCE_B))
 
 #define hnae3_dev_roce_supported(hdev) \
 	hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)
 
+#define hnae3_dev_dcb_supported(hdev) \
+	hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B)
+
 #define ring_ptr_move_fw(ring, p) \
 	((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
 #define ring_ptr_move_bw(ring, p) \
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6953d19c6475..903f43a8c2a1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1772,18 +1772,22 @@ int hclge_buffer_alloc(struct hclge_dev *hdev)
 		return ret;
 	}
 
-	ret = hclge_rx_priv_wl_config(hdev);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"could not configure rx private waterline %d\n", ret);
-		return ret;
-	}
+	if (hnae3_dev_dcb_supported(hdev)) {
+		ret = hclge_rx_priv_wl_config(hdev);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"could not configure rx private waterline %d\n",
+				ret);
+			return ret;
+		}
 
-	ret = hclge_common_thrd_config(hdev);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"could not configure common threshold %d\n", ret);
-		return ret;
+		ret = hclge_common_thrd_config(hdev);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"could not configure common threshold %d\n",
+				ret);
+			return ret;
+		}
 	}
 
 	ret = hclge_common_wl_config(hdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 1c577d268f00..c91dbf19c4b1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -976,6 +976,10 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
+	/* Only DCB-supported dev supports qset back pressure setting */
+	if (!hnae3_dev_dcb_supported(hdev))
+		return 0;
+
 	for (i = 0; i < hdev->tm_info.num_tc; i++) {
 		ret = hclge_tm_qs_bp_cfg(hdev, i);
 		if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 94d8bb5b92f0..35369e1c8036 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -42,15 +42,15 @@ static const struct pci_device_id hns3_pci_tbl[] = {
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC),
-	 BIT(HNAE3_DEV_SUPPORT_ROCE_B)},
+	 HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
 	/* required last entry */
 	{0, }
 };

From d221df4e0faae2b9cc8ad78f3e5e777461b6b542 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:52 +0800
Subject: [PATCH 176/391] net: hns3: Fix for DEFAULT_DV when dev doesn't
 support DCB

When ae_dev doesn't support DCB, DEFAULT_DV must be set to
a lower value, otherwise the buffer allocation process will
fail.
This patch fix it by setting it to 30K bytes.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index c2b613b40509..30e2ad5ac0da 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -688,6 +688,7 @@ struct hclge_reset_tqp_queue {
 #define HCLGE_DEFAULT_TX_BUF		0x4000	 /* 16k  bytes */
 #define HCLGE_TOTAL_PKT_BUF		0x108000 /* 1.03125M bytes */
 #define HCLGE_DEFAULT_DV		0xA000	 /* 40k byte */
+#define HCLGE_DEFAULT_NON_DCB_DV	0x7800	/* 30K byte */
 
 #define HCLGE_TYPE_CRQ			0
 #define HCLGE_TYPE_CSQ			1
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 903f43a8c2a1..796370adf99c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1444,7 +1444,11 @@ static bool  hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
 	tc_num = hclge_get_tc_num(hdev);
 	pfc_enable_num = hclge_get_pfc_enalbe_num(hdev);
 
-	shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+	if (hnae3_dev_dcb_supported(hdev))
+		shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+	else
+		shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_NON_DCB_DV;
+
 	shared_buf_tc = pfc_enable_num * hdev->mps +
 			(tc_num - pfc_enable_num) * hdev->mps / 2 +
 			hdev->mps;

From bb1fe9ea6371e075d3d1448cd3ff6441d31307be Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:53 +0800
Subject: [PATCH 177/391] net: hns3: Fix for not setting rx private buffer size
 to zero

When rx private buffer is disabled, there may be some case that
the rx private buffer is not set to zero, which may cause buffer
allocation process to fail.
This patch fixes this problem by setting priv->enable to 0 and
priv->buf_size to zero when rx private buffer is disabled.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c  | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 796370adf99c..a7d8fb1e15f6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1504,6 +1504,11 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 				priv->wl.high = 2 * hdev->mps;
 				priv->buf_size = priv->wl.high;
 			}
+		} else {
+			priv->enable = 0;
+			priv->wl.low = 0;
+			priv->wl.high = 0;
+			priv->buf_size = 0;
 		}
 	}
 
@@ -1516,8 +1521,15 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		priv = &hdev->priv_buf[i];
 
-		if (hdev->hw_tc_map & BIT(i))
-			priv->enable = 1;
+		priv->enable = 0;
+		priv->wl.low = 0;
+		priv->wl.high = 0;
+		priv->buf_size = 0;
+
+		if (!(hdev->hw_tc_map & BIT(i)))
+			continue;
+
+		priv->enable = 1;
 
 		if (hdev->tm_info.hw_pfc_map & BIT(i)) {
 			priv->wl.low = 128;

From b8c8bf47da5576657370798da6f18a8cb0245d5b Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:54 +0800
Subject: [PATCH 178/391] net: hns3: Fix for rx_priv_buf_alloc not setting rx
 shared buffer

rx_priv_buf_alloc is used to tell hardware how much buffer is
used for rx direction, right now only the private buffer is
assigned.
For ae_dev that doesn't support DCB, private rx buffer is assigned
to zero, only shared rx buffer is used. So not setting the shared
rx buffer cause dropping of packet in SSU.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 3 ++-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 30e2ad5ac0da..758cf3948131 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -270,7 +270,8 @@ struct hclge_tx_buff_alloc {
 
 struct hclge_rx_priv_buff {
 	__le16 buf_num[HCLGE_TC_NUM];
-	u8 rsv[8];
+	__le16 shared_buf;
+	u8 rsv[6];
 };
 
 struct hclge_query_version {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a7d8fb1e15f6..e313552bb23d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1622,6 +1622,10 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
 			cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B);
 	}
 
+	req->shared_buf =
+		cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
+			    (1 << HCLGE_TC0_PRI_BUF_EN_B));
+
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,

From d602a52540c9b92e0dd152cfe1d0848c23f08894 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:55 +0800
Subject: [PATCH 179/391] net: hns3: Fix for rx priv buf allocation when DCB is
 not supported

When hdev doesn't support DCB, rx private buffer is not allocated,
otherwise there is not enough buffer for rx shared buffer, causing
buffer allocation process to fail.
This patch fixes by checking the dcb capability in
hclge_rx_buffer_calc.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e313552bb23d..c660f0caf709 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1489,6 +1489,16 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
 	struct hclge_priv_buf *priv;
 	int i;
 
+	/* When DCB is not supported, rx private
+	 * buffer is not allocated.
+	 */
+	if (!hnae3_dev_dcb_supported(hdev)) {
+		if (!hclge_is_rx_buf_ok(hdev, rx_all))
+			return -ENOMEM;
+
+		return 0;
+	}
+
 	/* step 1, try to alloc private buffer for all enabled tc */
 	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
 		priv = &hdev->priv_buf[i];

From c4726338d928c824f56c27734d837b8244132705 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:56 +0800
Subject: [PATCH 180/391] net: hns3: Fix typo error for feild in hclge_tm

This patch fixes a typo error for feild, which should be field.

Fixes: 848440544b41f ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 20 +++++++++----------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index c91dbf19c4b1..fe659f752237 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -280,11 +280,11 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
 
 	shap_cfg_cmd->pg_id = pg_id;
 
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
+	hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
@@ -307,11 +307,11 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
 
 	shap_cfg_cmd->pri_id = pri_id;
 
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
-	hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
+	hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 7e67337dfaf2..85158b0d73fe 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -94,10 +94,10 @@ struct hclge_bp_to_qs_map_cmd {
 	u32 rsvd1;
 };
 
-#define hclge_tm_set_feild(dest, string, val) \
+#define hclge_tm_set_field(dest, string, val) \
 			hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
 				       (HCLGE_TM_SHAP_##string##_LSH), val)
-#define hclge_tm_get_feild(src, string) \
+#define hclge_tm_get_field(src, string) \
 			hnae_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
 				       (HCLGE_TM_SHAP_##string##_LSH))
 

From 68ece54efd417d415462adbaa2700cba50de3ff6 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:57 +0800
Subject: [PATCH 181/391] net: hns3: Fix for setting rss_size incorrectly

rss_size is 1, 2, 4, 8, 16, 32, 64, 128, but acutal tc queue
size can be any u16 less than 128. If tc queue size is 5, we
set the rss_size to 8, indirection table will be used to limit
the size of actual queue size.
It may cause dropping of receiving packet in hardware if
rss_size is not set correctly.
For now, each TC has the same rss size.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 80 +++++++++----------
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  1 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c |  1 +
 3 files changed, 40 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c660f0caf709..e0685e630afe 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2606,6 +2606,7 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev)
 	u16 tc_valid[HCLGE_MAX_TC_NUM];
 	u16 tc_size[HCLGE_MAX_TC_NUM];
 	u32 *rss_indir = NULL;
+	u16 rss_size = 0, roundup_size;
 	const u8 *key;
 	int i, ret, j;
 
@@ -2620,7 +2621,13 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev)
 	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
 		for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
 			vport[j].rss_indirection_tbl[i] =
-				i % hdev->rss_size_max;
+				i % vport[j].alloc_rss_size;
+
+			/* vport 0 is for PF */
+			if (j != 0)
+				continue;
+
+			rss_size = vport[j].alloc_rss_size;
 			rss_indir[i] = vport[j].rss_indirection_tbl[i];
 		}
 	}
@@ -2637,42 +2644,31 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev)
 	if (ret)
 		goto err;
 
-	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
-		if (hdev->hw_tc_map & BIT(i))
-			tc_valid[i] = 1;
-		else
-			tc_valid[i] = 0;
-
-		switch (hdev->rss_size_max) {
-		case HCLGE_RSS_TC_SIZE_0:
-			tc_size[i] = 0;
-			break;
-		case HCLGE_RSS_TC_SIZE_1:
-			tc_size[i] = 1;
-			break;
-		case HCLGE_RSS_TC_SIZE_2:
-			tc_size[i] = 2;
-			break;
-		case HCLGE_RSS_TC_SIZE_3:
-			tc_size[i] = 3;
-			break;
-		case HCLGE_RSS_TC_SIZE_4:
-			tc_size[i] = 4;
-			break;
-		case HCLGE_RSS_TC_SIZE_5:
-			tc_size[i] = 5;
-			break;
-		case HCLGE_RSS_TC_SIZE_6:
-			tc_size[i] = 6;
-			break;
-		case HCLGE_RSS_TC_SIZE_7:
-			tc_size[i] = 7;
-			break;
-		default:
-			break;
-		}
-		tc_offset[i] = hdev->rss_size_max * i;
+	/* Each TC have the same queue size, and tc_size set to hardware is
+	 * the log2 of roundup power of two of rss_size, the acutal queue
+	 * size is limited by indirection table.
+	 */
+	if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) {
+		dev_err(&hdev->pdev->dev,
+			"Configure rss tc size failed, invalid TC_SIZE = %d\n",
+			rss_size);
+		return -EINVAL;
 	}
+
+	roundup_size = roundup_pow_of_two(rss_size);
+	roundup_size = ilog2(roundup_size);
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		tc_valid[i] = 0;
+
+		if (!(hdev->hw_tc_map & BIT(i)))
+			continue;
+
+		tc_valid[i] = 1;
+		tc_size[i] = roundup_size;
+		tc_offset[i] = rss_size * i;
+	}
+
 	ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
 
 err:
@@ -4167,12 +4163,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	ret = hclge_rss_init_hw(hdev);
-	if (ret) {
-		dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
-		return  ret;
-	}
-
 	ret = hclge_init_vlan_config(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
@@ -4185,6 +4175,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	ret = hclge_rss_init_hw(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
+		return ret;
+	}
+
 	setup_timer(&hdev->service_timer, hclge_service_timer,
 		    (unsigned long)hdev);
 	INIT_WORK(&hdev->service_task, hclge_service_task);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index edb10ad075eb..7f8dd129c10d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -477,6 +477,7 @@ struct hclge_vport {
 	u8  rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
 	/* User configured lookup table entries */
 	u8  rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+	u16 alloc_rss_size;
 
 	u16 qs_offset;
 	u16 bw_limit;		/* VSI BW Limit (0 = disabled) */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index fe659f752237..b7ba7aa66620 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -397,6 +397,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 			kinfo->num_tqps / kinfo->num_tc);
 	vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id;
 	vport->dwrr = 100;  /* 100 percent as init */
+	vport->alloc_rss_size = kinfo->rss_size;
 
 	for (i = 0; i < kinfo->num_tc; i++) {
 		if (hdev->hw_tc_map & BIT(i)) {

From c5795c5308af81568d1573598716091120c85a38 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 20 Sep 2017 18:52:58 +0800
Subject: [PATCH 182/391] net: hns3: Fix for pri to tc mapping in TM

Current mapping between pri and tc is one to one,
so user can't map multi priorities to the same tc.
This patch changes the mapping to many to one.

Fixes: 848440544b41f ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h      |  3 ++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h  |  2 +-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c    | 16 +++++++++-------
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ad685f5aa6d1..1a01cadfe5f3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -376,12 +376,12 @@ struct hnae3_ae_algo {
 struct hnae3_tc_info {
 	u16	tqp_offset;	/* TQP offset from base TQP */
 	u16	tqp_count;	/* Total TQPs */
-	u8	up;		/* user priority */
 	u8	tc;		/* TC index */
 	bool	enable;		/* If this TC is enable or not */
 };
 
 #define HNAE3_MAX_TC		8
+#define HNAE3_MAX_USER_PRIO	8
 struct hnae3_knic_private_info {
 	struct net_device *netdev; /* Set by KNIC client when init instance */
 	u16 rss_size;		   /* Allocated RSS queues */
@@ -389,6 +389,7 @@ struct hnae3_knic_private_info {
 	u16 num_desc;
 
 	u8 num_tc;		   /* Total number of enabled TCs */
+	u8 prio_tc[HNAE3_MAX_USER_PRIO];  /* TC indexed by prio */
 	struct hnae3_tc_info tc_info[HNAE3_MAX_TC]; /* Idx of array is HW TC */
 
 	u16 num_tqps;		  /* total number of TQPs in this handle */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7f8dd129c10d..9fcfd9395424 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -176,7 +176,6 @@ struct hclge_pg_info {
 struct hclge_tc_info {
 	u8 tc_id;
 	u8 tc_sch_mode;		/* 0: sp; 1: dwrr */
-	u8 up;
 	u8 pgid;
 	u32 bw_limit;
 };
@@ -197,6 +196,7 @@ struct hclge_tm_info {
 	u8 num_tc;
 	u8 num_pg;      /* It must be 1 if vNET-Base schd */
 	u8 pg_dwrr[HCLGE_PG_NUM];
+	u8 prio_tc[HNAE3_MAX_USER_PRIO];
 	struct hclge_pg_info pg_info[HCLGE_PG_NUM];
 	struct hclge_tc_info tc_info[HNAE3_MAX_TC];
 	enum hclge_fc_mode fc_mode;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index b7ba7aa66620..73a75d7cc551 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -128,9 +128,7 @@ static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
 {
 	u8 tc;
 
-	for (tc = 0; tc < hdev->tm_info.num_tc; tc++)
-		if (hdev->tm_info.tc_info[tc].up == pri_id)
-			break;
+	tc = hdev->tm_info.prio_tc[pri_id];
 
 	if (tc >= hdev->tm_info.num_tc)
 		return -EINVAL;
@@ -158,7 +156,7 @@ static int hclge_up_to_tc_map(struct hclge_dev *hdev)
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, false);
 
-	for (pri_id = 0; pri_id < hdev->tm_info.num_tc; pri_id++) {
+	for (pri_id = 0; pri_id < HNAE3_MAX_USER_PRIO; pri_id++) {
 		ret = hclge_fill_pri_array(hdev, pri, pri_id);
 		if (ret)
 			return ret;
@@ -405,16 +403,17 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 			kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
 			kinfo->tc_info[i].tqp_count = kinfo->rss_size;
 			kinfo->tc_info[i].tc = i;
-			kinfo->tc_info[i].up = hdev->tm_info.tc_info[i].up;
 		} else {
 			/* Set to default queue if TC is disable */
 			kinfo->tc_info[i].enable = false;
 			kinfo->tc_info[i].tqp_offset = 0;
 			kinfo->tc_info[i].tqp_count = 1;
 			kinfo->tc_info[i].tc = 0;
-			kinfo->tc_info[i].up = 0;
 		}
 	}
+
+	memcpy(kinfo->prio_tc, hdev->tm_info.prio_tc,
+	       FIELD_SIZEOF(struct hnae3_knic_private_info, prio_tc));
 }
 
 static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
@@ -436,12 +435,15 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
 	for (i = 0; i < hdev->tm_info.num_tc; i++) {
 		hdev->tm_info.tc_info[i].tc_id = i;
 		hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
-		hdev->tm_info.tc_info[i].up = i;
 		hdev->tm_info.tc_info[i].pgid = 0;
 		hdev->tm_info.tc_info[i].bw_limit =
 			hdev->tm_info.pg_info[0].bw_limit;
 	}
 
+	for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
+		hdev->tm_info.prio_tc[i] =
+			(i >= hdev->tm_info.num_tc) ? 0 : i;
+
 	hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
 }
 

From 4d61eda812041ef9c820d7f147884133fd3307bc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 19 Sep 2017 16:27:39 +0100
Subject: [PATCH 183/391] CIFS: make arrays static const, reduces object code
 size

Don't populate the read-only arrays types[] on the stack, instead make
them both static const.  Makes the object code smaller by over 200 bytes:

Before:
   text	   data	    bss	    dec	    hex	filename
 111503	  37696	    448	 149647	  2488f	fs/cifs/file.o

After:
   text	   data	    bss	    dec	    hex	filename
 111140	  37856	    448	 149444	  247c4	fs/cifs/file.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/file.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0786f19d288f..822311919163 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1102,8 +1102,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	struct cifs_tcon *tcon;
 	unsigned int num, max_num, max_buf;
 	LOCKING_ANDX_RANGE *buf, *cur;
-	int types[] = {LOCKING_ANDX_LARGE_FILES,
-		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+	static const int types[] = {
+		LOCKING_ANDX_LARGE_FILES,
+		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+	};
 	int i;
 
 	xid = get_xid();
@@ -1434,8 +1436,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		  unsigned int xid)
 {
 	int rc = 0, stored_rc;
-	int types[] = {LOCKING_ANDX_LARGE_FILES,
-		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+	static const int types[] = {
+		LOCKING_ANDX_LARGE_FILES,
+		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+	};
 	unsigned int i;
 	unsigned int max_num, num, max_buf;
 	LOCKING_ANDX_RANGE *buf, *cur;

From 94183331e815617246b1baa97e0916f358c794bb Mon Sep 17 00:00:00 2001
From: Shu Wang <shuwang@redhat.com>
Date: Thu, 7 Sep 2017 16:03:27 +0800
Subject: [PATCH 184/391] cifs: release cifs root_cred after exit_cifs

memory leak was found by kmemleak. exit_cifs_spnego
should be called before cifs module removed, or
cifs root_cred will not be released.

kmemleak report:
unreferenced object 0xffff880070a3ce40 (size 192):
  backtrace:
     kmemleak_alloc+0x4a/0xa0
     kmem_cache_alloc+0xc7/0x1d0
     prepare_kernel_cred+0x20/0x120
     init_cifs_spnego+0x2d/0x170 [cifs]
     0xffffffffc07801f3
     do_one_initcall+0x51/0x1b0
     do_init_module+0x60/0x1fd
     load_module+0x161e/0x1b60
     SYSC_finit_module+0xa9/0x100
     SyS_finit_module+0xe/0x10

Signed-off-by: Shu Wang <shuwang@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/cifsfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a864e6575309..8c8b75d33f31 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1449,7 +1449,7 @@ exit_cifs(void)
 	exit_cifs_idmap();
 #endif
 #ifdef CONFIG_CIFS_UPCALL
-	unregister_key_type(&cifs_spnego_key_type);
+	exit_cifs_spnego();
 #endif
 	cifs_destroy_request_bufs();
 	cifs_destroy_mids();

From f5c4ba816315d3b813af16f5571f86c8d4e897bd Mon Sep 17 00:00:00 2001
From: Shu Wang <shuwang@redhat.com>
Date: Fri, 8 Sep 2017 18:48:33 +0800
Subject: [PATCH 185/391] cifs: release auth_key.response for reconnect.

There is a race that cause cifs reconnect in cifs_mount,
- cifs_mount
  - cifs_get_tcp_session
    - [ start thread cifs_demultiplex_thread
      - cifs_read_from_socket: -ECONNABORTED
        - DELAY_WORK smb2_reconnect_server ]
  - cifs_setup_session
  - [ smb2_reconnect_server ]

auth_key.response was allocated in cifs_setup_session, and
will release when the session destoried. So when session re-
connect, auth_key.response should be check and released.

Tested with my system:
CIFS VFS: Free previous auth_key.response = ffff8800320bbf80

A simple auth_key.response allocation call trace:
- cifs_setup_session
- SMB2_sess_setup
- SMB2_sess_auth_rawntlmssp_authenticate
- build_ntlmssp_auth_blob
- setup_ntlmv2_rsp

Signed-off-by: Shu Wang <shuwang@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/connect.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8d38b22afb2b..0bfc2280436d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -4154,6 +4154,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 	cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n",
 		 server->sec_mode, server->capabilities, server->timeAdj);
 
+	if (ses->auth_key.response) {
+		cifs_dbg(VFS, "Free previous auth_key.response = %p\n",
+			 ses->auth_key.response);
+		kfree(ses->auth_key.response);
+		ses->auth_key.response = NULL;
+		ses->auth_key.len = 0;
+	}
+
 	if (server->ops->sess_setup)
 		rc = server->ops->sess_setup(xid, ses, nls_info);
 

From 0603c96f3af50e2f9299fa410c224ab1d465e0f9 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 20 Sep 2017 19:57:18 -0500
Subject: [PATCH 186/391] SMB: Validate negotiate (to protect against
 downgrade) even if signing off

As long as signing is supported (ie not a guest user connection) and
connection is SMB3 or SMB3.02, then validate negotiate (protect
against man in the middle downgrade attacks).  We had been doing this
only when signing was required, not when signing was just enabled,
but this more closely matches recommended SMB3 behavior and is
better security.  Suggested by Metze.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Jeremy Allison <jra@samba.org>
Acked-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/smb2pdu.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d499ce265c3b..6f0e6343c15e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -656,15 +656,22 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 
 	/*
 	 * validation ioctl must be signed, so no point sending this if we
-	 * can not sign it.  We could eventually change this to selectively
+	 * can not sign it (ie are not known user).  Even if signing is not
+	 * required (enabled but not negotiated), in those cases we selectively
 	 * sign just this, the first and only signed request on a connection.
-	 * This is good enough for now since a user who wants better security
-	 * would also enable signing on the mount. Having validation of
-	 * negotiate info for signed connections helps reduce attack vectors
+	 * Having validation of negotiate info  helps reduce attack vectors.
 	 */
-	if (tcon->ses->server->sign == false)
+	if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
 		return 0; /* validation requires signing */
 
+	if (tcon->ses->user_name == NULL) {
+		cifs_dbg(FYI, "Can't validate negotiate: null user mount\n");
+		return 0; /* validation requires signing */
+	}
+
+	if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+		cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
+
 	vneg_inbuf.Capabilities =
 			cpu_to_le32(tcon->ses->server->vals->req_capabilities);
 	memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,

From a90bcb86ae700c12432446c4aa1819e7b8e172ec Mon Sep 17 00:00:00 2001
From: Petar Penkov <ppenkov@google.com>
Date: Tue, 29 Aug 2017 11:20:32 -0700
Subject: [PATCH 187/391] iov_iter: fix page_copy_sane for compound pages

Issue is that if the data crosses a page boundary inside a compound
page, this check will incorrectly trigger a WARN_ON.

To fix this, compute the order using the head of the compound page and
adjust the offset to be relative to that head.

Fixes: 72e809ed81ed ("iov_iter: sanity checks for copy to/from page
primitives")

Signed-off-by: Petar Penkov <ppenkov@google.com>
CC: Al Viro <viro@zeniv.linux.org.uk>
CC: Eric Dumazet <edumazet@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/iov_iter.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 52c8dd6d8e82..1c1c06ddc20a 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -687,8 +687,10 @@ EXPORT_SYMBOL(_copy_from_iter_full_nocache);
 
 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
 {
-	size_t v = n + offset;
-	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(page))))
+	struct page *head = compound_head(page);
+	size_t v = n + offset + page_address(page) - page_address(head);
+
+	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
 		return true;
 	WARN_ON(1);
 	return false;

From 58aff0af757356065f33290d96a9cd46dfbcae88 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 18 Sep 2017 17:47:38 +0100
Subject: [PATCH 188/391] ipc/shm: Fix order of parameters when calling
 copy_compat_shmid_to_user

Commit 553f770ef71b ("ipc: move compat shmctl to native") moved the
compat IPC syscall handling into ipc/shm.c and refactored the struct
accessors in the process. Unfortunately, the call to
copy_compat_shmid_to_user when handling a compat {IPC,SHM}_STAT command
gets the arguments the wrong way round, passing a kernel stack address
as the user buffer (destination) and the user buffer as the kernel stack
address (source).

This patch fixes the parameter ordering so the buffers are accessed
correctly.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 ipc/shm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 1b3adfe3c60e..1e2b1692ba2c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1237,7 +1237,7 @@ COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
 		err = shmctl_stat(ns, shmid, cmd, &sem64);
 		if (err < 0)
 			return err;
-		if (copy_compat_shmid_to_user(&sem64, uptr, version))
+		if (copy_compat_shmid_to_user(uptr, &sem64, version))
 			err = -EFAULT;
 		return err;
 

From 3e77adeea3c5393c9b624832f65441e92867f618 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 7 Sep 2017 16:35:40 +1000
Subject: [PATCH 189/391] powerpc/eeh: Create PHB PEs after EEH is initialized

Otherwise we end up not yet having computed the right diag data size
on powernv where EEH initialization is delayed, thus causing memory
corruption later on when calling OPAL.

Fixes: 5cb1f8fdddb7 ("powerpc/powernv/pci: Dynamically allocate PHB diag data")
Cc: stable@vger.kernel.org # v4.13+
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh.c     |  4 ++++
 arch/powerpc/kernel/eeh_dev.c | 18 ------------------
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 9e816787c0d4..116000b45531 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1019,6 +1019,10 @@ int eeh_init(void)
 	} else if ((ret = eeh_ops->init()))
 		return ret;
 
+	/* Initialize PHB PEs */
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+		eeh_dev_phb_init_dynamic(hose);
+
 	/* Initialize EEH event */
 	ret = eeh_event_init();
 	if (ret)
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index ad04ecd63c20..a34e6912c15e 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -78,21 +78,3 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
 	/* EEH PE for PHB */
 	eeh_phb_pe_create(phb);
 }
-
-/**
- * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
- *
- * Scan all the existing PHBs and create EEH devices for their OF
- * nodes and their children OF nodes
- */
-static int __init eeh_dev_phb_init(void)
-{
-	struct pci_controller *phb, *tmp;
-
-	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
-		eeh_dev_phb_init_dynamic(phb);
-
-	return 0;
-}
-
-core_initcall(eeh_dev_phb_init);

From 087ff6a5ae3052bb2835e191094b793789cb8817 Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Date: Wed, 20 Sep 2017 17:02:51 -0400
Subject: [PATCH 190/391] powerpc/pseries: Fix "OF: ERROR: Bad of_node_put() on
 /cpus" during DLPAR

Commit 215ee763f8cb ("powerpc: pseries: remove dlpar_attach_node
dependency on full path") reworked dlpar_attach_node() to no longer
look up the parent node "/cpus", but instead to have the parent node
passed by the caller in the function parameter list.

As a result dlpar_attach_node() is no longer responsible for freeing
the reference to the parent node. However, commit 215ee763f8cb failed
to remove the of_node_put(parent) call in dlpar_attach_node(), or to
take into account that the reference to the parent in the caller
dlpar_cpu_add() needs to be held until after dlpar_attach_node()
returns.

As a result doing repeated cpu add/remove dlpar operations will
eventually result in the following error:

  OF: ERROR: Bad of_node_put() on /cpus
  CPU: 0 PID: 10896 Comm: drmgr Not tainted 4.13.0-autotest #1
  Call Trace:
   dump_stack+0x15c/0x1f8 (unreliable)
   of_node_release+0x1a4/0x1c0
   kobject_put+0x1a8/0x310
   kobject_del+0xbc/0xf0
   __of_detach_node_sysfs+0x144/0x210
   of_detach_node+0xf0/0x180
   dlpar_detach_node+0xc4/0x120
   dlpar_cpu_remove+0x280/0x560
   dlpar_cpu_release+0xbc/0x1b0
   arch_cpu_release+0x6c/0xb0
   cpu_release_store+0xa0/0x100
   dev_attr_store+0x68/0xa0
   sysfs_kf_write+0xa8/0xf0
   kernfs_fop_write+0x2cc/0x400
   __vfs_write+0x5c/0x340
   vfs_write+0x1a8/0x3d0
   SyS_write+0xa8/0x1a0
   system_call+0x58/0x6c

Fix the issue by removing the of_node_put(parent) call from
dlpar_attach_node(), and ensuring that the reference to the parent
node is properly held and released by the caller dlpar_cpu_add().

Fixes: 215ee763f8cb ("powerpc: pseries: remove dlpar_attach_node dependency on full path")
Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
[mpe: Add a comment in the code and frob the change log slightly]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/dlpar.c       | 1 -
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 6 +++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 783f36364690..e45b5f10645a 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -266,7 +266,6 @@ int dlpar_attach_node(struct device_node *dn, struct device_node *parent)
 		return rc;
 	}
 
-	of_node_put(dn->parent);
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fc0d8f97c03a..fadb95efbb9e 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -462,15 +462,19 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 	}
 
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
-	of_node_put(parent);
 	if (!dn) {
 		pr_warn("Failed call to configure-connector, drc index: %x\n",
 			drc_index);
 		dlpar_release_drc(drc_index);
+		of_node_put(parent);
 		return -EINVAL;
 	}
 
 	rc = dlpar_attach_node(dn, parent);
+
+	/* Regardless we are done with parent now */
+	of_node_put(parent);
+
 	if (rc) {
 		saved_rc = rc;
 		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",

From b537ca6fede69a281dc524983e5e633d79a10a08 Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Date: Wed, 20 Sep 2017 17:02:52 -0400
Subject: [PATCH 191/391] powerpc/pseries: Fix parent_dn reference leak in
 add_dt_node()

A reference to the parent device node is held by add_dt_node() for the
node to be added. If the call to dlpar_configure_connector() fails
add_dt_node() returns ENOENT and that reference is not freed.

Add a call to of_node_put(parent_dn) prior to bailing out after a
failed dlpar_configure_connector() call.

Fixes: 8d5ff320766f ("powerpc/pseries: Make dlpar_configure_connector parent node aware")
Cc: stable@vger.kernel.org # v3.12+
Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/mobility.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 210ce632d63e..f7042ad492ba 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -226,8 +226,10 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
 		return -ENOENT;
 
 	dn = dlpar_configure_connector(drc_index, parent_dn);
-	if (!dn)
+	if (!dn) {
+		of_node_put(parent_dn);
 		return -ENOENT;
+	}
 
 	rc = dlpar_attach_node(dn, parent_dn);
 	if (rc)

From 0551968add53777fddd18f4ffb4e3bbc1f646d79 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Sep 2017 11:54:44 +0200
Subject: [PATCH 192/391] Revert "genirq: Restrict effective affinity to
 interrupts actually using it"

This reverts commit 74def747bcd09692bdbf8c6a15350795b0f11ca8.

The change to the helper function is only correct for the /proc/irq/
readout usage, but breaks the existing x86 usage of that function.

Reported-by: Yanko Kaneti <yaneti@declera.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irq.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b99a784635ff..d4728bf6a537 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -783,10 +783,7 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
 static inline
 struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
 {
-	if (!cpumask_empty(d->common->effective_affinity))
-		return d->common->effective_affinity;
-
-	return d->common->affinity;
+	return d->common->effective_affinity;
 }
 static inline void irq_data_update_effective_affinity(struct irq_data *d,
 						      const struct cpumask *m)

From 2bc84526d174a2a89c76438f049fc03ac259a159 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Wed, 6 Sep 2017 16:44:35 -0600
Subject: [PATCH 193/391] Makefile: kselftest and kselftest-clean fail for make
 O=dir case

kselftest and kselftest-clean targets fail when object directory is
specified to relocate objects. Fix it so it can find the source tree
to build from.

make O=/tmp/kselftest_top kselftest

make[1]: Entering directory '/tmp/kselftest_top'
make[2]: Entering directory '/tmp/kselftest_top'
make[2]: *** tools/testing/selftests: No such file or directory.  Stop.
make[2]: Leaving directory '/tmp/kselftest_top'
./linux-kselftest/Makefile:1185: recipe for target
'kselftest' failed
make[1]: *** [kselftest] Error 2
make[1]: Leaving directory '/tmp/kselftest_top'
Makefile:145: recipe for target 'sub-make' failed
make: *** [sub-make] Error 2

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Acked-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 64cbc66cebca..9a70578922d3 100644
--- a/Makefile
+++ b/Makefile
@@ -1172,11 +1172,11 @@ headers_check: headers_install
 
 PHONY += kselftest
 kselftest:
-	$(Q)$(MAKE) -C tools/testing/selftests run_tests
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
 
 PHONY += kselftest-clean
 kselftest-clean:
-	$(Q)$(MAKE) -C tools/testing/selftests clean
+	$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
 
 PHONY += kselftest-merge
 kselftest-merge:

From 8050ef2b83a18f628f9501af958fbff39443d58d Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Wed, 6 Sep 2017 18:36:22 -0600
Subject: [PATCH 194/391] selftests: lib.mk: kselftest and kselftest-clean fail
 for make O=dir case

kselftest and kselftest-clean targets fail when object directory is
specified to relocate objects. Main Makefile make O= path clears the
built-in defines LINK.c, COMPILE.S, LINK.S, and RM that are used in
lib.mk to build and clean targets. Define them.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 4665463779f5..266d3ed4bb41 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -7,6 +7,7 @@ OUTPUT := $(shell pwd)
 endif
 
 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
 
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
@@ -62,6 +63,11 @@ endef
 emit_tests:
 	$(EMIT_TESTS)
 
+# define if isn't already. It is undefined in make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
 define CLEAN
 	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 endef
@@ -69,6 +75,15 @@ endef
 clean:
 	$(CLEAN)
 
+# When make O= with kselftest target from main level
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
 $(OUTPUT)/%:%.c
 	$(LINK.c) $^ $(LDLIBS) -o $@
 

From 52fd1d082398b928a86d4fdf33c9f3abe1bf7914 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 7 Sep 2017 19:57:43 -0600
Subject: [PATCH 195/391] selftests: Makefile: clear LDFLAGS for make O=dir
 use-case

kselftest target fails when object directory is specified to relocate
objects. Inherited "LDFLAGS = -m" fails the test builds. Clear it.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 26ce4f7168be..f4368db011ea 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -52,6 +52,10 @@ override LDFLAGS =
 override MAKEFLAGS =
 endif
 
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
 BUILD := $(O)
 ifndef BUILD
   BUILD := $(KBUILD_OUTPUT)

From e0a5696a23290c31c5ac2c76f0e7fe50a12c1fc6 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 7 Sep 2017 20:04:26 -0600
Subject: [PATCH 196/391] selftests: lib.mk: fix test executable status check
 to use full path

Fix test executable status check to use full path for make O=dir case,m
when tests are relocated to user specified object directory. Without the
full path, this check fails to find the file and fails the test.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 266d3ed4bb41..fd1cbbbca8d7 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -21,7 +21,7 @@ define RUN_TESTS
 		test_num=`echo $$test_num+1 | bc`;	\
 		echo "selftests: $$BASENAME_TEST";	\
 		echo "========================================";	\
-		if [ ! -x $$BASENAME_TEST ]; then	\
+		if [ ! -x $$TEST ]; then	\
 			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
 			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
 		else					\

From e2fb65594cae9a016fab4639a5d22a914f1e16c8 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Fri, 8 Sep 2017 16:05:50 -0600
Subject: [PATCH 197/391] selftests: watchdog: fix to use TEST_GEN_PROGS and
 remove clean

TEST_PROGS should be used for test scripts that don't ned to be built.
Use TEST_GEN_PROGS instead which is intended for test executables.

Remove clean target and let the common clean take care of cleaning.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/watchdog/Makefile | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
index f863c664e3d1..ee068511fd0b 100644
--- a/tools/testing/selftests/watchdog/Makefile
+++ b/tools/testing/selftests/watchdog/Makefile
@@ -1,8 +1,3 @@
-TEST_PROGS := watchdog-test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := watchdog-test
 
 include ../lib.mk
-
-clean:
-	rm -fr $(TEST_PROGS)

From be16a244c199983656e58ddb10d80c67197e502f Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 13:06:33 -0600
Subject: [PATCH 198/391] selftests: lib.mk: add TEST_CUSTOM_PROGS to allow
 custom test run/install

Some tests such as sync can't use generic build rules in lib.mk and require
custom rules. Currently there is no provision to allow custom builds and
test such as sync use TEST_PROGS which is reserved for test shell scripts.
Add TEST_CUSTOM_PROGS variable to lib.mk to run and install custom tests
built by individual test make files.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index fd1cbbbca8d7..b4699c71aee4 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -6,6 +6,12 @@ ifeq (0,$(MAKELEVEL))
 OUTPUT := $(shell pwd)
 endif
 
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
 TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
@@ -31,7 +37,7 @@ define RUN_TESTS
 endef
 
 run_tests: all
-	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS))
+	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
 
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\
@@ -39,10 +45,10 @@ define INSTALL_RULE
 		echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";	\
 		rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;		\
 	fi
-	@if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
+	@if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
 		mkdir -p ${INSTALL_PATH};										\
-		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
-		rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;		\
+		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
+		rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;		\
 	fi
 endef
 
@@ -54,7 +60,7 @@ else
 endif
 
 define EMIT_TESTS
-	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+	@for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
 		echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
 	done;

From 38f7251852a0cd47f34af4a6f84df0fadafa8ac7 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 13:46:44 -0600
Subject: [PATCH 199/391] selftests: sync: use TEST_CUSTOM_PROGS instead of
 TEST_PROGS

lib.mk var TEST_CUSTOM_PROGS is for tests that need custom build
rules. TEST_PROGS is used for test shell scripts. Fix it to use
TEST_CUSTOM_PROGS. lib.mk will run and install them.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/sync/Makefile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index 4981c6b6d050..43db80b71e80 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -2,9 +2,11 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
 CFLAGS += -I../../../../usr/include/
 LDFLAGS += -pthread
 
-TEST_PROGS = sync_test
+# lib.mk TEST_CUSTOM_PROGS var is for custome tests that need special
+# build rules. lib.mk will run and install them.
+TEST_CUSTOM_PROGS = sync_test
 
-all: $(TEST_PROGS)
+all: $(TEST_CUSTOM_PROGS)
 
 include ../lib.mk
 

From b2fc6ade9fe2118591e7f51e21b51f65e36f138f Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 14:14:41 -0600
Subject: [PATCH 200/391] selftests: sync: kselftest and kselftest-clean fail
 for make O=dir case

sync test fails to build when object directory is specified to relocate
object files. Fix it to specify the correct path. Fix clean target to
remove objects. Also include simplified logic to use TEST_CUSTOM_PROGS
in build and clean targets instead of hard-coding the test name each
time.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/sync/Makefile | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index 43db80b71e80..8e04d0afcbd7 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -2,14 +2,16 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
 CFLAGS += -I../../../../usr/include/
 LDFLAGS += -pthread
 
-# lib.mk TEST_CUSTOM_PROGS var is for custome tests that need special
-# build rules. lib.mk will run and install them.
-TEST_CUSTOM_PROGS = sync_test
-
-all: $(TEST_CUSTOM_PROGS)
+.PHONY: all clean
 
 include ../lib.mk
 
+# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special
+# build rules. lib.mk will run and install them.
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
+all: $(TEST_CUSTOM_PROGS)
+
 OBJS = sync_test.o sync.o
 
 TESTS += sync_alloc.o
@@ -20,6 +22,16 @@ TESTS += sync_stress_parallelism.o
 TESTS += sync_stress_consumer.o
 TESTS += sync_stress_merge.o
 
-sync_test: $(OBJS) $(TESTS)
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
 
-EXTRA_CLEAN := sync_test $(OBJS) $(TESTS)
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+	$(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)

From 1a940687e424080a6ed285f2de8b2f0d018edf3e Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 19:03:11 -0600
Subject: [PATCH 201/391] selftests: lib.mk: copy test scripts and test files
 for make O=dir run

For make O=dir run_tests to work, test scripts, test files, and other
dependencies need to be copied over to the object directory. Running
tests from the object directory is necessary to avoid making the source
tree dirty.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index b4699c71aee4..f65886af7c0c 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -37,7 +37,18 @@ define RUN_TESTS
 endef
 
 run_tests: all
+ifneq ($(KBUILD_SRC),)
+	@if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+		@rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+	fi
+	@if [ "X$(TEST_PROGS)" != "X" ]; then
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+	else
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+	fi
+else
 	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
 
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\

From 9bbe7dc05c1f80200621a626ec2266987f4b42b3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 18 Sep 2017 17:55:24 +0200
Subject: [PATCH 202/391] MIPS: MSP71xx: Include asm/setup.h

msp71xx_defconfig can not be built at the in v4.14-rc1

arch/mips/pmcs-msp71xx/msp_smp.c:72:2: error: implicit declaration of function 'set_vi_handler' [-Werror=implicit-function-declaration]

I don't know what caused the regression, but including the right
header is the obvious fix.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/17309/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pmcs-msp71xx/msp_smp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c
index ffa0f7101a97..2b08242ade62 100644
--- a/arch/mips/pmcs-msp71xx/msp_smp.c
+++ b/arch/mips/pmcs-msp71xx/msp_smp.c
@@ -22,6 +22,8 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 
+#include <asm/setup.h>
+
 #ifdef CONFIG_MIPS_MT_SMP
 #define MIPS_CPU_IPI_RESCHED_IRQ 0	/* SW int 0 for resched */
 #define MIPS_CPU_IPI_CALL_IRQ 1		/* SW int 1 for call */

From c22c8043105591a8b74142cf837604087cdba40b Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 19 Sep 2017 14:11:22 +0100
Subject: [PATCH 203/391] MIPS: Fix input modify in __write_64bit_c0_split()

The inline asm in __write_64bit_c0_split() modifies the 64-bit input
operand by shifting the high register left by 32, and constructing the
full 64-bit value in the low register (even on a 32-bit kernel), so if
that value is used again it could cause breakage as GCC would assume the
registers haven't changed when they have.

To quote the GCC extended asm documentation:
> Warning: Do not modify the contents of input-only operands (except for
> inputs tied to outputs). The compiler assumes that on exit from the
> asm statement these operands contain the same values as they had
> before executing the statement.

Avoid modifying the input by using a temporary variable as an output
which is modified instead of the input and not otherwise used. The asm
is always __volatile__ so GCC shouldn't optimise it out. The low
register of the temporary output is written before the high register of
the input is read, so we have two constraint alternatives, one where
both use the same registers (for when the input value isn't subsequently
used), and one with an early clobber on the output in case the low
output uses the same register as the high input. This allows the
resulting assembly to remain mostly unchanged.

A diff of a MIPS32r6 kernel reveals only three differences, two in
relation to write_c0_r10k_diag() in cpu_probe() (register allocation
rearranged slightly but otherwise identical), and one in relation to
write_c0_cvmmemctl2() in kvm_vz_local_flush_guesttlb_all(), but the
octeon CPU is only supported on 64-bit kernels where
__write_64bit_c0_split() isn't used so that shouldn't matter in
practice. So there currently doesn't appear to be anything broken by
this bug.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/17315/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mipsregs.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index e4ed1bc9a734..a6810923b3f0 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1377,29 +1377,32 @@ do {									\
 
 #define __write_64bit_c0_split(source, sel, val)			\
 do {									\
+	unsigned long long __tmp;					\
 	unsigned long __flags;						\
 									\
 	local_irq_save(__flags);					\
 	if (sel == 0)							\
 		__asm__ __volatile__(					\
 			".set\tmips64\n\t"				\
-			"dsll\t%L0, %L0, 32\n\t"			\
+			"dsll\t%L0, %L1, 32\n\t"			\
 			"dsrl\t%L0, %L0, 32\n\t"			\
-			"dsll\t%M0, %M0, 32\n\t"			\
+			"dsll\t%M0, %M1, 32\n\t"			\
 			"or\t%L0, %L0, %M0\n\t"				\
 			"dmtc0\t%L0, " #source "\n\t"			\
 			".set\tmips0"					\
-			: : "r" (val));					\
+			: "=&r,r" (__tmp)				\
+			: "r,0" (val));					\
 	else								\
 		__asm__ __volatile__(					\
 			".set\tmips64\n\t"				\
-			"dsll\t%L0, %L0, 32\n\t"			\
+			"dsll\t%L0, %L1, 32\n\t"			\
 			"dsrl\t%L0, %L0, 32\n\t"			\
-			"dsll\t%M0, %M0, 32\n\t"			\
+			"dsll\t%M0, %M1, 32\n\t"			\
 			"or\t%L0, %L0, %M0\n\t"				\
 			"dmtc0\t%L0, " #source ", " #sel "\n\t"		\
 			".set\tmips0"					\
-			: : "r" (val));					\
+			: "=&r,r" (__tmp)				\
+			: "r,0" (val));					\
 	local_irq_restore(__flags);					\
 } while (0)
 

From 8eba3651f1dad49c83bb7f8d672301dac4c6add6 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Tue, 12 Sep 2017 20:36:28 +0200
Subject: [PATCH 204/391] MIPS: PCI: fix pcibios_map_irq section mismatch

Drop  the __init from pcibios_map_irq() to make this section mis-
match go away:

WARNING: vmlinux.o(.text+0x56acd4): Section mismatch in reference from the function pcibios_scanbus() to the function .init.text:pcibios_map_irq()
The function pcibios_scanbus() references
the function __init pcibios_map_irq().
This is often because pcibios_scanbus lacks a __init
annotation or the annotation of pcibios_map_irq is wrong.

Run-Tested only on Alchemy.

Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/17267/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pci/fixup-capcella.c  | 2 +-
 arch/mips/pci/fixup-cobalt.c    | 2 +-
 arch/mips/pci/fixup-emma2rh.c   | 2 +-
 arch/mips/pci/fixup-fuloong2e.c | 2 +-
 arch/mips/pci/fixup-ip32.c      | 2 +-
 arch/mips/pci/fixup-lantiq.c    | 2 +-
 arch/mips/pci/fixup-lemote2f.c  | 2 +-
 arch/mips/pci/fixup-loongson3.c | 2 +-
 arch/mips/pci/fixup-malta.c     | 2 +-
 arch/mips/pci/fixup-mpc30x.c    | 2 +-
 arch/mips/pci/fixup-pmcmsp.c    | 2 +-
 arch/mips/pci/fixup-sni.c       | 2 +-
 arch/mips/pci/fixup-tb0219.c    | 2 +-
 arch/mips/pci/fixup-tb0226.c    | 2 +-
 arch/mips/pci/fixup-tb0287.c    | 2 +-
 arch/mips/pci/pci-alchemy.c     | 2 +-
 arch/mips/pci/pci-bcm47xx.c     | 2 +-
 arch/mips/pci/pci-lasat.c       | 2 +-
 arch/mips/pci/pci-mt7620.c      | 2 +-
 arch/mips/pci/pci-octeon.c      | 4 ++--
 arch/mips/pci/pci-rt2880.c      | 2 +-
 arch/mips/pci/pci-rt3883.c      | 2 +-
 arch/mips/pci/pci-xlp.c         | 2 +-
 arch/mips/pci/pci-xlr.c         | 2 +-
 24 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/mips/pci/fixup-capcella.c b/arch/mips/pci/fixup-capcella.c
index 1c02f5737367..35a671595a37 100644
--- a/arch/mips/pci/fixup-capcella.c
+++ b/arch/mips/pci/fixup-capcella.c
@@ -38,7 +38,7 @@ static char irq_tab_capcella[][5] __initdata = {
  [14] = { -1, INTA, INTB, INTC, INTD }
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_capcella[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c
index b3ab59318d91..62810d3fe99b 100644
--- a/arch/mips/pci/fixup-cobalt.c
+++ b/arch/mips/pci/fixup-cobalt.c
@@ -174,7 +174,7 @@ static char irq_tab_raq2[] __initdata = {
   [COBALT_PCICONF_ETH1]	   = ETH1_IRQ
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (cobalt_board_id <= COBALT_BRD_ID_QUBE1)
 		return irq_tab_qube1[slot];
diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c
index 19caf775c206..4832ac9f118a 100644
--- a/arch/mips/pci/fixup-emma2rh.c
+++ b/arch/mips/pci/fixup-emma2rh.c
@@ -85,7 +85,7 @@ static void emma2rh_pci_host_fixup(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH,
 			 emma2rh_pci_host_fixup);
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_map[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c
index 50da773faede..b47c2771dc99 100644
--- a/arch/mips/pci/fixup-fuloong2e.c
+++ b/arch/mips/pci/fixup-fuloong2e.c
@@ -19,7 +19,7 @@
 /* South bridge slot number is set by the pci probe process */
 static u8 sb_slot = 5;
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = 0;
 
diff --git a/arch/mips/pci/fixup-ip32.c b/arch/mips/pci/fixup-ip32.c
index 133685e215ee..ea29f5450be3 100644
--- a/arch/mips/pci/fixup-ip32.c
+++ b/arch/mips/pci/fixup-ip32.c
@@ -39,7 +39,7 @@ static char irq_tab_mace[][5] __initdata = {
  * irqs.  I suppose a device without a pin A will thank us for doing it
  * right if there exists such a broken piece of crap.
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return irq_tab_mace[slot][pin];
 }
diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c
index 2b5427d3f35c..81530a13b349 100644
--- a/arch/mips/pci/fixup-lantiq.c
+++ b/arch/mips/pci/fixup-lantiq.c
@@ -23,7 +23,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c
index 95ab9a1bd010..7e5991e0e323 100644
--- a/arch/mips/pci/fixup-lemote2f.c
+++ b/arch/mips/pci/fixup-lemote2f.c
@@ -51,7 +51,7 @@ static char irq_tab[][5] __initdata = {
 	{0, 0, 0, 0, 0},	/*  27: Unused */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 
diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c
index 2b6d5e196f99..8a741c2c6685 100644
--- a/arch/mips/pci/fixup-loongson3.c
+++ b/arch/mips/pci/fixup-loongson3.c
@@ -32,7 +32,7 @@ static void print_fixup_info(const struct pci_dev *pdev)
 			pdev->vendor, pdev->device, pdev->irq);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	print_fixup_info(dev);
 	return dev->irq;
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index 40e920c653cc..1f5f25e39590 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -38,7 +38,7 @@ static char irq_tab[][5] __initdata = {
 	{0,	PCID,	PCIA,	PCIB,	PCIC }	/* 21: PCI Slot 4 */
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int virq;
 	virq = irq_tab[slot][pin];
diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c
index 8e4f8288eca2..5da62c76e271 100644
--- a/arch/mips/pci/fixup-mpc30x.c
+++ b/arch/mips/pci/fixup-mpc30x.c
@@ -34,7 +34,7 @@ static const int irq_tab_mpc30x[] __initconst = {
  [29] = MQ200_IRQ,
 };
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (slot == 30)
 		return internal_func_irqs[PCI_FUNC(dev->devfn)];
diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c
index fab405c21c2f..f2b7b1e4395b 100644
--- a/arch/mips/pci/fixup-pmcmsp.c
+++ b/arch/mips/pci/fixup-pmcmsp.c
@@ -202,7 +202,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
  *  RETURNS:	 IRQ number
  *
  ****************************************************************************/
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 #if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL)
 	printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n");
diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c
index f67ebeeb4200..309e1c562959 100644
--- a/arch/mips/pci/fixup-sni.c
+++ b/arch/mips/pci/fixup-sni.c
@@ -130,7 +130,7 @@ static inline int is_rm300_revd(void)
 	return (csmsr & 0xa0) == 0x20;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (sni_brd_type) {
 	case SNI_BRD_PCI_TOWER_CPLUS:
diff --git a/arch/mips/pci/fixup-tb0219.c b/arch/mips/pci/fixup-tb0219.c
index d0b0083fbd27..cc581535f257 100644
--- a/arch/mips/pci/fixup-tb0219.c
+++ b/arch/mips/pci/fixup-tb0219.c
@@ -23,7 +23,7 @@
 
 #include <asm/vr41xx/tb0219.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c
index 4196ccf3ea3d..b827b5cad5fd 100644
--- a/arch/mips/pci/fixup-tb0226.c
+++ b/arch/mips/pci/fixup-tb0226.c
@@ -23,7 +23,7 @@
 #include <asm/vr41xx/giu.h>
 #include <asm/vr41xx/tb0226.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	int irq = -1;
 
diff --git a/arch/mips/pci/fixup-tb0287.c b/arch/mips/pci/fixup-tb0287.c
index 8c5039ed75d7..98f26285f2e3 100644
--- a/arch/mips/pci/fixup-tb0287.c
+++ b/arch/mips/pci/fixup-tb0287.c
@@ -22,7 +22,7 @@
 
 #include <asm/vr41xx/tb0287.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	unsigned char bus;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index e99ca7702d8a..f15ec98de2de 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -522,7 +522,7 @@ static int __init alchemy_pci_init(void)
 arch_initcall(alchemy_pci_init);
 
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct alchemy_pci_context *ctx = dev->sysdata;
 	if (ctx && ctx->board_map_irq)
diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c
index 76f16eaed0ad..230d7dd273e2 100644
--- a/arch/mips/pci/pci-bcm47xx.c
+++ b/arch/mips/pci/pci-bcm47xx.c
@@ -28,7 +28,7 @@
 #include <linux/bcma/bcma.h>
 #include <bcm47xx.h>
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return 0;
 }
diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c
index 40d2797d2bc4..47f4ee6bbb3b 100644
--- a/arch/mips/pci/pci-lasat.c
+++ b/arch/mips/pci/pci-lasat.c
@@ -61,7 +61,7 @@ arch_initcall(lasat_pci_setup);
 #define LASAT_IRQ_PCIC	 (LASAT_IRQ_BASE + 7)
 #define LASAT_IRQ_PCID	 (LASAT_IRQ_BASE + 8)
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	switch (slot) {
 	case 1:
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 4e633c1e7ff3..90fba9bf98da 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -361,7 +361,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
 	return 0;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	u32 val;
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index 9ee01936862e..771f5de6362d 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -59,7 +59,7 @@ union octeon_pci_address {
 	} s;
 };
 
-int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
+int (*octeon_pcibios_map_irq)(const struct pci_dev *dev,
 					 u8 slot, u8 pin);
 enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
 
@@ -74,7 +74,7 @@ enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID;
  *		 as it goes through each bridge.
  * Returns Interrupt number for the device
  */
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	if (octeon_pcibios_map_irq)
 		return octeon_pcibios_map_irq(dev, slot, pin);
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index d6360fe73d05..711cdccdf65b 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -181,7 +181,7 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val)
 	spin_unlock_irqrestore(&rt2880_pci_lock, flags);
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	u16 cmd;
 	int irq = -1;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index 04f8ea953297..958899ffe99c 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -564,7 +564,7 @@ static int rt3883_pci_probe(struct platform_device *pdev)
 	return err;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return of_irq_parse_and_map_pci(dev, slot, pin);
 }
diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c
index 7babf01600cb..9eff9137f78e 100644
--- a/arch/mips/pci/pci-xlp.c
+++ b/arch/mips/pci/pci-xlp.c
@@ -205,7 +205,7 @@ int xlp_socdev_to_node(const struct pci_dev *lnkdev)
 		return PCI_SLOT(lnkdev->devfn) / 8;
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct pci_dev *lnkdev;
 	int lnkfunc, node;
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
index 26d2dabef281..2a1c81a129ba 100644
--- a/arch/mips/pci/pci-xlr.c
+++ b/arch/mips/pci/pci-xlr.c
@@ -315,7 +315,7 @@ static void xls_pcie_ack_b(struct irq_data *d)
 	}
 }
 
-int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	return get_irq_vector(dev);
 }

From c8e1812960eeae42e2183154927028511c4bc566 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Wed, 20 Sep 2017 15:45:36 +0300
Subject: [PATCH 205/391] net_sched: always reset qdisc backlog in
 qdisc_reset()

SKB stored in qdisc->gso_skb also counted into backlog.

Some qdiscs don't reset backlog to zero in ->reset(),
for example sfq just dequeue and free all queued skb.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 92237e75dbbc..bf8c81e07c70 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -685,6 +685,7 @@ void qdisc_reset(struct Qdisc *qdisc)
 		qdisc->gso_skb = NULL;
 	}
 	qdisc->q.qlen = 0;
+	qdisc->qstats.backlog = 0;
 }
 EXPORT_SYMBOL(qdisc_reset);
 

From 21f4d5cc25ec0e6e8eb8420dd2c399e6d2fc7d14 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Wed, 20 Sep 2017 15:46:11 +0300
Subject: [PATCH 206/391] net_sched/hfsc: fix curve activation in
 hfsc_change_class()

If real-time or fair-share curves are enabled in hfsc_change_class()
class isn't inserted into rb-trees yet. Thus init_ed() and init_vf()
must be called in place of update_ed() and update_vf().

Remove isn't required because for now curves cannot be disabled.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index daaf214e5201..3f88b75488b0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -958,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	}
 
 	if (cl != NULL) {
+		int old_flags;
+
 		if (parentid) {
 			if (cl->cl_parent &&
 			    cl->cl_parent->cl_common.classid != parentid)
@@ -978,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		}
 
 		sch_tree_lock(sch);
+		old_flags = cl->cl_flags;
+
 		if (rsc != NULL)
 			hfsc_change_rsc(cl, rsc, cur_time);
 		if (fsc != NULL)
@@ -986,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			hfsc_change_usc(cl, usc, cur_time);
 
 		if (cl->qdisc->q.qlen != 0) {
-			if (cl->cl_flags & HFSC_RSC)
-				update_ed(cl, qdisc_peek_len(cl->qdisc));
-			if (cl->cl_flags & HFSC_FSC)
-				update_vf(cl, 0, cur_time);
+			int len = qdisc_peek_len(cl->qdisc);
+
+			if (cl->cl_flags & HFSC_RSC) {
+				if (old_flags & HFSC_RSC)
+					update_ed(cl, len);
+				else
+					init_ed(cl, len);
+			}
+
+			if (cl->cl_flags & HFSC_FSC) {
+				if (old_flags & HFSC_FSC)
+					update_vf(cl, 0, cur_time);
+				else
+					init_vf(cl, len);
+			}
 		}
 		sch_tree_unlock(sch);
 

From fe2502e49b58606580c77b3d84e42f946de182d8 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 20 Sep 2017 09:18:45 -0700
Subject: [PATCH 207/391] net_sched: remove cls_flower idr on failure

Fixes: c15ab236d69d ("net/sched: Change cls_flower to use IDR")
Cc: Chris Mi <chrism@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1a267e77c6de..d230cb4c8094 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -922,28 +922,28 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 
 		if (!tc_flags_valid(fnew->flags)) {
 			err = -EINVAL;
-			goto errout;
+			goto errout_idr;
 		}
 	}
 
 	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
 	if (err)
-		goto errout;
+		goto errout_idr;
 
 	err = fl_check_assign_mask(head, &mask);
 	if (err)
-		goto errout;
+		goto errout_idr;
 
 	if (!tc_skip_sw(fnew->flags)) {
 		if (!fold && fl_lookup(head, &fnew->mkey)) {
 			err = -EEXIST;
-			goto errout;
+			goto errout_idr;
 		}
 
 		err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
 					     head->ht_params);
 		if (err)
-			goto errout;
+			goto errout_idr;
 	}
 
 	if (!tc_skip_hw(fnew->flags)) {
@@ -952,7 +952,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 					   &mask.key,
 					   fnew);
 		if (err)
-			goto errout;
+			goto errout_idr;
 	}
 
 	if (!tc_in_hw(fnew->flags))
@@ -981,6 +981,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	kfree(tb);
 	return 0;
 
+errout_idr:
+	if (fnew->handle)
+		idr_remove_ext(&head->handle_idr, fnew->handle);
 errout:
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);

From 0ab09befdbb7ca9b969d6206108629ddff43876e Mon Sep 17 00:00:00 2001
From: Alex Ng <alexng@microsoft.com>
Date: Wed, 20 Sep 2017 11:17:35 -0700
Subject: [PATCH 208/391] hv_netvsc: fix send buffer failure on MTU change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If MTU is changed the host would reject the send buffer change.
This problem is result of recent change to allow changing send
buffer size.

Every time we change the MTU, we store the previous net_device section
count before destroying the buffer, but we don’t store the previous
section size. When we reinitialize the buffer, its size is calculated
by multiplying the previous count and previous size. Since we
continuously increase the MTU, the host returns us a decreasing count
value while the section size is reinitialized to 1728 bytes every
time.

This eventually leads to a condition where the calculated buf_size is
so small that the host rejects it.

Fixes: 8b5327975ae1 ("netvsc: allow controlling send/recv buffer size")
Signed-off-by: Alex Ng <alexng@microsoft.com>
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h | 2 ++
 drivers/net/hyperv/netvsc.c     | 7 ++-----
 drivers/net/hyperv/netvsc_drv.c | 8 ++++++++
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index d98cdfb1536b..5176be76ca7d 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -150,6 +150,8 @@ struct netvsc_device_info {
 	u32  num_chn;
 	u32  send_sections;
 	u32  recv_sections;
+	u32  send_section_size;
+	u32  recv_section_size;
 };
 
 enum rndis_device_state {
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index a5511b7326af..8d5077fb0492 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -76,9 +76,6 @@ static struct netvsc_device *alloc_net_device(void)
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
-	net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE;
-	net_device->send_section_size = NETVSC_SEND_SECTION_SIZE;
-
 	init_completion(&net_device->channel_init_wait);
 	init_waitqueue_head(&net_device->subchan_open);
 	INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
@@ -262,7 +259,7 @@ static int netvsc_init_buf(struct hv_device *device,
 	int ret = 0;
 
 	/* Get receive buffer area. */
-	buf_size = device_info->recv_sections * net_device->recv_section_size;
+	buf_size = device_info->recv_sections * device_info->recv_section_size;
 	buf_size = roundup(buf_size, PAGE_SIZE);
 
 	net_device->recv_buf = vzalloc(buf_size);
@@ -344,7 +341,7 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 
 	/* Now setup the send buffer. */
-	buf_size = device_info->send_sections * net_device->send_section_size;
+	buf_size = device_info->send_sections * device_info->send_section_size;
 	buf_size = round_up(buf_size, PAGE_SIZE);
 
 	net_device->send_buf = vzalloc(buf_size);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d4902ee5f260..a32ae02e1b6c 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -848,7 +848,9 @@ static int netvsc_set_channels(struct net_device *net,
 	device_info.num_chn = count;
 	device_info.ring_size = ring_size;
 	device_info.send_sections = nvdev->send_section_cnt;
+	device_info.send_section_size = nvdev->send_section_size;
 	device_info.recv_sections = nvdev->recv_section_cnt;
+	device_info.recv_section_size = nvdev->recv_section_size;
 
 	rndis_filter_device_remove(dev, nvdev);
 
@@ -963,7 +965,9 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	device_info.ring_size = ring_size;
 	device_info.num_chn = nvdev->num_chn;
 	device_info.send_sections = nvdev->send_section_cnt;
+	device_info.send_section_size = nvdev->send_section_size;
 	device_info.recv_sections = nvdev->recv_section_cnt;
+	device_info.recv_section_size = nvdev->recv_section_size;
 
 	rndis_filter_device_remove(hdev, nvdev);
 
@@ -1485,7 +1489,9 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 	device_info.num_chn = nvdev->num_chn;
 	device_info.ring_size = ring_size;
 	device_info.send_sections = new_tx;
+	device_info.send_section_size = nvdev->send_section_size;
 	device_info.recv_sections = new_rx;
+	device_info.recv_section_size = nvdev->recv_section_size;
 
 	netif_device_detach(ndev);
 	was_opened = rndis_filter_opened(nvdev);
@@ -1934,7 +1940,9 @@ static int netvsc_probe(struct hv_device *dev,
 	device_info.ring_size = ring_size;
 	device_info.num_chn = VRSS_CHANNEL_DEFAULT;
 	device_info.send_sections = NETVSC_DEFAULT_TX;
+	device_info.send_section_size = NETVSC_SEND_SECTION_SIZE;
 	device_info.recv_sections = NETVSC_DEFAULT_RX;
+	device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE;
 
 	nvdev = rndis_filter_device_add(dev, &device_info);
 	if (IS_ERR(nvdev)) {

From 4a7a3860caac1a8779e8c459d8abe21b111798d6 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Wed, 20 Sep 2017 15:32:53 -0500
Subject: [PATCH 209/391] net: qcom/emac: add software control for pause frame
 mode

The EMAC has the option of sending only a single pause frame when
flow control is enabled and the RX queue is full.  Although sending
only one pause frame has little value, this would allow admins to
enable automatic flow control without having to worry about the EMAC
flooding nearby switches with pause frames if the kernel hangs.

The option is enabled by using the single-pause-mode private flag.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qualcomm/emac/emac-ethtool.c | 30 +++++++++++++++++++
 drivers/net/ethernet/qualcomm/emac/emac-mac.c | 22 ++++++++++++++
 drivers/net/ethernet/qualcomm/emac/emac.c     |  3 ++
 drivers/net/ethernet/qualcomm/emac/emac.h     |  3 ++
 4 files changed, 58 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
index bbe24639aa5a..c8c6231b87f3 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c
@@ -88,6 +88,8 @@ static void emac_set_msglevel(struct net_device *netdev, u32 data)
 static int emac_get_sset_count(struct net_device *netdev, int sset)
 {
 	switch (sset) {
+	case ETH_SS_PRIV_FLAGS:
+		return 1;
 	case ETH_SS_STATS:
 		return EMAC_STATS_LEN;
 	default:
@@ -100,6 +102,10 @@ static void emac_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 	unsigned int i;
 
 	switch (stringset) {
+	case ETH_SS_PRIV_FLAGS:
+		strcpy(data, "single-pause-mode");
+		break;
+
 	case ETH_SS_STATS:
 		for (i = 0; i < EMAC_STATS_LEN; i++) {
 			strlcpy(data, emac_ethtool_stat_strings[i],
@@ -230,6 +236,27 @@ static int emac_get_regs_len(struct net_device *netdev)
 	return EMAC_MAX_REG_SIZE * sizeof(u32);
 }
 
+#define EMAC_PRIV_ENABLE_SINGLE_PAUSE	BIT(0)
+
+static int emac_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	adpt->single_pause_mode = !!(flags & EMAC_PRIV_ENABLE_SINGLE_PAUSE);
+
+	if (netif_running(netdev))
+		return emac_reinit_locked(adpt);
+
+	return 0;
+}
+
+static u32 emac_get_priv_flags(struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	return adpt->single_pause_mode ? EMAC_PRIV_ENABLE_SINGLE_PAUSE : 0;
+}
+
 static const struct ethtool_ops emac_ethtool_ops = {
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
@@ -253,6 +280,9 @@ static const struct ethtool_ops emac_ethtool_ops = {
 
 	.get_regs_len    = emac_get_regs_len,
 	.get_regs        = emac_get_regs,
+
+	.set_priv_flags = emac_set_priv_flags,
+	.get_priv_flags = emac_get_priv_flags,
 };
 
 void emac_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index bcd4708b3745..0ea3ca09c689 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -551,6 +551,28 @@ static void emac_mac_start(struct emac_adapter *adpt)
 	mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL |
 		 DEBUG_MODE | SINGLE_PAUSE_MODE);
 
+	/* Enable single-pause-frame mode if requested.
+	 *
+	 * If enabled, the EMAC will send a single pause frame when the RX
+	 * queue is full.  This normally leads to packet loss because
+	 * the pause frame disables the remote MAC only for 33ms (the quanta),
+	 * and then the remote MAC continues sending packets even though
+	 * the RX queue is still full.
+	 *
+	 * If disabled, the EMAC sends a pause frame every 31ms until the RX
+	 * queue is no longer full.  Normally, this is the preferred
+	 * method of operation.  However, when the system is hung (e.g.
+	 * cores are halted), the EMAC interrupt handler is never called
+	 * and so the RX queue fills up quickly and stays full.  The resuling
+	 * non-stop "flood" of pause frames sometimes has the effect of
+	 * disabling nearby switches.  In some cases, other nearby switches
+	 * are also affected, shutting down the entire network.
+	 *
+	 * The user can enable or disable single-pause-frame mode
+	 * via ethtool.
+	 */
+	mac |= adpt->single_pause_mode ? SINGLE_PAUSE_MODE : 0;
+
 	writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1);
 
 	writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 60850bfa3d32..759543512117 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -443,6 +443,9 @@ static void emac_init_adapter(struct emac_adapter *adpt)
 
 	/* default to automatic flow control */
 	adpt->automatic = true;
+
+	/* Disable single-pause-frame mode by default */
+	adpt->single_pause_mode = false;
 }
 
 /* Get the clock */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h
index 8ee4ec6aef2e..d7c9f44209d4 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac.h
@@ -363,6 +363,9 @@ struct emac_adapter {
 	bool				tx_flow_control;
 	bool				rx_flow_control;
 
+	/* True == use single-pause-frame mode. */
+	bool				single_pause_mode;
+
 	/* Ring parameter */
 	u8				tpd_burst;
 	u8				rfd_burst;

From 19cab8872692960535aa6d12e3a295ac51d1a648 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Sep 2017 15:52:13 -0700
Subject: [PATCH 210/391] net: ethtool: Add back transceiver type

Commit 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
deprecated the ethtool_cmd::transceiver field, which was fine in
premise, except that the PHY library was actually using it to report the
type of transceiver: internal or external.

Use the first word of the reserved field to put this __u8 transceiver
field back in. It is made read-only, and we don't expect the
ETHTOOL_xLINKSETTINGS API to be doing anything with this anyway, so this
is mostly for the legacy path where we do:

ethtool_get_settings()
-> dev->ethtool_ops->get_link_ksettings()
   -> convert_link_ksettings_to_legacy_settings()

to have no information loss compared to the legacy get_settings API.

Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 6 +++++-
 net/core/ethtool.c           | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9c041dae8e2c..5bd1b1de4ea0 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1753,6 +1753,8 @@ enum ethtool_reset_flags {
  *	%ethtool_link_mode_bit_indices for the link modes, and other
  *	link features that the link partner advertised through
  *	autonegotiation; 0 if unknown or not applicable.  Read-only.
+ * @transceiver: Used to distinguish different possible PHY types,
+ *	reported consistently by PHYLIB.  Read-only.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
  * fixed link mode and are writable if the driver supports multiple
@@ -1804,7 +1806,9 @@ struct ethtool_link_settings {
 	__u8	eth_tp_mdix;
 	__u8	eth_tp_mdix_ctrl;
 	__s8	link_mode_masks_nwords;
-	__u32	reserved[8];
+	__u8	transceiver;
+	__u8	reserved1[3];
+	__u32	reserved[7];
 	__u32	link_mode_masks[0];
 	/* layout of link_mode_masks fields:
 	 * __u32 map_supported[link_mode_masks_nwords];
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6a582ae4c5d9..3228411ada0f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -525,6 +525,8 @@ convert_link_ksettings_to_legacy_settings(
 		= link_ksettings->base.eth_tp_mdix;
 	legacy_settings->eth_tp_mdix_ctrl
 		= link_ksettings->base.eth_tp_mdix_ctrl;
+	legacy_settings->transceiver
+		= link_ksettings->base.transceiver;
 	return retval;
 }
 

From ceb628134a75564d7bfa8e4ef902e6e588339e11 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Sep 2017 15:52:14 -0700
Subject: [PATCH 211/391] net: phy: Keep reporting transceiver type

With commit 2d55173e71b0 ("phy: add generic function to support
ksetting support"), we lost the ability to report the transceiver type
like we used to. Now that we have added back the transceiver type to
ethtool_link_settings, we can report it back like we used to and have no
loss of information.

Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e842d2cd1ee7..2b1e67bc1e73 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -373,7 +373,8 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev,
 		cmd->base.port = PORT_BNC;
 	else
 		cmd->base.port = PORT_MII;
-
+	cmd->base.transceiver = phy_is_internal(phydev) ?
+				XCVR_INTERNAL : XCVR_EXTERNAL;
 	cmd->base.phy_address = phydev->mdio.addr;
 	cmd->base.autoneg = phydev->autoneg;
 	cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl;

From 8a7ffeb795f864dd605b579c05934cba95dc8ad3 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Thu, 21 Sep 2017 02:36:36 +0530
Subject: [PATCH 212/391] lan78xx: Fix for eeprom read/write when device auto
 suspend

Fix for eeprom read/write when device auto suspend

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index b99a7fb09f8e..fcf85ae37435 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1265,30 +1265,46 @@ static int lan78xx_ethtool_get_eeprom(struct net_device *netdev,
 				      struct ethtool_eeprom *ee, u8 *data)
 {
 	struct lan78xx_net *dev = netdev_priv(netdev);
+	int ret;
+
+	ret = usb_autopm_get_interface(dev->intf);
+	if (ret)
+		return ret;
 
 	ee->magic = LAN78XX_EEPROM_MAGIC;
 
-	return lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data);
+	ret = lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data);
+
+	usb_autopm_put_interface(dev->intf);
+
+	return ret;
 }
 
 static int lan78xx_ethtool_set_eeprom(struct net_device *netdev,
 				      struct ethtool_eeprom *ee, u8 *data)
 {
 	struct lan78xx_net *dev = netdev_priv(netdev);
+	int ret;
+
+	ret = usb_autopm_get_interface(dev->intf);
+	if (ret)
+		return ret;
 
 	/* Allow entire eeprom update only */
 	if ((ee->magic == LAN78XX_EEPROM_MAGIC) &&
 	    (ee->offset == 0) &&
 	    (ee->len == 512) &&
 	    (data[0] == EEPROM_INDICATOR))
-		return lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
+		ret = lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
 	else if ((ee->magic == LAN78XX_OTP_MAGIC) &&
 		 (ee->offset == 0) &&
 		 (ee->len == 512) &&
 		 (data[0] == OTP_INDICATOR_1))
-		return lan78xx_write_raw_otp(dev, ee->offset, ee->len, data);
+		ret = lan78xx_write_raw_otp(dev, ee->offset, ee->len, data);
 
-	return -EINVAL;
+	usb_autopm_put_interface(dev->intf);
+
+	return ret;
 }
 
 static void lan78xx_get_strings(struct net_device *netdev, u32 stringset,

From c077682282dd34c75e8477c22dffe7c9aebc6e98 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Thu, 21 Sep 2017 02:36:37 +0530
Subject: [PATCH 213/391] lan78xx: Allow EEPROM write for less than
 MAX_EEPROM_SIZE

Allow EEPROM write for less than MAX_EEPROM_SIZE

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index fcf85ae37435..f8c63eec8353 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1290,11 +1290,10 @@ static int lan78xx_ethtool_set_eeprom(struct net_device *netdev,
 	if (ret)
 		return ret;
 
-	/* Allow entire eeprom update only */
-	if ((ee->magic == LAN78XX_EEPROM_MAGIC) &&
-	    (ee->offset == 0) &&
-	    (ee->len == 512) &&
-	    (data[0] == EEPROM_INDICATOR))
+	/* Invalid EEPROM_INDICATOR at offset zero will result in a failure
+	 * to load data from EEPROM
+	 */
+	if (ee->magic == LAN78XX_EEPROM_MAGIC)
 		ret = lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data);
 	else if ((ee->magic == LAN78XX_OTP_MAGIC) &&
 		 (ee->offset == 0) &&

From e365280521029c9366bab038915274ddaa1b7195 Mon Sep 17 00:00:00 2001
From: Nisar Sayed <Nisar.Sayed@microchip.com>
Date: Thu, 21 Sep 2017 02:36:38 +0530
Subject: [PATCH 214/391] lan78xx: Use default values loaded from EEPROM/OTP
 after reset

Use default value of auto duplex and auto speed values loaded
from EEPROM/OTP after reset. The LAN78xx allows platform
configurations to be loaded from EEPROM/OTP.
Ex: When external phy is connected, the MAC can be configured to
have correct auto speed, auto duplex, auto polarity configured
from the EEPROM/OTP.

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Nisar Sayed <Nisar.Sayed@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index f8c63eec8353..0161f77641fa 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2449,7 +2449,6 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	/* LAN7801 only has RGMII mode */
 	if (dev->chipid == ID_REV_CHIP_ID_7801_)
 		buf &= ~MAC_CR_GMII_EN_;
-	buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_;
 	ret = lan78xx_write_reg(dev, MAC_CR, buf);
 
 	ret = lan78xx_read_reg(dev, MAC_TX, &buf);

From f0ef1f4f2b772c0a1c8b35a6ae3edf974cc110dd Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 21 Sep 2017 08:24:27 +0200
Subject: [PATCH 215/391] net: stmmac: Cocci spatch "of_table"

Make sure (of/i2c/platform)_device_id tables are NULL terminated.
Found by coccinelle spatch "misc/of_table.cocci"

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index a366b3747eeb..8a280b48e3a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -315,6 +315,7 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 		{ .compatible = "allwinner,sun8i-h3-emac" },
 		{ .compatible = "allwinner,sun8i-v3s-emac" },
 		{ .compatible = "allwinner,sun50i-a64-emac" },
+		{},
 	};
 
 	/* If phy-handle property is passed from DT, use it as the PHY */

From 09579ac803a3638344b8544b5940793d5358673e Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:26 +0200
Subject: [PATCH 216/391] net/smc: add missing dev_put

In the infiniband part, SMC currently uses get_netdev which calls
dev_hold on the returned net device. However, the SMC code never calls
dev_put on that net device resulting in a wrong reference count.

This patch adds a dev_put after the usage of the net device to fix the
issue.

Signed-off-by: Hans Wippel <hwippel@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_ib.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 547e0e113b17..0b5852299158 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -380,6 +380,7 @@ static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
 	ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
 	if (ndev) {
 		memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
+		dev_put(ndev);
 	} else if (!rc) {
 		memcpy(&smcibdev->mac[ibport - 1][0],
 		       &smcibdev->gid[ibport - 1].raw[8], 3);

From 846e344eb7229018457d6d6fc1ab0cc0a167692f Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:27 +0200
Subject: [PATCH 217/391] net/smc: add receive timeout check

The SMC receive function currently lacks a timeout check under the
condition that no data were received and no data are available. This
patch adds such a check.

Signed-off-by: Hans Wippel <hwippel@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_rx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index b17a333e9bb0..3e631ae4b6b6 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -148,6 +148,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
 				read_done = sock_intr_errno(timeo);
 				break;
 			}
+			if (!timeo)
+				return -EAGAIN;
 		}
 
 		if (!atomic_read(&conn->bytes_to_rcv)) {

From 731b008560e6dfaf5fb297543f17bbe9bb868f3c Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:28 +0200
Subject: [PATCH 218/391] net/smc: take RCU read lock for routing cache lookup

smc_netinfo_by_tcpsk() looks up the routing cache. Such a lookup requires
protection by an RCU read lock.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8c6d24b2995d..2e8d2dabac0c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 			 __be32 *subnet, u8 *prefix_len)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
+	struct in_device *in_dev;
 	struct sockaddr_in addr;
 	int rc = -ENOENT;
 	int len;
@@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 	/* get address to which the internal TCP socket is bound */
 	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
 	/* analyze IPv4 specific data of net_device belonging to TCP socket */
-	for_ifa(dst->dev->ip_ptr) {
-		if (ifa->ifa_address != addr.sin_addr.s_addr)
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(dst->dev);
+	for_ifa(in_dev) {
+		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
 			continue;
 		*prefix_len = inet_mask_len(ifa->ifa_mask);
 		*subnet = ifa->ifa_address & ifa->ifa_mask;
 		rc = 0;
 		break;
-	} endfor_ifa(dst->dev->ip_ptr);
+	} endfor_ifa(in_dev);
+	rcu_read_unlock();
 
 out_rel:
 	dst_release(dst);

From a6832c3acdb2ceb099ec3c385777fbaa6d5a5fd6 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:29 +0200
Subject: [PATCH 219/391] net/smc: adjust net_device refcount

smc_pnet_fill_entry() uses dev_get_by_name() adding a refcount to ndev.
The following smc_pnet_enter() has to reduce the refcount if the entry
to be added exists already in the pnet table.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_pnet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 78f7af28ae4f..31f8453c25c5 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -181,8 +181,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
 			     sizeof(new_pnetelem->ndev->name)) ||
 		    smc_pnet_same_ibname(pnetelem,
 					 new_pnetelem->smcibdev->ibdev->name,
-					 new_pnetelem->ib_port))
+					 new_pnetelem->ib_port)) {
+			dev_put(pnetelem->ndev);
 			goto found;
+		}
 	}
 	list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
 	rc = 0;

From 8301fa44b41b1ca46a0547809eb173112559dc51 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:30 +0200
Subject: [PATCH 220/391] net/smc: adapt send request completion notification

The solicited flag is meaningful for the receive completion queue.
Ask for next work completion of any type on the send queue.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_wr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index ab56bda66783..525d91e0d57e 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -244,7 +244,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	int rc;
 
 	ib_req_notify_cq(link->smcibdev->roce_cq_send,
-			 IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS);
+			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
 	pend = container_of(priv, struct smc_wr_tx_pend, priv);
 	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
 			  &failed_wr);

From 5bc11ddbdf7fc6681db5c3f9a92cdee0f19cee1e Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:31 +0200
Subject: [PATCH 221/391] net/smc: longer delay for client link group removal

Client link group creation always follows the server linkgroup creation.
If peer creates a new server link group, client has to create a new
client link group. If peer reuses a server link group for a new
connection, client has to reuse its client link group as well. This
patch introduces a longer delay for client link group removal to make
sure this link group still exists, once the peer decides to reuse a
server link group. This avoids out-of-sync conditions for link groups.
If already scheduled, modify the delay.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 1a16d51e2330..20b66e79c5d6 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -25,8 +25,9 @@
 #include "smc_cdc.h"
 #include "smc_close.h"
 
-#define SMC_LGR_NUM_INCR	256
-#define SMC_LGR_FREE_DELAY	(600 * HZ)
+#define SMC_LGR_NUM_INCR		256
+#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
+#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10)
 
 static u32 smc_lgr_num;			/* unique link group number */
 
@@ -107,8 +108,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 		__smc_lgr_unregister_conn(conn);
 	}
 	write_unlock_bh(&lgr->conns_lock);
-	if (reduced && !lgr->conns_num)
-		schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
+	if (!reduced || lgr->conns_num)
+		return;
+	/* client link group creation always follows the server link group
+	 * creation. For client use a somewhat higher removal delay time,
+	 * otherwise there is a risk of out-of-sync link groups.
+	 */
+	mod_delayed_work(system_wq, &lgr->free_work,
+			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+						 SMC_LGR_FREE_DELAY_SERV);
 }
 
 static void smc_lgr_free_work(struct work_struct *work)

From bfbedfd38378c1ad9df84469403d69c17b074b66 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:32 +0200
Subject: [PATCH 222/391] net/smc: terminate link group if out-of-sync is
 received

An out-of-sync condition can just be detected by the client.
If the server receives a CLC DECLINE message indicating an out-of-sync
condition for the link groups, the server must clean up the out-of-sync
link group.
There is no need for an extra third parameter in smc_clc_send_decline().

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c  |  6 ++----
 net/smc/smc_clc.c | 10 +++++-----
 net/smc/smc_clc.h |  3 +--
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 2e8d2dabac0c..745f145d4c4d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -513,7 +513,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
 	/* RDMA setup failed, switch back to TCP */
 	smc->use_fallback = true;
 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-		rc = smc_clc_send_decline(smc, reason_code, 0);
+		rc = smc_clc_send_decline(smc, reason_code);
 		if (rc < sizeof(struct smc_clc_msg_decline))
 			goto out_err;
 	}
@@ -808,8 +808,6 @@ static void smc_listen_work(struct work_struct *work)
 		rc = local_contact;
 		if (rc == -ENOMEM)
 			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
-		else if (rc == -ENOLINK)
-			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
 		goto decline_rdma;
 	}
 	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
@@ -903,7 +901,7 @@ static void smc_listen_work(struct work_struct *work)
 	smc_conn_free(&new_smc->conn);
 	new_smc->use_fallback = true;
 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
-		rc = smc_clc_send_decline(new_smc, reason_code, 0);
+		rc = smc_clc_send_decline(new_smc, reason_code);
 		if (rc < sizeof(struct smc_clc_msg_decline))
 			goto out_err;
 	}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 3934913ab835..b7dd2743fb5c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -95,9 +95,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 	}
 	if (clcm->type == SMC_CLC_DECLINE) {
 		reason_code = SMC_CLC_DECL_REPLY;
-		if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
-			== SMC_CLC_DECL_SYNCERR)
+		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
 			smc->conn.lgr->sync_err = true;
+			smc_lgr_terminate(smc->conn.lgr);
+		}
 	}
 
 out:
@@ -105,8 +106,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 }
 
 /* send CLC DECLINE message across internal TCP socket */
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
-			 u8 out_of_sync)
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
 {
 	struct smc_clc_msg_decline dclc;
 	struct msghdr msg;
@@ -118,7 +118,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
 	dclc.hdr.type = SMC_CLC_DECLINE;
 	dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
 	dclc.hdr.version = SMC_CLC_V1;
-	dclc.hdr.flag = out_of_sync ? 1 : 0;
+	dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
 	memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
 	dclc.peer_diagnosis = htonl(peer_diag_info);
 	memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 13db8ce177c9..1c55414041d4 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -106,8 +106,7 @@ struct smc_ib_device;
 
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
-			 u8 out_of_sync);
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
 int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
 			  u8 ibport);
 int smc_clc_send_confirm(struct smc_sock *smc);

From 18e537cd58e8d6932719bfa79cb96a1fbc639199 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:33 +0200
Subject: [PATCH 223/391] net/smc: introduce a delay

The number of outstanding work requests is limited. If all work
requests are in use, tx processing is postponed to another scheduling
of the tx worker. Switch to a delayed worker to have a gap for tx
completion queue events before the next retry.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc.h       |  2 +-
 net/smc/smc_close.c | 12 +++++++-----
 net/smc/smc_tx.c    | 12 ++++++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6e44313e4467..0ccd6fa387ad 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -149,7 +149,7 @@ struct smc_connection {
 	atomic_t		sndbuf_space;	/* remaining space in sndbuf */
 	u16			tx_cdc_seq;	/* sequence # for CDC send */
 	spinlock_t		send_lock;	/* protect wr_sends */
-	struct work_struct	tx_work;	/* retry of smc_cdc_msg_send */
+	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
 
 	struct smc_host_cdc_msg	local_rx_ctrl;	/* filled during event_handl.
 						 * .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 3c2e166b5d22..5201bc103bd8 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -208,7 +208,7 @@ int smc_close_active(struct smc_sock *smc)
 	case SMC_ACTIVE:
 		smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		if (sk->sk_state == SMC_ACTIVE) {
 			/* send close request */
@@ -234,7 +234,7 @@ int smc_close_active(struct smc_sock *smc)
 		if (!smc_cdc_rxed_any_close(conn))
 			smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		if (sk->sk_err != ECONNABORTED) {
 			/* confirm close from peer */
@@ -263,7 +263,9 @@ int smc_close_active(struct smc_sock *smc)
 		/* peer sending PeerConnectionClosed will cause transition */
 		break;
 	case SMC_PROCESSABORT:
-		cancel_work_sync(&conn->tx_work);
+		release_sock(sk);
+		cancel_delayed_work_sync(&conn->tx_work);
+		lock_sock(sk);
 		smc_close_abort(conn);
 		sk->sk_state = SMC_CLOSED;
 		smc_close_wait_tx_pends(smc);
@@ -425,7 +427,7 @@ int smc_close_shutdown_write(struct smc_sock *smc)
 	case SMC_ACTIVE:
 		smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		/* send close wr request */
 		rc = smc_close_wr(conn);
@@ -439,7 +441,7 @@ int smc_close_shutdown_write(struct smc_sock *smc)
 		if (!smc_cdc_rxed_any_close(conn))
 			smc_close_stream_wait(smc, timeout);
 		release_sock(sk);
-		cancel_work_sync(&conn->tx_work);
+		cancel_delayed_work_sync(&conn->tx_work);
 		lock_sock(sk);
 		/* confirm close from peer */
 		rc = smc_close_wr(conn);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3c656beb8820..3866573288dd 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,6 +24,8 @@
 #include "smc_cdc.h"
 #include "smc_tx.h"
 
+#define SMC_TX_WORK_DELAY	HZ
+
 /***************************** sndbuf producer *******************************/
 
 /* callback implementation for sk.sk_write_space()
@@ -406,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
 				goto out_unlock;
 			}
 			rc = 0;
-			schedule_work(&conn->tx_work);
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_WORK_DELAY);
 		}
 		goto out_unlock;
 	}
@@ -430,7 +433,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
  */
 static void smc_tx_work(struct work_struct *work)
 {
-	struct smc_connection *conn = container_of(work,
+	struct smc_connection *conn = container_of(to_delayed_work(work),
 						   struct smc_connection,
 						   tx_work);
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -468,7 +471,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 		if (!rc)
 			rc = smc_cdc_msg_send(conn, wr_buf, pend);
 		if (rc < 0) {
-			schedule_work(&conn->tx_work);
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_WORK_DELAY);
 			return;
 		}
 		smc_curs_write(&conn->rx_curs_confirmed,
@@ -487,6 +491,6 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 void smc_tx_init(struct smc_sock *smc)
 {
 	smc->sk.sk_write_space = smc_tx_write_space;
-	INIT_WORK(&smc->conn.tx_work, smc_tx_work);
+	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 	spin_lock_init(&smc->conn.send_lock);
 }

From 8c96feeeb39ba0b89c6121f71e8f7aa2a4d46671 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 21 Sep 2017 09:16:34 +0200
Subject: [PATCH 224/391] net/smc: no close wait in case of process shut down

Usually socket closing is delayed if there is still data available in
the send buffer to be transmitted. If a process is killed, the delay
should be avoided.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_close.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 5201bc103bd8..f0d16fb825f7 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -174,15 +174,15 @@ int smc_close_active(struct smc_sock *smc)
 {
 	struct smc_cdc_conn_state_flags *txflags =
 		&smc->conn.local_tx_ctrl.conn_state_flags;
-	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
 	struct smc_connection *conn = &smc->conn;
 	struct sock *sk = &smc->sk;
 	int old_state;
+	long timeout;
 	int rc = 0;
 
-	if (sock_flag(sk, SOCK_LINGER) &&
-	    !(current->flags & PF_EXITING))
-		timeout = sk->sk_lingertime;
+	timeout = current->flags & PF_EXITING ?
+		  0 : sock_flag(sk, SOCK_LINGER) ?
+		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
 
 again:
 	old_state = sk->sk_state;
@@ -413,13 +413,14 @@ void smc_close_sock_put_work(struct work_struct *work)
 int smc_close_shutdown_write(struct smc_sock *smc)
 {
 	struct smc_connection *conn = &smc->conn;
-	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
 	struct sock *sk = &smc->sk;
 	int old_state;
+	long timeout;
 	int rc = 0;
 
-	if (sock_flag(sk, SOCK_LINGER))
-		timeout = sk->sk_lingertime;
+	timeout = current->flags & PF_EXITING ?
+		  0 : sock_flag(sk, SOCK_LINGER) ?
+		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
 
 again:
 	old_state = sk->sk_state;

From e8b95728f724797f958912fd9b765a695595d3a6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 1 Sep 2017 17:13:43 -0700
Subject: [PATCH 225/391] Input: uinput - avoid FF flush when destroying device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Normally, when input device supporting force feedback effects is being
destroyed, we try to "flush" currently playing effects, so that the
physical device does not continue vibrating (or executing other effects).
Unfortunately this does not work well for uinput as flushing of the effects
deadlocks with the destroy action:

- if device is being destroyed because the file descriptor is being closed,
  then there is noone to even service FF requests;

- if device is being destroyed because userspace sent UI_DEV_DESTROY,
  while theoretically it could be possible to service FF requests,
  userspace is unlikely to do so (they'd need to make sure FF handling
  happens on a separate thread) even if kernel solves the issue with FF
  ioctls deadlocking with UI_DEV_DESTROY ioctl on udev->mutex.

To avoid lockups like the one below, let's install a custom input device
flush handler, and avoid trying to flush force feedback effects when we
destroying the device, and instead rely on uinput to shut off the device
properly.

NMI watchdog: Watchdog detected hard LOCKUP on cpu 3
...
 <<EOE>>  [<ffffffff817a0307>] _raw_spin_lock_irqsave+0x37/0x40
 [<ffffffff810e633d>] complete+0x1d/0x50
 [<ffffffffa00ba08c>] uinput_request_done+0x3c/0x40 [uinput]
 [<ffffffffa00ba587>] uinput_request_submit.part.7+0x47/0xb0 [uinput]
 [<ffffffffa00bb62b>] uinput_dev_erase_effect+0x5b/0x76 [uinput]
 [<ffffffff815d91ad>] erase_effect+0xad/0xf0
 [<ffffffff815d929d>] flush_effects+0x4d/0x90
 [<ffffffff815d4cc0>] input_flush_device+0x40/0x60
 [<ffffffff815daf1c>] evdev_cleanup+0xac/0xc0
 [<ffffffff815daf5b>] evdev_disconnect+0x2b/0x60
 [<ffffffff815d74ac>] __input_unregister_device+0xac/0x150
 [<ffffffff815d75f7>] input_unregister_device+0x47/0x70
 [<ffffffffa00bac45>] uinput_destroy_device+0xb5/0xc0 [uinput]
 [<ffffffffa00bb2de>] uinput_ioctl_handler.isra.9+0x65e/0x740 [uinput]
 [<ffffffff811231ab>] ? do_futex+0x12b/0xad0
 [<ffffffffa00bb3f8>] uinput_ioctl+0x18/0x20 [uinput]
 [<ffffffff81241248>] do_vfs_ioctl+0x298/0x480
 [<ffffffff81337553>] ? security_file_ioctl+0x43/0x60
 [<ffffffff812414a9>] SyS_ioctl+0x79/0x90
 [<ffffffff817a04ee>] entry_SYSCALL_64_fastpath+0x12/0x71

Reported-by: Rodrigo Rivas Costa <rodrigorivascosta@gmail.com>
Reported-by: Clément VUCHENER <clement.vuchener@gmail.com>
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=193741
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/ff-core.c     | 13 ++++++++++---
 drivers/input/misc/uinput.c | 18 ++++++++++++++++++
 include/linux/input.h       |  1 +
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c
index 8f2042432c85..66a46c84e28f 100644
--- a/drivers/input/ff-core.c
+++ b/drivers/input/ff-core.c
@@ -237,9 +237,15 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file)
 EXPORT_SYMBOL_GPL(input_ff_erase);
 
 /*
- * flush_effects - erase all effects owned by a file handle
+ * input_ff_flush - erase all effects owned by a file handle
+ * @dev: input device to erase effect from
+ * @file: purported owner of the effects
+ *
+ * This function erases all force-feedback effects associated with
+ * the given owner from specified device. Note that @file may be %NULL,
+ * in which case all effects will be erased.
  */
-static int flush_effects(struct input_dev *dev, struct file *file)
+int input_ff_flush(struct input_dev *dev, struct file *file)
 {
 	struct ff_device *ff = dev->ff;
 	int i;
@@ -255,6 +261,7 @@ static int flush_effects(struct input_dev *dev, struct file *file)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(input_ff_flush);
 
 /**
  * input_ff_event() - generic handler for force-feedback events
@@ -343,7 +350,7 @@ int input_ff_create(struct input_dev *dev, unsigned int max_effects)
 	mutex_init(&ff->mutex);
 
 	dev->ff = ff;
-	dev->flush = flush_effects;
+	dev->flush = input_ff_flush;
 	dev->event = input_ff_event;
 	__set_bit(EV_FF, dev->evbit);
 
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 022be0e22eba..2cff40be8860 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -230,6 +230,18 @@ static int uinput_dev_erase_effect(struct input_dev *dev, int effect_id)
 	return uinput_request_submit(udev, &request);
 }
 
+static int uinput_dev_flush(struct input_dev *dev, struct file *file)
+{
+	/*
+	 * If we are called with file == NULL that means we are tearing
+	 * down the device, and therefore we can not handle FF erase
+	 * requests: either we are handling UI_DEV_DESTROY (and holding
+	 * the udev->mutex), or the file descriptor is closed and there is
+	 * nobody on the other side anymore.
+	 */
+	return file ? input_ff_flush(dev, file) : 0;
+}
+
 static void uinput_destroy_device(struct uinput_device *udev)
 {
 	const char *name, *phys;
@@ -297,6 +309,12 @@ static int uinput_create_device(struct uinput_device *udev)
 		dev->ff->playback = uinput_dev_playback;
 		dev->ff->set_gain = uinput_dev_set_gain;
 		dev->ff->set_autocenter = uinput_dev_set_autocenter;
+		/*
+		 * The standard input_ff_flush() implementation does
+		 * not quite work for uinput as we can't reasonably
+		 * handle FF requests during device teardown.
+		 */
+		dev->flush = uinput_dev_flush;
 	}
 
 	error = input_register_device(udev->dev);
diff --git a/include/linux/input.h b/include/linux/input.h
index a65e3b24fb18..fb5e23c7ed98 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -529,6 +529,7 @@ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code,
 
 int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file);
 int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
+int input_ff_flush(struct input_dev *dev, struct file *file);
 
 int input_ff_create_memless(struct input_dev *dev, void *data,
 		int (*play_effect)(struct input_dev *, void *, struct ff_effect *));

From 6b4877c7bdc6ae39ce03716df7caeecf204697eb Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 6 Sep 2017 16:22:59 -0700
Subject: [PATCH 226/391] Input: uinput - avoid crash when sending FF request
 to device going away

If FF request comes in while uinput device is going away,
uinput_request_send() will fail with -ENODEV, and uinput_request_submit()
will attempt to mark the slot as unused by calling uinput_request_done().
Unfortunately in this case we haven't initialized request->done completion
yet, and we get a crash:

[   39.402036] BUG: spinlock bad magic on CPU#1, fftest/3108
[   39.402046]  lock: 0xffff88006a93bb00, .magic: 00000000, .owner: /39, .owner_cpu: 1217155072
[   39.402055] CPU: 1 PID: 3108 Comm: fftest Tainted: G        W 4.13.0+ #15
[   39.402059] Hardware name: LENOVO 20HQS0EG02/20HQS0EG02, BIOS N1MET37W (1.22 ) 07/04/2017
[   39.402064]  0000000000000086 f0fad82f3ceaa120 ffff88006a93b9a0 ffffffff9de941bb
[   39.402077]  ffff88026df8ae00 ffff88006a93bb00 ffff88006a93b9c0 ffffffff9dca62b7
[   39.402088]  ffff88006a93bb00 ffff88006a93baf8 ffff88006a93b9e0 ffffffff9dca62e7
[   39.402099] Call Trace:
[   39.402112]  [<ffffffff9de941bb>] dump_stack+0x4d/0x63
[   39.402123]  [<ffffffff9dca62b7>] spin_dump+0x97/0x9c
[   39.402130]  [<ffffffff9dca62e7>] spin_bug+0x2b/0x2d
[   39.402138]  [<ffffffff9dca6373>] do_raw_spin_lock+0x28/0xfd
[   39.402147]  [<ffffffff9e3055cd>] _raw_spin_lock_irqsave+0x19/0x1f
[   39.402154]  [<ffffffff9dca05b7>] complete+0x1d/0x48
[   39.402162]  [<ffffffffc04f30af>] 0xffffffffc04f30af
[   39.402167]  [<ffffffffc04f468c>] 0xffffffffc04f468c
[   39.402177]  [<ffffffff9dd59c16>] ? __slab_free+0x22f/0x359
[   39.402184]  [<ffffffff9dcc13e9>] ? tk_clock_read+0xc/0xe
[   39.402189]  [<ffffffffc04f471f>] 0xffffffffc04f471f
[   39.402195]  [<ffffffff9dc9ffe5>] ? __wake_up+0x44/0x4b
[   39.402200]  [<ffffffffc04f3240>] ? 0xffffffffc04f3240
[   39.402207]  [<ffffffff9e0f57f3>] erase_effect+0xa1/0xd2
[   39.402214]  [<ffffffff9e0f58c6>] input_ff_flush+0x43/0x5c
[   39.402219]  [<ffffffffc04f32ad>] 0xffffffffc04f32ad
[   39.402227]  [<ffffffff9e0f174f>] input_flush_device+0x3d/0x51
[   39.402234]  [<ffffffff9e0f69ae>] evdev_flush+0x49/0x5c
[   39.402243]  [<ffffffff9dd62d6e>] filp_close+0x3f/0x65
[   39.402253]  [<ffffffff9dd7dcf7>] put_files_struct+0x66/0xc1
[   39.402261]  [<ffffffff9dd7ddeb>] exit_files+0x47/0x4e
[   39.402270]  [<ffffffff9dc6b329>] do_exit+0x483/0x969
[   39.402278]  [<ffffffff9dc73211>] ? recalc_sigpending_tsk+0x3d/0x44
[   39.402285]  [<ffffffff9dc6c7a2>] do_group_exit+0x42/0xb0
[   39.402293]  [<ffffffff9dc767e1>] get_signal+0x58d/0x5bf
[   39.402300]  [<ffffffff9dc03701>] do_signal+0x37/0x53e
[   39.402307]  [<ffffffff9e0f8401>] ? evdev_ioctl_handler+0xac8/0xb04
[   39.402314]  [<ffffffff9e0f8464>] ? evdev_ioctl+0x10/0x12
[   39.402321]  [<ffffffff9dd74cfa>] ? do_vfs_ioctl+0x42e/0x501
[   39.402328]  [<ffffffff9dc0170e>] prepare_exit_to_usermode+0x66/0x90
[   39.402333]  [<ffffffff9dc0181b>] syscall_return_slowpath+0xe3/0xec
[   39.402339]  [<ffffffff9e305b7b>] int_ret_from_sys_call+0x25/0x8f

While we could solve this by simply initializing the completion earlier, we
are better off rearranging the code a bit so we avoid calling complete() on
requests that we did not send out. This patch consolidates marking request
slots as free in one place (in uinput_request_submit(), the same place
where we acquire them) and having everyone else simply signal completion
of the requests.

Fixes: 00ce756ce53a ("Input: uinput - mark failed submission requests as free")
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/uinput.c | 39 ++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 2cff40be8860..443151de90c6 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -98,14 +98,15 @@ static int uinput_request_reserve_slot(struct uinput_device *udev,
 					uinput_request_alloc_id(udev, request));
 }
 
-static void uinput_request_done(struct uinput_device *udev,
-				struct uinput_request *request)
+static void uinput_request_release_slot(struct uinput_device *udev,
+					unsigned int id)
 {
 	/* Mark slot as available */
-	udev->requests[request->id] = NULL;
-	wake_up(&udev->requests_waitq);
+	spin_lock(&udev->requests_lock);
+	udev->requests[id] = NULL;
+	spin_unlock(&udev->requests_lock);
 
-	complete(&request->done);
+	wake_up(&udev->requests_waitq);
 }
 
 static int uinput_request_send(struct uinput_device *udev,
@@ -138,20 +139,22 @@ static int uinput_request_send(struct uinput_device *udev,
 static int uinput_request_submit(struct uinput_device *udev,
 				 struct uinput_request *request)
 {
-	int error;
+	int retval;
 
-	error = uinput_request_reserve_slot(udev, request);
-	if (error)
-		return error;
+	retval = uinput_request_reserve_slot(udev, request);
+	if (retval)
+		return retval;
 
-	error = uinput_request_send(udev, request);
-	if (error) {
-		uinput_request_done(udev, request);
-		return error;
-	}
+	retval = uinput_request_send(udev, request);
+	if (retval)
+		goto out;
 
 	wait_for_completion(&request->done);
-	return request->retval;
+	retval = request->retval;
+
+ out:
+	uinput_request_release_slot(udev, request->id);
+	return retval;
 }
 
 /*
@@ -169,7 +172,7 @@ static void uinput_flush_requests(struct uinput_device *udev)
 		request = udev->requests[i];
 		if (request) {
 			request->retval = -ENODEV;
-			uinput_request_done(udev, request);
+			complete(&request->done);
 		}
 	}
 
@@ -957,7 +960,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 			}
 
 			req->retval = ff_up.retval;
-			uinput_request_done(udev, req);
+			complete(&req->done);
 			goto out;
 
 		case UI_END_FF_ERASE:
@@ -973,7 +976,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 			}
 
 			req->retval = ff_erase.retval;
-			uinput_request_done(udev, req);
+			complete(&req->done);
 			goto out;
 	}
 

From 127b8e2692b6c145955bb71e482ca1bc6ce10027 Mon Sep 17 00:00:00 2001
From: Gabriel Fernandez <gabriel.fernandez@st.com>
Date: Tue, 19 Sep 2017 13:44:06 +0200
Subject: [PATCH 227/391] dt-bindings: clk: stm32h7: fix clock-cell size

The clock-cell size is 1 on stm32h7 plaform.

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Fixes: 3e4d618b0722 ("clk: stm32h7: Add stm32h743 clock driver")
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
index a135504c7d57..cac24ee10b72 100644
--- a/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
+++ b/Documentation/devicetree/bindings/clock/st,stm32h7-rcc.txt
@@ -32,7 +32,7 @@ Example:
 		compatible = "st,stm32h743-rcc", "st,stm32-rcc";
 		reg = <0x58024400 0x400>;
 		#reset-cells = <1>;
-		#clock-cells = <2>;
+		#clock-cells = <1>;
 		clocks = <&clk_hse>, <&clk_lse>, <&clk_i2s_ckin>;
 
 		st,syscfg = <&pwrcfg>;

From 059fbe8b5171960bd5c2371bb327ac18733773de Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 21 Sep 2017 13:27:02 +0200
Subject: [PATCH 228/391] net: phy: Fix truncation of large IRQ numbers in
 phy_attached_print()

Given NR_IRQS is 2048 on sparc64, and even 32784 on alpha, 3 digits is
not enough to represent interrupt numbers on all architectures.  Hence
PHY interrupt numbers may be truncated during printing.

Increase the buffer size from 4 to 8 bytes to fix this.

Fixes: 5e369aefdce4818c ("net: stmmac: Delete dead code for MDIO registration")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8cf0c5901f95..67f25ac29025 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -879,7 +879,7 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
 {
 	const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
 	char *irq_str;
-	char irq_num[4];
+	char irq_num[8];
 
 	switch(phydev->irq) {
 	case PHY_POLL:

From 222d7dbd258dad4cd5241c43ef818141fad5a87a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 21 Sep 2017 09:15:46 -0700
Subject: [PATCH 229/391] net: prevent dst uses after free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In linux-4.13, Wei worked hard to convert dst to a traditional
refcounted model, removing GC.

We now want to make sure a dst refcount can not transition from 0 back
to 1.

The problem here is that input path attached a not refcounted dst to an
skb. Then later, because packet is forwarded and hits skb_dst_force()
before exiting RCU section, we might try to take a refcount on one dst
that is about to be freed, if another cpu saw 1 -> 0 transition in
dst_release() and queued the dst for freeing after one RCU grace period.

Lets unify skb_dst_force() and skb_dst_force_safe(), since we should
always perform the complete check against dst refcount, and not assume
it is not zero.

Bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=197005

[  989.919496]  skb_dst_force+0x32/0x34
[  989.919498]  __dev_queue_xmit+0x1ad/0x482
[  989.919501]  ? eth_header+0x28/0xc6
[  989.919502]  dev_queue_xmit+0xb/0xd
[  989.919504]  neigh_connected_output+0x9b/0xb4
[  989.919507]  ip_finish_output2+0x234/0x294
[  989.919509]  ? ipt_do_table+0x369/0x388
[  989.919510]  ip_finish_output+0x12c/0x13f
[  989.919512]  ip_output+0x53/0x87
[  989.919513]  ip_forward_finish+0x53/0x5a
[  989.919515]  ip_forward+0x2cb/0x3e6
[  989.919516]  ? pskb_trim_rcsum.part.9+0x4b/0x4b
[  989.919518]  ip_rcv_finish+0x2e2/0x321
[  989.919519]  ip_rcv+0x26f/0x2eb
[  989.919522]  ? vlan_do_receive+0x4f/0x289
[  989.919523]  __netif_receive_skb_core+0x467/0x50b
[  989.919526]  ? tcp_gro_receive+0x239/0x239
[  989.919529]  ? inet_gro_receive+0x226/0x238
[  989.919530]  __netif_receive_skb+0x4d/0x5f
[  989.919532]  netif_receive_skb_internal+0x5c/0xaf
[  989.919533]  napi_gro_receive+0x45/0x81
[  989.919536]  ixgbe_poll+0xc8a/0xf09
[  989.919539]  ? kmem_cache_free_bulk+0x1b6/0x1f7
[  989.919540]  net_rx_action+0xf4/0x266
[  989.919543]  __do_softirq+0xa8/0x19d
[  989.919545]  irq_exit+0x5d/0x6b
[  989.919546]  do_IRQ+0x9c/0xb5
[  989.919548]  common_interrupt+0x93/0x93
[  989.919548]  </IRQ>

Similarly dst_clone() can use dst_hold() helper to have additional
debugging, as a follow up to commit 44ebe79149ff ("net: add debug
atomic_inc_not_zero() in dst_hold()")

In net-next we will convert dst atomic_t to refcount_t for peace of
mind.

Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Reported-by: Paweł Staszewski <pstaszewski@itcare.pl>
Bisected-by: Paweł Staszewski <pstaszewski@itcare.pl>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h   | 22 ++++------------------
 include/net/route.h |  2 +-
 include/net/sock.h  |  2 +-
 3 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 93568bd0a352..06a6765da074 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
 static inline struct dst_entry *dst_clone(struct dst_entry *dst)
 {
 	if (dst)
-		atomic_inc(&dst->__refcnt);
+		dst_hold(dst);
 	return dst;
 }
 
@@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
 	__skb_dst_copy(nskb, oskb->_skb_refdst);
 }
 
-/**
- * skb_dst_force - makes sure skb dst is refcounted
- * @skb: buffer
- *
- * If dst is not yet refcounted, let's do it
- */
-static inline void skb_dst_force(struct sk_buff *skb)
-{
-	if (skb_dst_is_noref(skb)) {
-		WARN_ON(!rcu_read_lock_held());
-		skb->_skb_refdst &= ~SKB_DST_NOREF;
-		dst_clone(skb_dst(skb));
-	}
-}
-
 /**
  * dst_hold_safe - Take a reference on a dst if possible
  * @dst: pointer to dst entry
@@ -339,16 +324,17 @@ static inline bool dst_hold_safe(struct dst_entry *dst)
 }
 
 /**
- * skb_dst_force_safe - makes sure skb dst is refcounted
+ * skb_dst_force - makes sure skb dst is refcounted
  * @skb: buffer
  *
  * If dst is not yet refcounted and not destroyed, grab a ref on it.
  */
-static inline void skb_dst_force_safe(struct sk_buff *skb)
+static inline void skb_dst_force(struct sk_buff *skb)
 {
 	if (skb_dst_is_noref(skb)) {
 		struct dst_entry *dst = skb_dst(skb);
 
+		WARN_ON(!rcu_read_lock_held());
 		if (!dst_hold_safe(dst))
 			dst = NULL;
 
diff --git a/include/net/route.h b/include/net/route.h
index 1b09a9368c68..57dfc6850d37 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -190,7 +190,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 	rcu_read_lock();
 	err = ip_route_input_noref(skb, dst, src, tos, devin);
 	if (!err) {
-		skb_dst_force_safe(skb);
+		skb_dst_force(skb);
 		if (!skb_dst(skb))
 			err = -EINVAL;
 	}
diff --git a/include/net/sock.h b/include/net/sock.h
index 03a362568357..a6b9a8d1a6df 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -856,7 +856,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
-	skb_dst_force_safe(skb);
+	skb_dst_force(skb);
 
 	if (!sk->sk_backlog.tail)
 		sk->sk_backlog.head = skb;

From e87be9b29c22852ec300826e3b1d551b00c1eb7a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 14 Sep 2017 14:30:43 +0200
Subject: [PATCH 230/391] mmc: tmio: remove broken and noisy debug macro

Some change for v4.14 broke the debug output for TMIO. But since it was
not helpful to me and too noisy for my taste anyhow, let's just remove
it instead of fixing it. We'll find something better if we'd need it...

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc_core.c | 47 --------------------------------
 1 file changed, 47 deletions(-)

diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 12cf8288d663..a7293e186e03 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -129,50 +129,6 @@ static int tmio_mmc_next_sg(struct tmio_mmc_host *host)
 
 #define CMDREQ_TIMEOUT	5000
 
-#ifdef CONFIG_MMC_DEBUG
-
-#define STATUS_TO_TEXT(a, status, i) \
-	do { \
-		if ((status) & TMIO_STAT_##a) { \
-			if ((i)++) \
-				printk(KERN_DEBUG " | "); \
-			printk(KERN_DEBUG #a); \
-		} \
-	} while (0)
-
-static void pr_debug_status(u32 status)
-{
-	int i = 0;
-
-	pr_debug("status: %08x = ", status);
-	STATUS_TO_TEXT(CARD_REMOVE, status, i);
-	STATUS_TO_TEXT(CARD_INSERT, status, i);
-	STATUS_TO_TEXT(SIGSTATE, status, i);
-	STATUS_TO_TEXT(WRPROTECT, status, i);
-	STATUS_TO_TEXT(CARD_REMOVE_A, status, i);
-	STATUS_TO_TEXT(CARD_INSERT_A, status, i);
-	STATUS_TO_TEXT(SIGSTATE_A, status, i);
-	STATUS_TO_TEXT(CMD_IDX_ERR, status, i);
-	STATUS_TO_TEXT(STOPBIT_ERR, status, i);
-	STATUS_TO_TEXT(ILL_FUNC, status, i);
-	STATUS_TO_TEXT(CMD_BUSY, status, i);
-	STATUS_TO_TEXT(CMDRESPEND, status, i);
-	STATUS_TO_TEXT(DATAEND, status, i);
-	STATUS_TO_TEXT(CRCFAIL, status, i);
-	STATUS_TO_TEXT(DATATIMEOUT, status, i);
-	STATUS_TO_TEXT(CMDTIMEOUT, status, i);
-	STATUS_TO_TEXT(RXOVERFLOW, status, i);
-	STATUS_TO_TEXT(TXUNDERRUN, status, i);
-	STATUS_TO_TEXT(RXRDY, status, i);
-	STATUS_TO_TEXT(TXRQ, status, i);
-	STATUS_TO_TEXT(ILL_ACCESS, status, i);
-	printk("\n");
-}
-
-#else
-#define pr_debug_status(s)  do { } while (0)
-#endif
-
 static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
 	struct tmio_mmc_host *host = mmc_priv(mmc);
@@ -762,9 +718,6 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid)
 	status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
 	ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
 
-	pr_debug_status(status);
-	pr_debug_status(ireg);
-
 	/* Clear the status except the interrupt status */
 	sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);
 

From 6ae033689d7b1a419def78e8e990b0eab8bb6419 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 18 Sep 2017 15:16:08 +0300
Subject: [PATCH 231/391] mmc: sdhci-pci: Fix voltage switch for some Intel
 host controllers

Some Intel host controllers (e.g. CNP) use an ACPI device-specific method
to ensure correct voltage switching. Fix voltage switch for those, by
adding a call to the DSM.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-core.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index bbaddf18a1b3..d0ccc6729fd2 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -392,6 +392,7 @@ static const struct sdhci_pci_fixes sdhci_intel_pch_sdio = {
 
 enum {
 	INTEL_DSM_FNS		=  0,
+	INTEL_DSM_V18_SWITCH	=  3,
 	INTEL_DSM_DRV_STRENGTH	=  9,
 	INTEL_DSM_D3_RETUNE	= 10,
 };
@@ -557,6 +558,19 @@ static void intel_hs400_enhanced_strobe(struct mmc_host *mmc,
 	sdhci_writel(host, val, INTEL_HS400_ES_REG);
 }
 
+static void sdhci_intel_voltage_switch(struct sdhci_host *host)
+{
+	struct sdhci_pci_slot *slot = sdhci_priv(host);
+	struct intel_host *intel_host = sdhci_pci_priv(slot);
+	struct device *dev = &slot->chip->pdev->dev;
+	u32 result = 0;
+	int err;
+
+	err = intel_dsm(intel_host, dev, INTEL_DSM_V18_SWITCH, &result);
+	pr_debug("%s: %s DSM error %d result %u\n",
+		 mmc_hostname(host->mmc), __func__, err, result);
+}
+
 static const struct sdhci_ops sdhci_intel_byt_ops = {
 	.set_clock		= sdhci_set_clock,
 	.set_power		= sdhci_intel_set_power,
@@ -565,6 +579,7 @@ static const struct sdhci_ops sdhci_intel_byt_ops = {
 	.reset			= sdhci_reset,
 	.set_uhs_signaling	= sdhci_set_uhs_signaling,
 	.hw_reset		= sdhci_pci_hw_reset,
+	.voltage_switch		= sdhci_intel_voltage_switch,
 };
 
 static void byt_read_dsm(struct sdhci_pci_slot *slot)

From c0d05cde2a685cd6486390c62be684ce456d84d6 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 21 Sep 2017 11:20:58 +0100
Subject: [PATCH 232/391] iommu/of: Remove PCI host bridge node check

of_pci_iommu_init() tries to be clever and stop its alias walk at the
device represented by master_np, in case of weird PCI topologies where
the bridge to the IOMMU and the rest of the system is not at the root.
It turns out this is a bit short-sighted, since there are plenty of
other callers of pci_for_each_dma_alias() which would also need the same
behaviour in that situation, and the only platform so far with such a
topology (Cavium ThunderX2) already solves it more generally via a PCI
quirk. As this check is effectively redundant, and returning a boolean
value as an int is a bit broken anyway, let's just get rid of it.

Reported-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Fixes: d87beb749281 ("iommu/of: Handle PCI aliases properly")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/of_iommu.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index e60e3dba85a0..50947ebb6d17 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -157,10 +157,7 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 
 	err = of_iommu_xlate(info->dev, &iommu_spec);
 	of_node_put(iommu_spec.np);
-	if (err)
-		return err;
-
-	return info->np == pdev->bus->dev.of_node;
+	return err;
 }
 
 const struct iommu_ops *of_iommu_configure(struct device *dev,

From a88dc7ba15cd4f4ef5102b5185f8fa7ff86e54e1 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Wed, 20 Sep 2017 12:26:38 +0530
Subject: [PATCH 233/391] drivers/perf: arm_pmu_acpi: Release memory obtained
 by kasprintf

Free memory region, if arm_pmu_acpi_probe is not successful.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/perf/arm_pmu_acpi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index 0a9b78705ee8..3303dd8d8eb5 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -235,6 +235,7 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 		ret = armpmu_register(pmu);
 		if (ret) {
 			pr_warn("Failed to register PMU for CPU%d\n", cpu);
+			kfree(pmu->name);
 			return ret;
 		}
 	}

From e6f9bc34d3779cb7b6a337afed5de8be3f0fab77 Mon Sep 17 00:00:00 2001
From: Alex Estrin <alex.estrin@intel.com>
Date: Thu, 31 Aug 2017 09:30:34 -0700
Subject: [PATCH 234/391] IB/core: Fix for core panic

Build with the latest patches resulted in panic:
11384.486289] BUG: unable to handle kernel NULL pointer dereference at
         (null)
[11384.486293] IP:           (null)
[11384.486295] PGD 0
[11384.486295] P4D 0
[11384.486296]
[11384.486299] Oops: 0010 [#1] SMP
......... snip ......
[11384.486401] CPU: 0 PID: 968 Comm: kworker/0:1H Tainted: G        W  O
    4.13.0-a-stream-20170825 #1
[11384.486402] Hardware name: Intel Corporation S2600WT2R/S2600WT2R,
BIOS SE5C610.86B.01.01.0014.121820151719 12/18/2015
[11384.486418] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core]
[11384.486419] task: ffff880850579680 task.stack: ffffc90007fec000
[11384.486420] RIP: 0010:          (null)
[11384.486420] RSP: 0018:ffffc90007fef970 EFLAGS: 00010206
[11384.486421] RAX: ffff88084cfe8000 RBX: ffff88084dce4000 RCX:
ffffc90007fef978
[11384.486422] RDX: 0000000000000000 RSI: 0000000000000001 RDI:
ffff88084cfe8000
[11384.486422] RBP: ffffc90007fefab0 R08: 0000000000000000 R09:
ffff88084dce4080
[11384.486423] R10: ffffffffa02d7f60 R11: 0000000000000000 R12:
ffff88105af65a00
[11384.486423] R13: ffff88084dce4000 R14: 000000000000c000 R15:
000000000000c000
[11384.486424] FS:  0000000000000000(0000) GS:ffff88085f400000(0000)
knlGS:0000000000000000
[11384.486425] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[11384.486425] CR2: 0000000000000000 CR3: 0000000001c09000 CR4:
00000000001406f0
[11384.486426] Call Trace:
[11384.486431]  ? is_valid_mcast_lid.isra.21+0xfb/0x110 [ib_core]
[11384.486436]  ib_attach_mcast+0x6f/0xa0 [ib_core]
[11384.486441]  ipoib_mcast_attach+0x81/0x190 [ib_ipoib]
[11384.486443]  ipoib_mcast_join_complete+0x354/0xb40 [ib_ipoib]
[11384.486448]  mcast_work_handler+0x330/0x6c0 [ib_core]
[11384.486452]  join_handler+0x101/0x220 [ib_core]
[11384.486455]  ib_sa_mcmember_rec_callback+0x54/0x80 [ib_core]
[11384.486459]  recv_handler+0x3a/0x60 [ib_core]
[11384.486462]  ib_mad_recv_done+0x423/0x9b0 [ib_core]
[11384.486466]  __ib_process_cq+0x5d/0xb0 [ib_core]
[11384.486469]  ib_cq_poll_work+0x20/0x60 [ib_core]
[11384.486472]  process_one_work+0x149/0x360
[11384.486474]  worker_thread+0x4d/0x3c0
[11384.486487]  kthread+0x109/0x140
[11384.486488]  ? rescuer_thread+0x380/0x380
[11384.486489]  ? kthread_park+0x60/0x60
[11384.486490]  ? kthread_park+0x60/0x60
[11384.486493]  ret_from_fork+0x25/0x30
[11384.486493] Code:  Bad RIP value.
[11384.486493] Code:  Bad RIP value.
[11384.486496] RIP:           (null) RSP: ffffc90007fef970
[11384.486497] CR2: 0000000000000000
[11384.486531] ---[ end trace b1acec6fb4ff6e75 ]---
[11384.532133] Kernel panic - not syncing: Fatal exception
[11384.536541] Kernel Offset: disabled
[11384.969491] ---[ end Kernel panic - not syncing: Fatal exception
[11384.976875] sched: Unexpected reschedule of offline CPU#1!
[11384.983646] ------------[ cut here ]------------

Rdma device driver may not have implemented (*get_link_layer)()
so it can not be called directly. Should use appropriate helper function.

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Fixes: 523633359224 ("IB/core: Fix the validations of a multicast LID in attach or detach operations")
Cc: stable@kernel.org # 4.13
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Alex Estrin <alex.estrin@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/verbs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index ee9e27dc799b..de57d6c11a25 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1646,7 +1646,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
 	 */
 	if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
 		if (attr.qp_state >= IB_QPS_INIT) {
-			if (qp->device->get_link_layer(qp->device, attr.port_num) !=
+			if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
 			    IB_LINK_LAYER_INFINIBAND)
 				return true;
 			goto lid_check;
@@ -1655,7 +1655,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
 
 	/* Can't get a quick answer, iterate over all ports */
 	for (port = 0; port < qp->device->phys_port_cnt; port++)
-		if (qp->device->get_link_layer(qp->device, port) !=
+		if (rdma_port_get_link_layer(qp->device, port) !=
 		    IB_LINK_LAYER_INFINIBAND)
 			num_eth_ports++;
 

From 3d318605f5e32ff44fb290d9b67573b34213c4c8 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 13 Sep 2017 09:52:32 -0700
Subject: [PATCH 235/391] iw_cxgb4: put ep reference in pass_accept_req()

The listening endpoint should always be dereferenced at the end of
pass_accept_req().

Fixes: f86fac79afec ("RDMA/iw_cxgb4: atomic find and reference for listening endpoints")

Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index ceaa2fa54d32..83322dbc4711 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2594,9 +2594,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	c4iw_put_ep(&child_ep->com);
 reject:
 	reject_cr(dev, hwtid, skb);
+out:
 	if (parent_ep)
 		c4iw_put_ep(&parent_ep->com);
-out:
 	return 0;
 }
 

From 3c8415cc7aff467faba25841fb859660ac14a04e Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 5 Sep 2017 11:52:33 -0700
Subject: [PATCH 236/391] iw_cxgb4: drop listen destroy replies if no ep found

If the thread waiting for a CLOSE_LISTSRV_RPL times out and bails,
then we need to handle a subsequent CPL if it arrives and the stid has
been released.  In this case silently drop it.

Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 83322dbc4711..19297e408820 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2333,9 +2333,14 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	unsigned int stid = GET_TID(rpl);
 	struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
 
+	if (!ep) {
+		pr_debug("%s stid %d lookup failure!\n", __func__, stid);
+		goto out;
+	}
 	pr_debug("%s ep %p\n", __func__, ep);
 	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
 	c4iw_put_ep(&ep->com);
+out:
 	return 0;
 }
 

From 8b1bbf36b7452c4acb20e91948eaa5e225ea6978 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 5 Sep 2017 11:52:34 -0700
Subject: [PATCH 237/391] iw_cxgb4: remove the stid on listen create failure

If a listen create fails, then the server tid (stid) is incorrectly left
in the stid idr table, which can cause a touch-after-free if the stid
is looked up and the already freed endpoint is touched.  So make sure
and remove it in the error path.

Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/cxgb4/cm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 19297e408820..daf7a56e5d7e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3462,7 +3462,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 		cm_id->provider_data = ep;
 		goto out;
 	}
-
+	remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
 			ep->com.local_addr.ss_family);
 fail2:

From 05f5c38576475439f3124a3e6d62db68de83f7be Mon Sep 17 00:00:00 2001
From: KT Liao <kt.liao@emc.com.tw>
Date: Fri, 22 Sep 2017 10:00:57 -0700
Subject: [PATCH 238/391] Input: elan_i2c - extend Flash-Write delay

The original 20ms delay is only marginally enough delay after a block write
operation during firmware update. Let's increase the delay to ensure that
the controller finishes up storing the page to avoid failures in the
firmware updates.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_i2c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c
index 15b1330606c1..e19eb60b3d2f 100644
--- a/drivers/input/mouse/elan_i2c_i2c.c
+++ b/drivers/input/mouse/elan_i2c_i2c.c
@@ -598,7 +598,7 @@ static int elan_i2c_write_fw_block(struct i2c_client *client,
 	}
 
 	/* Wait for F/W to update one page ROM data. */
-	msleep(20);
+	msleep(35);
 
 	error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val);
 	if (error) {

From af3c79beff48fb2bd1c79d02688004cf57215acf Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Date: Thu, 7 Sep 2017 13:38:18 +0300
Subject: [PATCH 239/391] IB/ipoib: Suppress the retry related completion
 errors

IPoIB doesn't support transport/rnr retry schemes as per
RFC so those errors are expected. No need to flood the
log files with them.

Tested-by: Michael Nowak <michael.nowak@oracle.com>
Tested-by: Rafael Alejandro Peralez <rafael.peralez@oracle.com>
Tested-by: Liwen Huang <liwen.huang@oracle.com>
Tested-by: Hong Liu <hong.x.liu@oracle.com>
Reviewed-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Reported-by: Rajiv Raja <rajiv.raja@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 14b62f7472b4..7774654c2ccb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -823,12 +823,18 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 	    wc->status != IB_WC_WR_FLUSH_ERR) {
 		struct ipoib_neigh *neigh;
 
-		if (wc->status != IB_WC_RNR_RETRY_EXC_ERR)
-			ipoib_warn(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
-				   wc->status, wr_id, wc->vendor_err);
+		/* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle,
+		 * so don't make waves.
+		 */
+		if (wc->status == IB_WC_RNR_RETRY_EXC_ERR ||
+		    wc->status == IB_WC_RETRY_EXC_ERR)
+			ipoib_dbg(priv,
+				  "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n",
+				   __func__, wc->status, wr_id, wc->vendor_err);
 		else
-			ipoib_dbg(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
-				  wc->status, wr_id, wc->vendor_err);
+			ipoib_warn(priv,
+				    "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n",
+				   __func__, wc->status, wr_id, wc->vendor_err);
 
 		spin_lock_irqsave(&priv->lock, flags);
 		neigh = tx->neigh;

From 06564f60859bdf7e73d70ae35d7e285e96ae9c46 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 11 Sep 2017 17:03:13 +0100
Subject: [PATCH 240/391] IB/ocrdma: fix incorrect fall-through on switch
 statement

In the case where mbox_status is OCRDMA_MBX_STATUS_FAILED and
add_status is OCRDMA_MBX_STATUS_FAILED err_num is assigned -EAGAIN
however the case OCRDMA_MBX_STATUS_FAILED is missing a break and
falls through to the default case which then re-assigns err_num
to -EFAULT.   Fix this so that err_num is assigned to -EAGAIN
for the add_status OCRDMA_MBX_STATUS_FAILED case and -EFAULT
otherwise.

Detected by CoverityScan CID#703125 ("Missing break in switch")

Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index dcb5942f9fb5..65b166cc7437 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -252,7 +252,10 @@ static int ocrdma_get_mbx_errno(u32 status)
 		case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES:
 			err_num = -EAGAIN;
 			break;
+		default:
+			err_num = -EFAULT;
 		}
+		break;
 	default:
 		err_num = -EFAULT;
 	}

From cbafad87e1507044c7d442087d41d5e3d432cc4e Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Mon, 18 Sep 2017 12:28:48 +0100
Subject: [PATCH 241/391] IB/mlx5: fix debugfs cleanup

If delay_drop_debugfs_init() fails in any of the operations to create
debugfs, it is calling delay_drop_debugfs_cleanup() as part of its
cleanup. But delay_drop_debugfs_cleanup() checks for 'dbg' and since
we have not yet pointed 'dbg' to the debugfs we need to cleanup, the
cleanup fails and we are left with stray debugfs elements and also a
memory leak.

Fixes: 4a5fd5d2965c ("IB/mlx5: Add necessary delay drop assignment")
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ab3c562d5ba7..05fb4bdff6a0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3837,11 +3837,13 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
 	if (!dbg)
 		return -ENOMEM;
 
+	dev->delay_drop.dbg = dbg;
+
 	dbg->dir_debugfs =
 		debugfs_create_dir("delay_drop",
 				   dev->mdev->priv.dbg_root);
 	if (!dbg->dir_debugfs)
-		return -ENOMEM;
+		goto out_debugfs;
 
 	dbg->events_cnt_debugfs =
 		debugfs_create_atomic_t("num_timeout_events", 0400,
@@ -3865,8 +3867,6 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
 	if (!dbg->timeout_debugfs)
 		goto out_debugfs;
 
-	dev->delay_drop.dbg = dbg;
-
 	return 0;
 
 out_debugfs:

From e13547bc181ae6c279adf0df054717787f24ee89 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Tue, 19 Sep 2017 13:22:13 +0300
Subject: [PATCH 242/391] IB/bnxt_re: Fix frame stack compilation warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reduce stack size by dynamically allocating memory instead
of declaring large struct on the stack:

drivers/infiniband/hw/bnxt_re/ib_verbs.c: In function ‘bnxt_re_query_qp’:
drivers/infiniband/hw/bnxt_re/ib_verbs.c:1600:1: warning: the frame size of 1216 bytes is larger than 1024 bytes [-Wframe-larger-than=]
 }
 ^

Cc: Selvin Xavier <selvin.xavier@broadcom.com>
Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver")
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Acked-by: Selvin Xavier <selvin.xavier@broadcom.com>
Reviewed-by: Jonathan Toppins <jtoppins@redhat.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 67 +++++++++++++-----------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 01eee15bbd65..03caf48af8e2 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1551,43 +1551,46 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 {
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_re_dev *rdev = qp->rdev;
-	struct bnxt_qplib_qp qplib_qp;
+	struct bnxt_qplib_qp *qplib_qp;
 	int rc;
 
-	memset(&qplib_qp, 0, sizeof(struct bnxt_qplib_qp));
-	qplib_qp.id = qp->qplib_qp.id;
-	qplib_qp.ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+	qplib_qp = kzalloc(sizeof(*qplib_qp), GFP_KERNEL);
+	if (!qplib_qp)
+		return -ENOMEM;
 
-	rc = bnxt_qplib_query_qp(&rdev->qplib_res, &qplib_qp);
+	qplib_qp->id = qp->qplib_qp.id;
+	qplib_qp->ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+
+	rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp);
 	if (rc) {
 		dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
-		return rc;
+		goto out;
 	}
-	qp_attr->qp_state = __to_ib_qp_state(qplib_qp.state);
-	qp_attr->en_sqd_async_notify = qplib_qp.en_sqd_async_notify ? 1 : 0;
-	qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access);
-	qp_attr->pkey_index = qplib_qp.pkey_index;
-	qp_attr->qkey = qplib_qp.qkey;
+	qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
+	qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
+	qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
+	qp_attr->pkey_index = qplib_qp->pkey_index;
+	qp_attr->qkey = qplib_qp->qkey;
 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
-	rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp.ah.flow_label,
-			qplib_qp.ah.host_sgid_index,
-			qplib_qp.ah.hop_limit,
-			qplib_qp.ah.traffic_class);
-	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp.ah.dgid.data);
-	rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp.ah.sl);
-	ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp.ah.dmac);
-	qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu);
-	qp_attr->timeout = qplib_qp.timeout;
-	qp_attr->retry_cnt = qplib_qp.retry_cnt;
-	qp_attr->rnr_retry = qplib_qp.rnr_retry;
-	qp_attr->min_rnr_timer = qplib_qp.min_rnr_timer;
-	qp_attr->rq_psn = qplib_qp.rq.psn;
-	qp_attr->max_rd_atomic = qplib_qp.max_rd_atomic;
-	qp_attr->sq_psn = qplib_qp.sq.psn;
-	qp_attr->max_dest_rd_atomic = qplib_qp.max_dest_rd_atomic;
-	qp_init_attr->sq_sig_type = qplib_qp.sig_type ? IB_SIGNAL_ALL_WR :
-							IB_SIGNAL_REQ_WR;
-	qp_attr->dest_qp_num = qplib_qp.dest_qpn;
+	rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->ah.flow_label,
+			qplib_qp->ah.host_sgid_index,
+			qplib_qp->ah.hop_limit,
+			qplib_qp->ah.traffic_class);
+	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp->ah.dgid.data);
+	rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp->ah.sl);
+	ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp->ah.dmac);
+	qp_attr->path_mtu = __to_ib_mtu(qplib_qp->path_mtu);
+	qp_attr->timeout = qplib_qp->timeout;
+	qp_attr->retry_cnt = qplib_qp->retry_cnt;
+	qp_attr->rnr_retry = qplib_qp->rnr_retry;
+	qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer;
+	qp_attr->rq_psn = qplib_qp->rq.psn;
+	qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic;
+	qp_attr->sq_psn = qplib_qp->sq.psn;
+	qp_attr->max_dest_rd_atomic = qplib_qp->max_dest_rd_atomic;
+	qp_init_attr->sq_sig_type = qplib_qp->sig_type ? IB_SIGNAL_ALL_WR :
+							 IB_SIGNAL_REQ_WR;
+	qp_attr->dest_qp_num = qplib_qp->dest_qpn;
 
 	qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe;
 	qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge;
@@ -1596,7 +1599,9 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data;
 	qp_init_attr->cap = qp_attr->cap;
 
-	return 0;
+out:
+	kfree(qplib_qp);
+	return rc;
 }
 
 /* Routine for sending QP1 packets for RoCE V1 an V2

From 01df7f5a77b926c2d790cf66d942d2a68d4ad5de Mon Sep 17 00:00:00 2001
From: Adit Ranadive <aditr@vmware.com>
Date: Thu, 21 Sep 2017 15:56:21 -0700
Subject: [PATCH 243/391] RDMA/vmw_pvrdma: Fix reporting correct opcodes for
 completion

Since the IB_WC_BIND_MW opcode has been dropped, set the correct
IB WC opcode explicitly.

Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
Reviewed-by: Aditya Sarwade <asarwade@vmware.com>
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
Signed-off-by: Bryan Tan <bryantan@vmware.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 29 +++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 663a0c301c43..984aa3484928 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -416,9 +416,34 @@ static inline enum ib_wc_status pvrdma_wc_status_to_ib(
 	return (enum ib_wc_status)status;
 }
 
-static inline int pvrdma_wc_opcode_to_ib(int opcode)
+static inline int pvrdma_wc_opcode_to_ib(unsigned int opcode)
 {
-	return opcode;
+	switch (opcode) {
+	case PVRDMA_WC_SEND:
+		return IB_WC_SEND;
+	case PVRDMA_WC_RDMA_WRITE:
+		return IB_WC_RDMA_WRITE;
+	case PVRDMA_WC_RDMA_READ:
+		return IB_WC_RDMA_READ;
+	case PVRDMA_WC_COMP_SWAP:
+		return IB_WC_COMP_SWAP;
+	case PVRDMA_WC_FETCH_ADD:
+		return IB_WC_FETCH_ADD;
+	case PVRDMA_WC_LOCAL_INV:
+		return IB_WC_LOCAL_INV;
+	case PVRDMA_WC_FAST_REG_MR:
+		return IB_WC_REG_MR;
+	case PVRDMA_WC_MASKED_COMP_SWAP:
+		return IB_WC_MASKED_COMP_SWAP;
+	case PVRDMA_WC_MASKED_FETCH_ADD:
+		return IB_WC_MASKED_FETCH_ADD;
+	case PVRDMA_WC_RECV:
+		return IB_WC_RECV;
+	case PVRDMA_WC_RECV_RDMA_WITH_IMM:
+		return IB_WC_RECV_RDMA_WITH_IMM;
+	default:
+		return IB_WC_SEND;
+	}
 }
 
 static inline int pvrdma_wc_flags_to_ib(int flags)

From cd9100ca9e78eb03bf5a0ee2ba98ebdc8cc5880e Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:09 -0500
Subject: [PATCH 244/391] i40iw: Fail open if there are no available MSI-X
 vectors

Check number of available MSI-X vectors for i40iw.
If there are no available vectors, fail the open.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_main.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index cc742c3132c6..5965b59a2f83 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1400,6 +1400,11 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
 	u32 i;
 	u32 size;
 
+	if (!ldev->msix_count) {
+		i40iw_pr_err("No MSI-X vectors\n");
+		return I40IW_ERR_CONFIG;
+	}
+
 	iwdev->msix_count = ldev->msix_count;
 
 	size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
@@ -1550,7 +1555,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
 
 	status = i40iw_save_msix_info(iwdev, ldev);
 	if (status)
-		goto exit;
+		return status;
 	iwdev->hw.dev_context = (void *)ldev->pcidev;
 	iwdev->hw.hw_addr = ldev->hw_addr;
 	status = i40iw_allocate_dma_mem(&iwdev->hw,

From 47fb3c1610b28dd435b892762280eebf04fe9adf Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:10 -0500
Subject: [PATCH 245/391] i40iw: Prevent multiple netdev event notifier
 registrations

Netdev event notifier registration/de-registration is not
synchronized with a lock and there is a possibility of a
duplicate registration of notifier before the unregister
completes.

Register netdev event notifiers during module init and
de-register them at module exit.

This avoids the need to tie the registration to first netdev
client interface open and de-registration to last client
interface close and the synchronization to achieve it.

This also fixes a crash due to duplicate registration.

BUG: unable to handle kernel paging request at ffffffffa0d60388
IP: [<ffffffff8160f75d>] notifier_call_chain+0x3d/0x70
PGD 190d067 PUD 190e063 PMD 76c840067 PTE 0
Oops: 0000 [#1] SMP
Modules linked in: i40e(OF-) fuse btrfs zlib_deflate raid6_pq xor vfat msdos
[..]
e1000e vxlan ip_tunnel ptp pps_core i2c_core video [last unloaded: i40iw]
CPU: 1 PID: 27101 Comm: modprobe Tainted: GF       W  O--------------   3.10.0-229.el7.x86_64 #1
Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Q87M-D2H, BIOS F7 01/17/2014
task: ffff88076e8a96c0 ti: ffff8806959c8000 task.ti: ffff8806959c8000
RIP: 0010:[<ffffffff8160f75d>]  [<ffffffff8160f75d>] notifier_call_chain+0x3d/0x70
RSP: 0018:ffff8806959cbb38  EFLAGS: 00010282
RAX: ffffffffa0d60380 RBX: 00000000fffffffd RCX: 0000000000000000
0708] RDX: 0000000000000000 RSI: ffff88081227a000 RDI: 0000000000000002
RBP: ffff8806959cbb60 R08: 0000000000000246 R09: 000000000000700c
R10: ffff88080e16ea40 R11: 00000000000ae8df R12: ffffffffa0d60380
R13: 0000000000000002 R14: ffff88076e738800 R15: 0000000000000000
FS:  00007f604ef4a740(0000) GS:ffff88083e240000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffa0d60388 CR3: 0000000753cd2000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffffffff819e73a0 0000000000000000 0000000000000002 ffff88076e738800
00000000ffffffff ffff8806959cbba0 ffffffff8109d61d 0000000000000000
0000000000000000 ffff88076e738800 0000000000000000 ffff88076e738800
Call Trace:
[<ffffffff8109d61d>] __blocking_notifier_call_chain+0x4d/0x70
[<ffffffff8109d656>] blocking_notifier_call_chain+0x16/0x20
[<ffffffff8156b9e4>] __inet_del_ifa+0x154/0x2b0
[<ffffffff8156d102>] inetdev_event+0x182/0x530
[<ffffffff8160f76c>] notifier_call_chain+0x4c/0x70
[<ffffffff8109d446>] raw_notifier_call_chain+0x16/0x20
[<ffffffff814f71fd>] call_netdevice_notifiers+0x2d/0x60
[<ffffffff814f8845>] rollback_registered_many+0x105/0x220
[<ffffffff814f89a0>] rollback_registered+0x40/0x70
[<ffffffff814f9c88>] unregister_netdevice_queue+0x48/0x80
[<ffffffff814f9cdc>] unregister_netdev+0x1c/0x30
[<ffffffffa0067139>] i40e_vsi_release+0x2a9/0x2b0 [i40e]
[<ffffffffa00674e8>] i40e_remove+0x128/0x2b0 [i40e]
[<ffffffff813092db>] pci_device_remove+0x3b/0xb0
[<ffffffff813d26ef>] __device_release_driver+0x7f/0xf0
[<ffffffff813d3068>] driver_detach+0xb8/0xc0
[<ffffffff813d22db>] bus_remove_driver+0x9b/0x120
[<ffffffff813d36dc>] driver_unregister+0x2c/0x50
[<ffffffff81307d4c>] pci_unregister_driver+0x2c/0x90
[<ffffffffa008f9d0>] i40e_exit_module+0x10/0x23 [i40e]
[<ffffffff810dad0b>] SyS_delete_module+0x16b/0x2d0
[<ffffffff81013b0c>] ? do_notify_resume+0x9c/0xb0
[<ffffffff81613da9>] system_call_fastpath+0x16/0x1b
Code: e5 41 57 4d 89 c7 41 56 49 89 d6 41 55 49 89 f5 41 54 53 89 cb
75 14 eb 3d 0f 1f 44 00 00 83 eb 01 74 25 4d 85 e4 74 20 4c 89 e0 <4c>
8b 60 08 4c 89 f2 4c 89 ee 48 89 c7 ff 10 4d 85 ff 74 04 41
RIP  [<ffffffff8160f75d>] notifier_call_chain+0x3d/0x70

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw.h       |  1 -
 drivers/infiniband/hw/i40iw/i40iw_main.c  | 32 ++++++++++++-----------
 drivers/infiniband/hw/i40iw/i40iw_utils.c |  6 ++---
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 9b1566468744..a65e4cbdce2f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -201,7 +201,6 @@ enum init_completion_state {
 	CEQ_CREATED,
 	ILQ_CREATED,
 	IEQ_CREATED,
-	INET_NOTIFIER,
 	IP_ADDR_REGISTERED,
 	RDMA_DEV_REGISTERED
 };
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 5965b59a2f83..27590ae21881 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -99,8 +99,6 @@ static struct notifier_block i40iw_net_notifier = {
 	.notifier_call = i40iw_net_event
 };
 
-static atomic_t i40iw_notifiers_registered;
-
 /**
  * i40iw_find_i40e_handler - find a handler given a client info
  * @ldev: pointer to a client info
@@ -1376,11 +1374,20 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
  */
 static void i40iw_register_notifiers(void)
 {
-	if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
-		register_inetaddr_notifier(&i40iw_inetaddr_notifier);
-		register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
-		register_netevent_notifier(&i40iw_net_notifier);
-	}
+	register_inetaddr_notifier(&i40iw_inetaddr_notifier);
+	register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+	register_netevent_notifier(&i40iw_net_notifier);
+}
+
+/**
+ * i40iw_unregister_notifiers - unregister tcp ip notifiers
+ */
+
+static void i40iw_unregister_notifiers(void)
+{
+	unregister_netevent_notifier(&i40iw_net_notifier);
+	unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
+	unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
 }
 
 /**
@@ -1467,12 +1474,6 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev)
 		if (!iwdev->reset)
 			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
 		/* fallthrough */
-	case INET_NOTIFIER:
-		if (!atomic_dec_return(&i40iw_notifiers_registered)) {
-			unregister_netevent_notifier(&i40iw_net_notifier);
-			unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
-			unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
-		}
 		/* fallthrough */
 	case PBLE_CHUNK_MEM:
 		i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
@@ -1672,8 +1673,6 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
 			break;
 		iwdev->init_state = PBLE_CHUNK_MEM;
 		iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
-		i40iw_register_notifiers();
-		iwdev->init_state = INET_NOTIFIER;
 		status = i40iw_add_mac_ip(iwdev);
 		if (status)
 			break;
@@ -2023,6 +2022,8 @@ static int __init i40iw_init_module(void)
 	i40iw_client.type = I40E_CLIENT_IWARP;
 	spin_lock_init(&i40iw_handler_lock);
 	ret = i40e_register_client(&i40iw_client);
+	i40iw_register_notifiers();
+
 	return ret;
 }
 
@@ -2034,6 +2035,7 @@ static int __init i40iw_init_module(void)
  */
 static void __exit i40iw_exit_module(void)
 {
+	i40iw_unregister_notifiers();
 	i40e_unregister_client(&i40iw_client);
 }
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 62f1f45b8737..e52dbbb4165e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -160,7 +160,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
 		return NOTIFY_DONE;
 
 	iwdev = &hdl->device;
-	if (iwdev->init_state < INET_NOTIFIER)
+	if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
 		return NOTIFY_DONE;
 
 	netdev = iwdev->ldev->netdev;
@@ -217,7 +217,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
 		return NOTIFY_DONE;
 
 	iwdev = &hdl->device;
-	if (iwdev->init_state < INET_NOTIFIER)
+	if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
 		return NOTIFY_DONE;
 
 	netdev = iwdev->ldev->netdev;
@@ -266,7 +266,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
 		if (!iwhdl)
 			return NOTIFY_DONE;
 		iwdev = &iwhdl->device;
-		if (iwdev->init_state < INET_NOTIFIER)
+		if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
 			return NOTIFY_DONE;
 		p = (__be32 *)neigh->primary_key;
 		i40iw_copy_ip_ntohl(local_ipaddr, p);

From 471b370d52a4a461bf855ff542b544f3e4a5cb3a Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:11 -0500
Subject: [PATCH 246/391] i40iw: Call i40iw_cm_disconn on modify QP to
 disconnect

If QP modify to closing/terminate/error fails, connection is
not torn down as there is no corresponding asynchronous
event that will initiate the teardown.

Add explicit call to i40iw_cm_disconn if not waiting in
modify QP, otherwise schedule it in CM timer.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_verbs.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 1aa411034a27..28b3d02d511b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1027,7 +1027,19 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
 				iwqp->last_aeq = I40IW_AE_RESET_SENT;
 				spin_unlock_irqrestore(&iwqp->lock, flags);
+				i40iw_cm_disconn(iwqp);
 			}
+		} else {
+			spin_lock_irqsave(&iwqp->lock, flags);
+			if (iwqp->cm_id) {
+				if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+					iwqp->cm_id->add_ref(iwqp->cm_id);
+					i40iw_schedule_cm_timer(iwqp->cm_node,
+								(struct i40iw_puda_buf *)iwqp,
+								 I40IW_TIMER_TYPE_CLOSE, 1, 0);
+				}
+			}
+			spin_unlock_irqrestore(&iwqp->lock, flags);
 		}
 	}
 	return 0;

From dfc612b3407e88913a58db00b3bca93685d4f4f9 Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Tue, 19 Sep 2017 09:19:12 -0500
Subject: [PATCH 247/391] i40iw: Add missing VLAN priority

Set the VLAN priority which is in the upper 3 bits of the VLAN
tag field in the QP context.

Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_cm.c | 3 ++-
 drivers/infiniband/hw/i40iw/i40iw_cm.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 14f36ba4e5be..b7215448bb63 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -3255,7 +3255,8 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
 	tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
 	if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
 		tcp_info->insert_vlan_tag = true;
-		tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id);
+		tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) |
+						  cm_node->vlan_id);
 	}
 	if (cm_node->ipv4) {
 		tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 2e52e38ffcf3..8626e7f1fdd3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -71,6 +71,8 @@
 #define	I40IW_HW_IRD_SETTING_32	32
 #define	I40IW_HW_IRD_SETTING_64	64
 
+#define I40IW_VLAN_PRIO_SHIFT   13
+
 enum ietf_mpa_flags {
 	IETF_MPA_FLAGS_MARKERS = 0x80,	/* receive Markers */
 	IETF_MPA_FLAGS_CRC = 0x40,	/* receive Markers */

From f16dc0aa5ea20a2cf173e82ade5f05bfecaa850a Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 19 Sep 2017 09:19:13 -0500
Subject: [PATCH 248/391] i40iw: Add support for port reuse on active side
 connections

During OpenMPI scale up testing, we observe rdma_connect
failures if ports are reused on multiple connections.
This is because the Control Queue-Pair (CQP) command to add
the reused port to Accelerated Port Bit VectorTable (APBVT)
fails as there already exists an entry.

Check for duplicate port before invoking the CQP command
to add APBVT entry and delete the entry only if the port
is not in use.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/i40iw/i40iw_cm.c | 151 ++++++++++++-------------
 drivers/infiniband/hw/i40iw/i40iw_cm.h |   3 +
 2 files changed, 78 insertions(+), 76 deletions(-)

diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index b7215448bb63..5230dd3c938c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1504,23 +1504,40 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
 }
 
 /**
- * listen_port_in_use - determine if port is in use
- * @port: Listen port number
+ * i40iw_port_in_use - determine if port is in use
+ * @port: port number
+ * @active_side: flag for listener side vs active side
  */
-static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
+static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
 {
 	struct i40iw_cm_listener *listen_node;
+	struct i40iw_cm_node *cm_node;
 	unsigned long flags;
 	bool ret = false;
 
-	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
-	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
-		if (listen_node->loc_port == port) {
-			ret = true;
-			break;
+	if (active_side) {
+		/* search connected node list */
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
+			if (cm_node->loc_port == port) {
+				ret = true;
+				break;
+			}
 		}
+			if (!ret)
+				clear_bit(port, cm_core->active_side_ports);
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	} else {
+		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+		list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+			if (listen_node->loc_port == port) {
+				ret = true;
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 	}
-	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
 	return ret;
 }
 
@@ -1868,7 +1885,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
 		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 
 		if (listener->iwdev) {
-			if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
+			if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false))
 				i40iw_manage_apbvt(listener->iwdev,
 						   listener->loc_port,
 						   I40IW_MANAGE_APBVT_DEL);
@@ -2247,21 +2264,21 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
 	if (cm_node->listener) {
 		i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
 	} else {
-		if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
-		    cm_node->apbvt_set) {
+		if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) {
 			i40iw_manage_apbvt(cm_node->iwdev,
 					   cm_node->loc_port,
 					   I40IW_MANAGE_APBVT_DEL);
-			i40iw_get_addr_info(cm_node, &nfo);
-			if (cm_node->qhash_set) {
-				i40iw_manage_qhash(cm_node->iwdev,
-						   &nfo,
-						   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-						   I40IW_QHASH_MANAGE_TYPE_DELETE,
-						   NULL,
-						   false);
-				cm_node->qhash_set = 0;
-			}
+			cm_node->apbvt_set = 0;
+		}
+		i40iw_get_addr_info(cm_node, &nfo);
+		if (cm_node->qhash_set) {
+			i40iw_manage_qhash(cm_node->iwdev,
+					   &nfo,
+					   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+					   I40IW_QHASH_MANAGE_TYPE_DELETE,
+					   NULL,
+					   false);
+			cm_node->qhash_set = 0;
 		}
 	}
 
@@ -3738,10 +3755,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct sockaddr_in *raddr;
 	struct sockaddr_in6 *laddr6;
 	struct sockaddr_in6 *raddr6;
-	bool qhash_set = false;
-	int apbvt_set = 0;
-	int err = 0;
-	enum i40iw_status_code status;
+	int ret = 0;
+	unsigned long flags;
 
 	ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
 	if (!ibqp)
@@ -3790,32 +3805,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	cm_info.user_pri = rt_tos2priority(cm_id->tos);
 	i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
 		    __func__, cm_id->tos, cm_info.user_pri);
-	if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
-	    (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
-				     raddr6->sin6_addr.in6_u.u6_addr32,
-				     sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
-		status = i40iw_manage_qhash(iwdev,
-					    &cm_info,
-					    I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-					    I40IW_QHASH_MANAGE_TYPE_ADD,
-					    NULL,
-					    true);
-		if (status)
-			return -EINVAL;
-		qhash_set = true;
-	}
-	status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
-	if (status) {
-		i40iw_manage_qhash(iwdev,
-				   &cm_info,
-				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-				   I40IW_QHASH_MANAGE_TYPE_DELETE,
-				   NULL,
-				   false);
-		return -EINVAL;
-	}
-
-	apbvt_set = 1;
 	cm_id->add_ref(cm_id);
 	cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
 				       conn_param->private_data_len,
@@ -3823,17 +3812,40 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 				       &cm_info);
 
 	if (IS_ERR(cm_node)) {
-		err = PTR_ERR(cm_node);
-		goto err_out;
+		ret = PTR_ERR(cm_node);
+		cm_id->rem_ref(cm_id);
+		return ret;
 	}
 
+	if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
+	    (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
+				     raddr6->sin6_addr.in6_u.u6_addr32,
+				     sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
+		if (i40iw_manage_qhash(iwdev, &cm_info, I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				       I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
+			ret = -EINVAL;
+			goto err;
+		}
+		cm_node->qhash_set = true;
+	}
+
+	spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+	if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) {
+		spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+		if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) {
+			ret =  -EINVAL;
+			goto err;
+		}
+	} else {
+		spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+	}
+
+	cm_node->apbvt_set = true;
 	i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
 	if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
 	    !cm_node->ord_size)
 		cm_node->ord_size = 1;
 
-	cm_node->apbvt_set = apbvt_set;
-	cm_node->qhash_set = qhash_set;
 	iwqp->cm_node = cm_node;
 	cm_node->iwqp = iwqp;
 	iwqp->cm_id = cm_id;
@@ -3841,11 +3853,9 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
 	if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
 		cm_node->state = I40IW_CM_STATE_SYN_SENT;
-		err = i40iw_send_syn(cm_node, 0);
-		if (err) {
-			i40iw_rem_ref_cm_node(cm_node);
-			goto err_out;
-		}
+		ret = i40iw_send_syn(cm_node, 0);
+		if (ret)
+			goto err;
 	}
 
 	i40iw_debug(cm_node->dev,
@@ -3854,9 +3864,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		    cm_node->rem_port,
 		    cm_node,
 		    cm_node->cm_id);
+
 	return 0;
 
-err_out:
+err:
 	if (cm_info.ipv4)
 		i40iw_debug(&iwdev->sc_dev,
 			    I40IW_DEBUG_CM,
@@ -3868,22 +3879,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 			    "Api - connect() FAILED: dest addr=%pI6",
 			    cm_info.rem_addr);
 
-	if (qhash_set)
-		i40iw_manage_qhash(iwdev,
-				   &cm_info,
-				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-				   I40IW_QHASH_MANAGE_TYPE_DELETE,
-				   NULL,
-				   false);
-
-	if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
-						   cm_info.loc_port))
-		i40iw_manage_apbvt(iwdev,
-				   cm_info.loc_port,
-				   I40IW_MANAGE_APBVT_DEL);
+	i40iw_rem_ref_cm_node(cm_node);
 	cm_id->rem_ref(cm_id);
 	iwdev->cm_core.stats_connect_errs++;
-	return err;
+	return ret;
 }
 
 /**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 8626e7f1fdd3..45abef76295b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -71,6 +71,7 @@
 #define	I40IW_HW_IRD_SETTING_32	32
 #define	I40IW_HW_IRD_SETTING_64	64
 
+#define MAX_PORTS		65536
 #define I40IW_VLAN_PRIO_SHIFT   13
 
 enum ietf_mpa_flags {
@@ -413,6 +414,8 @@ struct i40iw_cm_core {
 	spinlock_t ht_lock; /* manage hash table */
 	spinlock_t listen_list_lock; /* listen list */
 
+	unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)];
+
 	u64	stats_nodes_created;
 	u64	stats_nodes_destroyed;
 	u64	stats_listen_created;

From 432654df90f2a7d8ac8438905208074604ed3e00 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 11 Sep 2017 21:41:43 +0200
Subject: [PATCH 249/391] parisc: Fix too large frame size warnings

The parisc architecture has larger stack frames than most other
architectures on 32-bit kernels.

Increase the maximum allowed stack frame to 1280 bytes for parisc to
avoid warnings in the do_sys_poll() and pat_memconfig() functions.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 lib/Kconfig.debug | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b19c491cbc4e..2689b7c50c52 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -219,7 +219,8 @@ config FRAME_WARN
 	range 0 8192
 	default 0 if KASAN
 	default 2048 if GCC_PLUGIN_LATENT_ENTROPY
-	default 1024 if !64BIT
+	default 1280 if (!64BIT && PARISC)
+	default 1024 if (!64BIT && !PARISC)
 	default 2048 if 64BIT
 	help
 	  Tell gcc to warn at build time for stack frames larger than this.

From e77900abfd8be4e207412d8b7752dbb9838e2571 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:05:02 +0200
Subject: [PATCH 250/391] parisc: Stop unwinding at start of stack

Check stack pointer if we are reaching the stack end and stop unwinding
if we do. This fixes early backtraces and avoids showing unrealistic
call stacks.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/unwind.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 48dc7d4d20bb..caab39dfa95d 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
+#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
@@ -279,6 +280,17 @@ static void unwind_frame_regs(struct unwind_frame_info *info)
 
 			info->prev_sp = sp - 64;
 			info->prev_ip = 0;
+
+			/* The stack is at the end inside the thread_union
+			 * struct. If we reach data, we have reached the
+			 * beginning of the stack and should stop unwinding. */
+			if (info->prev_sp >= (unsigned long) task_thread_info(info->t) &&
+			    info->prev_sp < ((unsigned long) task_thread_info(info->t)
+						+ THREAD_SZ_ALGN)) {
+				info->prev_sp = 0;
+				break;
+			}
+
 			if (get_user(tmp, (unsigned long *)(info->prev_sp - RP_OFFSET))) 
 				break;
 			info->prev_ip = tmp;

From 08b8a99b2c5ea8da4d3dd55056881d12baea1e04 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:17:10 +0200
Subject: [PATCH 251/391] parisc: Move start_parisc() into init section

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index dee6f9d6a153..a31e91c1782b 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -38,6 +38,7 @@
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
+#include <linux/start_kernel.h>
 
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -398,9 +399,8 @@ static int __init parisc_init(void)
 }
 arch_initcall(parisc_init);
 
-void start_parisc(void)
+void __init start_parisc(void)
 {
-	extern void start_kernel(void);
 	extern void early_trap_init(void);
 
 	int ret, cpunum;

From 77089c5274fe2f72db5a2cd956d0d308aed08e68 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:15:09 +0200
Subject: [PATCH 252/391] parisc: Add wrapper for pdc_instr() firmware function

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/pdc.h |  1 +
 arch/parisc/kernel/firmware.c | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index 26b4455baa83..510341f62d97 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -280,6 +280,7 @@ void setup_pdc(void);		/* in inventory.c */
 /* wrapper-functions from pdc.c */
 
 int pdc_add_valid(unsigned long address);
+int pdc_instr(unsigned int *instr);
 int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len);
 int pdc_chassis_disp(unsigned long disp);
 int pdc_chassis_warn(unsigned long *warn);
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index ab80e5c6f651..6d471c00c71a 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -232,6 +232,26 @@ int pdc_add_valid(unsigned long address)
 }
 EXPORT_SYMBOL(pdc_add_valid);
 
+/**
+ * pdc_instr - Get instruction that invokes PDCE_CHECK in HPMC handler.
+ * @instr: Pointer to variable which will get instruction opcode.
+ *
+ * The return value is PDC_OK (0) in case call succeeded.
+ */
+int __init pdc_instr(unsigned int *instr)
+{
+	int retval;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdc_lock, flags);
+	retval = mem_pdc_call(PDC_INSTR, 0UL, __pa(pdc_result));
+	convert_to_wide(pdc_result);
+	*instr = pdc_result[0];
+	spin_unlock_irqrestore(&pdc_lock, flags);
+
+	return retval;
+}
+
 /**
  * pdc_chassis_info - Return chassis information.
  * @result: The return buffer.

From 8d771b143fe2e3941fc8a32926d21410004578c0 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 17 Sep 2017 21:28:11 +0200
Subject: [PATCH 253/391] parisc: Add PDCE_CHECK instruction to HPMC handler

According to the programming note at page 1-31 of the PA 1.1 Firmware
Architecture document, one should use the PDC_INSTR firmware function to
get the instruction that invokes a PDCE_CHECK in the HPMC handler.  This
patch follows this note and sets the instruction which has been a nop up
until now.
Testing on a C3000 and C8000 showed that this firmware call isn't
implemented on those machines, so maybe it's only needed on older ones.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/traps.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 991654c88eec..230333157fe3 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -817,7 +817,7 @@ void __init initialize_ivt(const void *iva)
 	u32 check = 0;
 	u32 *ivap;
 	u32 *hpmcp;
-	u32 length;
+	u32 length, instr;
 
 	if (strcmp((const char *)iva, "cows can fly"))
 		panic("IVT invalid");
@@ -827,6 +827,14 @@ void __init initialize_ivt(const void *iva)
 	for (i = 0; i < 8; i++)
 	    *ivap++ = 0;
 
+	/*
+	 * Use PDC_INSTR firmware function to get instruction that invokes
+	 * PDCE_CHECK in HPMC handler.  See programming note at page 1-31 of
+	 * the PA 1.1 Firmware Architecture document.
+	 */
+	if (pdc_instr(&instr) == PDC_OK)
+		ivap[0] = instr;
+
 	/* Compute Checksum for HPMC handler */
 	length = os_hpmc_size;
 	ivap[7] = length;

From ea6976483fb0ced259fbaa9e4f68a2cdcee7e312 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Mon, 18 Sep 2017 17:55:24 +0200
Subject: [PATCH 254/391] parisc: Check if initrd was loaded into broken RAM

While scanning the PDT for reported broken memory modules, warn if the
initrd was coincidentally loaded into bad memory.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/pdt.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index 05730a83895c..00aed082969b 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -15,6 +15,7 @@
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/kthread.h>
+#include <linux/initrd.h>
 
 #include <asm/pdc.h>
 #include <asm/pdcpat.h>
@@ -216,8 +217,16 @@ void __init pdc_pdt_init(void)
 	}
 
 	for (i = 0; i < pdt_status.pdt_entries; i++) {
+		unsigned long addr;
+
 		report_mem_err(pdt_entry[i]);
 
+		addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK;
+		if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) &&
+			addr >= initrd_start && addr < initrd_end)
+			pr_crit("CRITICAL: initrd possibly broken "
+				"due to bad memory!\n");
+
 		/* mark memory page bad */
 		memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
 	}

From a7e6601f70a53957b1d01c321319f0237bba5202 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 21 Sep 2017 21:22:27 +0200
Subject: [PATCH 255/391] parisc: Move init_per_cpu() into init section

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/smp.h  | 1 +
 arch/parisc/kernel/processor.c | 2 +-
 arch/parisc/kernel/setup.c     | 2 +-
 arch/parisc/kernel/smp.c       | 3 +--
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index a5dc9066c6d8..ad9c9c3b4136 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
+extern int init_per_cpu(int cpuid);
 
 #if defined(CONFIG_SMP)
 
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index a778bd3c107c..e120d63c1b28 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -317,7 +317,7 @@ void __init collect_boot_cpu_data(void)
  *
  * o Enable CPU profiling hooks.
  */
-int init_per_cpu(int cpunum)
+int __init init_per_cpu(int cpunum)
 {
 	int ret;
 	struct pdc_coproc_cfg coproc_cfg;
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index a31e91c1782b..f7d0c3b33d70 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -49,6 +49,7 @@
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/unwind.h>
+#include <asm/smp.h>
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
@@ -116,7 +117,6 @@ void __init dma_ops_init(void)
 }
 #endif
 
-extern int init_per_cpu(int cpuid);
 extern void collect_boot_cpu_data(void);
 
 void __init setup_arch(char **cmdline_p)
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 63365106ea19..30c28ab14540 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -255,12 +255,11 @@ void arch_send_call_function_single_ipi(int cpu)
 static void __init
 smp_cpu_init(int cpunum)
 {
-	extern int init_per_cpu(int);  /* arch/parisc/kernel/processor.c */
 	extern void init_IRQ(void);    /* arch/parisc/kernel/irq.c */
 	extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */
 
 	/* Set modes and Enable floating point coprocessor */
-	(void) init_per_cpu(cpunum);
+	init_per_cpu(cpunum);
 
 	disable_sr_hashing();
 

From 606f95e4255845155f62504a9e1f12665b1853c8 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 21 Sep 2017 21:52:08 +0200
Subject: [PATCH 256/391] parisc: Add HWPOISON page fault handler code

Commit 24587380f61d ("parisc: Add MADV_HWPOISON and MADV_SOFT_OFFLINE") added
the necessary constants to handle hardware-poisoning. Those were needed to
support the page deallocation feature from firmware.

But I completely missed to add the relevant fault handler code. This now
showed up when I ran the madvise07 testcase from the Linux Test Project,
which failed with a kernel BUG at arch/parisc/mm/fault.c:320.

With this patch the parisc kernel now behaves like other platforms and
gives the same kernel syslog warnings when poisoning pages.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/mm/fault.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 5b101f6a5607..e247edbca68e 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 
 #include <asm/traps.h>
 
@@ -261,7 +262,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	unsigned long acc_type;
-	int fault;
+	int fault = 0;
 	unsigned int flags;
 
 	if (faulthandler_disabled())
@@ -315,7 +316,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 			goto out_of_memory;
 		else if (fault & VM_FAULT_SIGSEGV)
 			goto bad_area;
-		else if (fault & VM_FAULT_SIGBUS)
+		else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+				  VM_FAULT_HWPOISON_LARGE))
 			goto bad_area;
 		BUG();
 	}
@@ -352,8 +354,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 
 	if (user_mode(regs)) {
 		struct siginfo si;
-
-		show_signal_msg(regs, code, address, tsk, vma);
+		unsigned int lsb = 0;
 
 		switch (code) {
 		case 15:	/* Data TLB miss fault/Data page fault */
@@ -386,6 +387,30 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 			si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
 			break;
 		}
+
+#ifdef CONFIG_MEMORY_FAILURE
+		if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+			printk(KERN_ERR
+	"MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n",
+			tsk->comm, tsk->pid, address);
+			si.si_signo = SIGBUS;
+			si.si_code = BUS_MCEERR_AR;
+		}
+#endif
+
+		/*
+		 * Either small page or large page may be poisoned.
+		 * In other words, VM_FAULT_HWPOISON_LARGE and
+		 * VM_FAULT_HWPOISON are mutually exclusive.
+		 */
+		if (fault & VM_FAULT_HWPOISON_LARGE)
+			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+		else if (fault & VM_FAULT_HWPOISON)
+			lsb = PAGE_SHIFT;
+		else
+			show_signal_msg(regs, code, address, tsk, vma);
+		si.si_addr_lsb = lsb;
+
 		si.si_errno = 0;
 		si.si_addr = (void __user *) address;
 		force_sig_info(si.si_signo, &si, current);

From f9b941baa4b78dafcbc4790c0ed2fbbe2aafed06 Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:28 +0530
Subject: [PATCH 257/391] bnxt_re: Fix update of qplib_qp.mtu when modified

The MTU value in the qplib_qp.mtu should be
consistent with whatever mtu was set during
INIT to RTR.The Next PSN and number of packets
are calculated based on this member in the qplib_qp structure.

Signed-off-by: Narender Reddy <narender.reddy@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 03caf48af8e2..11bbf3e748b0 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1436,11 +1436,14 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		qp->qplib_qp.modify_flags |=
 				CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
 		qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu);
+		qp->qplib_qp.mtu = ib_mtu_enum_to_int(qp_attr->path_mtu);
 	} else if (qp_attr->qp_state == IB_QPS_RTR) {
 		qp->qplib_qp.modify_flags |=
 			CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
 		qp->qplib_qp.path_mtu =
 			__from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu));
+		qp->qplib_qp.mtu =
+			ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
 	}
 
 	if (qp_attr_mask & IB_QP_TIMEOUT) {

From 2b6376305dcb2cb79d0599cf3b9ce98aa860cb4f Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:29 +0530
Subject: [PATCH 258/391] bnxt_re: Stop issuing further cmds to FW once a cmd
 times out

Once a cmd to FW times out(after 20s) it is reasonable to
assume the FW or atleast the control path is dead.
No point issuing further cmds to the FW as each subsequent cmd
with another 20s timeout will cascade resulting in unnecessary
traces and/or NMI Lockups.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++++
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 391bb7006e8f..2bdb1562bd21 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -107,6 +107,9 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 		return -EINVAL;
 	}
 
+	if (test_bit(FIRMWARE_TIMED_OUT, &rcfw->flags))
+		return -ETIMEDOUT;
+
 	/* Cmdq are in 16-byte units, each request can consume 1 or more
 	 * cmdqe
 	 */
@@ -226,6 +229,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 		/* timed out */
 		dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec",
 			cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
+		set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
 		return rc;
 	}
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 0ed312f17c8d..85b16da287f9 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -162,8 +162,9 @@ struct bnxt_qplib_rcfw {
 	unsigned long		*cmdq_bitmap;
 	u32			bmap_size;
 	unsigned long		flags;
-#define FIRMWARE_INITIALIZED_FLAG	1
+#define FIRMWARE_INITIALIZED_FLAG	BIT(0)
 #define FIRMWARE_FIRST_FLAG		BIT(31)
+#define FIRMWARE_TIMED_OUT		BIT(3)
 	wait_queue_head_t	waitq;
 	int			(*aeq_handler)(struct bnxt_qplib_rcfw *,
 					       struct creq_func_event *);

From 55311d055175cada8249d39436371afe790df699 Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:30 +0530
Subject: [PATCH 259/391] bnxt_re: Fix compare and swap atomic operands

Driver must assign the user supplied compare/swap values in
the wqe to successfully complete the atomic compare and
swap operation.

Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 11bbf3e748b0..0dbdbe1616ab 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1916,6 +1916,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
 	switch (wr->opcode) {
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 		wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP;
+		wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
 		wqe->atomic.swap_data = atomic_wr(wr)->swap;
 		break;
 	case IB_WR_ATOMIC_FETCH_AND_ADD:

From 027c892924eac22f5ca8db4100bccde423be797d Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:31 +0530
Subject: [PATCH 260/391] bnxt_re: Free up devices in module_exit path

Clean up all devices added to the bnxt_re_dev_list in the
module_exit entry point.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/main.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 82d1cbc27aee..00a3b743c156 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1375,6 +1375,22 @@ static int __init bnxt_re_mod_init(void)
 
 static void __exit bnxt_re_mod_exit(void)
 {
+	struct bnxt_re_dev *rdev;
+	LIST_HEAD(to_be_deleted);
+
+	mutex_lock(&bnxt_re_dev_lock);
+	/* Free all adapter allocated resources */
+	if (!list_empty(&bnxt_re_dev_list))
+		list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
+	mutex_unlock(&bnxt_re_dev_lock);
+
+	list_for_each_entry(rdev, &to_be_deleted, list) {
+		dev_info(rdev_to_dev(rdev), "Unregistering Device");
+		bnxt_re_dev_stop(rdev);
+		bnxt_re_ib_unreg(rdev, true);
+		bnxt_re_remove_one(rdev);
+		bnxt_re_dev_unreg(rdev);
+	}
 	unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
 	if (bnxt_re_wq)
 		destroy_workqueue(bnxt_re_wq);

From d5917307bb1caa9cb0a915951c57f4cdbacca443 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:32 +0530
Subject: [PATCH 261/391] bnxt_re: Fix race between the netdev register and
 unregister events

Upon receipt of the NETDEV_REGISTER event from the netdev notifier chain,
the IB stack registration is spawned off to a workqueue since that also
requires an rtnl lock.
There could be 2 kinds of races between the NETDEV_REGISTER and the
NETDEV_UNREGISTER event handling.
a)The NETDEV_UNREGISTER event is received in rapid succession after
the NETDEV_REGISTER event even before the work queue got a chance to run.
b)The NETDEV_UNREGISTER event is received while the workqueue that handles
registration with the IB stack is still in progress.

Handle both the races with a bit flag that is set just before the work item
is queued and cleared in the workqueue after the event is handled just
before the workqueue item is freed.

While adding the new flag, it was noted that the flags are all used in
*_bit() operations which expect a bit number and not a literal constant
with a bit set.  So change the numbers to be bit numbers.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h | 12 +++++++-----
 drivers/infiniband/hw/bnxt_re/main.c    |  8 ++++++++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index b3ad37fec578..a25f9d240880 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -93,11 +93,13 @@ struct bnxt_re_dev {
 	struct ib_device		ibdev;
 	struct list_head		list;
 	unsigned long			flags;
-#define BNXT_RE_FLAG_NETDEV_REGISTERED	0
-#define BNXT_RE_FLAG_IBDEV_REGISTERED	1
-#define BNXT_RE_FLAG_GOT_MSIX		2
-#define BNXT_RE_FLAG_RCFW_CHANNEL_EN	8
-#define BNXT_RE_FLAG_QOS_WORK_REG	16
+#define BNXT_RE_FLAG_NETDEV_REGISTERED		0
+#define BNXT_RE_FLAG_IBDEV_REGISTERED		1
+#define BNXT_RE_FLAG_GOT_MSIX			2
+#define BNXT_RE_FLAG_HAVE_L2_REF		3
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN		4
+#define BNXT_RE_FLAG_QOS_WORK_REG		5
+#define BNXT_RE_FLAG_TASK_IN_PROG		6
 	struct net_device		*netdev;
 	unsigned int			version, major, minor;
 	struct bnxt_en_dev		*en_dev;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 00a3b743c156..29c3d7e254af 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1259,6 +1259,8 @@ static void bnxt_re_task(struct work_struct *work)
 	default:
 		break;
 	}
+	smp_mb__before_atomic();
+	clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
 	kfree(re_work);
 }
 
@@ -1317,6 +1319,11 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
 		break;
 
 	case NETDEV_UNREGISTER:
+		/* netdev notifier will call NETDEV_UNREGISTER again later since
+		 * we are still holding the reference to the netdev
+		 */
+		if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags))
+			goto exit;
 		bnxt_re_ib_unreg(rdev, false);
 		bnxt_re_remove_one(rdev);
 		bnxt_re_dev_unreg(rdev);
@@ -1335,6 +1342,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
 			re_work->vlan_dev = (real_dev == netdev ?
 					     NULL : netdev);
 			INIT_WORK(&re_work->work, bnxt_re_task);
+			set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags);
 			queue_work(bnxt_re_wq, &re_work->work);
 		}
 	}

From 74828b128115033ff25d4140d732a05a36eaeaf0 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:33 +0530
Subject: [PATCH 262/391] bnxt_re: Remove RTNL lock dependency in
 bnxt_re_query_port

When there is a NETDEV_UNREGISTER event, bnxt_re driver calls
ib_unregister_device() (RTNL lock held).
ib_unregister_device attempts to flush a worker queue scheduled by
ib_core and that queue might have a pending ib_query_port().
ib_query_port in turn calls bnxt_re_query_port(), which while querying the
link speed using ib_get_eth_speed(), tries to acquire the rtnl_lock() which
was already held by NETDEV_UNREGISTER.
Fixing the issue by removing the link speed query from bnxt_re_query_port()
Now the speed is queried post a successful ib_register_device or whenever
there is a NETDEV_CHANGE event.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h  |  2 ++
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 11 +++--------
 drivers/infiniband/hw/bnxt_re/main.c     |  4 ++++
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index a25f9d240880..ecbac91b2e14 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -110,6 +110,8 @@ struct bnxt_re_dev {
 
 	struct delayed_work		worker;
 	u8				cur_prio_map;
+	u8				active_speed;
+	u8				active_width;
 
 	/* FP Notification Queue (CQ & SRQ) */
 	struct tasklet_struct		nq_task;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 0dbdbe1616ab..7430ef07a0e1 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -259,14 +259,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 	port_attr->sm_sl = 0;
 	port_attr->subnet_timeout = 0;
 	port_attr->init_type_reply = 0;
-	/* call the underlying netdev's ethtool hooks to query speed settings
-	 * for which we acquire rtnl_lock _only_ if it's registered with
-	 * IB stack to avoid race in the NETDEV_UNREG path
-	 */
-	if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
-		if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed,
-				     &port_attr->active_width))
-			return -EINVAL;
+	port_attr->active_speed = rdev->active_speed;
+	port_attr->active_width = rdev->active_width;
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 29c3d7e254af..e7450ea92aa9 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1161,6 +1161,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		}
 	}
 	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+			 &rdev->active_width);
 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
 	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
 
@@ -1255,6 +1257,8 @@ static void bnxt_re_task(struct work_struct *work)
 		else if (netif_carrier_ok(rdev->netdev))
 			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
 					       IB_EVENT_PORT_ACTIVE);
+		ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+				 &rdev->active_width);
 		break;
 	default:
 		break;

From 1993519be8bc86342687c61d8d11c3ade62b3b84 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:34 +0530
Subject: [PATCH 263/391] bnxt_re: Fix memory leak in FRMR path

This patch fixes a memory leak issue when alloc_mr is used.
mr->pages and mr->npages are used only in alloc_mr path. mr->pages
is allocated when alloc_mr is called or in the case of FRMR, while
creating the MR. mr->npages is updated only when the MR created
is used i.e. after invoking map_mr_sg verb, before data transfer.
In the dereg_mr path, if mr->npages is 0, driver ends up not freeing
the memory created.
Removing the npages check from the dereg_mr path for kernel consumers.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 7430ef07a0e1..d7be4e4227ce 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3066,7 +3066,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
 		return rc;
 	}
 
-	if (mr->npages && mr->pages) {
+	if (mr->pages) {
 		rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
 							&mr->qplib_frpl);
 		kfree(mr->pages);

From 89aaca54ba60e91f02c1c168fbef5d71f71a6d43 Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Thu, 31 Aug 2017 09:27:35 +0530
Subject: [PATCH 264/391] bnxt_re: Don't issue cmd to delete GID for QP1 GID
 entry before the QP is destroyed

FW needs the 0th GID Entry in the Table to be preserved before
it's corresponding QP1 is deleted, else it will fail the cmd.
Check for the same and return to prevent error msg being logged for
cmd failure.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index d7be4e4227ce..0d89621d9fe8 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -314,6 +314,7 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
 	struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
 	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+	struct bnxt_qplib_gid *gid_to_del;
 
 	/* Delete the entry from the hardware */
 	ctx = *context;
@@ -323,11 +324,25 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
 	if (sgid_tbl && sgid_tbl->active) {
 		if (ctx->idx >= sgid_tbl->max)
 			return -EINVAL;
+		gid_to_del = &sgid_tbl->tbl[ctx->idx];
+		/* DEL_GID is called in WQ context(netdevice_event_work_handler)
+		 * or via the ib_unregister_device path. In the former case QP1
+		 * may not be destroyed yet, in which case just return as FW
+		 * needs that entry to be present and will fail it's deletion.
+		 * We could get invoked again after QP1 is destroyed OR get an
+		 * ADD_GID call with a different GID value for the same index
+		 * where we issue MODIFY_GID cmd to update the GID entry -- TBD
+		 */
+		if (ctx->idx == 0 &&
+		    rdma_link_local_addr((struct in6_addr *)gid_to_del) &&
+		    ctx->refcnt == 1 && rdev->qp1_sqp) {
+			dev_dbg(rdev_to_dev(rdev),
+				"Trying to delete GID0 while QP1 is alive\n");
+			return -EFAULT;
+		}
 		ctx->refcnt--;
 		if (!ctx->refcnt) {
-			rc = bnxt_qplib_del_sgid(sgid_tbl,
-						 &sgid_tbl->tbl[ctx->idx],
-						 true);
+			rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true);
 			if (rc) {
 				dev_err(rdev_to_dev(rdev),
 					"Failed to remove GID: %#x", rc);
@@ -811,6 +826,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
 
 		kfree(rdev->sqp_ah);
 		kfree(rdev->qp1_sqp);
+		rdev->qp1_sqp = NULL;
+		rdev->sqp_ah = NULL;
 	}
 
 	if (!IS_ERR_OR_NULL(qp->rumem))

From 19fe43a54fb67b6cc8857e65c78e1dc8aa2e97a3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 6 Jul 2017 10:56:21 +0200
Subject: [PATCH 265/391] apparmor: Fix shadowed local variable in
 unpack_trans_table()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

with W=2:

    security/apparmor/policy_unpack.c: In function ‘unpack_trans_table’:
    security/apparmor/policy_unpack.c:469: warning: declaration of ‘pos’ shadows a previous local
    security/apparmor/policy_unpack.c:451: warning: shadowed declaration is here

Rename the old "pos" to "saved_pos" to fix this.

Fixes: 5379a3312024a8be ("apparmor: support v7 transition format compatible with label_parse")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index c600f4dd1783..2d5a1a007b06 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -448,7 +448,7 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e)
  */
 static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 {
-	void *pos = e->pos;
+	void *saved_pos = e->pos;
 
 	/* exec table is optional */
 	if (unpack_nameX(e, AA_STRUCT, "xtable")) {
@@ -511,7 +511,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 
 fail:
 	aa_free_domain_entries(&profile->file.trans);
-	e->pos = pos;
+	e->pos = saved_pos;
 	return 0;
 }
 

From 86aea56f14929ff1c05eca1776e9068e907429d5 Mon Sep 17 00:00:00 2001
From: Christos Gkekas <chris.gekas@gmail.com>
Date: Sat, 8 Jul 2017 20:50:21 +0100
Subject: [PATCH 266/391] apparmor: Fix logical error in verify_header()

verify_header() is currently checking whether interface version is less
than 5 *and* greater than 7, which always evaluates to false. Instead it
should check whether it is less than 5 *or* greater than 7.

Signed-off-by: Christos Gkekas <chris.gekas@gmail.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 2d5a1a007b06..bda0dce3b582 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -832,7 +832,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns)
 	 * if not specified use previous version
 	 * Mask off everything that is not kernel abi version
 	 */
-	if (VERSION_LT(e->version, v5) && VERSION_GT(e->version, v7)) {
+	if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) {
 		audit_iface(NULL, NULL, NULL, "unsupported interface version",
 			    e, error);
 		return error;

From 5d314a81eca29b01939930c1c596dfa44937e970 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jul 2017 10:39:20 +0300
Subject: [PATCH 267/391] apparmor: Fix an error code in aafs_create()

We accidentally forgot to set the error code on this path.  It means we
return NULL instead of an error pointer.  I looked through a bunch of
callers and I don't think it really causes a big issue, but the
documentation says we're supposed to return error pointers here.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 853c2ec8e0c9..2caeb748070c 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -248,8 +248,10 @@ static struct dentry *aafs_create(const char *name, umode_t mode,
 
 	inode_lock(dir);
 	dentry = lookup_one_len(name, parent, strlen(name));
-	if (IS_ERR(dentry))
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
 		goto fail_lock;
+	}
 
 	if (d_really_is_positive(dentry)) {
 		error = -EEXIST;

From c5561700c9cb951ec3a33a0914c840423b09d7c9 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 31 Jul 2017 23:44:37 -0700
Subject: [PATCH 268/391] apparmor: Redundant condition: prev_ns. in
 [label.c:1498]

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/label.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index e052eaba1cf6..e324f4df3e34 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1495,7 +1495,7 @@ static int aa_profile_snxprint(char *str, size_t size, struct aa_ns *view,
 		view = profiles_ns(profile);
 
 	if (view != profile->ns &&
-	    (!prev_ns || (prev_ns && *prev_ns != profile->ns))) {
+	    (!prev_ns || (*prev_ns != profile->ns))) {
 		if (prev_ns)
 			*prev_ns = profile->ns;
 		ns_name = aa_ns_name(view, profile->ns,

From cd1dbf76b23d5ab2cba5e657fe20b1e236a408cc Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 22:56:22 -0700
Subject: [PATCH 269/391] apparmor: add the ability to mediate signals

Add signal mediation where the signal can be mediated based on the
signal, direction, or the label or the peer/target. The signal perms
are verified on a cross check to ensure policy consistency in the case
of incremental policy load/replacement.

The optimization of skipping the cross check when policy is guaranteed
to be consistent (single compile unit) remains to be done.

policy rules have the form of
  SIGNAL_RULE = [ QUALIFIERS ] 'signal' [ SIGNAL ACCESS PERMISSIONS ]
                [ SIGNAL SET ] [ SIGNAL PEER ]

  SIGNAL ACCESS PERMISSIONS = SIGNAL ACCESS | SIGNAL ACCESS LIST

  SIGNAL ACCESS LIST = '(' Comma or space separated list of SIGNAL
                           ACCESS ')'

  SIGNAL ACCESS = ( 'r' | 'w' | 'rw' | 'read' | 'write' | 'send' |
                    'receive' )

  SIGNAL SET = 'set' '=' '(' SIGNAL LIST ')'

  SIGNAL LIST = Comma or space separated list of SIGNALS

  SIGNALS = ( 'hup' | 'int' | 'quit' | 'ill' | 'trap' | 'abrt' |
              'bus' | 'fpe' | 'kill' | 'usr1' | 'segv' | 'usr2' |
	      'pipe' | 'alrm' | 'term' | 'stkflt' | 'chld' | 'cont' |
	      'stop' | 'stp' | 'ttin' | 'ttou' | 'urg' | 'xcpu' |
	      'xfsz' | 'vtalrm' | 'prof' | 'winch' | 'io' | 'pwr' |
	      'sys' | 'emt' | 'exists' | 'rtmin+0' ... 'rtmin+32'
            )

  SIGNAL PEER = 'peer' '=' AARE

eg.
  signal,                                 # allow all signals
  signal send set=(hup, kill) peer=foo,

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/apparmorfs.c        |  7 ++
 security/apparmor/include/apparmor.h  |  1 +
 security/apparmor/include/audit.h     |  2 +
 security/apparmor/include/ipc.h       |  6 ++
 security/apparmor/include/sig_names.h | 95 +++++++++++++++++++++++++
 security/apparmor/ipc.c               | 99 +++++++++++++++++++++++++++
 security/apparmor/lsm.c               | 21 ++++++
 7 files changed, 231 insertions(+)
 create mode 100644 security/apparmor/include/sig_names.h

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 2caeb748070c..a5f9e1aa51f7 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -32,6 +32,7 @@
 #include "include/audit.h"
 #include "include/context.h"
 #include "include/crypto.h"
+#include "include/ipc.h"
 #include "include/policy_ns.h"
 #include "include/label.h"
 #include "include/policy.h"
@@ -2129,6 +2130,11 @@ static struct aa_sfs_entry aa_sfs_entry_ptrace[] = {
 	{ }
 };
 
+static struct aa_sfs_entry aa_sfs_entry_signal[] = {
+	AA_SFS_FILE_STRING("mask", AA_SFS_SIG_MASK),
+	{ }
+};
+
 static struct aa_sfs_entry aa_sfs_entry_domain[] = {
 	AA_SFS_FILE_BOOLEAN("change_hat",	1),
 	AA_SFS_FILE_BOOLEAN("change_hatv",	1),
@@ -2179,6 +2185,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 	AA_SFS_DIR("rlimit",			aa_sfs_entry_rlimit),
 	AA_SFS_DIR("caps",			aa_sfs_entry_caps),
 	AA_SFS_DIR("ptrace",			aa_sfs_entry_ptrace),
+	AA_SFS_DIR("signal",			aa_sfs_entry_signal),
 	AA_SFS_DIR("query",			aa_sfs_entry_query),
 	{ }
 };
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index aaf893f4e4f5..962a20a75e01 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -28,6 +28,7 @@
 #define AA_CLASS_RLIMITS	5
 #define AA_CLASS_DOMAIN		6
 #define AA_CLASS_PTRACE		9
+#define AA_CLASS_SIGNAL		10
 #define AA_CLASS_LABEL		16
 
 #define AA_CLASS_LAST		AA_CLASS_LABEL
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index c68839a44351..d9a156ae11b9 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -86,6 +86,7 @@ enum audit_type {
 #define OP_SHUTDOWN "socket_shutdown"
 
 #define OP_PTRACE "ptrace"
+#define OP_SIGNAL "signal"
 
 #define OP_EXEC "exec"
 
@@ -126,6 +127,7 @@ struct apparmor_audit_data {
 			long pos;
 			const char *ns;
 		} iface;
+		int signal;
 		struct {
 			int rlim;
 			unsigned long max;
diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h
index 656fdb81c8a0..5ffc218d1e74 100644
--- a/security/apparmor/include/ipc.h
+++ b/security/apparmor/include/ipc.h
@@ -27,8 +27,14 @@ struct aa_profile;
 
 #define AA_PTRACE_PERM_MASK (AA_PTRACE_READ | AA_PTRACE_TRACE | \
 			     AA_MAY_BE_READ | AA_MAY_BE_TRACED)
+#define AA_SIGNAL_PERM_MASK (MAY_READ | MAY_WRITE)
+
+#define AA_SFS_SIG_MASK "hup int quit ill trap abrt bus fpe kill usr1 " \
+	"segv usr2 pipe alrm term stkflt chld cont stop stp ttin ttou urg " \
+	"xcpu xfsz vtalrm prof winch io pwr sys emt lost"
 
 int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
 		  u32 request);
+int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig);
 
 #endif /* __AA_IPC_H */
diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h
new file mode 100644
index 000000000000..0d4395f231ca
--- /dev/null
+++ b/security/apparmor/include/sig_names.h
@@ -0,0 +1,95 @@
+#include <linux/signal.h>
+
+#define SIGUNKNOWN 0
+#define MAXMAPPED_SIG 35
+/* provide a mapping of arch signal to internal signal # for mediation
+ * those that are always an alias SIGCLD for SIGCLHD and SIGPOLL for SIGIO
+ * map to the same entry those that may/or may not get a separate entry
+ */
+static const int sig_map[MAXMAPPED_SIG] = {
+	[0] = MAXMAPPED_SIG,	/* existence test */
+	[SIGHUP] = 1,
+	[SIGINT] = 2,
+	[SIGQUIT] = 3,
+	[SIGILL] = 4,
+	[SIGTRAP] = 5,		/* -, 5, - */
+	[SIGABRT] = 6,		/*  SIGIOT: -, 6, - */
+	[SIGBUS] = 7,		/* 10, 7, 10 */
+	[SIGFPE] = 8,
+	[SIGKILL] = 9,
+	[SIGUSR1] = 10,		/* 30, 10, 16 */
+	[SIGSEGV] = 11,
+	[SIGUSR2] = 12,		/* 31, 12, 17 */
+	[SIGPIPE] = 13,
+	[SIGALRM] = 14,
+	[SIGTERM] = 15,
+	[SIGSTKFLT] = 16,	/* -, 16, - */
+	[SIGCHLD] = 17,		/* 20, 17, 18.  SIGCHLD -, -, 18 */
+	[SIGCONT] = 18,		/* 19, 18, 25 */
+	[SIGSTOP] = 19,		/* 17, 19, 23 */
+	[SIGTSTP] = 20,		/* 18, 20, 24 */
+	[SIGTTIN] = 21,		/* 21, 21, 26 */
+	[SIGTTOU] = 22,		/* 22, 22, 27 */
+	[SIGURG] = 23,		/* 16, 23, 21 */
+	[SIGXCPU] = 24,		/* 24, 24, 30 */
+	[SIGXFSZ] = 25,		/* 25, 25, 31 */
+	[SIGVTALRM] = 26,	/* 26, 26, 28 */
+	[SIGPROF] = 27,		/* 27, 27, 29 */
+	[SIGWINCH] = 28,	/* 28, 28, 20 */
+	[SIGIO] = 29,		/* SIGPOLL: 23, 29, 22 */
+	[SIGPWR] = 30,		/* 29, 30, 19.  SIGINFO 29, -, - */
+#ifdef SIGSYS
+	[SIGSYS] = 31,		/* 12, 31, 12. often SIG LOST/UNUSED */
+#endif
+#ifdef SIGEMT
+	[SIGEMT] = 32,		/* 7, - , 7 */
+#endif
+#if defined(SIGLOST) && SIGPWR != SIGLOST		/* sparc */
+	[SIGLOST] = 33,		/* unused on Linux */
+#endif
+#if defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS
+	[SIGUNUSED] = 34,	/* -, 31, - */
+#endif
+};
+
+/* this table is ordered post sig_map[sig] mapping */
+static const char *const sig_names[MAXMAPPED_SIG + 1] = {
+	"unknown",
+	"hup",
+	"int",
+	"quit",
+	"ill",
+	"trap",
+	"abrt",
+	"bus",
+	"fpe",
+	"kill",
+	"usr1",
+	"segv",
+	"usr2",
+	"pipe",
+	"alrm",
+	"term",
+	"stkflt",
+	"chld",
+	"cont",
+	"stop",
+	"stp",
+	"ttin",
+	"ttou",
+	"urg",
+	"xcpu",
+	"xfsz",
+	"vtalrm",
+	"prof",
+	"winch",
+	"io",
+	"pwr",
+	"sys",
+	"emt",
+	"lost",
+	"unused",
+
+	"exists",	/* always last existence test mapped to MAXMAPPED_SIG */
+};
+
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 11e66b5bbc42..66fb9ede9447 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -20,6 +20,7 @@
 #include "include/context.h"
 #include "include/policy.h"
 #include "include/ipc.h"
+#include "include/sig_names.h"
 
 /**
  * audit_ptrace_mask - convert mask to permission string
@@ -121,3 +122,101 @@ int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
 }
 
 
+static inline int map_signal_num(int sig)
+{
+	if (sig > SIGRTMAX)
+		return SIGUNKNOWN;
+	else if (sig >= SIGRTMIN)
+		return sig - SIGRTMIN + 128;	/* rt sigs mapped to 128 */
+	else if (sig <= MAXMAPPED_SIG)
+		return sig_map[sig];
+	return SIGUNKNOWN;
+}
+
+/**
+ * audit_file_mask - convert mask to permission string
+ * @buffer: buffer to write string to (NOT NULL)
+ * @mask: permission mask to convert
+ */
+static void audit_signal_mask(struct audit_buffer *ab, u32 mask)
+{
+	if (mask & MAY_READ)
+		audit_log_string(ab, "receive");
+	if (mask & MAY_WRITE)
+		audit_log_string(ab, "send");
+}
+
+/**
+ * audit_cb - call back for signal specific audit fields
+ * @ab: audit_buffer  (NOT NULL)
+ * @va: audit struct to audit values of  (NOT NULL)
+ */
+static void audit_signal_cb(struct audit_buffer *ab, void *va)
+{
+	struct common_audit_data *sa = va;
+
+	if (aad(sa)->request & AA_SIGNAL_PERM_MASK) {
+		audit_log_format(ab, " requested_mask=");
+		audit_signal_mask(ab, aad(sa)->request);
+		if (aad(sa)->denied & AA_SIGNAL_PERM_MASK) {
+			audit_log_format(ab, " denied_mask=");
+			audit_signal_mask(ab, aad(sa)->denied);
+		}
+	}
+	if (aad(sa)->signal <= MAXMAPPED_SIG)
+		audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]);
+	else
+		audit_log_format(ab, " signal=rtmin+%d",
+				 aad(sa)->signal - 128);
+	audit_log_format(ab, " peer=");
+	aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
+			FLAGS_NONE, GFP_ATOMIC);
+}
+
+/* TODO: update to handle compound name&name2, conditionals */
+static void profile_match_signal(struct aa_profile *profile, const char *label,
+				 int signal, struct aa_perms *perms)
+{
+	unsigned int state;
+
+	/* TODO: secondary cache check <profile, profile, perm> */
+	state = aa_dfa_next(profile->policy.dfa,
+			    profile->policy.start[AA_CLASS_SIGNAL],
+			    signal);
+	state = aa_dfa_match(profile->policy.dfa, state, label);
+	aa_compute_perms(profile->policy.dfa, state, perms);
+}
+
+static int profile_signal_perm(struct aa_profile *profile,
+			       struct aa_profile *peer, u32 request,
+			       struct common_audit_data *sa)
+{
+	struct aa_perms perms;
+
+	if (profile_unconfined(profile) ||
+	    !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL))
+		return 0;
+
+	aad(sa)->peer = &peer->label;
+	profile_match_signal(profile, peer->base.hname, aad(sa)->signal,
+			     &perms);
+	aa_apply_modes_to_perms(profile, &perms);
+	return aa_check_perms(profile, &perms, request, sa, audit_signal_cb);
+}
+
+static int aa_signal_cross_perm(struct aa_profile *sender,
+				struct aa_profile *target,
+				struct common_audit_data *sa)
+{
+	return xcheck(profile_signal_perm(sender, target, MAY_WRITE, sa),
+		      profile_signal_perm(target, sender, MAY_READ, sa));
+}
+
+int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig)
+{
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SIGNAL);
+
+	aad(&sa)->signal = map_signal_num(sig);
+	return xcheck_labels_profiles(sender, target, aa_signal_cross_perm,
+				      &sa);
+}
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 867bcd154c7e..af22f3dfbcce 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -656,6 +656,26 @@ static int apparmor_task_setrlimit(struct task_struct *task,
 	return error;
 }
 
+static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
+			      int sig, u32 secid)
+{
+	struct aa_label *cl, *tl;
+	int error;
+
+	if (secid)
+		/* TODO: after secid to label mapping is done.
+		 *  Dealing with USB IO specific behavior
+		 */
+		return 0;
+	cl = __begin_current_label_crit_section();
+	tl = aa_get_task_label(target);
+	error = aa_may_signal(cl, tl, sig);
+	aa_put_label(tl);
+	__end_current_label_crit_section(cl);
+
+	return error;
+}
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -697,6 +717,7 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(bprm_secureexec, apparmor_bprm_secureexec),
 
 	LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit),
+	LSM_HOOK_INIT(task_kill, apparmor_task_kill),
 };
 
 /*

From 2ea3ffb7782a84da33a8382f13ebd016da50079b Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:04:47 -0700
Subject: [PATCH 270/391] apparmor: add mount mediation

Add basic mount mediation. That allows controlling based on basic
mount parameters. It does not include special mount parameters for
apparmor, super block labeling, or any triggers for apparmor namespace
parameter modifications on pivot root.

default userspace policy rules have the form of
  MOUNT RULE = ( MOUNT | REMOUNT | UMOUNT )

  MOUNT = [ QUALIFIERS ] 'mount' [ MOUNT CONDITIONS ] [ SOURCE FILEGLOB ]
          [ '->' MOUNTPOINT FILEGLOB ]

  REMOUNT = [ QUALIFIERS ] 'remount' [ MOUNT CONDITIONS ]
            MOUNTPOINT FILEGLOB

  UMOUNT = [ QUALIFIERS ] 'umount' [ MOUNT CONDITIONS ] MOUNTPOINT FILEGLOB

  MOUNT CONDITIONS = [ ( 'fstype' | 'vfstype' ) ( '=' | 'in' )
                       MOUNT FSTYPE EXPRESSION ]
		       [ 'options' ( '=' | 'in' ) MOUNT FLAGS EXPRESSION ]

  MOUNT FSTYPE EXPRESSION = ( MOUNT FSTYPE LIST | MOUNT EXPRESSION )

  MOUNT FSTYPE LIST = Comma separated list of valid filesystem and
                      virtual filesystem types (eg ext4, debugfs, etc)

  MOUNT FLAGS EXPRESSION = ( MOUNT FLAGS LIST | MOUNT EXPRESSION )

  MOUNT FLAGS LIST = Comma separated list of MOUNT FLAGS.

  MOUNT FLAGS = ( 'ro' | 'rw' | 'nosuid' | 'suid' | 'nodev' | 'dev' |
                  'noexec' | 'exec' | 'sync' | 'async' | 'remount' |
		  'mand' | 'nomand' | 'dirsync' | 'noatime' | 'atime' |
		  'nodiratime' | 'diratime' | 'bind' | 'rbind' | 'move' |
		  'verbose' | 'silent' | 'loud' | 'acl' | 'noacl' |
		  'unbindable' | 'runbindable' | 'private' | 'rprivate' |
		  'slave' | 'rslave' | 'shared' | 'rshared' |
		  'relatime' | 'norelatime' | 'iversion' | 'noiversion' |
		  'strictatime' | 'nouser' | 'user' )

  MOUNT EXPRESSION = ( ALPHANUMERIC | AARE ) ...

  PIVOT ROOT RULE = [ QUALIFIERS ] pivot_root [ oldroot=OLD PUT FILEGLOB ]
                    [ NEW ROOT FILEGLOB ]

  SOURCE FILEGLOB = FILEGLOB

  MOUNTPOINT FILEGLOB = FILEGLOB

eg.
  mount,
  mount /dev/foo,
  mount options=ro /dev/foo -> /mnt/,
  mount options in (ro,atime) /dev/foo -> /mnt/,
  mount options=ro options=atime,

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/Makefile           |   2 +-
 security/apparmor/apparmorfs.c       |   8 +-
 security/apparmor/domain.c           |   4 +-
 security/apparmor/include/apparmor.h |   1 +
 security/apparmor/include/audit.h    |  11 +
 security/apparmor/include/domain.h   |   5 +
 security/apparmor/include/mount.h    |  54 +++
 security/apparmor/lsm.c              |  64 +++
 security/apparmor/mount.c            | 696 +++++++++++++++++++++++++++
 9 files changed, 841 insertions(+), 4 deletions(-)
 create mode 100644 security/apparmor/include/mount.h
 create mode 100644 security/apparmor/mount.c

diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index a16b195274de..81a34426d024 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
 
 apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
               path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
-              resource.o secid.o file.o policy_ns.o label.o
+              resource.o secid.o file.o policy_ns.o label.o mount.o
 apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
 
 clean-files := capability_names.h rlim_names.h
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index a5f9e1aa51f7..8fa6c898c44b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -2159,9 +2159,14 @@ static struct aa_sfs_entry aa_sfs_entry_policy[] = {
 	{ }
 };
 
+static struct aa_sfs_entry aa_sfs_entry_mount[] = {
+	AA_SFS_FILE_STRING("mask", "mount umount pivot_root"),
+	{ }
+};
+
 static struct aa_sfs_entry aa_sfs_entry_ns[] = {
 	AA_SFS_FILE_BOOLEAN("profile",		1),
-	AA_SFS_FILE_BOOLEAN("pivot_root",	1),
+	AA_SFS_FILE_BOOLEAN("pivot_root",	0),
 	{ }
 };
 
@@ -2180,6 +2185,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 	AA_SFS_DIR("policy",			aa_sfs_entry_policy),
 	AA_SFS_DIR("domain",			aa_sfs_entry_domain),
 	AA_SFS_DIR("file",			aa_sfs_entry_file),
+	AA_SFS_DIR("mount",			aa_sfs_entry_mount),
 	AA_SFS_DIR("namespaces",		aa_sfs_entry_ns),
 	AA_SFS_FILE_U64("capability",		VFS_CAP_FLAGS_MASK),
 	AA_SFS_DIR("rlimit",			aa_sfs_entry_rlimit),
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index d0594446ae3f..ffc8c75a6785 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -374,8 +374,8 @@ static const char *next_name(int xtype, const char *name)
  *
  * Returns: refcounted label, or NULL on failure (MAYBE NULL)
  */
-static struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
-				       const char **name)
+struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
+				const char **name)
 {
 	struct aa_label *label = NULL;
 	u32 xtype = xindex & AA_X_TYPE_MASK;
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 962a20a75e01..829082c35faa 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -27,6 +27,7 @@
 #define AA_CLASS_NET		4
 #define AA_CLASS_RLIMITS	5
 #define AA_CLASS_DOMAIN		6
+#define AA_CLASS_MOUNT		7
 #define AA_CLASS_PTRACE		9
 #define AA_CLASS_SIGNAL		10
 #define AA_CLASS_LABEL		16
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index d9a156ae11b9..c3fe1c5ef3bc 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -71,6 +71,10 @@ enum audit_type {
 #define OP_FMPROT "file_mprotect"
 #define OP_INHERIT "file_inherit"
 
+#define OP_PIVOTROOT "pivotroot"
+#define OP_MOUNT "mount"
+#define OP_UMOUNT "umount"
+
 #define OP_CREATE "create"
 #define OP_POST_CREATE "post_create"
 #define OP_BIND "bind"
@@ -132,6 +136,13 @@ struct apparmor_audit_data {
 			int rlim;
 			unsigned long max;
 		} rlim;
+		struct {
+			const char *src_name;
+			const char *type;
+			const char *trans;
+			const char *data;
+			unsigned long flags;
+		} mnt;
 	};
 };
 
diff --git a/security/apparmor/include/domain.h b/security/apparmor/include/domain.h
index bab5810b6e9a..db27403346c5 100644
--- a/security/apparmor/include/domain.h
+++ b/security/apparmor/include/domain.h
@@ -15,6 +15,8 @@
 #include <linux/binfmts.h>
 #include <linux/types.h>
 
+#include "label.h"
+
 #ifndef __AA_DOMAIN_H
 #define __AA_DOMAIN_H
 
@@ -29,6 +31,9 @@ struct aa_domain {
 #define AA_CHANGE_ONEXEC  4
 #define AA_CHANGE_STACK 8
 
+struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
+				const char **name);
+
 int apparmor_bprm_set_creds(struct linux_binprm *bprm);
 int apparmor_bprm_secureexec(struct linux_binprm *bprm);
 
diff --git a/security/apparmor/include/mount.h b/security/apparmor/include/mount.h
new file mode 100644
index 000000000000..25d6067fa6ef
--- /dev/null
+++ b/security/apparmor/include/mount.h
@@ -0,0 +1,54 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor file mediation function definitions.
+ *
+ * Copyright 2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_MOUNT_H
+#define __AA_MOUNT_H
+
+#include <linux/fs.h>
+#include <linux/path.h>
+
+#include "domain.h"
+#include "policy.h"
+
+/* mount perms */
+#define AA_MAY_PIVOTROOT	0x01
+#define AA_MAY_MOUNT		0x02
+#define AA_MAY_UMOUNT		0x04
+#define AA_AUDIT_DATA		0x40
+#define AA_MNT_CONT_MATCH	0x40
+
+#define AA_MS_IGNORE_MASK (MS_KERNMOUNT | MS_NOSEC | MS_ACTIVE | MS_BORN)
+
+int aa_remount(struct aa_label *label, const struct path *path,
+	       unsigned long flags, void *data);
+
+int aa_bind_mount(struct aa_label *label, const struct path *path,
+		  const char *old_name, unsigned long flags);
+
+
+int aa_mount_change_type(struct aa_label *label, const struct path *path,
+			 unsigned long flags);
+
+int aa_move_mount(struct aa_label *label, const struct path *path,
+		  const char *old_name);
+
+int aa_new_mount(struct aa_label *label, const char *dev_name,
+		 const struct path *path, const char *type, unsigned long flags,
+		 void *data);
+
+int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags);
+
+int aa_pivotroot(struct aa_label *label, const struct path *old_path,
+		 const struct path *new_path);
+
+#endif /* __AA_MOUNT_H */
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index af22f3dfbcce..4ad0b3a45142 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -38,6 +38,7 @@
 #include "include/policy.h"
 #include "include/policy_ns.h"
 #include "include/procattr.h"
+#include "include/mount.h"
 
 /* Flag indicating whether initialization completed */
 int apparmor_initialized;
@@ -511,6 +512,65 @@ static int apparmor_file_mprotect(struct vm_area_struct *vma,
 			   !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0);
 }
 
+static int apparmor_sb_mount(const char *dev_name, const struct path *path,
+			     const char *type, unsigned long flags, void *data)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	/* Discard magic */
+	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+		flags &= ~MS_MGC_MSK;
+
+	flags &= ~AA_MS_IGNORE_MASK;
+
+	label = __begin_current_label_crit_section();
+	if (!unconfined(label)) {
+		if (flags & MS_REMOUNT)
+			error = aa_remount(label, path, flags, data);
+		else if (flags & MS_BIND)
+			error = aa_bind_mount(label, path, dev_name, flags);
+		else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE |
+				  MS_UNBINDABLE))
+			error = aa_mount_change_type(label, path, flags);
+		else if (flags & MS_MOVE)
+			error = aa_move_mount(label, path, dev_name);
+		else
+			error = aa_new_mount(label, dev_name, path, type,
+					     flags, data);
+	}
+	__end_current_label_crit_section(label);
+
+	return error;
+}
+
+static int apparmor_sb_umount(struct vfsmount *mnt, int flags)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	label = __begin_current_label_crit_section();
+	if (!unconfined(label))
+		error = aa_umount(label, mnt, flags);
+	__end_current_label_crit_section(label);
+
+	return error;
+}
+
+static int apparmor_sb_pivotroot(const struct path *old_path,
+				 const struct path *new_path)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	label = aa_get_current_label();
+	if (!unconfined(label))
+		error = aa_pivotroot(label, old_path, new_path);
+	aa_put_label(label);
+
+	return error;
+}
+
 static int apparmor_getprocattr(struct task_struct *task, char *name,
 				char **value)
 {
@@ -682,6 +742,10 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(capget, apparmor_capget),
 	LSM_HOOK_INIT(capable, apparmor_capable),
 
+	LSM_HOOK_INIT(sb_mount, apparmor_sb_mount),
+	LSM_HOOK_INIT(sb_umount, apparmor_sb_umount),
+	LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot),
+
 	LSM_HOOK_INIT(path_link, apparmor_path_link),
 	LSM_HOOK_INIT(path_unlink, apparmor_path_unlink),
 	LSM_HOOK_INIT(path_symlink, apparmor_path_symlink),
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
new file mode 100644
index 000000000000..82a64b58041d
--- /dev/null
+++ b/security/apparmor/mount.c
@@ -0,0 +1,696 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor mediation of files
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/context.h"
+#include "include/domain.h"
+#include "include/file.h"
+#include "include/match.h"
+#include "include/mount.h"
+#include "include/path.h"
+#include "include/policy.h"
+
+
+static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags)
+{
+	if (flags & MS_RDONLY)
+		audit_log_format(ab, "ro");
+	else
+		audit_log_format(ab, "rw");
+	if (flags & MS_NOSUID)
+		audit_log_format(ab, ", nosuid");
+	if (flags & MS_NODEV)
+		audit_log_format(ab, ", nodev");
+	if (flags & MS_NOEXEC)
+		audit_log_format(ab, ", noexec");
+	if (flags & MS_SYNCHRONOUS)
+		audit_log_format(ab, ", sync");
+	if (flags & MS_REMOUNT)
+		audit_log_format(ab, ", remount");
+	if (flags & MS_MANDLOCK)
+		audit_log_format(ab, ", mand");
+	if (flags & MS_DIRSYNC)
+		audit_log_format(ab, ", dirsync");
+	if (flags & MS_NOATIME)
+		audit_log_format(ab, ", noatime");
+	if (flags & MS_NODIRATIME)
+		audit_log_format(ab, ", nodiratime");
+	if (flags & MS_BIND)
+		audit_log_format(ab, flags & MS_REC ? ", rbind" : ", bind");
+	if (flags & MS_MOVE)
+		audit_log_format(ab, ", move");
+	if (flags & MS_SILENT)
+		audit_log_format(ab, ", silent");
+	if (flags & MS_POSIXACL)
+		audit_log_format(ab, ", acl");
+	if (flags & MS_UNBINDABLE)
+		audit_log_format(ab, flags & MS_REC ? ", runbindable" :
+				 ", unbindable");
+	if (flags & MS_PRIVATE)
+		audit_log_format(ab, flags & MS_REC ? ", rprivate" :
+				 ", private");
+	if (flags & MS_SLAVE)
+		audit_log_format(ab, flags & MS_REC ? ", rslave" :
+				 ", slave");
+	if (flags & MS_SHARED)
+		audit_log_format(ab, flags & MS_REC ? ", rshared" :
+				 ", shared");
+	if (flags & MS_RELATIME)
+		audit_log_format(ab, ", relatime");
+	if (flags & MS_I_VERSION)
+		audit_log_format(ab, ", iversion");
+	if (flags & MS_STRICTATIME)
+		audit_log_format(ab, ", strictatime");
+	if (flags & MS_NOUSER)
+		audit_log_format(ab, ", nouser");
+}
+
+/**
+ * audit_cb - call back for mount specific audit fields
+ * @ab: audit_buffer  (NOT NULL)
+ * @va: audit struct to audit values of  (NOT NULL)
+ */
+static void audit_cb(struct audit_buffer *ab, void *va)
+{
+	struct common_audit_data *sa = va;
+
+	if (aad(sa)->mnt.type) {
+		audit_log_format(ab, " fstype=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.type);
+	}
+	if (aad(sa)->mnt.src_name) {
+		audit_log_format(ab, " srcname=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.src_name);
+	}
+	if (aad(sa)->mnt.trans) {
+		audit_log_format(ab, " trans=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.trans);
+	}
+	if (aad(sa)->mnt.flags) {
+		audit_log_format(ab, " flags=\"");
+		audit_mnt_flags(ab, aad(sa)->mnt.flags);
+		audit_log_format(ab, "\"");
+	}
+	if (aad(sa)->mnt.data) {
+		audit_log_format(ab, " options=");
+		audit_log_untrustedstring(ab, aad(sa)->mnt.data);
+	}
+}
+
+/**
+ * audit_mount - handle the auditing of mount operations
+ * @profile: the profile being enforced  (NOT NULL)
+ * @op: operation being mediated (NOT NULL)
+ * @name: name of object being mediated (MAYBE NULL)
+ * @src_name: src_name of object being mediated (MAYBE_NULL)
+ * @type: type of filesystem (MAYBE_NULL)
+ * @trans: name of trans (MAYBE NULL)
+ * @flags: filesystem idependent mount flags
+ * @data: filesystem mount flags
+ * @request: permissions requested
+ * @perms: the permissions computed for the request (NOT NULL)
+ * @info: extra information message (MAYBE NULL)
+ * @error: 0 if operation allowed else failure error code
+ *
+ * Returns: %0 or error on failure
+ */
+static int audit_mount(struct aa_profile *profile, const char *op,
+		       const char *name, const char *src_name,
+		       const char *type, const char *trans,
+		       unsigned long flags, const void *data, u32 request,
+		       struct aa_perms *perms, const char *info, int error)
+{
+	int audit_type = AUDIT_APPARMOR_AUTO;
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op);
+
+	if (likely(!error)) {
+		u32 mask = perms->audit;
+
+		if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL))
+			mask = 0xffff;
+
+		/* mask off perms that are not being force audited */
+		request &= mask;
+
+		if (likely(!request))
+			return 0;
+		audit_type = AUDIT_APPARMOR_AUDIT;
+	} else {
+		/* only report permissions that were denied */
+		request = request & ~perms->allow;
+
+		if (request & perms->kill)
+			audit_type = AUDIT_APPARMOR_KILL;
+
+		/* quiet known rejects, assumes quiet and kill do not overlap */
+		if ((request & perms->quiet) &&
+		    AUDIT_MODE(profile) != AUDIT_NOQUIET &&
+		    AUDIT_MODE(profile) != AUDIT_ALL)
+			request &= ~perms->quiet;
+
+		if (!request)
+			return error;
+	}
+
+	aad(&sa)->name = name;
+	aad(&sa)->mnt.src_name = src_name;
+	aad(&sa)->mnt.type = type;
+	aad(&sa)->mnt.trans = trans;
+	aad(&sa)->mnt.flags = flags;
+	if (data && (perms->audit & AA_AUDIT_DATA))
+		aad(&sa)->mnt.data = data;
+	aad(&sa)->info = info;
+	aad(&sa)->error = error;
+
+	return aa_audit(audit_type, profile, &sa, audit_cb);
+}
+
+/**
+ * match_mnt_flags - Do an ordered match on mount flags
+ * @dfa: dfa to match against
+ * @state: state to start in
+ * @flags: mount flags to match against
+ *
+ * Mount flags are encoded as an ordered match. This is done instead of
+ * checking against a simple bitmask, to allow for logical operations
+ * on the flags.
+ *
+ * Returns: next state after flags match
+ */
+static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state,
+				    unsigned long flags)
+{
+	unsigned int i;
+
+	for (i = 0; i <= 31 ; ++i) {
+		if ((1 << i) & flags)
+			state = aa_dfa_next(dfa, state, i + 1);
+	}
+
+	return state;
+}
+
+/**
+ * compute_mnt_perms - compute mount permission associated with @state
+ * @dfa: dfa to match against (NOT NULL)
+ * @state: state match finished in
+ *
+ * Returns: mount permissions
+ */
+static struct aa_perms compute_mnt_perms(struct aa_dfa *dfa,
+					   unsigned int state)
+{
+	struct aa_perms perms;
+
+	perms.kill = 0;
+	perms.allow = dfa_user_allow(dfa, state);
+	perms.audit = dfa_user_audit(dfa, state);
+	perms.quiet = dfa_user_quiet(dfa, state);
+	perms.xindex = dfa_user_xindex(dfa, state);
+
+	return perms;
+}
+
+static const char * const mnt_info_table[] = {
+	"match succeeded",
+	"failed mntpnt match",
+	"failed srcname match",
+	"failed type match",
+	"failed flags match",
+	"failed data match"
+};
+
+/*
+ * Returns 0 on success else element that match failed in, this is the
+ * index into the mnt_info_table above
+ */
+static int do_match_mnt(struct aa_dfa *dfa, unsigned int start,
+			const char *mntpnt, const char *devname,
+			const char *type, unsigned long flags,
+			void *data, bool binary, struct aa_perms *perms)
+{
+	unsigned int state;
+
+	AA_BUG(!dfa);
+	AA_BUG(!perms);
+
+	state = aa_dfa_match(dfa, start, mntpnt);
+	state = aa_dfa_null_transition(dfa, state);
+	if (!state)
+		return 1;
+
+	if (devname)
+		state = aa_dfa_match(dfa, state, devname);
+	state = aa_dfa_null_transition(dfa, state);
+	if (!state)
+		return 2;
+
+	if (type)
+		state = aa_dfa_match(dfa, state, type);
+	state = aa_dfa_null_transition(dfa, state);
+	if (!state)
+		return 3;
+
+	state = match_mnt_flags(dfa, state, flags);
+	if (!state)
+		return 4;
+	*perms = compute_mnt_perms(dfa, state);
+	if (perms->allow & AA_MAY_MOUNT)
+		return 0;
+
+	/* only match data if not binary and the DFA flags data is expected */
+	if (data && !binary && (perms->allow & AA_MNT_CONT_MATCH)) {
+		state = aa_dfa_null_transition(dfa, state);
+		if (!state)
+			return 4;
+
+		state = aa_dfa_match(dfa, state, data);
+		if (!state)
+			return 5;
+		*perms = compute_mnt_perms(dfa, state);
+		if (perms->allow & AA_MAY_MOUNT)
+			return 0;
+	}
+
+	/* failed at end of flags match */
+	return 4;
+}
+
+
+static int path_flags(struct aa_profile *profile, const struct path *path)
+{
+	AA_BUG(!profile);
+	AA_BUG(!path);
+
+	return profile->path_flags |
+		(S_ISDIR(path->dentry->d_inode->i_mode) ? PATH_IS_DIR : 0);
+}
+
+/**
+ * match_mnt_path_str - handle path matching for mount
+ * @profile: the confining profile
+ * @mntpath: for the mntpnt (NOT NULL)
+ * @buffer: buffer to be used to lookup mntpath
+ * @devnme: string for the devname/src_name (MAY BE NULL OR ERRPTR)
+ * @type: string for the dev type (MAYBE NULL)
+ * @flags: mount flags to match
+ * @data: fs mount data (MAYBE NULL)
+ * @binary: whether @data is binary
+ * @devinfo: error str if (IS_ERR(@devname))
+ *
+ * Returns: 0 on success else error
+ */
+static int match_mnt_path_str(struct aa_profile *profile,
+			      const struct path *mntpath, char *buffer,
+			      const char *devname, const char *type,
+			      unsigned long flags, void *data, bool binary,
+			      const char *devinfo)
+{
+	struct aa_perms perms = { };
+	const char *mntpnt = NULL, *info = NULL;
+	int pos, error;
+
+	AA_BUG(!profile);
+	AA_BUG(!mntpath);
+	AA_BUG(!buffer);
+
+	error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
+			     &mntpnt, &info, profile->disconnected);
+	if (error)
+		goto audit;
+	if (IS_ERR(devname)) {
+		error = PTR_ERR(devname);
+		devname = NULL;
+		info = devinfo;
+		goto audit;
+	}
+
+	error = -EACCES;
+	pos = do_match_mnt(profile->policy.dfa,
+			   profile->policy.start[AA_CLASS_MOUNT],
+			   mntpnt, devname, type, flags, data, binary, &perms);
+	if (pos) {
+		info = mnt_info_table[pos];
+		goto audit;
+	}
+	error = 0;
+
+audit:
+	return audit_mount(profile, OP_MOUNT, mntpnt, devname, type, NULL,
+			   flags, data, AA_MAY_MOUNT, &perms, info, error);
+}
+
+/**
+ * match_mnt - handle path matching for mount
+ * @profile: the confining profile
+ * @mntpath: for the mntpnt (NOT NULL)
+ * @buffer: buffer to be used to lookup mntpath
+ * @devpath: path devname/src_name (MAYBE NULL)
+ * @devbuffer: buffer to be used to lookup devname/src_name
+ * @type: string for the dev type (MAYBE NULL)
+ * @flags: mount flags to match
+ * @data: fs mount data (MAYBE NULL)
+ * @binary: whether @data is binary
+ *
+ * Returns: 0 on success else error
+ */
+static int match_mnt(struct aa_profile *profile, const struct path *path,
+		     char *buffer, struct path *devpath, char *devbuffer,
+		     const char *type, unsigned long flags, void *data,
+		     bool binary)
+{
+	const char *devname = NULL, *info = NULL;
+	int error = -EACCES;
+
+	AA_BUG(!profile);
+	AA_BUG(devpath && !devbuffer);
+
+	if (devpath) {
+		error = aa_path_name(devpath, path_flags(profile, devpath),
+				     devbuffer, &devname, &info,
+				     profile->disconnected);
+		if (error)
+			devname = ERR_PTR(error);
+	}
+
+	return match_mnt_path_str(profile, path, buffer, devname, type, flags,
+				  data, binary, info);
+}
+
+int aa_remount(struct aa_label *label, const struct path *path,
+	       unsigned long flags, void *data)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL;
+	bool binary;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	binary = path->dentry->d_sb->s_type->fs_flags & FS_BINARY_MOUNTDATA;
+
+	get_buffers(buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, NULL, NULL, NULL,
+				  flags, data, binary));
+	put_buffers(buffer);
+
+	return error;
+}
+
+int aa_bind_mount(struct aa_label *label, const struct path *path,
+		  const char *dev_name, unsigned long flags)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL, *old_buffer = NULL;
+	struct path old_path;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	if (!dev_name || !*dev_name)
+		return -EINVAL;
+
+	flags &= MS_REC | MS_BIND;
+
+	error = kern_path(dev_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
+	if (error)
+		return error;
+
+	get_buffers(buffer, old_buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, &old_path, old_buffer,
+				  NULL, flags, NULL, false));
+	put_buffers(buffer, old_buffer);
+	path_put(&old_path);
+
+	return error;
+}
+
+int aa_mount_change_type(struct aa_label *label, const struct path *path,
+			 unsigned long flags)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	/* These are the flags allowed by do_change_type() */
+	flags &= (MS_REC | MS_SILENT | MS_SHARED | MS_PRIVATE | MS_SLAVE |
+		  MS_UNBINDABLE);
+
+	get_buffers(buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, NULL, NULL, NULL,
+				  flags, NULL, false));
+	put_buffers(buffer);
+
+	return error;
+}
+
+int aa_move_mount(struct aa_label *label, const struct path *path,
+		  const char *orig_name)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL, *old_buffer = NULL;
+	struct path old_path;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	if (!orig_name || !*orig_name)
+		return -EINVAL;
+
+	error = kern_path(orig_name, LOOKUP_FOLLOW, &old_path);
+	if (error)
+		return error;
+
+	get_buffers(buffer, old_buffer);
+	error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, &old_path, old_buffer,
+				  NULL, MS_MOVE, NULL, false));
+	put_buffers(buffer, old_buffer);
+	path_put(&old_path);
+
+	return error;
+}
+
+int aa_new_mount(struct aa_label *label, const char *dev_name,
+		 const struct path *path, const char *type, unsigned long flags,
+		 void *data)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL, *dev_buffer = NULL;
+	bool binary = true;
+	int error;
+	int requires_dev = 0;
+	struct path tmp_path, *dev_path = NULL;
+
+	AA_BUG(!label);
+	AA_BUG(!path);
+
+	if (type) {
+		struct file_system_type *fstype;
+
+		fstype = get_fs_type(type);
+		if (!fstype)
+			return -ENODEV;
+		binary = fstype->fs_flags & FS_BINARY_MOUNTDATA;
+		requires_dev = fstype->fs_flags & FS_REQUIRES_DEV;
+		put_filesystem(fstype);
+
+		if (requires_dev) {
+			if (!dev_name || !*dev_name)
+				return -ENOENT;
+
+			error = kern_path(dev_name, LOOKUP_FOLLOW, &tmp_path);
+			if (error)
+				return error;
+			dev_path = &tmp_path;
+		}
+	}
+
+	get_buffers(buffer, dev_buffer);
+	if (dev_path) {
+		error = fn_for_each_confined(label, profile,
+			match_mnt(profile, path, buffer, dev_path, dev_buffer,
+				  type, flags, data, binary));
+	} else {
+		error = fn_for_each_confined(label, profile,
+			match_mnt_path_str(profile, path, buffer, dev_name,
+					   type, flags, data, binary, NULL));
+	}
+	put_buffers(buffer, dev_buffer);
+	if (dev_path)
+		path_put(dev_path);
+
+	return error;
+}
+
+static int profile_umount(struct aa_profile *profile, struct path *path,
+			  char *buffer)
+{
+	struct aa_perms perms = { };
+	const char *name = NULL, *info = NULL;
+	unsigned int state;
+	int error;
+
+	AA_BUG(!profile);
+	AA_BUG(!path);
+
+	error = aa_path_name(path, path_flags(profile, path), buffer, &name,
+			     &info, profile->disconnected);
+	if (error)
+		goto audit;
+
+	state = aa_dfa_match(profile->policy.dfa,
+			     profile->policy.start[AA_CLASS_MOUNT],
+			     name);
+	perms = compute_mnt_perms(profile->policy.dfa, state);
+	if (AA_MAY_UMOUNT & ~perms.allow)
+		error = -EACCES;
+
+audit:
+	return audit_mount(profile, OP_UMOUNT, name, NULL, NULL, NULL, 0, NULL,
+			   AA_MAY_UMOUNT, &perms, info, error);
+}
+
+int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags)
+{
+	struct aa_profile *profile;
+	char *buffer = NULL;
+	int error;
+	struct path path = { .mnt = mnt, .dentry = mnt->mnt_root };
+
+	AA_BUG(!label);
+	AA_BUG(!mnt);
+
+	get_buffers(buffer);
+	error = fn_for_each_confined(label, profile,
+			profile_umount(profile, &path, buffer));
+	put_buffers(buffer);
+
+	return error;
+}
+
+/* helper fn for transition on pivotroot
+ *
+ * Returns: label for transition or ERR_PTR. Does not return NULL
+ */
+static struct aa_label *build_pivotroot(struct aa_profile *profile,
+					const struct path *new_path,
+					char *new_buffer,
+					const struct path *old_path,
+					char *old_buffer)
+{
+	const char *old_name, *new_name = NULL, *info = NULL;
+	const char *trans_name = NULL;
+	struct aa_perms perms = { };
+	unsigned int state;
+	int error;
+
+	AA_BUG(!profile);
+	AA_BUG(!new_path);
+	AA_BUG(!old_path);
+
+	if (profile_unconfined(profile))
+		return aa_get_newest_label(&profile->label);
+
+	error = aa_path_name(old_path, path_flags(profile, old_path),
+			     old_buffer, &old_name, &info,
+			     profile->disconnected);
+	if (error)
+		goto audit;
+	error = aa_path_name(new_path, path_flags(profile, new_path),
+			     new_buffer, &new_name, &info,
+			     profile->disconnected);
+	if (error)
+		goto audit;
+
+	error = -EACCES;
+	state = aa_dfa_match(profile->policy.dfa,
+			     profile->policy.start[AA_CLASS_MOUNT],
+			     new_name);
+	state = aa_dfa_null_transition(profile->policy.dfa, state);
+	state = aa_dfa_match(profile->policy.dfa, state, old_name);
+	perms = compute_mnt_perms(profile->policy.dfa, state);
+
+	if (AA_MAY_PIVOTROOT & perms.allow)
+		error = 0;
+
+audit:
+	error = audit_mount(profile, OP_PIVOTROOT, new_name, old_name,
+			    NULL, trans_name, 0, NULL, AA_MAY_PIVOTROOT,
+			    &perms, info, error);
+	if (error)
+		return ERR_PTR(error);
+
+	return aa_get_newest_label(&profile->label);
+}
+
+int aa_pivotroot(struct aa_label *label, const struct path *old_path,
+		 const struct path *new_path)
+{
+	struct aa_profile *profile;
+	struct aa_label *target = NULL;
+	char *old_buffer = NULL, *new_buffer = NULL, *info = NULL;
+	int error;
+
+	AA_BUG(!label);
+	AA_BUG(!old_path);
+	AA_BUG(!new_path);
+
+	get_buffers(old_buffer, new_buffer);
+	target = fn_label_build(label, profile, GFP_ATOMIC,
+			build_pivotroot(profile, new_path, new_buffer,
+					old_path, old_buffer));
+	if (!target) {
+		info = "label build failed";
+		error = -ENOMEM;
+		goto fail;
+	} else if (!IS_ERR(target)) {
+		error = aa_replace_current_label(target);
+		if (error) {
+			/* TODO: audit target */
+			aa_put_label(target);
+			goto out;
+		}
+	} else
+		/* already audited error */
+		error = PTR_ERR(target);
+out:
+	put_buffers(old_buffer, new_buffer);
+
+	return error;
+
+fail:
+	/* TODO: add back in auditing of new_name and old_name */
+	error = fn_for_each(label, profile,
+			audit_mount(profile, OP_PIVOTROOT, NULL /*new_name */,
+				    NULL /* old_name */,
+				    NULL, NULL,
+				    0, NULL, AA_MAY_PIVOTROOT, &nullperms, info,
+				    error));
+	goto out;
+}

From f872af75d325cc449b6621a0d30a4f2ba77dd092 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 6 Aug 2017 05:36:40 -0700
Subject: [PATCH 271/391] apparmor: cleanup conditional check for label in
 label_print

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/label.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index e324f4df3e34..38be7a89cc31 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1450,9 +1450,11 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp)
  * cached label name is present and visible
  * @label->hname only exists if label is namespace hierachical
  */
-static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label)
+static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label,
+				   int flags)
 {
-	if (label->hname && labels_ns(label) == ns)
+	if (label->hname && (!ns || labels_ns(label) == ns) &&
+	    !(flags & ~FLAG_SHOW_MODE))
 		return true;
 
 	return false;
@@ -1710,10 +1712,8 @@ void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns,
 	AA_BUG(!ab);
 	AA_BUG(!label);
 
-	if (!ns)
-		ns = labels_ns(label);
-
-	if (!use_label_hname(ns, label) || display_mode(ns, label, flags)) {
+	if (!use_label_hname(ns, label, flags) ||
+	    display_mode(ns, label, flags)) {
 		len  = aa_label_asxprint(&name, ns, label, flags, gfp);
 		if (len == -1) {
 			AA_DEBUG("label print error");
@@ -1738,10 +1738,7 @@ void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns,
 	AA_BUG(!f);
 	AA_BUG(!label);
 
-	if (!ns)
-		ns = labels_ns(label);
-
-	if (!use_label_hname(ns, label)) {
+	if (!use_label_hname(ns, label, flags)) {
 		char *str;
 		int len;
 
@@ -1764,10 +1761,7 @@ void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags,
 {
 	AA_BUG(!label);
 
-	if (!ns)
-		ns = labels_ns(label);
-
-	if (!use_label_hname(ns, label)) {
+	if (!use_label_hname(ns, label, flags)) {
 		char *str;
 		int len;
 

From 26b7899510ae243e392960704ebdba52d05fbb13 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 6 Aug 2017 05:39:08 -0700
Subject: [PATCH 272/391] apparmor: add support for absolute root view based
 labels

With apparmor policy virtualization based on policy namespace View's
we don't generally want/need absolute root based views, however there
are cases like debugging and some secid based conversions where
using a root based view is important.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/include/label.h |  1 +
 security/apparmor/label.c         | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h
index 9a283b722755..af22dcbbcb8a 100644
--- a/security/apparmor/include/label.h
+++ b/security/apparmor/include/label.h
@@ -310,6 +310,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp);
 #define FLAG_SHOW_MODE 1
 #define FLAG_VIEW_SUBNS 2
 #define FLAG_HIDDEN_UNCONFINED 4
+#define FLAG_ABS_ROOT 8
 int aa_label_snxprint(char *str, size_t size, struct aa_ns *view,
 		      struct aa_label *label, int flags);
 int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label,
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 38be7a89cc31..52b4ef14840d 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1607,8 +1607,13 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns,
 	AA_BUG(!str && size != 0);
 	AA_BUG(!label);
 
-	if (!ns)
+	if (flags & FLAG_ABS_ROOT) {
+		ns = root_ns;
+		len = snprintf(str, size, "=");
+		update_for_len(total, len, size, str);
+	} else if (!ns) {
 		ns = labels_ns(label);
+	}
 
 	label_for_each(i, label, profile) {
 		if (aa_ns_visible(ns, profile->ns, flags & FLAG_VIEW_SUBNS)) {
@@ -1868,6 +1873,9 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
 		if (*str == '&')
 			str++;
 	}
+	if (*str == '=')
+		base = &root_ns->unconfined->label;
+
 	error = vec_setup(profile, vec, len, gfp);
 	if (error)
 		return ERR_PTR(error);

From 2410aa96d6b4930ed25fd02c3d173f14b962e0f4 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:37:18 -0700
Subject: [PATCH 273/391] apparmor: make policy_unpack able to audit different
 info messages

Switch unpack auditing to using the generic name field in the audit
struct and make it so we can start adding new info messages about
why an unpack failed.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/include/audit.h |  4 +--
 security/apparmor/policy_unpack.c | 52 ++++++++++++++++++++++---------
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index c3fe1c5ef3bc..620e81169659 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -127,9 +127,9 @@ struct apparmor_audit_data {
 			} fs;
 		};
 		struct {
-			const char *name;
-			long pos;
+			struct aa_profile *profile;
 			const char *ns;
+			long pos;
 		} iface;
 		int signal;
 		struct {
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index bda0dce3b582..4ede87c30f8b 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -85,9 +85,9 @@ static void audit_cb(struct audit_buffer *ab, void *va)
 		audit_log_format(ab, " ns=");
 		audit_log_untrustedstring(ab, aad(sa)->iface.ns);
 	}
-	if (aad(sa)->iface.name) {
+	if (aad(sa)->name) {
 		audit_log_format(ab, " name=");
-		audit_log_untrustedstring(ab, aad(sa)->iface.name);
+		audit_log_untrustedstring(ab, aad(sa)->name);
 	}
 	if (aad(sa)->iface.pos)
 		audit_log_format(ab, " offset=%ld", aad(sa)->iface.pos);
@@ -114,9 +114,9 @@ static int audit_iface(struct aa_profile *new, const char *ns_name,
 		aad(&sa)->iface.pos = e->pos - e->start;
 	aad(&sa)->iface.ns = ns_name;
 	if (new)
-		aad(&sa)->iface.name = new->base.hname;
+		aad(&sa)->name = new->base.hname;
 	else
-		aad(&sa)->iface.name = name;
+		aad(&sa)->name = name;
 	aad(&sa)->info = info;
 	aad(&sa)->error = error;
 
@@ -583,6 +583,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 {
 	struct aa_profile *profile = NULL;
 	const char *tmpname, *tmpns = NULL, *name = NULL;
+	const char *info = "failed to unpack profile";
 	size_t ns_len;
 	struct rhashtable_params params = { 0 };
 	char *key = NULL;
@@ -604,8 +605,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	tmpname = aa_splitn_fqname(name, strlen(name), &tmpns, &ns_len);
 	if (tmpns) {
 		*ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL);
-		if (!*ns_name)
+		if (!*ns_name) {
+			info = "out of memory";
 			goto fail;
+		}
 		name = tmpname;
 	}
 
@@ -624,12 +627,15 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (IS_ERR(profile->xmatch)) {
 		error = PTR_ERR(profile->xmatch);
 		profile->xmatch = NULL;
+		info = "bad xmatch";
 		goto fail;
 	}
 	/* xmatch_len is not optional if xmatch is set */
 	if (profile->xmatch) {
-		if (!unpack_u32(e, &tmp, NULL))
+		if (!unpack_u32(e, &tmp, NULL)) {
+			info = "missing xmatch len";
 			goto fail;
+		}
 		profile->xmatch_len = tmp;
 	}
 
@@ -637,8 +643,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	(void) unpack_str(e, &profile->disconnected, "disconnected");
 
 	/* per profile debug flags (complain, audit) */
-	if (!unpack_nameX(e, AA_STRUCT, "flags"))
+	if (!unpack_nameX(e, AA_STRUCT, "flags")) {
+		info = "profile missing flags";
 		goto fail;
+	}
+	info = "failed to unpack profile flags";
 	if (!unpack_u32(e, &tmp, NULL))
 		goto fail;
 	if (tmp & PACKED_FLAG_HAT)
@@ -667,6 +676,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		/* set a default value if path_flags field is not present */
 		profile->path_flags = PATH_MEDIATE_DELETED;
 
+	info = "failed to unpack profile capabilities";
 	if (!unpack_u32(e, &(profile->caps.allow.cap[0]), NULL))
 		goto fail;
 	if (!unpack_u32(e, &(profile->caps.audit.cap[0]), NULL))
@@ -676,6 +686,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (!unpack_u32(e, &tmpcap.cap[0], NULL))
 		goto fail;
 
+	info = "failed to unpack upper profile capabilities";
 	if (unpack_nameX(e, AA_STRUCT, "caps64")) {
 		/* optional upper half of 64 bit caps */
 		if (!unpack_u32(e, &(profile->caps.allow.cap[1]), NULL))
@@ -690,6 +701,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			goto fail;
 	}
 
+	info = "failed to unpack extended profile capabilities";
 	if (unpack_nameX(e, AA_STRUCT, "capsx")) {
 		/* optional extended caps mediation mask */
 		if (!unpack_u32(e, &(profile->caps.extended.cap[0]), NULL))
@@ -700,11 +712,14 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			goto fail;
 	}
 
-	if (!unpack_rlimits(e, profile))
+	if (!unpack_rlimits(e, profile)) {
+		info = "failed to unpack profile rlimits";
 		goto fail;
+	}
 
 	if (unpack_nameX(e, AA_STRUCT, "policydb")) {
 		/* generic policy dfa - optional and may be NULL */
+		info = "failed to unpack policydb";
 		profile->policy.dfa = unpack_dfa(e);
 		if (IS_ERR(profile->policy.dfa)) {
 			error = PTR_ERR(profile->policy.dfa);
@@ -734,6 +749,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (IS_ERR(profile->file.dfa)) {
 		error = PTR_ERR(profile->file.dfa);
 		profile->file.dfa = NULL;
+		info = "failed to unpack profile file rules";
 		goto fail;
 	} else if (profile->file.dfa) {
 		if (!unpack_u32(e, &profile->file.start, "dfa_start"))
@@ -746,10 +762,13 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	} else
 		profile->file.dfa = aa_get_dfa(nulldfa);
 
-	if (!unpack_trans_table(e, profile))
+	if (!unpack_trans_table(e, profile)) {
+		info = "failed to unpack profile transition table";
 		goto fail;
+	}
 
 	if (unpack_nameX(e, AA_STRUCT, "data")) {
+		info = "out of memory";
 		profile->data = kzalloc(sizeof(*profile->data), GFP_KERNEL);
 		if (!profile->data)
 			goto fail;
@@ -761,8 +780,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		params.hashfn = strhash;
 		params.obj_cmpfn = datacmp;
 
-		if (rhashtable_init(profile->data, &params))
+		if (rhashtable_init(profile->data, &params)) {
+			info = "failed to init key, value hash table";
 			goto fail;
+		}
 
 		while (unpack_strdup(e, &key, NULL)) {
 			data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -784,12 +805,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 					       profile->data->p);
 		}
 
-		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+		if (!unpack_nameX(e, AA_STRUCTEND, NULL)) {
+			info = "failed to unpack end of key, value data table";
 			goto fail;
+		}
 	}
 
-	if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+	if (!unpack_nameX(e, AA_STRUCTEND, NULL)) {
+		info = "failed to unpack end of profile";
 		goto fail;
+	}
 
 	return profile;
 
@@ -798,8 +823,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		name = NULL;
 	else if (!name)
 		name = "unknown";
-	audit_iface(profile, NULL, name, "failed to unpack profile", e,
-		    error);
+	audit_iface(profile, NULL, name, info, e, error);
 	aa_free_profile(profile);
 
 	return ERR_PTR(error);

From cbf2d0e1a9e4876046a628e0e036a7545a3a4c40 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:41:13 -0700
Subject: [PATCH 274/391] apparmor: add more debug asserts to apparmorfs

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/apparmorfs.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 8fa6c898c44b..7acea14c850b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1446,6 +1446,10 @@ void __aafs_profile_migrate_dents(struct aa_profile *old,
 {
 	int i;
 
+	AA_BUG(!old);
+	AA_BUG(!new);
+	AA_BUG(!mutex_is_locked(&profiles_ns(old)->lock));
+
 	for (i = 0; i < AAFS_PROF_SIZEOF; i++) {
 		new->dents[i] = old->dents[i];
 		if (new->dents[i])
@@ -1509,6 +1513,9 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
 	struct dentry *dent = NULL, *dir;
 	int error;
 
+	AA_BUG(!profile);
+	AA_BUG(!mutex_is_locked(&profiles_ns(profile)->lock));
+
 	if (!parent) {
 		struct aa_profile *p;
 		p = aa_deref_parent(profile);
@@ -1734,6 +1741,7 @@ void __aafs_ns_rmdir(struct aa_ns *ns)
 
 	if (!ns)
 		return;
+	AA_BUG(!mutex_is_locked(&ns->lock));
 
 	list_for_each_entry(child, &ns->base.profiles, base.list)
 		__aafs_profile_rmdir(child);
@@ -1906,6 +1914,10 @@ static struct aa_ns *__next_ns(struct aa_ns *root, struct aa_ns *ns)
 {
 	struct aa_ns *parent, *next;
 
+	AA_BUG(!root);
+	AA_BUG(!ns);
+	AA_BUG(ns != root && !mutex_is_locked(&ns->parent->lock));
+
 	/* is next namespace a child */
 	if (!list_empty(&ns->sub_ns)) {
 		next = list_first_entry(&ns->sub_ns, typeof(*ns), base.list);
@@ -1940,6 +1952,9 @@ static struct aa_ns *__next_ns(struct aa_ns *root, struct aa_ns *ns)
 static struct aa_profile *__first_profile(struct aa_ns *root,
 					  struct aa_ns *ns)
 {
+	AA_BUG(!root);
+	AA_BUG(ns && !mutex_is_locked(&ns->lock));
+
 	for (; ns; ns = __next_ns(root, ns)) {
 		if (!list_empty(&ns->base.profiles))
 			return list_first_entry(&ns->base.profiles,
@@ -1962,6 +1977,8 @@ static struct aa_profile *__next_profile(struct aa_profile *p)
 	struct aa_profile *parent;
 	struct aa_ns *ns = p->ns;
 
+	AA_BUG(!mutex_is_locked(&profiles_ns(p)->lock));
+
 	/* is next profile a child */
 	if (!list_empty(&p->base.profiles))
 		return list_first_entry(&p->base.profiles, typeof(*p),

From 651e28c5537abb39076d3949fb7618536f1d242e Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 18 Jul 2017 23:18:33 -0700
Subject: [PATCH 275/391] apparmor: add base infastructure for socket mediation

Provide a basic mediation of sockets. This is not a full net mediation
but just whether a spcific family of socket can be used by an
application, along with setting up some basic infrastructure for
network mediation to follow.

the user space rule hav the basic form of
  NETWORK RULE = [ QUALIFIERS ] 'network' [ DOMAIN ]
                 [ TYPE | PROTOCOL ]

  DOMAIN = ( 'inet' | 'ax25' | 'ipx' | 'appletalk' | 'netrom' |
             'bridge' | 'atmpvc' | 'x25' | 'inet6' | 'rose' |
	     'netbeui' | 'security' | 'key' | 'packet' | 'ash' |
	     'econet' | 'atmsvc' | 'sna' | 'irda' | 'pppox' |
	     'wanpipe' | 'bluetooth' | 'netlink' | 'unix' | 'rds' |
	     'llc' | 'can' | 'tipc' | 'iucv' | 'rxrpc' | 'isdn' |
	     'phonet' | 'ieee802154' | 'caif' | 'alg' | 'nfc' |
	     'vsock' | 'mpls' | 'ib' | 'kcm' ) ','

  TYPE = ( 'stream' | 'dgram' | 'seqpacket' |  'rdm' | 'raw' |
           'packet' )

  PROTOCOL = ( 'tcp' | 'udp' | 'icmp' )

eg.
  network,
  network inet,

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Seth Arnold <seth.arnold@canonical.com>
---
 security/apparmor/.gitignore       |   1 +
 security/apparmor/Makefile         |  43 +++-
 security/apparmor/apparmorfs.c     |   1 +
 security/apparmor/file.c           |  30 +++
 security/apparmor/include/audit.h  |  26 +-
 security/apparmor/include/net.h    | 114 +++++++++
 security/apparmor/include/perms.h  |   5 +-
 security/apparmor/include/policy.h |  13 +
 security/apparmor/lib.c            |   5 +-
 security/apparmor/lsm.c            | 387 +++++++++++++++++++++++++++++
 security/apparmor/net.c            | 184 ++++++++++++++
 security/apparmor/policy_unpack.c  |  47 +++-
 12 files changed, 840 insertions(+), 16 deletions(-)
 create mode 100644 security/apparmor/include/net.h
 create mode 100644 security/apparmor/net.c

diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore
index 9cdec70d72b8..d5b291e94264 100644
--- a/security/apparmor/.gitignore
+++ b/security/apparmor/.gitignore
@@ -1,5 +1,6 @@
 #
 # Generated include files
 #
+net_names.h
 capability_names.h
 rlim_names.h
diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index 81a34426d024..dafdd387d42b 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -4,11 +4,44 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
 
 apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
               path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
-              resource.o secid.o file.o policy_ns.o label.o mount.o
+              resource.o secid.o file.o policy_ns.o label.o mount.o net.o
 apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
 
-clean-files := capability_names.h rlim_names.h
+clean-files := capability_names.h rlim_names.h net_names.h
 
+# Build a lower case string table of address family names
+# Transform lines from
+#    #define AF_LOCAL		1	/* POSIX name for AF_UNIX	*/
+#    #define AF_INET		2	/* Internet IP Protocol 	*/
+# to
+#    [1] = "local",
+#    [2] = "inet",
+#
+# and build the securityfs entries for the mapping.
+# Transforms lines from
+#    #define AF_INET		2	/* Internet IP Protocol 	*/
+# to
+#    #define AA_SFS_AF_MASK "local inet"
+quiet_cmd_make-af = GEN     $@
+cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\
+	sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+	 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+	echo "};" >> $@ ;\
+	printf '%s' '\#define AA_SFS_AF_MASK "' >> $@ ;\
+	sed -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+	 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/\L\1/p'\
+	 $< | tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
+
+# Build a lower case string table of sock type names
+# Transform lines from
+#    SOCK_STREAM	= 1,
+# to
+#    [1] = "stream",
+quiet_cmd_make-sock = GEN     $@
+cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\
+	sed $^ >>$@ -r -n \
+	-e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+	echo "};" >> $@
 
 # Build a lower case string table of capability names
 # Transforms lines from
@@ -61,6 +94,7 @@ cmd_make-rlim = echo "static const char *const rlim_names[RLIM_NLIMITS] = {" \
 	    tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
 
 $(obj)/capability.o : $(obj)/capability_names.h
+$(obj)/net.o : $(obj)/net_names.h
 $(obj)/resource.o : $(obj)/rlim_names.h
 $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \
 			    $(src)/Makefile
@@ -68,3 +102,8 @@ $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \
 $(obj)/rlim_names.h : $(srctree)/include/uapi/asm-generic/resource.h \
 		      $(src)/Makefile
 	$(call cmd,make-rlim)
+$(obj)/net_names.h : $(srctree)/include/linux/socket.h \
+		     $(srctree)/include/linux/net.h \
+		     $(src)/Makefile
+	$(call cmd,make-af)
+	$(call cmd,make-sock)
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 7acea14c850b..125dad5c3fde 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -2202,6 +2202,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 	AA_SFS_DIR("policy",			aa_sfs_entry_policy),
 	AA_SFS_DIR("domain",			aa_sfs_entry_domain),
 	AA_SFS_DIR("file",			aa_sfs_entry_file),
+	AA_SFS_DIR("network",			aa_sfs_entry_network),
 	AA_SFS_DIR("mount",			aa_sfs_entry_mount),
 	AA_SFS_DIR("namespaces",		aa_sfs_entry_ns),
 	AA_SFS_FILE_U64("capability",		VFS_CAP_FLAGS_MASK),
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 3382518b87fa..db80221891c6 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -21,6 +21,7 @@
 #include "include/context.h"
 #include "include/file.h"
 #include "include/match.h"
+#include "include/net.h"
 #include "include/path.h"
 #include "include/policy.h"
 #include "include/label.h"
@@ -566,6 +567,32 @@ static int __file_path_perm(const char *op, struct aa_label *label,
 	return error;
 }
 
+static int __file_sock_perm(const char *op, struct aa_label *label,
+			    struct aa_label *flabel, struct file *file,
+			    u32 request, u32 denied)
+{
+	struct socket *sock = (struct socket *) file->private_data;
+	int error;
+
+	AA_BUG(!sock);
+
+	/* revalidation due to label out of date. No revocation at this time */
+	if (!denied && aa_label_is_subset(flabel, label))
+		return 0;
+
+	/* TODO: improve to skip profiles cached in flabel */
+	error = aa_sock_file_perm(label, op, request, sock);
+	if (denied) {
+		/* TODO: improve to skip profiles checked above */
+		/* check every profile in file label to is cached */
+		last_error(error, aa_sock_file_perm(flabel, op, request, sock));
+	}
+	if (!error)
+		update_file_ctx(file_ctx(file), label, request);
+
+	return error;
+}
+
 /**
  * aa_file_perm - do permission revalidation check & audit for @file
  * @op: operation being checked
@@ -610,6 +637,9 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 		error = __file_path_perm(op, label, flabel, file, request,
 					 denied);
 
+	else if (S_ISSOCK(file_inode(file)->i_mode))
+		error = __file_sock_perm(op, label, flabel, file, request,
+					 denied);
 done:
 	rcu_read_unlock();
 
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index 620e81169659..ff4316e1068d 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -121,21 +121,29 @@ struct apparmor_audit_data {
 		/* these entries require a custom callback fn */
 		struct {
 			struct aa_label *peer;
-			struct {
-				const char *target;
-				kuid_t ouid;
-			} fs;
+			union {
+				struct {
+					kuid_t ouid;
+					const char *target;
+				} fs;
+				struct {
+					int type, protocol;
+					struct sock *peer_sk;
+					void *addr;
+					int addrlen;
+				} net;
+				int signal;
+				struct {
+					int rlim;
+					unsigned long max;
+				} rlim;
+			};
 		};
 		struct {
 			struct aa_profile *profile;
 			const char *ns;
 			long pos;
 		} iface;
-		int signal;
-		struct {
-			int rlim;
-			unsigned long max;
-		} rlim;
 		struct {
 			const char *src_name;
 			const char *type;
diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
new file mode 100644
index 000000000000..140c8efcf364
--- /dev/null
+++ b/security/apparmor/include/net.h
@@ -0,0 +1,114 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation definitions.
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_NET_H
+#define __AA_NET_H
+
+#include <net/sock.h>
+#include <linux/path.h>
+
+#include "apparmorfs.h"
+#include "label.h"
+#include "perms.h"
+#include "policy.h"
+
+#define AA_MAY_SEND		AA_MAY_WRITE
+#define AA_MAY_RECEIVE		AA_MAY_READ
+
+#define AA_MAY_SHUTDOWN		AA_MAY_DELETE
+
+#define AA_MAY_CONNECT		AA_MAY_OPEN
+#define AA_MAY_ACCEPT		0x00100000
+
+#define AA_MAY_BIND		0x00200000
+#define AA_MAY_LISTEN		0x00400000
+
+#define AA_MAY_SETOPT		0x01000000
+#define AA_MAY_GETOPT		0x02000000
+
+#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE |    \
+			AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN |	  \
+			AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \
+			AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT)
+
+#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE |	\
+		      AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\
+		      AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD |	\
+		      AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK |	\
+		      AA_MAY_MPROT)
+
+#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT |	\
+		       AA_MAY_ACCEPT)
+struct aa_sk_ctx {
+	struct aa_label *label;
+	struct aa_label *peer;
+	struct path path;
+};
+
+#define SK_CTX(X) ((X)->sk_security)
+#define SOCK_ctx(X) SOCK_INODE(X)->i_security
+#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P)				  \
+	struct lsm_network_audit NAME ## _net = { .sk = (SK),		  \
+						  .family = (F)};	  \
+	DEFINE_AUDIT_DATA(NAME,						  \
+			  ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \
+						     LSM_AUDIT_DATA_NONE, \
+			  OP);						  \
+	NAME.u.net = &(NAME ## _net);					  \
+	aad(&NAME)->net.type = (T);					  \
+	aad(&NAME)->net.protocol = (P)
+
+#define DEFINE_AUDIT_SK(NAME, OP, SK)					\
+	DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type,	\
+			 (SK)->sk_protocol)
+
+/* struct aa_net - network confinement data
+ * @allow: basic network families permissions
+ * @audit: which network permissions to force audit
+ * @quiet: which network permissions to quiet rejects
+ */
+struct aa_net {
+	u16 allow[AF_MAX];
+	u16 audit[AF_MAX];
+	u16 quiet[AF_MAX];
+};
+
+
+extern struct aa_sfs_entry aa_sfs_entry_network[];
+
+void audit_net_cb(struct audit_buffer *ab, void *va);
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+		       u32 request, u16 family, int type);
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+	       int type, int protocol);
+static inline int aa_profile_af_sk_perm(struct aa_profile *profile,
+					struct common_audit_data *sa,
+					u32 request,
+					struct sock *sk)
+{
+	return aa_profile_af_perm(profile, sa, request, sk->sk_family,
+				  sk->sk_type);
+}
+int aa_sk_perm(const char *op, u32 request, struct sock *sk);
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+		      struct socket *sock);
+
+
+static inline void aa_free_net_rules(struct aa_net *new)
+{
+	/* NOP */
+}
+
+#endif /* __AA_NET_H */
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 2b27bb79aec4..af04d5a7d73d 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -135,9 +135,10 @@ extern struct aa_perms allperms;
 
 
 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+			 u32 mask);
 void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
-			u32 chrsmask, const char **names, u32 namesmask);
+			u32 chrsmask, const char * const *names, u32 namesmask);
 void aa_apply_modes_to_perms(struct aa_profile *profile,
 			     struct aa_perms *perms);
 void aa_compute_perms(struct aa_dfa *dfa, unsigned int state,
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 17fe41a9cac3..4364088a0b9e 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -30,6 +30,7 @@
 #include "file.h"
 #include "lib.h"
 #include "label.h"
+#include "net.h"
 #include "perms.h"
 #include "resource.h"
 
@@ -111,6 +112,7 @@ struct aa_data {
  * @policy: general match rules governing policy
  * @file: The set of rules governing basic file access and domain transitions
  * @caps: capabilities for the profile
+ * @net: network controls for the profile
  * @rlimits: rlimits for the profile
  *
  * @dents: dentries for the profiles file entries in apparmorfs
@@ -148,6 +150,7 @@ struct aa_profile {
 	struct aa_policydb policy;
 	struct aa_file_rules file;
 	struct aa_caps caps;
+	struct aa_net net;
 	struct aa_rlimit rlimits;
 
 	struct aa_loaddata *rawdata;
@@ -220,6 +223,16 @@ static inline unsigned int PROFILE_MEDIATES_SAFE(struct aa_profile *profile,
 	return 0;
 }
 
+static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile,
+					       u16 AF) {
+	unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+	u16 be_af = cpu_to_be16(AF);
+
+	if (!state)
+		return 0;
+	return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2);
+}
+
 /**
  * aa_get_profile - increment refcount on profile @p
  * @p: profile  (MAYBE NULL)
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 08ca26bcca77..8818621b5d95 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -211,7 +211,8 @@ void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask)
 	*str = '\0';
 }
 
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask)
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+			 u32 mask)
 {
 	const char *fmt = "%s";
 	unsigned int i, perm = 1;
@@ -229,7 +230,7 @@ void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask)
 }
 
 void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
-			u32 chrsmask, const char **names, u32 namesmask)
+			u32 chrsmask, const char * const *names, u32 namesmask)
 {
 	char str[33];
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 4ad0b3a45142..cc5ab23a2d84 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -33,6 +33,7 @@
 #include "include/context.h"
 #include "include/file.h"
 #include "include/ipc.h"
+#include "include/net.h"
 #include "include/path.h"
 #include "include/label.h"
 #include "include/policy.h"
@@ -736,6 +737,368 @@ static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
 	return error;
 }
 
+/**
+ * apparmor_sk_alloc_security - allocate and attach the sk_security field
+ */
+static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags)
+{
+	struct aa_sk_ctx *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), flags);
+	if (!ctx)
+		return -ENOMEM;
+
+	SK_CTX(sk) = ctx;
+
+	return 0;
+}
+
+/**
+ * apparmor_sk_free_security - free the sk_security field
+ */
+static void apparmor_sk_free_security(struct sock *sk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	SK_CTX(sk) = NULL;
+	aa_put_label(ctx->label);
+	aa_put_label(ctx->peer);
+	path_put(&ctx->path);
+	kfree(ctx);
+}
+
+/**
+ * apparmor_clone_security - clone the sk_security field
+ */
+static void apparmor_sk_clone_security(const struct sock *sk,
+				       struct sock *newsk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+	struct aa_sk_ctx *new = SK_CTX(newsk);
+
+	new->label = aa_get_label(ctx->label);
+	new->peer = aa_get_label(ctx->peer);
+	new->path = ctx->path;
+	path_get(&new->path);
+}
+
+static int aa_sock_create_perm(struct aa_label *label, int family, int type,
+			       int protocol)
+{
+	AA_BUG(!label);
+	AA_BUG(in_interrupt());
+
+	return aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, family, type,
+			  protocol);
+}
+
+
+/**
+ * apparmor_socket_create - check perms before creating a new socket
+ */
+static int apparmor_socket_create(int family, int type, int protocol, int kern)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	label = begin_current_label_crit_section();
+	if (!(kern || unconfined(label)))
+		error = aa_sock_create_perm(label, family, type, protocol);
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+/**
+ * apparmor_socket_post_create - setup the per-socket security struct
+ *
+ * Note:
+ * -   kernel sockets currently labeled unconfined but we may want to
+ *     move to a special kernel label
+ * -   socket may not have sk here if created with sock_create_lite or
+ *     sock_alloc. These should be accept cases which will be handled in
+ *     sock_graft.
+ */
+static int apparmor_socket_post_create(struct socket *sock, int family,
+				       int type, int protocol, int kern)
+{
+	struct aa_label *label;
+
+	if (kern) {
+		struct aa_ns *ns = aa_get_current_ns();
+
+		label = aa_get_label(ns_unconfined(ns));
+		aa_put_ns(ns);
+	} else
+		label = aa_get_current_label();
+
+	if (sock->sk) {
+		struct aa_sk_ctx *ctx = SK_CTX(sock->sk);
+
+		aa_put_label(ctx->label);
+		ctx->label = aa_get_label(label);
+	}
+	aa_put_label(label);
+
+	return 0;
+}
+
+/**
+ * apparmor_socket_bind - check perms before bind addr to socket
+ */
+static int apparmor_socket_bind(struct socket *sock,
+				struct sockaddr *address, int addrlen)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!address);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk);
+}
+
+/**
+ * apparmor_socket_connect - check perms before connecting @sock to @address
+ */
+static int apparmor_socket_connect(struct socket *sock,
+				   struct sockaddr *address, int addrlen)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!address);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk);
+}
+
+/**
+ * apparmor_socket_list - check perms before allowing listen
+ */
+static int apparmor_socket_listen(struct socket *sock, int backlog)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk);
+}
+
+/**
+ * apparmor_socket_accept - check perms before accepting a new connection.
+ *
+ * Note: while @newsock is created and has some information, the accept
+ *       has not been done.
+ */
+static int apparmor_socket_accept(struct socket *sock, struct socket *newsock)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!newsock);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk);
+}
+
+static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock,
+			    struct msghdr *msg, int size)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!msg);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_socket_sendmsg - check perms before sending msg to another socket
+ */
+static int apparmor_socket_sendmsg(struct socket *sock,
+				   struct msghdr *msg, int size)
+{
+	return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size);
+}
+
+/**
+ * apparmor_socket_recvmsg - check perms before receiving a message
+ */
+static int apparmor_socket_recvmsg(struct socket *sock,
+				   struct msghdr *msg, int size, int flags)
+{
+	return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size);
+}
+
+/* revaliation, get/set attr, shutdown */
+static int aa_sock_perm(const char *op, u32 request, struct socket *sock)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_socket_getsockname - check perms before getting the local address
+ */
+static int apparmor_socket_getsockname(struct socket *sock)
+{
+	return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock);
+}
+
+/**
+ * apparmor_socket_getpeername - check perms before getting remote address
+ */
+static int apparmor_socket_getpeername(struct socket *sock)
+{
+	return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock);
+}
+
+/* revaliation, get/set attr, opt */
+static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock,
+			    int level, int optname)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return aa_sk_perm(op, request, sock->sk);
+}
+
+/**
+ * apparmor_getsockopt - check perms before getting socket options
+ */
+static int apparmor_socket_getsockopt(struct socket *sock, int level,
+				      int optname)
+{
+	return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock,
+				level, optname);
+}
+
+/**
+ * apparmor_setsockopt - check perms before setting socket options
+ */
+static int apparmor_socket_setsockopt(struct socket *sock, int level,
+				      int optname)
+{
+	return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock,
+				level, optname);
+}
+
+/**
+ * apparmor_socket_shutdown - check perms before shutting down @sock conn
+ */
+static int apparmor_socket_shutdown(struct socket *sock, int how)
+{
+	return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
+}
+
+/**
+ * apparmor_socket_sock_recv_skb - check perms before associating skb to sk
+ *
+ * Note: can not sleep may be called with locks held
+ *
+ * dont want protocol specific in __skb_recv_datagram()
+ * to deny an incoming connection  socket_sock_rcv_skb()
+ */
+static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	return 0;
+}
+
+
+static struct aa_label *sk_peer_label(struct sock *sk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (ctx->peer)
+		return ctx->peer;
+
+	return ERR_PTR(-ENOPROTOOPT);
+}
+
+/**
+ * apparmor_socket_getpeersec_stream - get security context of peer
+ *
+ * Note: for tcp only valid if using ipsec or cipso on lan
+ */
+static int apparmor_socket_getpeersec_stream(struct socket *sock,
+					     char __user *optval,
+					     int __user *optlen,
+					     unsigned int len)
+{
+	char *name;
+	int slen, error = 0;
+	struct aa_label *label;
+	struct aa_label *peer;
+
+	label = begin_current_label_crit_section();
+	peer = sk_peer_label(sock->sk);
+	if (IS_ERR(peer)) {
+		error = PTR_ERR(peer);
+		goto done;
+	}
+	slen = aa_label_asxprint(&name, labels_ns(label), peer,
+				 FLAG_SHOW_MODE | FLAG_VIEW_SUBNS |
+				 FLAG_HIDDEN_UNCONFINED, GFP_KERNEL);
+	/* don't include terminating \0 in slen, it breaks some apps */
+	if (slen < 0) {
+		error = -ENOMEM;
+	} else {
+		if (slen > len) {
+			error = -ERANGE;
+		} else if (copy_to_user(optval, name, slen)) {
+			error = -EFAULT;
+			goto out;
+		}
+		if (put_user(slen, optlen))
+			error = -EFAULT;
+out:
+		kfree(name);
+
+	}
+
+done:
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+/**
+ * apparmor_socket_getpeersec_dgram - get security label of packet
+ * @sock: the peer socket
+ * @skb: packet data
+ * @secid: pointer to where to put the secid of the packet
+ *
+ * Sets the netlabel socket state on sk from parent
+ */
+static int apparmor_socket_getpeersec_dgram(struct socket *sock,
+					    struct sk_buff *skb, u32 *secid)
+
+{
+	/* TODO: requires secid support */
+	return -ENOPROTOOPT;
+}
+
+/**
+ * apparmor_sock_graft - Initialize newly created socket
+ * @sk: child sock
+ * @parent: parent socket
+ *
+ * Note: could set off of SOCK_CTX(parent) but need to track inode and we can
+ *       just set sk security information off of current creating process label
+ *       Labeling of sk for accept case - probably should be sock based
+ *       instead of task, because of the case where an implicitly labeled
+ *       socket is shared by different tasks.
+ */
+static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (!ctx->label)
+		ctx->label = aa_get_current_label();
+}
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -770,6 +1133,30 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(getprocattr, apparmor_getprocattr),
 	LSM_HOOK_INIT(setprocattr, apparmor_setprocattr),
 
+	LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security),
+	LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security),
+	LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security),
+
+	LSM_HOOK_INIT(socket_create, apparmor_socket_create),
+	LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create),
+	LSM_HOOK_INIT(socket_bind, apparmor_socket_bind),
+	LSM_HOOK_INIT(socket_connect, apparmor_socket_connect),
+	LSM_HOOK_INIT(socket_listen, apparmor_socket_listen),
+	LSM_HOOK_INIT(socket_accept, apparmor_socket_accept),
+	LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg),
+	LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg),
+	LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname),
+	LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername),
+	LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
+	LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
+	LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
+	LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
+	LSM_HOOK_INIT(socket_getpeersec_stream,
+		      apparmor_socket_getpeersec_stream),
+	LSM_HOOK_INIT(socket_getpeersec_dgram,
+		      apparmor_socket_getpeersec_dgram),
+	LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+
 	LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
 	LSM_HOOK_INIT(cred_free, apparmor_cred_free),
 	LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare),
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
new file mode 100644
index 000000000000..33d54435f8d6
--- /dev/null
+++ b/security/apparmor/net.c
@@ -0,0 +1,184 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/context.h"
+#include "include/label.h"
+#include "include/net.h"
+#include "include/policy.h"
+
+#include "net_names.h"
+
+
+struct aa_sfs_entry aa_sfs_entry_network[] = {
+	AA_SFS_FILE_STRING("af_mask",	AA_SFS_AF_MASK),
+	{ }
+};
+
+static const char * const net_mask_names[] = {
+	"unknown",
+	"send",
+	"receive",
+	"unknown",
+
+	"create",
+	"shutdown",
+	"connect",
+	"unknown",
+
+	"setattr",
+	"getattr",
+	"setcred",
+	"getcred",
+
+	"chmod",
+	"chown",
+	"chgrp",
+	"lock",
+
+	"mmap",
+	"mprot",
+	"unknown",
+	"unknown",
+
+	"accept",
+	"bind",
+	"listen",
+	"unknown",
+
+	"setopt",
+	"getopt",
+	"unknown",
+	"unknown",
+
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+};
+
+
+/* audit callback for net specific fields */
+void audit_net_cb(struct audit_buffer *ab, void *va)
+{
+	struct common_audit_data *sa = va;
+
+	audit_log_format(ab, " family=");
+	if (address_family_names[sa->u.net->family])
+		audit_log_string(ab, address_family_names[sa->u.net->family]);
+	else
+		audit_log_format(ab, "\"unknown(%d)\"", sa->u.net->family);
+	audit_log_format(ab, " sock_type=");
+	if (sock_type_names[aad(sa)->net.type])
+		audit_log_string(ab, sock_type_names[aad(sa)->net.type]);
+	else
+		audit_log_format(ab, "\"unknown(%d)\"", aad(sa)->net.type);
+	audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol);
+
+	if (aad(sa)->request & NET_PERMS_MASK) {
+		audit_log_format(ab, " requested_mask=");
+		aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0,
+				   net_mask_names, NET_PERMS_MASK);
+
+		if (aad(sa)->denied & NET_PERMS_MASK) {
+			audit_log_format(ab, " denied_mask=");
+			aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0,
+					   net_mask_names, NET_PERMS_MASK);
+		}
+	}
+	if (aad(sa)->peer) {
+		audit_log_format(ab, " peer=");
+		aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
+				FLAGS_NONE, GFP_ATOMIC);
+	}
+}
+
+
+/* Generic af perm */
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+		       u32 request, u16 family, int type)
+{
+	struct aa_perms perms = { };
+
+	AA_BUG(family >= AF_MAX);
+	AA_BUG(type < 0 || type >= SOCK_MAX);
+
+	if (profile_unconfined(profile))
+		return 0;
+
+	perms.allow = (profile->net.allow[family] & (1 << type)) ?
+		ALL_PERMS_MASK : 0;
+	perms.audit = (profile->net.audit[family] & (1 << type)) ?
+		ALL_PERMS_MASK : 0;
+	perms.quiet = (profile->net.quiet[family] & (1 << type)) ?
+		ALL_PERMS_MASK : 0;
+	aa_apply_modes_to_perms(profile, &perms);
+
+	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
+}
+
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+	       int type, int protocol)
+{
+	struct aa_profile *profile;
+	DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol);
+
+	return fn_for_each_confined(label, profile,
+			aa_profile_af_perm(profile, &sa, request, family,
+					   type));
+}
+
+static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request,
+			    struct sock *sk)
+{
+	struct aa_profile *profile;
+	DEFINE_AUDIT_SK(sa, op, sk);
+
+	AA_BUG(!label);
+	AA_BUG(!sk);
+
+	if (unconfined(label))
+		return 0;
+
+	return fn_for_each_confined(label, profile,
+			aa_profile_af_sk_perm(profile, &sa, request, sk));
+}
+
+int aa_sk_perm(const char *op, u32 request, struct sock *sk)
+{
+	struct aa_label *label;
+	int error;
+
+	AA_BUG(!sk);
+	AA_BUG(in_interrupt());
+
+	/* TODO: switch to begin_current_label ???? */
+	label = begin_current_label_crit_section();
+	error = aa_label_sk_perm(label, op, request, sk);
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+		      struct socket *sock)
+{
+	AA_BUG(!label);
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+
+	return aa_label_sk_perm(label, op, request, sock->sk);
+}
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 4ede87c30f8b..5a2aec358322 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -275,6 +275,19 @@ static bool unpack_nameX(struct aa_ext *e, enum aa_code code, const char *name)
 	return 0;
 }
 
+static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name)
+{
+	if (unpack_nameX(e, AA_U16, name)) {
+		if (!inbounds(e, sizeof(u16)))
+			return 0;
+		if (data)
+			*data = le16_to_cpu(get_unaligned((__le16 *) e->pos));
+		e->pos += sizeof(u16);
+		return 1;
+	}
+	return 0;
+}
+
 static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name)
 {
 	if (unpack_nameX(e, AA_U32, name)) {
@@ -584,7 +597,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	struct aa_profile *profile = NULL;
 	const char *tmpname, *tmpns = NULL, *name = NULL;
 	const char *info = "failed to unpack profile";
-	size_t ns_len;
+	size_t size = 0, ns_len;
 	struct rhashtable_params params = { 0 };
 	char *key = NULL;
 	struct aa_data *data;
@@ -717,6 +730,38 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		goto fail;
 	}
 
+	size = unpack_array(e, "net_allowed_af");
+	if (size) {
+
+		for (i = 0; i < size; i++) {
+			/* discard extraneous rules that this kernel will
+			 * never request
+			 */
+			if (i >= AF_MAX) {
+				u16 tmp;
+
+				if (!unpack_u16(e, &tmp, NULL) ||
+				    !unpack_u16(e, &tmp, NULL) ||
+				    !unpack_u16(e, &tmp, NULL))
+					goto fail;
+				continue;
+			}
+			if (!unpack_u16(e, &profile->net.allow[i], NULL))
+				goto fail;
+			if (!unpack_u16(e, &profile->net.audit[i], NULL))
+				goto fail;
+			if (!unpack_u16(e, &profile->net.quiet[i], NULL))
+				goto fail;
+		}
+		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+			goto fail;
+	}
+	if (VERSION_LT(e->version, v7)) {
+		/* pre v7 policy always allowed these */
+		profile->net.allow[AF_UNIX] = 0xffff;
+		profile->net.allow[AF_NETLINK] = 0xffff;
+	}
+
 	if (unpack_nameX(e, AA_STRUCT, "policydb")) {
 		/* generic policy dfa - optional and may be NULL */
 		info = "failed to unpack policydb";

From d07881d2edb0ab783846730629ac2faeaafdf4f1 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 08:59:57 -0700
Subject: [PATCH 276/391] apparmor: move new_null_profile to after profile
 lookup fns()

new_null_profile will need to use some of the profile lookup fns()
so move instead of doing forward fn declarations.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 158 ++++++++++++++++++-------------------
 1 file changed, 79 insertions(+), 79 deletions(-)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 244ea4a4a8f0..a81a384a63b1 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -289,85 +289,6 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy,
 	return NULL;
 }
 
-/**
- * aa_new_null_profile - create or find a null-X learning profile
- * @parent: profile that caused this profile to be created (NOT NULL)
- * @hat: true if the null- learning profile is a hat
- * @base: name to base the null profile off of
- * @gfp: type of allocation
- *
- * Find/Create a null- complain mode profile used in learning mode.  The
- * name of the profile is unique and follows the format of parent//null-XXX.
- * where XXX is based on the @name or if that fails or is not supplied
- * a unique number
- *
- * null profiles are added to the profile list but the list does not
- * hold a count on them so that they are automatically released when
- * not in use.
- *
- * Returns: new refcounted profile else NULL on failure
- */
-struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
-				       const char *base, gfp_t gfp)
-{
-	struct aa_profile *profile;
-	char *name;
-
-	AA_BUG(!parent);
-
-	if (base) {
-		name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base),
-			       gfp);
-		if (name) {
-			sprintf(name, "%s//null-%s", parent->base.hname, base);
-			goto name;
-		}
-		/* fall through to try shorter uniq */
-	}
-
-	name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp);
-	if (!name)
-		return NULL;
-	sprintf(name, "%s//null-%x", parent->base.hname,
-		atomic_inc_return(&parent->ns->uniq_null));
-
-name:
-	/* lookup to see if this is a dup creation */
-	profile = aa_find_child(parent, basename(name));
-	if (profile)
-		goto out;
-
-	profile = aa_alloc_profile(name, NULL, gfp);
-	if (!profile)
-		goto fail;
-
-	profile->mode = APPARMOR_COMPLAIN;
-	profile->label.flags |= FLAG_NULL;
-	if (hat)
-		profile->label.flags |= FLAG_HAT;
-	profile->path_flags = parent->path_flags;
-
-	/* released on free_profile */
-	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
-	profile->ns = aa_get_ns(parent->ns);
-	profile->file.dfa = aa_get_dfa(nulldfa);
-	profile->policy.dfa = aa_get_dfa(nulldfa);
-
-	mutex_lock(&profile->ns->lock);
-	__add_profile(&parent->base.profiles, profile);
-	mutex_unlock(&profile->ns->lock);
-
-	/* refcount released by caller */
-out:
-	kfree(name);
-
-	return profile;
-
-fail:
-	aa_free_profile(profile);
-	return NULL;
-}
-
 /* TODO: profile accounting - setup in remove */
 
 /**
@@ -558,6 +479,85 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base,
 	return profile;
 }
 
+/**
+ * aa_new_null_profile - create or find a null-X learning profile
+ * @parent: profile that caused this profile to be created (NOT NULL)
+ * @hat: true if the null- learning profile is a hat
+ * @base: name to base the null profile off of
+ * @gfp: type of allocation
+ *
+ * Find/Create a null- complain mode profile used in learning mode.  The
+ * name of the profile is unique and follows the format of parent//null-XXX.
+ * where XXX is based on the @name or if that fails or is not supplied
+ * a unique number
+ *
+ * null profiles are added to the profile list but the list does not
+ * hold a count on them so that they are automatically released when
+ * not in use.
+ *
+ * Returns: new refcounted profile else NULL on failure
+ */
+struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
+				       const char *base, gfp_t gfp)
+{
+	struct aa_profile *profile;
+	char *name;
+
+	AA_BUG(!parent);
+
+	if (base) {
+		name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base),
+			       gfp);
+		if (name) {
+			sprintf(name, "%s//null-%s", parent->base.hname, base);
+			goto name;
+		}
+		/* fall through to try shorter uniq */
+	}
+
+	name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp);
+	if (!name)
+		return NULL;
+	sprintf(name, "%s//null-%x", parent->base.hname,
+		atomic_inc_return(&parent->ns->uniq_null));
+
+name:
+	/* lookup to see if this is a dup creation */
+	profile = aa_find_child(parent, basename(name));
+	if (profile)
+		goto out;
+
+	profile = aa_alloc_profile(name, NULL, gfp);
+	if (!profile)
+		goto fail;
+
+	profile->mode = APPARMOR_COMPLAIN;
+	profile->label.flags |= FLAG_NULL;
+	if (hat)
+		profile->label.flags |= FLAG_HAT;
+	profile->path_flags = parent->path_flags;
+
+	/* released on free_profile */
+	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
+	profile->ns = aa_get_ns(parent->ns);
+	profile->file.dfa = aa_get_dfa(nulldfa);
+	profile->policy.dfa = aa_get_dfa(nulldfa);
+
+	mutex_lock(&profile->ns->lock);
+	__add_profile(&parent->base.profiles, profile);
+	mutex_unlock(&profile->ns->lock);
+
+	/* refcount released by caller */
+out:
+	kfree(name);
+
+	return profile;
+
+fail:
+	aa_free_profile(profile);
+	return NULL;
+}
+
 /**
  * replacement_allowed - test to see if replacement is allowed
  * @profile: profile to test if it can be replaced  (MAYBE NULL)

From 290638a52a808d658bd04b746b3ca46886c157e0 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 05:40:49 -0700
Subject: [PATCH 277/391] apparmor: fix race condition in null profile creation

There is a race when null- profile is being created between the
initial lookup/creation of the profile and lock/addition of the
profile. This could result in multiple version of a profile being
added to the list which need to be removed/replaced.

Since these are learning profile their is no affect on mediation.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index a81a384a63b1..4243b0c3f0e4 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -500,7 +500,8 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base,
 struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
 				       const char *base, gfp_t gfp)
 {
-	struct aa_profile *profile;
+	struct aa_profile *p, *profile;
+	const char *bname;
 	char *name;
 
 	AA_BUG(!parent);
@@ -523,7 +524,8 @@ struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
 
 name:
 	/* lookup to see if this is a dup creation */
-	profile = aa_find_child(parent, basename(name));
+	bname = basename(name);
+	profile = aa_find_child(parent, bname);
 	if (profile)
 		goto out;
 
@@ -544,7 +546,13 @@ struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
 	profile->policy.dfa = aa_get_dfa(nulldfa);
 
 	mutex_lock(&profile->ns->lock);
-	__add_profile(&parent->base.profiles, profile);
+	p = __find_child(&parent->base.profiles, bname);
+	if (p) {
+		aa_free_profile(profile);
+		profile = aa_get_profile(p);
+	} else {
+		__add_profile(&parent->base.profiles, profile);
+	}
 	mutex_unlock(&profile->ns->lock);
 
 	/* refcount released by caller */

From 15372b97aa7593c6f5bc1afe69f42fd403c40685 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 05:48:06 -0700
Subject: [PATCH 278/391] apparmor: ensure unconfined profiles have dfas
 initialized

Generally unconfined has early bailout tests and does not need the
dfas initialized, however if an early bailout test is ever missed
it will result in an oops.

Be defensive and initialize the unconfined profile to have null dfas
(no permission) so if an early bailout test is missed we fail
closed (no perms granted) instead of oopsing.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_ns.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index 351d3bab3a3d..62a3589c62ab 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -112,6 +112,8 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name)
 	ns->unconfined->label.flags |= FLAG_IX_ON_NAME_ERROR |
 		FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED;
 	ns->unconfined->mode = APPARMOR_UNCONFINED;
+	ns->unconfined->file.dfa = aa_get_dfa(nulldfa);
+	ns->unconfined->policy.dfa = aa_get_dfa(nulldfa);
 
 	/* ns and ns->unconfined share ns->unconfined refcount */
 	ns->unconfined->ns = ns;

From bc4d82fb946e7b471eab2a80e384227c4eb15652 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 16 Aug 2017 09:33:48 -0700
Subject: [PATCH 279/391] apparmor: fix incorrect type assignment when freeing
 proxies

sparse reports

poisoning the proxy->label before freeing the struct is resulting in
a sparse build warning.
../security/apparmor/label.c:52:30: warning: incorrect type in assignment (different address spaces)
../security/apparmor/label.c:52:30:    expected struct aa_label [noderef] <asn:4>*label
../security/apparmor/label.c:52:30:    got struct aa_label *<noident>

fix with RCU_INIT_POINTER as this is one of those cases where
rcu_assign_pointer() is not needed.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/label.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 52b4ef14840d..c5b99b954580 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -49,7 +49,7 @@ static void free_proxy(struct aa_proxy *proxy)
 		/* p->label will not updated any more as p is dead */
 		aa_put_label(rcu_dereference_protected(proxy->label, true));
 		memset(proxy, 0, sizeof(*proxy));
-		proxy->label = (struct aa_label *) PROXY_POISON;
+		RCU_INIT_POINTER(proxy->label, (struct aa_label *)PROXY_POISON);
 		kfree(proxy);
 	}
 }

From b1545dba092ba543eab1f7b5ed757a4988e267c8 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Wed, 23 Aug 2017 12:10:39 -0700
Subject: [PATCH 280/391] apparmor: fix build failure on sparc caused by
 undeclared signals

  In file included from security/apparmor/ipc.c:23:0:
  security/apparmor/include/sig_names.h:26:3: error: 'SIGSTKFLT' undeclared here (not in a function)
    [SIGSTKFLT] = 16, /* -, 16, - */
     ^
  security/apparmor/include/sig_names.h:26:3: error: array index in initializer not of integer type
  security/apparmor/include/sig_names.h:26:3: note: (near initialization for 'sig_map')
  security/apparmor/include/sig_names.h:51:3: error: 'SIGUNUSED' undeclared here (not in a function)
    [SIGUNUSED] = 34, /* -, 31, - */
     ^
  security/apparmor/include/sig_names.h:51:3: error: array index in initializer not of integer type
  security/apparmor/include/sig_names.h:51:3: note: (near initialization for 'sig_map')

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: c6bf1adaecaa ("apparmor: add the ability to mediate signals")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/sig_names.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h
index 0d4395f231ca..92e62fe95292 100644
--- a/security/apparmor/include/sig_names.h
+++ b/security/apparmor/include/sig_names.h
@@ -23,7 +23,9 @@ static const int sig_map[MAXMAPPED_SIG] = {
 	[SIGPIPE] = 13,
 	[SIGALRM] = 14,
 	[SIGTERM] = 15,
+#ifdef SIGSTKFLT
 	[SIGSTKFLT] = 16,	/* -, 16, - */
+#endif
 	[SIGCHLD] = 17,		/* 20, 17, 18.  SIGCHLD -, -, 18 */
 	[SIGCONT] = 18,		/* 19, 18, 25 */
 	[SIGSTOP] = 19,		/* 17, 19, 23 */
@@ -47,7 +49,8 @@ static const int sig_map[MAXMAPPED_SIG] = {
 #if defined(SIGLOST) && SIGPWR != SIGLOST		/* sparc */
 	[SIGLOST] = 33,		/* unused on Linux */
 #endif
-#if defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS
+#if defined(SIGUNUSED) && \
+    defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS
 	[SIGUNUSED] = 34,	/* -, 31, - */
 #endif
 };

From bf81100f63db7ea243d17b9d5008ba3af2fdf6b2 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 31 Aug 2017 09:54:43 -0700
Subject: [PATCH 281/391] apparmor: fix apparmorfs DAC access permissions

The DAC access permissions for several apparmorfs files are wrong.

.access - needs to be writable by all tasks to perform queries
the others in the set only provide a read fn so should be read only.

With policy namespace virtualization all apparmor needs to control
the permission and visibility checks directly which means DAC
access has to be allowed for all user, group, and other.

BugLink: http://bugs.launchpad.net/bugs/1713103
Fixes: c97204baf840b ("apparmor: rename apparmor file fns and data to indicate use")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 125dad5c3fde..518d5928661b 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -2215,12 +2215,12 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 };
 
 static struct aa_sfs_entry aa_sfs_entry_apparmor[] = {
-	AA_SFS_FILE_FOPS(".access", 0640, &aa_sfs_access),
+	AA_SFS_FILE_FOPS(".access", 0666, &aa_sfs_access),
 	AA_SFS_FILE_FOPS(".stacked", 0444, &seq_ns_stacked_fops),
 	AA_SFS_FILE_FOPS(".ns_stacked", 0444, &seq_ns_nsstacked_fops),
-	AA_SFS_FILE_FOPS(".ns_level", 0666, &seq_ns_level_fops),
-	AA_SFS_FILE_FOPS(".ns_name", 0640, &seq_ns_name_fops),
-	AA_SFS_FILE_FOPS("profiles", 0440, &aa_sfs_profiles_fops),
+	AA_SFS_FILE_FOPS(".ns_level", 0444, &seq_ns_level_fops),
+	AA_SFS_FILE_FOPS(".ns_name", 0444, &seq_ns_name_fops),
+	AA_SFS_FILE_FOPS("profiles", 0444, &aa_sfs_profiles_fops),
 	AA_SFS_DIR("features", aa_sfs_entry_features),
 	{ }
 };

From af21b01d1166248f282fc02d0f459c94de06615e Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 22 Sep 2017 22:24:02 +0200
Subject: [PATCH 282/391] parisc: Reintroduce option to gzip-compress the
 kernel

By adding the feature to build the kernel as self-extracting
executeable, the possibility to simply compress the kernel with gzip was
lost.

This patch now reintroduces this possibilty again and leaves it up to
the user to decide how the kernel should be built.

The palo bootloader is able to natively load both formats.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/Kconfig  | 12 ++++++++++++
 arch/parisc/Makefile |  5 +++++
 2 files changed, 17 insertions(+)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index ba7b7ddc3844..a57dedbfc7b7 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -257,6 +257,18 @@ config PARISC_PAGE_SIZE_64KB
 
 endchoice
 
+config PARISC_SELF_EXTRACT
+	bool "Build kernel as self-extracting executable"
+	default y
+	help
+	  Say Y if you want to build the parisc kernel as a kind of
+	  self-extracting executable.
+
+	  If you say N here, the kernel will be compressed with gzip
+	  which can be loaded by the palo bootloader directly too.
+
+	  If you don't know what to do here, say Y.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	---help---
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 58fae5d2449d..01946ebaff72 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -129,8 +129,13 @@ Image: vmlinux
 bzImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
+ifdef CONFIG_PARISC_SELF_EXTRACT
 vmlinuz: bzImage
 	$(OBJCOPY) $(boot)/bzImage $@
+else
+vmlinuz: vmlinux
+	@gzip -cf -9 $< > $@
+endif
 
 install:
 	$(CONFIG_SHELL) $(src)/arch/parisc/install.sh \

From 8c031ba63f8f2a9efc471cb45b2ff18271556544 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 22 Sep 2017 21:57:11 +0200
Subject: [PATCH 283/391] parisc: Unbreak bootloader due to gcc-7 optimizations

gcc-7 optimizes the byte-wise accesses of get_unaligned_le32() into
word-wise accesses if the 32-bit integer output_len is declared as
external. This panics then the bootloader since we don't have the
unaligned access fault trap handler installed during boot time.

Avoid this optimization by declaring output_len as byte-aligned and thus
unbreak the bootloader code.

Additionally, compile the boot code optimized for size.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/boot/compressed/Makefile | 2 +-
 arch/parisc/boot/compressed/misc.c   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
index 5450a11c9d10..7d7e594bda36 100644
--- a/arch/parisc/boot/compressed/Makefile
+++ b/arch/parisc/boot/compressed/Makefile
@@ -15,7 +15,7 @@ targets += misc.o piggy.o sizes.h head.o real2.o firmware.o
 KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
-KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs
+KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os
 ifndef CONFIG_64BIT
 KBUILD_CFLAGS += -mfast-indirect-calls
 endif
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 13a4bf9ac4da..9345b44b86f0 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -24,7 +24,8 @@
 /* Symbols defined by linker scripts */
 extern char input_data[];
 extern int input_len;
-extern __le32 output_len;	/* at unaligned address, little-endian */
+/* output_len is inserted by the linker possibly at an unaligned address */
+extern __le32 output_len __aligned(1);
 extern char _text, _end;
 extern char _bss, _ebss;
 extern char _startcode_end;

From c17c02040bf0d186cebd3e66ff349f955575bf38 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 22 Sep 2017 09:42:42 +0200
Subject: [PATCH 284/391] arch: remove unused *_segments() macros/functions

Some architectures define the no-op macros/functions copy_segments,
release_segments and forget_segments. These are used nowhere in the
tree, so removed them.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Vineet Gupta <vgupta@synopsys.com>   [for arch/arc]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/include/asm/processor.h        |  3 ---
 arch/c6x/include/asm/processor.h        |  3 ---
 arch/frv/include/asm/processor.h        |  4 ----
 arch/m32r/include/asm/processor.h       |  8 --------
 arch/metag/include/asm/processor.h      |  3 ---
 arch/mn10300/kernel/process.c           | 12 ------------
 arch/sh/include/asm/processor_32.h      |  4 ----
 arch/sh/include/asm/processor_64.h      |  4 ----
 arch/um/include/asm/processor-generic.h |  5 -----
 arch/xtensa/include/asm/processor.h     |  5 -----
 10 files changed, 51 deletions(-)

diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index d400a2161935..8ee41e988169 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -78,9 +78,6 @@ struct task_struct;
 
 #endif
 
-#define copy_segments(tsk, mm)      do { } while (0)
-#define release_segments(mm)        do { } while (0)
-
 #define KSTK_EIP(tsk)   (task_pt_regs(tsk)->ret)
 #define KSTK_ESP(tsk)   (task_pt_regs(tsk)->sp)
 
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h
index 7c87b5be53b5..8f7cce829f8e 100644
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -92,9 +92,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-#define copy_segments(tsk, mm)		do { } while (0)
-#define release_segments(mm)		do { } while (0)
-
 /*
  * saved kernel SP and DP of a blocked thread.
  */
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index e4d08d74ed9f..021cce78b401 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -92,10 +92,6 @@ static inline void release_thread(struct task_struct *dead_task)
 extern asmlinkage void save_user_regs(struct user_context *target);
 extern asmlinkage void *restore_user_regs(const struct user_context *target, ...);
 
-#define copy_segments(tsk, mm)		do { } while (0)
-#define release_segments(mm)		do { } while (0)
-#define forget_segments()		do { } while (0)
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index 657874eeeccc..c70fa9ac7169 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -118,14 +118,6 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/* Copy and release all segment info associated with a VM */
-extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
-extern void release_segments(struct mm_struct * mm);
-
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)  do { } while (0)
-#define release_segments(mm)  do { } while (0)
-
 unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)  ((tsk)->thread.lr)
 #define KSTK_ESP(tsk)  ((tsk)->thread.sp)
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
index ec6a49076980..8ae92d6abfd2 100644
--- a/arch/metag/include/asm/processor.h
+++ b/arch/metag/include/asm/processor.h
@@ -131,9 +131,6 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-#define copy_segments(tsk, mm)		do { } while (0)
-#define release_segments(mm)		do { } while (0)
-
 /*
  * Return saved PC of a blocked thread.
  */
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 89e8027e07fb..7c475fd99c46 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -59,10 +59,6 @@ void arch_cpu_idle(void)
 }
 #endif
 
-void release_segments(struct mm_struct *mm)
-{
-}
-
 void machine_restart(char *cmd)
 {
 #ifdef CONFIG_KERNEL_DEBUGGER
@@ -112,14 +108,6 @@ void release_thread(struct task_struct *dead_task)
 {
 }
 
-/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
- */
-void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
-{
-}
-
 /*
  * this gets called so that we can store lazy state into memory and copy the
  * current task into the new thread.
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 18e0377f72bb..88ce1e22237b 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -136,10 +136,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_pc, unsigned lo
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)	do { } while(0)
-#define release_segments(mm)	do { } while(0)
-
 /*
  * FPU lazy state save handling.
  */
diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h
index eedd4f625d07..777a16318aff 100644
--- a/arch/sh/include/asm/processor_64.h
+++ b/arch/sh/include/asm/processor_64.h
@@ -170,10 +170,6 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)	do { } while (0)
-#define release_segments(mm)	do { } while (0)
-#define forget_segments()	do { } while (0)
 /*
  * FPU lazy state save handling.
  */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index f6d1a3f747a9..86942a492454 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,11 +58,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-static inline void mm_copy_segments(struct mm_struct *from_mm,
-				    struct mm_struct *new_mm)
-{
-}
-
 #define init_stack	(init_thread_union.stack)
 
 /*
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 30ee8c608853..5b0027d4ecc0 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -208,11 +208,6 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
 
-/* Copy and release all segment info associated with a VM */
-#define copy_segments(p, mm)	do { } while(0)
-#define release_segments(mm)	do { } while(0)
-#define forget_segments()	do { } while (0)
-
 extern unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)

From 6e70e26dc52be62c1f39f81b5f71fa5e643677aa Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Thu, 21 Sep 2017 21:32:29 -0500
Subject: [PATCH 285/391] SMB3: handle new statx fields

We weren't returning the creation time or the two easily supported
attributes (ENCRYPTED or COMPRESSED) for the getattr call to
allow statx to return these fields.

Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>\
Acked-by: Jeff Layton <jlayton@poochiereds.net>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/inode.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8693632235f..7c732cb44164 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -234,6 +234,8 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
 	fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
 	fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
 	fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
+	/* old POSIX extensions don't get create time */
+
 	fattr->cf_mode = le64_to_cpu(info->Permissions);
 
 	/*
@@ -2024,6 +2026,19 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
 	stat->blksize = CIFS_MAX_MSGSIZE;
 	stat->ino = CIFS_I(inode)->uniqueid;
 
+	/* old CIFS Unix Extensions doesn't return create time */
+	if (CIFS_I(inode)->createtime) {
+		stat->result_mask |= STATX_BTIME;
+		stat->btime =
+		      cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
+	}
+
+	stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
+	if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED)
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+	if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED)
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+
 	/*
 	 * If on a multiuser mount without unix extensions or cifsacl being
 	 * enabled, and the admin hasn't overridden them, set the ownership

From 1013e760d10e614dc10b5624ce9fc41563ba2e65 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Fri, 22 Sep 2017 01:40:27 -0500
Subject: [PATCH 286/391] SMB3: Don't ignore O_SYNC/O_DSYNC and O_DIRECT flags

Signed-off-by: Steve French <smfrench@gmail.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/file.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 822311919163..92fdf9c35de2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 	if (backup_cred(cifs_sb))
 		create_options |= CREATE_OPEN_BACKUP_INTENT;
 
+	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
+	if (f_flags & O_SYNC)
+		create_options |= CREATE_WRITE_THROUGH;
+
+	if (f_flags & O_DIRECT)
+		create_options |= CREATE_NO_BUFFER;
+
 	oparms.tcon = tcon;
 	oparms.cifs_sb = cifs_sb;
 	oparms.desired_access = desired_access;

From b9b95da92de9d498ece7e6a82e2d6dcfc76fd9d8 Mon Sep 17 00:00:00 2001
From: Stefan Schmidt <stefan@osg.samsung.com>
Date: Fri, 22 Sep 2017 14:28:46 +0200
Subject: [PATCH 287/391] MAINTAINERS: update git tree locations for ieee802154
 subsystem

Patches for ieee802154 will go through my new trees towards netdev from
now on. The 6LoWPAN subsystem will stay as is (shared between ieee802154
and bluetooth) and go through the bluetooth tree as usual.

Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 955f034fd523..61ee134cf6a8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6642,8 +6642,8 @@ M:	Alexander Aring <alex.aring@gmail.com>
 M:	Stefan Schmidt <stefan@osg.samsung.com>
 L:	linux-wpan@vger.kernel.org
 W:	http://wpan.cakelab.org/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git
 S:	Maintained
 F:	net/ieee802154/
 F:	net/mac802154/

From 581fe0ea61584d88072527ae9fb9dcb9d1f2783e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 22 Sep 2017 19:42:37 -0400
Subject: [PATCH 288/391] net: orphan frags on stand-alone ptype in
 dev_queue_xmit_nit

Zerocopy skbs frags are copied when the skb is looped to a local sock.
Commit 1080e512d44d ("net: orphan frags on receive") introduced calls
to skb_orphan_frags to deliver_skb and __netif_receive_skb for this.

With msg_zerocopy, these skbs can also exist in the tx path and thus
loop from dev_queue_xmit_nit. This already calls deliver_skb in its
loop. But it does not orphan before a separate pt_prev->func().

Add the missing skb_orphan_frags_rx.

Changes
  v1->v2: handle skb_orphan_frags_rx failure

Fixes: 1f8b977ab32d ("sock: enable MSG_ZEROCOPY")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 9a2254f9802f..588b473194a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1948,8 +1948,12 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		goto again;
 	}
 out_unlock:
-	if (pt_prev)
-		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+	if (pt_prev) {
+		if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
+			pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
+		else
+			kfree_skb(skb2);
+	}
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);

From cbb2fb5c72f48d3029c144be0f0e61da1c7bccf7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 22 Sep 2017 20:20:06 -0400
Subject: [PATCH 289/391] net: set tb->fast_sk_family

We need to set the tb->fast_sk_family properly so we can use the proper
comparison function for all subsequent reuseport bind requests.

Fixes: 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk")
Reported-and-tested-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b9c64b40a83a..f87f4805e244 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -328,6 +328,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 			tb->fastuid = uid;
 			tb->fast_rcv_saddr = sk->sk_rcv_saddr;
 			tb->fast_ipv6_only = ipv6_only_sock(sk);
+			tb->fast_sk_family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 			tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 #endif
@@ -354,6 +355,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 				tb->fastuid = uid;
 				tb->fast_rcv_saddr = sk->sk_rcv_saddr;
 				tb->fast_ipv6_only = ipv6_only_sock(sk);
+				tb->fast_sk_family = sk->sk_family;
 #if IS_ENABLED(CONFIG_IPV6)
 				tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 #endif

From 7a56673b58f2414679e926bba80309a037a4fd35 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 22 Sep 2017 20:20:07 -0400
Subject: [PATCH 290/391] net: use inet6_rcv_saddr to compare sockets

In ipv6_rcv_saddr_equal() we need to use inet6_rcv_saddr(sk) for the
ipv6 compare with the fast socket information to make sure we're doing
the proper comparisons.

Fixes: 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk")
Reported-and-tested-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f87f4805e244..a1bf30438bc5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -266,7 +266,7 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tb->fast_sk_family == AF_INET6)
 		return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr,
-					    &sk->sk_v6_rcv_saddr,
+					    inet6_rcv_saddr(sk),
 					    tb->fast_rcv_saddr,
 					    sk->sk_rcv_saddr,
 					    tb->fast_ipv6_only,

From fbed24bcc69d3e48c5402c371f19f5c7688871e5 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 22 Sep 2017 20:20:08 -0400
Subject: [PATCH 291/391] inet: fix improper empty comparison

When doing my reuseport rework I screwed up and changed a

if (hlist_empty(&tb->owners))

to

if (!hlist_empty(&tb->owners))

This is obviously bad as all of the reuseport/reuse logic was reversed,
which caused weird problems like allowing an ipv4 bind conflict if we
opened an ipv4 only socket on a port followed by an ipv6 only socket on
the same port.

Fixes: b9470c27607b ("inet: kill smallest_size and smallest_port")
Reported-by: Cole Robinson <crobinso@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a1bf30438bc5..c039c937ba90 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -321,7 +321,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 			goto fail_unlock;
 	}
 success:
-	if (!hlist_empty(&tb->owners)) {
+	if (hlist_empty(&tb->owners)) {
 		tb->fastreuse = reuse;
 		if (sk->sk_reuseport) {
 			tb->fastreuseport = FASTREUSEPORT_ANY;

From 0d0970eef3b03ef08b19da5bc3044410731cf38f Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 20 Sep 2017 16:24:32 -0500
Subject: [PATCH 292/391] objtool: Handle another GCC stack pointer adjustment
 bug

The kbuild bot reported the following warning with GCC 4.4 and a
randconfig:

  net/socket.o: warning: objtool: compat_sock_ioctl()+0x1083: stack state mismatch: cfa1=7+160 cfa2=-1+0

This is caused by another GCC non-optimization, where it backs up and
restores the stack pointer for no apparent reason:

    2f91:       48 89 e0                mov    %rsp,%rax
    2f94:       4c 89 e7                mov    %r12,%rdi
    2f97:       4c 89 f6                mov    %r14,%rsi
    2f9a:       ba 20 00 00 00          mov    $0x20,%edx
    2f9f:       48 89 c4                mov    %rax,%rsp

This issue would have been happily ignored before the following commit:

  dd88a0a0c861 ("objtool: Handle GCC stack pointer adjustment bug")

But now that objtool is paying attention to such stack pointer writes
to/from a register, it needs to understand them properly.  In this case
that means recognizing that the "mov %rsp, %rax" instruction is
potentially a backup of the stack pointer.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: dd88a0a0c861 ("objtool: Handle GCC stack pointer adjustment bug")
Link: http://lkml.kernel.org/r/8c7aa8e9a36fbbb6655d9d8e7cea58958c912da8.1505942196.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/arch/x86/decode.c |  6 ++---
 tools/objtool/check.c           | 43 ++++++++++++++++++++++-----------
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0e8c8ec4fd4e..0f22768c0d4d 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0x89:
-		if (rex == 0x48 && modrm == 0xe5) {
+		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
 
-			/* mov %rsp, %rbp */
+			/* mov %rsp, reg */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_REG;
 			op->src.reg = CFI_SP;
 			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_BP;
+			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
 			break;
 		}
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f744617c9946..a0c518ecf085 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
 		switch (op->src.type) {
 
 		case OP_SRC_REG:
-			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) {
+			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+			    cfa->base == CFI_SP &&
+			    regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
 
-				if (cfa->base == CFI_SP &&
-				    regs[CFI_BP].base == CFI_CFA &&
-				    regs[CFI_BP].offset == -cfa->offset) {
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
+			}
 
-					/* mov %rsp, %rbp */
-					cfa->base = op->dest.reg;
-					state->bp_scratch = false;
-				}
+			else if (op->src.reg == CFI_SP &&
+				 op->dest.reg == CFI_BP && state->drap) {
 
-				else if (state->drap) {
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			}
 
-					/* drap: mov %rsp, %rbp */
-					regs[CFI_BP].base = CFI_BP;
-					regs[CFI_BP].offset = -state->stack_size;
-					state->bp_scratch = false;
-				}
+			else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
+
+				/*
+				 * mov %rsp, %reg
+				 *
+				 * This is needed for the rare case where GCC
+				 * does:
+				 *
+				 *   mov    %rsp, %rax
+				 *   ...
+				 *   mov    %rax, %rsp
+				 */
+				state->vals[op->dest.reg].base = CFI_CFA;
+				state->vals[op->dest.reg].offset = -state->stack_size;
 			}
 
 			else if (op->dest.reg == cfa->base) {

From f5caf621ee357279e759c0911daf6d55c7d36f03 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 20 Sep 2017 16:24:33 -0500
Subject: [PATCH 293/391] x86/asm: Fix inline asm call constraints for Clang

For inline asm statements which have a CALL instruction, we list the
stack pointer as a constraint to convince GCC to ensure the frame
pointer is set up first:

  static inline void foo()
  {
	register void *__sp asm(_ASM_SP);
	asm("call bar" : "+r" (__sp))
  }

Unfortunately, that pattern causes Clang to corrupt the stack pointer.

The fix is easy: convert the stack pointer register variable to a global
variable.

It should be noted that the end result is different based on the GCC
version.  With GCC 6.4, this patch has exactly the same result as
before:

	defconfig	defconfig-nofp	distro		distro-nofp
 before	9820389		9491555		8816046		8516940
 after	9820389		9491555		8816046		8516940

With GCC 7.2, however, GCC's behavior has changed.  It now changes its
behavior based on the conversion of the register variable to a global.
That somehow convinces it to *always* set up the frame pointer before
inserting *any* inline asm.  (Therefore, listing the variable as an
output constraint is a no-op and is no longer necessary.)  It's a bit
overkill, but the performance impact should be negligible.  And in fact,
there's a nice improvement with frame pointers disabled:

	defconfig	defconfig-nofp	distro		distro-nofp
 before	9796316		9468236		9076191		8790305
 after	9796957		9464267		9076381		8785949

So in summary, while listing the stack pointer as an output constraint
is no longer necessary for newer versions of GCC, it's still needed for
older versions.

Suggested-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3db862e970c432ae823cf515c52b54fec8270e0e.1505942196.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/alternative.h               |  3 +--
 arch/x86/include/asm/asm.h                       | 11 +++++++++++
 arch/x86/include/asm/mshyperv.h                  | 10 ++++------
 arch/x86/include/asm/paravirt_types.h            | 14 +++++++-------
 arch/x86/include/asm/preempt.h                   | 15 +++++----------
 arch/x86/include/asm/processor.h                 |  6 ++----
 arch/x86/include/asm/rwsem.h                     |  4 ++--
 arch/x86/include/asm/uaccess.h                   |  4 ++--
 arch/x86/include/asm/xen/hypercall.h             |  5 ++---
 arch/x86/kvm/emulate.c                           |  3 +--
 arch/x86/kvm/vmx.c                               |  3 +--
 arch/x86/mm/fault.c                              |  3 +--
 tools/objtool/Documentation/stack-validation.txt |  6 +++---
 13 files changed, 42 insertions(+), 45 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..c096624137ae 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
 			   output, input...)				      \
 {									      \
-	register void *__sp asm(_ASM_SP);				      \
 	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
 		"call %P[new2]", feature2)				      \
-		: output, "+r" (__sp)					      \
+		: output, ASM_CALL_CONSTRAINT				      \
 		: [old] "i" (oldfunc), [new1] "i" (newfunc1),		      \
 		  [new2] "i" (newfunc2), ## input);			      \
 }
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 676ee5807d86..c1eadbaf1115 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -132,4 +132,15 @@
 /* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction.  Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function.  If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned int __asm_call_sp asm("esp");
+#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+#endif
+
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 63cc96f064dc..738503e1f80c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 input_address = input ? virt_to_phys(input) : 0;
 	u64 output_address = output ? virt_to_phys(output) : 0;
 	u64 hv_status;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("mov %4, %%r8\n"
 			     "call *%5"
-			     : "=a" (hv_status), "+r" (__sp),
+			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 			       "+c" (control), "+d" (input_address)
 			     :  "r" (output_address), "m" (hv_hypercall_pg)
 			     : "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 
 	__asm__ __volatile__("call *%7"
 			     : "=A" (hv_status),
-			       "+c" (input_address_lo), "+r" (__sp)
+			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
 			     : "A" (control),
 			       "b" (input_address_hi),
 			       "D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 {
 	u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
-	register void *__sp asm(_ASM_SP);
 
 #ifdef CONFIG_X86_64
 	{
 		__asm__ __volatile__("call *%4"
-				     : "=a" (hv_status), "+r" (__sp),
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 				       "+c" (control), "+d" (input1)
 				     : "m" (hv_hypercall_pg)
 				     : "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 		__asm__ __volatile__ ("call *%5"
 				      : "=A"(hv_status),
 					"+c"(input1_lo),
-					"+r"(__sp)
+					ASM_CALL_CONSTRAINT
 				      :	"A" (control),
 					"b" (input1_hi),
 					"m" (hv_hypercall_pg)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..280d94c36dad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
  */
 #ifdef CONFIG_X86_32
 #define PVOP_VCALL_ARGS							\
-	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;	\
-	register void *__sp asm("esp")
+	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
 #define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"a" ((unsigned long)(x))
@@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
 /* [re]ax isn't an arg, but the return val */
 #define PVOP_VCALL_ARGS						\
 	unsigned long __edi = __edi, __esi = __esi,		\
-		__edx = __edx, __ecx = __ecx, __eax = __eax;	\
-	register void *__sp asm("rsp")
+		__edx = __edx, __ecx = __ecx, __eax = __eax;
+
 #define PVOP_CALL_ARGS		PVOP_VCALL_ARGS
 
 #define PVOP_CALL_ARG1(x)		"D" ((unsigned long)(x))
@@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : call_clbr, "+r" (__sp)		\
+				     : call_clbr, ASM_CALL_CONSTRAINT	\
 				     : paravirt_type(op),		\
 				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
@@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
 		asm volatile(pre					\
 			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
-			     : call_clbr, "+r" (__sp)			\
+			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
 			       paravirt_clobber(clbr),			\
 			       ##__VA_ARGS__				\
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..4f44505dbf87 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
 
 #ifdef CONFIG_PREEMPT
   extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule()					\
-({								\
-	register void *__sp asm(_ASM_SP);			\
-	asm volatile ("call ___preempt_schedule" : "+r"(__sp));	\
-})
+# define __preempt_schedule() \
+	asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
 
   extern asmlinkage void preempt_schedule(void);
   extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace()					\
-({									\
-	register void *__sp asm(_ASM_SP);				\
-	asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp));	\
-})
+# define __preempt_schedule_notrace() \
+	asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
   extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3fa26a61eabc..b390ff76e58f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,8 +677,6 @@ static inline void sync_core(void)
 	 * Like all of Linux's memory ordering operations, this is a
 	 * compiler barrier as well.
 	 */
-	register void *__sp asm(_ASM_SP);
-
 #ifdef CONFIG_X86_32
 	asm volatile (
 		"pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
 		"pushl $1f\n\t"
 		"iret\n\t"
 		"1:"
-		: "+r" (__sp) : : "memory");
+		: ASM_CALL_CONSTRAINT : : "memory");
 #else
 	unsigned int tmp;
 
@@ -703,7 +701,7 @@ static inline void sync_core(void)
 		"iretq\n\t"
 		UNWIND_HINT_RESTORE
 		"1:"
-		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+		: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
 #endif
 }
 
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4b957d..7116b7931c7b 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 ({							\
 	long tmp;					\
 	struct rw_semaphore* ret;			\
-	register void *__sp asm(_ASM_SP);		\
 							\
 	asm volatile("# beginning down_write\n\t"	\
 		     LOCK_PREFIX "  xadd      %1,(%4)\n\t"	\
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 		     "  call " slow_path "\n"		\
 		     "1:\n"				\
 		     "# ending down_write"		\
-		     : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+		     : "+m" (sem->count), "=d" (tmp),	\
+		       "=a" (ret), ASM_CALL_CONSTRAINT	\
 		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
 		     : "memory", "cc");			\
 	ret;						\
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 184eb9894dae..78e8fcc87d4c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 ({									\
 	int __ret_gu;							\
 	register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX);		\
-	register void *__sp asm(_ASM_SP);				\
 	__chk_user_ptr(ptr);						\
 	might_fault();							\
 	asm volatile("call __get_user_%P4"				\
-		     : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp)	\
+		     : "=a" (__ret_gu), "=r" (__val_gu),		\
+			ASM_CALL_CONSTRAINT				\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
 	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
 	__builtin_expect(__ret_gu, 0);					\
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..128a1a0b1450 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
 	register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
 	register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
 	register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
-	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
-	register void *__sp asm(_ASM_SP);
+	register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
 
-#define __HYPERCALL_0PARAM	"=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM	"=r" (__res), ASM_CALL_CONSTRAINT
 #define __HYPERCALL_1PARAM	__HYPERCALL_0PARAM, "+r" (__arg1)
 #define __HYPERCALL_2PARAM	__HYPERCALL_1PARAM, "+r" (__arg2)
 #define __HYPERCALL_3PARAM	__HYPERCALL_2PARAM, "+r" (__arg3)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..f23f13403f33 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 {
-	register void *__sp asm(_ASM_SP);
 	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
 
 	if (!(ctxt->d & ByteOp))
@@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 
 	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
 	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-	      [fastop]"+S"(fop), "+r"(__sp)
+	      [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
 	    : "c"(ctxt->src2.val));
 
 	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..6ee237f509dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 {
 	u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	register void *__sp asm(_ASM_SP);
 
 	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
 			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9064,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 			[sp]"=&r"(tmp),
 #endif
-			"+r"(__sp)
+			ASM_CALL_CONSTRAINT
 			:
 			[entry]"r"(entry),
 			[ss]"i"(__KERNEL_DS),
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..39567b5c33da 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	if (is_vmalloc_addr((void *)address) &&
 	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
 	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-		register void *__sp asm("rsp");
 		unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
 		/*
 		 * We're likely to be running with very little stack space
@@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		asm volatile ("movq %[stack], %%rsp\n\t"
 			      "call handle_stack_overflow\n\t"
 			      "1: jmp 1b"
-			      : "+r" (__sp)
+			      : ASM_CALL_CONSTRAINT
 			      : "D" ("kernel stack overflow (page fault)"),
 				"S" (regs), "d" (address),
 				[stack] "rm" (stack));
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 6a1af43862df..3995735a878f 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them.
    If it's a GCC-compiled .c file, the error may be because the function
    uses an inline asm() statement which has a "call" instruction.  An
    asm() statement with a call instruction must declare the use of the
-   stack pointer in its output operand.  For example, on x86_64:
+   stack pointer in its output operand.  On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
    Otherwise the stack frame may not get created before the call.
 

From 28585a832602747cbfa88ad8934013177a3aae38 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 22 Sep 2017 14:10:22 -0700
Subject: [PATCH 294/391] rcu: Allow for page faults in NMI handlers

A number of architecture invoke rcu_irq_enter() on exception entry in
order to allow RCU read-side critical sections in the exception handler
when the exception is from an idle or nohz_full CPU.  This works, at
least unless the exception happens in an NMI handler.  In that case,
rcu_nmi_enter() would already have exited the extended quiescent state,
which would mean that rcu_irq_enter() would (incorrectly) cause RCU
to think that it is again in an extended quiescent state.  This will
in turn result in lockdep splats in response to later RCU read-side
critical sections.

This commit therefore causes rcu_irq_enter() and rcu_irq_exit() to
take no action if there is an rcu_nmi_enter() in effect, thus avoiding
the unscheduled return to RCU quiescent state.  This in turn should
make the kernel safe for on-demand RCU voyeurism.

Link: http://lkml.kernel.org/r/20170922211022.GA18084@linux.vnet.ibm.com

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/rcu/tree.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 51d4c3acf32d..63bee8e1b193 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -888,6 +888,11 @@ void rcu_irq_exit(void)
 
 	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
+
+	/* Page faults can happen in NMI handlers, so check... */
+	if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+		return;
+
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
 		     rdtp->dynticks_nesting < 1);
 	if (rdtp->dynticks_nesting <= 1) {
@@ -1020,6 +1025,11 @@ void rcu_irq_enter(void)
 
 	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
 	rdtp = this_cpu_ptr(&rcu_dynticks);
+
+	/* Page faults can happen in NMI handlers, so check... */
+	if (READ_ONCE(rdtp->dynticks_nmi_nesting))
+		return;
+
 	oldval = rdtp->dynticks_nesting;
 	rdtp->dynticks_nesting++;
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&

From 9aadde91b3c035413c806619beb3e3ef6e697953 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 22 Sep 2017 17:22:19 -0400
Subject: [PATCH 295/391] extable: Consolidate *kernel_text_address() functions

The functionality between kernel_text_address() and _kernel_text_address()
is the same except that _kernel_text_address() does a little more (that
function needs a rename, but that can be done another time). Instead of
having duplicate code in both, simply have _kernel_text_address() calls
kernel_text_address() instead.

This is marked for stable because there's an RCU bug that can happen if
one of these functions gets called while RCU is not watching. That fix
depends on this fix to keep from having to write the fix twice.

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/extable.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/kernel/extable.c b/kernel/extable.c
index 38c2412401a1..a7024a494faf 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -102,15 +102,7 @@ int core_kernel_data(unsigned long addr)
 
 int __kernel_text_address(unsigned long addr)
 {
-	if (core_kernel_text(addr))
-		return 1;
-	if (is_module_text_address(addr))
-		return 1;
-	if (is_ftrace_trampoline(addr))
-		return 1;
-	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
-		return 1;
-	if (is_bpf_text_address(addr))
+	if (kernel_text_address(addr))
 		return 1;
 	/*
 	 * There might be init symbols in saved stacktraces.

From e8cac8b1d10589be45671a5ade0926a639b543b7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 22 Sep 2017 17:36:32 -0400
Subject: [PATCH 296/391] extable: Enable RCU if it is not watching in
 kernel_text_address()

If kernel_text_address() is called when RCU is not watching, it can cause an
RCU bug because is_module_text_address(), the is_kprobe_*insn_slot()
and is_bpf_text_address() functions require the use of RCU.

Only enable RCU if it is not currently watching before it calls
is_module_text_address(). The use of rcu_nmi_enter() is used to enable RCU
because kernel_text_address() can happen pretty much anywhere (like an NMI),
and even from within an NMI. It is called via save_stack_trace() that can be
called by any WARN() or tracing function, which can happen while RCU is not
watching (for example, going to or coming from idle, or during CPU take down
or bring up).

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/extable.c | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/kernel/extable.c b/kernel/extable.c
index a7024a494faf..9aa1cc41ecf7 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -119,17 +119,42 @@ int __kernel_text_address(unsigned long addr)
 
 int kernel_text_address(unsigned long addr)
 {
+	bool no_rcu;
+	int ret = 1;
+
 	if (core_kernel_text(addr))
 		return 1;
+
+	/*
+	 * If a stack dump happens while RCU is not watching, then
+	 * RCU needs to be notified that it requires to start
+	 * watching again. This can happen either by tracing that
+	 * triggers a stack trace, or a WARN() that happens during
+	 * coming back from idle, or cpu on or offlining.
+	 *
+	 * is_module_text_address() as well as the kprobe slots
+	 * and is_bpf_text_address() require RCU to be watching.
+	 */
+	no_rcu = !rcu_is_watching();
+
+	/* Treat this like an NMI as it can happen anywhere */
+	if (no_rcu)
+		rcu_nmi_enter();
+
 	if (is_module_text_address(addr))
-		return 1;
+		goto out;
 	if (is_ftrace_trampoline(addr))
-		return 1;
+		goto out;
 	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
-		return 1;
+		goto out;
 	if (is_bpf_text_address(addr))
-		return 1;
-	return 0;
+		goto out;
+	ret = 0;
+out:
+	if (no_rcu)
+		rcu_nmi_exit();
+
+	return ret;
 }
 
 /*

From 15516c89acce948debc4c598e03c3fee53045797 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 21 Sep 2017 13:00:21 -0400
Subject: [PATCH 297/391] tracing: Remove RCU work arounds from stack tracer

Currently the stack tracer calls rcu_irq_enter() to make sure RCU
is watching when it records a stack trace. But if the stack tracer
is triggered while tracing inside of a rcu_irq_enter(), calling
rcu_irq_enter() unconditionally can be problematic.

The reason for having rcu_irq_enter() in the first place has been
fixed from within the saving of the stack trace code, and there's no
reason for doing it in the stack tracer itself. Just remove it.

Cc: stable@vger.kernel.org
Fixes: 0be964be0 ("module: Sanitize RCU usage and locking")
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Suggested-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_stack.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index a4df67cbc711..49cb41412eec 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -96,23 +96,9 @@ check_stack(unsigned long ip, unsigned long *stack)
 	if (in_nmi())
 		return;
 
-	/*
-	 * There's a slight chance that we are tracing inside the
-	 * RCU infrastructure, and rcu_irq_enter() will not work
-	 * as expected.
-	 */
-	if (unlikely(rcu_irq_enter_disabled()))
-		return;
-
 	local_irq_save(flags);
 	arch_spin_lock(&stack_trace_max_lock);
 
-	/*
-	 * RCU may not be watching, make it see us.
-	 * The stack trace code uses rcu_sched.
-	 */
-	rcu_irq_enter();
-
 	/* In case another CPU set the tracer_frame on us */
 	if (unlikely(!frame_size))
 		this_size -= tracer_frame;
@@ -205,7 +191,6 @@ check_stack(unsigned long ip, unsigned long *stack)
 	}
 
  out:
-	rcu_irq_exit();
 	arch_spin_unlock(&stack_trace_max_lock);
 	local_irq_restore(flags);
 }

From ab5348c9c23cd253f5902980d2d8fe067dc24c82 Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.vnet.ibm.com>
Date: Wed, 26 Jul 2017 22:27:05 -0400
Subject: [PATCH 298/391] security: fix description of values returned by
 cap_inode_need_killpriv

cap_inode_need_killpriv returns 1 if security.capability exists and
has a value and inode_killpriv() is required, 0 otherwise. Fix the
description of the return value to reflect this.

Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/commoncap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/security/commoncap.c b/security/commoncap.c
index 7abebd782d5e..123426900d25 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -300,10 +300,10 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm)
  *
  * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
  * affects the security markings on that inode, and if it is, should
- * inode_killpriv() be invoked or the change rejected?
+ * inode_killpriv() be invoked or the change rejected.
  *
- * Returns 0 if granted; +ve if granted, but inode_killpriv() is required; and
- * -ve to deny the change.
+ * Returns 1 if security.capability has a value, meaning inode_killpriv()
+ * is required, 0 otherwise, meaning inode_killpriv() is not required.
  */
 int cap_inode_need_killpriv(struct dentry *dentry)
 {

From c2a9c4bf034ab7415ec272c4c86f1c0b796f80e6 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 17 Aug 2017 23:04:21 +0530
Subject: [PATCH 299/391] tpm: vtpm: constify vio_device_id

vio_device_id are not supposed to change at runtime. All functions
working with vio_device_id provided by <asm/vio.h> work with
const vio_device_id. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm_ibmvtpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index f01d083eced2..d2ce46bd2471 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -32,7 +32,7 @@
 
 static const char tpm_ibmvtpm_driver_name[] = "tpm_ibmvtpm";
 
-static struct vio_device_id tpm_ibmvtpm_device_table[] = {
+static const struct vio_device_id tpm_ibmvtpm_device_table[] = {
 	{ "IBM,vtpm", "IBM,vtpm"},
 	{ "", "" }
 };

From e1ec650f9ae7d1e66f77bf14c86e4e9dc629a4d5 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 6 Jul 2017 23:18:39 +0530
Subject: [PATCH 300/391] tpm: tpm_crb: constify acpi_device_id.

acpi_device_id are not supposed to change at runtime. All functions
working with acpi_device_id provided by <acpi/acpi_bus.h> work with
const acpi_device_id. So mark the non-const structs as const.

File size before:
   text	   data	    bss	    dec	    hex	filename
   4198	    608	      0	   4806	   12c6	drivers/char/tpm/tpm_crb.o

File size After adding 'const':
   text	   data	    bss	    dec	    hex	filename
   4262	    520	      0	   4782	   12ae	drivers/char/tpm/tpm_crb.o

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm_crb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index a4ac63a21d8a..8f0a98dea327 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -665,7 +665,7 @@ static const struct dev_pm_ops crb_pm = {
 	SET_RUNTIME_PM_OPS(crb_pm_runtime_suspend, crb_pm_runtime_resume, NULL)
 };
 
-static struct acpi_device_id crb_device_ids[] = {
+static const struct acpi_device_id crb_device_ids[] = {
 	{"MSFT0101", 0},
 	{"", 0},
 };

From 5d0e4d78149b9745ea34848e950e734f7db3b95f Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Tue, 27 Jun 2017 12:27:23 +0200
Subject: [PATCH 301/391] Documentation: tpm: add powered-while-suspended
 binding documentation

Add a new powered-while-suspended property to control the behavior of the
TPM suspend/resume.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt
index 8cb638b7e89c..85c8216fc335 100644
--- a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt
+++ b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt
@@ -8,6 +8,12 @@ Required properties:
                    the firmware event log
 - linux,sml-size : size of the memory allocated for the firmware event log
 
+Optional properties:
+
+- powered-while-suspended: present when the TPM is left powered on between
+                           suspend and resume (makes the suspend/resume
+                           callbacks do nothing).
+
 Example (for OpenPower Systems with Nuvoton TPM 2.0 on I2C)
 ----------------------------------------------------------
 

From 9f3fc7bcddcb51234e23494531f93ab60475e1c3 Mon Sep 17 00:00:00 2001
From: Hamza Attak <hamza@hpe.com>
Date: Mon, 14 Aug 2017 19:09:16 +0100
Subject: [PATCH 302/391] tpm: replace msleep() with  usleep_range() in TPM
 1.2/2.0 generic drivers

The patch simply replaces all msleep function calls with usleep_range calls
in the generic drivers.

Tested with an Infineon TPM 1.2, using the generic tpm-tis module, for a
thousand PCR extends, we see results going from 1m57s unpatched to 40s
with the new patch. We obtain similar results when using the original and
patched tpm_infineon driver, which is also part of the patch.
Similarly with a STM TPM 2.0, using the CRB driver, it takes about 20ms per
extend unpatched and around 7ms with the new patch.

Note that the PCR consistency is untouched with this patch, each TPM has
been tested with 10 million extends and the aggregated PCR value is
continuously verified to be correct.

As an extension of this work, this could potentially and easily be applied
to other vendor's drivers. Still, these changes are not included in the
proposed patch as they are untested.

Signed-off-by: Hamza Attak <hamza@hpe.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm-interface.c | 10 +++++-----
 drivers/char/tpm/tpm.h           |  9 ++++++++-
 drivers/char/tpm/tpm2-cmd.c      |  2 +-
 drivers/char/tpm/tpm_infineon.c  |  6 +++---
 drivers/char/tpm/tpm_tis_core.c  |  8 ++++----
 5 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index fe597e6c55c4..1d6729be4cd6 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -455,7 +455,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
 			goto out;
 		}
 
-		msleep(TPM_TIMEOUT);	/* CHECK */
+		tpm_msleep(TPM_TIMEOUT);
 		rmb();
 	} while (time_before(jiffies, stop));
 
@@ -970,7 +970,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
 			dev_info(
 			    &chip->dev, HW_ERR
 			    "TPM command timed out during continue self test");
-			msleep(delay_msec);
+			tpm_msleep(delay_msec);
 			continue;
 		}
 
@@ -985,7 +985,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
 		}
 		if (rc != TPM_WARN_DOING_SELFTEST)
 			return rc;
-		msleep(delay_msec);
+		tpm_msleep(delay_msec);
 	} while (--loops > 0);
 
 	return rc;
@@ -1085,7 +1085,7 @@ int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
 		}
 	} else {
 		do {
-			msleep(TPM_TIMEOUT);
+			tpm_msleep(TPM_TIMEOUT);
 			status = chip->ops->status(chip);
 			if ((status & mask) == mask)
 				return 0;
@@ -1150,7 +1150,7 @@ int tpm_pm_suspend(struct device *dev)
 		 */
 		if (rc != TPM_WARN_RETRY)
 			break;
-		msleep(TPM_TIMEOUT_RETRY);
+		tpm_msleep(TPM_TIMEOUT_RETRY);
 	}
 
 	if (rc)
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 04fbff2edbf3..2d5466a72e40 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -50,7 +50,8 @@ enum tpm_const {
 
 enum tpm_timeout {
 	TPM_TIMEOUT = 5,	/* msecs */
-	TPM_TIMEOUT_RETRY = 100 /* msecs */
+	TPM_TIMEOUT_RETRY = 100, /* msecs */
+	TPM_TIMEOUT_RANGE_US = 300	/* usecs */
 };
 
 /* TPM addresses */
@@ -527,6 +528,12 @@ int tpm_pm_resume(struct device *dev);
 int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
 		      wait_queue_head_t *queue, bool check_cancel);
 
+static inline void tpm_msleep(unsigned int delay_msec)
+{
+	usleep_range(delay_msec * 1000,
+		     (delay_msec * 1000) + TPM_TIMEOUT_RANGE_US);
+};
+
 struct tpm_chip *tpm_chip_find_get(int chip_num);
 __must_check int tpm_try_get_ops(struct tpm_chip *chip);
 void tpm_put_ops(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index f7f34b2aa981..e1a41b788f08 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -899,7 +899,7 @@ static int tpm2_do_selftest(struct tpm_chip *chip)
 		if (rc != TPM2_RC_TESTING)
 			break;
 
-		msleep(delay_msec);
+		tpm_msleep(delay_msec);
 	}
 
 	return rc;
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index 3b1b9f9322d5..d8f10047fbba 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -191,7 +191,7 @@ static int wait(struct tpm_chip *chip, int wait_for_bit)
 		/* check the status-register if wait_for_bit is set */
 		if (status & 1 << wait_for_bit)
 			break;
-		msleep(TPM_MSLEEP_TIME);
+		tpm_msleep(TPM_MSLEEP_TIME);
 	}
 	if (i == TPM_MAX_TRIES) {	/* timeout occurs */
 		if (wait_for_bit == STAT_XFE)
@@ -226,7 +226,7 @@ static void tpm_wtx(struct tpm_chip *chip)
 	wait_and_send(chip, TPM_CTRL_WTX);
 	wait_and_send(chip, 0x00);
 	wait_and_send(chip, 0x00);
-	msleep(TPM_WTX_MSLEEP_TIME);
+	tpm_msleep(TPM_WTX_MSLEEP_TIME);
 }
 
 static void tpm_wtx_abort(struct tpm_chip *chip)
@@ -237,7 +237,7 @@ static void tpm_wtx_abort(struct tpm_chip *chip)
 	wait_and_send(chip, 0x00);
 	wait_and_send(chip, 0x00);
 	number_of_wtx = 0;
-	msleep(TPM_WTX_MSLEEP_TIME);
+	tpm_msleep(TPM_WTX_MSLEEP_TIME);
 }
 
 static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count)
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index b617b2eeb080..63bc6c3b949e 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -51,7 +51,7 @@ static int wait_startup(struct tpm_chip *chip, int l)
 
 		if (access & TPM_ACCESS_VALID)
 			return 0;
-		msleep(TPM_TIMEOUT);
+		tpm_msleep(TPM_TIMEOUT);
 	} while (time_before(jiffies, stop));
 	return -1;
 }
@@ -117,7 +117,7 @@ static int request_locality(struct tpm_chip *chip, int l)
 		do {
 			if (check_locality(chip, l))
 				return l;
-			msleep(TPM_TIMEOUT);
+			tpm_msleep(TPM_TIMEOUT);
 		} while (time_before(jiffies, stop));
 	}
 	return -1;
@@ -164,7 +164,7 @@ static int get_burstcount(struct tpm_chip *chip)
 		burstcnt = (value >> 8) & 0xFFFF;
 		if (burstcnt)
 			return burstcnt;
-		msleep(TPM_TIMEOUT);
+		tpm_msleep(TPM_TIMEOUT);
 	} while (time_before(jiffies, stop));
 	return -EBUSY;
 }
@@ -396,7 +396,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
 	priv->irq = irq;
 	chip->flags |= TPM_CHIP_FLAG_IRQ;
 	if (!priv->irq_tested)
-		msleep(1);
+		tpm_msleep(1);
 	if (!priv->irq_tested)
 		disable_interrupts(chip);
 	priv->irq_tested = true;

From fb154e0e0a95249459df054241a9e8f4cca56062 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Fri, 24 Feb 2017 20:35:16 +0100
Subject: [PATCH 303/391] tpm: ibmvtpm: simplify crq initialization and
 document crq format

The crq is passed in registers and is the same on BE and LE hosts.
However, current implementation allocates a structure on-stack to
represent the crq, initializes the members swapping them to BE, and
loads the structure swapping it from BE. This is pointless and causes
GCC warnings about ununitialized members. Get rid of the structure and
the warnings.

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 drivers/char/tpm/tpm_ibmvtpm.c | 96 +++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 36 deletions(-)

diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index d2ce46bd2471..25f6e2665385 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -39,19 +39,63 @@ static const struct vio_device_id tpm_ibmvtpm_device_table[] = {
 MODULE_DEVICE_TABLE(vio, tpm_ibmvtpm_device_table);
 
 /**
+ *
+ * ibmvtpm_send_crq_word - Send a CRQ request
+ * @vdev:	vio device struct
+ * @w1:		pre-constructed first word of tpm crq (second word is reserved)
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvtpm_send_crq_word(struct vio_dev *vdev, u64 w1)
+{
+	return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, w1, 0);
+}
+
+/**
+ *
  * ibmvtpm_send_crq - Send a CRQ request
  *
  * @vdev:	vio device struct
- * @w1:		first word
- * @w2:		second word
+ * @valid:	Valid field
+ * @msg:	Type field
+ * @len:	Length field
+ * @data:	Data field
+ *
+ * The ibmvtpm crq is defined as follows:
+ *
+ * Byte  |   0   |   1   |   2   |   3   |   4   |   5   |   6   |   7
+ * -----------------------------------------------------------------------
+ * Word0 | Valid | Type  |     Length    |              Data
+ * -----------------------------------------------------------------------
+ * Word1 |                Reserved
+ * -----------------------------------------------------------------------
+ *
+ * Which matches the following structure (on bigendian host):
+ *
+ * struct ibmvtpm_crq {
+ *         u8 valid;
+ *         u8 msg;
+ *         __be16 len;
+ *         __be32 data;
+ *         __be64 reserved;
+ * } __attribute__((packed, aligned(8)));
+ *
+ * However, the value is passed in a register so just compute the numeric value
+ * to load into the register avoiding byteswap altogether. Endian only affects
+ * memory loads and stores - registers are internally represented the same.
  *
  * Return:
- *	0 -Sucess
+ *	0 (H_SUCCESS) - Success
  *	Non-zero - Failure
  */
-static int ibmvtpm_send_crq(struct vio_dev *vdev, u64 w1, u64 w2)
+static int ibmvtpm_send_crq(struct vio_dev *vdev,
+		u8 valid, u8 msg, u16 len, u32 data)
 {
-	return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, w1, w2);
+	u64 w1 = ((u64)valid << 56) | ((u64)msg << 48) | ((u64)len << 32) |
+		(u64)data;
+	return ibmvtpm_send_crq_word(vdev, w1);
 }
 
 /**
@@ -109,8 +153,6 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 {
 	struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
-	struct ibmvtpm_crq crq;
-	__be64 *word = (__be64 *)&crq;
 	int rc, sig;
 
 	if (!ibmvtpm->rtce_buf) {
@@ -137,10 +179,6 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 	spin_lock(&ibmvtpm->rtce_lock);
 	ibmvtpm->res_len = 0;
 	memcpy((void *)ibmvtpm->rtce_buf, (void *)buf, count);
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_TPM_COMMAND;
-	crq.len = cpu_to_be16(count);
-	crq.data = cpu_to_be32(ibmvtpm->rtce_dma_handle);
 
 	/*
 	 * set the processing flag before the Hcall, since we may get the
@@ -148,8 +186,9 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 	 */
 	ibmvtpm->tpm_processing_cmd = true;
 
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, be64_to_cpu(word[0]),
-			      be64_to_cpu(word[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_TPM_COMMAND,
+			count, ibmvtpm->rtce_dma_handle);
 	if (rc != H_SUCCESS) {
 		dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc);
 		rc = 0;
@@ -182,15 +221,10 @@ static u8 tpm_ibmvtpm_status(struct tpm_chip *chip)
  */
 static int ibmvtpm_crq_get_rtce_size(struct ibmvtpm_dev *ibmvtpm)
 {
-	struct ibmvtpm_crq crq;
-	u64 *buf = (u64 *) &crq;
 	int rc;
 
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_GET_RTCE_BUFFER_SIZE;
-
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
-			      cpu_to_be64(buf[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_GET_RTCE_BUFFER_SIZE, 0, 0);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_get_rtce_size failed rc=%d\n", rc);
@@ -210,15 +244,10 @@ static int ibmvtpm_crq_get_rtce_size(struct ibmvtpm_dev *ibmvtpm)
  */
 static int ibmvtpm_crq_get_version(struct ibmvtpm_dev *ibmvtpm)
 {
-	struct ibmvtpm_crq crq;
-	u64 *buf = (u64 *) &crq;
 	int rc;
 
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_GET_VERSION;
-
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
-			      cpu_to_be64(buf[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_GET_VERSION, 0, 0);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_get_version failed rc=%d\n", rc);
@@ -238,7 +267,7 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm)
 {
 	int rc;
 
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, INIT_CRQ_COMP_CMD, 0);
+	rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_COMP_CMD);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_send_init_complete failed rc=%d\n", rc);
@@ -258,7 +287,7 @@ static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm)
 {
 	int rc;
 
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, INIT_CRQ_CMD, 0);
+	rc = ibmvtpm_send_crq_word(ibmvtpm->vdev, INIT_CRQ_CMD);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"ibmvtpm_crq_send_init failed rc=%d\n", rc);
@@ -340,15 +369,10 @@ static int tpm_ibmvtpm_suspend(struct device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(dev);
 	struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
-	struct ibmvtpm_crq crq;
-	u64 *buf = (u64 *) &crq;
 	int rc = 0;
 
-	crq.valid = (u8)IBMVTPM_VALID_CMD;
-	crq.msg = (u8)VTPM_PREPARE_TO_SUSPEND;
-
-	rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]),
-			      cpu_to_be64(buf[1]));
+	rc = ibmvtpm_send_crq(ibmvtpm->vdev,
+			IBMVTPM_VALID_CMD, VTPM_PREPARE_TO_SUSPEND, 0, 0);
 	if (rc != H_SUCCESS)
 		dev_err(ibmvtpm->dev,
 			"tpm_ibmvtpm_suspend failed rc=%d\n", rc);

From 656f083116a4799d8c0194976b8a2d66bf306538 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:44 +0200
Subject: [PATCH 304/391] x86/fpu: Rename
 copyin_to_xsaves()/copyout_from_xsaves() to
 copy_user_to_xstate()/copy_xstate_to_user()

The 'copyin/copyout' nomenclature needlessly departs from what the modern FPU code
uses, which is:

 copy_fpregs_to_fpstate()
 copy_fpstate_to_sigframe()
 copy_fregs_to_user()
 copy_fxregs_to_kernel()
 copy_fxregs_to_user()
 copy_kernel_to_fpregs()
 copy_kernel_to_fregs()
 copy_kernel_to_fxregs()
 copy_kernel_to_xregs()
 copy_user_to_fregs()
 copy_user_to_fxregs()
 copy_user_to_xregs()
 copy_xregs_to_kernel()
 copy_xregs_to_user()

I.e. according to this pattern, the following rename should be done:

  copyin_to_xsaves()    -> copy_user_to_xstate()
  copyout_from_xsaves() -> copy_xstate_to_user()

or, if we want to be pedantic, denote that that the user-space format is ptrace:

  copyin_to_xsaves()    -> copy_user_ptrace_to_xstate()
  copyout_from_xsaves() -> copy_xstate_to_user_ptrace()

But I'd suggest the shorter, non-pedantic name.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-2-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h | 4 ++--
 arch/x86/kernel/fpu/regset.c      | 4 ++--
 arch/x86/kernel/fpu/signal.c      | 2 +-
 arch/x86/kernel/fpu/xstate.c      | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 1b2799e0699a..a1baa17e9748 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
+int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b188b16841e3..165d0545c924 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -92,7 +92,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_read(fpu);
 
 	if (using_compacted_format()) {
-		ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+		ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
@@ -132,7 +132,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_write(fpu);
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		ret = copyin_to_xsaves(kbuf, ubuf, xsave);
+		ret = copy_user_to_xstate(kbuf, ubuf, xsave);
 	else
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 83c23c230b4c..b1fe9a1fc4e0 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -324,7 +324,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		fpu__drop(fpu);
 
 		if (using_compacted_format()) {
-			err = copyin_to_xsaves(NULL, buf_fx,
+			err = copy_user_to_xstate(NULL, buf_fx,
 					       &fpu->state.xsave);
 		} else {
 			err = __copy_from_user(&fpu->state.xsave,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c24ac1efb12d..e7bb41723eaa 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -951,7 +951,7 @@ static inline int xstate_copyout(unsigned int pos, unsigned int count,
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			void __user *ubuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
@@ -1023,7 +1023,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
+int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave)
 {
 	unsigned int offset, size;

From f0d4f30a7fd299587840a028655285a87f334904 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:45 +0200
Subject: [PATCH 305/391] x86/fpu: Split copy_xstate_to_user() into
 copy_xstate_to_kernel() & copy_xstate_to_user()

copy_xstate_to_user() is a weird API - in part due to a bad API inherited
from the regset APIs.

But don't propagate that bad API choice into the FPU code - so as a first
step split the API into kernel and user buffer handling routines.

(Also split the xstate_copyout() internal helper.)

The split API is a dumb duplication that should be obviously correct, the
real splitting will be done in the next patch.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-3-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |   4 +-
 arch/x86/kernel/fpu/regset.c      |   5 +-
 arch/x86/kernel/fpu/xstate.c      | 110 ++++++++++++++++++++++++++++--
 3 files changed, 109 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index a1baa17e9748..92dc8ca14124 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 165d0545c924..b6d12d66d04b 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -92,7 +92,10 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	fpu__activate_fpstate_read(fpu);
 
 	if (using_compacted_format()) {
-		ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
+		if (kbuf)
+			ret = copy_xstate_to_kernel(pos, count, kbuf, ubuf, xsave);
+		else
+			ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index e7bb41723eaa..38561539cb99 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -924,10 +924,106 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
-				 void *kbuf, void __user *ubuf,
-				 const void *data, const int start_pos,
-				 const int end_pos)
+static inline int
+__copy_xstate_to_kernel(unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf,
+			const void *data, const int start_pos,
+			const int end_pos)
+{
+	if ((count == 0) || (pos < start_pos))
+		return 0;
+
+	if (end_pos < 0 || pos < end_pos) {
+		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+
+		if (kbuf) {
+			memcpy(kbuf + pos, data, copy);
+		} else {
+			if (__copy_to_user(ubuf + pos, data, copy))
+				return -EFAULT;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy but pos always starts from zero. This is called
+ * from xstateregs_get() and there we check the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
+			void __user *ubuf, struct xregs_state *xsave)
+{
+	unsigned int offset, size;
+	int ret, i;
+	struct xstate_header header;
+
+	/*
+	 * Currently copy_regset_to_user() starts from pos 0:
+	 */
+	if (unlikely(pos != 0))
+		return -EFAULT;
+
+	/*
+	 * The destination is a ptrace buffer; we put in only user xstates:
+	 */
+	memset(&header, 0, sizeof(header));
+	header.xfeatures = xsave->header.xfeatures;
+	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Copy xregs_state->header:
+	 */
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(header);
+
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, &header, 0, count);
+
+	if (ret)
+		return ret;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		/*
+		 * Copy only in-use xstates:
+		 */
+		if ((header.xfeatures >> i) & 1) {
+			void *src = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, src, 0, count);
+
+			if (ret)
+				return ret;
+
+			if (offset + size >= count)
+				break;
+		}
+
+	}
+
+	/*
+	 * Fill xsave->i387.sw_reserved value for ptrace frame:
+	 */
+	offset = offsetof(struct fxregs_state, sw_reserved);
+	size = sizeof(xstate_fx_sw_bytes);
+
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
+
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static inline int
+__copy_xstate_to_user(unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf,
+		      const void *data, const int start_pos,
+		      const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -977,7 +1073,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
+	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, &header, 0, count);
 
 	if (ret)
 		return ret;
@@ -992,7 +1088,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+			ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, src, 0, count);
 
 			if (ret)
 				return ret;
@@ -1009,7 +1105,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
+	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
 
 	if (ret)
 		return ret;

From 4d981cf2d96f29cdfa7d4972c8b377fe7baa9c4c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:46 +0200
Subject: [PATCH 306/391] x86/fpu: Remove 'ubuf' parameter from the
 copy_xstate_to_kernel() APIs

The 'ubuf' parameter is unused in the _kernel() side of the API, remove it.

This simplifies the code and makes it easier to think about.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-4-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/xstate.c      | 21 ++++++---------------
 3 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 92dc8ca14124..c762574a245f 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,7 +48,7 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
 int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b6d12d66d04b..34e74adf9d5d 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -93,7 +93,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	if (using_compacted_format()) {
 		if (kbuf)
-			ret = copy_xstate_to_kernel(pos, count, kbuf, ubuf, xsave);
+			ret = copy_xstate_to_kernel(pos, count, kbuf, xsave);
 		else
 			ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
 	} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 38561539cb99..71d3bda2b898 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -926,7 +926,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  */
 static inline int
 __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
-			void *kbuf, void __user *ubuf,
+			void *kbuf,
 			const void *data, const int start_pos,
 			const int end_pos)
 {
@@ -936,12 +936,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
 	if (end_pos < 0 || pos < end_pos) {
 		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
 
-		if (kbuf) {
-			memcpy(kbuf + pos, data, copy);
-		} else {
-			if (__copy_to_user(ubuf + pos, data, copy))
-				return -EFAULT;
-		}
+		memcpy(kbuf + pos, data, copy);
 	}
 	return 0;
 }
@@ -953,8 +948,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
  * It supports partial copy but pos always starts from zero. This is called
  * from xstateregs_get() and there we check the CPU has XSAVES.
  */
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -979,8 +973,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, &header, 0, count);
-
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, &header, 0, count);
 	if (ret)
 		return ret;
 
@@ -994,8 +987,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, src, 0, count);
-
+			ret = __copy_xstate_to_kernel(offset, size, kbuf, src, 0, count);
 			if (ret)
 				return ret;
 
@@ -1011,8 +1003,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+	ret = __copy_xstate_to_kernel(offset, size, kbuf, xstate_fx_sw_bytes, 0, count);
 	if (ret)
 		return ret;
 

From a69c158fb3e7a91220f55029bf222a4e678d16e9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:47 +0200
Subject: [PATCH 307/391] x86/fpu: Remove 'kbuf' parameter from the
 copy_xstate_to_user() APIs

The 'kbuf' parameter is unused in the _user() side of the API, remove it.

This simplifies the code and makes it easier to think about.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-5-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/xstate.c      | 25 +++++++------------------
 3 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index c762574a245f..65bd68c30cd0 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -49,7 +49,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 34e74adf9d5d..fd6dbdd8fde6 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -95,7 +95,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 		if (kbuf)
 			ret = copy_xstate_to_kernel(pos, count, kbuf, xsave);
 		else
-			ret = copy_xstate_to_user(pos, count, kbuf, ubuf, xsave);
+			ret = copy_xstate_to_user(pos, count, ubuf, xsave);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 71d3bda2b898..2d8f3344875d 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1011,10 +1011,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 }
 
 static inline int
-__copy_xstate_to_user(unsigned int pos, unsigned int count,
-		      void *kbuf, void __user *ubuf,
-		      const void *data, const int start_pos,
-		      const int end_pos)
+__copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, const void *data, const int start_pos, const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -1022,12 +1019,8 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count,
 	if (end_pos < 0 || pos < end_pos) {
 		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
 
-		if (kbuf) {
-			memcpy(kbuf + pos, data, copy);
-		} else {
-			if (__copy_to_user(ubuf + pos, data, copy))
-				return -EFAULT;
-		}
+		if (__copy_to_user(ubuf + pos, data, copy))
+			return -EFAULT;
 	}
 	return 0;
 }
@@ -1038,8 +1031,7 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count,
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
-			void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -1064,8 +1056,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, &header, 0, count);
-
+	ret = __copy_xstate_to_user(offset, size, ubuf, &header, 0, count);
 	if (ret)
 		return ret;
 
@@ -1079,8 +1070,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, src, 0, count);
-
+			ret = __copy_xstate_to_user(offset, size, ubuf, src, 0, count);
 			if (ret)
 				return ret;
 
@@ -1096,8 +1086,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void *kbuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+	ret = __copy_xstate_to_user(offset, size, ubuf, xstate_fx_sw_bytes, 0, count);
 	if (ret)
 		return ret;
 

From d7eda6c99cc75f1c41d67abf988f37a10045a370 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:48 +0200
Subject: [PATCH 308/391] x86/fpu: Clean up parameter order in the
 copy_xstate_to_*() APIs

Parameter ordering is weird:

  int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
  int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave);

'pos' and 'count', which are attributes of the destination buffer, are listed before the destination
buffer itself ...

List them after the primary arguments instead.

This makes the code more similar to regular memcpy() variant APIs.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-6-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 ++--
 arch/x86/kernel/fpu/regset.c      |  4 ++--
 arch/x86/kernel/fpu/xstate.c      | 25 ++++++++++++-------------
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 65bd68c30cd0..e4430b84939d 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave);
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index fd6dbdd8fde6..ec1404194b65 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -93,9 +93,9 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	if (using_compacted_format()) {
 		if (kbuf)
-			ret = copy_xstate_to_kernel(pos, count, kbuf, xsave);
+			ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
 		else
-			ret = copy_xstate_to_user(pos, count, ubuf, xsave);
+			ret = copy_xstate_to_user(ubuf, xsave, pos, count);
 	} else {
 		fpstate_sanitize_xstate(fpu);
 		/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 2d8f3344875d..0a299468510f 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -925,10 +925,9 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
 static inline int
-__copy_xstate_to_kernel(unsigned int pos, unsigned int count,
-			void *kbuf,
-			const void *data, const int start_pos,
-			const int end_pos)
+__copy_xstate_to_kernel(void *kbuf,
+			const void *data,
+			unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -948,7 +947,7 @@ __copy_xstate_to_kernel(unsigned int pos, unsigned int count,
  * It supports partial copy but pos always starts from zero. This is called
  * from xstateregs_get() and there we check the CPU has XSAVES.
  */
-int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, struct xregs_state *xsave)
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -973,7 +972,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, &header, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, 0, count);
 	if (ret)
 		return ret;
 
@@ -987,7 +986,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(offset, size, kbuf, src, 0, count);
+			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, 0, count);
 			if (ret)
 				return ret;
 
@@ -1003,7 +1002,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(offset, size, kbuf, xstate_fx_sw_bytes, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, 0, count);
 	if (ret)
 		return ret;
 
@@ -1011,7 +1010,7 @@ int copy_xstate_to_kernel(unsigned int pos, unsigned int count, void *kbuf, stru
 }
 
 static inline int
-__copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, const void *data, const int start_pos, const int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
@@ -1031,7 +1030,7 @@ __copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, c
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -1056,7 +1055,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(offset, size, ubuf, &header, 0, count);
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, 0, count);
 	if (ret)
 		return ret;
 
@@ -1070,7 +1069,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(offset, size, ubuf, src, 0, count);
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, 0, count);
 			if (ret)
 				return ret;
 
@@ -1086,7 +1085,7 @@ int copy_xstate_to_user(unsigned int pos, unsigned int count, void __user *ubuf,
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(offset, size, ubuf, xstate_fx_sw_bytes, 0, count);
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, 0, count);
 	if (ret)
 		return ret;
 

From becb2bb72ff906cc0d2bac3ee9574f694364823b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:49 +0200
Subject: [PATCH 309/391] x86/fpu: Clean up the parameter definitions of
 copy_xstate_to_*()

Remove pointless 'const' of non-pointer input parameter.

Remove unnecessary parenthesis that shows uncertainty about arithmetic operator precedence.

Clarify copy_xstate_to_user() description.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-7-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0a299468510f..9647e7256179 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -927,13 +927,13 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline int
 __copy_xstate_to_kernel(void *kbuf,
 			const void *data,
-			unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
+			unsigned int pos, unsigned int count, int start_pos, int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
 
 		memcpy(kbuf + pos, data, copy);
 	}
@@ -1010,13 +1010,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, const int start_pos, const int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int start_pos, int end_pos)
 {
 	if ((count == 0) || (pos < start_pos))
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
 
 		if (__copy_to_user(ubuf + pos, data, copy))
 			return -EFAULT;
@@ -1026,7 +1026,7 @@ __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, uns
 
 /*
  * Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy but pos always starts from
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */

From 8a5b731889cbf004b406d988dc591c8a7aac773e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:50 +0200
Subject: [PATCH 310/391] x86/fpu: Remove the 'start_pos' parameter from the
 __copy_xstate_to_*() functions

'start_pos' is always 0, so remove it and remove the pointless check of 'pos < 0'
which can not ever be true as 'pos' is unsigned ...

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-8-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 9647e7256179..1f50fdaf4c5a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -927,9 +927,9 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline int
 __copy_xstate_to_kernel(void *kbuf,
 			const void *data,
-			unsigned int pos, unsigned int count, int start_pos, int end_pos)
+			unsigned int pos, unsigned int count, int end_pos)
 {
-	if ((count == 0) || (pos < start_pos))
+	if (!count)
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
@@ -972,7 +972,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, count);
 	if (ret)
 		return ret;
 
@@ -986,7 +986,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, 0, count);
+			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, count);
 			if (ret)
 				return ret;
 
@@ -1002,7 +1002,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, 0, count);
+	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, count);
 	if (ret)
 		return ret;
 
@@ -1010,9 +1010,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int start_pos, int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int end_pos)
 {
-	if ((count == 0) || (pos < start_pos))
+	if (!count)
 		return 0;
 
 	if (end_pos < 0 || pos < end_pos) {
@@ -1055,7 +1055,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(ubuf, &header, offset, size, 0, count);
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, count);
 	if (ret)
 		return ret;
 
@@ -1069,7 +1069,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(ubuf, src, offset, size, 0, count);
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, count);
 			if (ret)
 				return ret;
 
@@ -1085,7 +1085,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, 0, count);
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, count);
 	if (ret)
 		return ret;
 

From 56583c9a1400fe1935edd55b24b4fbbc779b59cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:51 +0200
Subject: [PATCH 311/391] x86/fpu: Clarify parameter names in the
 copy_xstate_to_*() methods

Right now there's a confusing mixture of 'offset' and 'size' parameters:

 - __copy_xstate_to_*() input parameter 'end_pos' not not really an offset,
   but the full size of the copy to be performed.

 - input parameter 'count' to copy_xstate_to_*() shadows that of
   __copy_xstate_to_*()'s 'count' parameter name - but the roles
   are different: the first one is the total number of bytes to
   be copied, while the second one is a partial copy size.

To unconfuse all this, use a consistent set of parameter names:

 - 'size' is the partial copy size within a single xstate component
 - 'size_total' is the total copy requested
 - 'offset_start' is the requested starting offset.
 - 'offset' is the offset within an xstate component.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-9-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 +--
 arch/x86/kernel/fpu/xstate.c      | 44 +++++++++++++++----------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index e4430b84939d..fed6617a1079 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,8 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1f50fdaf4c5a..c13083579655 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -927,15 +927,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline int
 __copy_xstate_to_kernel(void *kbuf,
 			const void *data,
-			unsigned int pos, unsigned int count, int end_pos)
+			unsigned int offset, unsigned int size, int size_total)
 {
-	if (!count)
+	if (!size)
 		return 0;
 
-	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
+	if (size_total < 0 || offset < size_total) {
+		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
 
-		memcpy(kbuf + pos, data, copy);
+		memcpy(kbuf + offset, data, copy);
 	}
 	return 0;
 }
@@ -947,7 +947,7 @@ __copy_xstate_to_kernel(void *kbuf,
  * It supports partial copy but pos always starts from zero. This is called
  * from xstateregs_get() and there we check the CPU has XSAVES.
  */
-int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -956,7 +956,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
 	 */
-	if (unlikely(pos != 0))
+	if (unlikely(offset_start != 0))
 		return -EFAULT;
 
 	/*
@@ -972,7 +972,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, count);
+	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -986,11 +986,11 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, count);
+			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
 
-			if (offset + size >= count)
+			if (offset + size >= size_total)
 				break;
 		}
 
@@ -1002,7 +1002,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, count);
+	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -1010,15 +1010,15 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int po
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, unsigned int count, int end_pos)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, int size_total)
 {
-	if (!count)
+	if (!size)
 		return 0;
 
-	if (end_pos < 0 || pos < end_pos) {
-		unsigned int copy = end_pos < 0 ? count : min(count, end_pos - pos);
+	if (size_total < 0 || offset < size_total) {
+		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
 
-		if (__copy_to_user(ubuf + pos, data, copy))
+		if (__copy_to_user(ubuf + offset, data, copy))
 			return -EFAULT;
 	}
 	return 0;
@@ -1030,7 +1030,7 @@ __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int pos, uns
  * zero. This is called from xstateregs_get() and there we check the CPU
  * has XSAVES.
  */
-int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int pos, unsigned int count)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
 	int ret, i;
@@ -1039,7 +1039,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
 	 */
-	if (unlikely(pos != 0))
+	if (unlikely(offset_start != 0))
 		return -EFAULT;
 
 	/*
@@ -1055,7 +1055,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_user(ubuf, &header, offset, size, count);
+	ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
 	if (ret)
 		return ret;
 
@@ -1069,11 +1069,11 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			ret = __copy_xstate_to_user(ubuf, src, offset, size, count);
+			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
 
-			if (offset + size >= count)
+			if (offset + size >= size_total)
 				break;
 		}
 
@@ -1085,7 +1085,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, count);
+	ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
 	if (ret)
 		return ret;
 

From 6ff15f8db7eaf29ef5ead6afbec9b25485fe8703 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:52 +0200
Subject: [PATCH 312/391] x86/fpu: Change 'size_total' parameter to unsigned
 and standardize the size checks in copy_xstate_to_*()

'size_total' is derived from an unsigned input parameter - and then converted
to 'int' and checked for negative ranges:

	if (size_total < 0 || offset < size_total) {

This conversion and the checks are unnecessary obfuscation, reject overly
large requested copy sizes outright and simplify the underlying code.

Reported-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-10-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c13083579655..b18c5457065a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -925,15 +925,11 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
 static inline int
-__copy_xstate_to_kernel(void *kbuf,
-			const void *data,
-			unsigned int offset, unsigned int size, int size_total)
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+			unsigned int offset, unsigned int size, unsigned int size_total)
 {
-	if (!size)
-		return 0;
-
-	if (size_total < 0 || offset < size_total) {
-		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
 
 		memcpy(kbuf + offset, data, copy);
 	}
@@ -986,12 +982,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
+
 			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
-
-			if (offset + size >= size_total)
-				break;
 		}
 
 	}
@@ -1010,13 +1007,13 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 }
 
 static inline int
-__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, int size_total)
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
 {
 	if (!size)
 		return 0;
 
-	if (size_total < 0 || offset < size_total) {
-		unsigned int copy = size_total < 0 ? size : min(size, size_total - offset);
+	if (offset < size_total) {
+		unsigned int copy = min(size, size_total - offset);
 
 		if (__copy_to_user(ubuf + offset, data, copy))
 			return -EFAULT;
@@ -1069,12 +1066,13 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
+			/* The next component has to fit fully into the output buffer: */
+			if (offset + size > size_total)
+				break;
+
 			ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
 			if (ret)
 				return ret;
-
-			if (offset + size >= size_total)
-				break;
 		}
 
 	}

From 8c0817f4a3188ac5485ce14f96f12a175800b881 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:53 +0200
Subject: [PATCH 313/391] x86/fpu: Simplify __copy_xstate_to_kernel() return
 values

__copy_xstate_to_kernel() can only return 0 (because kernel copies cannot fail),
simplify the code throughout.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-11-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b18c5457065a..00c3b41c3cf1 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -924,7 +924,7 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
-static inline int
+static inline void
 __copy_xstate_to_kernel(void *kbuf, const void *data,
 			unsigned int offset, unsigned int size, unsigned int size_total)
 {
@@ -933,7 +933,6 @@ __copy_xstate_to_kernel(void *kbuf, const void *data,
 
 		memcpy(kbuf + offset, data, copy);
 	}
-	return 0;
 }
 
 /*
@@ -946,8 +945,8 @@ __copy_xstate_to_kernel(void *kbuf, const void *data,
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
 	unsigned int offset, size;
-	int ret, i;
 	struct xstate_header header;
+	int i;
 
 	/*
 	 * Currently copy_regset_to_user() starts from pos 0:
@@ -968,9 +967,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(header);
 
-	ret = __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
-	if (ret)
-		return ret;
+	__copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		/*
@@ -986,9 +983,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 			if (offset + size > size_total)
 				break;
 
-			ret = __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
-			if (ret)
-				return ret;
+			__copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
 		}
 
 	}
@@ -999,9 +994,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 	offset = offsetof(struct fxregs_state, sw_reserved);
 	size = sizeof(xstate_fx_sw_bytes);
 
-	ret = __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
-	if (ret)
-		return ret;
+	__copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
 
 	return 0;
 }

From 79fecc2b7506f29fb91becc65e8788e5ae7eba9f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:54 +0200
Subject: [PATCH 314/391] x86/fpu: Split copy_user_to_xstate() into
 copy_kernel_to_xstate() & copy_user_to_xstate()

Similar to:

  x86/fpu: Split copy_xstate_to_user() into copy_xstate_to_kernel() & copy_xstate_to_user()

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-12-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 +-
 arch/x86/kernel/fpu/regset.c      | 10 +++--
 arch/x86/kernel/fpu/xstate.c      | 66 ++++++++++++++++++++++++++++++-
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index fed6617a1079..79af79dbcab6 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave);
+int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index ec1404194b65..cb45dd81d617 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -134,10 +134,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	fpu__activate_fpstate_write(fpu);
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		ret = copy_user_to_xstate(kbuf, ubuf, xsave);
-	else
+	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+		if (kbuf)
+			ret = copy_kernel_to_xstate(kbuf, ubuf, xsave);
+		else
+			ret = copy_user_to_xstate(kbuf, ubuf, xsave);
+	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+	}
 
 	/*
 	 * In case of failure, mark all states as init:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 00c3b41c3cf1..1ad25d1b8056 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1084,7 +1084,71 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 }
 
 /*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set() and
+ * there we check the CPU has XSAVES and a whole standard-sized buffer
+ * exists.
+ */
+int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
+		     struct xregs_state *xsave)
+{
+	unsigned int offset, size;
+	int i;
+	u64 xfeatures;
+	u64 allowed_features;
+
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(xfeatures);
+
+	if (kbuf) {
+		memcpy(&xfeatures, kbuf + offset, size);
+	} else {
+		if (__copy_from_user(&xfeatures, ubuf + offset, size))
+			return -EFAULT;
+	}
+
+	/*
+	 * Reject if the user sets any disabled or supervisor features:
+	 */
+	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+
+	if (xfeatures & ~allowed_features)
+		return -EINVAL;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		u64 mask = ((u64)1 << i);
+
+		if (xfeatures & mask) {
+			void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			if (kbuf) {
+				memcpy(dst, kbuf + offset, size);
+			} else {
+				if (__copy_from_user(dst, ubuf + offset, size))
+					return -EFAULT;
+			}
+		}
+	}
+
+	/*
+	 * The state that came in from userspace was user-state only.
+	 * Mask all the user states out of 'xfeatures':
+	 */
+	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Add back in the features that came in from userspace:
+	 */
+	xsave->header.xfeatures |= xfeatures;
+
+	return 0;
+}
+
+/*
+ * Convert from a ptrace standard-format user-space buffer to kernel XSAVES format
  * and copy to the target thread. This is called from xstateregs_set() and
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.

From 59dffa4edba1f15b2bfdbe608aca1efe664c674c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:55 +0200
Subject: [PATCH 315/391] x86/fpu: Remove 'ubuf' parameter from the
 copy_kernel_to_xstate() API

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-13-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/xstate.c      | 17 +++--------------
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 79af79dbcab6..f10889bc0c88 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave);
 int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index cb45dd81d617..785302c75f38 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -136,7 +136,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		if (kbuf)
-			ret = copy_kernel_to_xstate(kbuf, ubuf, xsave);
+			ret = copy_kernel_to_xstate(kbuf, xsave);
 		else
 			ret = copy_user_to_xstate(kbuf, ubuf, xsave);
 	} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1ad25d1b8056..71cc8d367fdd 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1089,8 +1089,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave)
+int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int i;
@@ -1100,12 +1099,7 @@ int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(xfeatures);
 
-	if (kbuf) {
-		memcpy(&xfeatures, kbuf + offset, size);
-	} else {
-		if (__copy_from_user(&xfeatures, ubuf + offset, size))
-			return -EFAULT;
-	}
+	memcpy(&xfeatures, kbuf + offset, size);
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
@@ -1124,12 +1118,7 @@ int copy_kernel_to_xstate(const void *kbuf, const void __user *ubuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			if (kbuf) {
-				memcpy(dst, kbuf + offset, size);
-			} else {
-				if (__copy_from_user(dst, ubuf + offset, size))
-					return -EFAULT;
-			}
+			memcpy(dst, kbuf + offset, size);
 		}
 	}
 

From 7b9094c688f807c110a2dab6f6edc5876bfa7b0b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:56 +0200
Subject: [PATCH 316/391] x86/fpu: Remove 'kbuf' parameter from the
 copy_user_to_xstate() API

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-14-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/regset.c      |  2 +-
 arch/x86/kernel/fpu/signal.c      | 11 ++++-------
 arch/x86/kernel/fpu/xstate.c      | 19 +++++--------------
 4 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index f10889bc0c88..4ceb90740d80 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -51,5 +51,5 @@ int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave);
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf, struct xregs_state *xsave);
+int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 785302c75f38..caf723f31737 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -138,7 +138,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 		if (kbuf)
 			ret = copy_kernel_to_xstate(kbuf, xsave);
 		else
-			ret = copy_user_to_xstate(kbuf, ubuf, xsave);
+			ret = copy_user_to_xstate(ubuf, xsave);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index b1fe9a1fc4e0..2c685b492fd6 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -323,13 +323,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		fpu__drop(fpu);
 
-		if (using_compacted_format()) {
-			err = copy_user_to_xstate(NULL, buf_fx,
-					       &fpu->state.xsave);
-		} else {
-			err = __copy_from_user(&fpu->state.xsave,
-					       buf_fx, state_size);
-		}
+		if (using_compacted_format())
+			err = copy_user_to_xstate(buf_fx, &fpu->state.xsave);
+		else
+			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {
 			fpstate_init(&fpu->state);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 71cc8d367fdd..b1f3e4dae2e3 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1142,8 +1142,7 @@ int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
-		     struct xregs_state *xsave)
+int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave)
 {
 	unsigned int offset, size;
 	int i;
@@ -1153,12 +1152,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 	offset = offsetof(struct xregs_state, header);
 	size = sizeof(xfeatures);
 
-	if (kbuf) {
-		memcpy(&xfeatures, kbuf + offset, size);
-	} else {
-		if (__copy_from_user(&xfeatures, ubuf + offset, size))
-			return -EFAULT;
-	}
+	if (__copy_from_user(&xfeatures, ubuf + offset, size))
+		return -EFAULT;
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
@@ -1177,12 +1172,8 @@ int copy_user_to_xstate(const void *kbuf, const void __user *ubuf,
 			offset = xstate_offsets[i];
 			size = xstate_sizes[i];
 
-			if (kbuf) {
-				memcpy(dst, kbuf + offset, size);
-			} else {
-				if (__copy_from_user(dst, ubuf + offset, size))
-					return -EFAULT;
-			}
+			if (__copy_from_user(dst, ubuf + offset, size))
+				return -EFAULT;
 		}
 	}
 

From 6d7f7da5533a3f841eeb1d9657257c9367924274 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:57 +0200
Subject: [PATCH 317/391] x86/fpu: Flip the parameter order in
 copy_*_to_xstate()

Make it more consistent with regular memcpy() semantics, where the destination
argument comes first.

No change in functionality.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-15-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h | 4 ++--
 arch/x86/kernel/fpu/regset.c      | 4 ++--
 arch/x86/kernel/fpu/signal.c      | 2 +-
 arch/x86/kernel/fpu/xstate.c      | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 4ceb90740d80..579ac2358e63 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -50,6 +50,6 @@ const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
-int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave);
-int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
 #endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index caf723f31737..19a7385a912c 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -136,9 +136,9 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		if (kbuf)
-			ret = copy_kernel_to_xstate(kbuf, xsave);
+			ret = copy_kernel_to_xstate(xsave, kbuf);
 		else
-			ret = copy_user_to_xstate(ubuf, xsave);
+			ret = copy_user_to_xstate(xsave, ubuf);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
 	}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 2c685b492fd6..2d682dac35d4 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -324,7 +324,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		fpu__drop(fpu);
 
 		if (using_compacted_format())
-			err = copy_user_to_xstate(buf_fx, &fpu->state.xsave);
+			err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		else
 			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b1f3e4dae2e3..0ef35040d0ad 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1089,7 +1089,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
@@ -1142,7 +1142,7 @@ int copy_kernel_to_xstate(const void *kbuf, struct xregs_state *xsave)
  * there we check the CPU has XSAVES and a whole standard-sized buffer
  * exists.
  */
-int copy_user_to_xstate(const void __user *ubuf, struct xregs_state *xsave)
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {
 	unsigned int offset, size;
 	int i;

From b3a163081c28d1a4d1ad76259a9d93b34a82f1da Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:58 +0200
Subject: [PATCH 318/391] x86/fpu: Simplify fpu->fpregs_active use

The fpregs_active() inline function is pretty pointless - in almost
all the callsites it can be replaced with a direct fpu->fpregs_active
access.

Do so and eliminate the extra layer of obfuscation.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-16-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 17 +----------------
 arch/x86/kernel/fpu/core.c          |  2 +-
 arch/x86/kernel/fpu/signal.c        |  9 +++++----
 arch/x86/mm/pkeys.c                 |  2 +-
 4 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 554cdb205d17..b223c57dd5dc 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -542,21 +542,6 @@ static inline void fpregs_activate(struct fpu *fpu)
 	trace_x86_fpu_regs_activated(fpu);
 }
 
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
-	return current->thread.fpu.fpregs_active;
-}
-
 /*
  * FPU state switching for scheduling.
  *
@@ -617,7 +602,7 @@ static inline void user_fpu_begin(void)
 	struct fpu *fpu = &current->thread.fpu;
 
 	preempt_disable();
-	if (!fpregs_active())
+	if (!fpu->fpregs_active)
 		fpregs_activate(fpu);
 	preempt_enable();
 }
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..bad57248e5a0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -367,7 +367,7 @@ void fpu__current_fpstate_write_end(void)
 	 * registers may still be out of date.  Update them with
 	 * an XRSTOR if they are active.
 	 */
-	if (fpregs_active())
+	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 2d682dac35d4..684025654d0c 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  */
 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 {
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+	struct fpu *fpu = &current->thread.fpu;
+	struct xregs_state *xsave = &fpu->state.xsave;
 	struct task_struct *tsk = current;
 	int ia32_fxstate = (buf != buf_fx);
 
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active() || using_compacted_format()) {
+	if (fpu->fpregs_active || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
 		/* Update the thread's fxstate to save the fsave header. */
 		if (ia32_fxstate)
-			copy_fxregs_to_kernel(&tsk->thread.fpu);
+			copy_fxregs_to_kernel(fpu);
 	} else {
 		/*
 		 * It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			return -1;
 		}
 
-		fpstate_sanitize_xstate(&tsk->thread.fpu);
+		fpstate_sanitize_xstate(fpu);
 		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 2dab69a706ec..e2c23472233e 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -45,7 +45,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    fpregs_active() &&
+	    current->thread.fpu.fpregs_active &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;

From a10b6a16cdad88170f546d008c77453cddf918e6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 14:59:59 +0200
Subject: [PATCH 319/391] x86/fpu: Make the fpu state change in fpu__clear()
 scheduler-atomic

Do this temporarily only, to make it easier to change the FPU state machine,
in particular this change couples the fpu->fpregs_active and fpu->fpstate_active
states: they are only set/cleared together (as far as the scheduler sees them).

This will be removed by later patches.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-17-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index bad57248e5a0..b7dc3833d41a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -462,9 +462,11 @@ void fpu__clear(struct fpu *fpu)
 	 * Make sure fpstate is cleared and initialized.
 	 */
 	if (static_cpu_has(X86_FEATURE_FPU)) {
+		preempt_disable();
 		fpu__activate_curr(fpu);
 		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
+		preempt_enable();
 	}
 }
 

From b6aa85558d7e7b18fc3470d2bc1731d2205dd275 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:00 +0200
Subject: [PATCH 320/391] x86/fpu: Split the state handling in fpu__drop()

Prepare fpu__drop() to use fpu->fpregs_active.

There are two distinct usecases for fpu__drop() in this context:
exit_thread() when called for 'current' in exit(), and when called
for another task in fork().

This patch does not change behavior, it only adds a couple of
debug checks and structures the code to make the ->fpregs_active
change more obviously correct.

All the complications will be removed later on.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-18-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index b7dc3833d41a..815dfba7781a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -414,12 +414,19 @@ void fpu__drop(struct fpu *fpu)
 {
 	preempt_disable();
 
-	if (fpu->fpregs_active) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		fpregs_deactivate(fpu);
+	if (fpu == &current->thread.fpu) {
+		WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
+		if (fpu->fpregs_active) {
+			/* Ignore delayed exceptions from user space */
+			asm volatile("1: fwait\n"
+				     "2:\n"
+				     _ASM_EXTABLE(1b, 2b));
+			if (fpu->fpregs_active)
+				fpregs_deactivate(fpu);
+		}
+	} else {
+		WARN_ON_FPU(fpu->fpregs_active);
 	}
 
 	fpu->fpstate_active = 0;

From f1c8cd0176078c7bcafdc89cac447cab672a0b5e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:01 +0200
Subject: [PATCH 321/391] x86/fpu: Change fpu->fpregs_active users to
 fpu->fpstate_active

We want to simplify the FPU state machine by eliminating fpu->fpregs_active,
and we can do that because the two state flags (::fpregs_active and
::fpstate_active) are set essentially together.

The old lazy FPU switching code used to make a distinction - but there's
no lazy switching code anymore, we always switch in an 'eager' fashion.

Do this by first changing all substantial uses of fpu->fpregs_active
to fpu->fpstate_active and adding a few debug checks to double check
our assumption is correct.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-19-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  4 +++-
 arch/x86/kernel/fpu/core.c          | 16 ++++++++++------
 arch/x86/kernel/fpu/signal.c        |  4 +++-
 arch/x86/mm/pkeys.c                 |  3 +--
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index b223c57dd5dc..7fa676f93ac1 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -556,7 +556,9 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->fpregs_active) {
+	WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active);
+
+	if (old_fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 815dfba7781a..eab244622402 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (fpu->fpregs_active) {
+	if (fpu->fpstate_active) {
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (fpu->fpregs_active)
+	if (fpu->fpstate_active)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	kernel_fpu_enable();
@@ -147,8 +147,10 @@ void fpu__save(struct fpu *fpu)
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
 	preempt_disable();
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->fpregs_active) {
+	if (fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
@@ -262,11 +264,12 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * If fpregs are active (in the current CPU), then
 	 * copy them to the fpstate:
 	 */
-	if (fpu->fpregs_active) {
+	if (fpu->fpstate_active) {
 		fpu__save(fpu);
 	} else {
 		if (!fpu->fpstate_active) {
@@ -362,12 +365,13 @@ void fpu__current_fpstate_write_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * 'fpu' now has an updated copy of the state, but the
 	 * registers may still be out of date.  Update them with
 	 * an XRSTOR if they are active.
 	 */
-	if (fpu->fpregs_active)
+	if (fpu->fpstate_active)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	/*
@@ -417,7 +421,7 @@ void fpu__drop(struct fpu *fpu)
 	if (fpu == &current->thread.fpu) {
 		WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 
-		if (fpu->fpregs_active) {
+		if (fpu->fpstate_active) {
 			/* Ignore delayed exceptions from user space */
 			asm volatile("1: fwait\n"
 				     "2:\n"
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 684025654d0c..a88083ba7f8b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -171,7 +171,9 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpu->fpregs_active || using_compacted_format()) {
+	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
+
+	if (fpu->fpstate_active || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index e2c23472233e..4d24269c071f 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,7 +18,6 @@
 
 #include <asm/cpufeature.h>             /* boot_cpu_has, ...            */
 #include <asm/mmu_context.h>            /* vma_pkey()                   */
-#include <asm/fpu/internal.h>           /* fpregs_active()              */
 
 int __execute_only_pkey(struct mm_struct *mm)
 {
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    current->thread.fpu.fpregs_active &&
+	    current->thread.fpu.fpstate_active &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;

From 6cf4edbe0526db311a28734609da888fdfcb3604 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:02 +0200
Subject: [PATCH 322/391] x86/fpu: Decouple
 fpregs_activate()/fpregs_deactivate() from fpu->fpregs_active

The fpregs_activate()/fpregs_deactivate() are currently called in such a pattern:

	if (!fpu->fpregs_active)
		fpregs_activate(fpu);

	...

	if (fpu->fpregs_active)
		fpregs_deactivate(fpu);

But note that it's actually safe to call them without checking the flag first.

This further decouples the fpu->fpregs_active flag from actual FPU logic.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-20-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 7 +------
 arch/x86/kernel/fpu/core.c          | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 7fa676f93ac1..42a601673c09 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -526,8 +526,6 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-	WARN_ON_FPU(!fpu->fpregs_active);
-
 	fpu->fpregs_active = 0;
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 	trace_x86_fpu_regs_deactivated(fpu);
@@ -535,8 +533,6 @@ static inline void fpregs_deactivate(struct fpu *fpu)
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	WARN_ON_FPU(fpu->fpregs_active);
-
 	fpu->fpregs_active = 1;
 	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
 	trace_x86_fpu_regs_activated(fpu);
@@ -604,8 +600,7 @@ static inline void user_fpu_begin(void)
 	struct fpu *fpu = &current->thread.fpu;
 
 	preempt_disable();
-	if (!fpu->fpregs_active)
-		fpregs_activate(fpu);
+	fpregs_activate(fpu);
 	preempt_enable();
 }
 
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index eab244622402..01a47e9edfb4 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -426,8 +426,7 @@ void fpu__drop(struct fpu *fpu)
 			asm volatile("1: fwait\n"
 				     "2:\n"
 				     _ASM_EXTABLE(1b, 2b));
-			if (fpu->fpregs_active)
-				fpregs_deactivate(fpu);
+			fpregs_deactivate(fpu);
 		}
 	} else {
 		WARN_ON_FPU(fpu->fpregs_active);

From 99dc26bda233ee722bbd370bddf20beece3ffb93 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:03 +0200
Subject: [PATCH 323/391] x86/fpu: Remove struct fpu::fpregs_active

The previous changes paved the way for the removal of the
fpu::fpregs_active state flag - we now only have the
fpu::fpstate_active and fpu::last_cpu fields left.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-21-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  5 -----
 arch/x86/include/asm/fpu/types.h    | 23 -----------------------
 arch/x86/include/asm/trace/fpu.h    |  5 +----
 arch/x86/kernel/fpu/core.c          |  9 ---------
 arch/x86/kernel/fpu/signal.c        |  2 --
 5 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 42a601673c09..629e7abcd6c9 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -526,14 +526,12 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-	fpu->fpregs_active = 0;
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
 	trace_x86_fpu_regs_deactivated(fpu);
 }
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	fpu->fpregs_active = 1;
 	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
 	trace_x86_fpu_regs_activated(fpu);
 }
@@ -552,8 +550,6 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	WARN_ON_FPU(old_fpu->fpregs_active != old_fpu->fpstate_active);
-
 	if (old_fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
@@ -561,7 +557,6 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 			old_fpu->last_cpu = cpu;
 
 		/* But leave fpu_fpregs_owner_ctx! */
-		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
 	} else
 		old_fpu->last_cpu = -1;
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..0c314a397cf5 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -298,29 +298,6 @@ struct fpu {
 	 */
 	unsigned char			fpstate_active;
 
-	/*
-	 * @fpregs_active:
-	 *
-	 * This flag determines whether a given context is actively
-	 * loaded into the FPU's registers and that those registers
-	 * represent the task's current FPU state.
-	 *
-	 * Note the interaction with fpstate_active:
-	 *
-	 *   # task does not use the FPU:
-	 *   fpstate_active == 0
-	 *
-	 *   # task uses the FPU and regs are active:
-	 *   fpstate_active == 1 && fpregs_active == 1
-	 *
-	 *   # the regs are inactive but still match fpstate:
-	 *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
-	 *
-	 * The third state is what we use for the lazy restore optimization
-	 * on lazy-switching CPUs.
-	 */
-	unsigned char			fpregs_active;
-
 	/*
 	 * @state:
 	 *
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 342e59789fcd..da565aae9fd2 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,7 +12,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_STRUCT__entry(
 		__field(struct fpu *, fpu)
-		__field(bool, fpregs_active)
 		__field(bool, fpstate_active)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
@@ -20,16 +19,14 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_fast_assign(
 		__entry->fpu		= fpu;
-		__entry->fpregs_active	= fpu->fpregs_active;
 		__entry->fpstate_active	= fpu->fpstate_active;
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
 			__entry->fpu,
-			__entry->fpregs_active,
 			__entry->fpstate_active,
 			__entry->xfeatures,
 			__entry->xcomp_bv
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 01a47e9edfb4..93103a909c47 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -147,8 +147,6 @@ void fpu__save(struct fpu *fpu)
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
 	preempt_disable();
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
-
 	trace_x86_fpu_before_save(fpu);
 	if (fpu->fpstate_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
@@ -191,7 +189,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
 	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
@@ -264,7 +261,6 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * If fpregs are active (in the current CPU), then
 	 * copy them to the fpstate:
@@ -365,7 +361,6 @@ void fpu__current_fpstate_write_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
 	/*
 	 * 'fpu' now has an updated copy of the state, but the
 	 * registers may still be out of date.  Update them with
@@ -419,8 +414,6 @@ void fpu__drop(struct fpu *fpu)
 	preempt_disable();
 
 	if (fpu == &current->thread.fpu) {
-		WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
-
 		if (fpu->fpstate_active) {
 			/* Ignore delayed exceptions from user space */
 			asm volatile("1: fwait\n"
@@ -428,8 +421,6 @@ void fpu__drop(struct fpu *fpu)
 				     _ASM_EXTABLE(1b, 2b));
 			fpregs_deactivate(fpu);
 		}
-	} else {
-		WARN_ON_FPU(fpu->fpregs_active);
 	}
 
 	fpu->fpstate_active = 0;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a88083ba7f8b..629106e51a29 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -171,8 +171,6 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	WARN_ON_FPU(fpu->fpstate_active != fpu->fpregs_active);
-
 	if (fpu->fpstate_active || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))

From 0852b374173bb57f870d78e6c6839c77b339be5f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 23 Sep 2017 15:00:04 +0200
Subject: [PATCH 324/391] x86/fpu: Add FPU state copying quirk to handle XRSTOR
 failure on Intel Skylake CPUs

On Skylake CPUs I noticed that XRSTOR is unable to deal with states
created by copyout_from_xsaves() if the xstate has only SSE/YMM state, and
no FP state. That is, xfeatures had XFEATURE_MASK_SSE set, but not
XFEATURE_MASK_FP.

The reason is that part of the SSE/YMM state lives in the MXCSR and
MXCSR_FLAGS fields of the FP state.

Ensure that whenever we copy SSE or YMM state around, the MXCSR and
MXCSR_FLAGS fields are also copied around.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170210085445.0f1cc708@annuminas.surriel.com
Link: http://lkml.kernel.org/r/20170923130016.21448-22-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/types.h |  3 +++
 arch/x86/kernel/fpu/xstate.c     | 42 ++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 0c314a397cf5..71db45ca8870 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -68,6 +68,9 @@ struct fxregs_state {
 /* Default value for fxregs_state.mxcsr: */
 #define MXCSR_DEFAULT		0x1f80
 
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
 /*
  * Software based FPU emulation state. This is arbitrary really,
  * it matches the x87 format to make it easier to understand:
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0ef35040d0ad..41c52256bdce 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -920,6 +920,23 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 }
 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
+/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+		return 0;
+
+	if (xfeatures & XFEATURE_MASK_FP)
+		return 0;
+
+	return 1;
+}
+
 /*
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
@@ -988,6 +1005,12 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 
 	}
 
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
 	/*
 	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 	 */
@@ -1070,6 +1093,12 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 
 	}
 
+	if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		__copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+	}
+
 	/*
 	 * Fill xsave->i387.sw_reserved value for ptrace frame:
 	 */
@@ -1122,6 +1151,12 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 		}
 	}
 
+	if (xfeatures_mxcsr_quirk(xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
+	}
+
 	/*
 	 * The state that came in from userspace was user-state only.
 	 * Mask all the user states out of 'xfeatures':
@@ -1177,6 +1212,13 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 		}
 	}
 
+	if (xfeatures_mxcsr_quirk(xfeatures)) {
+		offset = offsetof(struct fxregs_state, mxcsr);
+		size = MXCSR_AND_FLAGS_SIZE;
+		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+			return -EFAULT;
+	}
+
 	/*
 	 * The state that came in from userspace was user-state only.
 	 * Mask all the user states out of 'xfeatures':

From 4f8cef59bad29344aca0e2e6b0ad18dadd078fd0 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Sat, 23 Sep 2017 15:00:05 +0200
Subject: [PATCH 325/391] x86/fpu: Fix boolreturn.cocci warnings

arch/x86/kernel/fpu/xstate.c:931:9-10: WARNING: return of 0/1 in function 'xfeatures_mxcsr_quirk' with return type bool

 Return statements in functions returning bool should use true/false instead of 1/0.

Generated by: scripts/coccinelle/misc/boolreturn.cocci

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kbuild-all@01.org
Cc: tipbuild@zytor.com
Link: http://lkml.kernel.org/r/20170306004553.GA25764@lkp-wsm-ep1
Link: http://lkml.kernel.org/r/20170923130016.21448-23-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 41c52256bdce..fda1109cc355 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -929,12 +929,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
 {
 	if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
-		return 0;
+		return false;
 
 	if (xfeatures & XFEATURE_MASK_FP)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*

From 03eaec81ac09814817e9f0307d572ffe8365f980 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sat, 23 Sep 2017 15:00:06 +0200
Subject: [PATCH 326/391] x86/fpu: Turn WARN_ON() in context switch into
 WARN_ON_FPU()

copy_xregs_to_kernel checks if the alternatives have been already
patched.

This WARN_ON() is always executed in every context switch.

All the other checks in fpu internal.h are WARN_ON_FPU(), but
this one is plain WARN_ON(). I assume it was forgotten to switch it.

So switch it to WARN_ON_FPU() too to avoid some unnecessary code
in the context switch, and a potentially expensive cache line miss for the
global variable.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170329062605.4970-1-andi@firstfloor.org
Link: http://lkml.kernel.org/r/20170923130016.21448-24-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 629e7abcd6c9..2dca7c65319c 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -350,7 +350,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
 	u32 hmask = mask >> 32;
 	int err;
 
-	WARN_ON(!alternatives_patched);
+	WARN_ON_FPU(!alternatives_patched);
 
 	XSTATE_XSAVE(xstate, lmask, hmask, err);
 

From e19b205be43d11bff638cad4487008c48d21c103 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 24 Sep 2017 16:38:56 -0700
Subject: [PATCH 327/391] Linux 4.14-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 64cbc66cebca..d1119941261c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*

From 814fb7bb7db5433757d76f4c4502c96fc53b0b5e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 23 Sep 2017 15:00:07 +0200
Subject: [PATCH 328/391] x86/fpu: Don't let userspace set bogus xcomp_bv

On x86, userspace can use the ptrace() or rt_sigreturn() system calls to
set a task's extended state (xstate) or "FPU" registers.  ptrace() can
set them for another task using the PTRACE_SETREGSET request with
NT_X86_XSTATE, while rt_sigreturn() can set them for the current task.
In either case, registers can be set to any value, but the kernel
assumes that the XSAVE area itself remains valid in the sense that the
CPU can restore it.

However, in the case where the kernel is using the uncompacted xstate
format (which it does whenever the XSAVES instruction is unavailable),
it was possible for userspace to set the xcomp_bv field in the
xstate_header to an arbitrary value.  However, all bits in that field
are reserved in the uncompacted case, so when switching to a task with
nonzero xcomp_bv, the XRSTOR instruction failed with a #GP fault.  This
caused the WARN_ON_FPU(err) in copy_kernel_to_xregs() to be hit.  In
addition, since the error is otherwise ignored, the FPU registers from
the task previously executing on the CPU were leaked.

Fix the bug by checking that the user-supplied value of xcomp_bv is 0 in
the uncompacted case, and returning an error otherwise.

The reason for validating xcomp_bv rather than simply overwriting it
with 0 is that we want userspace to see an error if it (incorrectly)
provides an XSAVE area in compacted format rather than in uncompacted
format.

Note that as before, in case of error we clear the task's FPU state.
This is perhaps non-ideal, especially for PTRACE_SETREGSET; it might be
better to return an error before changing anything.  But it seems the
"clear on error" behavior is fine for now, and it's a little tricky to
do otherwise because it would mean we couldn't simply copy the full
userspace state into kernel memory in one __copy_from_user().

This bug was found by syzkaller, which hit the above-mentioned
WARN_ON_FPU():

    WARNING: CPU: 1 PID: 0 at ./arch/x86/include/asm/fpu/internal.h:373 __switch_to+0x5b5/0x5d0
    CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.13.0 #453
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    task: ffff9ba2bc8e42c0 task.stack: ffffa78cc036c000
    RIP: 0010:__switch_to+0x5b5/0x5d0
    RSP: 0000:ffffa78cc08bbb88 EFLAGS: 00010082
    RAX: 00000000fffffffe RBX: ffff9ba2b8bf2180 RCX: 00000000c0000100
    RDX: 00000000ffffffff RSI: 000000005cb10700 RDI: ffff9ba2b8bf36c0
    RBP: ffffa78cc08bbbd0 R08: 00000000929fdf46 R09: 0000000000000001
    R10: 0000000000000000 R11: 0000000000000000 R12: ffff9ba2bc8e42c0
    R13: 0000000000000000 R14: ffff9ba2b8bf3680 R15: ffff9ba2bf5d7b40
    FS:  00007f7e5cb10700(0000) GS:ffff9ba2bf400000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00000000004005cc CR3: 0000000079fd5000 CR4: 00000000001406e0
    Call Trace:
    Code: 84 00 00 00 00 00 e9 11 fd ff ff 0f ff 66 0f 1f 84 00 00 00 00 00 e9 e7 fa ff ff 0f ff 66 0f 1f 84 00 00 00 00 00 e9 c2 fa ff ff <0f> ff 66 0f 1f 84 00 00 00 00 00 e9 d4 fc ff ff 66 66 2e 0f 1f

Here is a C reproducer.  The expected behavior is that the program spin
forever with no output.  However, on a buggy kernel running on a
processor with the "xsave" feature but without the "xsaves" feature
(e.g. Sandy Bridge through Broadwell for Intel), within a second or two
the program reports that the xmm registers were corrupted, i.e. were not
restored correctly.  With CONFIG_X86_DEBUG_FPU=y it also hits the above
kernel warning.

    #define _GNU_SOURCE
    #include <stdbool.h>
    #include <inttypes.h>
    #include <linux/elf.h>
    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/uio.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        int pid = fork();
        uint64_t xstate[512];
        struct iovec iov = { .iov_base = xstate, .iov_len = sizeof(xstate) };

        if (pid == 0) {
            bool tracee = true;
            for (int i = 0; i < sysconf(_SC_NPROCESSORS_ONLN) && tracee; i++)
                tracee = (fork() != 0);
            uint32_t xmm0[4] = { [0 ... 3] = tracee ? 0x00000000 : 0xDEADBEEF };
            asm volatile("   movdqu %0, %%xmm0\n"
                         "   mov %0, %%rbx\n"
                         "1: movdqu %%xmm0, %0\n"
                         "   mov %0, %%rax\n"
                         "   cmp %%rax, %%rbx\n"
                         "   je 1b\n"
                         : "+m" (xmm0) : : "rax", "rbx", "xmm0");
            printf("BUG: xmm registers corrupted!  tracee=%d, xmm0=%08X%08X%08X%08X\n",
                   tracee, xmm0[0], xmm0[1], xmm0[2], xmm0[3]);
        } else {
            usleep(100000);
            ptrace(PTRACE_ATTACH, pid, 0, 0);
            wait(NULL);
            ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
            xstate[65] = -1;
            ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov);
            ptrace(PTRACE_CONT, pid, 0, 0);
            wait(NULL);
        }
        return 1;
    }

Note: the program only tests for the bug using the ptrace() system call.
The bug can also be reproduced using the rt_sigreturn() system call, but
only when called from a 32-bit program, since for 64-bit programs the
kernel restores the FPU state from the signal frame by doing XRSTOR
directly from userspace memory (with proper error checking).

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: <stable@vger.kernel.org> [v3.17+]
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Fixes: 0b29643a5843 ("x86/xsaves: Change compacted format xsave area header")
Link: http://lkml.kernel.org/r/20170922174156.16780-2-ebiggers3@gmail.com
Link: http://lkml.kernel.org/r/20170923130016.21448-25-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/regset.c | 4 ++++
 arch/x86/kernel/fpu/signal.c | 9 +++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 19a7385a912c..c764f7405322 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -141,6 +141,10 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 			ret = copy_user_to_xstate(xsave, ubuf);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+
+		/* xcomp_bv must be 0 when using uncompacted format */
+		if (!ret && xsave->header.xcomp_bv)
+			ret = -EINVAL;
 	}
 
 	/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 629106e51a29..da68ea1c3a44 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -324,11 +324,16 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		fpu__drop(fpu);
 
-		if (using_compacted_format())
+		if (using_compacted_format()) {
 			err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-		else
+		} else {
 			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
+			/* xcomp_bv must be 0 when using uncompacted format */
+			if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv)
+				err = -EINVAL;
+		}
+
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {
 			fpstate_init(&fpu->state);
 			trace_x86_fpu_init_state(fpu);

From d5c8028b4788f62b31fb79a331b3ad3e041fa366 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 23 Sep 2017 15:00:09 +0200
Subject: [PATCH 329/391] x86/fpu: Reinitialize FPU registers if restoring FPU
 state fails

Userspace can change the FPU state of a task using the ptrace() or
rt_sigreturn() system calls.  Because reserved bits in the FPU state can
cause the XRSTOR instruction to fail, the kernel has to carefully
validate that no reserved bits or other invalid values are being set.

Unfortunately, there have been bugs in this validation code.  For
example, we were not checking that the 'xcomp_bv' field in the
xstate_header was 0.  As-is, such bugs are exploitable to read the FPU
registers of other processes on the system.  To do so, an attacker can
create a task, assign to it an invalid FPU state, then spin in a loop
and monitor the values of the FPU registers.  Because the task's FPU
registers are not being restored, sometimes the FPU registers will have
the values from another process.

This is likely to continue to be a problem in the future because the
validation done by the CPU instructions like XRSTOR is not immediately
visible to kernel developers.  Nor will invalid FPU states ever be
encountered during ordinary use --- they will only be seen during
fuzzing or exploits.  There can even be reserved bits outside the
xstate_header which are easy to forget about.  For example, the MXCSR
register contains reserved bits, which were not validated by the
KVM_SET_XSAVE ioctl until commit a575813bfe4b ("KVM: x86: Fix load
damaged SSEx MXCSR register").

Therefore, mitigate this class of vulnerability by restoring the FPU
registers from init_fpstate if restoring from the task's state fails.

We actually used to do this, but it was (perhaps unwisely) removed by
commit 9ccc27a5d297 ("x86/fpu: Remove error return values from
copy_kernel_to_*regs() functions").  This new patch is also a bit
different.  First, it only clears the registers, not also the bad
in-memory state; this is simpler and makes it easier to make the
mitigation cover all callers of __copy_kernel_to_fpregs().  Second, it
does the register clearing in an exception handler so that no extra
instructions are added to context switches.  In fact, we *remove*
instructions, since previously we were always zeroing the register
containing 'err' even if CONFIG_X86_DEBUG_FPU was disabled.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170922174156.16780-4-ebiggers3@gmail.com
Link: http://lkml.kernel.org/r/20170923130016.21448-27-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 51 +++++++++--------------------
 arch/x86/mm/extable.c               | 24 ++++++++++++++
 2 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2dca7c65319c..cf290d424e48 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -120,20 +120,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
 	err;								\
 })
 
-#define check_insn(insn, output, input...)				\
-({									\
-	int err;							\
+#define kernel_insn(insn, output, input...)				\
 	asm volatile("1:" #insn "\n\t"					\
 		     "2:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3:  movl $-1,%[err]\n"				\
-		     "    jmp  2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : [err] "=r" (err), output				\
-		     : "0"(0), input);					\
-	err;								\
-})
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)	\
+		     : output : input)
 
 static inline int copy_fregs_to_user(struct fregs_state __user *fx)
 {
@@ -153,20 +144,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 {
-	int err;
-
 	if (IS_ENABLED(CONFIG_X86_32)) {
-		err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+		kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 	} else {
 		if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
-			err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+			kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 		} else {
 			/* See comment in copy_fxregs_to_kernel() below. */
-			err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+			kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
 		}
 	}
-	/* Copying from a kernel buffer to FPU registers should never fail: */
-	WARN_ON_FPU(err);
 }
 
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +170,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 {
-	int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-	WARN_ON_FPU(err);
+	kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +266,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
  * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
  * XSAVE area format.
  */
-#define XSTATE_XRESTORE(st, lmask, hmask, err)				\
+#define XSTATE_XRESTORE(st, lmask, hmask)				\
 	asm volatile(ALTERNATIVE(XRSTOR,				\
 				 XRSTORS, X86_FEATURE_XSAVES)		\
 		     "\n"						\
-		     "xor %[err], %[err]\n"				\
 		     "3:\n"						\
-		     ".pushsection .fixup,\"ax\"\n"			\
-		     "4: movl $-2, %[err]\n"				\
-		     "jmp 3b\n"						\
-		     ".popsection\n"					\
-		     _ASM_EXTABLE(661b, 4b)				\
-		     : [err] "=r" (err)					\
+		     _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+		     :							\
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
 
@@ -336,7 +316,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
 	else
 		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
 
-	/* We should never fault when copying from a kernel buffer: */
+	/*
+	 * We should never fault when copying from a kernel buffer, and the FPU
+	 * state we set at boot time should be valid.
+	 */
 	WARN_ON_FPU(err);
 }
 
@@ -365,12 +348,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
 {
 	u32 lmask = mask;
 	u32 hmask = mask >> 32;
-	int err;
 
-	XSTATE_XRESTORE(xstate, lmask, hmask, err);
-
-	/* We should never fault when copying from a kernel buffer: */
-	WARN_ON_FPU(err);
+	XSTATE_XRESTORE(xstate, lmask, hmask);
 }
 
 /*
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index c076f710de4c..c3521e2be396 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -2,6 +2,7 @@
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
 
+#include <asm/fpu/internal.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_refcount);
 
+/*
+ * Handler for when we fail to restore a task's FPU state.  We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid.  However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU.  Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+			  struct pt_regs *regs, int trapnr)
+{
+	regs->ip = ex_fixup_addr(fixup);
+
+	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+		  (void *)instruction_pointer(regs));
+
+	__copy_kernel_to_fpregs(&init_fpstate, -1);
+	return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
 		   struct pt_regs *regs, int trapnr)
 {

From 0add53713b1c07a1c71e27a20e21eb7c180b4e7b Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Tue, 19 Sep 2017 16:40:21 +0200
Subject: [PATCH 330/391] microblaze: Add missing kvm_para.h to Kbuild

Running make allmodconfig;make is throwing compilation error:
  CC      kernel/watchdog.o
In file included from ./include/linux/kvm_para.h:4:0,
                 from kernel/watchdog.c:29:
./include/uapi/linux/kvm_para.h:32:26: fatal error: asm/kvm_para.h: No
such file or directory
 #include <asm/kvm_para.h>
                          ^
compilation terminated.
make[1]: *** [kernel/watchdog.o] Error 1
make: *** [kernel/watchdog.o] Error 2

Reported-by: Michal Hocko <mhocko@kernel.org>
Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Fixes: 83f0124ad81e87b ("microblaze: remove asm-generic wrapper headers")
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Tested-by: Michal Hocko <mhocko@suse.com>
---
 arch/microblaze/include/uapi/asm/Kbuild | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index e77a596f3f1e..06609ca36115 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generic-y += fcntl.h
 generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
+generic-y += kvm_para.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += param.h

From 64c99853baca40e2f06038c4a926009edd14c7c3 Mon Sep 17 00:00:00 2001
From: Thomas Meyer <thomas@m3y3r.de>
Date: Thu, 21 Sep 2017 00:29:36 +0200
Subject: [PATCH 331/391] microblaze: Cocci spatch "vma_pages"

Use vma_pages function on vma object instead of explicit computation.
Found by coccinelle spatch "api/vma_pages.cocci"

Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/microblaze/kernel/dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index e45ada8fb006..94700c5270a9 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -165,7 +165,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 			     unsigned long attrs)
 {
 #ifdef CONFIG_MMU
-	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;
 	unsigned long pfn;

From 428dbf156cc5a8f9994d1f1a5c79373d15476f3c Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger@oracle.com>
Date: Mon, 18 Sep 2017 10:53:29 -0600
Subject: [PATCH 332/391] arch: change default endian for microblaze

Fix the default for microblaze. Michal Simek mentioned default for
microblaze should be CPU_LITTLE_ENDIAN.

Fixes : commit 206d3642d8ee ("arch/microblaze: add choice for endianness
	and update Makefile")

Signed-off-by: Babu Moger <babu.moger@oracle.com>
Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/microblaze/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 9d26abdf0dc1..4f798aa671dd 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -39,7 +39,7 @@ config MICROBLAZE
 # Endianness selection
 choice
 	prompt "Endianness selection"
-	default CPU_BIG_ENDIAN
+	default CPU_LITTLE_ENDIAN
 	help
 	  microblaze architectures can be configured for either little or
 	  big endian formats. Be sure to select the appropriate mode.

From 02a4843618fb35f847cf8c31cd3893873aa0edde Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 13 Sep 2017 09:17:57 -0400
Subject: [PATCH 333/391] brd: fix overflow in __brd_direct_access

The code in __brd_direct_access multiplies the pgoff variable by page size
and divides it by 512. It can cause overflow on 32-bit architectures. The
overflow happens if we create ramdisk larger than 4G and use it as a
sparse device.

This patch replaces multiplication and division with multiplication by the
number of sectors per page.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 1647b9b959c7 ("brd: add dax_operations support")
Cc: stable@vger.kernel.org	# 4.12+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/brd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bbd0d186cfc0..2d7178f7754e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -342,7 +342,7 @@ static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
 
 	if (!brd)
 		return -ENODEV;
-	page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
+	page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
 	if (!page)
 		return -ENOSPC;
 	*kaddr = page_address(page);

From f507b54dccfd8000c517d740bc45f20c74532d18 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 Sep 2017 13:54:35 +0200
Subject: [PATCH 334/391] bsg-lib: don't free job in bsg_prepare_job

The job structure is allocated as part of the request, so we should not
free it in the error path of bsg_prepare_job.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg-lib.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c82408c7cc3c..dbddff8174e5 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -154,7 +154,6 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
 failjob_rls_rqst_payload:
 	kfree(job->request_payload.sg_list);
 failjob_rls_job:
-	kfree(job);
 	return -ENOMEM;
 }
 

From 1dae69bedeeca0b57e441eae491fbd38049c0b47 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 5 May 2017 22:25:18 -0400
Subject: [PATCH 335/391] nbd: ignore non-nbd ioctl's

In testing we noticed that nbd would spew if you ran a fio job against
the raw device itself.  This is because fio calls a block device
specific ioctl, however the block layer will first pass this back to the
driver ioctl handler in case the driver wants to do something special.
Since the device was setup using netlink this caused us to spew every
time fio called this ioctl.  Since we don't have special handling, just
error out for any non-nbd specific ioctl's that come in.  This fixes the
spew.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2aa87cbdede0..3684e21d543f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1194,6 +1194,12 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	/* The block layer will pass back some non-nbd ioctls in case we have
+	 * special handling for them, but we don't so just return an error.
+	 */
+	if (_IOC_TYPE(cmd) != 0xab)
+		return -EINVAL;
+
 	mutex_lock(&nbd->config_lock);
 
 	/* Don't allow ioctl operations on a nbd device that was created with

From 5acb3cc2c2e9d3020a4fee43763c6463767f1572 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Wed, 20 Sep 2017 13:12:20 -0600
Subject: [PATCH 336/391] blktrace: Fix potential deadlock between delete &
 sysfs ops

The lockdep code had reported the following unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(s_active#228);
                               lock(&bdev->bd_mutex/1);
                               lock(s_active#228);
  lock(&bdev->bd_mutex);

 *** DEADLOCK ***

The deadlock may happen when one task (CPU1) is trying to delete a
partition in a block device and another task (CPU0) is accessing
tracing sysfs file (e.g. /sys/block/dm-1/trace/act_mask) in that
partition.

The s_active isn't an actual lock. It is a reference count (kn->count)
on the sysfs (kernfs) file. Removal of a sysfs file, however, require
a wait until all the references are gone. The reference count is
treated like a rwsem using lockdep instrumentation code.

The fact that a thread is in the sysfs callback method or in the
ioctl call means there is a reference to the opended sysfs or device
file. That should prevent the underlying block structure from being
removed.

Instead of using bd_mutex in the block_device structure, a new
blk_trace_mutex is now added to the request_queue structure to protect
access to the blk_trace structure.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

Fix typo in patch subject line, and prune a comment detailing how
the code used to work.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c        |  3 +++
 include/linux/blkdev.h  |  1 +
 kernel/trace/blktrace.c | 18 ++++++++++++------
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index aebe676225e6..048be4aa6024 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -854,6 +854,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+	mutex_init(&q->blk_trace_mutex);
+#endif
 	mutex_init(&q->sysfs_lock);
 	spin_lock_init(&q->__queue_lock);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 460294bb0fa5..02fa42d24b52 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -551,6 +551,7 @@ struct request_queue {
 	int			node;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	struct blk_trace	*blk_trace;
+	struct mutex		blk_trace_mutex;
 #endif
 	/*
 	 * for flush operations
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2a685b45b73b..45a3928544ce 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -648,6 +648,12 @@ int blk_trace_startstop(struct request_queue *q, int start)
 }
 EXPORT_SYMBOL_GPL(blk_trace_startstop);
 
+/*
+ * When reading or writing the blktrace sysfs files, the references to the
+ * opened sysfs or device files should prevent the underlying block device
+ * from being removed. So no further delete protection is really needed.
+ */
+
 /**
  * blk_trace_ioctl: - handle the ioctls associated with tracing
  * @bdev:	the block device
@@ -665,7 +671,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 	if (!q)
 		return -ENXIO;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&q->blk_trace_mutex);
 
 	switch (cmd) {
 	case BLKTRACESETUP:
@@ -691,7 +697,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 		break;
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&q->blk_trace_mutex);
 	return ret;
 }
 
@@ -1727,7 +1733,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 	if (q == NULL)
 		goto out_bdput;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&q->blk_trace_mutex);
 
 	if (attr == &dev_attr_enable) {
 		ret = sprintf(buf, "%u\n", !!q->blk_trace);
@@ -1746,7 +1752,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 		ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
 
 out_unlock_bdev:
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&q->blk_trace_mutex);
 out_bdput:
 	bdput(bdev);
 out:
@@ -1788,7 +1794,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	if (q == NULL)
 		goto out_bdput;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&q->blk_trace_mutex);
 
 	if (attr == &dev_attr_enable) {
 		if (value)
@@ -1814,7 +1820,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	}
 
 out_unlock_bdev:
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&q->blk_trace_mutex);
 out_bdput:
 	bdput(bdev);
 out:

From e5313c141b49c1b1af43d1ca81398185d66ad1a6 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 20 Sep 2017 14:24:34 -0700
Subject: [PATCH 337/391] loop: remove union of use_aio and ref in struct
 loop_cmd

When the request is completed, lo_complete_rq() checks cmd->use_aio.
However, if this is in fact an aio request, cmd->use_aio will have
already been reused as cmd->ref by lo_rw_aio*. Fix it by not using a
union. On x86_64, there's a hole after the union anyways, so this
doesn't make struct loop_cmd any bigger.

Fixes: 92d773324b7e ("block/loop: fix use after free")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index f68c1d50802f..1f3956702993 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -67,10 +67,8 @@ struct loop_device {
 struct loop_cmd {
 	struct kthread_work work;
 	struct request *rq;
-	union {
-		bool use_aio; /* use AIO interface to handle I/O */
-		atomic_t ref; /* only for aio */
-	};
+	bool use_aio; /* use AIO interface to handle I/O */
+	atomic_t ref; /* only for aio */
 	long ret;
 	struct kiocb iocb;
 	struct bio_vec *bvec;

From 56b7103a06083b8ce1160f8289460ba2f584e182 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:26 -0700
Subject: [PATCH 338/391] nvme-fc: remove use of FC-specific error codes

The FC-NVME transport used the FC-specific error codes in cases where
it had to fabricate an error to go back up stack. Instead of using the
FC-specific values, now use a generic value (NVME_SC_INTERNAL).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d2e882c0f496..9100779b58c9 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1376,7 +1376,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
 		status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
 	else if (freq->status)
-		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 
 	/*
 	 * For the linux implementation, if we have an unsuccesful
@@ -1404,7 +1404,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		 */
 		if (freq->transferred_length !=
 			be32_to_cpu(op->cmd_iu.data_len)) {
-			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 			goto done;
 		}
 		result.u64 = 0;
@@ -1421,7 +1421,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 					freq->transferred_length ||
 			     op->rsp_iu.status_code ||
 			     sqe->common.command_id != cqe->command_id)) {
-			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 			goto done;
 		}
 		result = cqe->result;
@@ -1429,7 +1429,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		break;
 
 	default:
-		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
+		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
 		goto done;
 	}
 

From 29b3d26ecc8046838de88205b7c4b182ac27ff65 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:27 -0700
Subject: [PATCH 339/391] nvmet-fc: remove use of FC-specific error codes

The FC-NVME target transport used the FC-specific error codes in
return codes when the transport or lldd failed. Instead of using the
FC-specific values, now use a generic value (NVME_SC_INTERNAL).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 421e43bf1dd7..088f07250d76 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1910,8 +1910,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
 			spin_lock_irqsave(&fod->flock, flags);
 			fod->writedataactive = false;
 			spin_unlock_irqrestore(&fod->flock, flags);
-			nvmet_req_complete(&fod->req,
-					NVME_SC_FC_TRANSPORT_ERROR);
+			nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
 		} else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
 			fcpreq->fcp_error = ret;
 			fcpreq->transferred_length = 0;
@@ -1929,8 +1928,7 @@ __nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort)
 	/* if in the middle of an io and we need to tear down */
 	if (abort) {
 		if (fcpreq->op == NVMET_FCOP_WRITEDATA) {
-			nvmet_req_complete(&fod->req,
-					NVME_SC_FC_TRANSPORT_ERROR);
+			nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
 			return true;
 		}
 
@@ -1968,8 +1966,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
 			fod->abort = true;
 			spin_unlock(&fod->flock);
 
-			nvmet_req_complete(&fod->req,
-					NVME_SC_FC_TRANSPORT_ERROR);
+			nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
 			return;
 		}
 

From fc9608e8b4dc3c2545fa0bc5d3eef158ca56ccf8 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:28 -0700
Subject: [PATCH 340/391] nvmet-fcloop: remove use of FC-specific error codes

The FC-NVME transport loopback test module used the FC-specific error
codes in cases where it emulated a transport abort case. Instead of
using the FC-specific values, now use a generic value (NVME_SC_INTERNAL).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fcloop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 1cb9847ec261..1fd1afbb8b2a 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -576,7 +576,7 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	tfcp_req->aborted = true;
 	spin_unlock(&tfcp_req->reqlock);
 
-	tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
+	tfcp_req->status = NVME_SC_INTERNAL;
 
 	/*
 	 * nothing more to do. If io wasn't active, the transport should

From 8e009ce84683fa124b23ff5cb7fd87c48b331b88 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:29 -0700
Subject: [PATCH 341/391] lpfc: remove use of FC-specific error codes

The lpfc driver uses the FC-specific error when it needed to return an
error to the FC-NVME transport. Convert to use a generic value instead.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/lpfc/lpfc_nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 79ba3ce063a4..23bdb1ca106e 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -884,7 +884,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 					 wcqe->total_data_placed);
 			nCmd->transferred_length = 0;
 			nCmd->rcv_rsplen = 0;
-			nCmd->status = NVME_SC_FC_TRANSPORT_ERROR;
+			nCmd->status = NVME_SC_INTERNAL;
 		}
 	}
 

From 39a550d2d9eaee8b618084e6011441eac6a2a3b7 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 14 Sep 2017 11:30:15 -0700
Subject: [PATCH 342/391] qla2xxx: remove use of FC-specific error codes

The qla2xxx driver uses the FC-specific error when it needed to return an
error to the FC-NVME transport.  Convert to use a generic value instead.

Signed-off-by: James Smart <james.smart@broadcom.com>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/qla2xxx/qla_nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 1f59e7a74c7b..6b33a1f24f56 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -180,7 +180,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
 		goto rel;
 
 	if (unlikely(res == QLA_FUNCTION_FAILED))
-		fd->status = NVME_SC_FC_TRANSPORT_ERROR;
+		fd->status = NVME_SC_INTERNAL;
 	else
 		fd->status = 0;
 

From c98cb3bd882119e7e1a7c8df2f1eacfcc701450b Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 16:27:25 -0700
Subject: [PATCH 343/391] nvme.h: remove FC transport-specific error values

The NVM express group recinded the reserved range for the transport.
Remove the FC-centric values that had been defined.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/nvme.h | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 87723c86f136..2440be32be1d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1127,19 +1127,6 @@ enum {
 	NVME_SC_UNWRITTEN_BLOCK		= 0x287,
 
 	NVME_SC_DNR			= 0x4000,
-
-
-	/*
-	 * FC Transport-specific error status values for NVME commands
-	 *
-	 * Transport-specific status code values must be in the range 0xB0..0xBF
-	 */
-
-	/* Generic FC failure - catchall */
-	NVME_SC_FC_TRANSPORT_ERROR	= 0x00B0,
-
-	/* I/O failure due to FC ABTS'd */
-	NVME_SC_FC_TRANSPORT_ABORTED	= 0x00B1,
 };
 
 struct nvme_completion {

From d85cf207499e6740ab9c490ff4f360af5c432d23 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 13:20:23 -0700
Subject: [PATCH 344/391] nvme: add transport SGL definitions

Add transport SGL defintions from NVMe TP 4008, required for
the final NVMe-FC standard.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/nvme.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2440be32be1d..9310ce77d8e1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -471,12 +471,14 @@ enum nvme_opcode {
  *
  * @NVME_SGL_FMT_ADDRESS:     absolute address of the data block
  * @NVME_SGL_FMT_OFFSET:      relative offset of the in-capsule data block
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
  * @NVME_SGL_FMT_INVALIDATE:  RDMA transport specific remote invalidation
  *                            request subtype
  */
 enum {
 	NVME_SGL_FMT_ADDRESS		= 0x00,
 	NVME_SGL_FMT_OFFSET		= 0x01,
+	NVME_SGL_FMT_TRANSPORT_A	= 0x0A,
 	NVME_SGL_FMT_INVALIDATE		= 0x0f,
 };
 
@@ -490,12 +492,16 @@ enum {
  *
  * For struct nvme_keyed_sgl_desc:
  *   @NVME_KEY_SGL_FMT_DATA_DESC:	keyed data block descriptor
+ *
+ * Transport-specific SGL types:
+ *   @NVME_TRANSPORT_SGL_DATA_DESC:	Transport SGL data dlock descriptor
  */
 enum {
 	NVME_SGL_FMT_DATA_DESC		= 0x00,
 	NVME_SGL_FMT_SEG_DESC		= 0x02,
 	NVME_SGL_FMT_LAST_SEG_DESC	= 0x03,
 	NVME_KEY_SGL_FMT_DATA_DESC	= 0x04,
+	NVME_TRANSPORT_SGL_DATA_DESC	= 0x05,
 };
 
 struct nvme_sgl_desc {

From d9d34c0b2327e85da0ad1476575264fe957fc6ef Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 13:20:24 -0700
Subject: [PATCH 345/391] nvme-fc: use transport-specific sgl format

Sync with NVM Express spec change and FC-NVME 1.18.

FC transport sets SGL type to Transport SGL Data Block Descriptor and
subtype to transport-specific value 0x0A.

Removed the warn-on's on the PRP fields. They are unneeded. They were
to check for values from the upper layer that weren't set right, and
for the most part were fine. But, with Async events, which reuse the
same structure and 2nd time issued the SGL overlay converted them to
the Transport SGL values - the warn-on's were errantly firing.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9100779b58c9..af075e998944 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1989,16 +1989,17 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	 * as well as those by FC-NVME spec.
 	 */
 	WARN_ON_ONCE(sqe->common.metadata);
-	WARN_ON_ONCE(sqe->common.dptr.prp1);
-	WARN_ON_ONCE(sqe->common.dptr.prp2);
 	sqe->common.flags |= NVME_CMD_SGL_METABUF;
 
 	/*
-	 * format SQE DPTR field per FC-NVME rules
-	 *    type=data block descr; subtype=offset;
-	 *    offset is currently 0.
+	 * format SQE DPTR field per FC-NVME rules:
+	 *    type=0x5     Transport SGL Data Block Descriptor
+	 *    subtype=0xA  Transport-specific value
+	 *    address=0
+	 *    length=length of the data series
 	 */
-	sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET;
+	sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
+					NVME_SGL_FMT_TRANSPORT_A;
 	sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
 	sqe->rw.dptr.sgl.addr = 0;
 

From deb61742e060d4447712598bc11bb50f8b2e51dd Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 11 Sep 2017 16:16:53 -0700
Subject: [PATCH 346/391] nvmet-fc: fix failing max io queue connections

fc transport is treating NVMET_NR_QUEUES as maximum queue count, e.g.
admin queue plus NVMET_NR_QUEUES-1 io queues.  But NVMET_NR_QUEUES is
the number of io queues, so maximum queue count is really
NVMET_NR_QUEUES+1.

Fix the handling in the target fc transport

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 088f07250d76..c48c83d97e30 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -148,7 +148,7 @@ struct nvmet_fc_tgt_assoc {
 	u32				a_id;
 	struct nvmet_fc_tgtport		*tgtport;
 	struct list_head		a_list;
-	struct nvmet_fc_tgt_queue	*queues[NVMET_NR_QUEUES];
+	struct nvmet_fc_tgt_queue	*queues[NVMET_NR_QUEUES + 1];
 	struct kref			ref;
 };
 
@@ -608,7 +608,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	unsigned long flags;
 	int ret;
 
-	if (qid >= NVMET_NR_QUEUES)
+	if (qid > NVMET_NR_QUEUES)
 		return NULL;
 
 	queue = kzalloc((sizeof(*queue) +
@@ -888,7 +888,7 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
 	int i;
 
 	spin_lock_irqsave(&tgtport->lock, flags);
-	for (i = NVMET_NR_QUEUES - 1; i >= 0; i--) {
+	for (i = NVMET_NR_QUEUES; i >= 0; i--) {
 		queue = assoc->queues[i];
 		if (queue) {
 			if (!nvmet_fc_tgt_q_get(queue))

From 161b8be2bd6abad250d4b3f674bdd5480f15beeb Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 14 Sep 2017 13:54:39 -0400
Subject: [PATCH 347/391] nvme-pci: initialize queue memory before interrupts

A spurious interrupt before the nvme driver has initialized the completion
queue may inadvertently cause the driver to believe it has a completion
to process. This may result in a NULL dereference since the nvmeq's tags
are not set at this point.

The patch initializes the host's CQ memory so that a spurious interrupt
isn't mistaken for a real completion.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4a2121335f48..004018c5dccc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1313,11 +1313,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	if (result < 0)
 		goto release_cq;
 
+	nvme_init_queue(nvmeq, qid);
 	result = queue_request_irq(nvmeq);
 	if (result < 0)
 		goto release_sq;
 
-	nvme_init_queue(nvmeq, qid);
 	return result;
 
  release_sq:
@@ -1464,6 +1464,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 		return result;
 
 	nvmeq->cq_vector = 0;
+	nvme_init_queue(nvmeq, 0);
 	result = queue_request_irq(nvmeq);
 	if (result) {
 		nvmeq->cq_vector = -1;
@@ -2156,7 +2157,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	nvme_init_queue(dev->queues[0], 0);
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
 		goto out;

From d08774738446e77734777adcf5d1045237b4475a Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 15 Sep 2017 13:05:38 -0400
Subject: [PATCH 348/391] nvme-pci: Print invalid SGL only once

The WARN_ONCE macro returns true if the condition is true, not if the
warn was raised, so we're printing the scatter list every time it's
invalid. This is excessive and makes debugging harder, so this patch
prints it just once.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 004018c5dccc..cb73bc8cad3b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -24,6 +24,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/once.h>
 #include <linux/pci.h>
 #include <linux/poison.h>
 #include <linux/t10-pi.h>
@@ -540,6 +541,20 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 }
 #endif
 
+static void nvme_print_sgl(struct scatterlist *sgl, int nents)
+{
+	int i;
+	struct scatterlist *sg;
+
+	for_each_sg(sgl, sg, nents, i) {
+		dma_addr_t phys = sg_phys(sg);
+		pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+			"dma_address:%pad dma_length:%d\n",
+			i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
+			sg_dma_len(sg));
+	}
+}
+
 static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -622,19 +637,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 	return BLK_STS_OK;
 
  bad_sgl:
-	if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
-				blk_rq_payload_bytes(req), iod->nents)) {
-		for_each_sg(iod->sg, sg, iod->nents, i) {
-			dma_addr_t phys = sg_phys(sg);
-			pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
-			       "dma_address:%pad dma_length:%d\n", i, &phys,
-					sg->offset, sg->length,
-					&sg_dma_address(sg),
-					sg_dma_len(sg));
-		}
-	}
+	WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
+			"Invalid SGL for payload:%d nents:%d\n",
+			blk_rq_payload_bytes(req), iod->nents);
 	return BLK_STS_IOERR;
-
 }
 
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,

From cd48282cc736377d5abf7c04de8c6ba864ba3794 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 14 Sep 2017 11:03:09 -0700
Subject: [PATCH 349/391] nvme: stop aer posting if controller state not live

If an nvme async_event command completes, in most cases, a new
async event is posted. However, if the controller enters a
resetting or reconnecting state, there is nothing to block the
scheduled work element from posting the async event again. Nor are
there calls from the transport to stop async events when an
association dies.

In the case of FC, where the association is torn down, the aer must
be aborted on the FC link and completes through the normal job
completion path. Thus the terminated async event ends up being
rescheduled even though the controller isn't in a valid state for
the aer, and the reposting gets the transport into a partially torn
down data structure.

It's possible to hit the scenario on rdma, although much less likely
due to an aer completing right as the association is terminated and
as the association teardown reclaims the blk requests via
nvme_cancel_request() so its immediate, not a link-related action
like on FC.

Fix by putting controller state checks in both the async event
completion routine where it schedules the async event and in the
async event work routine before it calls into the transport. It's
effectively a "stop_async_events()" behavior.  The transport, when
it creates a new association with the subsystem will transition
the state back to live and is already restarting the async event
posting.

Signed-off-by: James Smart <james.smart@broadcom.com>
[hch: remove taking a lock over reading the controller state]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index acc816b67582..d470f031e27f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2590,7 +2590,7 @@ static void nvme_async_event_work(struct work_struct *work)
 		container_of(work, struct nvme_ctrl, async_event_work);
 
 	spin_lock_irq(&ctrl->lock);
-	while (ctrl->event_limit > 0) {
+	while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
 		int aer_idx = --ctrl->event_limit;
 
 		spin_unlock_irq(&ctrl->lock);
@@ -2677,7 +2677,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		/*FALLTHRU*/
 	case NVME_SC_ABORT_REQ:
 		++ctrl->event_limit;
-		queue_work(nvme_wq, &ctrl->async_event_work);
+		if (ctrl->state == NVME_CTRL_LIVE)
+			schedule_work(&ctrl->async_event_work);
 		break;
 	default:
 		break;

From 0951338d9677f546e230685d68631dfd3f81cca5 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 7 Sep 2017 13:18:04 -0700
Subject: [PATCH 350/391] nvme: allow timed-out ios to retry

Currently the nvme_req_needs_retry() applies several checks to see if
a retry is allowed. On of those is whether the current time has exceeded
the start time of the io plus the timeout length. This check, if an io
times out, means there is never a retry allowed for the io. Which means
applications see the io failure.

Remove this check and allow the io to timeout, like it does on other
protocols, and retries to be made.

On the FC transport, a frame can be lost for an individual io, and there
may be no other errors that escalate for the connection/association.
The io will timeout, which causes the transport to escalate into creating
a new association, but the io that timed out, due to this retry logic, has
already failed back to the application and things are hosed.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d470f031e27f..5589f67d2cd8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -134,8 +134,6 @@ static inline bool nvme_req_needs_retry(struct request *req)
 		return false;
 	if (nvme_req(req)->status & NVME_SC_DNR)
 		return false;
-	if (jiffies - req->start_time >= req->timeout)
-		return false;
 	if (nvme_req(req)->retries >= nvme_max_retries)
 		return false;
 	return true;

From 8edd11c9ad3a6205eea6de9d02eaf64c681a0658 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Thu, 14 Sep 2017 13:59:28 -0300
Subject: [PATCH 351/391] nvme-fabrics: Allow 0 as KATO value

Currently, driver code allows user to set 0 as KATO
(Keep Alive TimeOut), but this is not being respected.
This patch enforces the expected behavior.

Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fabrics.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 47307752dc65..555c976cc2ee 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -565,6 +565,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
 	opts->nr_io_queues = num_online_cpus();
 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
+	opts->kato = NVME_DEFAULT_KATO;
 
 	options = o = kstrdup(buf, GFP_KERNEL);
 	if (!options)
@@ -655,21 +656,22 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 				goto out;
 			}
 
-			if (opts->discovery_nqn) {
-				pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
-				ret = -EINVAL;
-				goto out;
-			}
-
 			if (token < 0) {
 				pr_err("Invalid keep_alive_tmo %d\n", token);
 				ret = -EINVAL;
 				goto out;
-			} else if (token == 0) {
+			} else if (token == 0 && !opts->discovery_nqn) {
 				/* Allowed for debug */
 				pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
 			}
 			opts->kato = token;
+
+			if (opts->discovery_nqn && opts->kato) {
+				pr_err("Discovery controllers cannot accept KATO != 0\n");
+				ret = -EINVAL;
+				goto out;
+			}
+
 			break;
 		case NVMF_OPT_CTRL_LOSS_TMO:
 			if (match_int(args, &token)) {
@@ -762,8 +764,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	uuid_copy(&opts->host->id, &hostid);
 
 out:
-	if (!opts->discovery_nqn && !opts->kato)
-		opts->kato = NVME_DEFAULT_KATO;
 	kfree(options);
 	return ret;
 }

From bb1cc74790eb51f52d23c6e5fd9a3bb16030c3d8 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 18 Sep 2017 09:08:29 -0700
Subject: [PATCH 352/391] nvmet: implement valid sqhd values in completions

To support sqhd, for initiators that are following the spec and
paying attention to sqhd vs their sqtail values:

- add sqhd to struct nvmet_sq
- initialize sqhd to 0 in nvmet_sq_setup
- rather than propagate the 0's-based qsize value from the connect message
  which requires a +1 in every sqhd update, and as nothing else references
  it, convert to 1's-based value in nvmt_sq/cq_setup() calls.
- validate connect message sqsize being non-zero per spec.
- updated assign sqhd for every completion that goes back.

Also remove handling the NULL sq case in __nvmet_req_complete, as it can't
happen with the current code.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/core.c        | 8 ++++----
 drivers/nvme/target/fabrics-cmd.c | 9 +++++++--
 drivers/nvme/target/nvmet.h       | 1 +
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 7c23eaf8e563..c2a768a94235 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -390,10 +390,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 	if (status)
 		nvmet_set_status(req, status);
 
-	/* XXX: need to fill in something useful for sq_head */
-	req->rsp->sq_head = 0;
-	if (likely(req->sq)) /* may happen during early failure */
-		req->rsp->sq_id = cpu_to_le16(req->sq->qid);
+	req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
+	req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
+	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
 	req->rsp->command_id = req->cmd->common.command_id;
 
 	if (req->ns)
@@ -420,6 +419,7 @@ void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
 		u16 qid, u16 size)
 {
+	sq->sqhd = 0;
 	sq->qid = qid;
 	sq->size = size;
 
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 859a66725291..db3bf6b8bf9e 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -109,9 +109,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
 		pr_warn("queue already connected!\n");
 		return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
 	}
+	if (!sqsize) {
+		pr_warn("queue size zero!\n");
+		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
+	}
 
-	nvmet_cq_setup(ctrl, req->cq, qid, sqsize);
-	nvmet_sq_setup(ctrl, req->sq, qid, sqsize);
+	/* note: convert queue size from 0's-based value to 1's-based value */
+	nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
+	nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
 	return 0;
 }
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 7d261ab894f4..7b8e20adf760 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -74,6 +74,7 @@ struct nvmet_sq {
 	struct percpu_ref	ref;
 	u16			qid;
 	u16			size;
+	u16			sqhd;
 	struct completion	free_done;
 	struct completion	confirm_done;
 };

From 332391a9935da939319e473b4680e173df75afcf Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Thu, 21 Sep 2017 08:16:29 -0600
Subject: [PATCH 353/391] fs: Fix page cache inconsistency when mixing buffered
 and AIO DIO

Currently when mixing buffered reads and asynchronous direct writes it
is possible to end up with the situation where we have stale data in the
page cache while the new data is already written to disk. This is
permanent until the affected pages are flushed away. Despite the fact
that mixing buffered and direct IO is ill-advised it does pose a thread
for a data integrity, is unexpected and should be fixed.

Fix this by deferring completion of asynchronous direct writes to a
process context in the case that there are mapped pages to be found in
the inode. Later before the completion in dio_complete() invalidate
the pages in question. This ensures that after the completion the pages
in the written area are either unmapped, or populated with up-to-date
data. Also do the same for the iomap case which uses
iomap_dio_complete() instead.

This has a side effect of deferring the completion to a process context
for every AIO DIO that happens on inode that has pages mapped. However
since the consensus is that this is ill-advised practice the performance
implication should not be a problem.

This was based on proposal from Jeff Moyer, thanks!

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/direct-io.c | 49 +++++++++++++++++++++++++++++++++++++++++++------
 fs/iomap.c     | 29 ++++++++++++++++-------------
 mm/filemap.c   | 10 ++++++++--
 3 files changed, 67 insertions(+), 21 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5fa2211e49ae..62cf812ed0e5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 {
 	loff_t offset = dio->iocb->ki_pos;
 	ssize_t transferred = 0;
+	int err;
 
 	/*
 	 * AIO submission can race with bio completion to get here while
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 	if (ret == 0)
 		ret = transferred;
 
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 */
+	if (ret > 0 && dio->op == REQ_OP_WRITE &&
+	    dio->inode->i_mapping->nrpages) {
+		err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+					offset >> PAGE_SHIFT,
+					(offset + ret - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(err);
+	}
+
 	if (dio->end_io) {
-		int err;
 
 		// XXX: ki_pos??
 		err = dio->end_io(dio->iocb, offset, ret, dio->private);
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
 	struct dio *dio = bio->bi_private;
 	unsigned long remaining;
 	unsigned long flags;
+	bool defer_completion = false;
 
 	/* cleanup the bio */
 	dio_bio_complete(dio, bio);
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
 	if (remaining == 0) {
-		if (dio->result && dio->defer_completion) {
+		/*
+		 * Defer completion when defer_completion is set or
+		 * when the inode has pages mapped and this is AIO write.
+		 * We need to invalidate those pages because there is a
+		 * chance they contain stale data in the case buffered IO
+		 * went in between AIO submission and completion into the
+		 * same region.
+		 */
+		if (dio->result)
+			defer_completion = dio->defer_completion ||
+					   (dio->op == REQ_OP_WRITE &&
+					    dio->inode->i_mapping->nrpages);
+		if (defer_completion) {
 			INIT_WORK(&dio->complete_work, dio_aio_complete_work);
 			queue_work(dio->inode->i_sb->s_dio_done_wq,
 				   &dio->complete_work);
@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
 	 * so that we can call ->fsync.
 	 */
-	if (dio->is_async && iov_iter_rw(iter) == WRITE &&
-	    ((iocb->ki_filp->f_flags & O_DSYNC) ||
-	     IS_SYNC(iocb->ki_filp->f_mapping->host))) {
-		retval = dio_set_defer_completion(dio);
+	if (dio->is_async && iov_iter_rw(iter) == WRITE) {
+		retval = 0;
+		if ((iocb->ki_filp->f_flags & O_DSYNC) ||
+		    IS_SYNC(iocb->ki_filp->f_mapping->host))
+			retval = dio_set_defer_completion(dio);
+		else if (!dio->inode->i_sb->s_dio_done_wq) {
+			/*
+			 * In case of AIO write racing with buffered read we
+			 * need to defer completion. We can't decide this now,
+			 * however the workqueue needs to be initialized here.
+			 */
+			retval = sb_init_dio_done_wq(dio->inode->i_sb);
+		}
 		if (retval) {
 			/*
 			 * We grab i_mutex only for reads so we don't have
diff --git a/fs/iomap.c b/fs/iomap.c
index 269b24a01f32..8194d30bdca0 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -713,8 +713,24 @@ struct iomap_dio {
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	struct kiocb *iocb = dio->iocb;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 */
+	if (!dio->error &&
+	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+		ret = invalidate_inode_pages2_range(inode->i_mapping,
+				iocb->ki_pos >> PAGE_SHIFT,
+				(iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+	}
+
 	if (dio->end_io) {
 		ret = dio->end_io(iocb,
 				dio->error ? dio->error : dio->size,
@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
 	ret = iomap_dio_complete(dio);
 
-	/*
-	 * Try again to invalidate clean pages which might have been cached by
-	 * non-direct readahead, or faulted in by get_user_pages() if the source
-	 * of the write was an mmap'ed region of the file we're writing.  Either
-	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
-	 * this invalidation fails, tough, the write still worked...
-	 */
-	if (iov_iter_rw(iter) == WRITE) {
-		int err = invalidate_inode_pages2_range(mapping,
-				start >> PAGE_SHIFT, end >> PAGE_SHIFT);
-		WARN_ON_ONCE(err);
-	}
-
 	return ret;
 
 out_free_dio:
diff --git a/mm/filemap.c b/mm/filemap.c
index 870971e20967..db250d0e0565 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2926,9 +2926,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	 * we're writing.  Either one is a pretty crazy thing to do,
 	 * so we don't support it 100%.  If this invalidation
 	 * fails, tough, the write still worked...
+	 *
+	 * Most of the time we do not need this since dio_complete() will do
+	 * the invalidation for us. However there are some file systems that
+	 * do not end up with dio_complete() being called, so let's not break
+	 * them by removing it completely
 	 */
-	invalidate_inode_pages2_range(mapping,
-				pos >> PAGE_SHIFT, end);
+	if (mapping->nrpages)
+		invalidate_inode_pages2_range(mapping,
+					pos >> PAGE_SHIFT, end);
 
 	if (written > 0) {
 		pos += written;

From f5c156c4c29a3d87176dd6e5c099388e187ec29b Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Thu, 21 Sep 2017 12:17:16 -0700
Subject: [PATCH 354/391] block: fix a crash caused by wrong API

part_stat_show takes a part device not a disk, so we should use
part_to_disk.

Fixes: d62e26b3ffd2("block: pass in queue to inflight accounting")
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Omar Sandoval <osandov@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partition-generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/partition-generic.c b/block/partition-generic.c
index 86e8fe1adcdb..88c555db4e5d 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -112,7 +112,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	struct request_queue *q = dev_to_disk(dev)->queue;
+	struct request_queue *q = part_to_disk(p)->queue;
 	unsigned int inflight[2];
 	int cpu;
 

From 9c3340ea7f5dabf88ca096a917cb0ab1f208ef2a Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 11 Sep 2017 19:11:07 -0600
Subject: [PATCH 355/391] selftests: futex: copy sub-dir test scripts for make
 O=dir run

For make O=dir run_tests to work, test scripts from sub-directories
need to be copied over to the object directory. Running tests from the
object directory is necessary to avoid making the source tree dirty.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/futex/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 7c647f619d63..9358cb210fd5 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -11,10 +11,13 @@ all:
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+		if [ -e $$DIR/$(TEST_PROGS) ]; then
+			rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
+		fi
 	done
 
 override define RUN_TESTS
-	$(OUTPUT)/run.sh
+	cd $(OUTPUT); ./run.sh
 endef
 
 override define INSTALL_RULE

From 8230b905a6780c60372bf5df7bf5f23041ca3196 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Tue, 12 Sep 2017 08:52:13 -0600
Subject: [PATCH 356/391] selftests: mqueue: Use full path to run tests from
 Makefile

Use full path including $(OUTPUT) to run tests from Makefile for
normal case when objects reside in the source tree as well as when
objects are relocated with make O=dir. In both cases $(OUTPUT) will
be set correctly by lib.mk.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/mqueue/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 79a664aeb8d7..0f5e347b068d 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 include ../lib.mk
 
 override define RUN_TESTS
-	@./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
-	@./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+	$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+	$(OUTPUT)//mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
 endef
 
 override define EMIT_TESTS

From 1ede053632f6380d7e1dba4d781e5eb78621aa3a Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 18 Sep 2017 17:30:50 -0600
Subject: [PATCH 357/391] selftests: Makefile: fix for loops in targets to run
 silently

Fix for loops in targets to run silently to avoid cluttering the test
results.

Suppresses the following from targets: e.g run from breakpoints

for TARGET in breakpoints; do		\
	BUILD_TARGET=$BUILD/$TARGET;	\
	mkdir $BUILD_TARGET  -p;	\
	make OUTPUT=$BUILD_TARGET -C $TARGET;\
done;

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index f4368db011ea..ff805643b5f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -66,32 +66,32 @@ endif
 
 export BUILD
 all:
-	for TARGET in $(TARGETS); do		\
+	@for TARGET in $(TARGETS); do		\
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_tests: all
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
 	done;
 
 hotplug:
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_hotplug: hotplug
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
 	done;
 
 clean_hotplug:
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
@@ -107,7 +107,7 @@ install:
 ifdef INSTALL_PATH
 	@# Ask all targets to install their files
 	mkdir -p $(INSTALL_PATH)
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
 	done;
@@ -132,7 +132,7 @@ else
 endif
 
 clean:
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;

From 659dbfd8c47adeb03f401d1a1f17091bb63cc5a2 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Mon, 18 Sep 2017 18:46:23 -0600
Subject: [PATCH 358/391] selftests: futex: Makefile: fix for loops in targets
 to run silently

Fix for loops in targets to run silently to avoid cluttering the test
results.

Suppresses the following from targets:

for DIR in functional; do               \
        BUILD_TARGET=./tools/testing/selftests/futex/$DIR; \
        mkdir $BUILD_TARGET  -p;        \
        make OUTPUT=$BUILD_TARGET -C $DIR all;\
done

./tools/testing/selftests/futex/run.sh

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/futex/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 9358cb210fd5..f0c0369ccb79 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -7,7 +7,7 @@ TEST_PROGS := run.sh
 include ../lib.mk
 
 all:
-	for DIR in $(SUBDIRS); do		\
+	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
@@ -17,7 +17,7 @@ all:
 	done
 
 override define RUN_TESTS
-	cd $(OUTPUT); ./run.sh
+	@cd $(OUTPUT); ./run.sh
 endef
 
 override define INSTALL_RULE
@@ -36,7 +36,7 @@ override define EMIT_TESTS
 endef
 
 override define CLEAN
-	for DIR in $(SUBDIRS); do		\
+	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\

From 10859f3855db4c6f10dc7974ff4b3a292f3de8e0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 7 Sep 2017 16:32:46 -0700
Subject: [PATCH 359/391] selftests/seccomp: Support glibc 2.26 siginfo_t.h

The 2.26 release of glibc changed how siginfo_t is defined, and the earlier
work-around to using the kernel definition are no longer needed. The old
way needs to stay around for a while, though.

Reported-by: Seth Forshee <seth.forshee@canonical.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Will Drewry <wad@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: linux-kselftest@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 4d6f92a9df6b..19cd272c234d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -6,10 +6,18 @@
  */
 
 #include <sys/types.h>
-#include <asm/siginfo.h>
-#define __have_siginfo_t 1
-#define __have_sigval_t 1
-#define __have_sigevent_t 1
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
 
 #include <errno.h>
 #include <linux/filter.h>
@@ -676,7 +684,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 	syscall(__NR_getpid);
 }
 
-static struct siginfo TRAP_info;
+static siginfo_t TRAP_info;
 static volatile int TRAP_nr;
 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 {

From 21aadfa2426d5d199ceb474d0159d079c7f17bfa Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@cn.fujitsu.com>
Date: Thu, 21 Sep 2017 17:13:27 +0800
Subject: [PATCH 360/391] selftests/memfd: correct run_tests.sh permission

to fix the following issue:
------------------
TAP version 13
selftests: run_tests.sh
========================================
selftests: Warning: file run_tests.sh is not executable, correct this.
not ok 1..1 selftests: run_tests.sh [FAIL]
------------------

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/memfd/run_tests.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 tools/testing/selftests/memfd/run_tests.sh

diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh
old mode 100644
new mode 100755

From 01db7fbf5487505b887fbd6a03c51f2adc952196 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 21 Sep 2017 13:46:01 -0600
Subject: [PATCH 361/391] selftests: timers: set-timer-lat: fix hang when std
 out/err are redirected

do_timer_oneshot() uses select() as a timer with FD_SETSIZE and readfs
is cleared with FD_ZERO without FD_SET.

When stdout and stderr are redirected, the test hangs in select forever.
Fix the problem calling select() with readfds empty and nfds zero. This
is sufficient for using select() for timer.

With this fix "./set-timer-lat > /dev/null 2>&1" no longer hangs.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
Acked-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/timers/set-timer-lat.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 9c92b7bd5641..ea1af5dbc7b6 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -228,7 +228,6 @@ int do_timer_oneshot(int clock_id, int flags)
 	timer_t tm1;
 	const int interval = 0;
 	struct timeval timeout;
-	fd_set fds;
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
@@ -237,9 +236,8 @@ int do_timer_oneshot(int clock_id, int flags)
 
 	memset(&timeout, 0, sizeof(timeout));
 	timeout.tv_sec = 5;
-	FD_ZERO(&fds);
 	do {
-		err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout);
+		err = select(0, NULL, NULL, NULL, &timeout);
 	} while (err == -1 && errno == EINTR);
 
 	timer_delete(tm1);

From eefd95e1f3d47b90dc768e9ebc77d390c4f34809 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 21 Sep 2017 13:05:18 -0600
Subject: [PATCH 362/391] selftests: timers: set-timer-lat: Fix hang when
 testing unsupported alarms

When timer_create() fails on a bootime or realtime clock, setup_timer()
returns 0 as if timer has been set. Callers wait forever for the timer
to expire.

This hang is seen on a system that doesn't have support for:

CLOCK_REALTIME_ALARM   ABSTIME missing CAP_WAKE_ALARM? : [UNSUPPORTED]

Test hangs waiting for a timer that hasn't been set to expire. Fix
setup_timer() to return 1, add handling in callers to detect the
unsupported case and return 0 without waiting to not fail the test.

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/timers/set-timer-lat.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index ea1af5dbc7b6..50da45437daa 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -143,7 +143,8 @@ int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
 			printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
 					clockstring(clock_id),
 					flags ? "ABSTIME":"RELTIME");
-			return 0;
+			/* Indicate timer isn't set, so caller doesn't wait */
+			return 1;
 		}
 		printf("%s - timer_create() failed\n", clockstring(clock_id));
 		return -1;
@@ -213,8 +214,9 @@ int do_timer(int clock_id, int flags)
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
 	if (err)
-		return err;
+		return err == 1 ? 0 : err;
 
 	while (alarmcount < 5)
 		sleep(1);
@@ -231,8 +233,9 @@ int do_timer_oneshot(int clock_id, int flags)
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
 	if (err)
-		return err;
+		return err == 1 ? 0 : err;
 
 	memset(&timeout, 0, sizeof(timeout));
 	timeout.tv_sec = 5;

From 10201655b085df8e000822e496e5d4016a167a36 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Sep 2017 07:15:35 -0500
Subject: [PATCH 363/391] gfs2: Fix debugfs glocks dump

The switch to rhashtables (commit 88ffbf3e03) broke the debugfs glock
dump (/sys/kernel/debug/gfs2/<device>/glocks) for dumps bigger than a
single buffer: the right function for restarting an rhashtable iteration
from the beginning of the hash table is rhashtable_walk_enter;
rhashtable_walk_stop + rhashtable_walk_start will just resume from the
current position.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Cc: stable@vger.kernel.org # v4.3+
---
 fs/gfs2/glock.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 98e845b7841b..11066d8647d2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1945,13 +1945,9 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct gfs2_glock_iter *gi = seq->private;
 	loff_t n = *pos;
-	int ret;
 
-	if (gi->last_pos <= *pos)
-		n = (*pos - gi->last_pos);
-
-	ret = rhashtable_walk_start(&gi->hti);
-	if (ret)
+	rhashtable_walk_enter(&gl_hash_table, &gi->hti);
+	if (rhashtable_walk_start(&gi->hti) != 0)
 		return NULL;
 
 	do {
@@ -1959,6 +1955,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 	} while (gi->gl && n--);
 
 	gi->last_pos = *pos;
+
 	return gi->gl;
 }
 
@@ -1970,6 +1967,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
 	(*pos)++;
 	gi->last_pos = *pos;
 	gfs2_glock_iter_next(gi);
+
 	return gi->gl;
 }
 
@@ -1980,6 +1978,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
 
 	gi->gl = NULL;
 	rhashtable_walk_stop(&gi->hti);
+	rhashtable_walk_exit(&gi->hti);
 }
 
 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2042,12 +2041,10 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
 		struct gfs2_glock_iter *gi = seq->private;
 
 		gi->sdp = inode->i_private;
-		gi->last_pos = 0;
 		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
 		if (seq->buf)
 			seq->size = GFS2_SEQ_GOODSIZE;
 		gi->gl = NULL;
-		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
 	}
 	return ret;
 }
@@ -2063,7 +2060,6 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
 	struct gfs2_glock_iter *gi = seq->private;
 
 	gi->gl = NULL;
-	rhashtable_walk_exit(&gi->hti);
 	return seq_release_private(inode, file);
 }
 

From 8cbd96a6285e8eb65232b5afd3e8d9418453a61c Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 21 Sep 2017 08:13:49 -0700
Subject: [PATCH 364/391] nvme: fix sqhd reference when admin queue connect
 fails

Fix bug in sqhd patch.

It wasn't the sq that was at risk. In the case where the admin queue
connect command fails, the sq->size field is not set. Therefore, this
becomes a divide by zero error.

Add a quick check to bypass under this failure condition.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index c2a768a94235..1b208beeef50 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -390,7 +390,8 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 	if (status)
 		nvmet_set_status(req, status);
 
-	req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
+	if (req->sq->size)
+		req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
 	req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
 	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
 	req->rsp->command_id = req->cmd->common.command_id;

From 1a40d97288c6ffea9b355139e88fa62f0e5439f7 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 21 Sep 2017 17:01:36 +0300
Subject: [PATCH 365/391] nvme-core: Use nvme_wq to queue async events and fw
 activation

async_event_work might race as it is executed from two different
workqueues at the moment.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5589f67d2cd8..bb2aad078637 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2676,7 +2676,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 	case NVME_SC_ABORT_REQ:
 		++ctrl->event_limit;
 		if (ctrl->state == NVME_CTRL_LIVE)
-			schedule_work(&ctrl->async_event_work);
+			queue_work(nvme_wq, &ctrl->async_event_work);
 		break;
 	default:
 		break;
@@ -2691,7 +2691,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		nvme_queue_scan(ctrl);
 		break;
 	case NVME_AER_NOTICE_FW_ACT_STARTING:
-		schedule_work(&ctrl->fw_act_work);
+		queue_work(nvme_wq, &ctrl->fw_act_work);
 		break;
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);

From 0a960afd60d02808c7f7f36d4aa8a2e07045e1e9 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 21 Sep 2017 17:01:37 +0300
Subject: [PATCH 366/391] nvme-rdma: give up reconnect if state change fails

If we failed to transition to state LIVE after a successful reconnect,
then controller deletion already started. In this case there is no
point moving forward with reconnect.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/rdma.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 58983000964b..8441f6b3f617 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -942,7 +942,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
+	if (!changed) {
+		/* state change failure is ok if we're in DELETING state */
+		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+		return;
+	}
+
 	ctrl->ctrl.nr_reconnects = 0;
 
 	nvme_start_ctrl(&ctrl->ctrl);

From e4d753d7e51c0648b9ee33efeed55d45f362fc3d Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 21 Sep 2017 17:01:38 +0300
Subject: [PATCH 367/391] nvme-rdma: don't fully stop the controller in error
 recovery

By calling nvme_stop_ctrl on a already failed controller will wait for the
scan work to complete (only by identify timeout expiration which is 60
seconds). This is unnecessary when we already know that the controller has
failed.

Reported-by: Yi Zhang <yizhan@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 8441f6b3f617..92a03ff5fb4d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -967,7 +967,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	struct nvme_rdma_ctrl *ctrl = container_of(work,
 			struct nvme_rdma_ctrl, err_work);
 
-	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_stop_keep_alive(&ctrl->ctrl);
 
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);

From 3688feb582a1bc4e58ad50f5eccfdb90615de27b Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 19 Sep 2017 15:13:11 -0700
Subject: [PATCH 368/391] nvmet-fc: on port remove call put outside lock

Avoid calling the put routine, as it may traverse to free routines while
holding the target lock.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index c48c83d97e30..6850672ad2a2 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2530,13 +2530,17 @@ nvmet_fc_remove_port(struct nvmet_port *port)
 {
 	struct nvmet_fc_tgtport *tgtport = port->priv;
 	unsigned long flags;
+	bool matched = false;
 
 	spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
 	if (tgtport->port == port) {
-		nvmet_fc_tgtport_put(tgtport);
+		matched = true;
 		tgtport->port = NULL;
 	}
 	spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
+
+	if (matched)
+		nvmet_fc_tgtport_put(tgtport);
 }
 
 static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {

From 0c319d3a144d4b8f1ea2047fd614d2149b68f889 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 19 Sep 2017 16:33:56 -0700
Subject: [PATCH 369/391] nvmet-fc: ensure target queue id within range.

When searching for queue id's ensure they are within the expected range.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 6850672ad2a2..58e010bdda3e 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -783,6 +783,9 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
 	u16 qid = nvmet_fc_getqueueid(connection_id);
 	unsigned long flags;
 
+	if (qid > NVMET_NR_QUEUES)
+		return NULL;
+
 	spin_lock_irqsave(&tgtport->lock, flags);
 	list_for_each_entry(assoc, &tgtport->assoc_list, a_list) {
 		if (association_id == assoc->association_id) {

From 6b71f9e1e849f82abb4a8d54ce7f4b1c71f19ac4 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Wed, 20 Sep 2017 11:07:26 -0700
Subject: [PATCH 370/391] nvmet-fc: sync header templates with comments

Comments were incorrect:
- defer_rcv was in host port template. moved to target port template
- Added Mandatory statements for target port template items

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/nvme-fc-driver.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 9c5cb4480806..a726f96010d5 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -346,11 +346,6 @@ struct nvme_fc_remote_port {
  *       indicating an FC transport Aborted status.
  *       Entrypoint is Mandatory.
  *
- * @defer_rcv:  Called by the transport to signal the LLLD that it has
- *       begun processing of a previously received NVME CMD IU. The LLDD
- *       is now free to re-use the rcv buffer associated with the
- *       nvmefc_tgt_fcp_req.
- *
  * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
  *       supports for cpu affinitization.
  *       Value is Mandatory. Must be at least 1.
@@ -806,11 +801,19 @@ struct nvmet_fc_target_port {
  *       outstanding operation (if there was one) to complete, then will
  *       call the fcp_req_release() callback to return the command's
  *       exchange context back to the LLDD.
+ *       Entrypoint is Mandatory.
  *
  * @fcp_req_release:  Called by the transport to return a nvmefc_tgt_fcp_req
  *       to the LLDD after all operations on the fcp operation are complete.
  *       This may be due to the command completing or upon completion of
  *       abort cleanup.
+ *       Entrypoint is Mandatory.
+ *
+ * @defer_rcv:  Called by the transport to signal the LLLD that it has
+ *       begun processing of a previously received NVME CMD IU. The LLDD
+ *       is now free to re-use the rcv buffer associated with the
+ *       nvmefc_tgt_fcp_req.
+ *       Entrypoint is Optional.
  *
  * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
  *       supports for cpu affinitization.

From fddc9923c6d41de9fe7b1f323a3cece53e046c88 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 19 Sep 2017 14:01:50 -0700
Subject: [PATCH 371/391] nvme-fcloop: fix port deletes and callbacks

Now that there are potentially long delays between when a remoteport or
targetport delete calls is made and when the callback occurs (dev_loss_tmo
timeout), no longer block in the delete routines and move the final nport
puts to the callbacks.

Moved the fcloop_nport_get/put/free routines to avoid forward declarations.

Ensure port_info structs used in registrations are nulled in case fields
are not set (ex: devloss_tmo values).

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/fcloop.c | 102 +++++++++++++----------------------
 1 file changed, 38 insertions(+), 64 deletions(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 1fd1afbb8b2a..7b75d9de55ab 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -224,8 +224,6 @@ struct fcloop_nport {
 	struct fcloop_lport *lport;
 	struct list_head nport_list;
 	struct kref ref;
-	struct completion rport_unreg_done;
-	struct completion tport_unreg_done;
 	u64 node_name;
 	u64 port_name;
 	u32 port_role;
@@ -630,6 +628,32 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
 	schedule_work(&inireq->iniwork);
 }
 
+static void
+fcloop_nport_free(struct kref *ref)
+{
+	struct fcloop_nport *nport =
+		container_of(ref, struct fcloop_nport, ref);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fcloop_lock, flags);
+	list_del(&nport->nport_list);
+	spin_unlock_irqrestore(&fcloop_lock, flags);
+
+	kfree(nport);
+}
+
+static void
+fcloop_nport_put(struct fcloop_nport *nport)
+{
+	kref_put(&nport->ref, fcloop_nport_free);
+}
+
+static int
+fcloop_nport_get(struct fcloop_nport *nport)
+{
+	return kref_get_unless_zero(&nport->ref);
+}
+
 static void
 fcloop_localport_delete(struct nvme_fc_local_port *localport)
 {
@@ -644,8 +668,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
 {
 	struct fcloop_rport *rport = remoteport->private;
 
-	/* release any threads waiting for the unreg to complete */
-	complete(&rport->nport->rport_unreg_done);
+	fcloop_nport_put(rport->nport);
 }
 
 static void
@@ -653,8 +676,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
 {
 	struct fcloop_tport *tport = targetport->private;
 
-	/* release any threads waiting for the unreg to complete */
-	complete(&tport->nport->tport_unreg_done);
+	fcloop_nport_put(tport->nport);
 }
 
 #define	FCLOOP_HW_QUEUES		4
@@ -722,6 +744,7 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr,
 		goto out_free_opts;
 	}
 
+	memset(&pinfo, 0, sizeof(pinfo));
 	pinfo.node_name = opts->wwnn;
 	pinfo.port_name = opts->wwpn;
 	pinfo.port_role = opts->roles;
@@ -804,32 +827,6 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr,
 	return ret ? ret : count;
 }
 
-static void
-fcloop_nport_free(struct kref *ref)
-{
-	struct fcloop_nport *nport =
-		container_of(ref, struct fcloop_nport, ref);
-	unsigned long flags;
-
-	spin_lock_irqsave(&fcloop_lock, flags);
-	list_del(&nport->nport_list);
-	spin_unlock_irqrestore(&fcloop_lock, flags);
-
-	kfree(nport);
-}
-
-static void
-fcloop_nport_put(struct fcloop_nport *nport)
-{
-	kref_put(&nport->ref, fcloop_nport_free);
-}
-
-static int
-fcloop_nport_get(struct fcloop_nport *nport)
-{
-	return kref_get_unless_zero(&nport->ref);
-}
-
 static struct fcloop_nport *
 fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
 {
@@ -938,6 +935,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -EIO;
 
+	memset(&pinfo, 0, sizeof(pinfo));
 	pinfo.node_name = nport->node_name;
 	pinfo.port_name = nport->port_name;
 	pinfo.port_role = nport->port_role;
@@ -979,24 +977,12 @@ __unlink_remote_port(struct fcloop_nport *nport)
 }
 
 static int
-__wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
+__remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
 {
-	int ret;
-
 	if (!rport)
 		return -EALREADY;
 
-	init_completion(&nport->rport_unreg_done);
-
-	ret = nvme_fc_unregister_remoteport(rport->remoteport);
-	if (ret)
-		return ret;
-
-	wait_for_completion(&nport->rport_unreg_done);
-
-	fcloop_nport_put(nport);
-
-	return ret;
+	return nvme_fc_unregister_remoteport(rport->remoteport);
 }
 
 static ssize_t
@@ -1029,7 +1015,7 @@ fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -ENOENT;
 
-	ret = __wait_remoteport_unreg(nport, rport);
+	ret = __remoteport_unreg(nport, rport);
 
 	return ret ? ret : count;
 }
@@ -1086,24 +1072,12 @@ __unlink_target_port(struct fcloop_nport *nport)
 }
 
 static int
-__wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
+__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
 {
-	int ret;
-
 	if (!tport)
 		return -EALREADY;
 
-	init_completion(&nport->tport_unreg_done);
-
-	ret = nvmet_fc_unregister_targetport(tport->targetport);
-	if (ret)
-		return ret;
-
-	wait_for_completion(&nport->tport_unreg_done);
-
-	fcloop_nport_put(nport);
-
-	return ret;
+	return nvmet_fc_unregister_targetport(tport->targetport);
 }
 
 static ssize_t
@@ -1136,7 +1110,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
 	if (!nport)
 		return -ENOENT;
 
-	ret = __wait_targetport_unreg(nport, tport);
+	ret = __targetport_unreg(nport, tport);
 
 	return ret ? ret : count;
 }
@@ -1223,11 +1197,11 @@ static void __exit fcloop_exit(void)
 
 		spin_unlock_irqrestore(&fcloop_lock, flags);
 
-		ret = __wait_targetport_unreg(nport, tport);
+		ret = __targetport_unreg(nport, tport);
 		if (ret)
 			pr_warn("%s: Failed deleting target port\n", __func__);
 
-		ret = __wait_remoteport_unreg(nport, rport);
+		ret = __remoteport_unreg(nport, rport);
 		if (ret)
 			pr_warn("%s: Failed deleting remote port\n", __func__);
 

From b776e4b1a990045a7c70798f1f353c3160c26594 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 25 Sep 2017 20:38:45 -0400
Subject: [PATCH 372/391] fix a typo in put_compat_shm_info()

"uip" misspelled as "up"; unfortunately, the latter happens to be
a function and gcc is happy to convert it to void *...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 ipc/shm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ipc/shm.c b/ipc/shm.c
index 1e2b1692ba2c..badac463e2c8 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1154,7 +1154,7 @@ static int put_compat_shm_info(struct shm_info *ip,
 	info.shm_swp = ip->shm_swp;
 	info.swap_attempts = ip->swap_attempts;
 	info.swap_successes = ip->swap_successes;
-	return copy_to_user(up, &info, sizeof(info));
+	return copy_to_user(uip, &info, sizeof(info));
 }
 
 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,

From 4618e90965f272fe522f2af2523a60d0d4bc78f3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:10 +0200
Subject: [PATCH 373/391] x86/fpu: Fix fpu__activate_fpstate_read() and update
 comments

fpu__activate_fpstate_read() can be called for the current task
when coredumping - or for stopped tasks when ptrace-ing.

Implement this properly in the code and update the comments.

This also fixes an incorrect (but harmless) warning introduced by
one of the earlier patches.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-28-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 93103a909c47..afd3f2a5c64e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -254,18 +254,21 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr);
 /*
  * This function must be called before we read a task's fpstate.
  *
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ *   to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ *   registers were already saved by the context-switch code when
+ *   the task scheduled out - we only have to initialize the registers
+ *   if they've never been initialized.
  *
  * If the task has used the FPU before then save it.
  */
 void fpu__activate_fpstate_read(struct fpu *fpu)
 {
-	/*
-	 * If fpregs are active (in the current CPU), then
-	 * copy them to the fpstate:
-	 */
-	if (fpu->fpstate_active) {
+	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
 	} else {
 		if (!fpu->fpstate_active) {

From 685c930d6e58e31e251ec354f9dca3958a4c5040 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:11 +0200
Subject: [PATCH 374/391] x86/fpu: Remove
 fpu__current_fpstate_write_begin/end()

These functions are not used anymore, so remove them.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Bobby Powers <bobbypowers@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-29-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  2 -
 arch/x86/kernel/fpu/core.c          | 63 -----------------------------
 2 files changed, 65 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index cf290d424e48..508e4181c4af 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -26,8 +26,6 @@
 extern void fpu__activate_curr(struct fpu *fpu);
 extern void fpu__activate_fpstate_read(struct fpu *fpu);
 extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index afd3f2a5c64e..b2cdeb3b1860 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -316,69 +316,6 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	}
 }
 
-/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'.  Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * Ensure that the context-switching code does not write
-	 * over the fpstate while we are doing our update.
-	 */
-	preempt_disable();
-
-	/*
-	 * Move the fpregs in to the fpu's 'fpstate'.
-	 */
-	fpu__activate_fpstate_read(fpu);
-
-	/*
-	 * The caller is about to write to 'fpu'.  Ensure that no
-	 * CPU thinks that its fpregs match the fpstate.  This
-	 * ensures we will not be lazy and skip a XRSTOR in the
-	 * future.
-	 */
-	__fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	/*
-	 * 'fpu' now has an updated copy of the state, but the
-	 * registers may still be out of date.  Update them with
-	 * an XRSTOR if they are active.
-	 */
-	if (fpu->fpstate_active)
-		copy_kernel_to_fpregs(&fpu->state);
-
-	/*
-	 * Our update is done and the fpregs/fpstate are in sync
-	 * if necessary.  Context switches can happen again.
-	 */
-	preempt_enable();
-}
-
 /*
  * 'fpu__restore()' is called to copy FPU registers from
  * the FPU fpstate to the live hw registers and to activate

From e4a81bfcaae1ebbdc6efe74e8ea563144d90e9a9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 26 Sep 2017 09:43:36 +0200
Subject: [PATCH 375/391] x86/fpu: Rename fpu::fpstate_active to
 fpu::initialized

The x86 FPU code used to have a complex state machine where both the FPU
registers and the FPU state context could be 'active' (or inactive)
independently of each other - which enabled features like lazy FPU restore.

Much of this complexity is gone in the current code: now we basically can
have FPU-less tasks (kernel threads) that don't use (and save/restore) FPU
state at all, plus full FPU users that save/restore directly with no laziness
whatsoever.

But the fpu::fpstate_active still carries bits of the old complexity - meanwhile
this flag has become a simple flag that shows whether the FPU context saving
area in the thread struct is initialized and used, or not.

Rename it to fpu::initialized to express this simplicity in the name as well.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-30-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/ia32/ia32_signal.c         |  2 +-
 arch/x86/include/asm/fpu/internal.h |  4 ++--
 arch/x86/include/asm/fpu/types.h    |  6 +++---
 arch/x86/include/asm/trace/fpu.h    |  8 ++++----
 arch/x86/kernel/fpu/core.c          | 24 ++++++++++++------------
 arch/x86/kernel/fpu/init.c          |  2 +-
 arch/x86/kernel/fpu/regset.c        |  6 +++---
 arch/x86/kernel/fpu/signal.c        |  8 ++++----
 arch/x86/kernel/fpu/xstate.c        |  2 +-
 arch/x86/kernel/signal.c            |  6 +++---
 arch/x86/mm/pkeys.c                 |  2 +-
 11 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e0bb46c02857..0e2a5edbce00 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 		 ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		unsigned long fx_aligned, math_size;
 
 		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 508e4181c4af..b26ae05da18a 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -527,7 +527,7 @@ static inline void fpregs_activate(struct fpu *fpu)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (old_fpu->fpstate_active) {
+	if (old_fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
@@ -550,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
 {
 	bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-		       new_fpu->fpstate_active;
+		       new_fpu->initialized;
 
 	if (preload) {
 		if (!fpregs_state_valid(new_fpu, cpu))
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 71db45ca8870..a1520575d86b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -293,13 +293,13 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @fpstate_active:
+	 * @initialized:
 	 *
-	 * This flag indicates whether this context is active: if the task
+	 * This flag indicates whether this context is initialized: if the task
 	 * is not running then we can restore from this context, if the task
 	 * is running then we should save into this context.
 	 */
-	unsigned char			fpstate_active;
+	unsigned char			initialized;
 
 	/*
 	 * @state:
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index da565aae9fd2..39f7a27bef13 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,22 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_STRUCT__entry(
 		__field(struct fpu *, fpu)
-		__field(bool, fpstate_active)
+		__field(bool, initialized)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
 		),
 
 	TP_fast_assign(
 		__entry->fpu		= fpu;
-		__entry->fpstate_active	= fpu->fpstate_active;
+		__entry->initialized	= fpu->initialized;
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
 			__entry->fpu,
-			__entry->fpstate_active,
+			__entry->initialized,
 			__entry->xfeatures,
 			__entry->xcomp_bv
 	)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index b2cdeb3b1860..c8d6032f04d0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (fpu->fpstate_active)
+	if (fpu->initialized)
 		copy_kernel_to_fpregs(&fpu->state);
 
 	kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
 
 	preempt_disable();
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
@@ -191,7 +191,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
 	dst_fpu->last_cpu = -1;
 
-	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -240,13 +240,13 @@ void fpu__activate_curr(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	if (!fpu->fpstate_active) {
+	if (!fpu->initialized) {
 		fpstate_init(&fpu->state);
 		trace_x86_fpu_init_state(fpu);
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for the current task: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
 EXPORT_SYMBOL_GPL(fpu__activate_curr);
@@ -271,13 +271,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
 	} else {
-		if (!fpu->fpstate_active) {
+		if (!fpu->initialized) {
 			fpstate_init(&fpu->state);
 			trace_x86_fpu_init_state(fpu);
 
 			trace_x86_fpu_activate_state(fpu);
 			/* Safe to do for current and for stopped child tasks: */
-			fpu->fpstate_active = 1;
+			fpu->initialized = 1;
 		}
 	}
 }
@@ -303,7 +303,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	 */
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		/* Invalidate any lazy state: */
 		__fpu_invalidate_fpregs_state(fpu);
 	} else {
@@ -312,7 +312,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 
 		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for stopped child tasks: */
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 	}
 }
 
@@ -354,7 +354,7 @@ void fpu__drop(struct fpu *fpu)
 	preempt_disable();
 
 	if (fpu == &current->thread.fpu) {
-		if (fpu->fpstate_active) {
+		if (fpu->initialized) {
 			/* Ignore delayed exceptions from user space */
 			asm volatile("1: fwait\n"
 				     "2:\n"
@@ -363,7 +363,7 @@ void fpu__drop(struct fpu *fpu)
 		}
 	}
 
-	fpu->fpstate_active = 0;
+	fpu->initialized = 0;
 
 	trace_x86_fpu_dropped(fpu);
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d5d44c452624..7affb7e3d9a5 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 
-	WARN_ON_FPU(current->thread.fpu.fpstate_active);
+	WARN_ON_FPU(current->thread.fpu.initialized);
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index c764f7405322..19e82334e811 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
-	return target_fpu->fpstate_active ? regset->n : 0;
+	return target_fpu->initialized ? regset->n : 0;
 }
 
 int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
-	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
 		return regset->n;
 	else
 		return 0;
@@ -380,7 +380,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
 	struct fpu *fpu = &tsk->thread.fpu;
 	int fpvalid;
 
-	fpvalid = fpu->fpstate_active;
+	fpvalid = fpu->initialized;
 	if (fpvalid)
 		fpvalid = !fpregs_get(tsk, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index da68ea1c3a44..ab2dd24cfea4 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -171,7 +171,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpu->fpstate_active || using_compacted_format()) {
+	if (fpu->initialized || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
@@ -315,12 +315,12 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		int err = 0;
 
 		/*
-		 * Drop the current fpu which clears fpu->fpstate_active. This ensures
+		 * Drop the current fpu which clears fpu->initialized. This ensures
 		 * that any context-switch during the copy of the new state,
 		 * avoids the intermediate state from getting restored/saved.
 		 * Thus avoiding the new restored state from getting corrupted.
 		 * We will be ready to restore/save the state only after
-		 * fpu->fpstate_active is again set.
+		 * fpu->initialized is again set.
 		 */
 		fpu__drop(fpu);
 
@@ -342,7 +342,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
 		}
 
-		fpu->fpstate_active = 1;
+		fpu->initialized = 1;
 		preempt_disable();
 		fpu__restore(fpu);
 		preempt_enable();
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index fda1109cc355..703e76d027ee 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -867,7 +867,7 @@ const void *get_xsave_field_ptr(int xsave_state)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (!fpu->fpstate_active)
+	if (!fpu->initialized)
 		return NULL;
 	/*
 	 * fpu__save() takes the CPU's xstate registers
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e04442345fc0..4e188fda5961 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
-	if (fpu->fpstate_active) {
+	if (fpu->initialized) {
 		sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
 					  &buf_fx, &math_size);
 		*fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 		return (void __user *)-1L;
 
 	/* save i387 and extended state */
-	if (fpu->fpstate_active &&
+	if (fpu->initialized &&
 	    copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
 		return (void __user *)-1L;
 
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		/*
 		 * Ensure the signal handler starts with the new fpu state.
 		 */
-		if (fpu->fpstate_active)
+		if (fpu->initialized)
 			fpu__clear(fpu);
 	}
 	signal_setup_done(failed, ksig, stepping);
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 4d24269c071f..d7bc0eea20a5 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -44,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
 	 */
 	preempt_disable();
 	if (!need_to_set_mm_pkey &&
-	    current->thread.fpu.fpstate_active &&
+	    current->thread.fpu.initialized &&
 	    !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
 		preempt_enable();
 		return execute_only_pkey;

From 7f1487c59b7c6dcb20155f4302985da2659a2997 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:13 +0200
Subject: [PATCH 376/391] x86/fpu: Fix stale comments about lazy FPU logic

We don't do any lazy restore anymore, what we have are two pieces of optimization:

 - no-FPU tasks that don't save/restore the FPU context (kernel threads are such)

 - cached FPU registers maintained via the fpu->last_cpu field. This means that
   if an FPU task context switches to a non-FPU task then we can maintain the
   FPU registers as an in-FPU copies (cache), and skip the restoration of them
   once we switch back to the original FPU-using task.

Update all the comments that still referred to old 'lazy' and 'unlazy' concepts.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-31-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index c8d6032f04d0..77668d91fdc1 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -205,9 +205,6 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	/*
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
-	 * In lazy mode, if the FPU context isn't loaded into
-	 * fpregs, CR0.TS will be set and do_device_not_available
-	 * will load the FPU context.
 	 *
 	 * We have to do all this with preemption disabled,
 	 * mostly because of the FNSAVE case, because in that
@@ -285,13 +282,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
  * If the task has not used the FPU before then initialize its fpstate.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
  * restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
@@ -304,7 +301,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
 	if (fpu->initialized) {
-		/* Invalidate any lazy state: */
+		/* Invalidate any cached state: */
 		__fpu_invalidate_fpregs_state(fpu);
 	} else {
 		fpstate_init(&fpu->state);

From e10078eba69859359ce8644dd423b4132a6a8913 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:14 +0200
Subject: [PATCH 377/391] x86/fpu: Simplify and speed up fpu__copy()

fpu__copy() has a preempt_disable()/enable() pair, which it had to do to
be able to atomically unlazy the current task when doing an FNSAVE.

But we don't unlazy tasks anymore, we always do direct saves/restores of
FPU context.

So remove both the unnecessary critical section, and update the comments.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-32-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 77668d91fdc1..52122dd418ae 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -206,22 +206,13 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * Save current FPU registers directly into the child
 	 * FPU context, without any memory-to-memory copying.
 	 *
-	 * We have to do all this with preemption disabled,
-	 * mostly because of the FNSAVE case, because in that
-	 * case we must not allow preemption in the window
-	 * between the FNSAVE and us marking the context lazy.
-	 *
-	 * It shouldn't be an issue as even FNSAVE is plenty
-	 * fast in terms of critical section length.
+	 * ( The function 'fails' in the FNSAVE case, which destroys
+	 *   register contents so we have to copy them back. )
 	 */
-	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state,
-		       fpu_kernel_xstate_size);
-
+		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
 		copy_kernel_to_fpregs(&src_fpu->state);
 	}
-	preempt_enable();
 
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);

From 2ce03d850b9a2f17d55596ecfa86e72b5687a627 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 15:00:15 +0200
Subject: [PATCH 378/391] x86/fpu: Rename fpu__activate_curr() to
 fpu__initialize()

Rename this function to better express that it's all about
initializing the FPU state of a task which goes hand in hand
with the fpu::initialized field.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/20170923130016.21448-33-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h | 2 +-
 arch/x86/kernel/fpu/core.c          | 8 ++++----
 arch/x86/kernel/fpu/signal.c        | 2 +-
 arch/x86/kvm/x86.c                  | 2 +-
 arch/x86/math-emu/fpu_entry.c       | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index b26ae05da18a..7c980aafb8aa 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -23,7 +23,7 @@
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__activate_curr(struct fpu *fpu);
+extern void fpu__initialize(struct fpu *fpu);
 extern void fpu__activate_fpstate_read(struct fpu *fpu);
 extern void fpu__activate_fpstate_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 52122dd418ae..07db9d94b68b 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -224,7 +224,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
@@ -237,7 +237,7 @@ void fpu__activate_curr(struct fpu *fpu)
 		fpu->initialized = 1;
 	}
 }
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
@@ -316,7 +316,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
  */
 void fpu__restore(struct fpu *fpu)
 {
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
 	kernel_fpu_disable();
@@ -392,7 +392,7 @@ void fpu__clear(struct fpu *fpu)
 	 */
 	if (static_cpu_has(X86_FEATURE_FPU)) {
 		preempt_disable();
-		fpu__activate_curr(fpu);
+		fpu__initialize(fpu);
 		user_fpu_begin();
 		copy_init_fpstate_to_fpregs();
 		preempt_enable();
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index ab2dd24cfea4..7fa3bdb331e9 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -280,7 +280,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	if (!access_ok(VERIFY_READ, buf, size))
 		return -EACCES;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(current, NULL,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd17b7d9a107..03869eb7fcd6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index d4a7df2205b8..220638a4cb94 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
 	struct desc_struct code_descriptor;
 	struct fpu *fpu = &current->thread.fpu;
 
-	fpu__activate_curr(fpu);
+	fpu__initialize(fpu);
 
 #ifdef RE_ENTRANT_CHECKING
 	if (emulating) {

From 369a036de206710ff27a66f9bffe78ef657648c3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 23 Sep 2017 13:37:45 +0200
Subject: [PATCH 379/391] x86/fpu: Rename fpu__activate_fpstate_read/write() to
 fpu__prepare_[read|write]()

As per the new nomenclature we don't 'activate' the FPU state
anymore, we initialize it. So drop the _activate_fpstate name
from these functions, which were a bit of a mouthful anyway,
and name them:

	fpu__prepare_read()
	fpu__prepare_write()

Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/internal.h |  4 ++--
 arch/x86/kernel/fpu/core.c          |  4 ++--
 arch/x86/kernel/fpu/regset.c        | 12 ++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 7c980aafb8aa..e3221ffa304e 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -24,8 +24,8 @@
  * High level FPU state handling functions:
  */
 extern void fpu__initialize(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 07db9d94b68b..f92a6593de1e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(fpu__initialize);
  *
  * If the task has used the FPU before then save it.
  */
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
 {
 	if (fpu == &current->thread.fpu) {
 		fpu__save(fpu);
@@ -283,7 +283,7 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
 {
 	/*
 	 * Only stopped child tasks can be used to modify the FPU
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 19e82334e811..ee8d2f049818 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,7 +89,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (using_compacted_format()) {
 		if (kbuf)
@@ -132,7 +132,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	xsave = &fpu->state.xsave;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		if (kbuf)
@@ -310,7 +310,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
 	struct fpu *fpu = &target->thread.fpu;
 	struct user_i387_ia32_struct env;
 
-	fpu__activate_fpstate_read(fpu);
+	fpu__prepare_read(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -340,7 +340,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	struct user_i387_ia32_struct env;
 	int ret;
 
-	fpu__activate_fpstate_write(fpu);
+	fpu__prepare_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
 	if (!boot_cpu_has(X86_FEATURE_FPU))

From e63e5d5c15c6b1dba26f7cbd1b1089a1d6155db5 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:04 +0200
Subject: [PATCH 380/391] x86/fpu: Introduce validate_xstate_header()

Move validation of user-supplied xstate_header into a helper function,
in preparation of calling it from both the ptrace and sigreturn syscall
paths.

The new function also considers it to be an error if *any* reserved bits
are set, whereas before we were just clearing most of them silently.

This should reduce the chance of bugs that fail to correctly validate
user-supplied XSAVE areas.  It also will expose any broken userspace
programs that set the other reserved bits; this is desirable because
such programs will lose compatibility with future CPUs and kernels if
those bits are ever used for anything.  (There shouldn't be any such
programs, and in fact in the case where the compacted format is in use
we were already validating xfeatures.  But you never know...)

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-2-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/xstate.h |  4 ++++
 arch/x86/kernel/fpu/xstate.c      | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 579ac2358e63..83fee2469eb7 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -52,4 +52,8 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
 #endif
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 703e76d027ee..2427aeea33b5 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -483,6 +483,30 @@ int using_compacted_format(void)
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+	/* No unknown or supervisor features may be set */
+	if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+		return -EINVAL;
+
+	/* Userspace must use the uncompacted format */
+	if (hdr->xcomp_bv)
+		return -EINVAL;
+
+	/*
+	 * If 'reserved' is shrunken to add a new field, make sure to validate
+	 * that new field here!
+	 */
+	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+	/* No reserved bits may be set */
+	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+		return -EINVAL;
+
+	return 0;
+}
+
 static void __xstate_dump_leaves(void)
 {
 	int i;

From cf9df81b139b6ebaec188d73758f02ca3b2110e4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:05 +0200
Subject: [PATCH 381/391] x86/fpu: Use validate_xstate_header() to validate the
 xstate_header in xstateregs_set()

Tighten the checks in xstateregs_set().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-3-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/regset.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index ee8d2f049818..b831d5b9de99 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -141,28 +141,21 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 			ret = copy_user_to_xstate(xsave, ubuf);
 	} else {
 		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
-		/* xcomp_bv must be 0 when using uncompacted format */
-		if (!ret && xsave->header.xcomp_bv)
-			ret = -EINVAL;
+		if (!ret)
+			ret = validate_xstate_header(&xsave->header);
 	}
 
+	/*
+	 * mxcsr reserved bits must be masked to zero for security reasons.
+	 */
+	xsave->i387.mxcsr &= mxcsr_feature_mask;
+
 	/*
 	 * In case of failure, mark all states as init:
 	 */
 	if (ret)
 		fpstate_init(&fpu->state);
 
-	/*
-	 * mxcsr reserved bits must be masked to zero for security reasons.
-	 */
-	xsave->i387.mxcsr &= mxcsr_feature_mask;
-	xsave->header.xfeatures &= xfeatures_mask;
-	/*
-	 * These bits must be zero.
-	 */
-	memset(&xsave->header.reserved, 0, 48);
-
 	return ret;
 }
 

From b11e2e18a7fc8eaa3d592c260d50c7129e094ded Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:06 +0200
Subject: [PATCH 382/391] x86/fpu: Use validate_xstate_header() to validate the
 xstate_header in __fpu__restore_sig()

Tighten the checks in __fpu__restore_sig() and update comments.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-4-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/signal.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 7fa3bdb331e9..fb639e70048f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -214,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
 	struct xstate_header *header = &xsave->header;
 
 	if (use_xsave()) {
-		/* These bits must be zero. */
-		memset(header->reserved, 0, 48);
+		/*
+		 * Note: we don't need to zero the reserved bits in the
+		 * xstate_header here because we either didn't copy them at all,
+		 * or we checked earlier that they aren't set.
+		 */
 
 		/*
 		 * Init the state that is not present in the memory
@@ -224,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
 		if (fx_only)
 			header->xfeatures = XFEATURE_MASK_FPSSE;
 		else
-			header->xfeatures &= (xfeatures_mask & xfeatures);
+			header->xfeatures &= xfeatures;
 	}
 
 	if (use_fxsr()) {
@@ -308,7 +311,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		/*
 		 * For 32-bit frames with fxstate, copy the user state to the
 		 * thread's fpu state, reconstruct fxstate from the fsave
-		 * header. Sanitize the copied state etc.
+		 * header. Validate and sanitize the copied state.
 		 */
 		struct fpu *fpu = &tsk->thread.fpu;
 		struct user_i387_ia32_struct env;
@@ -329,9 +332,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		} else {
 			err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
 
-			/* xcomp_bv must be 0 when using uncompacted format */
-			if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv)
-				err = -EINVAL;
+			if (!err && state_size > offsetof(struct xregs_state, header))
+				err = validate_xstate_header(&fpu->state.xsave.header);
 		}
 
 		if (err || __copy_from_user(&env, buf, sizeof(env))) {

From 80d8ae86b36791a545ca28ddc95133ea59bba6e0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:07 +0200
Subject: [PATCH 383/391] x86/fpu: Copy the full state_header in
 copy_kernel_to_xstate()

This is in preparation to verify the full xstate header as supplied by user-space.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-5-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 2427aeea33b5..02591b96bb25 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1148,11 +1148,13 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 	int i;
 	u64 xfeatures;
 	u64 allowed_features;
+	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
-	size = sizeof(xfeatures);
+	size = sizeof(hdr);
 
-	memcpy(&xfeatures, kbuf + offset, size);
+	memcpy(&hdr, kbuf + offset, size);
+	xfeatures = hdr.xfeatures;
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:

From b89eda482d7849a1c146b6d0a42f4e76369bb08e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:08 +0200
Subject: [PATCH 384/391] x86/fpu: Eliminate the 'xfeatures' local variable in
 copy_kernel_to_xstate()

We have this information in the xstate_header.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-6-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 02591b96bb25..c97c4a9db52a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1146,7 +1146,6 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 xfeatures;
 	u64 allowed_features;
 	struct xstate_header hdr;
 
@@ -1154,20 +1153,19 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 	size = sizeof(hdr);
 
 	memcpy(&hdr, kbuf + offset, size);
-	xfeatures = hdr.xfeatures;
 
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
 	 */
 	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
 
-	if (xfeatures & ~allowed_features)
+	if (hdr.xfeatures & ~allowed_features)
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
-		if (xfeatures & mask) {
+		if (hdr.xfeatures & mask) {
 			void *dst = __raw_xsave_addr(xsave, 1 << i);
 
 			offset = xstate_offsets[i];
@@ -1177,7 +1175,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 		}
 	}
 
-	if (xfeatures_mxcsr_quirk(xfeatures)) {
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
 		offset = offsetof(struct fxregs_state, mxcsr);
 		size = MXCSR_AND_FLAGS_SIZE;
 		memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
@@ -1192,7 +1190,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 	/*
 	 * Add back in the features that came in from userspace:
 	 */
-	xsave->header.xfeatures |= xfeatures;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
 	return 0;
 }

From af95774b3ca080b0e1e651c0fc7680f3444ddda7 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:09 +0200
Subject: [PATCH 385/391] x86/fpu: Use validate_xstate_header() to validate the
 xstate_header in copy_kernel_to_xstate()

Tighten the checks in copy_kernel_to_xstate().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-7-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c97c4a9db52a..325db7850335 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1138,15 +1138,12 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
 
 /*
  * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * and copy to the target thread. This is called from xstateregs_set().
  */
 int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 allowed_features;
 	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
@@ -1154,12 +1151,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 
 	memcpy(&hdr, kbuf + offset, size);
 
-	/*
-	 * Reject if the user sets any disabled or supervisor features:
-	 */
-	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
-
-	if (hdr.xfeatures & ~allowed_features)
+	if (validate_xstate_header(&hdr))
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {

From af2c4322d986a08a6e793b74b83a62b325019c20 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:10 +0200
Subject: [PATCH 386/391] x86/fpu: Copy the full header in
 copy_user_to_xstate()

This is in preparation to verify the full xstate header as supplied by user-space.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-8-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 325db7850335..0cd7b73c25e8 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1199,13 +1199,16 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	int i;
 	u64 xfeatures;
 	u64 allowed_features;
+	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
-	size = sizeof(xfeatures);
+	size = sizeof(hdr);
 
-	if (__copy_from_user(&xfeatures, ubuf + offset, size))
+	if (__copy_from_user(&hdr, ubuf + offset, size))
 		return -EFAULT;
 
+	xfeatures = hdr.xfeatures;
+
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
 	 */

From 3d703477bcfe8bb57079d97198cf1e342fe1fef9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:11 +0200
Subject: [PATCH 387/391] x86/fpu: Eliminate the 'xfeatures' local variable in
 copy_user_to_xstate()

We now have this field in hdr.xfeatures.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-9-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0cd7b73c25e8..b6d78b78b5c2 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1197,7 +1197,6 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 xfeatures;
 	u64 allowed_features;
 	struct xstate_header hdr;
 
@@ -1207,20 +1206,18 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	if (__copy_from_user(&hdr, ubuf + offset, size))
 		return -EFAULT;
 
-	xfeatures = hdr.xfeatures;
-
 	/*
 	 * Reject if the user sets any disabled or supervisor features:
 	 */
 	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
 
-	if (xfeatures & ~allowed_features)
+	if (hdr.xfeatures & ~allowed_features)
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {
 		u64 mask = ((u64)1 << i);
 
-		if (xfeatures & mask) {
+		if (hdr.xfeatures & mask) {
 			void *dst = __raw_xsave_addr(xsave, 1 << i);
 
 			offset = xstate_offsets[i];
@@ -1231,7 +1228,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 		}
 	}
 
-	if (xfeatures_mxcsr_quirk(xfeatures)) {
+	if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
 		offset = offsetof(struct fxregs_state, mxcsr);
 		size = MXCSR_AND_FLAGS_SIZE;
 		if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
@@ -1247,7 +1244,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	/*
 	 * Add back in the features that came in from userspace:
 	 */
-	xsave->header.xfeatures |= xfeatures;
+	xsave->header.xfeatures |= hdr.xfeatures;
 
 	return 0;
 }

From 98c0fad9d60e8b2cd47e15b7bee7df343648f5bb Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:12 +0200
Subject: [PATCH 388/391] x86/fpu: Use validate_xstate_header() to validate the
 xstate_header in copy_user_to_xstate()

Tighten the checks in copy_user_to_xstate().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-10-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/xstate.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b6d78b78b5c2..f1d5476c9022 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1188,16 +1188,15 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 }
 
 /*
- * Convert from a ptrace standard-format user-space buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
  */
 int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 {
 	unsigned int offset, size;
 	int i;
-	u64 allowed_features;
 	struct xstate_header hdr;
 
 	offset = offsetof(struct xregs_state, header);
@@ -1206,12 +1205,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
 	if (__copy_from_user(&hdr, ubuf + offset, size))
 		return -EFAULT;
 
-	/*
-	 * Reject if the user sets any disabled or supervisor features:
-	 */
-	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
-
-	if (hdr.xfeatures & ~allowed_features)
+	if (validate_xstate_header(&hdr))
 		return -EINVAL;
 
 	for (i = 0; i < XFEATURE_MAX; i++) {

From 738f48cb5fdd5878d11934f1898aa2bcf1578289 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 24 Sep 2017 12:59:13 +0200
Subject: [PATCH 389/391] x86/fpu: Use using_compacted_format() instead of open
 coded X86_FEATURE_XSAVES

This is the canonical method to use.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: kernel-hardening@lists.openwall.com
Link: http://lkml.kernel.org/r/20170924105913.9157-11-mingo@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/regset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b831d5b9de99..3ea151372389 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -134,7 +134,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	fpu__prepare_write(fpu);
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+	if (using_compacted_format()) {
 		if (kbuf)
 			ret = copy_kernel_to_xstate(xsave, kbuf);
 		else

From fc46820b27a2d9a46f7e90c9ceb4a64a1bc5fab8 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 25 Sep 2017 12:23:03 +0200
Subject: [PATCH 390/391] vfs: Return -ENXIO for negative SEEK_HOLE / SEEK_DATA
 offsets

In generic_file_llseek_size, return -ENXIO for negative offsets as well
as offsets beyond EOF.  This affects filesystems which don't implement
SEEK_HOLE / SEEK_DATA internally, possibly because they don't support
holes.

Fixes xfstest generic/448.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/read_write.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index a2b9a47235c5..f0d4b16873e8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
 		 * In the generic case the entire file is data, so as long as
 		 * offset isn't at the end of the file then the offset is data.
 		 */
-		if (offset >= eof)
+		if ((unsigned long long)offset >= eof)
 			return -ENXIO;
 		break;
 	case SEEK_HOLE:
@@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
 		 * There is a virtual hole at the end of the file, so as long as
 		 * offset isn't i_size or larger, return i_size.
 		 */
-		if (offset >= eof)
+		if ((unsigned long long)offset >= eof)
 			return -ENXIO;
 		offset = eof;
 		break;

From 4c6bb69663b3a3f2db8f488356e96acb5460f25f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 26 Sep 2017 10:36:05 +0200
Subject: [PATCH 391/391] quota: Fix quota corruption with generic/232 test

Eric has reported that since commit d2faa415166b "quota: Do not acquire
dqio_sem for dquot overwrites in v2 format" test generic/232
occasionally fails due to quota information being incorrect. Indeed that
commit was too eager to remove dqio_sem completely from the path that
just overwrites quota structure with updated information. Although that
is innocent on its own, another process that inserts new quota structure
to the same block can perform read-modify-write cycle of that block thus
effectively discarding quota information update if they race in a wrong
way.

Fix the problem by acquiring dqio_sem for reading for overwrites of
quota structure. Note that it *is* possible to completely avoid taking
dqio_sem in the overwrite path however that will require modifying path
inserting / deleting quota structures to avoid RMW cycles of the full
block and for now it is not clear whether it is worth the hassle.

Fixes: d2faa415166b2883428efa92f451774ef44373ac
Reported-and-tested-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/quota_v2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index c0187cda2c1e..a73e5b34db41 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot)
 	if (!dquot->dq_off) {
 		alloc = true;
 		down_write(&dqopt->dqio_sem);
+	} else {
+		down_read(&dqopt->dqio_sem);
 	}
 	ret = qtree_write_dquot(
 			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
 			dquot);
 	if (alloc)
 		up_write(&dqopt->dqio_sem);
+	else
+		up_read(&dqopt->dqio_sem);
 	return ret;
 }