From 07bf7908950a8b14e81aa1807e3c667eab39287a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 1 Aug 2018 13:45:11 +0200 Subject: [PATCH 001/499] xfrm: Validate address prefix lengths in the xfrm selector. We don't validate the address prefix lengths in the xfrm selector we got from userspace. This can lead to undefined behaviour in the address matching functions if the prefix is too big for the given address family. Fix this by checking the prefixes and refuse SA/policy insertation when a prefix is invalid. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Air Icy Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 33878e6e0d0a..5151b3ebf068 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1359,10 +1365,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; From 0aebe40bae6cf5652fdc3d05ecee15fbf5748194 Mon Sep 17 00:00:00 2001 From: Shreyas NC Date: Fri, 27 Jul 2018 14:44:08 +0530 Subject: [PATCH 002/499] soundwire: Fix duplicate stream state assignment For a SoundWire stream it is expected that a Slave is added to the stream before Master is added. So, move the stream state to CONFIGURED after the first Slave is added and remove the stream state assignment for Master add. Along with these changes, add additional comments to explain the same. Signed-off-by: Shreyas NC Acked-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 4b5e250e8615..7ba6d4d8cd03 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1123,8 +1123,6 @@ int sdw_stream_add_master(struct sdw_bus *bus, if (ret) goto stream_error; - stream->state = SDW_STREAM_CONFIGURED; - stream_error: sdw_release_master_stream(stream); error: @@ -1141,6 +1139,10 @@ EXPORT_SYMBOL(sdw_stream_add_master); * @stream: SoundWire stream * @port_config: Port configuration for audio stream * @num_ports: Number of ports + * + * It is expected that Slave is added before adding Master + * to the Stream. + * */ int sdw_stream_add_slave(struct sdw_slave *slave, struct sdw_stream_config *stream_config, @@ -1186,6 +1188,12 @@ int sdw_stream_add_slave(struct sdw_slave *slave, if (ret) goto stream_error; + /* + * Change stream state to CONFIGURED on first Slave add. + * Bus is not aware of number of Slave(s) in a stream at this + * point so cannot depend on all Slave(s) to be added in order to + * change stream state to CONFIGURED. 
+ */ stream->state = SDW_STREAM_CONFIGURED; goto error; From 3fef1a2259c556cce34df2791688cb3001f81c92 Mon Sep 17 00:00:00 2001 From: Shreyas NC Date: Fri, 27 Jul 2018 14:44:09 +0530 Subject: [PATCH 003/499] soundwire: Fix incorrect exit after configuring stream In sdw_stream_add_master() after the Master ports are configured, the stream is released incorrectly. So, fix it by avoiding stream release after configuring the Master for the stream. While at it, rename the label appropriately. Signed-off-by: Shreyas NC Acked-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 7ba6d4d8cd03..b2682272503e 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1112,7 +1112,7 @@ int sdw_stream_add_master(struct sdw_bus *bus, "Master runtime config failed for stream:%s", stream->name); ret = -ENOMEM; - goto error; + goto unlock; } ret = sdw_config_stream(bus->dev, stream, stream_config, false); @@ -1123,9 +1123,11 @@ int sdw_stream_add_master(struct sdw_bus *bus, if (ret) goto stream_error; + goto unlock; + stream_error: sdw_release_master_stream(stream); -error: +unlock: mutex_unlock(&bus->bus_lock); return ret; } From 8d6ccf5cebbc7ed1dee9986e36853a78dfb64084 Mon Sep 17 00:00:00 2001 From: Sanyog Kale Date: Fri, 27 Jul 2018 14:44:10 +0530 Subject: [PATCH 004/499] soundwire: Fix acquiring bus lock twice during master release As part of sdw_stream_remove_master(), sdw_stream_remove_slave() is called which results in bus lock being acquired twice. So, fix it by performing specific Slave remove operations in sdw_release_master_stream() instead of calling sdw_stream_remove_slave(). Signed-off-by: Sanyog Kale Signed-off-by: Shreyas NC Acked-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index b2682272503e..e5c7e1ef6318 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -899,9 +899,10 @@ static void sdw_release_master_stream(struct sdw_stream_runtime *stream) struct sdw_master_runtime *m_rt = stream->m_rt; struct sdw_slave_runtime *s_rt, *_s_rt; - list_for_each_entry_safe(s_rt, _s_rt, - &m_rt->slave_rt_list, m_rt_node) - sdw_stream_remove_slave(s_rt->slave, stream); + list_for_each_entry_safe(s_rt, _s_rt, &m_rt->slave_rt_list, m_rt_node) { + sdw_slave_port_release(s_rt->slave->bus, s_rt->slave, stream); + sdw_release_slave_stream(s_rt->slave, stream); + } list_del(&m_rt->bus_node); } From 823f18f8b860526fc099c222619a126d57d2ad8c Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 29 Aug 2018 15:36:10 +0300 Subject: [PATCH 005/499] regulator: bd71837: Disable voltage monitoring for LDO3/4 There is a HW quirk in BD71837. The shutdown sequence timings for bucks/LDOs which are enabled via register interface are changed. At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the beginning of shut-down sequence. This causes LDO5/6 voltage monitoring to detect under voltage and force PMIC to emergency state instead of poweroff. Disable voltage monitoring for LDO5 and LDO6 at probe to avoid this. 
Signed-off-by: Matti Vaittinen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/bd71837-regulator.c | 19 +++++++++++++++ include/linux/mfd/rohm-bd718x7.h | 33 ++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/bd71837-regulator.c b/drivers/regulator/bd71837-regulator.c index 0f8ac8dec3e1..a1bd8aaf4d98 100644 --- a/drivers/regulator/bd71837-regulator.c +++ b/drivers/regulator/bd71837-regulator.c @@ -569,6 +569,25 @@ static int bd71837_probe(struct platform_device *pdev) BD71837_REG_REGLOCK); } + /* + * There is a HW quirk in BD71837. The shutdown sequence timings for + * bucks/LDOs which are controlled via register interface are changed. + * At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the + * beginning of shut-down sequence. As bucks 6 and 7 are parent + * supplies for LDO5 and LDO6 - this causes LDO5/6 voltage + * monitoring to errorneously detect under voltage and force PMIC to + * emergency state instead of poweroff. In order to avoid this we + * disable voltage monitoring for LDO5 and LDO6 + */ + err = regmap_update_bits(pmic->mfd->regmap, BD718XX_REG_MVRFLTMASK2, + BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80, + BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80); + if (err) { + dev_err(&pmic->pdev->dev, + "Failed to disable voltage monitoring\n"); + goto err; + } + for (i = 0; i < ARRAY_SIZE(pmic_regulator_inits); i++) { struct regulator_desc *desc; diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h index a528747f8aed..e8338e5dc10b 100644 --- a/include/linux/mfd/rohm-bd718x7.h +++ b/include/linux/mfd/rohm-bd718x7.h @@ -78,9 +78,9 @@ enum { BD71837_REG_TRANS_COND0 = 0x1F, BD71837_REG_TRANS_COND1 = 0x20, BD71837_REG_VRFAULTEN = 0x21, - BD71837_REG_MVRFLTMASK0 = 0x22, - BD71837_REG_MVRFLTMASK1 = 0x23, - BD71837_REG_MVRFLTMASK2 = 0x24, + BD718XX_REG_MVRFLTMASK0 = 0x22, + BD718XX_REG_MVRFLTMASK1 = 0x23, + BD718XX_REG_MVRFLTMASK2 = 0x24, BD71837_REG_RCVCFG = 0x25, BD71837_REG_RCVNUM = 0x26, BD71837_REG_PWRONCONFIG0 = 0x27, @@ -159,6 +159,33 @@ enum { #define BUCK8_MASK 0x3F #define BUCK8_DEFAULT 0x1E +/* BD718XX Voltage monitoring masks */ +#define BD718XX_BUCK1_VRMON80 0x1 +#define BD718XX_BUCK1_VRMON130 0x2 +#define BD718XX_BUCK2_VRMON80 0x4 +#define BD718XX_BUCK2_VRMON130 0x8 +#define BD718XX_1ST_NODVS_BUCK_VRMON80 0x1 +#define BD718XX_1ST_NODVS_BUCK_VRMON130 0x2 +#define BD718XX_2ND_NODVS_BUCK_VRMON80 0x4 +#define BD718XX_2ND_NODVS_BUCK_VRMON130 0x8 +#define BD718XX_3RD_NODVS_BUCK_VRMON80 0x10 +#define BD718XX_3RD_NODVS_BUCK_VRMON130 0x20 +#define BD718XX_4TH_NODVS_BUCK_VRMON80 0x40 +#define BD718XX_4TH_NODVS_BUCK_VRMON130 0x80 +#define BD718XX_LDO1_VRMON80 0x1 +#define BD718XX_LDO2_VRMON80 0x2 +#define BD718XX_LDO3_VRMON80 0x4 +#define BD718XX_LDO4_VRMON80 0x8 +#define BD718XX_LDO5_VRMON80 0x10 +#define BD718XX_LDO6_VRMON80 0x20 + +/* BD71837 specific voltage monitoring masks */ +#define BD71837_BUCK3_VRMON80 0x10 +#define BD71837_BUCK3_VRMON130 0x20 +#define BD71837_BUCK4_VRMON80 0x40 +#define BD71837_BUCK4_VRMON130 0x80 +#define BD71837_LDO7_VRMON80 0x40 + /* BD71837_REG_IRQ bits */ #define IRQ_SWRST 0x40 #define IRQ_PWRON_S 0x20 From 4fbf51ee6e822ac28be28ff25883745131e8c4a3 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 23 Aug 2018 09:55:43 -0400 Subject: [PATCH 006/499] media: video_function_calls.rst: drop obsolete video-set-attributes reference This fixes this warning: Documentation/media/uapi/dvb/video_function_calls.rst:9: WARNING: toctree 
contains reference to nonexisting document 'uapi/dvb/video-set-attributes' Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/dvb/video_function_calls.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/media/uapi/dvb/video_function_calls.rst b/Documentation/media/uapi/dvb/video_function_calls.rst index 3f4f6c9ffad7..a4222b6cd2d3 100644 --- a/Documentation/media/uapi/dvb/video_function_calls.rst +++ b/Documentation/media/uapi/dvb/video_function_calls.rst @@ -33,4 +33,3 @@ Video Function Calls video-clear-buffer video-set-streamtype video-set-format - video-set-attributes From 312f73b648626a0526a3aceebb0a3192aaba05ce Mon Sep 17 00:00:00 2001 From: Jozef Balga Date: Tue, 21 Aug 2018 05:01:04 -0400 Subject: [PATCH 007/499] media: af9035: prevent buffer overflow on write When less than 3 bytes are written to the device, memcpy is called with negative array size which leads to buffer overflow and kernel panic. This patch adds a condition and returns -EOPNOTSUPP instead. Fixes bugzilla issue 64871 [mchehab+samsung@kernel.org: fix a merge conflict and changed the condition to match the patch's comment, e. g. len == 3 could also be valid] Signed-off-by: Jozef Balga Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb-v2/af9035.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb-v2/af9035.c b/drivers/media/usb/dvb-usb-v2/af9035.c index 666d319d3d1a..1f6c1eefe389 100644 --- a/drivers/media/usb/dvb-usb-v2/af9035.c +++ b/drivers/media/usb/dvb-usb-v2/af9035.c @@ -402,8 +402,10 @@ static int af9035_i2c_master_xfer(struct i2c_adapter *adap, if (msg[0].addr == state->af9033_i2c_addr[1]) reg |= 0x100000; - ret = af9035_wr_regs(d, reg, &msg[0].buf[3], - msg[0].len - 3); + ret = (msg[0].len >= 3) ? af9035_wr_regs(d, reg, + &msg[0].buf[3], + msg[0].len - 3) + : -EOPNOTSUPP; } else { /* I2C write */ u8 buf[MAX_XFER_SIZE]; From 44a9ffd4eb99ada6c1496a735a414414fef30696 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Aug 2018 05:53:42 -0400 Subject: [PATCH 008/499] media: camss: mark PM functions as __maybe_unused The empty suspend/resume functions cause a build warning when they are unused: drivers/media/platform/qcom/camss/camss.c:1001:12: error: 'camss_runtime_resume' defined but not used [-Werror=unused-function] drivers/media/platform/qcom/camss/camss.c:996:12: error: 'camss_runtime_suspend' defined but not used [-Werror=unused-function] Mark them as __maybe_unused so the compiler can silently drop them. 
Fixes: 02afa816dbbf ("media: camss: Add basic runtime PM support") Signed-off-by: Arnd Bergmann Acked-by: Todor Tomov Acked-by: Geert Uytterhoeven Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/camss/camss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c index dcc0c30ef1b1..9f19d5f5966b 100644 --- a/drivers/media/platform/qcom/camss/camss.c +++ b/drivers/media/platform/qcom/camss/camss.c @@ -993,12 +993,12 @@ static const struct of_device_id camss_dt_match[] = { MODULE_DEVICE_TABLE(of, camss_dt_match); -static int camss_runtime_suspend(struct device *dev) +static int __maybe_unused camss_runtime_suspend(struct device *dev) { return 0; } -static int camss_runtime_resume(struct device *dev) +static int __maybe_unused camss_runtime_resume(struct device *dev) { return 0; } From 55b518998996cc935d294bbf1f07ddba58b8b89e Mon Sep 17 00:00:00 2001 From: Todor Tomov Date: Tue, 14 Aug 2018 11:57:35 -0400 Subject: [PATCH 009/499] media: camss: Use managed memory allocations Use managed memory allocations for structs which are used until the driver is removed. Signed-off-by: Todor Tomov Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/camss/camss-ispif.c | 4 ++-- drivers/media/platform/qcom/camss/camss.c | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/media/platform/qcom/camss/camss-ispif.c b/drivers/media/platform/qcom/camss/camss-ispif.c index 7f269021d08c..ca7b0917500d 100644 --- a/drivers/media/platform/qcom/camss/camss-ispif.c +++ b/drivers/media/platform/qcom/camss/camss-ispif.c @@ -1076,8 +1076,8 @@ int msm_ispif_subdev_init(struct ispif_device *ispif, else return -EINVAL; - ispif->line = kcalloc(ispif->line_num, sizeof(*ispif->line), - GFP_KERNEL); + ispif->line = devm_kcalloc(dev, ispif->line_num, sizeof(*ispif->line), + GFP_KERNEL); if (!ispif->line) return -ENOMEM; diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c index 9f19d5f5966b..669615fff6a0 100644 --- a/drivers/media/platform/qcom/camss/camss.c +++ b/drivers/media/platform/qcom/camss/camss.c @@ -848,17 +848,18 @@ static int camss_probe(struct platform_device *pdev) return -EINVAL; } - camss->csiphy = kcalloc(camss->csiphy_num, sizeof(*camss->csiphy), - GFP_KERNEL); + camss->csiphy = devm_kcalloc(dev, camss->csiphy_num, + sizeof(*camss->csiphy), GFP_KERNEL); if (!camss->csiphy) return -ENOMEM; - camss->csid = kcalloc(camss->csid_num, sizeof(*camss->csid), - GFP_KERNEL); + camss->csid = devm_kcalloc(dev, camss->csid_num, sizeof(*camss->csid), + GFP_KERNEL); if (!camss->csid) return -ENOMEM; - camss->vfe = kcalloc(camss->vfe_num, sizeof(*camss->vfe), GFP_KERNEL); + camss->vfe = devm_kcalloc(dev, camss->vfe_num, sizeof(*camss->vfe), + GFP_KERNEL); if (!camss->vfe) return -ENOMEM; From 3799eca51c5be3cd76047a582ac52087373b54b3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 15 Aug 2018 06:27:28 -0400 Subject: [PATCH 010/499] media: camss: add missing includes Multiple files in this driver fail to build because of missing header inclusions: drivers/media/platform/qcom/camss/camss-csiphy-2ph-1-0.c: In function 'csiphy_hw_version_read': drivers/media/platform/qcom/camss/camss-csiphy-2ph-1-0.c:31:18: error: implicit declaration of function 'readl_relaxed'; did you mean 'xchg_relaxed'? 
[-Werror=implicit-function-declaration] drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c: In function 'csiphy_hw_version_read': drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c:52:2: error: implicit declaration of function 'writel' [-Werror=implicit-function-declaration] Add linux/io.h there and in all other files that call readl/writel and related interfaces. Signed-off-by: Arnd Bergmann Signed-off-by: Todor Tomov Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/camss/camss-csid.c | 1 + drivers/media/platform/qcom/camss/camss-csiphy-2ph-1-0.c | 1 + drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c | 1 + drivers/media/platform/qcom/camss/camss-csiphy.c | 1 + drivers/media/platform/qcom/camss/camss-ispif.c | 1 + drivers/media/platform/qcom/camss/camss-vfe-4-1.c | 1 + drivers/media/platform/qcom/camss/camss-vfe-4-7.c | 1 + 7 files changed, 7 insertions(+) diff --git a/drivers/media/platform/qcom/camss/camss-csid.c b/drivers/media/platform/qcom/camss/camss-csid.c index 729b31891466..a5ae85674ffb 100644 --- a/drivers/media/platform/qcom/camss/camss-csid.c +++ b/drivers/media/platform/qcom/camss/camss-csid.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/platform/qcom/camss/camss-csiphy-2ph-1-0.c b/drivers/media/platform/qcom/camss/camss-csiphy-2ph-1-0.c index c832539397d7..12bce391d71f 100644 --- a/drivers/media/platform/qcom/camss/camss-csiphy-2ph-1-0.c +++ b/drivers/media/platform/qcom/camss/camss-csiphy-2ph-1-0.c @@ -12,6 +12,7 @@ #include #include +#include #define CAMSS_CSI_PHY_LNn_CFG2(n) (0x004 + 0x40 * (n)) #define CAMSS_CSI_PHY_LNn_CFG3(n) (0x008 + 0x40 * (n)) diff --git a/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c b/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c index bcd0dfd33618..2e65caf1ecae 100644 --- a/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c +++ b/drivers/media/platform/qcom/camss/camss-csiphy-3ph-1-0.c @@ -12,6 +12,7 @@ #include #include +#include #define CSIPHY_3PH_LNn_CFG1(n) (0x000 + 0x100 * (n)) #define CSIPHY_3PH_LNn_CFG1_SWI_REC_DLY_PRG (BIT(7) | BIT(6)) diff --git a/drivers/media/platform/qcom/camss/camss-csiphy.c b/drivers/media/platform/qcom/camss/camss-csiphy.c index 4559f3b1b38c..008afb85023b 100644 --- a/drivers/media/platform/qcom/camss/camss-csiphy.c +++ b/drivers/media/platform/qcom/camss/camss-csiphy.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/platform/qcom/camss/camss-ispif.c b/drivers/media/platform/qcom/camss/camss-ispif.c index ca7b0917500d..1f33b4eb198c 100644 --- a/drivers/media/platform/qcom/camss/camss-ispif.c +++ b/drivers/media/platform/qcom/camss/camss-ispif.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/platform/qcom/camss/camss-vfe-4-1.c b/drivers/media/platform/qcom/camss/camss-vfe-4-1.c index da3a9fed9f2d..174a36be6f5d 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe-4-1.c +++ b/drivers/media/platform/qcom/camss/camss-vfe-4-1.c @@ -9,6 +9,7 @@ */ #include +#include #include #include "camss-vfe.h" diff --git a/drivers/media/platform/qcom/camss/camss-vfe-4-7.c b/drivers/media/platform/qcom/camss/camss-vfe-4-7.c index 4c584bffd179..0dca8bf9281e 100644 --- a/drivers/media/platform/qcom/camss/camss-vfe-4-7.c +++ b/drivers/media/platform/qcom/camss/camss-vfe-4-7.c @@ -9,6 +9,7 @@ */ #include +#include #include #include "camss-vfe.h" From 
ef394f3fbecbe61d69450ad8cf0fa8f713c8ce8f Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 31 Aug 2018 14:34:02 +0200 Subject: [PATCH 011/499] regulator: da9063: fix DT probing with constraints Commit 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L") reordered the da9063_regulator_info[] array, but not the DA9063_ID_* regulator ids and not the da9063_matches[] array, because ids are used as indices in the array initializer. This mismatch between regulator id and da9063_regulator_info[] array index causes the driver probe to fail because constraints from DT are not applied to the correct regulator: da9063 0-0058: Device detected (chip-ID: 0x61, var-ID: 0x50) DA9063_BMEM: Bringing 900000uV into 3300000-3300000uV DA9063_LDO9: Bringing 3300000uV into 2500000-2500000uV DA9063_LDO1: Bringing 900000uV into 3300000-3300000uV DA9063_LDO1: failed to apply 3300000-3300000uV constraint(-22) This patch reorders the DA9063_ID_* as apparently intended, and with them the entries in the da90630_matches[] array. Fixes: 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L") Signed-off-by: Philipp Zabel Reviewed-by: Geert Uytterhoeven Reviewed-by: Marek Vasut Signed-off-by: Mark Brown --- include/linux/mfd/da9063/pdata.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h index 8a125701ef7b..50bed4f89c1a 100644 --- a/include/linux/mfd/da9063/pdata.h +++ b/include/linux/mfd/da9063/pdata.h @@ -21,7 +21,7 @@ /* * Regulator configuration */ -/* DA9063 regulator IDs */ +/* DA9063 and DA9063L regulator IDs */ enum { /* BUCKs */ DA9063_ID_BCORE1, @@ -37,18 +37,20 @@ enum { DA9063_ID_BMEM_BIO_MERGED, /* When two BUCKs are merged, they cannot be reused separately */ - /* LDOs */ - DA9063_ID_LDO1, - DA9063_ID_LDO2, + /* LDOs on both DA9063 and DA9063L */ DA9063_ID_LDO3, - DA9063_ID_LDO4, - DA9063_ID_LDO5, - DA9063_ID_LDO6, DA9063_ID_LDO7, DA9063_ID_LDO8, DA9063_ID_LDO9, - DA9063_ID_LDO10, DA9063_ID_LDO11, + + /* DA9063-only LDOs */ + DA9063_ID_LDO1, + DA9063_ID_LDO2, + DA9063_ID_LDO4, + DA9063_ID_LDO5, + DA9063_ID_LDO6, + DA9063_ID_LDO10, }; /* Regulators platform data */ From 215ab0f021c9fea3c18b75e7d522400ee6a49990 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 31 Aug 2018 08:38:49 -0300 Subject: [PATCH 012/499] xfrm6: call kfree_skb when skb is toobig After commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ("vti6: fix PMTU caching and reporting on xmit"), some too big skbs might be potentially passed down to __xfrm6_output, causing it to fail to transmit but not free the skb, causing a leak of skb, and consequentially a leak of dst references. After running pmtu.sh, that shows as failure to unregister devices in a namespace: [ 311.397671] unregister_netdevice: waiting for veth_b to become free. Usage count = 1 The fix is to call kfree_skb in case of transmit failures. 
Fixes: dd767856a36e ("xfrm6: Don't call icmpv6_send on local error") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5959ce9620eb..6a74080005cf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } From 7001cab1dabc0b72b2b672ef58a90ab64f5e2343 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Wed, 29 Aug 2018 08:47:57 +0200 Subject: [PATCH 013/499] spi: tegra20-slink: explicitly enable/disable clock Depending on the SPI instance one may get an interrupt storm upon requesting resp. interrupt unless the clock is explicitly enabled beforehand. This has been observed trying to bring up instance 4 on T20. Signed-off-by: Marcel Ziswiler Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-tegra20-slink.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 6f7b946b5ced..1427f343b39a 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1063,6 +1063,24 @@ static int tegra_slink_probe(struct platform_device *pdev) goto exit_free_master; } + /* disabled clock may cause interrupt storm upon request */ + tspi->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(tspi->clk)) { + ret = PTR_ERR(tspi->clk); + dev_err(&pdev->dev, "Can not get clock %d\n", ret); + goto exit_free_master; + } + ret = clk_prepare(tspi->clk); + if (ret < 0) { + dev_err(&pdev->dev, "Clock prepare failed %d\n", ret); + goto exit_free_master; + } + ret = clk_enable(tspi->clk); + if (ret < 0) { + dev_err(&pdev->dev, "Clock enable failed %d\n", ret); + goto exit_free_master; + } + spi_irq = platform_get_irq(pdev, 0); tspi->irq = spi_irq; ret = request_threaded_irq(tspi->irq, tegra_slink_isr, @@ -1071,14 +1089,7 @@ static int tegra_slink_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n", tspi->irq); - goto exit_free_master; - } - - tspi->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(tspi->clk)) { - dev_err(&pdev->dev, "can not get clock\n"); - ret = PTR_ERR(tspi->clk); - goto exit_free_irq; + goto exit_clk_disable; } tspi->rst = devm_reset_control_get_exclusive(&pdev->dev, "spi"); @@ -1138,6 +1149,8 @@ static int tegra_slink_probe(struct platform_device *pdev) tegra_slink_deinit_dma_param(tspi, true); exit_free_irq: free_irq(spi_irq, tspi); +exit_clk_disable: + clk_disable(tspi->clk); exit_free_master: spi_master_put(master); return ret; @@ -1150,6 +1163,8 @@ static int tegra_slink_remove(struct platform_device *pdev) free_irq(tspi->irq, tspi); + clk_disable(tspi->clk); + if (tspi->tx_dma_chan) tegra_slink_deinit_dma_param(tspi, false); From 3edd79cf5a44b12dbb13bc320f5788aed6562b36 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 3 Sep 2018 16:49:37 +0200 Subject: [PATCH 014/499] regulator: Fix 'do-nothing' value for regulators without suspend state Some regulators don't have all states defined and in such cases regulator core should not assume anything. 
However in current implementation of of_get_regulation_constraints() DO_NOTHING_IN_SUSPEND enable value was set only for regulators which had suspend node defined, otherwise the default 0 value was used, what means DISABLE_IN_SUSPEND. This lead to broken system suspend/resume on boards, which had simple regulator constraints definition (without suspend state nodes). To avoid further mismatches between the default and uninitialized values of the suspend enabled/disabled states, change the values of the them, so default '0' means DO_NOTHING_IN_SUSPEND. Fixes: 72069f9957a1: regulator: leave one item to record whether regulator is enabled Signed-off-by: Marek Szyprowski Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/core.c | 2 +- drivers/regulator/of_regulator.c | 2 -- include/linux/regulator/machine.h | 6 +++--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index bb1324f93143..90215f57270f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3161,7 +3161,7 @@ static inline int regulator_suspend_toggle(struct regulator_dev *rdev, if (!rstate->changeable) return -EPERM; - rstate->enabled = en; + rstate->enabled = (en) ? ENABLE_IN_SUSPEND : DISABLE_IN_SUSPEND; return 0; } diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 638f17d4c848..210fc20f7de7 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -213,8 +213,6 @@ static void of_get_regulation_constraints(struct device_node *np, else if (of_property_read_bool(suspend_np, "regulator-off-in-suspend")) suspend_state->enabled = DISABLE_IN_SUSPEND; - else - suspend_state->enabled = DO_NOTHING_IN_SUSPEND; if (!of_property_read_u32(np, "regulator-suspend-min-microvolt", &pval)) diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 3468703d663a..a459a5e973a7 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -48,9 +48,9 @@ struct regulator; * DISABLE_IN_SUSPEND - turn off regulator in suspend states * ENABLE_IN_SUSPEND - keep regulator on in suspend states */ -#define DO_NOTHING_IN_SUSPEND (-1) -#define DISABLE_IN_SUSPEND 0 -#define ENABLE_IN_SUSPEND 1 +#define DO_NOTHING_IN_SUSPEND 0 +#define DISABLE_IN_SUSPEND 1 +#define ENABLE_IN_SUSPEND 2 /* Regulator active discharge flags */ enum regulator_active_discharge { From bfc0698bebcb16d19ecfc89574ad4d696955e5d3 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:52 -0700 Subject: [PATCH 015/499] xfrm: reset transport header back to network header after all input transforms ahave been applied A policy may have been set up with multiple transforms (e.g., ESP and ipcomp). In this situation, the ingress IPsec processing iterates in xfrm_input() and applies each transform in turn, processing the nexthdr to find any additional xfrm that may apply. This patch resets the transport header back to network header only after the last transformation so that subsequent xfrms can find the correct transport header. 
Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Suggested-by: Steffen Klassert Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_input.c | 1 + net/ipv4/xfrm4_mode_transport.c | 4 +--- net/ipv6/xfrm6_input.c | 1 + net/ipv6/xfrm6_mode_transport.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bcfc00e88756..f8de2482a529 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -67,6 +67,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 3d36644890bb..1ad2c2c4e250 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -46,7 +46,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -54,8 +53,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841f4a07438e..9ef490dddcea 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return -1; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 9ad07a91708e..3c29da5defe6 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } From 782710e333a526780d65918d669cb96646983ba2 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:53 -0700 Subject: [PATCH 016/499] xfrm: reset crypto_done when iterating over multiple input xfrms We only support one offloaded xfrm (we do not have devices that can handle more than one offload), so reset crypto_done in xfrm_input() when iterating over multiple transforms in xfrm_input, so that we can invoke the appropriate x->type->input for the non-offloaded transforms Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 352abca2605f..86f5afbd0a0c 100644 --- 
a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -453,6 +453,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } + crypto_done = false; } while (!err); err = xfrm_rcv_cb(skb, family, x->type->proto, 0); From 1723c3155f117ee6e00f28fadf6e9eda4fc85806 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 4 Sep 2018 15:39:30 +0200 Subject: [PATCH 017/499] spi: gpio: Fix copy-and-paste error This fixes an embarrassing copy-and-paste error in the errorpath of spi_gpio_request(): we were checking the wrong struct member for error code right after retrieveing the sck GPIO. Fixes: 9b00bc7b901ff672 ("spi: spi-gpio: Rewrite to use GPIO descriptors") Reviewed-by: Geert Uytterhoeven Signed-off-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/spi/spi-gpio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c index 0626e6e3ea0c..421bfc7dda67 100644 --- a/drivers/spi/spi-gpio.c +++ b/drivers/spi/spi-gpio.c @@ -300,8 +300,8 @@ static int spi_gpio_request(struct device *dev, *mflags |= SPI_MASTER_NO_RX; spi_gpio->sck = devm_gpiod_get(dev, "sck", GPIOD_OUT_LOW); - if (IS_ERR(spi_gpio->mosi)) - return PTR_ERR(spi_gpio->mosi); + if (IS_ERR(spi_gpio->sck)) + return PTR_ERR(spi_gpio->sck); for (i = 0; i < num_chipselects; i++) { spi_gpio->cs_gpios[i] = devm_gpiod_get_index(dev, "cs", From 8682250b3c1b75a45feb7452bc413d004cfe3778 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:13 +0300 Subject: [PATCH 018/499] mac80211: Always report TX status If a frame is dropped for any reason, mac80211 wouldn't report the TX status back to user space. As the user space may rely on the TX_STATUS to kick its state machines, resends etc, it's better to just report this frame as not acked instead. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/status.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9a6d7208bf4f..001a869c059c 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -479,11 +479,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -506,6 +501,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } rcu_read_unlock(); + dev_kfree_skb_any(skb); + } else if (dropped) { dev_kfree_skb_any(skb); } else { /* consumes skb */ From 94a5b3acd0aef83c0e38b5117eda7b2abf4a05a4 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:14 +0300 Subject: [PATCH 019/499] mac80211: Don't wake up from PS for offchannel TX Otherwise the offchannel frame might be queued due to IEEE80211_QUEUE_STOP_REASON_PS and later dropped (in ieee80211_tx_frags()). Anyway, it doesn't make much sense to wake up the device during ROC. 
Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f353d9db54bc..131542513c8f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -214,6 +214,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) { struct ieee80211_local *local = tx->local; struct ieee80211_if_managed *ifmgd; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); /* driver doesn't support power save */ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) @@ -242,6 +243,9 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type != NL80211_IFTYPE_STATION) return TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_OFFCHAN_TX_OK)) + return TX_CONTINUE; + ifmgd = &tx->sdata->u.mgd; /* From 24f33e64fcd0d50a4b1a8e5b41bd0257aa66b0e8 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:12 +0300 Subject: [PATCH 020/499] cfg80211: reg: Init wiphy_idx in regulatory_hint_core() Core regulatory hints didn't set wiphy_idx to WIPHY_IDX_INVALID. Since the regulatory request is zeroed, wiphy_idx was always implicitly set to 0. This resulted in updating only phy #0. Fix that. Fixes: 806a9e39670b ("cfg80211: make regulatory_request use wiphy_idx instead of wiphy") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho [add fixes tag] Signed-off-by: Johannes Berg --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2f702adf2912..765dedb12361 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2867,6 +2867,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); From 6eae4a6c2be387fec41b0d2782c4fffb57159498 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 5 Sep 2018 06:22:59 -0400 Subject: [PATCH 021/499] mac80211: fix pending queue hang due to TX_DROP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In our environment running lots of mesh nodes, we are seeing the pending queue hang periodically, with the debugfs queues file showing lines such as: 00: 0x00000000/348 i.e. there are a large number of frames but no stop reason set. One way this could happen is if queue processing from the pending tasklet exited early without processing all frames, and without having some future event (incoming frame, stop reason flag, ...) to reschedule it. Exactly this can occur today if ieee80211_tx() returns false due to packet drops or power-save buffering in the tx handlers. In the past, this function would return true in such cases, and the change to false doesn't seem to be intentional. Fix this case by reverting to the previous behavior. 
Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Bob Copeland Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 131542513c8f..25ba24bef8f5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1894,7 +1894,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; if (invoke_tx_handlers_early(&tx)) - return false; + return true; if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) return true; From ffa69d6a16f686efe45269342474e421f2aa58b2 Mon Sep 17 00:00:00 2001 From: Gaku Inami Date: Wed, 5 Sep 2018 10:49:36 +0200 Subject: [PATCH 022/499] spi: sh-msiof: Fix invalid SPI use during system suspend If the SPI queue is running during system suspend, the system may lock up. Fix this by stopping/restarting the queue during system suspend/resume by calling spi_master_suspend()/spi_master_resume() from the PM callbacks. In-kernel users will receive an -ESHUTDOWN error while system suspend/resume is in progress. Signed-off-by: Gaku Inami Signed-off-by: Hiromitsu Yamasaki [geert: Cleanup, reword] Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-sh-msiof.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 539d6d1a277a..bfe4e6d4f7bf 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -1426,12 +1426,37 @@ static const struct platform_device_id spi_driver_ids[] = { }; MODULE_DEVICE_TABLE(platform, spi_driver_ids); +#ifdef CONFIG_PM_SLEEP +static int sh_msiof_spi_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); + + return spi_master_suspend(p->master); +} + +static int sh_msiof_spi_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct sh_msiof_spi_priv *p = platform_get_drvdata(pdev); + + return spi_master_resume(p->master); +} + +static SIMPLE_DEV_PM_OPS(sh_msiof_spi_pm_ops, sh_msiof_spi_suspend, + sh_msiof_spi_resume); +#define DEV_PM_OPS &sh_msiof_spi_pm_ops +#else +#define DEV_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + static struct platform_driver sh_msiof_spi_drv = { .probe = sh_msiof_spi_probe, .remove = sh_msiof_spi_remove, .id_table = spi_driver_ids, .driver = { .name = "spi_sh_msiof", + .pm = DEV_PM_OPS, .of_match_table = of_match_ptr(sh_msiof_match), }, }; From 31a5fae4c5a009898da6d177901d5328051641ff Mon Sep 17 00:00:00 2001 From: Hiromitsu Yamasaki Date: Wed, 5 Sep 2018 10:49:37 +0200 Subject: [PATCH 023/499] spi: sh-msiof: Fix handling of write value for SISTR register This patch changes writing to the SISTR register according to the H/W user's manual. The TDREQ bit and RDREQ bits of SISTR are read-only, and must be written their initial values of zero. 
Signed-off-by: Hiromitsu Yamasaki [geert: reword] Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-sh-msiof.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index bfe4e6d4f7bf..101cd6aae2ea 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -397,7 +397,8 @@ static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p, static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p) { - sh_msiof_write(p, STR, sh_msiof_read(p, STR)); + sh_msiof_write(p, STR, + sh_msiof_read(p, STR) & ~(STR_TDREQ | STR_RDREQ)); } static void sh_msiof_spi_write_fifo_8(struct sh_msiof_spi_priv *p, From c1ca59c22c56930b377a665fdd1b43351887830b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Sep 2018 10:49:38 +0200 Subject: [PATCH 024/499] spi: rspi: Fix invalid SPI use during system suspend If the SPI queue is running during system suspend, the system may lock up. Fix this by stopping/restarting the queue during system suspend/resume, by calling spi_master_suspend()/spi_master_resume() from the PM callbacks. In-kernel users will receive an -ESHUTDOWN error while system suspend/resume is in progress. Based on a patch for sh-msiof by Gaku Inami. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-rspi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 95dc4d78618d..f93a4587e3fb 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -1350,12 +1350,36 @@ static const struct platform_device_id spi_driver_ids[] = { MODULE_DEVICE_TABLE(platform, spi_driver_ids); +#ifdef CONFIG_PM_SLEEP +static int rspi_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rspi_data *rspi = platform_get_drvdata(pdev); + + return spi_master_suspend(rspi->master); +} + +static int rspi_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct rspi_data *rspi = platform_get_drvdata(pdev); + + return spi_master_resume(rspi->master); +} + +static SIMPLE_DEV_PM_OPS(rspi_pm_ops, rspi_suspend, rspi_resume); +#define DEV_PM_OPS &rspi_pm_ops +#else +#define DEV_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + static struct platform_driver rspi_driver = { .probe = rspi_probe, .remove = rspi_remove, .id_table = spi_driver_ids, .driver = { .name = "renesas_spi", + .pm = DEV_PM_OPS, .of_match_table = of_match_ptr(rspi_of_match), }, }; From 8dbbaa47b96f6ea5f09f922b4effff3c505cd8cf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Sep 2018 10:49:39 +0200 Subject: [PATCH 025/499] spi: rspi: Fix interrupted DMA transfers When interrupted, wait_event_interruptible_timeout() returns -ERESTARTSYS, and the SPI transfer in progress will fail, as expected: m25p80 spi0.0: SPI transfer failed: -512 spi_master spi0: failed to transfer one message from queue However, as the underlying DMA transfers may not have completed, all subsequent SPI transfers may start to fail: spi_master spi0: receive timeout qspi_transfer_out_in() returned -110 m25p80 spi0.0: SPI transfer failed: -110 spi_master spi0: failed to transfer one message from queue Fix this by calling dmaengine_terminate_all() not only for timeouts, but also for errors. This can be reproduced on r8a7991/koelsch, using "hd /dev/mtd0" followed by CTRL-C. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-rspi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index f93a4587e3fb..b37de1d991d6 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -598,11 +598,13 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx, ret = wait_event_interruptible_timeout(rspi->wait, rspi->dma_callbacked, HZ); - if (ret > 0 && rspi->dma_callbacked) + if (ret > 0 && rspi->dma_callbacked) { ret = 0; - else if (!ret) { - dev_err(&rspi->master->dev, "DMA timeout\n"); - ret = -ETIMEDOUT; + } else { + if (!ret) { + dev_err(&rspi->master->dev, "DMA timeout\n"); + ret = -ETIMEDOUT; + } if (tx) dmaengine_terminate_all(rspi->master->dma_tx); if (rx) From 88d0895d0ea9d4431507d576c963f2ff9918144d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 15:08:44 +0200 Subject: [PATCH 026/499] batman-adv: Avoid probe ELP information leak The probe ELPs for WiFi interfaces are expanded to contain at least BATADV_ELP_MIN_PROBE_SIZE bytes. This is usually a lot more than the number of bytes which the template ELP packet requires. These extra padding bytes were not initialized and thus could contain data which were previously stored at the same location. It is therefore required to set it to some predefined or random values to avoid leaking private information from the system transmitting these kind of packets. Fixes: e4623c913508 ("batman-adv: Avoid probe ELP information leak") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 71c20c1d4002..e103c759b7ab 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -241,7 +241,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) * the packet to be exactly of that size to make the link * throughput estimation effective. */ - skb_put(skb, probe_len - hard_iface->bat_v.elp_skb->len); + skb_put_zero(skb, probe_len - hard_iface->bat_v.elp_skb->len); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Sending unicast (probe) ELP packet on interface %s to %pM\n", From b9fd14c20871e6189f635e49b32d7789e430b3c8 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 16:46:47 +0200 Subject: [PATCH 027/499] batman-adv: Fix segfault when writing to throughput_override The per hardif sysfs file "batman_adv/throughput_override" prints the resulting change as info text when the users writes to this file. It uses the helper function batadv_info to add it at the same time to the kernel ring buffer and to the batman-adv debug log (when CONFIG_BATMAN_ADV_DEBUG is enabled). The function batadv_info requires as first parameter the batman-adv softif net_device. This parameter is then used to find the private buffer which contains the debug log for this batman-adv interface. But batadv_store_throughput_override used as first argument the slave net_device. This slave device doesn't have the batadv_priv private data which is access by batadv_info. Writing to this file with CONFIG_BATMAN_ADV_DEBUG enabled can either lead to a segfault or to memory corruption. 
Fixes: 0b5ecc6811bd ("batman-adv: add throughput override attribute to hard_ifaces") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index f2eef43bd2ec..3a76e8970c02 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1090,8 +1090,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, if (old_tp_override == tp_override) goto out; - batadv_info(net_dev, "%s: Changing from: %u.%u MBit to: %u.%u MBit\n", - "throughput_override", + batadv_info(hard_iface->soft_iface, + "%s: %s: Changing from: %u.%u MBit to: %u.%u MBit\n", + "throughput_override", net_dev->name, old_tp_override / 10, old_tp_override % 10, tp_override / 10, tp_override % 10); From a25bab9d723a08bd0bdafb1529faf9094c690b70 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 16:56:29 +0200 Subject: [PATCH 028/499] batman-adv: Fix segfault when writing to sysfs elp_interval The per hardif sysfs file "batman_adv/elp_interval" is using the generic functions to store/show uint values. The helper __batadv_store_uint_attr requires the softif net_device as parameter to print the resulting change as info text when the users writes to this file. It uses the helper function batadv_info to add it at the same time to the kernel ring buffer and to the batman-adv debug log (when CONFIG_BATMAN_ADV_DEBUG is enabled). The function batadv_info requires as first parameter the batman-adv softif net_device. This parameter is then used to find the private buffer which contains the debug log for this batman-adv interface. But batadv_store_throughput_override used as first argument the slave net_device. This slave device doesn't have the batadv_priv private data which is access by batadv_info. Writing to this file with CONFIG_BATMAN_ADV_DEBUG enabled can either lead to a segfault or to memory corruption. 
Fixes: 0744ff8fa8fa ("batman-adv: Add hard_iface specific sysfs wrapper macros for UINT") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 3a76e8970c02..09427fc6494a 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -188,7 +188,8 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ \ return __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ - &bat_priv->_var, net_dev); \ + &bat_priv->_var, net_dev, \ + NULL); \ } #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ @@ -262,7 +263,9 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ \ length = __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ - &hard_iface->_var, net_dev); \ + &hard_iface->_var, \ + hard_iface->soft_iface, \ + net_dev); \ \ batadv_hardif_put(hard_iface); \ return length; \ @@ -356,10 +359,12 @@ __batadv_store_bool_attr(char *buff, size_t count, static int batadv_store_uint_attr(const char *buff, size_t count, struct net_device *net_dev, + struct net_device *slave_dev, const char *attr_name, unsigned int min, unsigned int max, atomic_t *attr) { + char ifname[IFNAMSIZ + 3] = ""; unsigned long uint_val; int ret; @@ -385,8 +390,11 @@ static int batadv_store_uint_attr(const char *buff, size_t count, if (atomic_read(attr) == uint_val) return count; - batadv_info(net_dev, "%s: Changing from: %i to: %lu\n", - attr_name, atomic_read(attr), uint_val); + if (slave_dev) + snprintf(ifname, sizeof(ifname), "%s: ", slave_dev->name); + + batadv_info(net_dev, "%s: %sChanging from: %i to: %lu\n", + attr_name, ifname, atomic_read(attr), uint_val); atomic_set(attr, uint_val); return count; @@ -397,12 +405,13 @@ static ssize_t __batadv_store_uint_attr(const char *buff, size_t count, void (*post_func)(struct net_device *), const struct attribute *attr, atomic_t *attr_store, - struct net_device *net_dev) + struct net_device *net_dev, + struct net_device *slave_dev) { int ret; - ret = batadv_store_uint_attr(buff, count, net_dev, attr->name, min, max, - attr_store); + ret = batadv_store_uint_attr(buff, count, net_dev, slave_dev, + attr->name, min, max, attr_store); if (post_func && ret) post_func(net_dev); @@ -571,7 +580,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect, attr, &bat_priv->gw.sel_class, - bat_priv->soft_iface); + bat_priv->soft_iface, NULL); } static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, From dff9bc42ab0b2d38c5e90ddd79b238fed5b4c7ad Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:41 +0200 Subject: [PATCH 029/499] batman-adv: Prevent duplicated gateway_node entry The function batadv_gw_node_add is responsible for adding new gw_node to the gateway_list. It is expecting that the caller already checked that there is not already an entry with the same key or not. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. 
Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/gateway_client.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 8b198ee798c9..140c61a3f1ec 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -348,6 +349,9 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information + * + * Has to be called with the appropriate locks being acquired + * (gw.list_lock). */ static void batadv_gw_node_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@ -355,6 +359,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node; + lockdep_assert_held(&bat_priv->gw.list_lock); + if (gateway->bandwidth_down == 0) return; @@ -369,10 +375,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - spin_lock_bh(&bat_priv->gw.list_lock); kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list); - spin_unlock_bh(&bat_priv->gw.list_lock); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n", @@ -428,11 +432,14 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node, *curr_gw = NULL; + spin_lock_bh(&bat_priv->gw.list_lock); gw_node = batadv_gw_node_get(bat_priv, orig_node); if (!gw_node) { batadv_gw_node_add(bat_priv, orig_node, gateway); + spin_unlock_bh(&bat_priv->gw.list_lock); goto out; } + spin_unlock_bh(&bat_priv->gw.list_lock); if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) && gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)) From fa122fec8640eb7186ce5a41b83a4c1744ceef8f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:42 +0200 Subject: [PATCH 030/499] batman-adv: Prevent duplicated nc_node entry The function batadv_nc_get_nc_node is responsible for adding new nc_nodes to the in_coding_list and out_coding_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. 
Fixes: d56b1705e28c ("batman-adv: network coding - detect coding nodes and remove these after timeout") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/network-coding.c | 41 ++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index c3578444f3cb..34caf129a9bf 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -854,24 +854,6 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ struct list_head *list; - /* Check if nc_node is already added */ - nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); - - /* Node found */ - if (nc_node) - return nc_node; - - nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); - if (!nc_node) - return NULL; - - /* Initialize nc_node */ - INIT_LIST_HEAD(&nc_node->list); - kref_init(&nc_node->refcount); - ether_addr_copy(nc_node->addr, orig_node->orig); - kref_get(&orig_neigh_node->refcount); - nc_node->orig_node = orig_neigh_node; - /* Select ingoing or outgoing coding node */ if (in_coding) { lock = &orig_neigh_node->in_coding_list_lock; @@ -881,13 +863,34 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, list = &orig_neigh_node->out_coding_list; } + spin_lock_bh(lock); + + /* Check if nc_node is already added */ + nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); + + /* Node found */ + if (nc_node) + goto unlock; + + nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); + if (!nc_node) + goto unlock; + + /* Initialize nc_node */ + INIT_LIST_HEAD(&nc_node->list); + kref_init(&nc_node->refcount); + ether_addr_copy(nc_node->addr, orig_node->orig); + kref_get(&orig_neigh_node->refcount); + nc_node->orig_node = orig_neigh_node; + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", nc_node->addr, nc_node->orig_node->orig); /* Add nc_node to orig_node */ - spin_lock_bh(lock); kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); + +unlock: spin_unlock_bh(lock); return nc_node; From 94cb82f594ed86be303398d6dfc7640a6f1d45d4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:43 +0200 Subject: [PATCH 031/499] batman-adv: Prevent duplicated softif_vlan entry The function batadv_softif_vlan_get is responsible for adding new softif_vlan to the softif_vlan_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. 
Fixes: 5d2c05b21337 ("batman-adv: add per VLAN interface attribute framework") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 1485263a348b..626ddca332db 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -574,15 +574,20 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) struct batadv_softif_vlan *vlan; int err; + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + vlan = batadv_softif_vlan_get(bat_priv, vid); if (vlan) { batadv_softif_vlan_put(vlan); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); - if (!vlan) + if (!vlan) { + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -ENOMEM; + } vlan->bat_priv = bat_priv; vlan->vid = vid; @@ -590,17 +595,23 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) atomic_set(&vlan->ap_isolation, 0); - err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); - if (err) { - kfree(vlan); - return err; - } - - spin_lock_bh(&bat_priv->softif_vlan_list_lock); kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + /* batadv_sysfs_add_vlan cannot be in the spinlock section due to the + * sleeping behavior of the sysfs functions and the fs_reclaim lock + */ + err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); + if (err) { + /* ref for the function */ + batadv_softif_vlan_put(vlan); + + /* ref for the list */ + batadv_softif_vlan_put(vlan); + return err; + } + /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ From e7136e48ffdfb9f37b0820f619380485eb407361 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:44 +0200 Subject: [PATCH 032/499] batman-adv: Prevent duplicated global TT entry The function batadv_tt_global_orig_entry_add is responsible for adding new tt_orig_list_entry to the orig_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. 
Fixes: d657e621a0f5 ("batman-adv: add reference counting for type batadv_tt_orig_list_entry") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 12a2b7d21376..d21624c44665 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1613,6 +1613,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, { struct batadv_tt_orig_list_entry *orig_entry; + spin_lock_bh(&tt_global->list_lock); + orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); if (orig_entry) { /* refresh the ttvn: the current value could be a bogus one that @@ -1635,11 +1637,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, orig_entry->flags = flags; kref_init(&orig_entry->refcount); - spin_lock_bh(&tt_global->list_lock); kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); - spin_unlock_bh(&tt_global->list_lock); atomic_inc(&tt_global->orig_list_count); sync_flags: @@ -1647,6 +1647,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, out: if (orig_entry) batadv_tt_orig_list_entry_put(orig_entry); + + spin_unlock_bh(&tt_global->list_lock); } /** From ae3cdc97dc10c7a3b31f297dab429bfb774c9ccb Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:45 +0200 Subject: [PATCH 033/499] batman-adv: Prevent duplicated tvlv handler The function batadv_tvlv_handler_register is responsible for adding new tvlv_handler to the handler_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. 
Fixes: ef26157747d4 ("batman-adv: tvlv - basic infrastructure") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/tvlv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index a637458205d1..40e69c9346d2 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -529,15 +529,20 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, { struct batadv_tvlv_handler *tvlv_handler; + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); if (tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); batadv_tvlv_handler_put(tvlv_handler); return; } tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); - if (!tvlv_handler) + if (!tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); return; + } tvlv_handler->ogm_handler = optr; tvlv_handler->unicast_handler = uptr; @@ -547,7 +552,6 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, kref_init(&tvlv_handler->refcount); INIT_HLIST_NODE(&tvlv_handler->list); - spin_lock_bh(&bat_priv->tvlv.handler_list_lock); kref_get(&tvlv_handler->refcount); hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); From 5af96b9c59c72fb2af2d19c5cc2f3cdcee391dff Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 7 Sep 2018 05:45:54 +0800 Subject: [PATCH 034/499] batman-adv: fix backbone_gw refcount on queue_work() failure The backbone_gw refcounter is to be decreased by the queued work and currently is never decreased if the queue_work() call fails. Fix by checking the queue_work() return value and decrease refcount if necessary. Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ff9659af6b91..5f1aeeded0e3 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1772,6 +1772,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, { struct batadv_bla_backbone_gw *backbone_gw; struct ethhdr *ethhdr; + bool ret; ethhdr = eth_hdr(skb); @@ -1795,8 +1796,13 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, if (unlikely(!backbone_gw)) return true; - queue_work(batadv_event_workqueue, &backbone_gw->report_work); - /* backbone_gw is unreferenced in the report work function function */ + ret = queue_work(batadv_event_workqueue, &backbone_gw->report_work); + + /* backbone_gw is unreferenced in the report work function function + * if queue_work() call was successful + */ + if (!ret) + batadv_backbone_gw_put(backbone_gw); return true; } From 4c4af6900844ab04c9434c972021d7b48610e06a Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 7 Sep 2018 05:45:55 +0800 Subject: [PATCH 035/499] batman-adv: fix hardif_neigh refcount on queue_work() failure The hardif_neigh refcounter is to be decreased by the queued work and currently is never decreased if the queue_work() call fails. Fix by checking the queue_work() return value and decrease refcount if necessary. 
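The detail underlying both queue_work() fixes is that queue_work() returns false when the work item is already pending, in which case the work function will run (and drop its reference) only once. A simplified sketch of the pattern, with placeholder names rather than the actual batman-adv objects:

	/* Sketch only: when queue_work() reports the work was already
	 * queued, the reference taken for this submission must be dropped
	 * by the caller, otherwise it leaks. */
	kref_get(&obj->refcount);
	if (!queue_work(wq, &obj->work))
		kref_put(&obj->refcount, example_obj_release);
	/* on success, the work function is responsible for the put */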
Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e103c759b7ab..9f481cfdf77d 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -268,6 +268,7 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) struct batadv_priv *bat_priv; struct sk_buff *skb; u32 elp_interval; + bool ret; bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); @@ -329,8 +330,11 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) * may sleep and that is not allowed in an rcu protected * context. Therefore schedule a task for that. */ - queue_work(batadv_event_workqueue, - &hardif_neigh->bat_v.metric_work); + ret = queue_work(batadv_event_workqueue, + &hardif_neigh->bat_v.metric_work); + + if (!ret) + batadv_hardif_neigh_put(hardif_neigh); } rcu_read_unlock(); From 119f94a6fefcc76d47075b83d2b73d04c895df78 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 5 Sep 2018 18:52:22 +0300 Subject: [PATCH 036/499] cfg80211: Address some corner cases in scan result channel updating cfg80211_get_bss_channel() is used to update the RX channel based on the available frame payload information (channel number from DSSS Parameter Set element or HT Operation element). This is needed on 2.4 GHz channels where frames may be received on neighboring channels due to overlapping frequency range. This might of some use on the 5 GHz band in some corner cases, but things are more complex there since there is no n:1 or 1:n mapping between channel numbers and frequencies due to multiple different starting frequencies in different operating classes. This could result in ieee80211_channel_to_frequency() returning incorrect frequency and ieee80211_get_channel() returning incorrect channel information (or indication of no match). In the previous implementation, this could result in some scan results being dropped completely, e.g., for the 4.9 GHz channels. That prevented connection to such BSSs. Fix this by using the driver-provided channel pointer if ieee80211_get_channel() does not find matching channel data for the channel number in the frame payload and if the scan is done with 5 MHz or 10 MHz channel bandwidth. While doing this, also add comments describing what the function is trying to achieve to make it easier to understand what happens here and why. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/scan.c | 58 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d36c3eb7b931..d0e7472dd9fd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1058,13 +1058,23 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } +/* + * Update RX channel information based on the available frame payload + * information. This is mainly for the 2.4 GHz band where frames can be received + * from neighboring channels and the Beacon frames use the DSSS Parameter Set + * element to indicate the current (transmitting) channel, but this might also + * be needed on other bands if RX frequency does not match with the actual + * operating channel of a BSS. 
+ */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel) + struct ieee80211_channel *channel, + enum nl80211_bss_scan_width scan_width) { const u8 *tmp; u32 freq; int channel_number = -1; + struct ieee80211_channel *alt_channel; tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); if (tmp && tmp[1] == 1) { @@ -1078,16 +1088,45 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } } - if (channel_number < 0) + if (channel_number < 0) { + /* No channel information in frame payload */ return channel; + } freq = ieee80211_channel_to_frequency(channel_number, channel->band); - channel = ieee80211_get_channel(wiphy, freq); - if (!channel) + alt_channel = ieee80211_get_channel(wiphy, freq); + if (!alt_channel) { + if (channel->band == NL80211_BAND_2GHZ) { + /* + * Better not allow unexpected channels when that could + * be going beyond the 1-11 range (e.g., discovering + * BSS on channel 12 when radio is configured for + * channel 11. + */ + return NULL; + } + + /* No match for the payload channel number - ignore it */ + return channel; + } + + if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || + scan_width == NL80211_BSS_CHAN_WIDTH_5) { + /* + * Ignore channel number in 5 and 10 MHz channels where there + * may not be an n:1 or 1:n mapping between frequencies and + * channel numbers. + */ + return channel; + } + + /* + * Use the channel determined through the payload channel number + * instead of the RX channel reported by the driver. + */ + if (alt_channel->flags & IEEE80211_CHAN_DISABLED) return NULL; - if (channel->flags & IEEE80211_CHAN_DISABLED) - return NULL; - return channel; + return alt_channel; } /* Returned bss is reference counted and must be cleaned up appropriately. */ @@ -1112,7 +1151,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, (data->signal < 0 || data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, + data->scan_width); if (!channel) return NULL; @@ -1210,7 +1250,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, data->chan); + ielen, data->chan, data->scan_width); if (!channel) return NULL; From cb59bc14e830028d2244861216df038165d7625d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Sep 2018 13:34:02 +0200 Subject: [PATCH 037/499] mac80211: TDLS: fix skb queue/priority assignment If the TDLS setup happens over a connection to an AP that doesn't have QoS, we nevertheless assign a non-zero TID (skb->priority) and queue mapping, which may confuse us or drivers later. Fix it by just assigning the special skb->priority and then using ieee80211_select_queue() just like other data frames would go through. 
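For context, the "special" priority values are the 256..263 range that cfg80211's 802.1d classifier interprets as a fixed TID (priority minus 256) instead of deriving the TID from DSCP. A rough sketch of the idea, not a verbatim copy of the kernel helpers:

	/* Sketch only: 256 + TID asks the classifier for that exact TID;
	 * ieee80211_select_queue() then maps the TID to a queue, falling
	 * back to a single queue/TID 0 when the connection has no QoS. */
	skb->priority = 256 + 2;	/* request TID 2 for setup frames */
	skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb));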
Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 5cd5e6e5834e..6c647f425e05 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -16,6 +16,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" +#include "wme.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -1010,14 +1011,13 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; + skb->priority = 256 + 2; break; default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; + skb->priority = 256 + 5; break; } + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. From c42055105785580563535e6d3143cad95c7ac7ee Mon Sep 17 00:00:00 2001 From: Yuan-Chi Pang Date: Thu, 6 Sep 2018 16:57:48 +0800 Subject: [PATCH 038/499] mac80211: fix TX status reporting for ieee80211s TX status reporting to ieee80211s is through ieee80211s_update_metric. There are two problems about ieee80211s_update_metric: 1. The purpose is to estimate the fail probability to a specific link. No need to restrict to data frame. 2. Current implementation does not work if wireless driver does not pass tx_status with skb. Fix this by removing ieee80211_is_data condition, passing ieee80211_tx_status directly to ieee80211s_update_metric, and putting it in both __ieee80211_tx_status and ieee80211_tx_status_ext. Signed-off-by: Yuan-Chi Pang Signed-off-by: Johannes Berg --- net/mac80211/mesh.h | 3 ++- net/mac80211/mesh_hwmp.c | 9 +++------ net/mac80211/status.c | 4 +++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index ee56f18cad3f..21526630bf65 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -217,7 +217,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); + struct sta_info *sta, + struct ieee80211_tx_status *st); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index daf9db3c8f24..6950cd0bf594 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -295,15 +295,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, } void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb) + struct sta_info *sta, + struct ieee80211_tx_status *st) { - struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *txinfo = st->info; int failed; - if (!ieee80211_is_data(hdr->frame_control)) - return; - failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); /* moving average, scaled to 100. 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 001a869c059c..91d7c0cd1882 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -808,7 +808,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, status); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - ieee80211s_update_metric(local, sta, skb); + ieee80211s_update_metric(local, sta, status); if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); @@ -969,6 +969,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, } rate_control_tx_status(local, sband, status); + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) + ieee80211s_update_metric(local, sta, status); } if (acked || noack_success) { From 9e1437937807b0122e8da1ca8765be2adca9aee6 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 11 Sep 2018 10:31:15 +0200 Subject: [PATCH 039/499] xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry. Since commit 222d7dbd258d ("net: prevent dst uses after free") skb_dst_force() might clear the dst_entry attached to the skb. The xfrm code don't expect this to happen, so we crash with a NULL pointer dereference in this case. Fix it by checking skb_dst(skb) for NULL after skb_dst_force() and drop the packet in cast the dst_entry was cleared. Fixes: 222d7dbd258d ("net: prevent dst uses after free") Reported-by: Tobias Hommel Reported-by: Kristian Evensen Reported-by: Wolfgang Walter Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 4 ++++ net/xfrm/xfrm_policy.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 89b178a78dc7..36d15a38ce5e 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -101,6 +101,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + goto error_nolock; + } if (xfrm_offload(skb)) { x->type_offload->encap(x, skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7c5e8978aeaa..626e0f4d1749 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2548,6 +2548,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); + return 0; + } dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { From 9f34519a82356f6cf0ccb8480ee0ed99b3d0af75 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 31 Aug 2018 11:52:00 -0700 Subject: [PATCH 040/499] cxgb4: fix abort_req_rss6 struct Remove the incorrect WR_HDR field which can cause a misinterpretation of ABORT CPL by ULDs, such as iw_cxgb4. 
Fixes: a3cdaa69e4ae ("cxgb4: Adds CPL support for Shared Receive Queues") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b8f75a22fb6c..f152da1ce046 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -753,7 +753,6 @@ struct cpl_abort_req_rss { }; struct cpl_abort_req_rss6 { - WR_HDR; union opcode_tid ot; __be32 srqidx_status; }; From b90ca5cc32f59bb214847c6855959702f00c6801 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 11 Sep 2018 21:27:44 -0700 Subject: [PATCH 041/499] filesystem-dax: Fix use of zero page Use my_zero_pfn instead of ZERO_PAGE(), and pass the vaddr to it instead of zero so it works on MIPS and s390 who reference the vaddr to select a zero page. Cc: Fixes: 91d25ba8a6b0 ("dax: use common 4k zero page for dax mmap reads") Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- fs/dax.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index f32d7125ad0f..b68ce484e1be 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1120,21 +1120,12 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry, { struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; - vm_fault_t ret = VM_FAULT_NOPAGE; - struct page *zero_page; - pfn_t pfn; + pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); + vm_fault_t ret; - zero_page = ZERO_PAGE(0); - if (unlikely(!zero_page)) { - ret = VM_FAULT_OOM; - goto out; - } - - pfn = page_to_pfn_t(zero_page); dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, false); ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); -out: trace_dax_load_hole(inode, vmf, ret); return ret; } From 379e36d3f591cb62e781907edafea17d75e8c1fa Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 7 Sep 2018 10:18:51 +0200 Subject: [PATCH 042/499] ARM: dts: at91: sama5d2_ptc_ek: fix nand pinctrl The drive strength has to be set to medium otherwise some data corruption may happen. Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index b10dccd0958f..3b1baa8605a7 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -11,6 +11,7 @@ #include "sama5d2-pinfunc.h" #include #include +#include / { model = "Atmel SAMA5D2 PTC EK"; @@ -299,6 +300,7 @@ re_we_data { , ; bias-pull-up; + atmel,drive-strength = ; }; ale_cle_rdy_cs { From 9d2ac5f4bec00a0697ab77c999ef38753b3cb900 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 3 Aug 2018 12:40:58 +0200 Subject: [PATCH 043/499] media: i2c: mt9v111: Fix v4l2-ctrl error handling Fix error handling of v4l2_ctrl creation by inspecting the ctrl.error flag instead of testing for each returned value correctness. As reported by Dan Carpenter returning PTR_ERR() on the v4l2_ctrl_new_std() return value is also wrong, as that function return NULL on error. While at there re-order the cleanup path to respect the operation inverse order. 
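The control-framework convention applied here is that v4l2_ctrl_new_std() and friends return NULL on failure and latch the first error into the handler, so a single check of the handler's error field after all controls are created is sufficient. A minimal sketch of that pattern with placeholder limits, not the mt9v111 code itself:

	/* Sketch only: create all controls, then test hdl.error once. */
	v4l2_ctrl_handler_init(&hdl, 4);
	v4l2_ctrl_new_std(&hdl, &ops, V4L2_CID_HBLANK, hb_min, hb_max, 1, hb_def);
	v4l2_ctrl_new_std(&hdl, &ops, V4L2_CID_VBLANK, vb_min, vb_max, 1, vb_def);
	if (hdl.error) {
		ret = hdl.error;
		v4l2_ctrl_handler_free(&hdl);
		return ret;
	}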
Fixes: aab7ed1c "media: i2c: Add driver for Aptina MT9V111" Reported-by: Dan Carpenter Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/mt9v111.c | 41 ++++++++++++------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/drivers/media/i2c/mt9v111.c b/drivers/media/i2c/mt9v111.c index b5410aeb5fe2..bb41bea950ac 100644 --- a/drivers/media/i2c/mt9v111.c +++ b/drivers/media/i2c/mt9v111.c @@ -1159,41 +1159,21 @@ static int mt9v111_probe(struct i2c_client *client) V4L2_CID_AUTO_WHITE_BALANCE, 0, 1, 1, V4L2_WHITE_BALANCE_AUTO); - if (IS_ERR_OR_NULL(mt9v111->auto_awb)) { - ret = PTR_ERR(mt9v111->auto_awb); - goto error_free_ctrls; - } - mt9v111->auto_exp = v4l2_ctrl_new_std_menu(&mt9v111->ctrls, &mt9v111_ctrl_ops, V4L2_CID_EXPOSURE_AUTO, V4L2_EXPOSURE_MANUAL, 0, V4L2_EXPOSURE_AUTO); - if (IS_ERR_OR_NULL(mt9v111->auto_exp)) { - ret = PTR_ERR(mt9v111->auto_exp); - goto error_free_ctrls; - } - - /* Initialize timings */ mt9v111->hblank = v4l2_ctrl_new_std(&mt9v111->ctrls, &mt9v111_ctrl_ops, V4L2_CID_HBLANK, MT9V111_CORE_R05_MIN_HBLANK, MT9V111_CORE_R05_MAX_HBLANK, 1, MT9V111_CORE_R05_DEF_HBLANK); - if (IS_ERR_OR_NULL(mt9v111->hblank)) { - ret = PTR_ERR(mt9v111->hblank); - goto error_free_ctrls; - } - mt9v111->vblank = v4l2_ctrl_new_std(&mt9v111->ctrls, &mt9v111_ctrl_ops, V4L2_CID_VBLANK, MT9V111_CORE_R06_MIN_VBLANK, MT9V111_CORE_R06_MAX_VBLANK, 1, MT9V111_CORE_R06_DEF_VBLANK); - if (IS_ERR_OR_NULL(mt9v111->vblank)) { - ret = PTR_ERR(mt9v111->vblank); - goto error_free_ctrls; - } /* PIXEL_RATE is fixed: just expose it to user space. */ v4l2_ctrl_new_std(&mt9v111->ctrls, &mt9v111_ctrl_ops, @@ -1201,6 +1181,10 @@ static int mt9v111_probe(struct i2c_client *client) DIV_ROUND_CLOSEST(mt9v111->sysclk, 2), 1, DIV_ROUND_CLOSEST(mt9v111->sysclk, 2)); + if (mt9v111->ctrls.error) { + ret = mt9v111->ctrls.error; + goto error_free_ctrls; + } mt9v111->sd.ctrl_handler = &mt9v111->ctrls; /* Start with default configuration: 640x480 UYVY. */ @@ -1226,26 +1210,27 @@ static int mt9v111_probe(struct i2c_client *client) mt9v111->pad.flags = MEDIA_PAD_FL_SOURCE; ret = media_entity_pads_init(&mt9v111->sd.entity, 1, &mt9v111->pad); if (ret) - goto error_free_ctrls; + goto error_free_entity; #endif ret = mt9v111_chip_probe(mt9v111); if (ret) - goto error_free_ctrls; + goto error_free_entity; ret = v4l2_async_register_subdev(&mt9v111->sd); if (ret) - goto error_free_ctrls; + goto error_free_entity; return 0; -error_free_ctrls: - v4l2_ctrl_handler_free(&mt9v111->ctrls); - +error_free_entity: #if IS_ENABLED(CONFIG_MEDIA_CONTROLLER) media_entity_cleanup(&mt9v111->sd.entity); #endif +error_free_ctrls: + v4l2_ctrl_handler_free(&mt9v111->ctrls); + mutex_destroy(&mt9v111->pwr_mutex); mutex_destroy(&mt9v111->stream_mutex); @@ -1259,12 +1244,12 @@ static int mt9v111_remove(struct i2c_client *client) v4l2_async_unregister_subdev(sd); - v4l2_ctrl_handler_free(&mt9v111->ctrls); - #if IS_ENABLED(CONFIG_MEDIA_CONTROLLER) media_entity_cleanup(&sd->entity); #endif + v4l2_ctrl_handler_free(&mt9v111->ctrls); + mutex_destroy(&mt9v111->pwr_mutex); mutex_destroy(&mt9v111->stream_mutex); From dab99a0f77e9764a00b0203d0155ca134247f461 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 30 Aug 2018 06:18:22 -0400 Subject: [PATCH 044/499] media: staging/media/mt9t031/Kconfig: remove bogus entry The 'config SOC_CAMERA_IMX074' is a copy-and-paste error and should be removed. This Kconfig is for mt9t031 only. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/mt9t031/Kconfig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/staging/media/mt9t031/Kconfig b/drivers/staging/media/mt9t031/Kconfig index f48e06a03cdb..9a58aaf72edd 100644 --- a/drivers/staging/media/mt9t031/Kconfig +++ b/drivers/staging/media/mt9t031/Kconfig @@ -1,9 +1,3 @@ -config SOC_CAMERA_IMX074 - tristate "imx074 support (DEPRECATED)" - depends on SOC_CAMERA && I2C - help - This driver supports IMX074 cameras from Sony - config SOC_CAMERA_MT9T031 tristate "mt9t031 support (DEPRECATED)" depends on SOC_CAMERA && I2C From 67e3816842fe6414d629c7515b955952ec40c7d7 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 31 Aug 2018 07:16:03 -0700 Subject: [PATCH 045/499] RDMA/uverbs: Atomically flush and mark closed the comp event queue Currently a uverbs completion event queue is flushed of events in ib_uverbs_comp_event_close() with the queue spinlock held and then released. Yet setting ev_queue->is_closed is not set until later in uverbs_hot_unplug_completion_event_file(). In between the time ib_uverbs_comp_event_close() releases the lock and uverbs_hot_unplug_completion_event_file() acquires the lock, a completion event can arrive and be inserted into the event queue by ib_uverbs_comp_handler(). This can cause a "double add" list_add warning or crash depending on the kernel configuration, or a memory leak because the event is never dequeued since the queue is already closed down. So add setting ev_queue->is_closed = 1 to ib_uverbs_comp_event_close(). Cc: stable@vger.kernel.org Fixes: 1e7710f3f656 ("IB/core: Change completion channel to use the reworked objects schema") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 6d974e2363df..50152c1b1004 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -440,6 +440,7 @@ static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) list_del(&entry->obj_list); kfree(entry); } + file->ev_queue.is_closed = 1; spin_unlock_irq(&file->ev_queue.lock); uverbs_close_fd(filp); From dfb06cba8c73c0704710b2e3fbe2c35ac66a01b4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 5 Sep 2018 13:31:40 -0700 Subject: [PATCH 046/499] uaccess: Fix is_source param for check_copy_size() in copy_to_iter_mcsafe() copy_to_iter_mcsafe() is passing in the is_source parameter as "false" to check_copy_size(). This is different than what copy_to_iter() does. Also, the addr parameter passed to check_copy_size() is the source so therefore we should be passing in "true" instead. 
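The argument convention, spelled out for clarity (an illustrative summary, not a quote of the header):

	/* check_copy_size(addr, bytes, is_source):
	 *   is_source == true  -> addr is read from  (copy_to_iter direction)
	 *   is_source == false -> addr is written to (copy_from_iter direction)
	 * copy_to_iter_mcsafe() reads from addr, so it must pass true. */
	if (unlikely(!check_copy_size(addr, bytes, true)))
		return 0;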
Fixes: 8780356ef630 ("x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe()") Cc: Reported-by: Fan Du Signed-off-by: Dave Jiang Reviewed-by: Vishal Verma Reported-by: Wenwei Tao Signed-off-by: Dan Williams --- include/linux/uio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 409c845d4cd3..422b1c01ee0d 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -172,7 +172,7 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) static __always_inline __must_check size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) + if (unlikely(!check_copy_size(addr, bytes, true))) return 0; else return _copy_to_iter_mcsafe(addr, bytes, i); From 3b7d96a0dbb6b630878597a1838fc39f808b761b Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 8 Aug 2018 18:44:59 +0530 Subject: [PATCH 047/499] clocksource/drivers/ti-32k: Add CLOCK_SOURCE_SUSPEND_NONSTOP flag for non-am43 SoCs The 32k clocksource is NONSTOP for non-am43 SoCs. Hence add the flag for all the other SoCs. Reported-by: Tony Lindgren Signed-off-by: Keerthy Acked-by: Tony Lindgren Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-ti-32k.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c index 29e2e1a78a43..6949a9113dbb 100644 --- a/drivers/clocksource/timer-ti-32k.c +++ b/drivers/clocksource/timer-ti-32k.c @@ -97,6 +97,9 @@ static int __init ti_32k_timer_init(struct device_node *np) return -ENXIO; } + if (!of_machine_is_compatible("ti,am43")) + ti_32k_timer.cs.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; + ti_32k_timer.counter = ti_32k_timer.base; /* From 5fe23f262e0548ca7f19fb79f89059a60d087d22 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 12 Sep 2018 16:27:44 -0700 Subject: [PATCH 048/499] ucma: fix a use-after-free in ucma_resolve_ip() There is a race condition between ucma_close() and ucma_resolve_ip(): CPU0 CPU1 ucma_resolve_ip(): ucma_close(): ctx = ucma_get_ctx(file, cmd.id); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); ... mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); rdma_destroy_id(ctx->cm_id); ... ucma_free_ctx(ctx); ret = rdma_resolve_addr(); ucma_put_ctx(ctx); Before idr_remove(), ucma_get_ctx() could still find the ctx and after rdma_destroy_id(), rdma_resolve_addr() may still access id_priv pointer. Also, ucma_put_ctx() may use ctx after ucma_free_ctx() too. ucma_close() should call ucma_put_ctx() too which tests the refcnt and waits for the last one releasing it. The similar pattern is already used by ucma_destroy_id(). 
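The lifetime rule being restored is the usual reference-plus-completion teardown. A simplified sketch with generic names (the real ucma code differs in detail):

	/* Sketch only: lookups take a reference; teardown removes the idr
	 * entry so no new lookups succeed, drops its own reference, and
	 * waits until every in-flight user has dropped theirs before the
	 * id is destroyed. */
	static void ctx_put(struct example_ctx *ctx)
	{
		if (atomic_dec_and_test(&ctx->ref))
			complete(&ctx->comp);
	}

	static void ctx_teardown(struct example_ctx *ctx)
	{
		mutex_lock(&mut);
		idr_remove(&ctx_idr, ctx->id);
		mutex_unlock(&mut);

		ctx_put(ctx);				/* reference owned here */
		wait_for_completion(&ctx->comp);	/* wait for in-flight users */
		rdma_destroy_id(ctx->cm_id);
		kfree(ctx);
	}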
Reported-and-tested-by: syzbot+da2591e115d57a9cbb8b@syzkaller.appspotmail.com Reported-by: syzbot+cfe3c1e8ef634ba8964b@syzkaller.appspotmail.com Cc: Jason Gunthorpe Cc: Doug Ledford Cc: Leon Romanovsky Signed-off-by: Cong Wang Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 5f437d1570fb..21863ddde63e 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1759,6 +1759,8 @@ static int ucma_close(struct inode *inode, struct file *filp) mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); + ucma_put_ctx(ctx); + wait_for_completion(&ctx->comp); /* rdma_destroy_id ensures that no event handlers are * inflight for that id before releasing it. */ From 807588ac92018bde88a1958f546438e840eb0158 Mon Sep 17 00:00:00 2001 From: Anisse Astier Date: Wed, 12 Sep 2018 15:07:05 +0200 Subject: [PATCH 049/499] HID: i2c-hid: disable runtime PM operations on hantick touchpad MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This hantick HTIX5288 touchpad can quickly fall in a wrong state if there are too many open/close operations. This will either make it stop reporting any input, or will shift all the input reads by a few bytes, making it impossible to decode. Here, we never release the probed touchpad runtime pm while the driver is loaded, which should disable all runtime pm suspend/resumes. This fast repetition of sleep/wakeup is also more likely to happen when using runtime PM, which is why the quirk is done there, and not for all power downs, which would include suspend or module removal. Signed-off-by: Anisse Astier Cc: stable@vger.kernel.org Acked-by: Benjamin Tissoires Reviewed-by: Hans de Goede Tested-by: Philip Müller Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index f3076659361a..a8610f5bf6f5 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -48,6 +48,7 @@ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1) #define I2C_HID_QUIRK_RESEND_REPORT_DESCR BIT(2) +#define I2C_HID_QUIRK_NO_RUNTIME_PM BIT(3) /* flags */ #define I2C_HID_STARTED 0 @@ -169,7 +170,8 @@ static const struct i2c_hid_quirks { { USB_VENDOR_ID_WEIDA, USB_DEVICE_ID_WEIDA_8755, I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV }, { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, - I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, + I2C_HID_QUIRK_NO_IRQ_AFTER_RESET | + I2C_HID_QUIRK_NO_RUNTIME_PM }, { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH, I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { 0, 0 } @@ -1105,7 +1107,9 @@ static int i2c_hid_probe(struct i2c_client *client, goto err_mem_free; } - pm_runtime_put(&client->dev); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM)) + pm_runtime_put(&client->dev); + return 0; err_mem_free: @@ -1130,7 +1134,8 @@ static int i2c_hid_remove(struct i2c_client *client) struct i2c_hid *ihid = i2c_get_clientdata(client); struct hid_device *hid; - pm_runtime_get_sync(&client->dev); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM)) + pm_runtime_get_sync(&client->dev); pm_runtime_disable(&client->dev); pm_runtime_set_suspended(&client->dev); pm_runtime_put_noidle(&client->dev); From 36cae568404a298a19a6e8a3f18641075d4cab04 Mon Sep 17 00:00:00 2001 From: Kristian 
Evensen Date: Thu, 13 Sep 2018 11:21:49 +0200 Subject: [PATCH 050/499] USB: serial: option: improve Quectel EP06 detection The Quectel EP06 (and EM06/EG06) LTE modem supports updating the USB configuration, without the VID/PID or configuration number changing. When the configuration is updated and interfaces are added/removed, the interface numbers are updated. This causes our current code for matching EP06 not to work as intended, as the assumption about reserved interfaces no longer holds. If for example the diagnostic (first) interface is removed, option will (try to) bind to the QMI interface. This patch improves EP06 detection by replacing the current match with two matches, and those matches check class, subclass and protocol as well as VID and PID. The diag interface exports class, subclass and protocol as 0xff. For the other serial interfaces, class is 0xff and subclass and protocol are both 0x0. The modem can export the following devices and always in this order: diag, nmea, at, ppp. qmi and adb. This means that diag can only ever be interface 0, and interface numbers 1-5 should be marked as reserved. The three other serial devices can have interface numbers 0-3, but I have not marked any interfaces as reserved. The reason is that the serial devices are the only interfaces exported by the device where subclass and protocol is 0x0. QMI exports the same class, subclass and protocol values as the diag interface. However, the two interfaces have different number of endpoints, QMI has three and diag two. I have added a check for number of interfaces if VID/PID matches the EP06, and we ignore the device if number of interfaces equals three (and subclass is set). Signed-off-by: Kristian Evensen Acked-by: Dan Williams [ johan: drop uneeded RSVD(5) for ADB ] Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0215b70c4efc..382feafbd127 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1081,8 +1081,9 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06), - .driver_info = RSVD(4) | RSVD(5) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), @@ -1985,6 +1986,7 @@ static int option_probe(struct usb_serial *serial, { struct usb_interface_descriptor *iface_desc = &serial->interface->cur_altsetting->desc; + struct usb_device_descriptor *dev_desc = &serial->dev->descriptor; unsigned long device_flags = id->driver_info; /* Never bind to the CD-Rom emulation interface */ @@ -1999,6 +2001,18 @@ static int option_probe(struct usb_serial *serial, if (device_flags & RSVD(iface_desc->bInterfaceNumber)) return -ENODEV; + /* + * Don't bind to the QMI device of the Quectel EP06/EG06/EM06. Class, + * subclass and protocol is 0xff for both the diagnostic port and the + * QMI interface, but the diagnostic port only has two endpoints (QMI + * has three). 
+ */ + if (dev_desc->idVendor == cpu_to_le16(QUECTEL_VENDOR_ID) && + dev_desc->idProduct == cpu_to_le16(QUECTEL_PRODUCT_EP06) && + iface_desc->bInterfaceSubClass && iface_desc->bNumEndpoints == 3) { + return -ENODEV; + } + /* Store the device flags so we can use them during attach. */ usb_set_serial_data(serial, (void *)device_flags); From 35aecc02b5b621782111f64cbb032c7f6a90bb32 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 13 Sep 2018 11:21:50 +0200 Subject: [PATCH 051/499] USB: serial: option: add two-endpoints device-id flag Allow matching on interfaces having two endpoints by adding a new device-id flag. This allows for the handling of devices whose interface numbers can change (e.g. Quectel EP06) to be contained in the device-id table. Tested-by: Kristian Evensen Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 382feafbd127..e72ad9f81c73 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -561,6 +561,9 @@ static void option_instat_callback(struct urb *urb); /* Interface is reserved */ #define RSVD(ifnum) ((BIT(ifnum) & 0xff) << 0) +/* Interface must have two endpoints */ +#define NUMEP2 BIT(16) + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -1082,7 +1085,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), - .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) }, + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, @@ -1986,7 +1989,6 @@ static int option_probe(struct usb_serial *serial, { struct usb_interface_descriptor *iface_desc = &serial->interface->cur_altsetting->desc; - struct usb_device_descriptor *dev_desc = &serial->dev->descriptor; unsigned long device_flags = id->driver_info; /* Never bind to the CD-Rom emulation interface */ @@ -2002,16 +2004,11 @@ static int option_probe(struct usb_serial *serial, return -ENODEV; /* - * Don't bind to the QMI device of the Quectel EP06/EG06/EM06. Class, - * subclass and protocol is 0xff for both the diagnostic port and the - * QMI interface, but the diagnostic port only has two endpoints (QMI - * has three). + * Allow matching on bNumEndpoints for devices whose interface numbers + * can change (e.g. Quectel EP06). */ - if (dev_desc->idVendor == cpu_to_le16(QUECTEL_VENDOR_ID) && - dev_desc->idProduct == cpu_to_le16(QUECTEL_PRODUCT_EP06) && - iface_desc->bInterfaceSubClass && iface_desc->bNumEndpoints == 3) { + if (device_flags & NUMEP2 && iface_desc->bNumEndpoints != 2) return -ENODEV; - } /* Store the device flags so we can use them during attach. 
*/ usb_set_serial_data(serial, (void *)device_flags); From dabeb13eee81329338b1f8f330dfcc37a86714d7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 14 Sep 2018 10:47:14 +0200 Subject: [PATCH 052/499] batman-adv: Increase version number to 2018.3 Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 8da3c9336111..3ccc75ee719c 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -25,7 +25,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2018.2" +#define BATADV_SOURCE_VERSION "2018.3" #endif /* B.A.T.M.A.N. parameters */ From 1a866306e0fbf3caab168c1389e4497702749441 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 12 Sep 2018 16:34:27 +0200 Subject: [PATCH 053/499] drm/etnaviv: add DMA configuration for etnaviv platform device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The etnaviv device is a virtual device backing the DRM device, which may drive multiple hardware GPU core devices. As most of the dma-mapping handling is done through the virtual device, we need to make sure that a proper DMA setup is in place. The easiest way to get a reasonable configuration is to let the virtual device share the DMA configuration with one of the GPU devices, so call of_dma_configure() with the right parameters manually. This assumes that all etnaviv driven GPU devices in the system share the same DMA configuration. If we ever encounter a SoC where the GPUs are on busses with different offsets or behind different IOMMUs that will require much deeper changes to the driver, as we would need to implement etnaviv specific versions of most of the DRM helper functions. For now we should be fine with this solution. Signed-off-by: Lucas Stach Tested-by: Guido Günther Tested-by: Eugeniy Paltsev --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 9b2720b41571..83c1f46670bf 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -592,8 +592,6 @@ static int etnaviv_pdev_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct component_match *match = NULL; - dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (!dev->platform_data) { struct device_node *core_node; @@ -655,13 +653,30 @@ static int __init etnaviv_init(void) for_each_compatible_node(np, NULL, "vivante,gc") { if (!of_device_is_available(np)) continue; - pdev = platform_device_register_simple("etnaviv", -1, - NULL, 0); - if (IS_ERR(pdev)) { - ret = PTR_ERR(pdev); + + pdev = platform_device_alloc("etnaviv", -1); + if (!pdev) { + ret = -ENOMEM; of_node_put(np); goto unregister_platform_driver; } + pdev->dev.coherent_dma_mask = DMA_BIT_MASK(40); + pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; + + /* + * Apply the same DMA configuration to the virtual etnaviv + * device as the GPU we found. This assumes that all Vivante + * GPUs in the system share the same DMA constraints. 
+ */ + of_dma_configure(&pdev->dev, np, true); + + ret = platform_device_add(pdev); + if (ret) { + platform_device_put(pdev); + of_node_put(np); + goto unregister_platform_driver; + } + etnaviv_drm = pdev; of_node_put(np); break; From f14040bca89258b8a1c71e2112e430462172ce93 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 13 Sep 2018 15:33:47 +1000 Subject: [PATCH 054/499] KVM: PPC: Book3S HV: Fix guest r11 corruption with POWER9 TM workarounds When we come into the softpatch handler (0x1500), we use r11 to store the HSRR0 for later use by the denorm handler. We also use the softpatch handler for the TM workarounds for POWER9. Unfortunately, in kvmppc_interrupt_hv we later store r11 out to the vcpu assuming it's still what we got from userspace. This causes r11 to be corrupted in the VCPU and hence when we restore the guest, we get a corrupted r11. We've seen this when running TM tests inside guests on P9. This fixes the problem by only touching r11 in the denorm case. Fixes: 4bb3c7a020 ("KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9") Cc: # 4.17+ Test-by: Suraj Jitindar Singh Reviewed-by: Paul Mackerras Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ea04dfb8c092..2d8fc8c9da7a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1314,9 +1314,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION mfspr r10,SPRN_HSRR1 - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ - addi r11,r11,-4 /* HSRR0 is next instruction */ bne+ denorm_assist #endif @@ -1382,6 +1380,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) */ XVCPSGNDP32(32) denorm_done: + mfspr r11,SPRN_HSRR0 + subi r11,r11,4 mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) From d792d4c4fc866ae224b0b0ca2aabd87d23b4d6cc Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 11 Sep 2018 12:22:25 -0700 Subject: [PATCH 055/499] scsi: ibmvscsis: Fix a stringop-overflow warning There's currently a warning about string overflow with strncat: drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c: In function 'ibmvscsis_probe': drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c:3479:2: error: 'strncat' specified bound 64 equals destination size [-Werror=stringop-overflow=] strncat(vscsi->eye, vdev->name, MAX_EYE); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Switch to a single snprintf instead of a strcpy + strcat to handle this cleanly. Signed-off-by: Laura Abbott Suggested-by: Kees Cook Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index fac377320158..b3a029ad07cd 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3474,8 +3474,7 @@ static int ibmvscsis_probe(struct vio_dev *vdev, vscsi->dds.window[LOCAL].liobn, vscsi->dds.window[REMOTE].liobn); - strcpy(vscsi->eye, "VSCSI "); - strncat(vscsi->eye, vdev->name, MAX_EYE); + snprintf(vscsi->eye, sizeof(vscsi->eye), "VSCSI %s", vdev->name); vscsi->dds.unit_id = vdev->unit_address; strncpy(vscsi->dds.partition_name, partition_name, From adad633af7b970bfa5dd1b624a4afc83cac9b235 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 11 Sep 2018 12:22:26 -0700 Subject: [PATCH 056/499] scsi: ibmvscsis: Ensure partition name is properly NUL terminated While reviewing another part of the code, Kees noticed that the strncpy of the partition name might not always be NUL terminated. Switch to using strscpy which does this safely. Reported-by: Kees Cook Signed-off-by: Laura Abbott Reviewed-by: Kees Cook Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index b3a029ad07cd..f42a619198c4 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3477,7 +3477,7 @@ static int ibmvscsis_probe(struct vio_dev *vdev, snprintf(vscsi->eye, sizeof(vscsi->eye), "VSCSI %s", vdev->name); vscsi->dds.unit_id = vdev->unit_address; - strncpy(vscsi->dds.partition_name, partition_name, + strscpy(vscsi->dds.partition_name, partition_name, sizeof(vscsi->dds.partition_name)); vscsi->dds.partition_num = partition_number; From 83e32a5910772e1475d3640a429b7686695f04d1 Mon Sep 17 00:00:00 2001 From: Xuewei Zhang Date: Thu, 6 Sep 2018 13:37:19 -0700 Subject: [PATCH 057/499] scsi: sd: Contribute to randomness when running rotational device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently a scsi device won't contribute to kernel randomness when it uses blk-mq. Since we commonly use scsi on rotational device with blk-mq, it make sense to keep contributing to kernel randomness in these cases. This is especially important for virtual machines. commit b5b6e8c8d3b4 ("scsi: virtio_scsi: fix IO hang caused by automatic irq vector affinity") made all virtio-scsi device to use blk-mq, which does not contribute to randomness today. So for a virtual machine only having virtio-scsi disk (which is common), it will simple stop getting randomness from its disks in today's implementation. With this patch, if the above VM has rotational virtio-scsi device, then it can still benefit from the entropy generated from the disk. Reported-by: Xuewei Zhang Signed-off-by: Xuewei Zhang Reviewed-by: Ming Lei Reviewed-by: Maciej Żenczykowski Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/scsi/sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b79b366a94f7..5e4f10d28065 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2959,6 +2959,9 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) if (rot == 1) { blk_queue_flag_set(QUEUE_FLAG_NONROT, q); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); + } else { + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); } if (sdkp->device->type == TYPE_ZBC) { From 3341ba9f0f2637a51738ef5de5114e13a1cd3eca Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 3 Sep 2018 12:26:21 +0200 Subject: [PATCH 058/499] mt76x0: fix remove_interface We wrongly use wcid_mask instead of vif_mask. This creates problems when the interface is removed more than 8 times, for example with iw: iw dev interface add type iw dev del This caused 'ifconfig up' to fail with error: SIOCSIFFLAGS: No space left on device Fixes: 95e444098a7b ("mt76x0: main file") Reported-and-tested-by: Sid Hayn Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/main.c b/drivers/net/wireless/mediatek/mt76/mt76x0/main.c index cf6ffb1ba4a2..22bc9d368728 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/main.c @@ -77,9 +77,8 @@ static void mt76x0_remove_interface(struct ieee80211_hw *hw, { struct mt76x0_dev *dev = hw->priv; struct mt76_vif *mvif = (struct mt76_vif *) vif->drv_priv; - unsigned int wcid = mvif->group_wcid.idx; - dev->wcid_mask[wcid / BITS_PER_LONG] &= ~BIT(wcid % BITS_PER_LONG); + dev->vif_mask &= ~BIT(mvif->idx); } static int mt76x0_config(struct ieee80211_hw *hw, u32 changed) From 013ad043906b2befd4a9bfb06219ed9fedd92716 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 13 Sep 2018 21:16:20 -0400 Subject: [PATCH 059/499] dm thin metadata: fix __udivdi3 undefined on 32-bit sector_div() is only viable for use with sector_t. dm_block_t is typedef'd to uint64_t -- so use div_u64() instead. 
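For background, a plain 64-bit division in C can be lowered to a libgcc helper (__udivdi3) on 32-bit targets, and the kernel deliberately does not provide that helper; the div_u64()/do_div() family must be used instead. A minimal illustration, not the dm-thin code:

	/* Sketch only: div_u64() works for any u64 on 32-bit and 64-bit
	 * kernels; sector_div() is reserved for sector_t values. */
	u64 one_tenth(u64 total)
	{
		return div_u64(total, 10);
	}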
Fixes: 3ab918281 ("dm thin metadata: try to avoid ever aborting transactions") Signed-off-by: Mike Snitzer --- drivers/md/dm-thin-metadata.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 74f6770c70b1..20b0776e39ef 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -832,10 +832,8 @@ static void __set_metadata_reserve(struct dm_pool_metadata *pmd) if (r) { DMERR("could not get size of metadata device"); pmd->metadata_reserve = max_blocks; - } else { - sector_div(total, 10); - pmd->metadata_reserve = min(max_blocks, total); - } + } else + pmd->metadata_reserve = min(max_blocks, div_u64(total, 10)); } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, From 2ea15030c3f0554db695f85e4760da8621c183d3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 11 Sep 2018 15:06:26 +0200 Subject: [PATCH 060/499] mmc: sdhi: sys_dmac: check for all Gen3 types when whitelisting Fixes: 26eb2607fa28 ("mmc: renesas_sdhi: add eMMC HS400 mode support") Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_sys_dmac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index 890f192dedbd..5389c4821882 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -498,7 +498,8 @@ static const struct soc_device_attribute gen3_soc_whitelist[] = { static int renesas_sdhi_sys_dmac_probe(struct platform_device *pdev) { - if (of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible && + if ((of_device_get_match_data(&pdev->dev) == &of_rcar_gen3_compatible || + of_device_get_match_data(&pdev->dev) == &of_rcar_r8a7795_compatible) && !soc_device_match(gen3_soc_whitelist)) return -ENODEV; From 324493fba77500592bbaa66421729421f139d4b5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 14 Aug 2018 12:02:31 -0400 Subject: [PATCH 061/499] media: platform: fix cros-ec-cec build error Fix build when MFD_CROS_EC is not enabled but COMPILE_TEST=y. Fixes this build error: ERROR: "cros_ec_cmd_xfer_status" [drivers/media/platform/cros-ec-cec/cros-ec-cec.ko] undefined! Signed-off-by: Randy Dunlap Cc: Neil Armstrong Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 94c1fe0e9787..54fe90acb5b2 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -541,6 +541,8 @@ config VIDEO_CROS_EC_CEC depends on MFD_CROS_EC select CEC_CORE select CEC_NOTIFIER + select CHROME_PLATFORMS + select CROS_EC_PROTO ---help--- If you say yes here you will get support for the ChromeOS Embedded Controller's CEC. From 250ae0d46d857ed61f3e0abf584901e46bd2c638 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 2 Sep 2018 12:01:53 +0300 Subject: [PATCH 062/499] net/mlx5: Fix read from coherent memory Use accessor function READ_ONCE to read from coherent memory modified by the device and read by the driver. This becomes most important in preemptive kernels where cond_resched implementation does not have the side effect which guaranteed the updated value. 
Fixes: 269d26f47f6f ("net/mlx5: Reduce command polling interval") Change-Id: Ie6deeb565ffaf76777b07448c7fbcce3510bbb8a Signed-off-by: Eli Cohen Reported-by: Jesper Dangaard Brouer Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3ce14d42ddc8..a53736c26c0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -206,7 +206,7 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent) u8 own; do { - own = ent->lay->status_own; + own = READ_ONCE(ent->lay->status_own); if (!(own & CMD_OWNER_HW)) { ent->ret = 0; return; From 6b359d5550a1ae7a1269c9dc1dd73dfdc4d6fe58 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Mon, 3 Sep 2018 10:38:14 +0300 Subject: [PATCH 063/499] net/mlx5: Check for SQ and not RQ state when modifying hairpin SQ When modifying hairpin SQ, instead of checking if the next state equals to MLX5_SQC_STATE_RDY, we compare it against the MLX5_RQC_STATE_RDY enum value. The code worked since both of MLX5_RQC_STATE_RDY and MLX5_SQC_STATE_RDY have the same value today. This patch fixes this issue. Fixes: 18e568c390c6 ("net/mlx5: Hairpin pair core object setup") Change-Id: I6758aa7b4bd137966ae28206b70648c5bc223b46 Signed-off-by: Alaa Hleihel Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index dae1c5c5d27c..d2f76070ea7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -509,7 +509,7 @@ static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn, sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); - if (next_state == MLX5_RQC_STATE_RDY) { + if (next_state == MLX5_SQC_STATE_RDY) { MLX5_SET(sqc, sqc, hairpin_peer_rq, peer_rq); MLX5_SET(sqc, sqc, hairpin_peer_vhca, peer_vhca); } From 8f92e35aff9692028279d3c03e88547df6d15020 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 15 Sep 2018 00:50:02 -0700 Subject: [PATCH 064/499] net/mlx5e: TLS, Read capabilities only when it is safe Read TLS caps from the core driver only when TLS is supported, i.e mlx5_accel_is_tls_device returns true. 
Fixes: 790af90c00d2 ("net/mlx5e: TLS, build TLS netdev from capabilities") Change-Id: I5f21ff4d684901af487e366a7e0cf032b54ee9cf Reported-by: Michal Kubecek Signed-off-by: Saeed Mahameed Reviewed-by: Boris Pismenny Reviewed-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index eddd7702680b..e88340e196f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -183,12 +183,13 @@ static const struct tlsdev_ops mlx5e_tls_ops = { void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { - u32 caps = mlx5_accel_tls_device_caps(priv->mdev); struct net_device *netdev = priv->netdev; + u32 caps; if (!mlx5_accel_is_tls_device(priv->mdev)) return; + caps = mlx5_accel_tls_device_caps(priv->mdev); if (caps & MLX5_ACCEL_TLS_TX) { netdev->features |= NETIF_F_HW_TLS_TX; netdev->hw_features |= NETIF_F_HW_TLS_TX; From b592211c33f745af67a3271ce77c10fc1e6d6241 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 17 Sep 2018 11:38:47 -0400 Subject: [PATCH 065/499] dm mpath: fix attached_handler_name leak and dangling hw_handler_name pointer Commit e8f74a0f0011 ("dm mpath: eliminate need to use scsi_device_from_queue") introduced 2 regressions: 1) memory leak occurs if attached_handler_name is not assigned to m->hw_handler_name 2) m->hw_handler_name can become a dangling pointer if the RETAIN_ATTACHED_HW_HANDLER flag is set and scsi_dh_attach() returns -EBUSY. Fix both of these by clearing 'attached_handler_name' pointer passed to setup_scsi_dh() after it is assigned to m->hw_handler_name. And if setup_scsi_dh() doesn't consume 'attached_handler_name' parse_path() will kfree() it. Fixes: e8f74a0f0011 ("dm mpath: eliminate need to use scsi_device_from_queue") Cc: stable@vger.kernel.org # 4.16+ Reported-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d94ba6f72ff5..419362c2d8ac 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -806,19 +806,19 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, } static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, - const char *attached_handler_name, char **error) + const char **attached_handler_name, char **error) { struct request_queue *q = bdev_get_queue(bdev); int r; if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { retain: - if (attached_handler_name) { + if (*attached_handler_name) { /* * Clear any hw_handler_params associated with a * handler that isn't already attached. */ - if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) { + if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) { kfree(m->hw_handler_params); m->hw_handler_params = NULL; } @@ -830,7 +830,8 @@ static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, * handler instead of the original table passed in. 
*/ kfree(m->hw_handler_name); - m->hw_handler_name = attached_handler_name; + m->hw_handler_name = *attached_handler_name; + *attached_handler_name = NULL; } } @@ -867,7 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps struct pgpath *p; struct multipath *m = ti->private; struct request_queue *q; - const char *attached_handler_name; + const char *attached_handler_name = NULL; /* we need at least a path arg */ if (as->argc < 1) { @@ -890,7 +891,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); if (attached_handler_name || m->hw_handler_name) { INIT_DELAYED_WORK(&p->activate_path, activate_path_work); - r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error); + r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error); if (r) { dm_put_device(ti, p->path.dev); goto bad; @@ -905,6 +906,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps return p; bad: + kfree(attached_handler_name); free_pgpath(p); return ERR_PTR(r); } From 0328ba90407c82874f9f39f9a5a6e5ad6cb76f44 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 17 Sep 2018 10:02:34 +0200 Subject: [PATCH 066/499] dm raid: remove bogus const from decipher_sync_action() return type With gcc-4.1.2: drivers/md/dm-raid.c:3357: warning: type qualifiers ignored on function return type Remove the "const" keyword to fix this. Fixes: 36a240a706d43383 ("dm raid: fix RAID leg rebuild errors") Signed-off-by: Geert Uytterhoeven Acked-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5ba067fa0c72..c44925e4e481 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3353,7 +3353,7 @@ static const char *sync_str(enum sync_state state) }; /* Return enum sync_state for @mddev derived from @recovery flags */ -static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery) +static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery) { if (test_bit(MD_RECOVERY_FROZEN, &recovery)) return st_frozen; From ffc4c92227db5699493e43eb140b4cb5904c30ff Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 18 Sep 2018 00:36:36 -0400 Subject: [PATCH 067/499] sysfs: Do not return POSIX ACL xattrs via listxattr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 786534b92f3c introduced a regression that caused listxattr to return the POSIX ACL attribute names even though sysfs doesn't support POSIX ACLs. This happens because simple_xattr_list checks for NULL i_acl / i_default_acl, but inode_init_always initializes those fields to ACL_NOT_CACHED ((void *)-1). For example: $ getfattr -m- -d /sys /sys: system.posix_acl_access: Operation not supported /sys: system.posix_acl_default: Operation not supported Fix this in simple_xattr_list by checking if the filesystem supports POSIX ACLs. 
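To make the failure mode concrete, a compressed illustration (not code from the patch, and assuming CONFIG_FS_POSIX_ACL so the i_acl field exists): a freshly initialized inode carries the ACL_NOT_CACHED sentinel rather than NULL, so a plain pointer test passes even on filesystems with no ACL support.

#include <linux/fs.h>

/* Illustration only: mimics the state inode_init_always() leaves behind,
 * where i_acl == ACL_NOT_CACHED, i.e. ((void *)-1), not NULL. */
static bool old_check_would_list_acl_names(struct inode *inode)
{
	/* True for every fresh inode, so the system.posix_acl_* names were
	 * listed even on filesystems that never supported POSIX ACLs. */
	return inode->i_acl != NULL;
}
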
Fixes: 786534b92f3c ("tmpfs: listxattr should include POSIX ACL xattrs") Reported-by: Marc Aurèle La France Tested-by: Marc Aurèle La France Signed-off-by: Andreas Gruenbacher Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Al Viro --- fs/xattr.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index daa732550088..0d6a6a4af861 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -948,17 +948,19 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, int err = 0; #ifdef CONFIG_FS_POSIX_ACL - if (inode->i_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_ACCESS); - if (err) - return err; - } - if (inode->i_default_acl) { - err = xattr_list_one(&buffer, &remaining_size, - XATTR_NAME_POSIX_ACL_DEFAULT); - if (err) - return err; + if (IS_POSIXACL(inode)) { + if (inode->i_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_ACCESS); + if (err) + return err; + } + if (inode->i_default_acl) { + err = xattr_list_one(&buffer, &remaining_size, + XATTR_NAME_POSIX_ACL_DEFAULT); + if (err) + return err; + } } #endif From 51c3c62b58b357e8d35e4cc32f7b4ec907426fe3 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 14 Sep 2018 11:14:11 +1000 Subject: [PATCH 068/499] powerpc: Avoid code patching freed init sections This stops us from doing code patching in init sections after they've been freed. In this chain: kvm_guest_init() -> kvm_use_magic_page() -> fault_in_pages_readable() -> __get_user() -> __get_user_nocheck() -> barrier_nospec(); We have a code patching location at barrier_nospec() and kvm_guest_init() is an init function. This whole chain gets inlined, so when we free the init section (hence kvm_guest_init()), this code goes away and hence should no longer be patched. We seen this as userspace memory corruption when using a memory checker while doing partition migration testing on powervm (this starts the code patching post migration via /sys/kernel/mobility/migration). In theory, it could also happen when using /sys/kernel/debug/powerpc/barrier_nospec. 
Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Michael Neuling Reviewed-by: Nicholas Piggin Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/lib/code-patching.c | 6 ++++++ arch/powerpc/mm/mem.c | 2 ++ 3 files changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 1a951b00465d..1fffbba8d6a5 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,6 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; +extern bool init_mem_is_free; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 850f3b8f4da5..6ae2777c220d 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -28,6 +28,12 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, { int err; + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(exec_addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", exec_addr); + return 0; + } + __put_user_size(instr, patch_addr, 4, err); if (err) return err; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5c8530d0c611..04ccb274a620 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -63,6 +63,7 @@ #endif unsigned long long memory_limit; +bool init_mem_is_free; #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; @@ -396,6 +397,7 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); + init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); } From e0bf2d4982fe7d9ddaf550dd023803ea286f47fc Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 26 Aug 2018 19:49:32 +0200 Subject: [PATCH 069/499] serial: mvebu-uart: Fix reporting of effective CSIZE to userspace Apparently, this driver (or the hardware) does not support character length settings. It's apparently running in 8-bit mode, but it makes userspace believe it's in 5-bit mode. That makes tcsetattr with CS8 incorrectly fail, breaking e.g. getty from busybox, thus the login shell on ttyMVx. Fix by hard-wiring CS8 into c_cflag. Signed-off-by: Jan Kiszka Fixes: 30530791a7a0 ("serial: mvebu-uart: initial support for Armada-3700 serial port") Cc: stable # 4.6+ Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index d04b5eeea3c6..170e446a2f62 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -511,6 +511,7 @@ static void mvebu_uart_set_termios(struct uart_port *port, termios->c_iflag |= old->c_iflag & ~(INPCK | IGNPAR); termios->c_cflag &= CREAD | CBAUD; termios->c_cflag |= old->c_cflag & ~(CREAD | CBAUD); + termios->c_cflag |= CS8; } spin_unlock_irqrestore(&port->lock, flags); From 3216c622a24b0ebb9c159a8d1daf7f17a106b3f5 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 28 Aug 2018 12:44:24 +0200 Subject: [PATCH 070/499] tty: serial: lpuart: avoid leaking struct tty_struct The function tty_port_tty_get() gets a reference to the tty. Since the code is not using tty_port_tty_set(), the reference is kept even after closing the tty. Avoid using tty_port_tty_get() by directly access the tty instance. 
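For reference, the balanced pattern looks roughly like this (sketch only, the helper name is invented and this is not the lpuart code); the leak described above comes from taking the reference without ever dropping it:

#include <linux/tty.h>

static void use_tty_briefly(struct tty_port *tport)
{
	struct tty_struct *tty = tty_port_tty_get(tport);	/* takes a kref */

	if (!tty)
		return;
	tty_get_baud_rate(tty);		/* example access while the reference is held */
	tty_kref_put(tty);		/* without this put, the tty is never released */
}
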
Since lpuart_start_rx_dma() is called from the .startup() and .set_termios() callback, it is safe to assume the tty instance is valid. Cc: stable@vger.kernel.org # v4.9+ Fixes: 5887ad43ee02 ("tty: serial: fsl_lpuart: Use cyclic DMA for Rx") Signed-off-by: Stefan Agner Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 51e47a63d61a..3f8d1274fc85 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -979,7 +979,8 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport) struct circ_buf *ring = &sport->rx_ring; int ret, nent; int bits, baud; - struct tty_struct *tty = tty_port_tty_get(&sport->port.state->port); + struct tty_port *port = &sport->port.state->port; + struct tty_struct *tty = port->tty; struct ktermios *termios = &tty->termios; baud = tty_get_baud_rate(tty); From be28c1e3ca29887e207f0cbcd294cefe5074bab6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Sep 2018 10:32:50 +0000 Subject: [PATCH 071/499] serial: cpm_uart: return immediately from console poll kgdb expects poll function to return immediately and returning NO_POLL_CHAR when no character is available. Fixes: f5316b4aea024 ("kgdb,8250,pl011: Return immediately from console poll") Cc: Jason Wessel Cc: Signed-off-by: Christophe Leroy Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index 24a5f05e769b..e5389591bb4f 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -1054,8 +1054,8 @@ static int poll_wait_key(char *obuf, struct uart_cpm_port *pinfo) /* Get the address of the host memory buffer. */ bdp = pinfo->rx_cur; - while (bdp->cbd_sc & BD_SC_EMPTY) - ; + if (bdp->cbd_sc & BD_SC_EMPTY) + return NO_POLL_CHAR; /* If the buffer address is in the CPM DPRAM, don't * convert it. @@ -1090,7 +1090,11 @@ static int cpm_get_poll_char(struct uart_port *port) poll_chars = 0; } if (poll_chars <= 0) { - poll_chars = poll_wait_key(poll_buf, pinfo); + int ret = poll_wait_key(poll_buf, pinfo); + + if (ret == NO_POLL_CHAR) + return ret; + poll_chars = ret; pollp = poll_buf; } poll_chars--; From fe32416790093b31364c08395727de17ec96ace1 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 18 Sep 2018 00:52:52 +0100 Subject: [PATCH 072/499] tty: Drop tty->count on tty_reopen() failure In case of tty_ldisc_reinit() failure, tty->count should be decremented back, otherwise we will never release_tty(). 
Tetsuo reported that it fixes noisy warnings on tty release like: pts pts4033: tty_release: tty->count(10529) != (#fd's(7) + #kopen's(0)) Fixes: commit 892d1fa7eaae ("tty: Destroy ldisc instance on hangup") Cc: stable@vger.kernel.org # v4.6+ Cc: Greg Kroah-Hartman Cc: Jiri Slaby Reviewed-by: Jiri Slaby Tested-by: Jiri Slaby Tested-by: Mark Rutland Tested-by: Tetsuo Handa Signed-off-by: Dmitry Safonov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 32bc3e3fe4d3..5e5da9acaf0a 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1255,6 +1255,7 @@ static void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct * static int tty_reopen(struct tty_struct *tty) { struct tty_driver *driver = tty->driver; + int retval; if (driver->type == TTY_DRIVER_TYPE_PTY && driver->subtype == PTY_TYPE_MASTER) @@ -1268,10 +1269,14 @@ static int tty_reopen(struct tty_struct *tty) tty->count++; - if (!tty->ldisc) - return tty_ldisc_reinit(tty, tty->termios.c_line); + if (tty->ldisc) + return 0; - return 0; + retval = tty_ldisc_reinit(tty, tty->termios.c_line); + if (retval) + tty->count--; + + return retval; } /** From e97267cb4d1ee01ca0929638ec0fcbb0904f903d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 16 Aug 2018 15:30:38 -0500 Subject: [PATCH 073/499] tty: vt_ioctl: fix potential Spectre v1 vsa.console is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/tty/vt/vt_ioctl.c:711 vt_ioctl() warn: potential spectre issue 'vc_cons' [r] Fix this by sanitizing vsa.console before using it to index vc_cons Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a78ad10a119b..73cdc0d633dd 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -32,6 +32,8 @@ #include #include +#include + #include #include #include @@ -700,6 +702,8 @@ int vt_ioctl(struct tty_struct *tty, if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES) ret = -ENXIO; else { + vsa.console = array_index_nospec(vsa.console, + MAX_NR_CONSOLES + 1); vsa.console--; console_lock(); ret = vc_allocate(vsa.console); From 8801922cd94c918e4dc3a108ecaa500c4d40583f Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 18 Sep 2018 16:10:47 +0300 Subject: [PATCH 074/499] intel_th: Fix device removal logic Commit a753bfcfdb1f ("intel_th: Make the switch allocate its subdevices") brings in new subdevice addition/removal logic that's broken for "host mode": the SWITCH device has no children to begin with, which is not handled in the code. This results in a null dereference bug later down the path. This patch fixes the subdevice removal code to handle host mode correctly. 
Signed-off-by: Alexander Shishkin Fixes: a753bfcfdb1f ("intel_th: Make the switch allocate its subdevices") CC: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index da962aa2cef5..4e70ecee2103 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -139,7 +139,8 @@ static int intel_th_remove(struct device *dev) th->thdev[i] = NULL; } - th->num_thdevs = lowest; + if (lowest >= 0) + th->num_thdevs = lowest; } if (thdrv->attr_group) From ebe4582281d6e90972f057318a6edea14810ea48 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 18 Sep 2018 16:10:48 +0300 Subject: [PATCH 075/499] intel_th: Fix resource handling for ACPI glue layer The core of the driver expects the resource array from the glue layer to be indexed by even numbers, as is the case for 64-bit PCI resources. This doesn't hold true for others, ACPI in this instance, which leads to an out-of-bounds access and an ioremap() on whatever address that access fetches. This patch fixes the problem by reading resource array differently based on whether the 64-bit flag is set, which would indicate PCI glue layer. Signed-off-by: Alexander Shishkin Fixes: ebc57e399b8e ("intel_th: Add ACPI glue layer") CC: stable@vger.kernel.org # v4.17+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index 4e70ecee2103..fc6b7f8b62fb 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -488,7 +488,7 @@ static const struct intel_th_subdevice { .flags = IORESOURCE_MEM, }, { - .start = TH_MMIO_SW, + .start = 1, /* use resource[1] */ .end = 0, .flags = IORESOURCE_MEM, }, @@ -581,6 +581,7 @@ intel_th_subdevice_alloc(struct intel_th *th, struct intel_th_device *thdev; struct resource res[3]; unsigned int req = 0; + bool is64bit = false; int r, err; thdev = intel_th_device_alloc(th, subdev->type, subdev->name, @@ -590,12 +591,18 @@ intel_th_subdevice_alloc(struct intel_th *th, thdev->drvdata = th->drvdata; + for (r = 0; r < th->num_resources; r++) + if (th->resource[r].flags & IORESOURCE_MEM_64) { + is64bit = true; + break; + } + memcpy(res, subdev->res, sizeof(struct resource) * subdev->nres); for (r = 0; r < subdev->nres; r++) { struct resource *devres = th->resource; - int bar = TH_MMIO_CONFIG; + int bar = 0; /* cut subdevices' MMIO from resource[0] */ /* * Take .end == 0 to mean 'take the whole bar', @@ -604,6 +611,8 @@ intel_th_subdevice_alloc(struct intel_th *th, */ if (!res[r].end && res[r].flags == IORESOURCE_MEM) { bar = res[r].start; + if (is64bit) + bar *= 2; res[r].start = 0; res[r].end = resource_size(&devres[bar]) - 1; } From 59d08d00d43c644ee2011d7ff1807bdd69f31fe0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 18 Sep 2018 16:10:49 +0300 Subject: [PATCH 076/499] intel_th: pci: Add Ice Lake PCH support This adds Intel(R) Trace Hub PCI ID for Ice Lake PCH. 
Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index c2e55e5d97f6..1cf6290d6435 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -160,6 +160,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x18e1), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Ice Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x34a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; From 235fe0851bcf1d6ffd6cef4c7eb0d701d0946053 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 12 Sep 2018 11:55:26 -0500 Subject: [PATCH 077/499] MAINTAINERS: Update PPC contacts for PCI core error handling The original PCI error recovery functionality was for the powerpc-specific IBM EEH feature. PCIe subsequently added some similar features, including AER and DPC, that can be used on any architecture. We want the generic PCI core error handling support to work with all of these features. Driver error recovery callbacks should be independent of which feature the platform provides. Add the generic PCI core error recovery files to the powerpc EEH MAINTAINERS entry so the powerpc folks will be copied on changes to the generic PCI error handling strategy. Add Sam and Oliver as maintainers for this area. Signed-off-by: Bjorn Helgaas Acked-by: Russell Currey Acked-by: Sam Bobroff --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4ece30f15777..f05d2e9ffc84 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11203,8 +11203,14 @@ F: tools/pci/ PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC M: Russell Currey +M: Sam Bobroff +M: Oliver O'Halloran L: linuxppc-dev@lists.ozlabs.org S: Supported +F: Documentation/PCI/pci-error-recovery.txt +F: drivers/pci/pcie/aer.c +F: drivers/pci/pcie/dpc.c +F: drivers/pci/pcie/err.c F: Documentation/powerpc/eeh-pci-error-recovery.txt F: arch/powerpc/kernel/eeh*.c F: arch/powerpc/platforms/*/eeh*.c From b8a946d8dc245ec3adae65c8eab6f3e73a950af3 Mon Sep 17 00:00:00 2001 From: Song Qiang Date: Mon, 17 Sep 2018 11:08:01 -0700 Subject: [PATCH 078/499] Input: gpio-keys - fix a documentation index issue gpio_keys.c now exists in the drivers/input/keyboard/ rather than drivers/input/. Signed-off-by: Song Qiang Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/input/gpio-keys.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/gpio-keys.txt b/Documentation/devicetree/bindings/input/gpio-keys.txt index 996ce84352cb..7cccc49b6bea 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.txt +++ b/Documentation/devicetree/bindings/input/gpio-keys.txt @@ -1,4 +1,4 @@ -Device-Tree bindings for input/gpio_keys.c keyboard driver +Device-Tree bindings for input/keyboard/gpio_keys.c keyboard driver Required properties: - compatible = "gpio-keys"; From 49f62249a9577b0f8c20c7c843d23289d143daf1 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 17 Sep 2018 11:08:52 -0700 Subject: [PATCH 079/499] Input: egalax_ts - add system wakeup support This patch adds wakeup function support for egalax touch screen, if "wakeup-source" is added to device tree's egalax touch screen node, the wakeup function will be enabled, and egalax touch screen will be able to wakeup system from suspend. 
Signed-off-by: Anson Huang Reviewed-by: Fabio Estevam Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/egalax_ts.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/touchscreen/egalax_ts.c b/drivers/input/touchscreen/egalax_ts.c index 80e69bb8283e..83ac8c128192 100644 --- a/drivers/input/touchscreen/egalax_ts.c +++ b/drivers/input/touchscreen/egalax_ts.c @@ -241,6 +241,9 @@ static int __maybe_unused egalax_ts_suspend(struct device *dev) struct i2c_client *client = to_i2c_client(dev); int ret; + if (device_may_wakeup(dev)) + return enable_irq_wake(client->irq); + ret = i2c_master_send(client, suspend_cmd, MAX_I2C_DATA_LEN); return ret > 0 ? 0 : ret; } @@ -249,6 +252,9 @@ static int __maybe_unused egalax_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + if (device_may_wakeup(dev)) + return disable_irq_wake(client->irq); + return egalax_wake_up_device(client); } From 9e62df51be993035c577371ffee5477697a56aad Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 17 Sep 2018 12:43:34 -0700 Subject: [PATCH 080/499] Input: atakbd - fix Atari keymap Fix errors in Atari keymap (mostly in keypad, help and undo keys). Patch provided on debian-68k ML by Andreas Schwab , keymap array size and unhandled scancode limit adjusted to 0x73 by me. Tested-by: Michael Schmitz Signed-off-by: Michael Schmitz Signed-off-by: Andreas Schwab Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atakbd.c | 64 ++++++++++++++------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c index 6f62da2909ec..e989574a2e20 100644 --- a/drivers/input/keyboard/atakbd.c +++ b/drivers/input/keyboard/atakbd.c @@ -75,8 +75,7 @@ MODULE_LICENSE("GPL"); */ -static unsigned char atakbd_keycode[0x72] = { /* American layout */ - [0] = KEY_GRAVE, +static unsigned char atakbd_keycode[0x73] = { /* American layout */ [1] = KEY_ESC, [2] = KEY_1, [3] = KEY_2, @@ -117,9 +116,9 @@ static unsigned char atakbd_keycode[0x72] = { /* American layout */ [38] = KEY_L, [39] = KEY_SEMICOLON, [40] = KEY_APOSTROPHE, - [41] = KEY_BACKSLASH, /* FIXME, '#' */ + [41] = KEY_GRAVE, [42] = KEY_LEFTSHIFT, - [43] = KEY_GRAVE, /* FIXME: '~' */ + [43] = KEY_BACKSLASH, [44] = KEY_Z, [45] = KEY_X, [46] = KEY_C, @@ -145,45 +144,34 @@ static unsigned char atakbd_keycode[0x72] = { /* American layout */ [66] = KEY_F8, [67] = KEY_F9, [68] = KEY_F10, - [69] = KEY_ESC, - [70] = KEY_DELETE, - [71] = KEY_KP7, - [72] = KEY_KP8, - [73] = KEY_KP9, + [71] = KEY_HOME, + [72] = KEY_UP, [74] = KEY_KPMINUS, - [75] = KEY_KP4, - [76] = KEY_KP5, - [77] = KEY_KP6, + [75] = KEY_LEFT, + [77] = KEY_RIGHT, [78] = KEY_KPPLUS, - [79] = KEY_KP1, - [80] = KEY_KP2, - [81] = KEY_KP3, - [82] = KEY_KP0, - [83] = KEY_KPDOT, - [90] = KEY_KPLEFTPAREN, - [91] = KEY_KPRIGHTPAREN, - [92] = KEY_KPASTERISK, /* FIXME */ - [93] = KEY_KPASTERISK, - [94] = KEY_KPPLUS, - [95] = KEY_HELP, + [80] = KEY_DOWN, + [82] = KEY_INSERT, + [83] = KEY_DELETE, [96] = KEY_102ND, - [97] = KEY_KPASTERISK, /* FIXME */ - [98] = KEY_KPSLASH, + [97] = KEY_UNDO, + [98] = KEY_HELP, [99] = KEY_KPLEFTPAREN, [100] = KEY_KPRIGHTPAREN, [101] = KEY_KPSLASH, [102] = KEY_KPASTERISK, - [103] = KEY_UP, - [104] = KEY_KPASTERISK, /* FIXME */ - [105] = KEY_LEFT, - [106] = KEY_RIGHT, - [107] = KEY_KPASTERISK, /* FIXME */ - [108] = KEY_DOWN, - [109] = KEY_KPASTERISK, /* FIXME */ - [110] = KEY_KPASTERISK, /* FIXME */ - [111] = KEY_KPASTERISK, /* FIXME */ - [112] = KEY_KPASTERISK, /* FIXME */ - 
[113] = KEY_KPASTERISK /* FIXME */ + [103] = KEY_KP7, + [104] = KEY_KP8, + [105] = KEY_KP9, + [106] = KEY_KP4, + [107] = KEY_KP5, + [108] = KEY_KP6, + [109] = KEY_KP1, + [110] = KEY_KP2, + [111] = KEY_KP3, + [112] = KEY_KP0, + [113] = KEY_KPDOT, + [114] = KEY_KPENTER, }; static struct input_dev *atakbd_dev; @@ -191,7 +179,7 @@ static struct input_dev *atakbd_dev; static void atakbd_interrupt(unsigned char scancode, char down) { - if (scancode < 0x72) { /* scancodes < 0xf2 are keys */ + if (scancode < 0x73) { /* scancodes < 0xf3 are keys */ // report raw events here? @@ -205,7 +193,7 @@ static void atakbd_interrupt(unsigned char scancode, char down) input_report_key(atakbd_dev, scancode, down); input_sync(atakbd_dev); } - } else /* scancodes >= 0xf2 are mouse data, most likely */ + } else /* scancodes >= 0xf3 are mouse data, most likely */ printk(KERN_INFO "atakbd: unhandled scancode %x\n", scancode); return; From 52d2c7bf7c90217fbe875d2d76f310979c48eb83 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Mon, 17 Sep 2018 15:27:49 -0700 Subject: [PATCH 081/499] Input: atakbd - fix Atari CapsLock behaviour The CapsLock key on Atari keyboards is not a toggle, it does send the normal make and break scancodes. Drop the CapsLock toggle handling code, which did cause the CapsLock key to merely act as a Shift key. Tested-by: Michael Schmitz Signed-off-by: Michael Schmitz Signed-off-by: Andreas Schwab Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atakbd.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c index e989574a2e20..6caee807cafa 100644 --- a/drivers/input/keyboard/atakbd.c +++ b/drivers/input/keyboard/atakbd.c @@ -185,14 +185,8 @@ static void atakbd_interrupt(unsigned char scancode, char down) scancode = atakbd_keycode[scancode]; - if (scancode == KEY_CAPSLOCK) { /* CapsLock is a toggle switch key on Amiga */ - input_report_key(atakbd_dev, scancode, 1); - input_report_key(atakbd_dev, scancode, 0); - input_sync(atakbd_dev); - } else { - input_report_key(atakbd_dev, scancode, down); - input_sync(atakbd_dev); - } + input_report_key(atakbd_dev, scancode, down); + input_sync(atakbd_dev); } else /* scancodes >= 0xf3 are mouse data, most likely */ printk(KERN_INFO "atakbd: unhandled scancode %x\n", scancode); From 91a97507323e1ad4bfc10f4a5922e67cdaf8b3cd Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Tue, 18 Sep 2018 09:32:22 -0700 Subject: [PATCH 082/499] Input: elantech - enable middle button of touchpad on ThinkPad P72 Adding 2 new touchpad IDs to support middle button support. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 44f57cf6675b..2d95e8d93cc7 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1178,6 +1178,8 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = { static const char * const middle_button_pnp_ids[] = { "LEN2131", /* ThinkPad P52 w/ NFC */ "LEN2132", /* ThinkPad P52 */ + "LEN2133", /* ThinkPad P72 w/ NFC */ + "LEN2134", /* ThinkPad P72 */ NULL }; From 4fef1250eafddc5182cd5c3c354a6971bcf7520d Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Tue, 18 Sep 2018 09:53:32 -0700 Subject: [PATCH 083/499] Input: uinput - allow for max == min during input_absinfo validation These values are inclusive, so a range of 1 requires min == max. 
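For instance (a hypothetical device, not taken from the patch), an absolute axis that only ever reports a single value is described with minimum equal to maximum, and the old "max <= min" test rejected it:

#include <linux/uinput.h>
#include <sys/ioctl.h>

/* Hypothetical axis: a device whose pressure axis reports one fixed value. */
static int setup_fixed_pressure_axis(int fd)
{
	struct uinput_abs_setup abs = {
		.code = ABS_PRESSURE,
		.absinfo = {
			.minimum = 50,
			.maximum = 50,	/* inclusive range of exactly one value */
		},
	};

	/* rejected with -EINVAL by the old "max <= min" check */
	return ioctl(fd, UI_ABS_SETUP, &abs);
}
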
Signed-off-by: Peter Hutterer Reviewed-by: Martin Kepplinger Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 96a887f33698..eb14ddf69346 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -410,7 +410,7 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, min = abs->minimum; max = abs->maximum; - if ((min != 0 || max != 0) && max <= min) { + if ((min != 0 || max != 0) && max < min) { printk(KERN_DEBUG "%s: invalid abs[%02x] min:%d max:%d\n", UINPUT_NAME, code, min, max); From 19a4fbffc94e41abaa2a623a25ce2641d69eccf0 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Thu, 13 Sep 2018 15:37:04 +0200 Subject: [PATCH 084/499] gpiolib: Free the last requested descriptor The current code only frees N-1 gpios if an error occurs during gpiod_set_transitory, gpiod_direction_output or gpiod_direction_input. Leading to gpios that cannot be used by userspace nor other drivers. Cc: Timur Tabi Cc: stable@vger.kernel.org Fixes: ab3dbcf78f60f46d ("gpioib: do not free unrequested descriptors) Reported-by: Jan Lorenzen Reported-by: Jim Paris Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e8f8a1999393..a57300c1d649 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -571,7 +571,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_descs; lh->descs[i] = desc; - count = i; + count = i + 1; if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); From 8fce3331702316d4bcfeb0771c09ac75d2192bbc Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 17 Sep 2018 09:22:56 +0100 Subject: [PATCH 085/499] net: stmmac: Rework coalesce timer and fix multi-queue races This follows David Miller advice and tries to fix coalesce timer in multi-queue scenarios. We are now using per-queue coalesce values and per-queue TX timer. Coalesce timer default values was changed to 1ms and the coalesce frames to 25. Tested in B2B setup between XGMAC2 and GMAC5. Signed-off-by: Jose Abreu Fixes: ce736788e8a ("net: stmmac: adding multiple buffers for TX") Cc: Florian Fainelli Cc: Neil Armstrong Cc: Jerome Brunet Cc: Martin Blumenstingl Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 4 +- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 14 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 233 ++++++++++-------- include/linux/stmmac.h | 1 + 4 files changed, 146 insertions(+), 106 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 1854f270ad66..b1b305f8f414 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -258,10 +258,10 @@ struct stmmac_safety_stats { #define MAX_DMA_RIWT 0xff #define MIN_DMA_RIWT 0x20 /* Tx coalesce parameters */ -#define STMMAC_COAL_TX_TIMER 40000 +#define STMMAC_COAL_TX_TIMER 1000 #define STMMAC_MAX_COAL_TX_TICK 100000 #define STMMAC_TX_MAX_FRAMES 256 -#define STMMAC_TX_FRAMES 64 +#define STMMAC_TX_FRAMES 25 /* Packets types */ enum packets_types { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index c0a855b7ab3b..63e1064b27a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -48,6 +48,8 @@ struct stmmac_tx_info { /* Frequently used values are kept adjacent for cache effect */ struct stmmac_tx_queue { + u32 tx_count_frames; + struct timer_list txtimer; u32 queue_index; struct stmmac_priv *priv_data; struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp; @@ -73,7 +75,14 @@ struct stmmac_rx_queue { u32 rx_zeroc_thresh; dma_addr_t dma_rx_phy; u32 rx_tail_addr; +}; + +struct stmmac_channel { struct napi_struct napi ____cacheline_aligned_in_smp; + struct stmmac_priv *priv_data; + u32 index; + int has_rx; + int has_tx; }; struct stmmac_tc_entry { @@ -109,14 +118,12 @@ struct stmmac_pps_cfg { struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ - u32 tx_count_frames; u32 tx_coal_frames; u32 tx_coal_timer; int tx_coalesce; int hwts_tx_en; bool tx_path_in_lpi_mode; - struct timer_list txtimer; bool tso; unsigned int dma_buf_sz; @@ -137,6 +144,9 @@ struct stmmac_priv { /* TX Queue */ struct stmmac_tx_queue tx_queue[MTL_MAX_TX_QUEUES]; + /* Generic channel for NAPI */ + struct stmmac_channel channel[STMMAC_CH_MAX]; + bool oldlink; int speed; int oldduplex; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9f458bb16f2a..ab9cc0143ff2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -148,12 +148,14 @@ static void stmmac_verify_args(void) static void stmmac_disable_all_queues(struct stmmac_priv *priv) { u32 rx_queues_cnt = priv->plat->rx_queues_to_use; + u32 tx_queues_cnt = priv->plat->tx_queues_to_use; + u32 maxq = max(rx_queues_cnt, tx_queues_cnt); u32 queue; - for (queue = 0; queue < rx_queues_cnt; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + for (queue = 0; queue < maxq; queue++) { + struct stmmac_channel *ch = &priv->channel[queue]; - napi_disable(&rx_q->napi); + napi_disable(&ch->napi); } } @@ -164,12 +166,14 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv) static void stmmac_enable_all_queues(struct stmmac_priv *priv) { u32 rx_queues_cnt = priv->plat->rx_queues_to_use; + u32 tx_queues_cnt = priv->plat->tx_queues_to_use; + u32 maxq = max(rx_queues_cnt, tx_queues_cnt); u32 queue; - for (queue = 0; queue < rx_queues_cnt; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + for (queue = 0; queue < maxq; queue++) { 
+ struct stmmac_channel *ch = &priv->channel[queue]; - napi_enable(&rx_q->napi); + napi_enable(&ch->napi); } } @@ -1843,18 +1847,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) * @queue: TX queue index * Description: it reclaims the transmit resources after transmission completes. */ -static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) +static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; unsigned int bytes_compl = 0, pkts_compl = 0; - unsigned int entry; + unsigned int entry, count = 0; - netif_tx_lock(priv->dev); + __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue)); priv->xstats.tx_clean++; entry = tx_q->dirty_tx; - while (entry != tx_q->cur_tx) { + while ((entry != tx_q->cur_tx) && (count < budget)) { struct sk_buff *skb = tx_q->tx_skbuff[entry]; struct dma_desc *p; int status; @@ -1870,6 +1874,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) if (unlikely(status & tx_dma_own)) break; + count++; + /* Make sure descriptor fields are read after reading * the own bit. */ @@ -1937,7 +1943,10 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) stmmac_enable_eee_mode(priv); mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); } - netif_tx_unlock(priv->dev); + + __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); + + return count; } /** @@ -2020,6 +2029,33 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv) return false; } +static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) +{ + int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, + &priv->xstats, chan); + struct stmmac_channel *ch = &priv->channel[chan]; + bool needs_work = false; + + if ((status & handle_rx) && ch->has_rx) { + needs_work = true; + } else { + status &= ~handle_rx; + } + + if ((status & handle_tx) && ch->has_tx) { + needs_work = true; + } else { + status &= ~handle_tx; + } + + if (needs_work && napi_schedule_prep(&ch->napi)) { + stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + __napi_schedule(&ch->napi); + } + + return status; +} + /** * stmmac_dma_interrupt - DMA ISR * @priv: driver private structure @@ -2034,57 +2070,14 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) u32 channels_to_check = tx_channel_count > rx_channel_count ? tx_channel_count : rx_channel_count; u32 chan; - bool poll_scheduled = false; int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; /* Make sure we never check beyond our status buffer. */ if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status))) channels_to_check = ARRAY_SIZE(status); - /* Each DMA channel can be used for rx and tx simultaneously, yet - * napi_struct is embedded in struct stmmac_rx_queue rather than in a - * stmmac_channel struct. - * Because of this, stmmac_poll currently checks (and possibly wakes) - * all tx queues rather than just a single tx queue. - */ for (chan = 0; chan < channels_to_check; chan++) - status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr, - &priv->xstats, chan); - - for (chan = 0; chan < rx_channel_count; chan++) { - if (likely(status[chan] & handle_rx)) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; - - if (likely(napi_schedule_prep(&rx_q->napi))) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); - __napi_schedule(&rx_q->napi); - poll_scheduled = true; - } - } - } - - /* If we scheduled poll, we already know that tx queues will be checked. 
- * If we didn't schedule poll, see if any DMA channel (used by tx) has a - * completed transmission, if so, call stmmac_poll (once). - */ - if (!poll_scheduled) { - for (chan = 0; chan < tx_channel_count; chan++) { - if (status[chan] & handle_tx) { - /* It doesn't matter what rx queue we choose - * here. We use 0 since it always exists. - */ - struct stmmac_rx_queue *rx_q = - &priv->rx_queue[0]; - - if (likely(napi_schedule_prep(&rx_q->napi))) { - stmmac_disable_dma_irq(priv, - priv->ioaddr, chan); - __napi_schedule(&rx_q->napi); - } - break; - } - } - } + status[chan] = stmmac_napi_check(priv, chan); for (chan = 0; chan < tx_channel_count; chan++) { if (unlikely(status[chan] & tx_hard_error_bump_tc)) { @@ -2233,6 +2226,13 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) return ret; } +static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); +} + /** * stmmac_tx_timer - mitigation sw timer for tx. * @data: data pointer @@ -2241,13 +2241,14 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) */ static void stmmac_tx_timer(struct timer_list *t) { - struct stmmac_priv *priv = from_timer(priv, t, txtimer); - u32 tx_queues_count = priv->plat->tx_queues_to_use; - u32 queue; + struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer); + struct stmmac_priv *priv = tx_q->priv_data; + struct stmmac_channel *ch; - /* let's scan all the tx queues */ - for (queue = 0; queue < tx_queues_count; queue++) - stmmac_tx_clean(priv, queue); + ch = &priv->channel[tx_q->queue_index]; + + if (likely(napi_schedule_prep(&ch->napi))) + __napi_schedule(&ch->napi); } /** @@ -2260,11 +2261,17 @@ static void stmmac_tx_timer(struct timer_list *t) */ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) { + u32 tx_channel_count = priv->plat->tx_queues_to_use; + u32 chan; + priv->tx_coal_frames = STMMAC_TX_FRAMES; priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; - timer_setup(&priv->txtimer, stmmac_tx_timer, 0); - priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer); - add_timer(&priv->txtimer); + + for (chan = 0; chan < tx_channel_count; chan++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; + + timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0); + } } static void stmmac_set_rings_length(struct stmmac_priv *priv) @@ -2592,6 +2599,7 @@ static void stmmac_hw_teardown(struct net_device *dev) static int stmmac_open(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + u32 chan; int ret; stmmac_check_ether_addr(priv); @@ -2688,7 +2696,9 @@ static int stmmac_open(struct net_device *dev) if (dev->phydev) phy_stop(dev->phydev); - del_timer_sync(&priv->txtimer); + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) + del_timer_sync(&priv->tx_queue[chan].txtimer); + stmmac_hw_teardown(dev); init_error: free_dma_desc_resources(priv); @@ -2708,6 +2718,7 @@ static int stmmac_open(struct net_device *dev) static int stmmac_release(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + u32 chan; if (priv->eee_enabled) del_timer_sync(&priv->eee_ctrl_timer); @@ -2722,7 +2733,8 @@ static int stmmac_release(struct net_device *dev) stmmac_disable_all_queues(priv); - del_timer_sync(&priv->txtimer); + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) + del_timer_sync(&priv->tx_queue[chan].txtimer); /* Free the IRQ lines */ free_irq(dev->irq, dev); @@ -2936,14 +2948,13 @@ static netdev_tx_t 
stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) priv->xstats.tx_tso_nfrags += nfrags; /* Manage tx mitigation */ - priv->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { - mod_timer(&priv->txtimer, - STMMAC_COAL_TIMER(priv->tx_coal_timer)); - } else { - priv->tx_count_frames = 0; + tx_q->tx_count_frames += nfrags + 1; + if (priv->tx_coal_frames <= tx_q->tx_count_frames) { stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; + tx_q->tx_count_frames = 0; + } else { + stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); @@ -3146,14 +3157,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * This approach takes care about the fragments: desc is the first * element in case of no SG. */ - priv->tx_count_frames += nfrags + 1; - if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { - mod_timer(&priv->txtimer, - STMMAC_COAL_TIMER(priv->tx_coal_timer)); - } else { - priv->tx_count_frames = 0; + tx_q->tx_count_frames += nfrags + 1; + if (priv->tx_coal_frames <= tx_q->tx_count_frames) { stmmac_set_tx_ic(priv, desc); priv->xstats.tx_set_ic_bit++; + tx_q->tx_count_frames = 0; + } else { + stmmac_tx_timer_arm(priv, queue); } skb_tx_timestamp(skb); @@ -3199,6 +3209,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); stmmac_enable_dma_transmission(priv, priv->ioaddr); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); return NETDEV_TX_OK; @@ -3319,6 +3330,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_channel *ch = &priv->channel[queue]; unsigned int entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; unsigned int next_entry; @@ -3491,7 +3503,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) else skb->ip_summed = CHECKSUM_UNNECESSARY; - napi_gro_receive(&rx_q->napi, skb); + napi_gro_receive(&ch->napi, skb); priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; @@ -3514,27 +3526,33 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) * Description : * To look at the incoming frames and clear the tx resources. 
*/ -static int stmmac_poll(struct napi_struct *napi, int budget) +static int stmmac_napi_poll(struct napi_struct *napi, int budget) { - struct stmmac_rx_queue *rx_q = - container_of(napi, struct stmmac_rx_queue, napi); - struct stmmac_priv *priv = rx_q->priv_data; - u32 tx_count = priv->plat->tx_queues_to_use; - u32 chan = rx_q->queue_index; - int work_done = 0; - u32 queue; + struct stmmac_channel *ch = + container_of(napi, struct stmmac_channel, napi); + struct stmmac_priv *priv = ch->priv_data; + int work_done = 0, work_rem = budget; + u32 chan = ch->index; priv->xstats.napi_poll++; - /* check all the queues */ - for (queue = 0; queue < tx_count; queue++) - stmmac_tx_clean(priv, queue); + if (ch->has_tx) { + int done = stmmac_tx_clean(priv, work_rem, chan); - work_done = stmmac_rx(priv, budget, rx_q->queue_index); - if (work_done < budget) { - napi_complete_done(napi, work_done); - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + work_done += done; + work_rem -= done; } + + if (ch->has_rx) { + int done = stmmac_rx(priv, work_rem, chan); + + work_done += done; + work_rem -= done; + } + + if (work_done < budget && napi_complete_done(napi, work_done)) + stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + return work_done; } @@ -4198,8 +4216,8 @@ int stmmac_dvr_probe(struct device *device, { struct net_device *ndev = NULL; struct stmmac_priv *priv; + u32 queue, maxq; int ret = 0; - u32 queue; ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv), MTL_MAX_TX_QUEUES, @@ -4322,11 +4340,22 @@ int stmmac_dvr_probe(struct device *device, "Enable RX Mitigation via HW Watchdog Timer\n"); } - for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + /* Setup channels NAPI */ + maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); - netif_napi_add(ndev, &rx_q->napi, stmmac_poll, - (8 * priv->plat->rx_queues_to_use)); + for (queue = 0; queue < maxq; queue++) { + struct stmmac_channel *ch = &priv->channel[queue]; + + ch->priv_data = priv; + ch->index = queue; + + if (queue < priv->plat->rx_queues_to_use) + ch->has_rx = true; + if (queue < priv->plat->tx_queues_to_use) + ch->has_tx = true; + + netif_napi_add(ndev, &ch->napi, stmmac_napi_poll, + NAPI_POLL_WEIGHT); } mutex_init(&priv->lock); @@ -4372,10 +4401,10 @@ int stmmac_dvr_probe(struct device *device, priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); error_mdio_register: - for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + for (queue = 0; queue < maxq; queue++) { + struct stmmac_channel *ch = &priv->channel[queue]; - netif_napi_del(&rx_q->napi); + netif_napi_del(&ch->napi); } error_hw_init: destroy_workqueue(priv->wq); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c43e9a01b892..7ddfc65586b0 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -30,6 +30,7 @@ #define MTL_MAX_RX_QUEUES 8 #define MTL_MAX_TX_QUEUES 8 +#define STMMAC_CH_MAX 8 #define STMMAC_RX_COE_NONE 0 #define STMMAC_RX_COE_TYPE1 1 From 0431100b3d82c509729ece1ab22ada2484e209c1 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Mon, 17 Sep 2018 09:22:57 +0100 Subject: [PATCH 086/499] net: stmmac: Fixup the tail addr setting in xmit path Currently we are always setting the tail address of descriptor list to the end of the pre-allocated list. According to databook this is not correct. 
Tail address should point to the last available descriptor + 1, which means we have to update the tail address everytime we call the xmit function. This should make no impact in older versions of MAC but in newer versions there are some DMA features which allows the IP to fetch descriptors in advance and in a non sequential order so its critical that we set the tail address correctly. Signed-off-by: Jose Abreu Fixes: f748be531d70 ("stmmac: support new GMAC4") Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ab9cc0143ff2..75896d6ba6e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2213,8 +2213,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, tx_q->dma_tx_phy, chan); - tx_q->tx_tail_addr = tx_q->dma_tx_phy + - (DMA_TX_SIZE * sizeof(struct dma_desc)); + tx_q->tx_tail_addr = tx_q->dma_tx_phy; stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, chan); } @@ -3003,6 +3002,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); return NETDEV_TX_OK; @@ -3210,6 +3210,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_enable_dma_transmission(priv, priv->ioaddr); + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc)); stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); return NETDEV_TX_OK; From 0a286afee5a1e8dca86d824209dbd3200294f86f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 17 Sep 2018 15:30:06 +0200 Subject: [PATCH 087/499] selftests: pmtu: properly redirect stderr to /dev/null The cleanup function uses "$CMD 2 > /dev/null", which doesn't actually send stderr to /dev/null, so when the netns doesn't exist, the error message is shown. Use "2> /dev/null" instead, so that those messages disappear, as was intended. Fixes: d1f1b9cbf34c ("selftests: net: Introduce first PMTU test") Signed-off-by: Sabrina Dubroca Acked-by: Stefano Brivio Signed-off-by: David S. Miller --- tools/testing/selftests/net/pmtu.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 32a194e3e07a..0ab9423d009f 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -178,8 +178,8 @@ setup() { cleanup() { [ ${cleanup_done} -eq 1 ] && return - ip netns del ${NS_A} 2 > /dev/null - ip netns del ${NS_B} 2 > /dev/null + ip netns del ${NS_A} 2> /dev/null + ip netns del ${NS_B} 2> /dev/null cleanup_done=1 } From 674d9de02aa7d521ebdf66c3958758bdd9c64e11 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 17 Sep 2018 15:51:40 +0200 Subject: [PATCH 088/499] NFC: Fix possible memory corruption when handling SHDLC I-Frame commands When handling SHDLC I-Frame commands "pipe" field used for indexing into an array should be checked before usage. 
If left unchecked it might access memory outside of the array of size NFC_HCI_MAX_PIPES(127). Malformed NFC HCI frames could be injected by a malicious NFC device communicating with the device being attacked (remote attack vector), or even by an attacker with physical access to the I2C bus such that they could influence the data transfers on that bus (local attack vector). skb->data is controlled by the attacker and has only been sanitized in the most trivial ways (CRC check), therefore we can consider the create_info struct and all of its members to tainted. 'create_info->pipe' with max value of 255 (uint8) is used to take an offset of the hdev->pipes array of 127 elements which can lead to OOB write. Cc: Samuel Ortiz Cc: Allen Pais Cc: "David S. Miller" Suggested-by: Kevin Deus Signed-off-by: Suren Baghdasaryan Acked-by: Kees Cook Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- net/nfc/hci/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ac8030c4bcf8..19cb2e473ea6 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -209,6 +209,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, } create_info = (struct hci_create_pipe_resp *)skb->data; + if (create_info->pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we @@ -232,6 +237,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, } delete_info = (struct hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; break; From e285d5bfb7e9785d289663baef252dd315e171f8 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 17 Sep 2018 15:51:41 +0200 Subject: [PATCH 089/499] NFC: Fix the number of pipes According to ETSI TS 102 622 specification chapter 4.4 pipe identifier is 7 bits long which allows for 128 unique pipe IDs. Because NFC_HCI_MAX_PIPES is used as the number of pipes supported and not as the max pipe ID, its value should be 128 instead of 127. nfc_hci_recv_from_llc extracts pipe ID from packet header using NFC_HCI_FRAGMENT(0x7F) mask which allows for pipe ID value of 127. Same happens when NCI_HCP_MSG_GET_PIPE() is being used. With pipes array having only 127 elements and pipe ID of 127 the OOB memory access will result. Cc: Samuel Ortiz Cc: Allen Pais Cc: "David S. Miller" Suggested-by: Dan Carpenter Signed-off-by: Suren Baghdasaryan Reviewed-by: Kees Cook Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- include/net/nfc/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 316694dafa5b..008f466d1da7 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -87,7 +87,7 @@ struct nfc_hci_pipe { * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. 
*/ -#define NFC_HCI_MAX_PIPES 127 +#define NFC_HCI_MAX_PIPES 128 struct nfc_hci_init_data { u8 gate_count; struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES]; From 08e39982ef64f800fd1f9b9b92968d14d5fafa82 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 17 Sep 2018 17:22:40 +0200 Subject: [PATCH 090/499] net: emac: fix fixed-link setup for the RTL8363SB switch On the Netgear WNDAP620, the emac ethernet isn't receiving nor xmitting any frames from/to the RTL8363SB (identifies itself as a RTL8367RB). This is caused by the emac hardware not knowing the forced link parameters for speed, duplex, pause, etc. This begs the question, how this was working on the original driver code, when it was necessary to set the phy_address and phy_map to 0xffffffff. But I guess without access to the old PPC405/440/460 hardware, it's not possible to know. Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 372664686309..129f4e9f38da 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2677,12 +2677,17 @@ static int emac_init_phy(struct emac_instance *dev) if (of_phy_is_fixed_link(np)) { int res = emac_dt_mdio_probe(dev); - if (!res) { - res = of_phy_register_fixed_link(np); - if (res) - mdiobus_unregister(dev->mii_bus); + if (res) + return res; + + res = of_phy_register_fixed_link(np); + dev->phy_dev = of_phy_find_device(np); + if (res || !dev->phy_dev) { + mdiobus_unregister(dev->mii_bus); + return res ? res : -EINVAL; } - return res; + emac_adjust_link(dev->ndev); + put_device(&dev->phy_dev->mdio.dev); } return 0; } From 65fac4fe9080714df80d430888834ce87c6716ba Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Tue, 18 Sep 2018 15:15:44 +0800 Subject: [PATCH 091/499] net: bnxt: Fix a uninitialized variable warning. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compile warning: drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c:49:5: warning: ‘nvm_param.dir_type’ may be used uninitialized in this function [-Wmaybe-uninitialized] if (nvm_param.dir_type == BNXT_NVM_PORT_CFG) Signed-off-by: zhong jiang Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index f3b9fbcc705b..790c684f08ab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -46,6 +46,9 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, } } + if (i == ARRAY_SIZE(nvm_params)) + return -EOPNOTSUPP; + if (nvm_param.dir_type == BNXT_NVM_PORT_CFG) idx = bp->pf.port_id; else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG) From 2fe397a3959de8a472f165e6d152f64cb77fa2cc Mon Sep 17 00:00:00 2001 From: Kazuya Mizuguchi Date: Tue, 18 Sep 2018 12:22:26 +0200 Subject: [PATCH 092/499] ravb: do not write 1 to reserved bits EtherAVB hardware requires 0 to be written to status register bits in order to clear them, however, care must be taken not to: 1. Clear other bits, by writing zero to them 2. Write one to reserved bits This patch corrects the ravb driver with respect to the second point above. 
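The resulting pattern at each write site is shown below with the intent spelled out; the *_RESERVED masks are the ones the patch introduces, and the line itself is one of the hunks that follow:

    /* Status bits are acknowledged by writing 0; writing 1 leaves a bit
     * untouched. So: write 0 to the bit being cleared, write 0 to all
     * reserved bits, and write 1 everywhere else. */
    ravb_write(ndev, ~(TIS_TFUF | TIS_RESERVED), TIS);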
This is done by defining reserved bit masks for the affected registers and, after auditing the code, ensure all sites that may write a one to a reserved bit use are suitably masked. Signed-off-by: Kazuya Mizuguchi Signed-off-by: Simon Horman Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb.h | 5 +++++ drivers/net/ethernet/renesas/ravb_main.c | 11 ++++++----- drivers/net/ethernet/renesas/ravb_ptp.c | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 1470fc12282b..9b6bf557a2f5 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -428,6 +428,7 @@ enum EIS_BIT { EIS_CULF1 = 0x00000080, EIS_TFFF = 0x00000100, EIS_QFS = 0x00010000, + EIS_RESERVED = (GENMASK(31, 17) | GENMASK(15, 11)), }; /* RIC0 */ @@ -472,6 +473,7 @@ enum RIS0_BIT { RIS0_FRF15 = 0x00008000, RIS0_FRF16 = 0x00010000, RIS0_FRF17 = 0x00020000, + RIS0_RESERVED = GENMASK(31, 18), }; /* RIC1 */ @@ -528,6 +530,7 @@ enum RIS2_BIT { RIS2_QFF16 = 0x00010000, RIS2_QFF17 = 0x00020000, RIS2_RFFF = 0x80000000, + RIS2_RESERVED = GENMASK(30, 18), }; /* TIC */ @@ -544,6 +547,7 @@ enum TIS_BIT { TIS_FTF1 = 0x00000002, /* Undocumented? */ TIS_TFUF = 0x00000100, TIS_TFWF = 0x00000200, + TIS_RESERVED = (GENMASK(31, 20) | GENMASK(15, 12) | GENMASK(7, 4)) }; /* ISS */ @@ -617,6 +621,7 @@ enum GIC_BIT { enum GIS_BIT { GIS_PTCF = 0x00000001, /* Undocumented? */ GIS_PTMF = 0x00000004, + GIS_RESERVED = GENMASK(15, 10), }; /* GIE (R-Car Gen3 only) */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index aff5516b781e..d6f753925352 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -739,10 +739,11 @@ static void ravb_error_interrupt(struct net_device *ndev) u32 eis, ris2; eis = ravb_read(ndev, EIS); - ravb_write(ndev, ~EIS_QFS, EIS); + ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS); if (eis & EIS_QFS) { ris2 = ravb_read(ndev, RIS2); - ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF), RIS2); + ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED), + RIS2); /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF0) @@ -795,7 +796,7 @@ static bool ravb_timestamp_interrupt(struct net_device *ndev) u32 tis = ravb_read(ndev, TIS); if (tis & TIS_TFUF) { - ravb_write(ndev, ~TIS_TFUF, TIS); + ravb_write(ndev, ~(TIS_TFUF | TIS_RESERVED), TIS); ravb_get_tx_tstamp(ndev); return true; } @@ -930,7 +931,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) /* Processing RX Descriptor Ring */ if (ris0 & mask) { /* Clear RX interrupt */ - ravb_write(ndev, ~mask, RIS0); + ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); if (ravb_rx(ndev, "a, q)) goto out; } @@ -938,7 +939,7 @@ static int ravb_poll(struct napi_struct *napi, int budget) if (tis & mask) { spin_lock_irqsave(&priv->lock, flags); /* Clear TX interrupt */ - ravb_write(ndev, ~mask, TIS); + ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); ravb_tx_free(ndev, q, true); netif_wake_subqueue(ndev, q); mmiowb(); diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c index 0721b5c35d91..dce2a40a31e3 100644 --- a/drivers/net/ethernet/renesas/ravb_ptp.c +++ b/drivers/net/ethernet/renesas/ravb_ptp.c @@ -315,7 +315,7 @@ void ravb_ptp_interrupt(struct net_device *ndev) } } - ravb_write(ndev, ~gis, GIS); + ravb_write(ndev, ~(gis | GIS_RESERVED), GIS); } void ravb_ptp_init(struct net_device *ndev, 
struct platform_device *pdev) From 648a5a7aed346c3b8fe7c32a835edfb0dfbf4451 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 18 Sep 2018 15:46:34 +0200 Subject: [PATCH 093/499] net/smc: fix non-blocking connect problem In state SMC_INIT smc_poll() delegates polling to the internal CLC socket. This means, once the connect worker has finished its kernel_connect() step, the poll wake-up may occur. This is not intended. The wake-up should occur from the wake up call in smc_connect_work() after __smc_connect() has finished. Thus in state SMC_INIT this patch now calls sock_poll_wait() on the main SMC socket. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 2d8a1e15e4f9..9c3976bcde46 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -742,7 +742,10 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = -rc; out: - smc->sk.sk_state_change(&smc->sk); + if (smc->sk.sk_err) + smc->sk.sk_state_change(&smc->sk); + else + smc->sk.sk_write_space(&smc->sk); kfree(smc->connect_info); smc->connect_info = NULL; release_sock(&smc->sk); @@ -1529,7 +1532,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, return EPOLLNVAL; smc = smc_sk(sock->sk); - if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { + if (smc->use_fallback) { /* delegate to CLC child sock */ mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); sk->sk_err = smc->clcsock->sk->sk_err; From 1ca52fcfaca43665d525645348801a6f4a4b9e9a Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 18 Sep 2018 15:46:35 +0200 Subject: [PATCH 094/499] net/smc: remove duplicate mutex_unlock For a failing smc_listen_rdma_finish() smc_listen_decline() is called. If fallback is possible, the new socket is already enqueued to be accepted in smc_listen_decline(). Avoid enqueuing a second time afterwards in this case, otherwise the smc_create_lgr_pending lock is released twice: [ 373.463976] WARNING: bad unlock balance detected! [ 373.463978] 4.18.0-rc7+ #123 Tainted: G O [ 373.463979] ------------------------------------- [ 373.463980] kworker/1:1/30 is trying to release lock (smc_create_lgr_pending) at: [ 373.463990] [<000003ff801205fc>] smc_listen_work+0x22c/0x5d0 [smc] [ 373.463991] but there are no more locks to release! 
[ 373.463991] other info that might help us debug this: [ 373.463993] 2 locks held by kworker/1:1/30: [ 373.463994] #0: 00000000772cbaed ((wq_completion)"events"){+.+.}, at: process_one_work+0x1ec/0x6b0 [ 373.464000] #1: 000000003ad0894a ((work_completion)(&new_smc->smc_listen_work)){+.+.}, at: process_one_work+0x1ec/0x6b0 [ 373.464003] stack backtrace: [ 373.464005] CPU: 1 PID: 30 Comm: kworker/1:1 Kdump: loaded Tainted: G O 4.18.0-rc7uschi+ #123 [ 373.464007] Hardware name: IBM 2827 H43 738 (LPAR) [ 373.464010] Workqueue: events smc_listen_work [smc] [ 373.464011] Call Trace: [ 373.464015] ([<0000000000114100>] show_stack+0x60/0xd8) [ 373.464019] [<0000000000a8c9bc>] dump_stack+0x9c/0xd8 [ 373.464021] [<00000000001dcaf8>] print_unlock_imbalance_bug+0xf8/0x108 [ 373.464022] [<00000000001e045c>] lock_release+0x114/0x4f8 [ 373.464025] [<0000000000aa87fa>] __mutex_unlock_slowpath+0x4a/0x300 [ 373.464027] [<000003ff801205fc>] smc_listen_work+0x22c/0x5d0 [smc] [ 373.464029] [<0000000000197a68>] process_one_work+0x2a8/0x6b0 [ 373.464030] [<0000000000197ec2>] worker_thread+0x52/0x410 [ 373.464033] [<000000000019fd0e>] kthread+0x15e/0x178 [ 373.464035] [<0000000000aaf58a>] kernel_thread_starter+0x6/0xc [ 373.464052] [<0000000000aaf584>] kernel_thread_starter+0x0/0xc [ 373.464054] INFO: lockdep is turned off. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9c3976bcde46..5c6d30eb4a71 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1153,9 +1153,9 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) } /* listen worker: finish RDMA setup */ -static void smc_listen_rdma_finish(struct smc_sock *new_smc, - struct smc_clc_msg_accept_confirm *cclc, - int local_contact) +static int smc_listen_rdma_finish(struct smc_sock *new_smc, + struct smc_clc_msg_accept_confirm *cclc, + int local_contact) { struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; int reason_code = 0; @@ -1178,11 +1178,12 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc, if (reason_code) goto decline; } - return; + return 0; decline: mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); + return reason_code; } /* setup for RDMA connection of server */ @@ -1279,8 +1280,10 @@ static void smc_listen_work(struct work_struct *work) } /* finish worker */ - if (!ism_supported) - smc_listen_rdma_finish(new_smc, &cclc, local_contact); + if (!ism_supported) { + if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) + return; + } smc_conn_save_peer_info(new_smc, &cclc); mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); From dd65d87a6abd537ae9bb3bdcee8905704b936884 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 18 Sep 2018 15:46:36 +0200 Subject: [PATCH 095/499] net/smc: enable fallback for connection abort in state INIT If a linkgroup is terminated abnormally already due to failing LLC CONFIRM LINK or LLC ADD LINK, fallback to TCP is still possible. In this case do not switch to state SMC_PEERABORTWAIT and do not set sk_err. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_close.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ac961dfb1ea1..ea2b87f29469 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -100,15 +100,14 @@ static void smc_close_active_abort(struct smc_sock *smc) struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - sk->sk_err = ECONNABORTED; - if (smc->clcsock && smc->clcsock->sk) { - smc->clcsock->sk->sk_err = ECONNABORTED; - smc->clcsock->sk->sk_state_change(smc->clcsock->sk); + if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { + sk->sk_err = ECONNABORTED; + if (smc->clcsock && smc->clcsock->sk) { + smc->clcsock->sk->sk_err = ECONNABORTED; + smc->clcsock->sk->sk_state_change(smc->clcsock->sk); + } } switch (sk->sk_state) { - case SMC_INIT: - sk->sk_state = SMC_PEERABORTWAIT; - break; case SMC_ACTIVE: sk->sk_state = SMC_PEERABORTWAIT; release_sock(sk); @@ -143,6 +142,7 @@ static void smc_close_active_abort(struct smc_sock *smc) case SMC_PEERFINCLOSEWAIT: sock_put(sk); /* passive closing */ break; + case SMC_INIT: case SMC_PEERABORTWAIT: case SMC_CLOSED: break; From 71d117f527425e2d6a5029e8365d82a8d2d6916a Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 18 Sep 2018 15:46:37 +0200 Subject: [PATCH 096/499] net/smc: no urgent data check for listen sockets Don't check a listen socket for pending urgent data in smc_poll(). Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5c6d30eb4a71..015231789ed2 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1566,9 +1566,9 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; if (sk->sk_state == SMC_APPCLOSEWAIT1) mask |= EPOLLIN; + if (smc->conn.urg_state == SMC_URG_VALID) + mask |= EPOLLPRI; } - if (smc->conn.urg_state == SMC_URG_VALID) - mask |= EPOLLPRI; } return mask; From 381897798a94065ffcad0772eecdc6b04a7ff23d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 18 Sep 2018 15:46:38 +0200 Subject: [PATCH 097/499] net/smc: fix sizeof to int comparison Comparing an int to a size, which is unsigned, causes the int to become unsigned, giving the wrong result. kernel_sendmsg can return a negative error code. Signed-off-by: YueHaibing Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_clc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 83aba9ade060..52241d679cc9 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -446,14 +446,12 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, vec[i++].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); - if (len < sizeof(pclc)) { - if (len >= 0) { - reason_code = -ENETUNREACH; - smc->sk.sk_err = -reason_code; - } else { - smc->sk.sk_err = smc->clcsock->sk->sk_err; - reason_code = -smc->sk.sk_err; - } + if (len < 0) { + smc->sk.sk_err = smc->clcsock->sk->sk_err; + reason_code = -smc->sk.sk_err; + } else if (len < (int)sizeof(pclc)) { + reason_code = -ENETUNREACH; + smc->sk.sk_err = -reason_code; } return reason_code; From 774268f3e51b53ed432a1ec516574fd5ba469398 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 18 Sep 2018 16:58:47 +0200 Subject: [PATCH 098/499] net: mvpp2: fix a txq_done race condition When no Tx IRQ is available, the txq_done() routine (called from tx_done()) shouldn't be called from the polling function, as in such case it is already called in the Tx path thanks to an hrtimer. This mostly occurred when using PPv2.1, as the engine then do not have Tx IRQs. Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer") Reported-by: Stefan Chulski Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 702fec82d806..38cc01beea79 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3055,10 +3055,12 @@ static int mvpp2_poll(struct napi_struct *napi, int budget) cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK); } - cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; - if (cause_tx) { - cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; - mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); + if (port->has_tx_irqs) { + cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; + if (cause_tx) { + cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET; + mvpp2_tx_done(port, cause_tx, qv->sw_thread_id); + } } /* Process RX packets */ From 126d6848ef13958e1cb959e96c21d19bc498ade9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Sep 2018 16:48:53 +0100 Subject: [PATCH 099/499] sfp: fix oops with ethtool -m If a network interface is created prior to the SFP socket being available, ethtool can request module information. This unfortunately leads to an oops: Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = (ptrval) [00000008] *pgd=7c400831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1480 Comm: ethtool Not tainted 4.19.0-rc3 #138 Hardware name: Broadcom Northstar Plus SoC PC is at sfp_get_module_info+0x8/0x10 LR is at dev_ethtool+0x218c/0x2afc Fix this by not filling in the network device's SFP bus pointer until SFP is fully bound, thereby avoiding the core calling into the SFP bus code. Fixes: ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices and sfp cages") Reported-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. 
Miller --- drivers/net/phy/sfp-bus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 740655261e5b..83060fb349f4 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -349,6 +349,7 @@ static int sfp_register_bus(struct sfp_bus *bus) } if (bus->started) bus->socket_ops->start(bus->sfp); + bus->netdev->sfp_bus = bus; bus->registered = true; return 0; } @@ -357,6 +358,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus) { const struct sfp_upstream_ops *ops = bus->upstream_ops; + bus->netdev->sfp_bus = NULL; if (bus->registered) { if (bus->started) bus->socket_ops->stop(bus->sfp); @@ -438,7 +440,6 @@ static void sfp_upstream_clear(struct sfp_bus *bus) { bus->upstream_ops = NULL; bus->upstream = NULL; - bus->netdev->sfp_bus = NULL; bus->netdev = NULL; } @@ -467,7 +468,6 @@ struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, bus->upstream_ops = ops; bus->upstream = upstream; bus->netdev = ndev; - ndev->sfp_bus = bus; if (bus->sfp) { ret = sfp_register_bus(bus); From 8675860592321a43bbb0b5aedc244a9b2321edc3 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 18 Sep 2018 13:44:59 -0700 Subject: [PATCH 100/499] Revert "ipv6: fix double refcount of fib6_metrics" This reverts commit e70a3aad44cc8b24986687ffc98c4a4f6ecf25ea. This change causes use-after-free on dst->_metrics. The crash trace looks like this: [ 97.763269] BUG: KASAN: use-after-free in ip6_mtu+0x116/0x140 [ 97.769038] Read of size 4 at addr ffff881781d2cf84 by task svw_NetThreadEv/8801 [ 97.777954] CPU: 76 PID: 8801 Comm: svw_NetThreadEv Not tainted 4.15.0-smp-DEV #11 [ 97.777956] Hardware name: Default string Default string/Indus_QC_02, BIOS 5.46.4 03/29/2018 [ 97.777957] Call Trace: [ 97.777971] [] dump_stack+0x4d/0x72 [ 97.777985] [] print_address_description+0x6f/0x260 [ 97.777997] [] kasan_report+0x257/0x370 [ 97.778001] [] ? ip6_mtu+0x116/0x140 [ 97.778004] [] __asan_report_load4_noabort+0x19/0x20 [ 97.778008] [] ip6_mtu+0x116/0x140 [ 97.778013] [] tcp_current_mss+0x12e/0x280 [ 97.778016] [] ? tcp_mtu_to_mss+0x2d0/0x2d0 [ 97.778022] [] ? depot_save_stack+0x138/0x4a0 [ 97.778037] [] ? __mmdrop+0x145/0x1f0 [ 97.778040] [] ? save_stack+0xb1/0xd0 [ 97.778046] [] tcp_send_mss+0x22/0x220 [ 97.778059] [] tcp_sendmsg_locked+0x4f9/0x39f0 [ 97.778062] [] ? kasan_check_write+0x14/0x20 [ 97.778066] [] ? tcp_sendpage+0x60/0x60 [ 97.778070] [] ? rw_copy_check_uvector+0x69/0x280 [ 97.778075] [] ? import_iovec+0x9f/0x430 [ 97.778078] [] ? kasan_slab_free+0x87/0xc0 [ 97.778082] [] ? memzero_page+0x140/0x140 [ 97.778085] [] ? kasan_check_write+0x14/0x20 [ 97.778088] [] tcp_sendmsg+0x2c/0x50 [ 97.778092] [] ? tcp_sendmsg+0x2c/0x50 [ 97.778098] [] inet_sendmsg+0x103/0x480 [ 97.778102] [] ? inet_gso_segment+0x15b0/0x15b0 [ 97.778105] [] sock_sendmsg+0xba/0xf0 [ 97.778108] [] ___sys_sendmsg+0x6ca/0x8e0 [ 97.778113] [] ? hrtimer_try_to_cancel+0x71/0x3b0 [ 97.778116] [] ? copy_msghdr_from_user+0x3d0/0x3d0 [ 97.778119] [] ? memset+0x31/0x40 [ 97.778123] [] ? schedule_hrtimeout_range_clock+0x165/0x380 [ 97.778127] [] ? hrtimer_nanosleep_restart+0x250/0x250 [ 97.778130] [] ? __hrtimer_init+0x180/0x180 [ 97.778133] [] ? ktime_get_ts64+0x172/0x200 [ 97.778137] [] ? __fget_light+0x8c/0x2f0 [ 97.778141] [] __sys_sendmsg+0xe6/0x190 [ 97.778144] [] ? __sys_sendmsg+0xe6/0x190 [ 97.778147] [] ? SyS_shutdown+0x20/0x20 [ 97.778152] [] ? wake_up_q+0xe0/0xe0 [ 97.778155] [] ? 
__sys_sendmsg+0x190/0x190 [ 97.778158] [] SyS_sendmsg+0x13/0x20 [ 97.778162] [] do_syscall_64+0x2ac/0x430 [ 97.778166] [] ? do_page_fault+0x35/0x3d0 [ 97.778171] [] ? page_fault+0x2f/0x50 [ 97.778174] [] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 97.778177] RIP: 0033:0x7f83fa36000d [ 97.778178] RSP: 002b:00007f83ef9229e0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e [ 97.778180] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f83fa36000d [ 97.778182] RDX: 0000000000004000 RSI: 00007f83ef922f00 RDI: 0000000000000036 [ 97.778183] RBP: 00007f83ef923040 R08: 00007f83ef9231f8 R09: 00007f83ef923168 [ 97.778184] R10: 0000000000000000 R11: 0000000000000293 R12: 00007f83f69c5b40 [ 97.778185] R13: 000000000000001c R14: 0000000000000001 R15: 0000000000004000 [ 97.779684] Allocated by task 5919: [ 97.783185] save_stack+0x46/0xd0 [ 97.783187] kasan_kmalloc+0xad/0xe0 [ 97.783189] kmem_cache_alloc_trace+0xdf/0x580 [ 97.783190] ip6_convert_metrics.isra.79+0x7e/0x190 [ 97.783192] ip6_route_info_create+0x60a/0x2480 [ 97.783193] ip6_route_add+0x1d/0x80 [ 97.783195] inet6_rtm_newroute+0xdd/0xf0 [ 97.783198] rtnetlink_rcv_msg+0x641/0xb10 [ 97.783200] netlink_rcv_skb+0x27b/0x3e0 [ 97.783202] rtnetlink_rcv+0x15/0x20 [ 97.783203] netlink_unicast+0x4be/0x720 [ 97.783204] netlink_sendmsg+0x7bc/0xbf0 [ 97.783205] sock_sendmsg+0xba/0xf0 [ 97.783207] ___sys_sendmsg+0x6ca/0x8e0 [ 97.783208] __sys_sendmsg+0xe6/0x190 [ 97.783209] SyS_sendmsg+0x13/0x20 [ 97.783211] do_syscall_64+0x2ac/0x430 [ 97.783213] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 97.784709] Freed by task 0: [ 97.785056] knetbase: Error: /proc/sys/net/core/txcs_enable does not exist [ 97.794497] save_stack+0x46/0xd0 [ 97.794499] kasan_slab_free+0x71/0xc0 [ 97.794500] kfree+0x7c/0xf0 [ 97.794501] fib6_info_destroy_rcu+0x24f/0x310 [ 97.794504] rcu_process_callbacks+0x38b/0x1730 [ 97.794506] __do_softirq+0x1c8/0x5d0 Reported-by: John Sperbeck Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 480a79f47c52..b5d3e6b294ab 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -976,6 +976,10 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) rt->rt6i_flags &= ~RTF_EXPIRES; rcu_assign_pointer(rt->from, from); dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); + if (from->fib6_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&from->fib6_metrics->refcnt); + } } /* Caller must already hold reference to @ort */ From ce7ea4af0838ffd4667ecad4eb5eec7a25342f1e Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 18 Sep 2018 13:45:00 -0700 Subject: [PATCH 101/499] ipv6: fix memory leak on dst->_metrics When dst->_metrics and f6i->fib6_metrics share the same memory, both take reference count on the dst_metrics structure. However, when dst is destroyed, ip6_dst_destroy() only invokes dst_destroy_metrics_generic() which does not take care of READONLY metrics and does not release refcnt. This causes memory leak. Similar to ipv4 logic, the fix is to properly release refcnt and free the memory space pointed by dst->_metrics if refcnt becomes 0. Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Reported-by: Sabrina Dubroca Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b5d3e6b294ab..826b14de7dbb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -364,11 +364,14 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rt6_info *rt = (struct rt6_info *)dst; struct fib6_info *from; struct inet6_dev *idev; - dst_destroy_metrics_generic(dst); + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); + rt6_uncached_list_del(rt); idev = rt->rt6i_idev; From 73aeb2cbcdc9be391b3d32a55319a59ce425426f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 11 Sep 2018 04:27:41 +0100 Subject: [PATCH 102/499] ARM: 8787/1: wire up io_pgetevents syscall Wire up the new io_pgetevents syscall for ARM. Signed-off-by: Stefan Agner Acked-by: Christoph Hellwig Signed-off-by: Russell King --- arch/arm/tools/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index fbc74b5fa3ed..8edf93b4490f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -413,3 +413,4 @@ 396 common pkey_free sys_pkey_free 397 common statx sys_statx 398 common rseq sys_rseq +399 common io_pgetevents sys_io_pgetevents From 3a58ac65e2d7969bcdf1b6acb70fa4d12a88e53e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 13 Sep 2018 16:48:08 +0100 Subject: [PATCH 103/499] ARM: 8799/1: mm: fix pci_ioremap_io() offset check IO_SPACE_LIMIT is the ending address of the PCI IO space, i.e something like 0xfffff (and not 0x100000). Therefore, when offset = 0xf0000 is passed as argument, this function fails even though the offset + SZ_64K fits below the IO_SPACE_LIMIT. This makes the last chunk of 64 KB of the I/O space not usable as it cannot be mapped. This patch fixes that by substracing 1 to offset + SZ_64K, so that we compare the addrss of the last byte of the I/O space against IO_SPACE_LIMIT instead of the address of the first byte of what is after the I/O space. Fixes: c2794437091a4 ("ARM: Add fixed PCI i/o mapping") Signed-off-by: Thomas Petazzoni Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46c..5bf9443cfbaa 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, From 9e796c9db93b4840d1b00e550eea26db7cb741e2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 14 Sep 2018 08:51:14 -0600 Subject: [PATCH 104/499] ext2, dax: set ext2_dax_aops for dax files Sync syscall to DAX file needs to flush processor cache, but it currently does not flush to existing DAX files. This is because 'ext2_da_aops' is set to address_space_operations of existing DAX files, instead of 'ext2_dax_aops', since S_DAX flag is set after ext2_set_aops() in the open path. Similar to ext4, change ext2_iget() to initialize i_flags before ext2_set_aops(). 
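A condensed view of the intended ordering in ext2_iget(); only the relevant calls are shown, and ext2_set_aops() is actually invoked later in the same function, listed here just to make the ordering explicit:

    ei->i_flags = le32_to_cpu(raw_inode->i_flags);
    ext2_set_inode_flags(inode);   /* may set S_DAX; now runs before aops setup */
    ...
    ext2_set_aops(inode);          /* sees S_DAX and picks ext2_dax_aops */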
Fixes: fb094c90748f ("ext2, dax: introduce ext2_dax_aops") Signed-off-by: Toshi Kani Suggested-by: Jan Kara Cc: Jan Kara Cc: Dan Williams Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Signed-off-by: Jan Kara --- fs/ext2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7f7ee18fe179..e4bb9386c045 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1448,6 +1448,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); + ext2_set_inode_flags(inode); ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); ei->i_frag_no = raw_inode->i_frag; ei->i_frag_size = raw_inode->i_fsize; @@ -1517,7 +1518,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } brelse (bh); - ext2_set_inode_flags(inode); unlock_new_inode(inode); return inode; From ee92efe41cf358f4b99e73509f2bfd4733609f26 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 17 Sep 2018 18:10:05 -0700 Subject: [PATCH 105/499] IB/srp: Avoid that sg_reset -d ${srp_device} triggers an infinite loop Use different loop variables for the inner and outer loop. This avoids that an infinite loop occurs if there are more RDMA channels than target->req_ring_size. Fixes: d92c0da71a35 ("IB/srp: Add multichannel support") Cc: Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srp/ib_srp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 444d16520506..0b34e909505f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2951,7 +2951,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_rdma_ch *ch; - int i; + int i, j; u8 status; shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); @@ -2965,8 +2965,8 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) for (i = 0; i < target->ch_count; i++) { ch = &target->ch[i]; - for (i = 0; i < target->req_ring_size; ++i) { - struct srp_request *req = &ch->req_ring[i]; + for (j = 0; j < target->req_ring_size; ++j) { + struct srp_request *req = &ch->req_ring[j]; srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); } From 080220b687147fd9376878534aba7194f17f6ef5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 18 Sep 2018 10:13:59 -0700 Subject: [PATCH 106/499] tools: bpf: fix license for a compat header file libc_compat.h is used by libbpf so make sure it's licensed under LGPL or BSD license. The license change should be OK, I'm the only author of the file. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/include/tools/libc_compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/tools/libc_compat.h b/tools/include/tools/libc_compat.h index 664ced8cb1b0..e907ba6f15e5 100644 --- a/tools/include/tools/libc_compat.h +++ b/tools/include/tools/libc_compat.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (LGPL-2.0+ OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. 
*/ #ifndef __TOOLS_LIBC_COMPAT_H From 13b91ed77a65cf3bf23353880245d0cbd9138294 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sun, 16 Sep 2018 12:34:09 +0800 Subject: [PATCH 107/499] ARM: dts: sun8i: drop A64 HDMI PHY fallback compatible from R40 DT The R40 HDMI PHY seems to be different to the A64 one, the A64 one has no input mux, but the R40 one has. Drop the A64 fallback compatible from the HDMI PHY node in R40 DT. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard [wens@csie.org: Fix subject prefix order] Signed-off-by: Chen-Yu Tsai --- arch/arm/boot/dts/sun8i-r40.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index ffd9f00f74a4..5f547c161baf 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -800,8 +800,7 @@ hdmi_out: port@1 { }; hdmi_phy: hdmi-phy@1ef0000 { - compatible = "allwinner,sun8i-r40-hdmi-phy", - "allwinner,sun50i-a64-hdmi-phy"; + compatible = "allwinner,sun8i-r40-hdmi-phy"; reg = <0x01ef0000 0x10000>; clocks = <&ccu CLK_BUS_HDMI1>, <&ccu CLK_HDMI_SLOW>, <&ccu 7>, <&ccu 16>; From 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 19 Sep 2018 15:02:07 +0200 Subject: [PATCH 108/499] ip6_tunnel: be careful when accessing the inner header the ip6 tunnel xmit ndo assumes that the processed skb always contains an ip[v6] header, but syzbot has found a way to send frames that fall short of this assumption, leading to the following splat: BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 __netdev_start_xmit include/linux/netdevice.h:4066 [inline] netdev_start_xmit include/linux/netdevice.h:4075 [inline] xmit_one net/core/dev.c:3026 [inline] dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042 __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 packet_snd net/packet/af_packet.c:2944 [inline] packet_sendmsg+0x7c70/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x441819 RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819 RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510 R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 
kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 packet_alloc_skb net/packet/af_packet.c:2803 [inline] packet_snd net/packet/af_packet.c:2894 [inline] packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 This change addresses the issue adding the needed check before accessing the inner header. The ipv4 side of the issue is apparently there since the ipv4 over ipv6 initial support, and the ipv6 side predates git history. Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com Tested-by: Alexander Potapenko Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 419960b0ba16..a0b6932c3afd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1234,7 +1234,7 @@ static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; int encap_limit = -1; struct flowi6 fl6; __u8 dsfield; @@ -1242,6 +1242,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + /* ensure we can access the full inner ip header */ + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return -1; + + iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); tproto = READ_ONCE(t->parms.proto); @@ -1306,7 +1311,7 @@ static inline int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; int encap_limit = -1; __u16 offset; struct flowi6 fl6; @@ -1315,6 +1320,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + return -1; + + ipv6h = ipv6_hdr(skb); tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) From cf5cca6e4cc4dea604d8c83e7676f10459d6809c Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 19 Sep 2018 15:29:06 +0200 Subject: [PATCH 109/499] net: mvneta: fix the Rx desc buffer DMA unmapping With CONFIG_DMA_API_DEBUG enabled we now get a warning when using the mvneta driver: mvneta d0030000.ethernet: DMA-API: device driver frees DMA memory with wrong function [device address=0x000000001165b000] [size=4096 bytes] [mapped as page] [unmapped as single] This is because when using the s/w buffer management, the Rx descriptor buffer is mapped with dma_map_page but unmapped with dma_unmap_single. This patch fixes this by using the right unmapping function. 
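The underlying DMA-API rule, in a generic sketch where dev and page are placeholders rather than the driver's own variables: a buffer mapped with dma_map_page() must be unmapped with dma_unmap_page(), never dma_unmap_single().

    dma_addr_t phys = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
    if (dma_mapping_error(dev, phys))
            return -ENOMEM;
    ...
    dma_unmap_page(dev, phys, PAGE_SIZE, DMA_FROM_DEVICE); /* matches dma_map_page() */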
Fixes: 562e2f467e71 ("net: mvneta: Improve the buffer allocation method for SWBM") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bc80a678abc3..2db9708f2e24 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2008,8 +2008,8 @@ static int mvneta_rx_swbm(struct napi_struct *napi, skb_add_rx_frag(rxq->skb, frag_num, page, frag_offset, frag_size, PAGE_SIZE); - dma_unmap_single(dev->dev.parent, phys_addr, - PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(dev->dev.parent, phys_addr, + PAGE_SIZE, DMA_FROM_DEVICE); rxq->left_size -= frag_size; } } else { From 7233b8cab39014620ac9534da11f0f3e506d8fd8 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 11 Sep 2018 15:38:05 +1000 Subject: [PATCH 110/499] powerpc/powernv/ioda2: Reduce upper limit for DMA window size (again) mpe: This was fixed originally in commit d3d4ffaae439 ("powerpc/powernv/ioda2: Reduce upper limit for DMA window size"), but contrary to what the merge commit says was inadvertently lost by me in commit ce57c6610cc2 ("Merge branch 'topic/ppc-kvm' into next") which brought in changes that moved the code to a new file. So reapply it to the new file. Original commit message follows: We use PHB in mode1 which uses bit 59 to select a correct DMA window. However there is mode2 which uses bits 59:55 and allows up to 32 DMA windows per a PE. Even though documentation does not clearly specify that, it seems that the actual hardware does not support bits 59:55 even in mode1, in other words we can create a window as big as 1<<58 but DMA simply won't work. This reduces the upper limit from 59 to 55 bits to let the userspace know about the hardware limits. Fixes: ce57c6610cc2 ("Merge branch 'topic/ppc-kvm' into next") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 6c5db1acbe8d..fe9691040f54 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -276,7 +276,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); - if ((level_shift - 3) * levels + page_shift >= 60) + if ((level_shift - 3) * levels + page_shift >= 55) return -EINVAL; /* Allocate TCE table */ From 50fdf60181b01b7383b85d4b9acbb842263d96a2 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 19 Sep 2018 21:59:10 -0700 Subject: [PATCH 111/499] qed: Fix populating the invalid stag value in multi function mode. In multi-function mode, driver receives the stag value (outer vlan) for a PF from management FW (MFW). If the stag value is negotiated prior to the driver load, then the stag is not notified to the driver and hence driver will have the invalid stag value. The fix is to request the MFW for STAG value during the driver load time. Fixes: cac6f691 ("qed: Add support for Unified Fabric Port") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Tomer Tayar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 15 ++++++++++++++- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 016ca8a7ec8a..97f073fd3725 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1706,7 +1706,7 @@ static int qed_vf_start(struct qed_hwfn *p_hwfn, int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) { struct qed_load_req_params load_req_params; - u32 load_code, param, drv_mb_param; + u32 load_code, resp, param, drv_mb_param; bool b_default_mtu = true; struct qed_hwfn *p_hwfn; int rc = 0, mfw_rc, i; @@ -1852,6 +1852,19 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) if (IS_PF(cdev)) { p_hwfn = QED_LEADING_HWFN(cdev); + + /* Get pre-negotiated values for stag, bandwidth etc. */ + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "Sending GET_OEM_UPDATES command to trigger stag/bandwidth attention handling\n"); + drv_mb_param = 1 << DRV_MB_PARAM_DUMMY_OEM_UPDATES_OFFSET; + rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, + DRV_MSG_CODE_GET_OEM_UPDATES, + drv_mb_param, &resp, ¶m); + if (rc) + DP_NOTICE(p_hwfn, + "Failed to send GET_OEM_UPDATES attention request\n"); + drv_mb_param = STORM_FW_VERSION; rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 8faceb691657..9b3ef00e5782 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12414,6 +12414,7 @@ struct public_drv_mb { #define DRV_MSG_SET_RESOURCE_VALUE_MSG 0x35000000 #define DRV_MSG_CODE_OV_UPDATE_WOL 0x38000000 #define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE 0x39000000 +#define DRV_MSG_CODE_GET_OEM_UPDATES 0x41000000 #define DRV_MSG_CODE_BW_UPDATE_ACK 0x32000000 #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 @@ -12541,6 +12542,9 @@ struct public_drv_mb { #define DRV_MB_PARAM_ESWITCH_MODE_VEB 0x1 #define DRV_MB_PARAM_ESWITCH_MODE_VEPA 0x2 +#define DRV_MB_PARAM_DUMMY_OEM_UPDATES_MASK 0x1 +#define DRV_MB_PARAM_DUMMY_OEM_UPDATES_OFFSET 0 + #define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0 #define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 #define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 From 0216da9413afa546627a1b0d319dfd17fef34050 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 19 Sep 2018 21:59:11 -0700 Subject: [PATCH 112/499] qed: Do not add VLAN 0 tag to untagged frames in multi-function mode. In certain multi-function switch dependent modes, firmware adds vlan tag 0 to the untagged frames. This leads to double tagging for the traffic if the dcbx is enabled, which is not the desired behavior. To avoid this, driver needs to set "dcb_dont_add_vlan0" flag. Fixes: cac6f691 ("qed: Add support for Unified Fabric Port") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Tomer Tayar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 9 ++++++++- drivers/net/ethernet/qlogic/qed/qed_dcbx.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 6bb76e6d3c14..d53f33c6b1fc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -190,6 +190,7 @@ qed_dcbx_dp_protocol(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data) static void qed_dcbx_set_params(struct qed_dcbx_results *p_data, + struct qed_hwfn *p_hwfn, struct qed_hw_info *p_info, bool enable, u8 prio, @@ -206,6 +207,11 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data, else p_data->arr[type].update = DONT_UPDATE_DCB_DSCP; + /* Do not add vlan tag 0 when DCB is enabled and port in UFP/OV mode */ + if ((test_bit(QED_MF_8021Q_TAGGING, &p_hwfn->cdev->mf_bits) || + test_bit(QED_MF_8021AD_TAGGING, &p_hwfn->cdev->mf_bits))) + p_data->arr[type].dont_add_vlan0 = true; + /* QM reconf data */ if (p_info->personality == personality) qed_hw_info_set_offload_tc(p_info, tc); @@ -231,7 +237,7 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, personality = qed_dcbx_app_update[i].personality; - qed_dcbx_set_params(p_data, p_info, enable, + qed_dcbx_set_params(p_data, p_hwfn, p_info, enable, prio, tc, type, personality); } } @@ -954,6 +960,7 @@ static void qed_dcbx_update_protocol_data(struct protocol_dcb_data *p_data, p_data->dcb_enable_flag = p_src->arr[type].enable; p_data->dcb_priority = p_src->arr[type].priority; p_data->dcb_tc = p_src->arr[type].tc; + p_data->dcb_dont_add_vlan0 = p_src->arr[type].dont_add_vlan0; } /* Set pf update ramrod command params */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.h b/drivers/net/ethernet/qlogic/qed/qed_dcbx.h index a4d688c04e18..01f253ea4b22 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.h @@ -55,6 +55,7 @@ struct qed_dcbx_app_data { u8 update; /* Update indication */ u8 priority; /* Priority */ u8 tc; /* Traffic Class */ + bool dont_add_vlan0; /* Do not insert a vlan tag with id 0 */ }; #define QED_DCBX_VERSION_DISABLED 0 From 7e3e375ceede8fba5218362d14db7de25fa83f12 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 19 Sep 2018 21:59:12 -0700 Subject: [PATCH 113/499] qed: Add missing device config for RoCE EDPM in UFP mode. This patch adds support to configure the DORQ to use vlan-id/priority for roce EDPM. Fixes: cac6f691 ("qed: Add support for Unified Fabric Port") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Tomer Tayar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 40 ++++++++++--------- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 24 +++++++++-- .../net/ethernet/qlogic/qed/qed_reg_addr.h | 6 +++ 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index d53f33c6b1fc..f5459de6d60a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -190,11 +190,8 @@ qed_dcbx_dp_protocol(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data) static void qed_dcbx_set_params(struct qed_dcbx_results *p_data, - struct qed_hwfn *p_hwfn, - struct qed_hw_info *p_info, - bool enable, - u8 prio, - u8 tc, + struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + bool enable, u8 prio, u8 tc, enum dcbx_protocol_type type, enum qed_pci_personality personality) { @@ -213,18 +210,24 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data, p_data->arr[type].dont_add_vlan0 = true; /* QM reconf data */ - if (p_info->personality == personality) - qed_hw_info_set_offload_tc(p_info, tc); + if (p_hwfn->hw_info.personality == personality) + qed_hw_info_set_offload_tc(&p_hwfn->hw_info, tc); + + /* Configure dcbx vlan priority in doorbell block for roce EDPM */ + if (test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits) && + type == DCBX_PROTOCOL_ROCE) { + qed_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 1); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_PCP_BB_K2, prio << 1); + } } /* Update app protocol data and hw_info fields with the TLV info */ static void qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, - struct qed_hwfn *p_hwfn, - bool enable, - u8 prio, u8 tc, enum dcbx_protocol_type type) + struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + bool enable, u8 prio, u8 tc, + enum dcbx_protocol_type type) { - struct qed_hw_info *p_info = &p_hwfn->hw_info; enum qed_pci_personality personality; enum dcbx_protocol_type id; int i; @@ -237,7 +240,7 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, personality = qed_dcbx_app_update[i].personality; - qed_dcbx_set_params(p_data, p_hwfn, p_info, enable, + qed_dcbx_set_params(p_data, p_hwfn, p_ptt, enable, prio, tc, type, personality); } } @@ -271,7 +274,7 @@ qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, * reconfiguring QM. Get protocol specific data for PF update ramrod command. */ static int -qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, +qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_dcbx_results *p_data, struct dcbx_app_priority_entry *p_tbl, u32 pri_tc_tbl, int count, u8 dcbx_version) @@ -315,7 +318,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, enable = true; } - qed_dcbx_update_app_info(p_data, p_hwfn, enable, + qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, enable, priority, tc, type); } } @@ -337,7 +340,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, continue; enable = (type == DCBX_PROTOCOL_ETH) ? false : !!dcbx_version; - qed_dcbx_update_app_info(p_data, p_hwfn, enable, + qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, enable, priority, tc, type); } @@ -347,7 +350,8 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, /* Parse app TLV's to update TC information in hw_info structure for * reconfiguring QM. Get protocol specific data for PF update ramrod command. 
*/ -static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) +static int +qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct dcbx_app_priority_feature *p_app; struct dcbx_app_priority_entry *p_tbl; @@ -371,7 +375,7 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) p_info = &p_hwfn->hw_info; num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES); - rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl, + rc = qed_dcbx_process_tlv(p_hwfn, p_ptt, &data, p_tbl, pri_tc_tbl, num_entries, dcbx_version); if (rc) return rc; @@ -897,7 +901,7 @@ qed_dcbx_mib_update_event(struct qed_hwfn *p_hwfn, return rc; if (type == QED_DCBX_OPERATIONAL_MIB) { - rc = qed_dcbx_process_mib_info(p_hwfn); + rc = qed_dcbx_process_mib_info(p_hwfn, p_ptt); if (!rc) { /* reconfigure tcs of QM queues according * to negotiation results diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 5d37ec7e9b0b..58c7eb9d8e1b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1581,13 +1581,29 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) p_hwfn->mcp_info->func_info.ovlan = (u16)shmem_info.ovlan_stag & FUNC_MF_CFG_OV_STAG_MASK; p_hwfn->hw_info.ovlan = p_hwfn->mcp_info->func_info.ovlan; - if ((p_hwfn->hw_info.hw_mode & BIT(MODE_MF_SD)) && - (p_hwfn->hw_info.ovlan != QED_MCP_VLAN_UNSET)) { - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_TAG_VALUE, p_hwfn->hw_info.ovlan); + if (test_bit(QED_MF_OVLAN_CLSS, &p_hwfn->cdev->mf_bits)) { + if (p_hwfn->hw_info.ovlan != QED_MCP_VLAN_UNSET) { + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_VALUE, + p_hwfn->hw_info.ovlan); + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_EN, 1); + + /* Configure DB to add external vlan to EDPM packets */ + qed_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 1); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_EXT_VID_BB_K2, + p_hwfn->hw_info.ovlan); + } else { + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_EN, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_VALUE, 0); + qed_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 0); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_EXT_VID_BB_K2, 0); + } + qed_sp_pf_update_stag(p_hwfn); } + DP_VERBOSE(p_hwfn, QED_MSG_SP, "ovlan = %d hw_mode = 0x%x\n", + p_hwfn->mcp_info->func_info.ovlan, p_hwfn->hw_info.hw_mode); + /* Acknowledge the MFW */ qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0, &resp, ¶m); diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index f736f70956fd..2440970882c4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -216,6 +216,12 @@ 0x00c000UL #define DORQ_REG_IFEN \ 0x100040UL +#define DORQ_REG_TAG1_OVRD_MODE \ + 0x1008b4UL +#define DORQ_REG_PF_PCP_BB_K2 \ + 0x1008c4UL +#define DORQ_REG_PF_EXT_VID_BB_K2 \ + 0x1008c8UL #define DORQ_REG_DB_DROP_REASON \ 0x100a2cUL #define DORQ_REG_DB_DROP_DETAILS \ From 32bf94fb5c2ec4ec842152d0e5937cd4bb6738fa Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Wed, 19 Sep 2018 13:54:56 -0600 Subject: [PATCH 114/499] xfrm: validate template mode XFRM mode parameters passed as part of the user templates in the IP_XFRM_POLICY are never properly validated. 
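The fix itself is a single range check in validate_tmpl(), shown in full in the hunk below; in isolation it is just:

    if (ut[i].mode >= XFRM_MODE_MAX)
            return -EINVAL;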
Passing values other than valid XFRM modes can cause stack-out-of-bounds reads to occur later in the XFRM processing: [ 140.535608] ================================================================ [ 140.543058] BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x17e4/0x1cc4 [ 140.550306] Read of size 4 at addr ffffffc0238a7a58 by task repro/5148 [ 140.557369] [ 140.558927] Call trace: [ 140.558936] dump_backtrace+0x0/0x388 [ 140.558940] show_stack+0x24/0x30 [ 140.558946] __dump_stack+0x24/0x2c [ 140.558949] dump_stack+0x8c/0xd0 [ 140.558956] print_address_description+0x74/0x234 [ 140.558960] kasan_report+0x240/0x264 [ 140.558963] __asan_report_load4_noabort+0x2c/0x38 [ 140.558967] xfrm_state_find+0x17e4/0x1cc4 [ 140.558971] xfrm_resolve_and_create_bundle+0x40c/0x1fb8 [ 140.558975] xfrm_lookup+0x238/0x1444 [ 140.558977] xfrm_lookup_route+0x48/0x11c [ 140.558984] ip_route_output_flow+0x88/0xc4 [ 140.558991] raw_sendmsg+0xa74/0x266c [ 140.558996] inet_sendmsg+0x258/0x3b0 [ 140.559002] sock_sendmsg+0xbc/0xec [ 140.559005] SyS_sendto+0x3a8/0x5a8 [ 140.559008] el0_svc_naked+0x34/0x38 [ 140.559009] [ 140.592245] page dumped because: kasan: bad access detected [ 140.597981] page_owner info is not active (free page?) [ 140.603267] [ 140.653503] ================================================================ Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5151b3ebf068..d0672c400c2f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1455,6 +1455,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { From b3a5402cbcebaf5a9db4d6a3268070e4a099355d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Sep 2018 18:21:11 +0200 Subject: [PATCH 115/499] ALSA: hda: Fix the audio-component completion timeout The timeout of audio component binding was incorrectly specified in msec, not in jiffies, which results in way too shorter timeout than expected. Along with fixing it, add the information print about the binding failure to show the unexpected situation more clearly. Fixes: a57942bfdd61 ("ALSA: hda: Make audio component support more generic") Signed-off-by: Takashi Iwai --- sound/hda/hdac_i915.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index b5282cbbe489..617ff1aa818f 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -145,9 +145,11 @@ int snd_hdac_i915_init(struct hdac_bus *bus) if (!acomp->ops) { request_module("i915"); /* 10s timeout */ - wait_for_completion_timeout(&bind_complete, 10 * 1000); + wait_for_completion_timeout(&bind_complete, + msecs_to_jiffies(10 * 1000)); } if (!acomp->ops) { + dev_info(bus->dev, "couldn't bind with audio component\n"); snd_hdac_acomp_exit(bus); return -ENODEV; } From 783f3b4e9ec50491c21746e7e05ec6c39c21f563 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Mon, 17 Sep 2018 11:40:22 -0500 Subject: [PATCH 116/499] usb: musb: dsps: do not disable CPPI41 irq in driver teardown TI AM335x CPPI 4.1 module uses a single register bit for CPPI interrupts in both musb controllers. So disabling the CPPI irq in one musb driver breaks the other musb module. 
Since musb is already disabled before tearing down dma controller in musb_remove(), it is safe to not disable CPPI irq in musb_dma_controller_destroy(). Fixes: 255348289f71 ("usb: musb: dsps: Manage CPPI 4.1 DMA interrupt in DSPS") Cc: stable@vger.kernel.org Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index df827ff57b0d..23a0df79ef21 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -658,16 +658,6 @@ dsps_dma_controller_create(struct musb *musb, void __iomem *base) return controller; } -static void dsps_dma_controller_destroy(struct dma_controller *c) -{ - struct musb *musb = c->musb; - struct dsps_glue *glue = dev_get_drvdata(musb->controller->parent); - void __iomem *usbss_base = glue->usbss_base; - - musb_writel(usbss_base, USBSS_IRQ_CLEARR, USBSS_IRQ_PD_COMP); - cppi41_dma_controller_destroy(c); -} - #ifdef CONFIG_PM_SLEEP static void dsps_dma_controller_suspend(struct dsps_glue *glue) { @@ -697,7 +687,7 @@ static struct musb_platform_ops dsps_ops = { #ifdef CONFIG_USB_TI_CPPI41_DMA .dma_init = dsps_dma_controller_create, - .dma_exit = dsps_dma_controller_destroy, + .dma_exit = cppi41_dma_controller_destroy, #endif .enable = dsps_musb_enable, .disable = dsps_musb_disable, From e871db8d78df1c411032cbb3acfdf8930509360e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 11 Sep 2018 10:00:44 +0200 Subject: [PATCH 117/499] Revert "usb: cdc-wdm: Fix a sleep-in-atomic-context bug in service_outstanding_interrupt()" This reverts commit 6e22e3af7bb3a7b9dc53cb4687659f6e63fca427. The bug the patch describes to, has been already fixed in commit 2df6948428542 ("USB: cdc-wdm: don't enable interrupts in USB-giveback") so need to this, revert it. Fixes: 6e22e3af7bb3 ("usb: cdc-wdm: Fix a sleep-in-atomic-context bug in service_outstanding_interrupt()") Cc: stable Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 656d247819c9..bec581fb7c63 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -460,7 +460,7 @@ static int service_outstanding_interrupt(struct wdm_device *desc) set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); - rv = usb_submit_urb(desc->response, GFP_ATOMIC); + rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { dev_err(&desc->intf->dev, From 7a68d9fb851012829c29e770621905529bd9490b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 5 Sep 2018 12:07:02 +0200 Subject: [PATCH 118/499] USB: usbdevfs: sanitize flags more Requesting a ZERO_PACKET or not is sensible only for output. In the input direction the device decides. Likewise accepting short packets makes sense only for input. This allows operation with panic_on_warn without opening up a local DOS. 
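
As an aside for readers of this series: the direction rule above can be illustrated with a small stand-alone C sketch. The FLAG_* names and the sanitize_flags() helper below are invented for the example and are not the usbdevfs interface; the point is only that flags which make sense for one transfer direction are filtered out before they reach lower layers.

/* Sketch only: direction-dependent URB flag filtering. */
#include <stdbool.h>
#include <stdio.h>

#define FLAG_SHORT_NOT_OK 0x01  /* meaningful only for IN (device to host) */
#define FLAG_ZERO_PACKET  0x02  /* meaningful only for OUT (host to device) */

static unsigned int sanitize_flags(unsigned int flags, bool is_in)
{
	bool allow_short = is_in;   /* short reads are decided by the device */
	bool allow_zero  = !is_in;  /* zero-length terminators apply to output */

	if (!allow_short && (flags & FLAG_SHORT_NOT_OK)) {
		fprintf(stderr, "ignoring SHORT_NOT_OK on an OUT transfer\n");
		flags &= ~FLAG_SHORT_NOT_OK;
	}
	if (!allow_zero && (flags & FLAG_ZERO_PACKET)) {
		fprintf(stderr, "ignoring ZERO_PACKET on an IN transfer\n");
		flags &= ~FLAG_ZERO_PACKET;
	}
	return flags;
}

int main(void)
{
	/* An IN transfer requesting ZERO_PACKET: only SHORT_NOT_OK survives. */
	printf("effective flags: %#x\n",
	       sanitize_flags(FLAG_SHORT_NOT_OK | FLAG_ZERO_PACKET, true));
	return 0;
}
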
Signed-off-by: Oliver Neukum Reported-by: syzbot+843efa30c8821bd69f53@syzkaller.appspotmail.com Fixes: 0cb54a3e47cb ("USB: debugging code shouldn't alter control flow") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6ce77b33da61..263dd2f309fb 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1434,10 +1434,13 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb struct async *as = NULL; struct usb_ctrlrequest *dr = NULL; unsigned int u, totlen, isofrmlen; - int i, ret, is_in, num_sgs = 0, ifnum = -1; + int i, ret, num_sgs = 0, ifnum = -1; int number_of_packets = 0; unsigned int stream_id = 0; void *buf; + bool is_in; + bool allow_short = false; + bool allow_zero = false; unsigned long mask = USBDEVFS_URB_SHORT_NOT_OK | USBDEVFS_URB_BULK_CONTINUATION | USBDEVFS_URB_NO_FSBR | @@ -1471,6 +1474,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: + if (is_in) + allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1511,6 +1516,10 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb break; case USBDEVFS_URB_TYPE_BULK: + if (!is_in) + allow_zero = true; + else + allow_short = true; switch (usb_endpoint_type(&ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: case USB_ENDPOINT_XFER_ISOC: @@ -1531,6 +1540,10 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb if (!usb_endpoint_xfer_int(&ep->desc)) return -EINVAL; interrupt_urb: + if (!is_in) + allow_zero = true; + else + allow_short = true; break; case USBDEVFS_URB_TYPE_ISO: @@ -1676,9 +1689,9 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = (is_in ? URB_DIR_IN : URB_DIR_OUT); if (uurb->flags & USBDEVFS_URB_ISO_ASAP) u |= URB_ISO_ASAP; - if (uurb->flags & USBDEVFS_URB_SHORT_NOT_OK && is_in) + if (allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) u |= URB_SHORT_NOT_OK; - if (uurb->flags & USBDEVFS_URB_ZERO_PACKET) + if (allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) u |= URB_ZERO_PACKET; if (uurb->flags & USBDEVFS_URB_NO_INTERRUPT) u |= URB_NO_INTERRUPT; From 81e0403b26d94360abd1f6a57311337973bc82cd Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 5 Sep 2018 12:07:03 +0200 Subject: [PATCH 119/499] USB: usbdevfs: restore warning for nonsensical flags If we filter flags before they reach the core we need to generate our own warnings. 
Signed-off-by: Oliver Neukum Fixes: 0cb54a3e47cb ("USB: debugging code shouldn't alter control flow") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 263dd2f309fb..244417d0dfd1 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1697,6 +1697,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u |= URB_NO_INTERRUPT; as->urb->transfer_flags = u; + if (!allow_short && uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) + dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_SHORT_NOT_OK.\n"); + if (!allow_zero && uurb->flags & USBDEVFS_URB_ZERO_PACKET) + dev_warn(&ps->dev->dev, "Requested nonsensical USBDEVFS_URB_ZERO_PACKET.\n"); + as->urb->transfer_buffer_length = uurb->buffer_length; as->urb->setup_packet = (unsigned char *)dr; dr = NULL; From c183813fcee44a249339b7c46e1ad271ca1870aa Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 13:58:51 -0400 Subject: [PATCH 120/499] USB: remove LPM management from usb_driver_claim_interface() usb_driver_claim_interface() disables and re-enables Link Power Management, but it shouldn't do either one, for the reasons listed below. This patch removes the two LPM-related function calls from the routine. The reason for disabling LPM in the analogous function usb_probe_interface() is so that drivers won't have to deal with unwanted LPM transitions in their probe routine. But usb_driver_claim_interface() doesn't call the driver's probe routine (or any other callbacks), so that reason doesn't apply here. Furthermore, no driver other than usbfs will ever call usb_driver_claim_interface() unless it is already bound to another interface in the same device, which means disabling LPM here would be redundant. usbfs doesn't interact with LPM at all. Lastly, the error return from usb_unlocked_disable_lpm() isn't handled properly; the code doesn't clean up its earlier actions before returning. Signed-off-by: Alan Stern Fixes: 8306095fd2c1 ("USB: Disable USB 3.0 LPM in critical sections.") CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index e76e95f62f76..7652dcb57998 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -512,7 +512,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, struct device *dev; struct usb_device *udev; int retval = 0; - int lpm_disable_error = -ENODEV; if (!iface) return -ENODEV; @@ -533,16 +532,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, iface->condition = USB_INTERFACE_BOUND; - /* See the comment about disabling LPM in usb_probe_interface(). */ - if (driver->disable_hub_initiated_lpm) { - lpm_disable_error = usb_unlocked_disable_lpm(udev); - if (lpm_disable_error) { - dev_err(&iface->dev, "%s Failed to disable LPM for driver %s\n", - __func__, driver->name); - return -ENOMEM; - } - } - /* Claimed interfaces are initially inactive (suspended) and * runtime-PM-enabled, but only if the driver has autosuspend * support. Otherwise they are marked active, to prevent the @@ -561,10 +550,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, if (device_is_registered(dev)) retval = device_bind_driver(dev); - /* Attempt to re-enable USB3 LPM, if the disable was successful. 
*/ - if (!lpm_disable_error) - usb_unlocked_enable_lpm(udev); - return retval; } EXPORT_SYMBOL_GPL(usb_driver_claim_interface); From bd729f9d67aa9a303d8925bb8c4f06af25f407d1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 13:59:59 -0400 Subject: [PATCH 121/499] USB: fix error handling in usb_driver_claim_interface() The syzbot fuzzing project found a use-after-free bug in the USB core. The bug was caused by usbfs not unbinding from an interface when the USB device file was closed, which led another process to attempt the unbind later on, after the private data structure had been deallocated. The reason usbfs did not unbind the interface at the appropriate time was because it thought the interface had never been claimed in the first place. This was caused by the fact that usb_driver_claim_interface() does not clean up properly when device_bind_driver() returns an error. Although the error code gets passed back to the caller, the iface->dev.driver pointer remains set and iface->condition remains equal to USB_INTERFACE_BOUND. This patch adds proper error handling to usb_driver_claim_interface(). Signed-off-by: Alan Stern Reported-by: syzbot+f84aa7209ccec829536f@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 7652dcb57998..a1f225f077cd 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -550,6 +550,21 @@ int usb_driver_claim_interface(struct usb_driver *driver, if (device_is_registered(dev)) retval = device_bind_driver(dev); + if (retval) { + dev->driver = NULL; + usb_set_intfdata(iface, NULL); + iface->needs_remote_wakeup = 0; + iface->condition = USB_INTERFACE_UNBOUND; + + /* + * Unbound interfaces are always runtime-PM-disabled + * and runtime-PM-suspended + */ + if (driver->supports_autosuspend) + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + } + return retval; } EXPORT_SYMBOL_GPL(usb_driver_claim_interface); From c9a4cb204e9eb7fa7dfbe3f7d3a674fa530aa193 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Sep 2018 14:00:53 -0400 Subject: [PATCH 122/499] USB: handle NULL config in usb_find_alt_setting() usb_find_alt_setting() takes a pointer to a struct usb_host_config as an argument; it searches for an interface with specified interface and alternate setting numbers in that config. However, it crashes if the usb_host_config pointer argument is NULL. Since this is a general-purpose routine, available for use in many places, we want to to be more robust. This patch makes it return NULL whenever the config argument is NULL. 
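
A minimal stand-alone sketch of the same defensive pattern (invented types, not the USB core API): a lookup helper that tolerates a NULL container lets every caller's existing not-found check also cover the no-configuration case.

/* Sketch only: NULL-tolerant lookup helper. */
#include <stddef.h>
#include <stdio.h>

struct alt_setting { int iface_num; int alt_num; };
struct config { int n; struct alt_setting alts[4]; };

static struct alt_setting *find_alt_setting(struct config *cfg,
					    int iface_num, int alt_num)
{
	if (!cfg)		/* tolerate an unconfigured device */
		return NULL;
	for (int i = 0; i < cfg->n; i++)
		if (cfg->alts[i].iface_num == iface_num &&
		    cfg->alts[i].alt_num == alt_num)
			return &cfg->alts[i];
	return NULL;
}

int main(void)
{
	/* No active configuration: the helper reports "not found" instead
	 * of crashing, and the caller's existing NULL check covers it. */
	if (!find_alt_setting(NULL, 0, 0))
		puts("alt setting not found");
	return 0;
}
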
Signed-off-by: Alan Stern Reported-by: syzbot+19c3aaef85a89d451eac@syzkaller.appspotmail.com CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 623be3174fb3..79d8bd7a612e 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -228,6 +228,8 @@ struct usb_host_interface *usb_find_alt_setting( struct usb_interface_cache *intf_cache = NULL; int i; + if (!config) + return NULL; for (i = 0; i < config->desc.bNumInterfaces; i++) { if (config->intf_cache[i]->altsetting[0].desc.bInterfaceNumber == iface_num) { From 85682a7e3b9c664995ad477520f917039afdc330 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 10 Sep 2018 06:09:04 +0000 Subject: [PATCH 123/499] powerpc: fix csum_ipv6_magic() on little endian platforms On little endian platforms, csum_ipv6_magic() keeps len and proto in CPU byte order. This generates a bad results leading to ICMPv6 packets from other hosts being dropped by powerpc64le platforms. In order to fix this, len and proto should be converted to network byte order ie bigendian byte order. However checksumming 0x12345678 and 0x56341278 provide the exact same result so it is enough to rotate the sum of len and proto by 1 byte. PPC32 only support bigendian so the fix is needed for PPC64 only Fixes: e9c4943a107b ("powerpc: Implement csum_ipv6_magic in assembly") Reported-by: Jianlin Shi Reported-by: Xin Long Cc: # 4.18+ Signed-off-by: Christophe Leroy Tested-by: Xin Long Signed-off-by: Michael Ellerman --- arch/powerpc/lib/checksum_64.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 886ed94b9c13..d05c8af4ac51 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -443,6 +443,9 @@ _GLOBAL(csum_ipv6_magic) addc r0, r8, r9 ld r10, 0(r4) ld r11, 8(r4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + rotldi r5, r5, 8 +#endif adde r0, r0, r10 add r5, r5, r7 adde r0, r0, r11 From 5c54fcac9a9de559b444ac63ec3cd82f1d157a0b Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 19 Sep 2018 10:58:05 +0300 Subject: [PATCH 124/499] usb: roles: Take care of driver module reference counting This fixes potential "BUG: unable to handle kernel paging request at ..." from happening. 
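
The pattern here is general: code that hands out a reference to a driver-provided object must also pin the driver, or the driver can be unloaded while the object is still in use. A rough user-space analogue, with invented types standing in for the module and the role-switch handle:

/* Sketch only: pairing a consumer handle with a provider refcount. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct provider { int refcnt; };          /* stands in for a driver module */
struct handle   { struct provider *owner; };

static struct handle *handle_get(struct provider *p, struct handle *h)
{
	p->refcnt++;          /* like try_module_get(): pin the provider */
	h->owner = p;
	return h;
}

static void handle_put(struct handle *h)
{
	if (!h)
		return;
	h->owner->refcnt--;   /* like module_put(): allow unload again */
}

int main(void)
{
	struct provider drv = { 0 };
	struct handle sw;

	handle_get(&drv, &sw);
	assert(drv.refcnt == 1);   /* provider cannot go away while held */
	handle_put(&sw);
	assert(drv.refcnt == 0);
	puts("balanced get/put");
	return 0;
}
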
Fixes: fde0aa6c175a ("usb: common: Small class for USB role switches") Cc: Acked-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/roles.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/usb/common/roles.c b/drivers/usb/common/roles.c index 15cc76e22123..99116af07f1d 100644 --- a/drivers/usb/common/roles.c +++ b/drivers/usb/common/roles.c @@ -109,8 +109,15 @@ static void *usb_role_switch_match(struct device_connection *con, int ep, */ struct usb_role_switch *usb_role_switch_get(struct device *dev) { - return device_connection_find_match(dev, "usb-role-switch", NULL, - usb_role_switch_match); + struct usb_role_switch *sw; + + sw = device_connection_find_match(dev, "usb-role-switch", NULL, + usb_role_switch_match); + + if (!IS_ERR_OR_NULL(sw)) + WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_get); @@ -122,8 +129,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_get); */ void usb_role_switch_put(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { put_device(&sw->dev); + module_put(sw->dev.parent->driver->owner); + } } EXPORT_SYMBOL_GPL(usb_role_switch_put); From 16c4cb19fa85c648a803752eb63cac0ef69231c2 Mon Sep 17 00:00:00 2001 From: Harry Pan Date: Fri, 14 Sep 2018 16:58:16 +0800 Subject: [PATCH 125/499] usb: core: safely deal with the dynamic quirk lists Applying dynamic usbcore quirks in early booting when the slab is not yet ready would cause kernel panic of null pointer dereference because the quirk_count has been counted as 1 while the quirk_list was failed to allocate. i.e., [ 1.044970] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1.044995] IP: [] usb_detect_quirks+0x88/0xd1 [ 1.045016] PGD 0 [ 1.045026] Oops: 0000 [#1] PREEMPT SMP [ 1.046986] gsmi: Log Shutdown Reason 0x03 [ 1.046995] Modules linked in: [ 1.047008] CPU: 0 PID: 81 Comm: kworker/0:3 Not tainted 4.4.154 #28 [ 1.047016] Hardware name: Google Coral/Coral, BIOS Google_Coral.10068.27.0 12/04/2017 [ 1.047028] Workqueue: usb_hub_wq hub_event [ 1.047037] task: ffff88017a321c80 task.stack: ffff88017a384000 [ 1.047044] RIP: 0010:[] [] usb_detect_quirks+0x88/0xd1 To tackle this odd, let's balance the quirk_count to 0 when the kcalloc call fails, and defer the quirk setting into a lower level callback which ensures that the kernel memory management has been initialized. 
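
The invariant behind the fix is that a count and the array it describes must never disagree; on allocation failure the count has to be rolled back before returning. A small illustrative sketch (the names are made up):

/* Sketch only: keep 'count' consistent with the allocated list. */
#include <stdio.h>
#include <stdlib.h>

struct entry { int vid, pid; };

static struct entry *list;
static size_t count;

static int set_entries(size_t n)
{
	count = n;
	list = calloc(count, sizeof(*list));
	if (!list) {
		count = 0;	/* never advertise entries we do not have */
		return -1;
	}
	return 0;
}

int main(void)
{
	if (set_entries(4) == 0)
		printf("%zu entries ready\n", count);
	free(list);
	return 0;
}
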
Fixes: 027bd6cafd9a ("usb: core: Add "quirks" parameter for usbcore") Signed-off-by: Harry Pan Acked-by: Kai-Heng Feng Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index e77dfe5ed5ec..178d6c6063c0 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -58,6 +58,7 @@ static int quirks_param_set(const char *val, const struct kernel_param *kp) quirk_list = kcalloc(quirk_count, sizeof(struct quirk_entry), GFP_KERNEL); if (!quirk_list) { + quirk_count = 0; mutex_unlock(&quirk_mutex); return -ENOMEM; } @@ -154,7 +155,7 @@ static struct kparam_string quirks_param_string = { .string = quirks_param, }; -module_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0644); +device_param_cb(quirks, &quirks_param_ops, &quirks_param_string, 0644); MODULE_PARM_DESC(quirks, "Add/modify USB quirks by specifying quirks=vendorID:productID:quirks"); /* Lists of quirky USB devices, split in device quirks and interface quirks. From 3e3b81965cbfa01fda6d77750feedc3c46fc28d0 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 19 Sep 2018 10:58:04 +0300 Subject: [PATCH 126/499] usb: typec: mux: Take care of driver module reference counting Functions typec_mux_get() and typec_switch_get() already make sure that the mux device reference count is incremented, but the same must be done to the driver module as well to prevent the drivers from being unloaded in the middle of operation. This fixes a potential "BUG: unable to handle kernel paging request at ..." from happening. Fixes: 93dd2112c7b2 ("usb: typec: mux: Get the mux identifier from function parameter") Acked-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c index ddaac63ecf12..d990aa510fab 100644 --- a/drivers/usb/typec/mux.c +++ b/drivers/usb/typec/mux.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -49,8 +50,10 @@ struct typec_switch *typec_switch_get(struct device *dev) mutex_lock(&switch_lock); sw = device_connection_find_match(dev, "typec-switch", NULL, typec_switch_match); - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + WARN_ON(!try_module_get(sw->dev->driver->owner)); get_device(sw->dev); + } mutex_unlock(&switch_lock); return sw; @@ -65,8 +68,10 @@ EXPORT_SYMBOL_GPL(typec_switch_get); */ void typec_switch_put(struct typec_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + module_put(sw->dev->driver->owner); put_device(sw->dev); + } } EXPORT_SYMBOL_GPL(typec_switch_put); @@ -136,8 +141,10 @@ struct typec_mux *typec_mux_get(struct device *dev, const char *name) mutex_lock(&mux_lock); mux = device_connection_find_match(dev, name, NULL, typec_mux_match); - if (!IS_ERR_OR_NULL(mux)) + if (!IS_ERR_OR_NULL(mux)) { + WARN_ON(!try_module_get(mux->dev->driver->owner)); get_device(mux->dev); + } mutex_unlock(&mux_lock); return mux; @@ -152,8 +159,10 @@ EXPORT_SYMBOL_GPL(typec_mux_get); */ void typec_mux_put(struct typec_mux *mux) { - if (!IS_ERR_OR_NULL(mux)) + if (!IS_ERR_OR_NULL(mux)) { + module_put(mux->dev->driver->owner); put_device(mux->dev); + } } EXPORT_SYMBOL_GPL(typec_mux_put); From 2823c8716c687d6c7e261a3a02b3cab43809fe9c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 27 Aug 2018 10:34:07 -0500 Subject: 
[PATCH 127/499] b43: fix DMA error related regression with proprietary firmware In commit 66cffd6daab7 ("b43: fix transmit failure when VT is switched"), a condition is noted where the network controller needs to be reset. Note that this situation happens when running the open-source firmware (http://netweb.ing.unibs.it/~openfwwf/), plus a number of other special conditions. for a different card model, it is reported that this change breaks operation running the proprietary firmware (https://marc.info/?l=linux-wireless&m=153504546924558&w=2). Rather than reverting the previous patch, the code is tweaked to avoid the reset unless the open-source firmware is being used. Fixes: 66cffd6daab7 ("b43: fix transmit failure when VT is switched") Cc: Stable # 4.18+ Cc: Taketo Kabe Reported-and-tested-by: D. Prabhu Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43/dma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 6b0e1ec346cb..d46d57b989ae 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -1518,13 +1518,15 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, } } else { /* More than a single header/data pair were missed. - * Report this error, and reset the controller to + * Report this error. If running with open-source + * firmware, then reset the controller to * revive operation. */ b43dbg(dev->wl, "Out of order TX status report on DMA ring %d. Expected %d, but got %d\n", ring->index, firstused, slot); - b43_controller_restart(dev, "Out of order TX"); + if (dev->fw.opensource) + b43_controller_restart(dev, "Out of order TX"); return; } } From c716a25b9b70084e1144f77423f5aedd772ea478 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Thu, 20 Sep 2018 01:38:58 -0300 Subject: [PATCH 128/499] powerpc/pkeys: Fix reading of ibm, processor-storage-keys property scan_pkey_feature() uses of_property_read_u32_array() to read the ibm,processor-storage-keys property and calls be32_to_cpu() on the value it gets. The problem is that of_property_read_u32_array() already returns the value converted to the CPU byte order. The value of pkeys_total ends up more or less sane because there's a min() call in pkey_initialize() which reduces pkeys_total to 32. So in practice the kernel ignores the fact that the hypervisor reserved one key for itself (the device tree advertises 31 keys in my test VM). This is wrong, but the effect in practice is that when a process tries to allocate the 32nd key, it gets an -EINVAL error instead of -ENOSPC which would indicate that there aren't any keys available Fixes: cf43d3b26452 ("powerpc: Enable pkey subsystem") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pkeys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 333b1f80c435..b271b283c785 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -45,7 +45,7 @@ static void scan_pkey_feature(void) * Since any pkey can be used for data or execute, we will just treat * all keys as equal and track them as one entity. 
*/ - pkeys_total = be32_to_cpu(vals[0]); + pkeys_total = vals[0]; pkeys_devtree_defined = true; } From 7e620984b62532783912312e334f3c48cdacbd5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 20 Sep 2018 14:11:17 +0200 Subject: [PATCH 129/499] serial: imx: restore handshaking irq for imx1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Back in 2015 when irda was dropped from the driver imx1 was broken. This change reintroduces the support for the third interrupt of the UART. Fixes: afe9cbb1a6ad ("serial: imx: drop support for IRDA") Cc: stable Signed-off-by: Uwe Kleine-König Reviewed-by: Leonard Crestez Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 239c0fa2e981..0f67197a3783 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2351,6 +2351,14 @@ static int imx_uart_probe(struct platform_device *pdev) ret); return ret; } + + ret = devm_request_irq(&pdev->dev, rtsirq, imx_uart_rtsint, 0, + dev_name(&pdev->dev), sport); + if (ret) { + dev_err(&pdev->dev, "failed to request rts irq: %d\n", + ret); + return ret; + } } else { ret = devm_request_irq(&pdev->dev, rxirq, imx_uart_int, 0, dev_name(&pdev->dev), sport); From f9d5b1d50840320fb6c66fafd1a532a995fc3758 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:31:45 +0300 Subject: [PATCH 130/499] mlxsw: spectrum: Bump required firmware version MC-aware mode was introduced to mlxsw in commit 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") and fixed up later in commit 3a3539cd3632 ("mlxsw: spectrum_buffers: Set up a dedicated pool for BUM traffic"). As the final piece of puzzle, a firmware issue whereby a wrong priority was assigned to BUM traffic was corrected in FW version 13.1703.4. Therefore require this FW version in the driver. Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 930700413b1d..b492152c8881 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -44,8 +44,8 @@ #define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) #define MLXSW_SP1_FWREV_MAJOR 13 -#define MLXSW_SP1_FWREV_MINOR 1702 -#define MLXSW_SP1_FWREV_SUBMINOR 6 +#define MLXSW_SP1_FWREV_MINOR 1703 +#define MLXSW_SP1_FWREV_SUBMINOR 4 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { From 56ce3c5a50f4d8cc95361b1ec7f152006c6320d8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 20 Sep 2018 09:27:30 +0200 Subject: [PATCH 131/499] smc: generic netlink family should be __ro_after_init The generic netlink family is only initialized during module init, so it should be __ro_after_init like all other generic netlink families. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- net/smc/smc_pnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 01c6ce042a1c..7cb3e4f07c10 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -461,7 +461,7 @@ static const struct genl_ops smc_pnet_ops[] = { }; /* SMC_PNETID family definition */ -static struct genl_family smc_pnet_nl_family = { +static struct genl_family smc_pnet_nl_family __ro_after_init = { .hdrsize = 0, .name = SMCR_GENL_FAMILY_NAME, .version = SMCR_GENL_FAMILY_VERSION, From fb6de923ca3358a91525552b4907d4cb38730bdd Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 19 Sep 2018 15:30:51 -0600 Subject: [PATCH 132/499] regulator: fix crash caused by null driver data dev_set_drvdata() needs to be called before device_register() exposes device to userspace. Otherwise kernel crashes after it gets null pointer from dev_get_drvdata() when userspace tries to access sysfs entries. [Removed backtrace for length -- broonie] Signed-off-by: Yu Zhao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 90215f57270f..9577d8941846 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4395,13 +4395,13 @@ regulator_register(const struct regulator_desc *regulator_desc, !rdev->desc->fixed_uV) rdev->is_switch = true; + dev_set_drvdata(&rdev->dev, rdev); ret = device_register(&rdev->dev); if (ret != 0) { put_device(&rdev->dev); goto unset_supplies; } - dev_set_drvdata(&rdev->dev, rdev); rdev_init_debugfs(rdev); /* try to resolve regulators supply since a new one was registered */ From d98627d1360d55e3b28f702caca8b6342c4a4e45 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 27 May 2018 22:42:55 +0100 Subject: [PATCH 133/499] drm/i2c: tda9950: fix timeout counter check Currently the check to see if the timeout has reached zero is incorrect and the check is instead checking if the timeout is non-zero and not zero, hence it will break out of the loop on the first iteration and the msleep is never executed. Fix this by breaking from the loop when timeout is zero. Detected by CoverityScan, CID#1469404 ("Logically Dead Code") Fixes: f0316f93897c ("drm/i2c: tda9950: add CEC driver") Signed-off-by: Colin Ian King Signed-off-by: Russell King --- drivers/gpu/drm/i2c/tda9950.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c index 3f7396caad48..f2186409f0cf 100644 --- a/drivers/gpu/drm/i2c/tda9950.c +++ b/drivers/gpu/drm/i2c/tda9950.c @@ -307,7 +307,7 @@ static void tda9950_release(struct tda9950_priv *priv) /* Wait up to .5s for it to signal non-busy */ do { csr = tda9950_read(client, REG_CSR); - if (!(csr & CSR_BUSY) || --timeout) + if (!(csr & CSR_BUSY) || !--timeout) break; msleep(10); } while (1); From e0dccce1193f87597548d0db6ecc942fb92c04cd Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 27 Aug 2018 14:28:50 +0200 Subject: [PATCH 134/499] drm/i2c: tda9950: set MAX_RETRIES for errors only The CEC_TX_STATUS_MAX_RETRIES should be set for errors only to prevent the CEC framework from retrying the transmit. If the transmit was successful, then don't set this flag. Found by running 'cec-compliance -A' on a beaglebone box. 
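
The change amounts to qualifying a status flag: MAX_RETRIES means the hardware already retried and gave up, which is only meaningful for a failed transmit. A tiny sketch with invented status constants:

/* Sketch only: annotate failed transmits, leave successful ones untouched. */
#include <stdio.h>

#define STATUS_OK          0x01
#define STATUS_MAX_RETRIES 0x40

static unsigned int finalize_tx_status(unsigned int tx_status)
{
	if (tx_status != STATUS_OK)
		tx_status |= STATUS_MAX_RETRIES;  /* hw already did the retries */
	return tx_status;
}

int main(void)
{
	printf("%#x %#x\n", finalize_tx_status(STATUS_OK),
	       finalize_tx_status(0x20 /* e.g. a NACK condition */));
	return 0;
}
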
Signed-off-by: Hans Verkuil Signed-off-by: Russell King --- drivers/gpu/drm/i2c/tda9950.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c index f2186409f0cf..ccd355d0c123 100644 --- a/drivers/gpu/drm/i2c/tda9950.c +++ b/drivers/gpu/drm/i2c/tda9950.c @@ -188,7 +188,8 @@ static irqreturn_t tda9950_irq(int irq, void *data) break; } /* TDA9950 executes all retries for us */ - tx_status |= CEC_TX_STATUS_MAX_RETRIES; + if (tx_status != CEC_TX_STATUS_OK) + tx_status |= CEC_TX_STATUS_MAX_RETRIES; cec_transmit_done(priv->adap, tx_status, arb_lost_cnt, nack_cnt, 0, err_cnt); break; From a173f066c7cfc031acb8f541708041e009fc9812 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Sep 2018 08:20:36 -0700 Subject: [PATCH 135/499] netfilter: bridge: Don't sabotage nf_hook calls from an l3mdev For starters, the bridge netfilter code registers operations that are invoked any time nh_hook is called. Specifically, ip_sabotage_in watches for nested calls for NF_INET_PRE_ROUTING when a bridge is in the stack. Packet wise, the bridge netfilter hook runs first. br_nf_pre_routing allocates nf_bridge, sets in_prerouting to 1 and calls NF_HOOK for NF_INET_PRE_ROUTING. It's finish function, br_nf_pre_routing_finish, then resets in_prerouting flag to 0 and the packet continues up the stack. The packet eventually makes it to the VRF driver and it invokes nf_hook for NF_INET_PRE_ROUTING in case any rules have been added against the vrf device. Because of the registered operations the call to nf_hook causes ip_sabotage_in to be invoked. That function sees the nf_bridge on the skb and that in_prerouting is not set. Thinking it is an invalid nested call it steals (drops) the packet. Update ip_sabotage_in to recognize that the bridge or one of its upper devices (e.g., vlan) can be enslaved to a VRF (L3 master device) and allow the packet to go through the nf_hook a second time. Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") Reported-by: D'Souza, Nelson Signed-off-by: David Ahern Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 6e0dc6bcd32a..37278dc280eb 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -835,7 +835,8 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; } From bab4344975fe2c719eda32de59298d6de26fe126 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 17 Sep 2018 22:21:36 -0700 Subject: [PATCH 136/499] netfilter: nft_osf: use enum nft_data_types for nft_validate_register_store The function nft_validate_register_store requires a struct of type struct nft_data_types. NFTA_DATA_VALUE is of type enum nft_verdict_attributes. Pass the correct enum type. 
This fixes a warning seen with Clang: net/netfilter/nft_osf.c:52:8: warning: implicit conversion from enumeration type 'enum nft_data_attributes' to different enumeration type 'enum nft_data_types' [-Wenum-conversion] NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN); ^~~~~~~~~~~~~~~ Fixes: b96af92d6eaf ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf") Link: https://github.com/ClangBuiltLinux/linux/issues/103 Signed-off-by: Stefan Agner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_osf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 5af74b37f423..a35fb59ace73 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -49,7 +49,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN); + NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); if (err < 0) return err; From 346fa83d10934cf206e2fd0f514bf8ce186f08fe Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 19 Sep 2018 20:21:11 +0800 Subject: [PATCH 137/499] netfilter: conntrack: get rid of double sizeof sizeof(sizeof()) is quite strange and does not seem to be what is wanted here. The issue is detected with the help of Coccinelle. Fixes: 39215846740a ("netfilter: conntrack: remove nlattr_size pointer from l4proto trackers") Signed-off-by: zhong jiang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b4bdf9eda7b7..247b89784a6f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1213,8 +1213,8 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { #define TCP_NLATTR_SIZE ( \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags)))) + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \ + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags))) static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { From 8c6ec3613e7b0aade20a3196169c0bab32ed3e3f Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 19 Sep 2018 19:01:37 +0200 Subject: [PATCH 138/499] bnxt_en: don't try to offload VLAN 'modify' action bnxt offload code currently supports only 'push' and 'pop' operation: let .ndo_setup_tc() return -EOPNOTSUPP if VLAN 'modify' action is configured. Fixes: 2ae7408fedfe ("bnxt_en: bnxt: add TC flower filter offload support") Signed-off-by: Davide Caratti Acked-by: Sathya Perla Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 092c817f8f11..e1594c9df4c6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -75,17 +75,23 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, return 0; } -static void bnxt_tc_parse_vlan(struct bnxt *bp, - struct bnxt_tc_actions *actions, - const struct tc_action *tc_act) +static int bnxt_tc_parse_vlan(struct bnxt *bp, + struct bnxt_tc_actions *actions, + const struct tc_action *tc_act) { - if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_POP) { + switch (tcf_vlan_action(tc_act)) { + case TCA_VLAN_ACT_POP: actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN; - } else if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_PUSH) { + break; + case TCA_VLAN_ACT_PUSH: actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN; actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act)); actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act); + break; + default: + return -EOPNOTSUPP; } + return 0; } static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, @@ -134,7 +140,9 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, /* Push/pop VLAN */ if (is_tcf_vlan(tc_act)) { - bnxt_tc_parse_vlan(bp, actions, tc_act); + rc = bnxt_tc_parse_vlan(bp, actions, tc_act); + if (rc) + return rc; continue; } From d7ab5cdce54da631f0c8c11e506c974536a3581e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 20 Sep 2018 17:27:28 +0800 Subject: [PATCH 139/499] sctp: update dst pmtu with the correct daddr When processing pmtu update from an icmp packet, it calls .update_pmtu with sk instead of skb in sctp_transport_update_pmtu. However for sctp, the daddr in the transport might be different from inet_sock->inet_daddr or sk->sk_v6_daddr, which is used to update or create the route cache. The incorrect daddr will cause a different route cache created for the path. So before calling .update_pmtu, inet_sock->inet_daddr/sk->sk_v6_daddr should be updated with the daddr in the transport, and update it back after it's done. The issue has existed since route exceptions introduction. Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.") Reported-by: ian.periam@dialogic.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/sctp/transport.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 12cac85da994..033696e6f74f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -260,6 +260,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + struct sock *sk = t->asoc->base.sk; bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { @@ -271,12 +272,19 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pmtu = SCTP_TRUNC4(pmtu); if (dst) { - dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); + struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); + union sctp_addr addr; + + pf->af->from_sk(&addr, sk); + pf->to_sk_daddr(&t->ipaddr, sk); + dst->ops->update_pmtu(dst, sk, NULL, pmtu); + pf->to_sk_daddr(&addr, sk); + dst = sctp_transport_dst_check(t); } if (!dst) { - t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); dst = t->dst; } From 60489f085574157c343fc62a32f997fe7346a659 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 20 Sep 2018 09:31:10 +0200 Subject: [PATCH 140/499] spi: spi-mem: Add missing description for data.nbytes field Add a description for spi_mem_op.data.nbytes to the kerneldoc header. Fixes: c36ff266dc82 ("spi: Extend the core to ease integration of SPI memory controllers") Signed-off-by: Boris Brezillon Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 62722fb7472d..0cfbb1ad8d96 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -79,6 +79,8 @@ enum spi_mem_data_dir { * @dummy.buswidth: number of IO lanes used to transmit the dummy bytes * @data.buswidth: number of IO lanes used to send/receive the data * @data.dir: direction of the transfer + * @data.nbytes: number of data bytes to send/receive. Can be zero if the + * operation does not involve transferring data * @data.buf.in: input buffer * @data.buf.out: output buffer */ From c949a8e8b43f2c75567269bcc9a50d704ae3c420 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 20 Sep 2018 09:31:11 +0200 Subject: [PATCH 141/499] spi: spi-mem: Move the DMA-able constraint doc to the kerneldoc header We'd better have that documented in the kerneldoc header, so that it's exposed to the doc generated by Sphinx. Signed-off-by: Boris Brezillon Signed-off-by: Mark Brown --- include/linux/spi/spi-mem.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index 0cfbb1ad8d96..7195fbc234aa 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -81,8 +81,8 @@ enum spi_mem_data_dir { * @data.dir: direction of the transfer * @data.nbytes: number of data bytes to send/receive. Can be zero if the * operation does not involve transferring data - * @data.buf.in: input buffer - * @data.buf.out: output buffer + * @data.buf.in: input buffer (must be DMA-able) + * @data.buf.out: output buffer (must be DMA-able) */ struct spi_mem_op { struct { @@ -105,7 +105,6 @@ struct spi_mem_op { u8 buswidth; enum spi_mem_data_dir dir; unsigned int nbytes; - /* buf.{in,out} must be DMA-able. 
*/ union { void *in; const void *out; From b3027b7746ce1e5a9429715ee6492aca2a6e4cf0 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 19 Sep 2018 17:06:36 +0100 Subject: [PATCH 142/499] MAINTAINERS: Move mobiveil PCI driver entry where it belongs Commit 92f9ccca4c08 ("PCI: mobiveil: Add Mobiveil PCIe Host Bridge IP driver DT bindings") managed to add a MAINTAINERS entry where it does not really belong (ie in the middle of a totally unrelated series of entries and in the wrong alphabetical order). Fix it. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Subrahmanya Lingappa --- MAINTAINERS | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f05d2e9ffc84..700408b7bc53 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9716,13 +9716,6 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/dvb-frontends/mn88473* -PCI DRIVER FOR MOBIVEIL PCIE IP -M: Subrahmanya Lingappa -L: linux-pci@vger.kernel.org -S: Supported -F: Documentation/devicetree/bindings/pci/mobiveil-pcie.txt -F: drivers/pci/controller/pcie-mobiveil.c - MODULE SUPPORT M: Jessica Yu T: git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next @@ -11137,6 +11130,13 @@ F: include/uapi/linux/switchtec_ioctl.h F: include/linux/switchtec.h F: drivers/ntb/hw/mscc/ +PCI DRIVER FOR MOBIVEIL PCIE IP +M: Subrahmanya Lingappa +L: linux-pci@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/pci/mobiveil-pcie.txt +F: drivers/pci/controller/pcie-mobiveil.c + PCI DRIVER FOR MVEBU (Marvell Armada 370 and Armada XP SOC support) M: Thomas Petazzoni M: Jason Cooper From 9024143e700f89d74b8cdaf316a3499d74fc56fe Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 20 Sep 2018 16:32:52 -0500 Subject: [PATCH 143/499] PCI: dwc: Fix scheduling while atomic issues When programming the inbound/outbound ATUs, we call usleep_range() after each checking PCIE_ATU_ENABLE bit. Unfortunately, the ATU programming can be executed in atomic context: inbound ATU programming could be called through pci_epc_write_header() =>dw_pcie_ep_write_header() =>dw_pcie_prog_inbound_atu() outbound ATU programming could be called through pci_bus_read_config_dword() =>dw_pcie_rd_conf() =>dw_pcie_prog_outbound_atu() Fix this issue by calling mdelay() instead. 
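
The constraint being worked around is generic: usleep_range() and msleep() may schedule and so cannot be used in atomic context, while mdelay() busy-waits and can. A schematic kernel-style poll loop in the spirit of the fix; the register, bit and retry values below are illustrative, not the DesignWare ones.

/* Sketch only: poll a hardware enable bit from possibly-atomic context. */
#include <linux/bits.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>

#define POLL_RETRIES	5
#define POLL_DELAY_MS	9
#define ENABLE_BIT	BIT(31)

static int wait_for_enable(void __iomem *reg)
{
	unsigned int retries;

	for (retries = 0; retries < POLL_RETRIES; retries++) {
		if (readl(reg) & ENABLE_BIT)
			return 0;
		mdelay(POLL_DELAY_MS);	/* busy-wait: no sleeping under a spinlock */
	}
	return -ETIMEDOUT;
}
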
Fixes: f8aed6ec624f ("PCI: dwc: designware: Add EP mode support") Fixes: d8bbeb39fbf3 ("PCI: designware: Wait for iATU enable") Signed-off-by: Jisheng Zhang [lorenzo.pieralisi@arm.com: commit log update] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Gustavo Pimentel --- drivers/pci/controller/dwc/pcie-designware.c | 8 ++++---- drivers/pci/controller/dwc/pcie-designware.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 778c4f76a884..2153956a0b20 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -135,7 +135,7 @@ static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, int index, if (val & PCIE_ATU_ENABLE) return; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "Outbound iATU is not being enabled\n"); } @@ -178,7 +178,7 @@ void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type, if (val & PCIE_ATU_ENABLE) return; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "Outbound iATU is not being enabled\n"); } @@ -236,7 +236,7 @@ static int dw_pcie_prog_inbound_atu_unroll(struct dw_pcie *pci, int index, if (val & PCIE_ATU_ENABLE) return 0; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "Inbound iATU is not being enabled\n"); @@ -282,7 +282,7 @@ int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int bar, if (val & PCIE_ATU_ENABLE) return 0; - usleep_range(LINK_WAIT_IATU_MIN, LINK_WAIT_IATU_MAX); + mdelay(LINK_WAIT_IATU); } dev_err(pci->dev, "Inbound iATU is not being enabled\n"); diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index 96126fd8403c..9f1a5e399b70 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -26,8 +26,7 @@ /* Parameters for the waiting for iATU enabled routine */ #define LINK_WAIT_MAX_IATU_RETRIES 5 -#define LINK_WAIT_IATU_MIN 9000 -#define LINK_WAIT_IATU_MAX 10000 +#define LINK_WAIT_IATU 9 /* Synopsys-specific PCIe configuration registers */ #define PCIE_PORT_LINK_CONTROL 0x710 From 4eeed3686981ff887bbdd7254139e2eca276534c Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Tue, 18 Sep 2018 10:51:37 +0300 Subject: [PATCH 144/499] RDMA/uverbs: Fix validity check for modify QP Uverbs shouldn't enforce QP state in the command unless the user set the QP state bit in the attribute mask. In addition, only copy qp attr fields which have the corresponding bit set in the attribute mask over to the internal attr structure. 
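
The fix follows the usual rule for attribute-plus-mask interfaces: a field in the command is meaningful only if its bit is set in the mask, so copying or validating it unconditionally lets stale user data through. A small stand-alone sketch of mask-gated copying (invented structure and bit names):

/* Sketch only: copy a field only when its mask bit says it is valid. */
#include <stdint.h>
#include <stdio.h>

#define ATTR_STATE   (1u << 0)
#define ATTR_TIMEOUT (1u << 1)

struct user_cmd { uint32_t attr_mask; int state; int timeout; };
struct attr     { int state; int timeout; };

static void copy_masked(struct attr *dst, const struct user_cmd *cmd)
{
	if (cmd->attr_mask & ATTR_STATE)
		dst->state = cmd->state;
	if (cmd->attr_mask & ATTR_TIMEOUT)
		dst->timeout = cmd->timeout;
	/* fields without a mask bit keep their previous/default values */
}

int main(void)
{
	struct attr a = { .state = -1, .timeout = -1 };
	struct user_cmd cmd = { .attr_mask = ATTR_TIMEOUT, .timeout = 14,
				.state = 12345 /* garbage: must be ignored */ };

	copy_masked(&a, &cmd);
	printf("state=%d timeout=%d\n", a.state, a.timeout);
	return 0;
}
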
Fixes: 88de869bbe4f ("RDMA/uverbs: Ensure validity of current QP state value") Fixes: bc38a6abdd5a ("[PATCH] IB uverbs: core implementation") Signed-off-by: Majd Dibbiny Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 68 ++++++++++++++++++---------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a21d5214afc3..e012ca80f9d1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2027,33 +2027,55 @@ static int modify_qp(struct ib_uverbs_file *file, if ((cmd->base.attr_mask & IB_QP_CUR_STATE && cmd->base.cur_qp_state > IB_QPS_ERR) || - cmd->base.qp_state > IB_QPS_ERR) { + (cmd->base.attr_mask & IB_QP_STATE && + cmd->base.qp_state > IB_QPS_ERR)) { ret = -EINVAL; goto release_qp; } - attr->qp_state = cmd->base.qp_state; - attr->cur_qp_state = cmd->base.cur_qp_state; - attr->path_mtu = cmd->base.path_mtu; - attr->path_mig_state = cmd->base.path_mig_state; - attr->qkey = cmd->base.qkey; - attr->rq_psn = cmd->base.rq_psn; - attr->sq_psn = cmd->base.sq_psn; - attr->dest_qp_num = cmd->base.dest_qp_num; - attr->qp_access_flags = cmd->base.qp_access_flags; - attr->pkey_index = cmd->base.pkey_index; - attr->alt_pkey_index = cmd->base.alt_pkey_index; - attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; - attr->max_rd_atomic = cmd->base.max_rd_atomic; - attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; - attr->min_rnr_timer = cmd->base.min_rnr_timer; - attr->port_num = cmd->base.port_num; - attr->timeout = cmd->base.timeout; - attr->retry_cnt = cmd->base.retry_cnt; - attr->rnr_retry = cmd->base.rnr_retry; - attr->alt_port_num = cmd->base.alt_port_num; - attr->alt_timeout = cmd->base.alt_timeout; - attr->rate_limit = cmd->rate_limit; + if (cmd->base.attr_mask & IB_QP_STATE) + attr->qp_state = cmd->base.qp_state; + if (cmd->base.attr_mask & IB_QP_CUR_STATE) + attr->cur_qp_state = cmd->base.cur_qp_state; + if (cmd->base.attr_mask & IB_QP_PATH_MTU) + attr->path_mtu = cmd->base.path_mtu; + if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE) + attr->path_mig_state = cmd->base.path_mig_state; + if (cmd->base.attr_mask & IB_QP_QKEY) + attr->qkey = cmd->base.qkey; + if (cmd->base.attr_mask & IB_QP_RQ_PSN) + attr->rq_psn = cmd->base.rq_psn; + if (cmd->base.attr_mask & IB_QP_SQ_PSN) + attr->sq_psn = cmd->base.sq_psn; + if (cmd->base.attr_mask & IB_QP_DEST_QPN) + attr->dest_qp_num = cmd->base.dest_qp_num; + if (cmd->base.attr_mask & IB_QP_ACCESS_FLAGS) + attr->qp_access_flags = cmd->base.qp_access_flags; + if (cmd->base.attr_mask & IB_QP_PKEY_INDEX) + attr->pkey_index = cmd->base.pkey_index; + if (cmd->base.attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; + if (cmd->base.attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + attr->max_rd_atomic = cmd->base.max_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; + if (cmd->base.attr_mask & IB_QP_MIN_RNR_TIMER) + attr->min_rnr_timer = cmd->base.min_rnr_timer; + if (cmd->base.attr_mask & IB_QP_PORT) + attr->port_num = cmd->base.port_num; + if (cmd->base.attr_mask & IB_QP_TIMEOUT) + attr->timeout = cmd->base.timeout; + if (cmd->base.attr_mask & IB_QP_RETRY_CNT) + attr->retry_cnt = cmd->base.retry_cnt; + if (cmd->base.attr_mask & IB_QP_RNR_RETRY) + attr->rnr_retry = cmd->base.rnr_retry; + if (cmd->base.attr_mask & IB_QP_ALT_PATH) { + 
attr->alt_port_num = cmd->base.alt_port_num; + attr->alt_timeout = cmd->base.alt_timeout; + attr->alt_pkey_index = cmd->base.alt_pkey_index; + } + if (cmd->base.attr_mask & IB_QP_RATE_LIMIT) + attr->rate_limit = cmd->rate_limit; if (cmd->base.attr_mask & IB_QP_AV) copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr, From 0dbfaa9f2813787679e296eb5476e40938ab48c8 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 20 Sep 2018 12:58:46 -0700 Subject: [PATCH 145/499] IB/hfi1: Fix SL array bounds check The SL specified by a user needs to be a valid SL. Add a range check to the user specified SL value which protects from running off the end of the SL to SC table. CC: stable@vger.kernel.org Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/verbs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 13374c727b14..a7c586a5589d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1582,6 +1582,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) struct hfi1_pportdata *ppd; struct hfi1_devdata *dd; u8 sc5; + u8 sl; if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) && !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) @@ -1590,8 +1591,13 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) /* test the mapping for validity */ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); - sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; dd = dd_from_ppd(ppd); + + sl = rdma_ah_get_sl(ah_attr); + if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) + return -EINVAL; + + sc5 = ibp->sl_to_sc[sl]; if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) return -EINVAL; return 0; From 94694d18cf27a6faad91487a38ce516c2b16e7d9 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 20 Sep 2018 12:58:56 -0700 Subject: [PATCH 146/499] IB/hfi1: Invalid user input can result in crash If the number of packets in a user sdma request does not match the actual iovectors being sent, sdma_cleanup can be called on an uninitialized request structure, resulting in a crash similar to this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] __sdma_txclean+0x57/0x1e0 [hfi1] PGD 8000001044f61067 PUD 1052706067 PMD 0 Oops: 0000 [#1] SMP CPU: 30 PID: 69912 Comm: upsm Kdump: loaded Tainted: G OE ------------ 3.10.0-862.el7.x86_64 #1 Hardware name: Intel Corporation S2600KPR/S2600KPR, BIOS SE5C610.86B.01.01.0019.101220160604 10/12/2016 task: ffff8b331c890000 ti: ffff8b2ed1f98000 task.ti: ffff8b2ed1f98000 RIP: 0010:[] [] __sdma_txclean+0x57/0x1e0 [hfi1] RSP: 0018:ffff8b2ed1f9bab0 EFLAGS: 00010286 RAX: 0000000000008b2b RBX: ffff8b2adf6e0000 RCX: 0000000000000000 RDX: 00000000000000a0 RSI: ffff8b2e9eedc540 RDI: ffff8b2adf6e0000 RBP: ffff8b2ed1f9bad8 R08: 0000000000000000 R09: ffffffffc0b04a06 R10: ffff8b331c890190 R11: ffffe6ed00bf1840 R12: ffff8b3315480000 R13: ffff8b33154800f0 R14: 00000000fffffff2 R15: ffff8b2e9eedc540 FS: 00007f035ac47740(0000) GS:ffff8b331e100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000c03fe6000 CR4: 00000000001607e0 Call Trace: [] user_sdma_send_pkts+0xdcd/0x1990 [hfi1] [] ? gup_pud_range+0x140/0x290 [] ? 
hfi1_mmu_rb_insert+0x155/0x1b0 [hfi1] [] hfi1_user_sdma_process_request+0xc5b/0x11b0 [hfi1] [] hfi1_aio_write+0xba/0x110 [hfi1] [] do_sync_readv_writev+0x7b/0xd0 [] do_readv_writev+0xce/0x260 [] ? tty_ldisc_deref+0x19/0x20 [] ? n_tty_ioctl+0xe0/0xe0 [] vfs_writev+0x35/0x60 [] SyS_writev+0x7f/0x110 [] system_call_fastpath+0x1c/0x21 Code: 06 49 c7 47 18 00 00 00 00 0f 87 89 01 00 00 5b 41 5c 41 5d 41 5e 41 5f 5d c3 66 2e 0f 1f 84 00 00 00 00 00 48 8b 4e 10 48 89 fb <48> 8b 51 08 49 89 d4 83 e2 0c 41 81 e4 00 e0 00 00 48 c1 ea 02 RIP [] __sdma_txclean+0x57/0x1e0 [hfi1] RSP CR2: 0000000000000008 There are two exit points from user_sdma_send_pkts(). One (free_tx) merely frees the slab entry and one (free_txreq) cleans the sdma_txreq prior to freeing the slab entry. The free_txreq variation can only be called after one of the sdma_init*() variations has been called. In the panic case, the slab entry had been allocated but not inited. Fix the issue by exiting through free_tx thus avoiding sdma_clean(). Cc: # 4.9.x+ Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn Reviewed-by: Lukasz Odzioba Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a3a7b33196d6..5c88706121c1 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -828,7 +828,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) { if (++req->iov_idx == req->data_iovs) { ret = -EFAULT; - goto free_txreq; + goto free_tx; } iovec = &req->iovs[req->iov_idx]; WARN_ON(iovec->offset); From d623500b3c4efd8d4e945ac9003c6b87b469a9ab Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 20 Sep 2018 12:59:05 -0700 Subject: [PATCH 147/499] IB/hfi1: Fix context recovery when PBC has an UnsupportedVL If a packet stream uses an UnsupportedVL (virtual lane), the send engine will not send the packet, and it will not indicate that an error has occurred. This will cause the packet stream to block. HFI has 8 virtual lanes available for packet streams. Each lane can be enabled or disabled using the UnsupportedVL mask. If a lane is disabled, adding a packet to the send context must be disallowed. The current mask for determining unsupported VLs defaults to 0 (allow all). This is incorrect. Only the VLs that are defined should be allowed. Determine which VLs are disabled (mtu == 0), and set the appropriate unsupported bit in the mask. The correct mask will allow the send engine to error on the invalid VL, and error recovery will work correctly. Cc: # 4.9.x+ Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn Reviewed-by: Lukasz Odzioba Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index c2c1cba5b23b..cd962c9ea6bc 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -86,6 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op) unsigned long flags; int write = 1; /* write sendctrl back */ int flush = 0; /* re-read sendctrl to make sure it is flushed */ + int i; spin_lock_irqsave(&dd->sendctrl_lock, flags); @@ -95,9 +96,13 @@ void pio_send_control(struct hfi1_devdata *dd, int op) reg |= SEND_CTRL_SEND_ENABLE_SMASK; /* Fall through */ case PSC_DATA_VL_ENABLE: + mask = 0; + for (i = 0; i < ARRAY_SIZE(dd->vld); i++) + if (!dd->vld[i].mtu) + mask |= BIT_ULL(i); /* Disallow sending on VLs not enabled */ - mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) << - SEND_CTRL_UNSUPPORTED_VL_SHIFT; + mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) << + SEND_CTRL_UNSUPPORTED_VL_SHIFT; reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask; break; case PSC_GLOBAL_DISABLE: From b4a4957d3d1c328b733fce783b7264996f866ad2 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 20 Sep 2018 12:59:14 -0700 Subject: [PATCH 148/499] IB/hfi1: Fix destroy_qp hang after a link down rvt_destroy_qp() cannot complete until all in process packets have been released from the underlying hardware. If a link down event occurs, an application can hang with a kernel stack similar to: cat /proc//stack quiesce_qp+0x178/0x250 [hfi1] rvt_reset_qp+0x23d/0x400 [rdmavt] rvt_destroy_qp+0x69/0x210 [rdmavt] ib_destroy_qp+0xba/0x1c0 [ib_core] nvme_rdma_destroy_queue_ib+0x46/0x80 [nvme_rdma] nvme_rdma_free_queue+0x3c/0xd0 [nvme_rdma] nvme_rdma_destroy_io_queues+0x88/0xd0 [nvme_rdma] nvme_rdma_error_recovery_work+0x52/0xf0 [nvme_rdma] process_one_work+0x17a/0x440 worker_thread+0x126/0x3c0 kthread+0xcf/0xe0 ret_from_fork+0x58/0x90 0xffffffffffffffff quiesce_qp() waits until all outstanding packets have been freed. This wait should be momentary. During a link down event, the cleanup handling does not ensure that all packets caught by the link down are flushed properly. This is caused by the fact that the freeze path and the link down event is handled the same. This is not correct. The freeze path waits until the HFI is unfrozen and then restarts PIO. A link down is not a freeze event. The link down path cannot restart the PIO until link is restored. If the PIO path is restarted before the link comes up, the application (QP) using the PIO path will hang (until link is restored). Fix by separating the linkdown path from the freeze path and use the link down path for link down events. Close a race condition sc_disable() by acquiring both the progress and release locks. Close a race condition in sc_stop() by moving the setting of the flag bits under the alloc lock. Cc: # 4.9.x+ Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 6 ++++- drivers/infiniband/hw/hfi1/pio.c | 42 +++++++++++++++++++++++++------ drivers/infiniband/hw/hfi1/pio.h | 2 ++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2c19bf772451..e1668bcc2d13 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6733,6 +6733,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags) struct hfi1_devdata *dd = ppd->dd; struct send_context *sc; int i; + int sc_flags; if (flags & FREEZE_SELF) write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK); @@ -6743,11 +6744,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags) /* notify all SDMA engines that they are going into a freeze */ sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN)); + sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ? + SCF_LINK_DOWN : 0); /* do halt pre-handling on all enabled send contexts */ for (i = 0; i < dd->num_send_contexts; i++) { sc = dd->send_contexts[i].sc; if (sc && (sc->flags & SCF_ENABLED)) - sc_stop(sc, SCF_FROZEN | SCF_HALTED); + sc_stop(sc, sc_flags); } /* Send context are frozen. Notify user space */ @@ -10674,6 +10677,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); handle_linkup_change(dd, 1); + pio_kernel_linkup(dd); /* * After link up, a new link width will have been set. diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index cd962c9ea6bc..752057647f09 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -926,20 +926,18 @@ void sc_free(struct send_context *sc) void sc_disable(struct send_context *sc) { u64 reg; - unsigned long flags; struct pio_buf *pbuf; if (!sc) return; /* do all steps, even if already disabled */ - spin_lock_irqsave(&sc->alloc_lock, flags); + spin_lock_irq(&sc->alloc_lock); reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL)); reg &= ~SC(CTRL_CTXT_ENABLE_SMASK); sc->flags &= ~SCF_ENABLED; sc_wait_for_packet_egress(sc, 1); write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg); - spin_unlock_irqrestore(&sc->alloc_lock, flags); /* * Flush any waiters. Once the context is disabled, @@ -949,7 +947,7 @@ void sc_disable(struct send_context *sc) * proceed with the flush. */ udelay(1); - spin_lock_irqsave(&sc->release_lock, flags); + spin_lock(&sc->release_lock); if (sc->sr) { /* this context has a shadow ring */ while (sc->sr_tail != sc->sr_head) { pbuf = &sc->sr[sc->sr_tail].pbuf; @@ -960,7 +958,8 @@ void sc_disable(struct send_context *sc) sc->sr_tail = 0; } } - spin_unlock_irqrestore(&sc->release_lock, flags); + spin_unlock(&sc->release_lock); + spin_unlock_irq(&sc->alloc_lock); } /* return SendEgressCtxtStatus.PacketOccupancy */ @@ -1183,11 +1182,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd) sc = dd->send_contexts[i].sc; if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER) continue; + if (sc->flags & SCF_LINK_DOWN) + continue; sc_enable(sc); /* will clear the sc frozen flag */ } } +/** + * pio_kernel_linkup() - Re-enable send contexts after linkup event + * @dd: valid devive data + * + * When the link goes down, the freeze path is taken. 
However, a link down + * event is different from a freeze because if the send context is re-enabled + * whowever is sending data will start sending data again, which will hang + * any QP that is sending data. + * + * The freeze path now looks at the type of event that occurs and takes this + * path for link down event. + */ +void pio_kernel_linkup(struct hfi1_devdata *dd) +{ + struct send_context *sc; + int i; + + for (i = 0; i < dd->num_send_contexts; i++) { + sc = dd->send_contexts[i].sc; + if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER) + continue; + + sc_enable(sc); /* will clear the sc link down flag */ + } +} + /* * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear. * Returns: @@ -1387,11 +1414,10 @@ void sc_stop(struct send_context *sc, int flag) { unsigned long flags; - /* mark the context */ - sc->flags |= flag; - /* stop buffer allocations */ spin_lock_irqsave(&sc->alloc_lock, flags); + /* mark the context */ + sc->flags |= flag; sc->flags &= ~SCF_ENABLED; spin_unlock_irqrestore(&sc->alloc_lock, flags); wake_up(&sc->halt_wait); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 058b08f459ab..aaf372c3e5d6 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -139,6 +139,7 @@ struct send_context { #define SCF_IN_FREE 0x02 #define SCF_HALTED 0x04 #define SCF_FROZEN 0x08 +#define SCF_LINK_DOWN 0x10 struct send_context_info { struct send_context *sc; /* allocated working context */ @@ -306,6 +307,7 @@ void set_pio_integrity(struct send_context *sc); void pio_reset_all(struct hfi1_devdata *dd); void pio_freeze(struct hfi1_devdata *dd); void pio_kernel_unfreeze(struct hfi1_devdata *dd); +void pio_kernel_linkup(struct hfi1_devdata *dd); /* global PIO send control operations */ #define PSC_GLOBAL_ENABLE 0 From d87161bea405e3260377026ca8a704a3f68bd67a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 13 Sep 2018 14:28:48 +0300 Subject: [PATCH 149/499] scsi: ufs: Disable blk-mq for now blk-mq does not support runtime pm, so disable blk-mq support for now. Fixes: d5038a13eca7 ("scsi: core: switch to scsi-mq by default") Signed-off-by: Adrian Hunter Acked-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9d5d2ca7fc4f..c55f38ec391c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7940,6 +7940,13 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } + + /* + * Do not use blk-mq at this time because blk-mq does not support + * runtime pm. + */ + host->use_blk_mq = false; + hba = shost_priv(host); hba->host = host; hba->dev = dev; From 9e210178267b80c4eeb832fade7e146a18c84915 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 13 Sep 2018 15:41:10 -0700 Subject: [PATCH 150/499] scsi: lpfc: Synchronize access to remoteport via rport The driver currently uses the ndlp to get the local rport which is then used to get the nvme transport remoteport pointer. There can be cases where a stale remoteport pointer is obtained as synchronization isn't done through the different dereferences. Correct by using locks to synchronize the dereferences. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 15 ++++++++++----- drivers/scsi/lpfc/lpfc_debugfs.c | 10 +++++----- drivers/scsi/lpfc/lpfc_nvme.c | 11 ++++++++--- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 057a60abe664..1a6ed9b0a249 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -360,12 +360,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, goto buffer_done; list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + nrport = NULL; + spin_lock(&vport->phba->hbalock); rport = lpfc_ndlp_get_nrport(ndlp); - if (!rport) - continue; - - /* local short-hand pointer. */ - nrport = rport->remoteport; + if (rport) + nrport = rport->remoteport; + spin_unlock(&vport->phba->hbalock); if (!nrport) continue; @@ -3386,6 +3386,7 @@ lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport) struct lpfc_nodelist *ndlp; #if (IS_ENABLED(CONFIG_NVME_FC)) struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *remoteport = NULL; #endif shost = lpfc_shost_from_vport(vport); @@ -3396,8 +3397,12 @@ lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport) if (ndlp->rport) ndlp->rport->dev_loss_tmo = vport->cfg_devloss_tmo; #if (IS_ENABLED(CONFIG_NVME_FC)) + spin_lock(&vport->phba->hbalock); rport = lpfc_ndlp_get_nrport(ndlp); if (rport) + remoteport = rport->remoteport; + spin_unlock(&vport->phba->hbalock); + if (remoteport) nvme_fc_set_remoteport_devloss(rport->remoteport, vport->cfg_devloss_tmo); #endif diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 9df0c051349f..aec5b10a8c85 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -551,7 +551,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) unsigned char *statep; struct nvme_fc_local_port *localport; struct lpfc_nvmet_tgtport *tgtp; - struct nvme_fc_remote_port *nrport; + struct nvme_fc_remote_port *nrport = NULL; struct lpfc_nvme_rport *rport; cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); @@ -696,11 +696,11 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf(buf + len, size - len, "\tRport List:\n"); list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { /* local short-hand pointer. 
*/ + spin_lock(&phba->hbalock); rport = lpfc_ndlp_get_nrport(ndlp); - if (!rport) - continue; - - nrport = rport->remoteport; + if (rport) + nrport = rport->remoteport; + spin_unlock(&phba->hbalock); if (!nrport) continue; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 028462e5994d..918ae18ef8a8 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2725,7 +2725,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); + spin_lock_irq(&vport->phba->hbalock); oldrport = lpfc_ndlp_get_nrport(ndlp); + spin_unlock_irq(&vport->phba->hbalock); if (!oldrport) lpfc_nlp_get(ndlp); @@ -2840,7 +2842,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; - struct nvme_fc_remote_port *remoteport; + struct nvme_fc_remote_port *remoteport = NULL; localport = vport->localport; @@ -2854,11 +2856,14 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (!lport) goto input_err; + spin_lock_irq(&vport->phba->hbalock); rport = lpfc_ndlp_get_nrport(ndlp); - if (!rport) + if (rport) + remoteport = rport->remoteport; + spin_unlock_irq(&vport->phba->hbalock); + if (!remoteport) goto input_err; - remoteport = rport->remoteport; lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6033 Unreg nvme remoteport %p, portname x%llx, " "port_id x%06x, portstate x%x port type x%x\n", From 10bc6a6042c900934a097988b5d50e3cf3f91781 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 20 Sep 2018 22:47:09 +0200 Subject: [PATCH 151/499] r8169: fix autoneg issue on resume with RTL8168E It was reported that chip version 33 (RTL8168E) ends up with 10MBit/Half on a 1GBit link after resuming from S3 (with different link partners). For whatever reason the PHY on this chip doesn't properly start a renegotiation when soft-reset. Explicitly requesting a renegotiation fixes this. Fixes: a2965f12fde6 ("r8169: remove rtl8169_set_speed_xmii") Signed-off-by: Heiner Kallweit Reported-by: Neil MacLeod Tested-by: Neil MacLeod Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index bb529ff2ca81..ab30aaeac6d3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4071,6 +4071,15 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) phy_speed_up(dev->phydev); genphy_soft_reset(dev->phydev); + + /* It was reported that chip version 33 ends up with 10MBit/Half on a + * 1GBit link after resuming from S3. For whatever reason the PHY on + * this chip doesn't properly start a renegotiation when soft-reset. + * Explicitly requesting a renegotiation fixes this. 
+ */ + if (tp->mac_version == RTL_GIGA_MAC_VER_33 && + dev->phydev->autoneg == AUTONEG_ENABLE) + phy_restart_aneg(dev->phydev); } static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) From 13cc6f48c7434ce46ba6dbc90003a136a263d75a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 14 Sep 2018 18:34:28 +0300 Subject: [PATCH 152/499] crypto: caam/jr - fix ablkcipher_edesc pointer arithmetic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases the zero-length hw_desc array at the end of ablkcipher_edesc struct requires for 4B of tail padding. Due to tail padding and the way pointers to S/G table and IV are computed: edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + desc_bytes; iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes; first 4 bytes of IV are overwritten by S/G table. Update computation of pointer to S/G table to rely on offset of hw_desc member and not on sizeof() operator. Cc: # 4.13+ Fixes: 115957bb3e59 ("crypto: caam - fix IV DMA mapping and updating") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index d67667970f7e..ec40f991e6c6 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1553,8 +1553,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + - desc_bytes; + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + + desc_bytes); edesc->iv_dir = DMA_TO_DEVICE; /* Make sure IV is located in a DMAable area */ @@ -1757,8 +1757,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; edesc->sec4_sg_bytes = sec4_sg_bytes; - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + - desc_bytes; + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + + desc_bytes); edesc->iv_dir = DMA_FROM_DEVICE; /* Make sure IV is located in a DMAable area */ From 5b3686c7aaade973b8806dba4ecc99bec8c988f3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 9 Aug 2018 14:44:29 +0800 Subject: [PATCH 153/499] ieee802154: Use kmemdup instead of duplicating it in ca8210_test_int_driver_write Replace calls to kmalloc followed by a memcpy with a direct call to kmemdup. 
Signed-off-by: YueHaibing Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 58299fb666ed..e21279dde85c 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -634,10 +634,9 @@ static int ca8210_test_int_driver_write( for (i = 0; i < len; i++) dev_dbg(&priv->spi->dev, "%#03x\n", buf[i]); - fifo_buffer = kmalloc(len, GFP_KERNEL); + fifo_buffer = kmemdup(buf, len, GFP_KERNEL); if (!fifo_buffer) return -ENOMEM; - memcpy(fifo_buffer, buf, len); kfifo_in(&test->up_fifo, &fifo_buffer, 4); wake_up_interruptible(&priv->test.readq); From 69be1984ded00a11b1ed0888c6d8e4f35370372f Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Mon, 16 Jul 2018 11:07:07 +0100 Subject: [PATCH 154/499] drm: mali-dp: Call drm_crtc_vblank_reset on device init Currently, if userspace calls drm_wait_vblank before the crtc is activated, the crtc vblank_enable hook is called, which in the case of the malidp driver triggers some warnings. This happens because on device init we don't inform the drm core about the vblank state by calling drm_crtc_vblank_on/off/reset, which together with drm_vblank_get have some magic that prevents calling drm_vblank_enable when the crtc is off. Signed-off-by: Alexandru Gheorghe Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index 08b5bb219816..94d6dabec2dc 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -754,6 +754,7 @@ static int malidp_bind(struct device *dev) drm->irq_enabled = true; ret = drm_vblank_init(drm, drm->mode_config.num_crtc); + drm_crtc_vblank_reset(&malidp->crtc); if (ret < 0) { DRM_ERROR("failed to initialise vblank\n"); goto vblank_fail; From 89578d04b52c872aef6b1257b5f6caf6bcc35abe Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 22 Aug 2018 16:18:19 +0100 Subject: [PATCH 155/499] drm/malidp: Fix writeback in NV12 When we want to write back to memory in NV12 format we need to program the RGB2YUV coefficients. Currently, we don't program the coefficients and NV12 doesn't work at all. This patch fixes that by programming a sane default (bt709, limited range) as the rgb2yuv coefficients. In the long run we probably need to think of a way for userspace to be able to program that, but for now I think this is better than not working at all or not advertising NV12 as a supported format for memwrite.
Changes since v1: - Write the rgb2yuv coefficients only once, since we don't change them at all, just write them the first time NV12 is programmed, suggested by Brian Starkey, here [1] [1] https://lists.freedesktop.org/archives/dri-devel/2018-August/186819.html Signed-off-by: Alexandru Gheorghe Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_hw.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/arm/malidp_hw.h | 3 ++- drivers/gpu/drm/arm/malidp_mw.c | 25 +++++++++++++++++++++---- drivers/gpu/drm/arm/malidp_regs.h | 2 ++ 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index c94a4422e0e9..2781e462c1ed 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -384,7 +384,8 @@ static long malidp500_se_calc_mclk(struct malidp_hw_device *hwdev, static int malidp500_enable_memwrite(struct malidp_hw_device *hwdev, dma_addr_t *addrs, s32 *pitches, - int num_planes, u16 w, u16 h, u32 fmt_id) + int num_planes, u16 w, u16 h, u32 fmt_id, + const s16 *rgb2yuv_coeffs) { u32 base = MALIDP500_SE_MEMWRITE_BASE; u32 de_base = malidp_get_block_base(hwdev, MALIDP_DE_BLOCK); @@ -416,6 +417,16 @@ static int malidp500_enable_memwrite(struct malidp_hw_device *hwdev, malidp_hw_write(hwdev, MALIDP_DE_H_ACTIVE(w) | MALIDP_DE_V_ACTIVE(h), MALIDP500_SE_MEMWRITE_OUT_SIZE); + + if (rgb2yuv_coeffs) { + int i; + + for (i = 0; i < MALIDP_COLORADJ_NUM_COEFFS; i++) { + malidp_hw_write(hwdev, rgb2yuv_coeffs[i], + MALIDP500_SE_RGB_YUV_COEFFS + i * 4); + } + } + malidp_hw_setbits(hwdev, MALIDP_SE_MEMWRITE_EN, MALIDP500_SE_CONTROL); return 0; @@ -658,7 +669,8 @@ static long malidp550_se_calc_mclk(struct malidp_hw_device *hwdev, static int malidp550_enable_memwrite(struct malidp_hw_device *hwdev, dma_addr_t *addrs, s32 *pitches, - int num_planes, u16 w, u16 h, u32 fmt_id) + int num_planes, u16 w, u16 h, u32 fmt_id, + const s16 *rgb2yuv_coeffs) { u32 base = MALIDP550_SE_MEMWRITE_BASE; u32 de_base = malidp_get_block_base(hwdev, MALIDP_DE_BLOCK); @@ -689,6 +701,15 @@ static int malidp550_enable_memwrite(struct malidp_hw_device *hwdev, malidp_hw_setbits(hwdev, MALIDP550_SE_MEMWRITE_ONESHOT | MALIDP_SE_MEMWRITE_EN, MALIDP550_SE_CONTROL); + if (rgb2yuv_coeffs) { + int i; + + for (i = 0; i < MALIDP_COLORADJ_NUM_COEFFS; i++) { + malidp_hw_write(hwdev, rgb2yuv_coeffs[i], + MALIDP550_SE_RGB_YUV_COEFFS + i * 4); + } + } + return 0; } diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h index ad2e96915d44..9fc94c08190f 100644 --- a/drivers/gpu/drm/arm/malidp_hw.h +++ b/drivers/gpu/drm/arm/malidp_hw.h @@ -191,7 +191,8 @@ struct malidp_hw { * @param fmt_id - internal format ID of output buffer */ int (*enable_memwrite)(struct malidp_hw_device *hwdev, dma_addr_t *addrs, - s32 *pitches, int num_planes, u16 w, u16 h, u32 fmt_id); + s32 *pitches, int num_planes, u16 w, u16 h, u32 fmt_id, + const s16 *rgb2yuv_coeffs); /* * Disable the writing to memory of the next frame's content. 
diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c index ba6ae66387c9..91472e5e0c8b 100644 --- a/drivers/gpu/drm/arm/malidp_mw.c +++ b/drivers/gpu/drm/arm/malidp_mw.c @@ -26,6 +26,8 @@ struct malidp_mw_connector_state { s32 pitches[2]; u8 format; u8 n_planes; + bool rgb2yuv_initialized; + const s16 *rgb2yuv_coeffs; }; static int malidp_mw_connector_get_modes(struct drm_connector *connector) @@ -84,7 +86,7 @@ static void malidp_mw_connector_destroy(struct drm_connector *connector) static struct drm_connector_state * malidp_mw_connector_duplicate_state(struct drm_connector *connector) { - struct malidp_mw_connector_state *mw_state; + struct malidp_mw_connector_state *mw_state, *mw_current_state; if (WARN_ON(!connector->state)) return NULL; @@ -93,7 +95,10 @@ malidp_mw_connector_duplicate_state(struct drm_connector *connector) if (!mw_state) return NULL; - /* No need to preserve any of our driver-local data */ + mw_current_state = to_mw_state(connector->state); + mw_state->rgb2yuv_coeffs = mw_current_state->rgb2yuv_coeffs; + mw_state->rgb2yuv_initialized = mw_current_state->rgb2yuv_initialized; + __drm_atomic_helper_connector_duplicate_state(connector, &mw_state->base); return &mw_state->base; @@ -108,6 +113,13 @@ static const struct drm_connector_funcs malidp_mw_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; +static const s16 rgb2yuv_coeffs_bt709_limited[MALIDP_COLORADJ_NUM_COEFFS] = { + 47, 157, 16, + -26, -87, 112, + 112, -102, -10, + 16, 128, 128 +}; + static int malidp_mw_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, @@ -157,6 +169,9 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder, } mw_state->n_planes = n_planes; + if (fb->format->is_yuv) + mw_state->rgb2yuv_coeffs = rgb2yuv_coeffs_bt709_limited; + return 0; } @@ -239,10 +254,12 @@ void malidp_mw_atomic_commit(struct drm_device *drm, drm_writeback_queue_job(mw_conn, conn_state->writeback_job); conn_state->writeback_job = NULL; - hwdev->hw->enable_memwrite(hwdev, mw_state->addrs, mw_state->pitches, mw_state->n_planes, - fb->width, fb->height, mw_state->format); + fb->width, fb->height, mw_state->format, + !mw_state->rgb2yuv_initialized ? 
+ mw_state->rgb2yuv_coeffs : NULL); + mw_state->rgb2yuv_initialized = !!mw_state->rgb2yuv_coeffs; } else { DRM_DEV_DEBUG_DRIVER(drm->dev, "Disable memwrite\n"); hwdev->hw->disable_memwrite(hwdev); diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h index 3579d36b2a71..6ffe849774f2 100644 --- a/drivers/gpu/drm/arm/malidp_regs.h +++ b/drivers/gpu/drm/arm/malidp_regs.h @@ -205,6 +205,7 @@ #define MALIDP500_SE_BASE 0x00c00 #define MALIDP500_SE_CONTROL 0x00c0c #define MALIDP500_SE_MEMWRITE_OUT_SIZE 0x00c2c +#define MALIDP500_SE_RGB_YUV_COEFFS 0x00C74 #define MALIDP500_SE_MEMWRITE_BASE 0x00e00 #define MALIDP500_DC_IRQ_BASE 0x00f00 #define MALIDP500_CONFIG_VALID 0x00f00 @@ -238,6 +239,7 @@ #define MALIDP550_SE_CONTROL 0x08010 #define MALIDP550_SE_MEMWRITE_ONESHOT (1 << 7) #define MALIDP550_SE_MEMWRITE_OUT_SIZE 0x08030 +#define MALIDP550_SE_RGB_YUV_COEFFS 0x08078 #define MALIDP550_SE_MEMWRITE_BASE 0x08100 #define MALIDP550_DC_BASE 0x0c000 #define MALIDP550_DC_CONTROL 0x0c010 From 98e616fe7c94f9c787092b6364405d99bdf42153 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Sat, 8 Sep 2018 21:44:08 +0800 Subject: [PATCH 156/499] ieee802154: remove unecessary condition check before debugfs_remove_recursive debugfs_remove_recursive has taken IS_ERR_OR_NULL into account. So just remove the condition check before debugfs_remove_recursive. Signed-off-by: zhong jiang Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 23a52b9293f3..cd1d8faccca5 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1308,8 +1308,7 @@ static int adf7242_remove(struct spi_device *spi) { struct adf7242_local *lp = spi_get_drvdata(spi); - if (!IS_ERR_OR_NULL(lp->debugfs_root)) - debugfs_remove_recursive(lp->debugfs_root); + debugfs_remove_recursive(lp->debugfs_root); cancel_delayed_work_sync(&lp->work); destroy_workqueue(lp->wqueue); From 652ef42c134da1bbb03bd4c9b4291dfaf8d7febb Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 20 Sep 2018 12:08:54 +0200 Subject: [PATCH 157/499] net: mscc: fix the frame extraction into the skb When extracting frames from the Ocelot switch, the frame check sequence (FCS) is present at the end of the data extracted. The FCS was put into the sk buffer which introduced some issues (as length related ones), as the FCS shouldn't be part of an Rx sk buffer. This patch fixes the Ocelot switch extraction behaviour by discarding the FCS. Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Antoine Tenart Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mscc/ocelot_board.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index 26bb3b18f3be..3cdf63e35b53 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -91,7 +91,7 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) struct sk_buff *skb; struct net_device *dev; u32 *buf; - int sz, len; + int sz, len, buf_len; u32 ifh[4]; u32 val; struct frame_info info; @@ -116,14 +116,20 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) err = -ENOMEM; break; } - buf = (u32 *)skb_put(skb, info.len); + buf_len = info.len - ETH_FCS_LEN; + buf = (u32 *)skb_put(skb, buf_len); len = 0; do { sz = ocelot_rx_frame_word(ocelot, grp, false, &val); *buf++ = val; len += sz; - } while ((sz == 4) && (len < info.len)); + } while (len < buf_len); + + /* Read the FCS and discard it */ + sz = ocelot_rx_frame_word(ocelot, grp, false, &val); + /* Update the statistics if part of the FCS was read before */ + len -= ETH_FCS_LEN - sz; if (sz < 0) { err = sz; From 1816494330a83f2a064499d8ed2797045641f92c Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sun, 9 Sep 2018 04:09:26 +0000 Subject: [PATCH 158/499] scsi: target: iscsi: Use hex2bin instead of a re-implementation This change has the following effects, in order of descreasing importance: 1) Prevent a stack buffer overflow 2) Do not append an unnecessary NULL to an anyway binary buffer, which is writing one byte past client_digest when caller is: chap_string_to_hex(client_digest, chap_r, strlen(chap_r)); The latter was found by KASAN (see below) when input value hes expected size (32 hex chars), and further analysis revealed a stack buffer overflow can happen when network-received value is longer, allowing an unauthenticated remote attacker to smash up to 17 bytes after destination buffer (16 bytes attacker-controlled and one null). As switching to hex2bin requires specifying destination buffer length, and does not internally append any null, it solves both issues. This addresses CVE-2018-14633. Beyond this: - Validate received value length and check hex2bin accepted the input, to log this rejection reason instead of just failing authentication. - Only log received CHAP_R and CHAP_C values once they passed sanity checks. ================================================================== BUG: KASAN: stack-out-of-bounds in chap_string_to_hex+0x32/0x60 [iscsi_target_mod] Write of size 1 at addr ffff8801090ef7c8 by task kworker/0:0/1021 CPU: 0 PID: 1021 Comm: kworker/0:0 Tainted: G O 4.17.8kasan.sess.connops+ #2 Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 05/19/2014 Workqueue: events iscsi_target_do_login_rx [iscsi_target_mod] Call Trace: dump_stack+0x71/0xac print_address_description+0x65/0x22e ? chap_string_to_hex+0x32/0x60 [iscsi_target_mod] kasan_report.cold.6+0x241/0x2fd chap_string_to_hex+0x32/0x60 [iscsi_target_mod] chap_server_compute_md5.isra.2+0x2cb/0x860 [iscsi_target_mod] ? chap_binaryhex_to_asciihex.constprop.5+0x50/0x50 [iscsi_target_mod] ? ftrace_caller_op_ptr+0xe/0xe ? __orc_find+0x6f/0xc0 ? unwind_next_frame+0x231/0x850 ? kthread+0x1a0/0x1c0 ? ret_from_fork+0x35/0x40 ? ret_from_fork+0x35/0x40 ? iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? deref_stack_reg+0xd0/0xd0 ? iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? is_module_text_address+0xa/0x11 ? 
kernel_text_address+0x4c/0x110 ? __save_stack_trace+0x82/0x100 ? ret_from_fork+0x35/0x40 ? save_stack+0x8c/0xb0 ? 0xffffffffc1660000 ? iscsi_target_do_login+0x155/0x8d0 [iscsi_target_mod] ? iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? process_one_work+0x35c/0x640 ? worker_thread+0x66/0x5d0 ? kthread+0x1a0/0x1c0 ? ret_from_fork+0x35/0x40 ? iscsi_update_param_value+0x80/0x80 [iscsi_target_mod] ? iscsit_release_cmd+0x170/0x170 [iscsi_target_mod] chap_main_loop+0x172/0x570 [iscsi_target_mod] ? chap_server_compute_md5.isra.2+0x860/0x860 [iscsi_target_mod] ? rx_data+0xd6/0x120 [iscsi_target_mod] ? iscsit_print_session_params+0xd0/0xd0 [iscsi_target_mod] ? cyc2ns_read_begin.part.2+0x90/0x90 ? _raw_spin_lock_irqsave+0x25/0x50 ? memcmp+0x45/0x70 iscsi_target_do_login+0x875/0x8d0 [iscsi_target_mod] ? iscsi_target_check_first_request.isra.5+0x1a0/0x1a0 [iscsi_target_mod] ? del_timer+0xe0/0xe0 ? memset+0x1f/0x40 ? flush_sigqueue+0x29/0xd0 iscsi_target_do_login_rx+0x3bc/0x4c0 [iscsi_target_mod] ? iscsi_target_nego_release+0x80/0x80 [iscsi_target_mod] ? iscsi_target_restore_sock_callbacks+0x130/0x130 [iscsi_target_mod] process_one_work+0x35c/0x640 worker_thread+0x66/0x5d0 ? flush_rcu_work+0x40/0x40 kthread+0x1a0/0x1c0 ? kthread_bind+0x30/0x30 ret_from_fork+0x35/0x40 The buggy address belongs to the page: page:ffffea0004243bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x17fffc000000000() raw: 017fffc000000000 0000000000000000 0000000000000000 00000000ffffffff raw: ffffea0004243c20 ffffea0004243ba0 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801090ef680: f2 f2 f2 f2 f2 f2 f2 01 f2 f2 f2 f2 f2 f2 f2 00 ffff8801090ef700: f2 f2 f2 f2 f2 f2 f2 00 02 f2 f2 f2 f2 f2 f2 00 >ffff8801090ef780: 00 f2 f2 f2 f2 f2 f2 00 00 f2 f2 f2 f2 f2 f2 00 ^ ffff8801090ef800: 00 f2 f2 f2 f2 f2 f2 00 00 00 00 02 f2 f2 f2 f2 ffff8801090ef880: f2 f2 f2 00 00 00 00 00 00 00 00 f2 f2 f2 f2 00 ================================================================== Signed-off-by: Vincent Pelletier Reviewed-by: Mike Christie Signed-off-by: Martin K. 
Petersen --- drivers/target/iscsi/iscsi_target_auth.c | 30 +++++++++++------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 9518ffd8b8ba..6c3b4c022894 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -26,18 +26,6 @@ #include "iscsi_target_nego.h" #include "iscsi_target_auth.h" -static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len) -{ - int j = DIV_ROUND_UP(len, 2), rc; - - rc = hex2bin(dst, src, j); - if (rc < 0) - pr_debug("CHAP string contains non hex digit symbols\n"); - - dst[j] = '\0'; - return j; -} - static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len) { int i; @@ -248,9 +236,16 @@ static int chap_server_compute_md5( pr_err("Could not find CHAP_R.\n"); goto out; } + if (strlen(chap_r) != MD5_SIGNATURE_SIZE * 2) { + pr_err("Malformed CHAP_R\n"); + goto out; + } + if (hex2bin(client_digest, chap_r, MD5_SIGNATURE_SIZE) < 0) { + pr_err("Malformed CHAP_R\n"); + goto out; + } pr_debug("[server] Got CHAP_R=%s\n", chap_r); - chap_string_to_hex(client_digest, chap_r, strlen(chap_r)); tfm = crypto_alloc_shash("md5", 0, 0); if (IS_ERR(tfm)) { @@ -349,9 +344,7 @@ static int chap_server_compute_md5( pr_err("Could not find CHAP_C.\n"); goto out; } - pr_debug("[server] Got CHAP_C=%s\n", challenge); - challenge_len = chap_string_to_hex(challenge_binhex, challenge, - strlen(challenge)); + challenge_len = DIV_ROUND_UP(strlen(challenge), 2); if (!challenge_len) { pr_err("Unable to convert incoming challenge\n"); goto out; @@ -360,6 +353,11 @@ static int chap_server_compute_md5( pr_err("CHAP_C exceeds maximum binary size of 1024 bytes\n"); goto out; } + if (hex2bin(challenge_binhex, challenge, challenge_len) < 0) { + pr_err("Malformed CHAP_C\n"); + goto out; + } + pr_debug("[server] Got CHAP_C=%s\n", challenge); /* * During mutual authentication, the CHAP_C generated by the * initiator must not match the original CHAP_C generated by From 8c39e2699f8acb2e29782a834e56306da24937fe Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sun, 9 Sep 2018 04:09:27 +0000 Subject: [PATCH 159/499] scsi: target: iscsi: Use bin2hex instead of a re-implementation Signed-off-by: Vincent Pelletier Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_auth.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 6c3b4c022894..4e680d753941 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -26,15 +26,6 @@ #include "iscsi_target_nego.h" #include "iscsi_target_auth.h" -static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len) -{ - int i; - - for (i = 0; i < src_len; i++) { - sprintf(&dst[i*2], "%02x", (int) src[i] & 0xff); - } -} - static int chap_gen_challenge( struct iscsi_conn *conn, int caller, @@ -50,7 +41,7 @@ static int chap_gen_challenge( ret = get_random_bytes_wait(chap->challenge, CHAP_CHALLENGE_LENGTH); if (unlikely(ret)) return ret; - chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge, + bin2hex(challenge_asciihex, chap->challenge, CHAP_CHALLENGE_LENGTH); /* * Set CHAP_C, and copy the generated challenge into c_str. 
@@ -289,7 +280,7 @@ static int chap_server_compute_md5( goto out; } - chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE); + bin2hex(response, server_digest, MD5_SIGNATURE_SIZE); pr_debug("[server] MD5 Server Digest: %s\n", response); if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) { @@ -411,7 +402,7 @@ static int chap_server_compute_md5( /* * Convert response from binary hex to ascii hext. */ - chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE); + bin2hex(response, digest, MD5_SIGNATURE_SIZE); *nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s", response); *nr_out_len += 1; From 318ddb34b2052f838aa243d07173e2badf3e630e Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Thu, 20 Sep 2018 19:32:12 -0500 Subject: [PATCH 160/499] scsi: ipr: System hung while dlpar adding primary ipr adapter back While dlpar adding the primary ipr adapter back, the driver goes through adapter initialization and then schedules ipr_worker_thread to start the disk scan by dropping the host lock and calling scsi_add_device. The driver then gets the adapter reset request again, so it does scsi_block_requests; this causes scsi_add_device to hang until we unblock. But we can't run ipr_worker_thread to do the unblock because it is stuck in scsi_add_device. This patch fixes the issue. [mkp: typo and whitespace fixes] Signed-off-by: Wen Xiong Acked-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 110 ++++++++++++++++++++++++++------------------- drivers/scsi/ipr.h | 1 + 2 files changed, 64 insertions(+), 47 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index f2ec80b0ffc0..271990bc065b 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3335,64 +3335,19 @@ static void ipr_release_dump(struct kref *kref) LEAVE; } -/** - * ipr_worker_thread - Worker thread - * @work: ioa config struct - * - * Called at task level from a work thread. This function takes care - * of adding and removing device from the mid-layer as configuration - * changes are detected by the adapter.
- * - * Return value: - * nothing - **/ -static void ipr_worker_thread(struct work_struct *work) +static void ipr_add_remove_thread(struct work_struct *work) { unsigned long lock_flags; struct ipr_resource_entry *res; struct scsi_device *sdev; - struct ipr_dump *dump; struct ipr_ioa_cfg *ioa_cfg = - container_of(work, struct ipr_ioa_cfg, work_q); + container_of(work, struct ipr_ioa_cfg, scsi_add_work_q); u8 bus, target, lun; int did_work; ENTER; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->sdt_state == READ_DUMP) { - dump = ioa_cfg->dump; - if (!dump) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - return; - } - kref_get(&dump->kref); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - ipr_get_ioa_dump(ioa_cfg, dump); - kref_put(&dump->kref, ipr_release_dump); - - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->sdt_state == DUMP_OBTAINED && !ioa_cfg->dump_timeout) - ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - return; - } - - if (ioa_cfg->scsi_unblock) { - ioa_cfg->scsi_unblock = 0; - ioa_cfg->scsi_blocked = 0; - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->scsi_blocked) - scsi_block_requests(ioa_cfg->host); - } - - if (!ioa_cfg->scan_enabled) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - return; - } - restart: do { did_work = 0; @@ -3439,6 +3394,66 @@ static void ipr_worker_thread(struct work_struct *work) LEAVE; } +/** + * ipr_worker_thread - Worker thread + * @work: ioa config struct + * + * Called at task level from a work thread. This function takes care + * of adding and removing device from the mid-layer as configuration + * changes are detected by the adapter. 
+ * + * Return value: + * nothing + **/ +static void ipr_worker_thread(struct work_struct *work) +{ + unsigned long lock_flags; + struct ipr_dump *dump; + struct ipr_ioa_cfg *ioa_cfg = + container_of(work, struct ipr_ioa_cfg, work_q); + + ENTER; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + + if (ioa_cfg->sdt_state == READ_DUMP) { + dump = ioa_cfg->dump; + if (!dump) { + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return; + } + kref_get(&dump->kref); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + ipr_get_ioa_dump(ioa_cfg, dump); + kref_put(&dump->kref, ipr_release_dump); + + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->sdt_state == DUMP_OBTAINED && !ioa_cfg->dump_timeout) + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return; + } + + if (ioa_cfg->scsi_unblock) { + ioa_cfg->scsi_unblock = 0; + ioa_cfg->scsi_blocked = 0; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + scsi_unblock_requests(ioa_cfg->host); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->scsi_blocked) + scsi_block_requests(ioa_cfg->host); + } + + if (!ioa_cfg->scan_enabled) { + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return; + } + + schedule_work(&ioa_cfg->scsi_add_work_q); + + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + LEAVE; +} + #ifdef CONFIG_SCSI_IPR_TRACE /** * ipr_read_trace - Dump the adapter trace @@ -9933,6 +9948,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, INIT_LIST_HEAD(&ioa_cfg->free_res_q); INIT_LIST_HEAD(&ioa_cfg->used_res_q); INIT_WORK(&ioa_cfg->work_q, ipr_worker_thread); + INIT_WORK(&ioa_cfg->scsi_add_work_q, ipr_add_remove_thread); init_waitqueue_head(&ioa_cfg->reset_wait_q); init_waitqueue_head(&ioa_cfg->msi_wait_q); init_waitqueue_head(&ioa_cfg->eeh_wait_q); diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 68afbbde54d3..f6baa2351313 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1575,6 +1575,7 @@ struct ipr_ioa_cfg { u8 saved_mode_page_len; struct work_struct work_q; + struct work_struct scsi_add_work_q; struct workqueue_struct *reset_work_q; wait_queue_head_t reset_wait_q; From f1f1fadacaf08b7cf11714c0c29f8fa4d4ef68a9 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 21 Sep 2018 09:01:01 +0200 Subject: [PATCH 161/499] scsi: sd: don't crash the host on invalid commands When sd_init_command() gets a command with an unknown req_op() it crashes the system via BUG(). This makes debugging the actual reason for the broken request cmd_flags pretty hard as the system is down before it's able to write out debugging data on the serial console or the trace buffer. Change the BUG() to a WARN_ON() and return BLKPREP_KILL to fail gracefully and return an I/O error to the producer of the request. Signed-off-by: Johannes Thumshirn Cc: Hannes Reinecke Cc: Bart Van Assche Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K.
Petersen --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5e4f10d28065..4a57ffecc7e6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1276,7 +1276,8 @@ static int sd_init_command(struct scsi_cmnd *cmd) case REQ_OP_ZONE_RESET: return sd_zbc_setup_reset_cmnd(cmd); default: - BUG(); + WARN_ON_ONCE(1); + return BLKPREP_KILL; } } From 5607fff303636d48b88414c6be353d9fed700af2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 18 Sep 2018 09:01:44 -0700 Subject: [PATCH 162/499] bpf: sockmap only allow ESTABLISHED sock state After this patch we only allow socks that are in ESTABLISHED state or are being added via a sock_ops event that is transitioning into an ESTABLISHED state. By allowing sock_ops events we allow users to manage sockmaps directly from sock ops programs. The two supported sock_ops ops are BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB and BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB. Similar to TLS ULP this ensures sk_user_data is correct. Reported-by: Eric Dumazet Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks") Signed-off-by: John Fastabend Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 488ef9663c01..1f97b559892a 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -2097,8 +2097,12 @@ static int sock_map_update_elem(struct bpf_map *map, return -EINVAL; } + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. + */ if (skops.sk->sk_type != SOCK_STREAM || - skops.sk->sk_protocol != IPPROTO_TCP) { + skops.sk->sk_protocol != IPPROTO_TCP || + skops.sk->sk_state != TCP_ESTABLISHED) { fput(socket->file); return -EOPNOTSUPP; } @@ -2453,6 +2457,16 @@ static int sock_hash_update_elem(struct bpf_map *map, return -EINVAL; } + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. + */ + if (skops.sk->sk_type != SOCK_STREAM || + skops.sk->sk_protocol != IPPROTO_TCP || + skops.sk->sk_state != TCP_ESTABLISHED) { + fput(socket->file); + return -EOPNOTSUPP; + } + lock_sock(skops.sk); preempt_disable(); rcu_read_lock(); @@ -2543,10 +2557,22 @@ const struct bpf_map_ops sock_hash_ops = { .map_check_btf = map_check_no_btf, }; +static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops) +{ + return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB || + ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB; +} BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, struct bpf_map *, map, void *, key, u64, flags) { WARN_ON_ONCE(!rcu_read_lock_held()); + + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. This checks that the sock ops triggering the update is + * one indicating we are (or will be soon) in an ESTABLISHED state. 
+ */ + if (!bpf_is_valid_sock_op(bpf_sock)) + return -EOPNOTSUPP; return sock_map_ctx_update_elem(bpf_sock, map, key, flags); } @@ -2565,6 +2591,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock, struct bpf_map *, map, void *, key, u64, flags) { WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!bpf_is_valid_sock_op(bpf_sock)) + return -EOPNOTSUPP; return sock_hash_ctx_update_elem(bpf_sock, map, key, flags); } From b05545e15e1ff1d6a6a8593971275f9cc3e6b92b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 18 Sep 2018 09:01:49 -0700 Subject: [PATCH 163/499] bpf: sockmap, fix transition through disconnect without close It is possible (via shutdown()) for TCP socks to go trough TCP_CLOSE state via tcp_disconnect() without actually calling tcp_close which would then call our bpf_tcp_close() callback. Because of this a user could disconnect a socket then put it in a LISTEN state which would break our assumptions about sockets always being ESTABLISHED state. To resolve this rely on the unhash hook, which is called in the disconnect case, to remove the sock from the sockmap. Reported-by: Eric Dumazet Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks") Signed-off-by: John Fastabend Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 60 ++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 1f97b559892a..0a0f2ec75370 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -132,6 +132,7 @@ struct smap_psock { struct work_struct gc_work; struct proto *sk_proto; + void (*save_unhash)(struct sock *sk); void (*save_close)(struct sock *sk, long timeout); void (*save_data_ready)(struct sock *sk); void (*save_write_space)(struct sock *sk); @@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); static int bpf_tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +static void bpf_tcp_unhash(struct sock *sk); static void bpf_tcp_close(struct sock *sk, long timeout); static inline struct smap_psock *smap_psock_sk(const struct sock *sk) @@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], struct proto *base) { prot[SOCKMAP_BASE] = *base; + prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash; prot[SOCKMAP_BASE].close = bpf_tcp_close; prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; @@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk) return -EBUSY; } + psock->save_unhash = sk->sk_prot->unhash; psock->save_close = sk->sk_prot->close; psock->sk_proto = sk->sk_prot; @@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk, return e; } -static void bpf_tcp_close(struct sock *sk, long timeout) +static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock) { - void (*close_fun)(struct sock *sk, long timeout); struct smap_psock_map_entry *e; struct sk_msg_buff *md, *mtmp; - struct smap_psock *psock; struct sock *osk; - lock_sock(sk); - rcu_read_lock(); - psock = smap_psock_sk(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - return sk->sk_prot->close(sk, timeout); - } - - /* The psock may be destroyed anytime after exiting the RCU critial - * section so by the time we use close_fun the psock may no longer - * be valid. 
However, bpf_tcp_close is called with the sock lock - * held so the close hook and sk are still valid. - */ - close_fun = psock->save_close; - if (psock->cork) { free_start_sg(psock->sock, psock->cork, true); kfree(psock->cork); @@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout) kfree(e); e = psock_map_pop(sk, psock); } +} + +static void bpf_tcp_unhash(struct sock *sk) +{ + void (*unhash_fun)(struct sock *sk); + struct smap_psock *psock; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + return; + } + unhash_fun = psock->save_unhash; + bpf_tcp_remove(sk, psock); + rcu_read_unlock(); + unhash_fun(sk); +} + +static void bpf_tcp_close(struct sock *sk, long timeout) +{ + void (*close_fun)(struct sock *sk, long timeout); + struct smap_psock *psock; + + lock_sock(sk); + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + release_sock(sk); + return sk->sk_prot->close(sk, timeout); + } + close_fun = psock->save_close; + bpf_tcp_remove(sk, psock); rcu_read_unlock(); release_sock(sk); close_fun(sk, timeout); From 5028027844cfc6168e39650abecd817ba64c9d98 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 18 Sep 2018 09:01:54 -0700 Subject: [PATCH 164/499] bpf: test_maps, only support ESTABLISHED socks Ensure that sockets added to a sock{map|hash} that is not in the ESTABLISHED state is rejected. Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks") Signed-off-by: John Fastabend Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 6f54f84144a0..9b552c0fc47d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -580,7 +580,11 @@ static void test_sockmap(int tasks, void *data) /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); - if (err) { + if (i < 2 && !err) { + printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n", + i, sfd[i]); + goto out_sockmap; + } else if (i >= 2 && err) { printf("Failed noprog update sockmap '%i:%i'\n", i, sfd[i]); goto out_sockmap; @@ -741,7 +745,7 @@ static void test_sockmap(int tasks, void *data) } /* Test map update elem afterwards fd lives in fd and map_fd */ - for (i = 0; i < 6; i++) { + for (i = 2; i < 6; i++) { err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY); if (err) { printf("Failed map_fd_rx update sockmap %i '%i:%i'\n", @@ -845,7 +849,7 @@ static void test_sockmap(int tasks, void *data) } /* Delete the elems without programs */ - for (i = 0; i < 6; i++) { + for (i = 2; i < 6; i++) { err = bpf_map_delete_elem(fd, &i); if (err) { printf("Failed delete sockmap %i '%i:%i'\n", From f88b4c01b97e09535505cf3c327fdbce55c27f00 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Thu, 20 Sep 2018 14:29:45 -0600 Subject: [PATCH 165/499] netlabel: check for IPV4MASK in addrinfo_get netlbl_unlabel_addrinfo_get() assumes that if it finds the NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is not necessarily the case as the current checks in netlbl_unlabel_staticadd() and friends are not sufficent to enforce this. 
If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR, NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes, these functions will all call netlbl_unlabel_addrinfo_get() which will then attempt dereference NULL when fetching the non-existent NLBL_UNLABEL_A_IPV4MASK attribute: Unable to handle kernel NULL pointer dereference at virtual address 0 Process unlab (pid: 31762, stack limit = 0xffffff80502d8000) Call trace: netlbl_unlabel_addrinfo_get+0x44/0xd8 netlbl_unlabel_staticremovedef+0x98/0xe0 genl_rcv_msg+0x354/0x388 netlink_rcv_skb+0xac/0x118 genl_rcv+0x34/0x48 netlink_unicast+0x158/0x1f0 netlink_sendmsg+0x32c/0x338 sock_sendmsg+0x44/0x60 ___sys_sendmsg+0x1d0/0x2a8 __sys_sendmsg+0x64/0xb4 SyS_sendmsg+0x34/0x4c el0_svc_naked+0x34/0x38 Code: 51001149 7100113f 540000a0 f9401508 (79400108) ---[ end trace f6438a488e737143 ]--- Kernel panic - not syncing: Fatal exception Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c070dfc0190a..c92894c3e40a 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -781,7 +781,8 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, { u32 addr_len; - if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { + if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && + info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); if (addr_len != sizeof(struct in_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) From 86f9bd1ff61c413a2a251fa736463295e4e24733 Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Fri, 21 Sep 2018 00:45:27 +0000 Subject: [PATCH 166/499] net/ipv6: Display all addresses in output of /proc/net/if_inet6 The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly handle starting/stopping the iteration. The problem is that at some point during the iteration, an overflow is detected and the process is subsequently stopped. The item being shown via seq_printf() when the overflow occurs is not actually shown, though. When start() is subsequently called to resume iterating, it returns the next item, and thus the item that was being processed when the overflow occurred never gets printed. Alter the meaning of the private data member "offset". Currently, when it is not 0 (which only happens at the very beginning), "offset" represents the next hlist item to be printed. After this change, "offset" always represents the current item. This is also consistent with the private data member "bucket", which represents the current bucket, and also the use of "pos" as defined in seq_file.txt: The pos passed to start() will always be either zero, or the most recent pos used in the previous session. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d51a8c0b3372..c63ccce6425f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4201,7 +4201,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) p++; continue; } - state->offset++; return ifa; } @@ -4225,13 +4224,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, return ifa; } + state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { - state->offset = 0; hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; - state->offset++; return ifa; } } From 54be5b8ce33f8d1a05b258070c81ed98f935883d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 21 Sep 2018 02:53:17 +0000 Subject: [PATCH 167/499] PCI: hv: Fix return value check in hv_pci_assign_slots() In case of error, the function pci_create_slot() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: a15f2c08c708 ("PCI: hv: support reporting serial number as slot information") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/pci/controller/pci-hyperv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ee80e79db21a..9ba4d12c179c 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1484,8 +1484,10 @@ static void hv_pci_assign_slots(struct hv_pcibus_device *hbus) snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser); hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr, name, NULL); - if (!hpdev->pci_slot) + if (IS_ERR(hpdev->pci_slot)) { pr_warn("pci_create slot %s failed\n", name); + hpdev->pci_slot = NULL; + } } } From 72b462798c2a258725dd741d1c6c139446e20d12 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 10:53:47 +0800 Subject: [PATCH 168/499] net: seeq: fix return type of ndo_start_xmit function The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. 
Miller --- drivers/net/ethernet/seeq/ether3.c | 5 +++-- drivers/net/ethernet/seeq/sgiseeq.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index c5bc124b41a9..d1bb73bf9914 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -77,7 +77,8 @@ static void ether3_setmulticastlist(struct net_device *dev); static int ether3_rx(struct net_device *dev, unsigned int maxcnt); static void ether3_tx(struct net_device *dev); static int ether3_open (struct net_device *dev); -static int ether3_sendpacket (struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ether3_sendpacket(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t ether3_interrupt (int irq, void *dev_id); static int ether3_close (struct net_device *dev); static void ether3_setmulticastlist (struct net_device *dev); @@ -481,7 +482,7 @@ static void ether3_timeout(struct net_device *dev) /* * Transmit a packet */ -static int +static netdev_tx_t ether3_sendpacket(struct sk_buff *skb, struct net_device *dev) { unsigned long flags; diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 573691bc3b71..70cce63a6081 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -578,7 +578,8 @@ static inline int sgiseeq_reset(struct net_device *dev) return 0; } -static int sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sgiseeq_private *sp = netdev_priv(dev); struct hpc3_ethregs *hregs = sp->hregs; From f3bf939f3d4552becb115d76e17b3e5957bab4a2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 11:02:37 +0800 Subject: [PATCH 169/499] net: cirrus: fix return type of ndo_start_xmit function The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 2 +- drivers/net/ethernet/cirrus/mac89x0.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index e2a702996db4..13dfdfca49fc 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -332,7 +332,7 @@ static int ep93xx_poll(struct napi_struct *napi, int budget) return rx; } -static int ep93xx_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ep93xx_xmit(struct sk_buff *skb, struct net_device *dev) { struct ep93xx_priv *ep = netdev_priv(dev); struct ep93xx_tdesc *txd; diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c index 3f8fe8fd79cc..6324e80960c3 100644 --- a/drivers/net/ethernet/cirrus/mac89x0.c +++ b/drivers/net/ethernet/cirrus/mac89x0.c @@ -113,7 +113,7 @@ struct net_local { /* Index to functions, as function prototypes. 
*/ static int net_open(struct net_device *dev); -static int net_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t net_send_packet(struct sk_buff *skb, struct net_device *dev); static irqreturn_t net_interrupt(int irq, void *dev_id); static void set_multicast_list(struct net_device *dev); static void net_rx(struct net_device *dev); @@ -324,7 +324,7 @@ net_open(struct net_device *dev) return 0; } -static int +static netdev_tx_t net_send_packet(struct sk_buff *skb, struct net_device *dev) { struct net_local *lp = netdev_priv(dev); From 28d304efb88f18aadfe9219f0e4b2b0ef9558a3b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 11:05:50 +0800 Subject: [PATCH 170/499] net: sgi: fix return type of ndo_start_xmit function The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/sgi/ioc3-eth.c | 4 ++-- drivers/net/ethernet/sgi/meth.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 18d533fdf14c..3140999642ba 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -99,7 +99,7 @@ struct ioc3_private { static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void ioc3_set_multicast_list(struct net_device *dev); -static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev); static void ioc3_timeout(struct net_device *dev); static inline unsigned int ioc3_hash(const unsigned char *addr); static inline void ioc3_stop(struct ioc3_private *ip); @@ -1390,7 +1390,7 @@ static struct pci_driver ioc3_driver = { .remove = ioc3_remove_one, }; -static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned long data; struct ioc3_private *ip = netdev_priv(dev); diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index ea55abd62ec7..703fbbefea44 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -697,7 +697,7 @@ static void meth_add_to_tx_ring(struct meth_private *priv, struct sk_buff *skb) /* * Transmit a packet (called by the kernel) */ -static int meth_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t meth_tx(struct sk_buff *skb, struct net_device *dev) { struct meth_private *priv = netdev_priv(dev); unsigned long flags; From f0f25516e3b9efff2a34059347d13361d26e314a Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 11:35:11 +0800 Subject: [PATCH 171/499] net: wiznet: fix return type of ndo_start_xmit function The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. 
Miller --- drivers/net/ethernet/wiznet/w5100.c | 2 +- drivers/net/ethernet/wiznet/w5300.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index 2bdfb39215e9..d8ba512f166a 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -835,7 +835,7 @@ static void w5100_tx_work(struct work_struct *work) w5100_tx_skb(priv->ndev, skb); } -static int w5100_start_tx(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t w5100_start_tx(struct sk_buff *skb, struct net_device *ndev) { struct w5100_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 56ae573001e8..80fdbff67d82 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -365,7 +365,7 @@ static void w5300_tx_timeout(struct net_device *ndev) netif_wake_queue(ndev); } -static int w5300_start_tx(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t w5300_start_tx(struct sk_buff *skb, struct net_device *ndev) { struct w5300_priv *priv = netdev_priv(ndev); From 648c361a568dcd098a3496c6c66bca4c8473e52b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 11:44:05 +0800 Subject: [PATCH 172/499] net: i825xx: fix return type of ndo_start_xmit function The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/i825xx/ether1.c | 5 +++-- drivers/net/ethernet/i825xx/lib82596.c | 4 ++-- drivers/net/ethernet/i825xx/sun3_82586.c | 6 ++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index dc983450354b..35f6291a3672 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -64,7 +64,8 @@ static unsigned int net_debug = NET_DEBUG; #define RX_AREA_END 0x0fc00 static int ether1_open(struct net_device *dev); -static int ether1_sendpacket(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ether1_sendpacket(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t ether1_interrupt(int irq, void *dev_id); static int ether1_close(struct net_device *dev); static void ether1_setmulticastlist(struct net_device *dev); @@ -667,7 +668,7 @@ ether1_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int +static netdev_tx_t ether1_sendpacket (struct sk_buff *skb, struct net_device *dev) { int tmp, tst, nopaddr, txaddr, tbdaddr, dataddr; diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f00a1dc2128c..2f7ae118217f 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -347,7 +347,7 @@ static const char init_setup[] = 0x7f /* *multi IA */ }; static int i596_open(struct net_device *dev); -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); @@ -966,7 +966,7 @@ static void i596_tx_timeout (struct 
net_device *dev) } -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct i596_private *lp = netdev_priv(dev); struct tx_cmd *tx_cmd; diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 8bb15a8c2a40..1a86184d44c0 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -121,7 +121,8 @@ static int sun3_82586_probe1(struct net_device *dev,int ioaddr); static irqreturn_t sun3_82586_interrupt(int irq,void *dev_id); static int sun3_82586_open(struct net_device *dev); static int sun3_82586_close(struct net_device *dev); -static int sun3_82586_send_packet(struct sk_buff *,struct net_device *); +static netdev_tx_t sun3_82586_send_packet(struct sk_buff *, + struct net_device *); static struct net_device_stats *sun3_82586_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static void sun3_82586_timeout(struct net_device *dev); @@ -1002,7 +1003,8 @@ static void sun3_82586_timeout(struct net_device *dev) * send frame */ -static int sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev) { int len,i; #ifndef NO_NOPCOMMANDS From e6ce3822a9f264b3f24c1acdc624131fb014f2f0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Sep 2018 11:46:37 +0800 Subject: [PATCH 173/499] net: apple: fix return type of ndo_start_xmit function The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, so make sure the implementation in this driver has returns 'netdev_tx_t' value, and change the function return type to netdev_tx_t. Found by coccinelle. Signed-off-by: YueHaibing Signed-off-by: David S. 
Miller --- drivers/net/ethernet/apple/bmac.c | 4 ++-- drivers/net/ethernet/apple/mace.c | 4 ++-- drivers/net/ethernet/apple/macmace.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index 024998d6d8c6..6a8e2567f2bd 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -154,7 +154,7 @@ static irqreturn_t bmac_txdma_intr(int irq, void *dev_id); static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id); static void bmac_set_timeout(struct net_device *dev); static void bmac_tx_timeout(struct timer_list *t); -static int bmac_output(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t bmac_output(struct sk_buff *skb, struct net_device *dev); static void bmac_start(struct net_device *dev); #define DBDMA_SET(x) ( ((x) | (x) << 16) ) @@ -1456,7 +1456,7 @@ bmac_start(struct net_device *dev) spin_unlock_irqrestore(&bp->lock, flags); } -static int +static netdev_tx_t bmac_output(struct sk_buff *skb, struct net_device *dev) { struct bmac_data *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index 0b5429d76bcf..68b9ee489489 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -78,7 +78,7 @@ struct mace_data { static int mace_open(struct net_device *dev); static int mace_close(struct net_device *dev); -static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t mace_xmit_start(struct sk_buff *skb, struct net_device *dev); static void mace_set_multicast(struct net_device *dev); static void mace_reset(struct net_device *dev); static int mace_set_address(struct net_device *dev, void *addr); @@ -525,7 +525,7 @@ static inline void mace_set_timeout(struct net_device *dev) mp->timeout_active = 1; } -static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t mace_xmit_start(struct sk_buff *skb, struct net_device *dev) { struct mace_data *mp = netdev_priv(dev); volatile struct dbdma_regs __iomem *td = mp->tx_dma; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 137cbb470af2..376f2c2613e7 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -89,7 +89,7 @@ struct mace_frame { static int mace_open(struct net_device *dev); static int mace_close(struct net_device *dev); -static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t mace_xmit_start(struct sk_buff *skb, struct net_device *dev); static void mace_set_multicast(struct net_device *dev); static int mace_set_address(struct net_device *dev, void *addr); static void mace_reset(struct net_device *dev); @@ -444,7 +444,7 @@ static int mace_close(struct net_device *dev) * Transmit a frame */ -static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t mace_xmit_start(struct sk_buff *skb, struct net_device *dev) { struct mace_data *mp = netdev_priv(dev); unsigned long flags; From 83fe9a966111b51a34f10c35e568e45bff34de48 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Sep 2018 11:07:55 +0300 Subject: [PATCH 174/499] devlink: double free in devlink_resource_fill() Smatch reports that devlink_dpipe_send_and_alloc_skb() frees the skb on error so this is a double free. We fixed a bunch of these bugs in commit 7fe4d6dcbcb4 ("devlink: Remove redundant free on error path") but we accidentally overlooked this one. 
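The underlying ownership rule can be illustrated with a small sketch (hypothetical helper names, not the actual devlink code): once a callee is documented to free its buffer on failure, the caller's error path must return without freeing it again.

/* Sketch only: send_or_free() consumes 'skb' when it fails.
 * do_send() and consume_result() are stand-ins, not real kernel APIs. */
static int send_or_free(struct sk_buff *skb)
{
	int err = do_send(skb);

	if (err)
		kfree_skb(skb);		/* the error path owns and releases the buffer */
	return err;
}

static int caller(struct sk_buff *skb)
{
	int err = send_or_free(skb);

	if (err)
		return err;		/* correct: no second kfree_skb(skb) here */
	return consume_result(skb);	/* success path keeps using the buffer */
}

The fix below applies exactly this rule: the error label that freed the skb after devlink_dpipe_send_and_alloc_skb() had already freed it is dropped, and the helper's error code is returned directly.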
Fixes: d9f9b9a4d05f ("devlink: Add support for resource abstraction") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/core/devlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 65fc366a78a4..8c0ed225e280 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2592,7 +2592,7 @@ static int devlink_resource_fill(struct genl_info *info, if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(skb, info); @@ -2600,7 +2600,6 @@ static int devlink_resource_fill(struct genl_info *info, nla_put_failure: err = -EMSGSIZE; err_resource_put: -err_skb_send_alloc: nlmsg_free(skb); return err; } From 8ac1ee6f4d62e781e3b3fd8b9c42b70371427669 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 02:44:12 -0700 Subject: [PATCH 175/499] net/mlx4: Use cpumask_available for eq->affinity_mask Clang warns that the address of a pointer will always evaluated as true in a boolean context: drivers/net/ethernet/mellanox/mlx4/eq.c:243:11: warning: address of array 'eq->affinity_mask' will always evaluate to 'true' [-Wpointer-bool-conversion] if (!eq->affinity_mask || cpumask_empty(eq->affinity_mask)) ~~~~~^~~~~~~~~~~~~ 1 warning generated. Use cpumask_available, introduced in commit f7e30f01a9e2 ("cpumask: Add helper cpumask_available()"), which does the proper checking and avoids this warning. Link: https://github.com/ClangBuiltLinux/linux/issues/86 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 1f3372c1802e..2df92dbd38e1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -240,7 +240,8 @@ static void mlx4_set_eq_affinity_hint(struct mlx4_priv *priv, int vec) struct mlx4_dev *dev = &priv->dev; struct mlx4_eq *eq = &priv->eq_table.eq[vec]; - if (!eq->affinity_mask || cpumask_empty(eq->affinity_mask)) + if (!cpumask_available(eq->affinity_mask) || + cpumask_empty(eq->affinity_mask)) return; hint_err = irq_set_affinity_hint(eq->irq, eq->affinity_mask); From 8360ed6745df6de2d8ddff8e2116789258fe5890 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 11:04:51 -0700 Subject: [PATCH 176/499] RDS: IB: Use DEFINE_PER_CPU_SHARED_ALIGNED for rds_ib_stats Clang warns when two declarations' section attributes don't match. 
net/rds/ib_stats.c:40:1: warning: section does not match previous declaration [-Wsection] DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); ^ ./include/linux/percpu-defs.h:142:2: note: expanded from macro 'DEFINE_PER_CPU_SHARED_ALIGNED' DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ^ ./include/linux/percpu-defs.h:93:9: note: expanded from macro 'DEFINE_PER_CPU_SECTION' extern __PCPU_ATTRS(sec) __typeof__(type) name; \ ^ ./include/linux/percpu-defs.h:49:26: note: expanded from macro '__PCPU_ATTRS' __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ ^ net/rds/ib.h:446:1: note: previous attribute is here DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); ^ ./include/linux/percpu-defs.h:111:2: note: expanded from macro 'DECLARE_PER_CPU' DECLARE_PER_CPU_SECTION(type, name, "") ^ ./include/linux/percpu-defs.h:87:9: note: expanded from macro 'DECLARE_PER_CPU_SECTION' extern __PCPU_ATTRS(sec) __typeof__(type) name ^ ./include/linux/percpu-defs.h:49:26: note: expanded from macro '__PCPU_ATTRS' __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ ^ 1 warning generated. The initial definition was added in commit ec16227e1414 ("RDS/IB: Infiniband transport") and the cache aligned definition was added in commit e6babe4cc4ce ("RDS/IB: Stats and sysctls") right after. The definition probably should have been updated in net/rds/ib.h, which is what this patch does. Link: https://github.com/ClangBuiltLinux/linux/issues/114 Signed-off-by: Nathan Chancellor Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index 73427ff439f9..fd483760c910 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -443,7 +443,7 @@ int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ -DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); +DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); #define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member) #define rds_ib_stats_add(member, count) \ rds_stats_add_which(rds_ib_stats, member, count) From 41c9b1be335b5afc3b5fb71c5d16f9d5939cd13f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 10 Sep 2018 16:18:29 -0700 Subject: [PATCH 177/499] device-dax: Add missing address_space_operations With address_space_operations missing for device dax, namely the .set_page_dirty, we hit a kernel warning when running destructive ndctl unit test: make TESTS=device-dax check WARNING: CPU: 3 PID: 7380 at fs/buffer.c:581 __set_page_dirty+0xb1/0xc0 Setting address_space_operations to noop_set_page_dirty and noop_invalidatepage for device dax to prevent fallback to __set_page_dirty_buffers() and block_invalidatepage() respectively. 
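Why wiring up these no-op operations is sufficient can be seen from a simplified sketch of the generic dispatch (illustrative only, not the verbatim mm/page-writeback.c code): the VFS falls back to the buffer-head helper only when the mapping supplies no handler of its own.

int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;

		/* Fallback used only when no handler is installed; for
		 * device-dax pages this fallback is what hits the warning
		 * in fs/buffer.c. */
		if (!spd)
			spd = __set_page_dirty_buffers;
		return (*spd)(page);
	}
	return !TestSetPageDirty(page);	/* anonymous pages, simplified */
}

With dev_dax_aops installed on the device-dax inode, set_page_dirty() resolves to noop_set_page_dirty() instead of the buffer-head fallback, so the warning is no longer reachable from this path.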
Fixes: 2232c6382a ("device-dax: Enable page_mapping()") Acked-by: Jeff Moyer Reported-by: Vishal Verma Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index bbe4d72ca105..948806e57cee 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -535,6 +535,11 @@ static unsigned long dax_get_unmapped_area(struct file *filp, return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); } +static const struct address_space_operations dev_dax_aops = { + .set_page_dirty = noop_set_page_dirty, + .invalidatepage = noop_invalidatepage, +}; + static int dax_open(struct inode *inode, struct file *filp) { struct dax_device *dax_dev = inode_dax(inode); @@ -544,6 +549,7 @@ static int dax_open(struct inode *inode, struct file *filp) dev_dbg(&dev_dax->dev, "trace\n"); inode->i_mapping = __dax_inode->i_mapping; inode->i_mapping->host = __dax_inode; + inode->i_mapping->a_ops = &dev_dax_aops; filp->f_mapping = inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); filp->private_data = dev_dax; From 474ff2600889e16280dbc6ada8bfecb216169a70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sat, 22 Sep 2018 01:34:01 -0700 Subject: [PATCH 178/499] net-ethtool: ETHTOOL_GUFO did not and should not require CAP_NET_ADMIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So it should not fail with EPERM even though it is no longer implemented... This is a fix for: (userns)$ egrep ^Cap /proc/self/status CapInh: 0000003fffffffff CapPrm: 0000003fffffffff CapEff: 0000003fffffffff CapBnd: 0000003fffffffff CapAmb: 0000003fffffffff (userns)$ tcpdump -i usb_rndis0 tcpdump: WARNING: usb_rndis0: SIOCETHTOOL(ETHTOOL_GUFO) ioctl failed: Operation not permitted Warning: Kernel filter failed: Bad file descriptor tcpdump: can't remove kernel filter: Bad file descriptor With this change it returns EOPNOTSUPP instead of EPERM. See also https://github.com/the-tcpdump-group/libpcap/issues/689 Fixes: 08a00fea6de2 "net: Remove references to NETIF_F_UFO from ethtool." Cc: David S. Miller Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c9993c6c2fd4..234a0ec2e932 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2624,6 +2624,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: + case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: From a4986b08287e0868fad6dd1b6e8ed840b2f1003f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 19 Sep 2018 08:58:02 +0200 Subject: [PATCH 179/499] MAINTAINERS: update the Annapurna Labs maintainer email Free Electrons became Bootlin. Update my email accordingly. Signed-off-by: Antoine Tenart Signed-off-by: Olof Johansson --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a5b256b25905..d8f42ec6d1c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1251,7 +1251,7 @@ N: meson ARM/Annapurna Labs ALPINE ARCHITECTURE M: Tsahee Zidenberg -M: Antoine Tenart +M: Antoine Tenart L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-alpine/ From 16fdf8ba98391650ce4bc4f3f71629d8a413bc21 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sun, 23 Sep 2018 12:25:15 -0700 Subject: [PATCH 180/499] rds: Fix build regression. Use DECLARE_* not DEFINE_* Fixes: 8360ed6745df ("RDS: IB: Use DEFINE_PER_CPU_SHARED_ALIGNED for rds_ib_stats") Signed-off-by: David S. Miller --- net/rds/ib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index fd483760c910..71ff356ee702 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -443,7 +443,7 @@ int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); #define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member) #define rds_ib_stats_add(member, count) \ rds_stats_add_which(rds_ib_stats, member, count) From 4451d3f59f2a6f95e5d205c2d04ea072955d080d Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Wed, 19 Sep 2018 15:13:31 -0700 Subject: [PATCH 181/499] clocksource/drivers/fttmr010: Fix set_next_event handler Currently, the aspeed MATCH1 register is updated to in set_next_event handler, with the assumption that COUNT register value is preserved when the timer is disabled and it continues decrementing after the timer is enabled. But the assumption is wrong: RELOAD register is loaded into COUNT register when the aspeed timer is enabled, which means the next event may be delayed because timer interrupt won't be generated until <0xFFFFFFFF - current_count + cycles>. The problem can be fixed by updating RELOAD register to , and COUNT register will be re-loaded when the timer is enabled and interrupt is generated when COUNT register overflows. The test result on Facebook Backpack-CMM BMC hardware (AST2500) shows the issue is fixed: without the patch, usleep(100) suspends the process for several milliseconds (and sometimes even over 40 milliseconds); after applying the fix, usleep(100) takes averagely 240 microseconds to return under the same workload level. Signed-off-by: Tao Ren Reviewed-by: Linus Walleij Tested-by: Lei YU Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-fttmr010.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/clocksource/timer-fttmr010.c b/drivers/clocksource/timer-fttmr010.c index c020038ebfab..cf93f6419b51 100644 --- a/drivers/clocksource/timer-fttmr010.c +++ b/drivers/clocksource/timer-fttmr010.c @@ -130,13 +130,17 @@ static int fttmr010_timer_set_next_event(unsigned long cycles, cr &= ~fttmr010->t1_enable_val; writel(cr, fttmr010->base + TIMER_CR); - /* Setup the match register forward/backward in time */ - cr = readl(fttmr010->base + TIMER1_COUNT); - if (fttmr010->count_down) - cr -= cycles; - else - cr += cycles; - writel(cr, fttmr010->base + TIMER1_MATCH1); + if (fttmr010->count_down) { + /* + * ASPEED Timer Controller will load TIMER1_LOAD register + * into TIMER1_COUNT register when the timer is re-enabled. 
+ */ + writel(cycles, fttmr010->base + TIMER1_LOAD); + } else { + /* Setup the match register forward in time */ + cr = readl(fttmr010->base + TIMER1_COUNT); + writel(cr + cycles, fttmr010->base + TIMER1_MATCH1); + } /* Start */ cr = readl(fttmr010->base + TIMER_CR); From ac3d9dd034e565df2c034ab2ca71f0a9f69153c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:38 -0700 Subject: [PATCH 182/499] netpoll: make ndo_poll_controller() optional As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. It seems that all networking drivers that do use NAPI for their TX completions, should not provide a ndo_poll_controller(). NAPI drivers have netpoll support already handled in core networking stack, since netpoll_poll_dev() uses poll_napi(dev) to iterate through registered NAPI contexts for a device. This patch allows netpoll_poll_dev() to process NAPI contexts even for drivers not providing ndo_poll_controller(), allowing for following patches in NAPI drivers. Also we export netpoll_poll_dev() so that it can be called by bonding/team drivers in following patches. Reported-by: Song Liu Signed-off-by: Eric Dumazet Tested-by: Song Liu Signed-off-by: David S. Miller --- include/linux/netpoll.h | 5 +++-- net/core/netpoll.c | 19 +++++++------------ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 67662d01130a..3ef82d3a78db 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -49,8 +49,9 @@ struct netpoll_info { }; #ifdef CONFIG_NETPOLL -extern void netpoll_poll_disable(struct net_device *dev); -extern void netpoll_poll_enable(struct net_device *dev); +void netpoll_poll_dev(struct net_device *dev); +void netpoll_poll_disable(struct net_device *dev); +void netpoll_poll_enable(struct net_device *dev); #else static inline void netpoll_poll_disable(struct net_device *dev) { return; } static inline void netpoll_poll_enable(struct net_device *dev) { return; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 57557a6a950c..3219a2932463 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -187,16 +187,16 @@ static void poll_napi(struct net_device *dev) } } -static void netpoll_poll_dev(struct net_device *dev) +void netpoll_poll_dev(struct net_device *dev) { - const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); + const struct net_device_ops *ops; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity * while changing device state */ - if (down_trylock(&ni->dev_lock)) + if (!ni || down_trylock(&ni->dev_lock)) return; if (!netif_running(dev)) { @@ -205,13 +205,8 @@ static void netpoll_poll_dev(struct net_device *dev) } ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) { - up(&ni->dev_lock); - return; - } - - /* Process pending work on NIC */ - ops->ndo_poll_controller(dev); + if (ops->ndo_poll_controller) + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -219,6 +214,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll_disable(struct net_device *dev) { @@ -613,8 +609,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) 
strlcpy(np->dev_name, ndev->name, IFNAMSIZ); INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); - if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || - !ndev->netdev_ops->ndo_poll_controller) { + if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", np->dev_name); err = -ENOTSUPP; From 93f62ad5e83a13e0c224dfca5ef40f90c09aad51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:39 -0700 Subject: [PATCH 183/499] bonding: use netpoll_poll_dev() helper We want to allow NAPI drivers to no longer provide ndo_poll_controller() method, as it has been proven problematic. team driver must not look at its presence, but instead call netpoll_poll_dev() which factorize the needed actions. Signed-off-by: Eric Dumazet Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Acked-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a764a83f99da..0d87e11e7f1d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -971,16 +971,13 @@ static void bond_poll_controller(struct net_device *bond_dev) struct slave *slave = NULL; struct list_head *iter; struct ad_info ad_info; - struct netpoll_info *ni; - const struct net_device_ops *ops; if (BOND_MODE(bond) == BOND_MODE_8023AD) if (bond_3ad_get_active_agg_info(bond, &ad_info)) return; bond_for_each_slave_rcu(bond, slave, iter) { - ops = slave->dev->netdev_ops; - if (!bond_slave_is_up(slave) || !ops->ndo_poll_controller) + if (!bond_slave_is_up(slave)) continue; if (BOND_MODE(bond) == BOND_MODE_8023AD) { @@ -992,11 +989,7 @@ static void bond_poll_controller(struct net_device *bond_dev) continue; } - ni = rcu_dereference_bh(slave->dev->npinfo); - if (down_trylock(&ni->dev_lock)) - continue; - ops->ndo_poll_controller(slave->dev); - up(&ni->dev_lock); + netpoll_poll_dev(slave->dev); } } From b80e71a986c2ab5677dc6b84923cd7030b690800 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:40 -0700 Subject: [PATCH 184/499] ixgbe: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ixgbe uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Reported-by: Song Liu Signed-off-by: Eric Dumazet Tested-by: Song Liu Cc: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9a23d33a47ed..f27d73a7bf16 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8768,28 +8768,6 @@ static int ixgbe_del_sanmac_netdev(struct net_device *dev) return err; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* - * Polling 'interrupt' - used by things like netconsole to send skbs - * without having to re-enable interrupts. It's not called while - * the interrupt routine is executing. 
- */ -static void ixgbe_netpoll(struct net_device *netdev) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - int i; - - /* if interface is down do nothing */ - if (test_bit(__IXGBE_DOWN, &adapter->state)) - return; - - /* loop through and schedule all active queues */ - for (i = 0; i < adapter->num_q_vectors; i++) - ixgbe_msix_clean_rings(0, adapter->q_vector[i]); -} - -#endif - static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats, struct ixgbe_ring *ring) { @@ -10251,9 +10229,6 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, .ndo_setup_tc = __ixgbe_setup_tc, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ixgbe_netpoll, -#endif #ifdef IXGBE_FCOE .ndo_select_queue = ixgbe_select_queue, .ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get, From 6f5d941ebadeb1150530f205af0eb5cf5bfeb219 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:41 -0700 Subject: [PATCH 185/499] ixgbevf: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ixgbevf uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Jeff Kirsher Signed-off-by: David S. Miller --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d86446d202d5..5a228582423b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4233,24 +4233,6 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* Polling 'interrupt' - used by things like netconsole to send skbs - * without having to re-enable interrupts. It's not called while - * the interrupt routine is executing. - */ -static void ixgbevf_netpoll(struct net_device *netdev) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - int i; - - /* if interface is down do nothing */ - if (test_bit(__IXGBEVF_DOWN, &adapter->state)) - return; - for (i = 0; i < adapter->num_rx_queues; i++) - ixgbevf_msix_clean_rings(0, adapter->q_vector[i]); -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -4482,9 +4464,6 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_tx_timeout = ixgbevf_tx_timeout, .ndo_vlan_rx_add_vid = ixgbevf_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ixgbevf_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ixgbevf_netpoll, -#endif .ndo_features_check = ixgbevf_features_check, .ndo_bpf = ixgbevf_xdp, }; From dda9d57e2d4245c0d8e309ee7399bf2ebfca64ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:42 -0700 Subject: [PATCH 186/499] fm10k: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). 
This capture lasts for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. fm10k uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/fm10k/fm10k.h | 3 --- .../net/ethernet/intel/fm10k/fm10k_netdev.c | 3 --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 22 ------------------- 3 files changed, 28 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index a903a0ba45e1..7d42582ed48d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -504,9 +504,6 @@ void fm10k_update_stats(struct fm10k_intfc *interface); void fm10k_service_event_schedule(struct fm10k_intfc *interface); void fm10k_macvlan_schedule(struct fm10k_intfc *interface); void fm10k_update_rx_drop_en(struct fm10k_intfc *interface); -#ifdef CONFIG_NET_POLL_CONTROLLER -void fm10k_netpoll(struct net_device *netdev); -#endif /* Netdev */ struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 929f538d28bc..538a8467f434 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1648,9 +1648,6 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_udp_tunnel_del = fm10k_udp_tunnel_del, .ndo_dfwd_add_station = fm10k_dfwd_add_station, .ndo_dfwd_del_station = fm10k_dfwd_del_station, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = fm10k_netpoll, -#endif .ndo_features_check = fm10k_features_check, }; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 15071e4adb98..c859ababeed5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1210,28 +1210,6 @@ static irqreturn_t fm10k_msix_mbx_vf(int __always_unused irq, void *data) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/** - * fm10k_netpoll - A Polling 'interrupt' handler - * @netdev: network interface device structure - * - * This is used by netconsole to send skbs without having to re-enable - * interrupts. It's not called while the normal interrupt routine is executing. - **/ -void fm10k_netpoll(struct net_device *netdev) -{ - struct fm10k_intfc *interface = netdev_priv(netdev); - int i; - - /* if interface is down do nothing */ - if (test_bit(__FM10K_DOWN, interface->state)) - return; - - for (i = 0; i < interface->num_q_vectors; i++) - fm10k_msix_clean_rings(0, interface->q_vector[i]); -} - -#endif #define FM10K_ERR_MSG(type) case (type): error = #type; break static void fm10k_handle_fault(struct fm10k_intfc *interface, int type, struct fm10k_fault *fault) From 2753166e4be9df4677745a7f7d9d8daa94938a08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:43 -0700 Subject: [PATCH 187/499] ixgb: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. 
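The core-side behaviour that these conversions rely on can be summarised with a simplified sketch (not the verbatim net/core/netpoll.c code): once a driver stops providing ndo_poll_controller(), netpoll_poll_dev() simply walks the NAPI contexts the driver has registered and runs their poll handlers.

static void poll_napi(struct net_device *dev)
{
	struct napi_struct *napi;

	/* Service every NAPI context registered by the driver, so TX
	 * completions and RX are drained without a driver-specific
	 * ndo_poll_controller() implementation. */
	list_for_each_entry_rcu(napi, &dev->napi_list, dev_list)
		poll_one_napi(napi);
}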
ixgb uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. This also removes a problematic use of disable_irq() in a context it is forbidden, as explained in commit af3e0fcf7887 ("8139too: Use disable_irq_nosync() in rtl8139_poll_controller()") Signed-off-by: Eric Dumazet Cc: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 25 --------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index d3e72d0f66ef..7722153c4ac2 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -81,11 +81,6 @@ static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); static void ixgb_restore_vlan(struct ixgb_adapter *adapter); -#ifdef CONFIG_NET_POLL_CONTROLLER -/* for netdump / net console */ -static void ixgb_netpoll(struct net_device *dev); -#endif - static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev, enum pci_channel_state state); static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev); @@ -348,9 +343,6 @@ static const struct net_device_ops ixgb_netdev_ops = { .ndo_tx_timeout = ixgb_tx_timeout, .ndo_vlan_rx_add_vid = ixgb_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ixgb_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ixgb_netpoll, -#endif .ndo_fix_features = ixgb_fix_features, .ndo_set_features = ixgb_set_features, }; @@ -2195,23 +2187,6 @@ ixgb_restore_vlan(struct ixgb_adapter *adapter) ixgb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* - * Polling 'interrupt' - used by things like netconsole to send skbs - * without having to re-enable interrupts. It's not called while - * the interrupt routine is executing. - */ - -static void ixgb_netpoll(struct net_device *dev) -{ - struct ixgb_adapter *adapter = netdev_priv(dev); - - disable_irq(adapter->pdev->irq); - ixgb_intr(adapter->pdev->irq, dev); - enable_irq(adapter->pdev->irq); -} -#endif - /** * ixgb_io_error_detected - called when PCI error is detected * @pdev: pointer to pci device with error From 0542997edece2d75e0bf3e173dfddf1b2b3ef756 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:44 -0700 Subject: [PATCH 188/499] igb: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. igb uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 30 ----------------------- 1 file changed, 30 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a32c576c1e65..0796cef96fa3 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -205,10 +205,6 @@ static struct notifier_block dca_notifier = { .priority = 0 }; #endif -#ifdef CONFIG_NET_POLL_CONTROLLER -/* for netdump / net console */ -static void igb_netpoll(struct net_device *); -#endif #ifdef CONFIG_PCI_IOV static unsigned int max_vfs; module_param(max_vfs, uint, 0); @@ -2881,9 +2877,6 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, .ndo_set_vf_trust = igb_ndo_set_vf_trust, .ndo_get_vf_config = igb_ndo_get_vf_config, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = igb_netpoll, -#endif .ndo_fix_features = igb_fix_features, .ndo_set_features = igb_set_features, .ndo_fdb_add = igb_ndo_fdb_add, @@ -9053,29 +9046,6 @@ static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) return 0; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* Polling 'interrupt' - used by things like netconsole to send skbs - * without having to re-enable interrupts. It's not called while - * the interrupt routine is executing. - */ -static void igb_netpoll(struct net_device *netdev) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct igb_q_vector *q_vector; - int i; - - for (i = 0; i < adapter->num_q_vectors; i++) { - q_vector = adapter->q_vector[i]; - if (adapter->flags & IGB_FLAG_HAS_MSIX) - wr32(E1000_EIMC, q_vector->eims_value); - else - igb_irq_disable(adapter); - napi_schedule(&q_vector->napi); - } -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - /** * igb_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device From 158a08a694c4ecf9574fe452db24a6b4943853aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:45 -0700 Subject: [PATCH 189/499] ice: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ice uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_main.c | 27 ----------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f1e80eed2fd6..3f047bb43348 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4806,30 +4806,6 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) stats->rx_length_errors = vsi_stats->rx_length_errors; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/** - * ice_netpoll - polling "interrupt" handler - * @netdev: network interface device structure - * - * Used by netconsole to send skbs without having to re-enable interrupts. - * This is not called in the normal interrupt path. 
- */ -static void ice_netpoll(struct net_device *netdev) -{ - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; - struct ice_pf *pf = vsi->back; - int i; - - if (test_bit(__ICE_DOWN, vsi->state) || - !test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) - return; - - for (i = 0; i < vsi->num_q_vectors; i++) - ice_msix_clean_rings(0, vsi->q_vectors[i]); -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - /** * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI * @vsi: VSI having NAPI disabled @@ -5497,9 +5473,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ice_netpoll, -#endif /* CONFIG_NET_POLL_CONTROLLER */ .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_set_features = ice_set_features, From 1aa28fb98368078bcaf527bf46c0e001db934414 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:46 -0700 Subject: [PATCH 190/499] i40evf: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. i40evf uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Jeff Kirsher Signed-off-by: David S. Miller --- .../net/ethernet/intel/i40evf/i40evf_main.c | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 5906c1c1d19d..fef6d892ed4c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -396,29 +396,6 @@ static void i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; } -#ifdef CONFIG_NET_POLL_CONTROLLER -/** - * i40evf_netpoll - A Polling 'interrupt' handler - * @netdev: network interface device structure - * - * This is used by netconsole to send skbs without having to re-enable - * interrupts. It's not called while the normal interrupt routine is executing. 
- **/ -static void i40evf_netpoll(struct net_device *netdev) -{ - struct i40evf_adapter *adapter = netdev_priv(netdev); - int q_vectors = adapter->num_msix_vectors - NONQ_VECS; - int i; - - /* if interface is down do nothing */ - if (test_bit(__I40E_VSI_DOWN, adapter->vsi.state)) - return; - - for (i = 0; i < q_vectors; i++) - i40evf_msix_clean_rings(0, &adapter->q_vectors[i]); -} - -#endif /** * i40evf_irq_affinity_notify - Callback for affinity changes * @notify: context as to what irq was changed @@ -3229,9 +3206,6 @@ static const struct net_device_ops i40evf_netdev_ops = { .ndo_features_check = i40evf_features_check, .ndo_fix_features = i40evf_fix_features, .ndo_set_features = i40evf_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = i40evf_netpoll, -#endif .ndo_setup_tc = i40evf_setup_tc, }; From a24b66c249f71dde11da5ba3b4c397a42ece3fc9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:47 -0700 Subject: [PATCH 191/499] mlx4: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. mlx4 uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 6785661d1a72..fe49384eba48 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1286,20 +1286,6 @@ static void mlx4_en_do_set_rx_mode(struct work_struct *work) mutex_unlock(&mdev->state_lock); } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void mlx4_en_netpoll(struct net_device *dev) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_cq *cq; - int i; - - for (i = 0; i < priv->tx_ring_num[TX]; i++) { - cq = priv->tx_cq[TX][i]; - napi_schedule(&cq->napi); - } -} -#endif - static int mlx4_en_set_rss_steer_rules(struct mlx4_en_priv *priv) { u64 reg_id; @@ -2946,9 +2932,6 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_tx_timeout = mlx4_en_tx_timeout, .ndo_vlan_rx_add_vid = mlx4_en_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx4_en_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = mlx4_en_netpoll, -#endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, .ndo_setup_tc = __mlx4_en_setup_tc, @@ -2983,9 +2966,6 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_set_vf_link_state = mlx4_en_set_vf_link_state, .ndo_get_vf_stats = mlx4_en_get_vf_stats, .ndo_get_vf_config = mlx4_en_get_vf_config, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = mlx4_en_netpoll, -#endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, .ndo_setup_tc = __mlx4_en_setup_tc, From 9c29bcd189f4ab1644b7125713602532d0aefdb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:48 -0700 Subject: [PATCH 192/499] mlx5: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might 
steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. mlx5 uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5a7939e70190..54118b77dc1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4315,22 +4315,6 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* Fake "interrupt" called by netpoll (eg netconsole) to send skbs without - * reenabling interrupts. - */ -static void mlx5e_netpoll(struct net_device *dev) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_channels *chs = &priv->channels; - - int i; - - for (i = 0; i < chs->num; i++) - napi_schedule(&chs->c[i]->napi); -} -#endif - static const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -4356,9 +4340,6 @@ static const struct net_device_ops mlx5e_netdev_ops = { #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = mlx5e_netpoll, -#endif #ifdef CONFIG_MLX5_ESWITCH /* SRIOV E-Switch NDOs */ .ndo_set_vf_mac = mlx5e_set_vf_mac, From d8ea6a91ad70ae0881d76446df27d3f58fd478c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:49 -0700 Subject: [PATCH 193/499] bnx2x: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. bnx2x uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Ariel Elior Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 71362b7f6040..fcc2328bb0d9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12894,19 +12894,6 @@ static int bnx2x_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void poll_bnx2x(struct net_device *dev) -{ - struct bnx2x *bp = netdev_priv(dev); - int i; - - for_each_eth_queue(bp, i) { - struct bnx2x_fastpath *fp = &bp->fp[i]; - napi_schedule(&bnx2x_fp(bp, fp->index, napi)); - } -} -#endif - static int bnx2x_validate_addr(struct net_device *dev) { struct bnx2x *bp = netdev_priv(dev); @@ -13113,9 +13100,6 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_tx_timeout = bnx2x_tx_timeout, .ndo_vlan_rx_add_vid = bnx2x_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bnx2x_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = poll_bnx2x, -#endif .ndo_setup_tc = __bnx2x_setup_tc, #ifdef CONFIG_BNX2X_SRIOV .ndo_set_vf_mac = bnx2x_set_vf_mac, From 58e0e22bff638055278ea73e34d0d07a95260790 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:50 -0700 Subject: [PATCH 194/499] bnxt: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. bnxt uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 177587f9c3f1..61957b0bbd8c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7672,21 +7672,6 @@ static void bnxt_tx_timeout(struct net_device *dev) bnxt_queue_sp_work(bp); } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void bnxt_poll_controller(struct net_device *dev) -{ - struct bnxt *bp = netdev_priv(dev); - int i; - - /* Only process tx rings/combined rings in netpoll mode. 
*/ - for (i = 0; i < bp->tx_nr_rings; i++) { - struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - - napi_schedule(&txr->bnapi->napi); - } -} -#endif - static void bnxt_timer(struct timer_list *t) { struct bnxt *bp = from_timer(bp, t, timer); @@ -8519,9 +8504,6 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_set_vf_link_state = bnxt_set_vf_link_state, .ndo_set_vf_spoofchk = bnxt_set_vf_spoofchk, .ndo_set_vf_trust = bnxt_set_vf_trust, -#endif -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = bnxt_poll_controller, #endif .ndo_setup_tc = bnxt_setup_tc, #ifdef CONFIG_RFS_ACCEL From 0825ce70318e9c576acbdd5ceb4a8d563263cf8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:51 -0700 Subject: [PATCH 195/499] nfp: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. nfp uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Jakub Kicinski Acked-by: Jakub Kicinski Tested-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/nfp_net_common.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 253bdaef1505..8ed38fd5a852 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3146,21 +3146,6 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL); } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void nfp_net_netpoll(struct net_device *netdev) -{ - struct nfp_net *nn = netdev_priv(netdev); - int i; - - /* nfp_net's NAPIs are statically allocated so even if there is a race - * with reconfig path this will simply try to schedule some disabled - * NAPI instances. - */ - for (i = 0; i < nn->dp.num_stack_tx_rings; i++) - napi_schedule_irqoff(&nn->r_vecs[i].napi); -} -#endif - static void nfp_net_stat64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -3519,9 +3504,6 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_get_stats64 = nfp_net_stat64, .ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = nfp_net_netpoll, -#endif .ndo_set_vf_mac = nfp_app_set_vf_mac, .ndo_set_vf_vlan = nfp_app_set_vf_vlan, .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk, From 765cdc209cb89fb81215310a066cd0a3018ffef7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:52 -0700 Subject: [PATCH 196/499] tun: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. tun uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/net/tun.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ebd07ad82431..e2648b5a3861 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1153,43 +1153,6 @@ static netdev_features_t tun_net_fix_features(struct net_device *dev, return (features & tun->set_features) | (features & ~TUN_USER_FEATURES); } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void tun_poll_controller(struct net_device *dev) -{ - /* - * Tun only receives frames when: - * 1) the char device endpoint gets data from user space - * 2) the tun socket gets a sendmsg call from user space - * If NAPI is not enabled, since both of those are synchronous - * operations, we are guaranteed never to have pending data when we poll - * for it so there is nothing to do here but return. - * We need this though so netpoll recognizes us as an interface that - * supports polling, which enables bridge devices in virt setups to - * still use netconsole - * If NAPI is enabled, however, we need to schedule polling for all - * queues unless we are using napi_gro_frags(), which we call in - * process context and not in NAPI context. - */ - struct tun_struct *tun = netdev_priv(dev); - - if (tun->flags & IFF_NAPI) { - struct tun_file *tfile; - int i; - - if (tun_napi_frags_enabled(tun)) - return; - - rcu_read_lock(); - for (i = 0; i < tun->numqueues; i++) { - tfile = rcu_dereference(tun->tfiles[i]); - if (tfile->napi_enabled) - napi_schedule(&tfile->napi); - } - rcu_read_unlock(); - } - return; -} -#endif static void tun_set_headroom(struct net_device *dev, int new_hr) { @@ -1283,9 +1246,6 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_start_xmit = tun_net_xmit, .ndo_fix_features = tun_net_fix_features, .ndo_select_queue = tun_select_queue, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = tun_poll_controller, -#endif .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, }; @@ -1365,9 +1325,6 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_select_queue = tun_select_queue, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = tun_poll_controller, -#endif .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, From d26ed6b0e5e23190d43ab34bc69cbecdc464a2cf Mon Sep 17 00:00:00 2001 From: Friedemann Gerold Date: Sat, 15 Sep 2018 18:03:39 +0300 Subject: [PATCH 197/499] net: aquantia: memory corruption on jumbo frames This patch fixes skb_shared area, which will be corrupted upon reception of 4K jumbo packets. Originally build_skb usage purpose was to reuse page for skb to eliminate needs of extra fragments. But that logic does not take into account that skb_shared_info should be reserved at the end of skb data area. In case packet data consumes all the page (4K), skb_shinfo location overflows the page. As a consequence, __build_skb zeroed shinfo data above the allocated page, corrupting next page. The issue is rarely seen in real life because jumbo are normally larger than 4K and that causes another code path to trigger. 
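To make the constraint concrete, here is a minimal sketch of the sizing rule involved (illustrative only: the helper name is made up, and the check actually added by this patch is written in terms of the driver's AQ_CFG_RX_FRAME_MAX and AQ_SKB_ALIGN constants):

#include <linux/skbuff.h>

/*
 * build_skb() places struct skb_shared_info at the tail of the buffer it
 * is handed, so reusing the receive page directly is only safe when the
 * frame leaves room for that trailer.
 */
static bool rx_frame_fits_build_skb(unsigned int frame_len,
                                    unsigned int buf_size)
{
        return frame_len +
               SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) <= buf_size;
}

With a 4K page and a frame that fills it completely this check fails, which is exactly the case the patch below diverts away from build_skb() and onto the skb_add_rx_frag() path.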
But it 100% reproducible with simple scapy packet, like: sendp(IP(dst="192.168.100.3") / TCP(dport=443) \ / Raw(RandString(size=(4096-40))), iface="enp1s0") Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Reported-by: Friedemann Gerold Reported-by: Michael Rauch Signed-off-by: Friedemann Gerold Tested-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_ring.c | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index b5f1f62e8e25..d1e1a0ba8615 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -225,9 +225,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } /* for single fragment packets use build_skb() */ - if (buff->is_eop) { + if (buff->is_eop && + buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) { skb = build_skb(page_address(buff->page), - buff->len + AQ_SKB_ALIGN); + AQ_CFG_RX_FRAME_MAX); if (unlikely(!skb)) { err = -ENOMEM; goto err_exit; @@ -247,18 +248,21 @@ int aq_ring_rx_clean(struct aq_ring_s *self, buff->len - ETH_HLEN, SKB_TRUESIZE(buff->len - ETH_HLEN)); - for (i = 1U, next_ = buff->next, - buff_ = &self->buff_ring[next_]; true; - next_ = buff_->next, - buff_ = &self->buff_ring[next_], ++i) { - skb_add_rx_frag(skb, i, buff_->page, 0, - buff_->len, - SKB_TRUESIZE(buff->len - - ETH_HLEN)); - buff_->is_cleaned = 1; + if (!buff->is_eop) { + for (i = 1U, next_ = buff->next, + buff_ = &self->buff_ring[next_]; + true; next_ = buff_->next, + buff_ = &self->buff_ring[next_], ++i) { + skb_add_rx_frag(skb, i, + buff_->page, 0, + buff_->len, + SKB_TRUESIZE(buff->len - + ETH_HLEN)); + buff_->is_cleaned = 1; - if (buff_->is_eop) - break; + if (buff_->is_eop) + break; + } } } From 63e132528032ce937126aba591a7b37ec593a6bb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 18 Sep 2018 16:34:31 +0300 Subject: [PATCH 198/499] ovl: fix memory leak on unlink of indexed file The memory leak was detected by kmemleak when running xfstests overlay/051,053 Fixes: caf70cb2ba5d ("ovl: cleanup orphan index entries") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 8cfb62cc8672..ace4fe4c39a9 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -683,7 +683,7 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *index = NULL; struct inode *inode; - struct qstr name; + struct qstr name = { }; int err; err = ovl_get_index_name(lowerdentry, &name); @@ -726,6 +726,7 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; out: + kfree(name.name); dput(index); return; From 898cc19d8af2d6a80a9f2804b0db2e83c5dd8863 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 18 Sep 2018 16:34:32 +0300 Subject: [PATCH 199/499] ovl: fix freeze protection bypass in ovl_write_iter() Tested by re-writing to an open overlayfs file while upper ext4 is frozen: xfs_io -f /ovl/x xfs_io> pwrite 0 4096 fsfreeze -f /ext4 xfs_io> pwrite 0 4096 WARNING: CPU: 0 PID: 1492 at fs/ext4/ext4_jbd2.c:53 \ ext4_journal_check_start+0x48/0x82 After the fix, the second write blocks in ovl_write_iter() and avoids hitting WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE) in 
ext4_journal_check_start(). Fixes: 2a92e07edc5e ("ovl: add ovl_write_iter()") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index aeaefd2a551b..986313da0c88 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -240,8 +240,10 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out_unlock; old_cred = ovl_override_creds(file_inode(file)->i_sb); + file_start_write(real.file); ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, ovl_iocb_to_rwf(iocb)); + file_end_write(real.file); revert_creds(old_cred); /* Update size */ From d9d150ae50675e3c1d68047aafb2e981be685d90 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 18 Sep 2018 16:34:33 +0300 Subject: [PATCH 200/499] ovl: fix freeze protection bypass in ovl_clone_file_range() Tested by doing clone on overlayfs while upper xfs+reflink is frozen: xfs_io -f /ovl/y fsfreeze -f /xfs xfs_io> reflink /ovl/x Before the fix xfs_io enters xfs_reflink_remap_range() and blocks in xfs_trans_alloc(). After the fix, xfs_io blocks outside xfs code in ovl_clone_file_range(). Fixes: 8ede205541ff ("ovl: add reflink/copyfile/dedup support") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 986313da0c88..5d1b4b38f743 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -461,7 +461,7 @@ static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, break; case OVL_CLONE: - ret = vfs_clone_file_range(real_in.file, pos_in, + ret = do_clone_file_range(real_in.file, pos_in, real_out.file, pos_out, len); break; From a725356b6659469d182d662f22d770d83d3bc7b5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 18 Sep 2018 16:34:34 +0300 Subject: [PATCH 201/499] vfs: swap names of {do,vfs}_clone_file_range() Commit 031a072a0b8a ("vfs: call vfs_clone_file_range() under freeze protection") created a wrapper do_clone_file_range() around vfs_clone_file_range(), moving the freeze protection to the former so that overlayfs could call the latter. The more common vfs practice is to call do_xxx helpers from vfs_xxx helpers, where freeze protection is taken in the vfs_xxx helper, so this anomaly could be a source of confusion. It seems that commit 8ede205541ff ("ovl: add reflink/copyfile/dedup support") may have fallen victim to this confusion - ovl_clone_file_range() calls the vfs_clone_file_range() helper in the hope of getting freeze protection on the upper fs, but in fact this lets overlayfs bypass upper fs freeze protection. Swap the names of the two helpers to conform to common vfs practice and call the correct helpers from overlayfs and nfsd.
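For reference, the shape of the convention being restored, reduced from the fs/read_write.c hunk below (error handling omitted): the vfs_*() helper takes freeze protection on the destination and delegates the work to the do_*() helper.

int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
                         struct file *file_out, loff_t pos_out, u64 len)
{
        int ret;

        /* freeze protection lives in the vfs_ wrapper ... */
        file_start_write(file_out);
        /* ... so internal callers that already hold it use do_ directly */
        ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
        file_end_write(file_out);

        return ret;
}

With the swap, the ioctl, nfsd and ovl_clone_file_range() call sites end up on vfs_clone_file_range() and get the protection taken for them, while the overlayfs copy-up path, which is expected to already hold write access to the upper fs, calls do_clone_file_range() directly.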
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/ioctl.c | 2 +- fs/nfsd/vfs.c | 3 ++- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/file.c | 2 +- fs/read_write.c | 17 +++++++++++++++-- include/linux/fs.h | 17 +++-------------- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 3212c29235ce..2005529af560 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -230,7 +230,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); fdput: fdput(src_file); return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 55a099e47ba2..b53e76391e52 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,7 +541,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, + count)); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 296037afecdb..1cc797a08a5b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -141,7 +141,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); + error = do_clone_file_range(old_file, 0, new_file, 0, len); if (!error) goto out; /* Couldn't clone, so now we try to copy the data */ diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 5d1b4b38f743..986313da0c88 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -461,7 +461,7 @@ static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, break; case OVL_CLONE: - ret = do_clone_file_range(real_in.file, pos_in, + ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file, pos_out, len); break; diff --git a/fs/read_write.c b/fs/read_write.c index 39b4a21dd933..8a2737f0d61d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1818,8 +1818,8 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1866,6 +1866,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; } +EXPORT_SYMBOL(do_clone_file_range); + +int vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len) +{ + int ret; + + file_start_write(file_out); + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} EXPORT_SYMBOL(vfs_clone_file_range); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c0b4a1c22ff..897eae8faee1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1828,8 +1828,10 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern int 
vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, struct inode *inode_out, loff_t pos_out, u64 *len, bool is_dedupe); +extern int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + struct file *file_out, loff_t pos_out, u64 len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); @@ -2773,19 +2775,6 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - file_start_write(file_out); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - file_end_write(file_out); - - return ret; -} - /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. From afbb1169ed5b58cfca017e368b53e019cf285853 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 19 Sep 2018 20:52:45 +0200 Subject: [PATCH 202/499] HID: i2c-hid: Remove RESEND_REPORT_DESCR quirk and its handling Commit 52cf93e63ee6 ("HID: i2c-hid: Don't reset device upon system resume") removes the need for the RESEND_REPORT_DESCR quirk for Raydium devices, but kept it for the SIS device id 10FB touchscreens, as the author of that commit could not determine if the quirk is still necessary there. I've tested suspend/resume on a Toshiba Click Mini L9W-B which is the device for which this quirk was added in the first place and with the "Don't reset device upon system resume" fix the quirk is no longer necessary, so this commit removes it. Note even better I also had some other devices with SIS touchscreens which suspend/resume issues, where the RESEND_REPORT_DESCR quirk did not help. 
I've also tested these devices with the "Don't reset device upon system resume" fix and I'm happy to report that that fix also fixes touchscreen resume on the following devices: Asus T100HA Asus T200TA Peaq C1010 Cc: Kai-Heng Feng Acked-by: Benjamin Tissoires Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 - drivers/hid/i2c-hid/i2c-hid.c | 18 +++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5146ee029db4..bc49909aba8e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -976,7 +976,6 @@ #define USB_DEVICE_ID_SIS817_TOUCH 0x0817 #define USB_DEVICE_ID_SIS_TS 0x1013 #define USB_DEVICE_ID_SIS1030_TOUCH 0x1030 -#define USB_DEVICE_ID_SIS10FB_TOUCH 0x10fb #define USB_VENDOR_ID_SKYCABLE 0x1223 #define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07 diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index a8610f5bf6f5..4e3592e7a3f7 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -47,8 +47,7 @@ /* quirks to control the device */ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) #define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1) -#define I2C_HID_QUIRK_RESEND_REPORT_DESCR BIT(2) -#define I2C_HID_QUIRK_NO_RUNTIME_PM BIT(3) +#define I2C_HID_QUIRK_NO_RUNTIME_PM BIT(2) /* flags */ #define I2C_HID_STARTED 0 @@ -172,8 +171,6 @@ static const struct i2c_hid_quirks { { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, I2C_HID_QUIRK_NO_IRQ_AFTER_RESET | I2C_HID_QUIRK_NO_RUNTIME_PM }, - { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH, - I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { 0, 0 } }; @@ -1241,22 +1238,13 @@ static int i2c_hid_resume(struct device *dev) /* Instead of resetting device, simply powers the device on. This * solves "incomplete reports" on Raydium devices 2386:3118 and - * 2386:4B33 + * 2386:4B33 and fixes various SIS touchscreens no longer sending + * data after a suspend/resume. */ ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); if (ret) return ret; - /* Some devices need to re-send report descr cmd - * after resume, after this it will be back normal. - * otherwise it issues too many incomplete reports. - */ - if (ihid->quirks & I2C_HID_QUIRK_RESEND_REPORT_DESCR) { - ret = i2c_hid_command(client, &hid_report_descr_cmd, NULL, 0); - if (ret) - return ret; - } - if (hid->driver && hid->driver->reset_resume) { ret = hid->driver->reset_resume(hid); return ret; From 9ff3541e3ddf96800ce8fcd225c9e7956da49418 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 11 Sep 2018 16:45:43 -0700 Subject: [PATCH 203/499] HID: intel-ish-hid: Enable Ice Lake mobile Added PCI ID for Ice Lake mobile platform. 
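For context, a sketch of the mechanism such a change relies on (table and macro names here are made up for illustration, and it assumes the driver exports its ID table with MODULE_DEVICE_TABLE() as PCI drivers normally do): a single new pci_device_id entry is all the PCI core needs to match and probe the driver on the new platform.

#include <linux/module.h>
#include <linux/pci.h>

#define EXAMPLE_ICL_MOBILE_DEVICE_ID	0x34FC	/* ID added by this patch */

static const struct pci_device_id example_ish_pci_tbl[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, EXAMPLE_ICL_MOBILE_DEVICE_ID)},
	{0, }
};
MODULE_DEVICE_TABLE(pci, example_ish_pci_tbl);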
Signed-off-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/hw-ish.h | 1 + drivers/hid/intel-ish-hid/ipc/pci-ish.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index da133716bed0..08a8327dfd22 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -29,6 +29,7 @@ #define CNL_Ax_DEVICE_ID 0x9DFC #define GLK_Ax_DEVICE_ID 0x31A2 #define CNL_H_DEVICE_ID 0xA37C +#define ICL_MOBILE_DEVICE_ID 0x34FC #define SPT_H_DEVICE_ID 0xA135 #define REVISION_ID_CHT_A0 0x6 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index a1125a5c7965..256b3016116c 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -38,6 +38,7 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CNL_Ax_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, GLK_Ax_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CNL_H_DEVICE_ID)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ICL_MOBILE_DEVICE_ID)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, SPT_H_DEVICE_ID)}, {0, } }; From 8604895a34d92f5e186ceb931b0d1b384030ea3d Mon Sep 17 00:00:00 2001 From: Michael Bringmann Date: Thu, 20 Sep 2018 11:45:13 -0500 Subject: [PATCH 204/499] powerpc/pseries: Fix uninitialized timer reset on migration After migration of a powerpc LPAR, the kernel executes code to update the system state to reflect new platform characteristics. Such changes include modifications to device tree properties provided to the system by PHYP. Property notifications received by the post_mobility_fixup() code are passed along to the kernel in general through a call to of_update_property() which in turn passes such events back to all modules through entries like the '.notifier_call' function within the NUMA module. When the NUMA module updates its state, it resets its event timer. If this occurs after a previous call to stop_topology_update() or on a system without VPHN enabled, the code runs into an uninitialized timer structure and crashes. This patch adds a safety check along this path toward the problem code. An example crash log is as follows. ibmvscsi 30000081: Re-enabling adapter! ------------[ cut here ]------------ kernel BUG at kernel/time/timer.c:958! Oops: Exception in kernel mode, sig: 5 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: nfsv3 nfs_acl nfs tcp_diag udp_diag inet_diag lockd unix_diag af_packet_diag netlink_diag grace fscache sunrpc xts vmx_crypto pseries_rng sg binfmt_misc ip_tables xfs libcrc32c sd_mod ibmvscsi ibmveth scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod CPU: 11 PID: 3067 Comm: drmgr Not tainted 4.17.0+ #179 ...
NIP mod_timer+0x4c/0x400 LR reset_topology_timer+0x40/0x60 Call Trace: 0xc0000003f9407830 (unreliable) reset_topology_timer+0x40/0x60 dt_update_callback+0x100/0x120 notifier_call_chain+0x90/0x100 __blocking_notifier_call_chain+0x60/0x90 of_property_notify+0x90/0xd0 of_update_property+0x104/0x150 update_dt_property+0xdc/0x1f0 pseries_devicetree_update+0x2d0/0x510 post_mobility_fixup+0x7c/0xf0 migration_store+0xa4/0xc0 kobj_attr_store+0x30/0x60 sysfs_kf_write+0x64/0xa0 kernfs_fop_write+0x16c/0x240 __vfs_write+0x40/0x200 vfs_write+0xc8/0x240 ksys_write+0x5c/0x100 system_call+0x58/0x6c Fixes: 5d88aa85c00b ("powerpc/pseries: Update CPU maps when device tree is updated") Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Michael Bringmann Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 35ac5422903a..b5a71baedbc2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1452,7 +1452,8 @@ static struct timer_list topology_timer; static void reset_topology_timer(void) { - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); + if (vphn_enabled) + mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); } #ifdef CONFIG_SMP From f5fad711c06e652f90f581fc7c2caee327c33d31 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 24 Sep 2018 15:28:10 +0200 Subject: [PATCH 205/499] USB: serial: simple: add Motorola Tetra MTP6550 id Add device-id for the Motorola Tetra radio MTP6550. Bus 001 Device 004: ID 0cad:9012 Motorola CGISS Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x0cad Motorola CGISS idProduct 0x9012 bcdDevice 24.16 iManufacturer 1 Motorola Solutions, Inc. 
iProduct 2 TETRA PEI interface iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 55 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 3 Generic Serial config bmAttributes 0x80 (Bus Powered) MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 bNumConfigurations 1 Device Status: 0x0000 (Bus Powered) Reported-by: Hans Hult Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/usb-serial-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 40864c2bd9dc..4d0273508043 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -84,7 +84,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ - { USB_DEVICE(0x0cad, 0x9011) } /* Motorola Solutions TETRA PEI */ + { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ + { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ From de5c95d0f518537f59ee5aef762abc46f868c377 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 20 Sep 2018 22:33:00 -0700 Subject: [PATCH 206/499] RDMA/bnxt_re: Fix system crash during RDMA resource initialization bnxt_re_ib_reg acquires and releases the rtnl lock whenever it accesses the L2 driver. The following sequence can trigger a crash Acquires the rtnl_lock -> Registers roce driver callback with L2 driver -> release the rtnl lock bnxt_re acquires the rtnl_lock -> Request for MSIx vectors -> release the rtnl_lock Issue happens when bnxt_re proceeds with remaining part of initialization and L2 driver invokes bnxt_ulp_irq_stop as a part of bnxt_open_nic. The crash is in bnxt_qplib_nq_stop_irq as the NQ structures are not initialized yet, [ 3551.726647] BUG: unable to handle kernel NULL pointer dereference at (null) [ 3551.726656] IP: [] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] [ 3551.726674] PGD 0 [ 3551.726679] Oops: 0002 1 SMP ... [ 3551.726822] Hardware name: Dell Inc. 
PowerEdge R720/08RW36, BIOS 2.4.3 07/09/2014 [ 3551.726826] task: ffff97e30eec5ee0 ti: ffff97e3173bc000 task.ti: ffff97e3173bc000 [ 3551.726829] RIP: 0010:[] [] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] ... [ 3551.726872] Call Trace: [ 3551.726886] [] bnxt_re_stop_irq+0x4e/0x70 [bnxt_re] [ 3551.726899] [] bnxt_ulp_irq_stop+0x43/0x70 [bnxt_en] [ 3551.726908] [] bnxt_reserve_rings+0x174/0x1e0 [bnxt_en] [ 3551.726917] [] __bnxt_open_nic+0x368/0x9a0 [bnxt_en] [ 3551.726925] [] bnxt_open_nic+0x1b/0x50 [bnxt_en] [ 3551.726934] [] bnxt_setup_mq_tc+0x11f/0x260 [bnxt_en] [ 3551.726943] [] bnxt_dcbnl_ieee_setets+0xb8/0x1f0 [bnxt_en] [ 3551.726954] [] dcbnl_ieee_set+0x9a/0x250 [ 3551.726966] [] ? __alloc_skb+0xa1/0x2d0 [ 3551.726972] [] dcb_doit+0x13a/0x210 [ 3551.726981] [] rtnetlink_rcv_msg+0xa7/0x260 [ 3551.726989] [] ? rtnl_unicast+0x20/0x30 [ 3551.726996] [] ? __kmalloc_node_track_caller+0x58/0x290 [ 3551.727002] [] ? dcb_doit+0x166/0x210 [ 3551.727007] [] ? __alloc_skb+0x8d/0x2d0 [ 3551.727012] [] ? rtnl_newlink+0x880/0x880 ... [ 3551.727104] [] system_call_fastpath+0x1c/0x21 ... [ 3551.727164] RIP [] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] [ 3551.727175] RSP [ 3551.727177] CR2: 0000000000000000 Avoid this inconsistent state and system crash by acquiring the rtnl lock for the entire duration of device initialization. Re-factor the code to remove the rtnl lock from the individual function and acquire and release it from the caller. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Fixes: 6e04b1035689 ("RDMA/bnxt_re: Fix broken RoCE driver due to recent L2 driver changes") Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 93 ++++++++++++---------------- 1 file changed, 38 insertions(+), 55 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 20b9f31052bf..85cd1a3593d6 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -78,7 +78,7 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list); /* Mutex to protect the list of bnxt_re devices added */ static DEFINE_MUTEX(bnxt_re_dev_lock); static struct workqueue_struct *bnxt_re_wq; -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait); +static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev); /* SR-IOV helper functions */ @@ -182,7 +182,7 @@ static void bnxt_re_shutdown(void *p) if (!rdev) return; - bnxt_re_ib_unreg(rdev, false); + bnxt_re_ib_unreg(rdev); } static void bnxt_re_stop_irq(void *handle) @@ -251,7 +251,7 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { /* Driver registration routines used to let the networking driver (bnxt_en) * to know that the RoCE driver is now installed */ -static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait) +static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; int rc; @@ -260,14 +260,9 @@ static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait) return -EINVAL; en_dev = rdev->en_dev; - /* Acquire rtnl lock if it is not invokded from netdev event */ - if (lock_wait) - rtnl_lock(); rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev, BNXT_ROCE_ULP); - if (lock_wait) - rtnl_unlock(); return rc; } @@ -281,14 +276,12 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) en_dev = rdev->en_dev; - rtnl_lock(); rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP, &bnxt_re_ulp_ops, rdev); - rtnl_unlock(); return 
rc; } -static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) +static int bnxt_re_free_msix(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; int rc; @@ -298,13 +291,9 @@ static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) en_dev = rdev->en_dev; - if (lock_wait) - rtnl_lock(); rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP); - if (lock_wait) - rtnl_unlock(); return rc; } @@ -320,7 +309,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus()); - rtnl_lock(); num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP, rdev->msix_entries, num_msix_want); @@ -335,7 +323,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) } rdev->num_msix = num_msix_got; done: - rtnl_unlock(); return rc; } @@ -358,24 +345,18 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, fw_msg->timeout = timeout; } -static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, - bool lock_wait) +static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ring_free_input req = {0}; struct hwrm_ring_free_output resp; struct bnxt_fw_msg fw_msg; - bool do_unlock = false; int rc = -EINVAL; if (!en_dev) return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - if (lock_wait) { - rtnl_lock(); - do_unlock = true; - } bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; @@ -386,8 +367,6 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, if (rc) dev_err(rdev_to_dev(rdev), "Failed to free HW ring:%d :%#x", req.ring_id, rc); - if (do_unlock) - rtnl_unlock(); return rc; } @@ -405,7 +384,6 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - rtnl_lock(); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1); req.enables = 0; req.page_tbl_addr = cpu_to_le64(dma_arr[0]); @@ -426,27 +404,21 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, if (!rc) *fw_ring_id = le16_to_cpu(resp.ring_id); - rtnl_unlock(); return rc; } static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, - u32 fw_stats_ctx_id, bool lock_wait) + u32 fw_stats_ctx_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_stat_ctx_free_input req = {0}; struct bnxt_fw_msg fw_msg; - bool do_unlock = false; int rc = -EINVAL; if (!en_dev) return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - if (lock_wait) { - rtnl_lock(); - do_unlock = true; - } bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1); req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); @@ -457,8 +429,6 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, dev_err(rdev_to_dev(rdev), "Failed to free HW stats context %#x", rc); - if (do_unlock) - rtnl_unlock(); return rc; } @@ -478,7 +448,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, return rc; memset(&fw_msg, 0, sizeof(fw_msg)); - rtnl_lock(); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); @@ -490,7 +459,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, if (!rc) *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id); - rtnl_unlock(); return rc; } @@ -929,19 +897,19 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) return rc; } -static void bnxt_re_free_nq_res(struct 
bnxt_re_dev *rdev, bool lock_wait) +static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) { int i; for (i = 0; i < rdev->num_msix - 1; i++) { - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, lock_wait); + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); bnxt_qplib_free_nq(&rdev->nq[i]); } } -static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait) +static void bnxt_re_free_res(struct bnxt_re_dev *rdev) { - bnxt_re_free_nq_res(rdev, lock_wait); + bnxt_re_free_nq_res(rdev); if (rdev->qplib_res.dpi_tbl.max) { bnxt_qplib_dealloc_dpi(&rdev->qplib_res, @@ -1219,7 +1187,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait) +static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { int i, rc; @@ -1234,28 +1202,27 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait) cancel_delayed_work(&rdev->worker); bnxt_re_cleanup_res(rdev); - bnxt_re_free_res(rdev, lock_wait); + bnxt_re_free_res(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to deinitialize RCFW: %#x", rc); - bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, - lock_wait); + bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) { - rc = bnxt_re_free_msix(rdev, lock_wait); + rc = bnxt_re_free_msix(rdev); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to free MSI-X vectors: %#x", rc); } if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) { - rc = bnxt_re_unregister_netdev(rdev, lock_wait); + rc = bnxt_re_unregister_netdev(rdev); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to unregister with netdev: %#x", rc); @@ -1276,6 +1243,12 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) { int i, j, rc; + bool locked; + + /* Acquire rtnl lock through out this function */ + rtnl_lock(); + locked = true; + /* Registered a new RoCE device instance to netdev */ rc = bnxt_re_register_netdev(rdev); if (rc) { @@ -1374,12 +1347,16 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); } + rtnl_unlock(); + locked = false; + /* Register ib dev */ rc = bnxt_re_register_ib(rdev); if (rc) { pr_err("Failed to register with IB: %#x\n", rc); goto fail; } + set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); dev_info(rdev_to_dev(rdev), "Device registered successfully"); for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) { rc = device_create_file(&rdev->ibdev.dev, @@ -1395,7 +1372,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) goto fail; } } - set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, &rdev->active_width); set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); @@ -1404,17 +1380,21 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) return 0; free_sctx: - bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true); + bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); free_ctx: bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); disable_rcfw: bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); 
free_ring: - bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true); + bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); free_rcfw: bnxt_qplib_free_rcfw_channel(&rdev->rcfw); fail: - bnxt_re_ib_unreg(rdev, true); + if (!locked) + rtnl_lock(); + bnxt_re_ib_unreg(rdev); + rtnl_unlock(); + return rc; } @@ -1567,7 +1547,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, */ if (atomic_read(&rdev->sched_count) > 0) goto exit; - bnxt_re_ib_unreg(rdev, false); + bnxt_re_ib_unreg(rdev); bnxt_re_remove_one(rdev); bnxt_re_dev_unreg(rdev); break; @@ -1646,7 +1626,10 @@ static void __exit bnxt_re_mod_exit(void) */ flush_workqueue(bnxt_re_wq); bnxt_re_dev_stop(rdev); - bnxt_re_ib_unreg(rdev, true); + /* Acquire the rtnl_lock as the L2 resources are freed here */ + rtnl_lock(); + bnxt_re_ib_unreg(rdev); + rtnl_unlock(); bnxt_re_remove_one(rdev); bnxt_re_dev_unreg(rdev); } From 4679b61f2640e07724507fcd9dc070517bc6e6cb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 Sep 2018 17:23:01 +0200 Subject: [PATCH 207/499] KVM: x86: never trap MSR_KERNEL_GS_BASE KVM has an old optimization whereby accesses to the kernel GS base MSR are trapped when the guest is in 32-bit and not when it is in 64-bit mode. The idea is that swapgs is not available in 32-bit mode, thus the guest has no reason to access the MSR unless in 64-bit mode and 32-bit applications need not pay the price of switching the kernel GS base between the host and the guest values. However, this optimization adds complexity to the code for little benefit (these days most guests are going to be 64-bit anyway) and in fact broke after commit 678e315e78a7 ("KVM: vmx: add dedicated utility to access guest's kernel_gs_base", 2018-08-06); the guest kernel GS base can be corrupted across SMIs and UEFI Secure Boot is therefore broken (a secure boot Linux guest, for example, fails to reach the login prompt about half the time). This patch just removes the optimization; the kernel GS base MSR is now never trapped by KVM, similarly to the FS and GS base MSRs. 
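To make the resulting model concrete, a simplified sketch of the switching that now happens unconditionally on 64-bit hosts (the helper names below are made up; the real logic sits in vmx_prepare_switch_to_guest() and vmx_prepare_switch_to_host() shown in the hunks that follow, and the host-side restore is implied rather than quoted there):

/* Entering guest context: load the guest's kernel GS base. */
static void kernel_gs_base_to_guest(struct vcpu_vmx *vmx)
{
	vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
}

/* Back in host context: save the guest value, restore the host value. */
static void kernel_gs_base_to_host(struct vcpu_vmx *vmx)
{
	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
}

Because this pair now runs regardless of guest mode, there is no cached value that can go stale when the guest moves between 32-bit and 64-bit mode or across SMIs.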
Fixes: 678e315e78a780dbef384b92339c8414309dbc11 Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 47 ++++++++++------------------------------------ 1 file changed, 10 insertions(+), 37 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 06412ba46aa3..8b066480224b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO); #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 -#define MSR_BITMAP_MODE_LM 4 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL @@ -2899,8 +2898,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); } - if (is_long_mode(&vmx->vcpu)) - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #else savesegment(fs, fs_sel); savesegment(gs, gs_sel); @@ -2951,8 +2949,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) vmx->loaded_cpu_state = NULL; #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #endif if (host_state->ldt_sel || (host_state->gs_sel & 7)) { kvm_load_ldt(host_state->ldt_sel); @@ -2980,24 +2977,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - rdmsrl(MSR_KERNEL_GS_BASE, - vmx->msr_guest_kernel_gs_base); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + preempt_enable(); return vmx->msr_guest_kernel_gs_base; } static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); + preempt_enable(); vmx->msr_guest_kernel_gs_base = data; } #endif @@ -5073,19 +5065,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!msr) return; - /* - * MSR_KERNEL_GS_BASE is not intercepted when the guest is in - * 64-bit mode as a 64-bit kernel may frequently access the - * MSR. This means we need to manually save/restore the MSR - * when switching between guest and host state, but only if - * the guest is in 64-bit mode. Sync our cached value if the - * guest is transitioning to 32-bit mode and the CPU contains - * guest state, i.e. the cache is stale. 
- */ -#ifdef CONFIG_X86_64 - if (!(efer & EFER_LMA)) - (void)vmx_read_guest_kernel_gs_base(vmx); -#endif vcpu->arch.efer = efer; if (efer & EFER_LMA) { vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); @@ -6078,9 +6057,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) mode |= MSR_BITMAP_MODE_X2APIC_APICV; } - if (is_long_mode(vcpu)) - mode |= MSR_BITMAP_MODE_LM; - return mode; } @@ -6121,9 +6097,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) if (!changed) return; - vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, - !(mode & MSR_BITMAP_MODE_LM)); - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); From 469ed9d823b7d240d6b9574f061ded7c3834c167 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 21 Sep 2018 16:34:04 +0100 Subject: [PATCH 208/499] arm64: hugetlb: Fix handling of young ptes In the contiguous bit hugetlb break-before-make code we assume that all hugetlb pages are young. In fact, remove_migration_pte is able to place an old hugetlb pte so this assumption is not valid. This patch fixes the contiguous hugetlb scanning code to preserve young ptes. Fixes: d8bdcff28764 ("arm64: hugetlb: Add break-before-make logic for contiguous entries") Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/mm/hugetlbpage.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 192b3ba07075..f85be2f8b140 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) { @@ -340,10 +343,13 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, if (!pte_same(orig_pte, pte)) changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); From 031e6e6b4e1277e76e73a6ab209095ad9bf3ce52 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 21 Sep 2018 16:34:05 +0100 Subject: [PATCH 209/499] arm64: hugetlb: Avoid unnecessary clearing in huge_ptep_set_access_flags For contiguous hugetlb, huge_ptep_set_access_flags performs a get_clear_flush (which then flushes the TLBs) even when no change of ptes is necessary. Unfortunately, this behaviour can lead to back-to-back page faults being generated when running with multiple threads that access the same contiguous huge page. Thread 1 | Thread 2 -----------------------------+------------------------------ hugetlb_fault | huge_ptep_set_access_flags | -> invalidate pte range | hugetlb_fault continue processing | wait for hugetlb_fault_mutex release mutex and return | huge_ptep_set_access_flags | -> invalidate pte range hugetlb_fault ... This patch changes huge_ptep_set_access_flags s.t. 
we first read the contiguous range of ptes (whilst preserving dirty information); the pte range is only then invalidated where necessary and this prevents further spurious page faults. Fixes: d8bdcff28764 ("arm64: hugetlb: Add break-before-make logic for contiguous entries") Reported-by: Lei Zhang Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/mm/hugetlbpage.c | 38 +++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index f85be2f8b140..f58ea503ad01 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -323,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } +/* + * huge_ptep_set_access_flags will update access flags (dirty, accesssed) + * and write permission. + * + * For a contiguous huge pte range we need to check whether or not write + * permission has to change only on the first pte in the set. Then for + * all the contiguous ptes we need to check whether or not there is a + * discrepancy between dirty or young. + */ +static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) +{ + int i; + + if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + return 1; + + for (i = 0; i < ncontig; i++) { + pte_t orig_pte = huge_ptep_get(ptep + i); + + if (pte_dirty(pte) != pte_dirty(orig_pte)) + return 1; + + if (pte_young(pte) != pte_young(orig_pte)) + return 1; + } + + return 0; +} + int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i, changed = 0; + int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; @@ -339,9 +368,10 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; + if (!__cont_access_flags_changed(ptep, pte, ncontig)) + return 0; + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); - if (!pte_same(orig_pte, pte)) - changed = 1; /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) @@ -354,7 +384,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); - return changed; + return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, From 3ab97942d0213b6583a5408630a8cbbfbf54730f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 19 Sep 2018 17:14:01 -0700 Subject: [PATCH 210/499] ARM: dts: BCM63xx: Fix incorrect interrupt specifiers A number of our interrupts were incorrectly specified, fix both the PPI and SPI interrupts to be correct. 
Fixes: b5762cacc411 ("ARM: bcm63138: add NAND DT support") Fixes: 46d4bca0445a ("ARM: BCM63XX: add BCM63138 minimal Device Tree") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm63138.dtsi | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf..6df61518776f 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ gic: interrupt-controller@1e100 { global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = ; + interrupts = ; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = ; + interrupts = ; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = ; + interrupts = ; }; armpll: armpll { @@ -158,7 +160,7 @@ timer: timer@80 { serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial0: serial@600 { serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ nand: nand@2000 { reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = ; + interrupts = ; interrupt-names = "nand"; }; From d8e2262a5044c1a05b4da51e5d0976f10c487a9f Mon Sep 17 00:00:00 2001 From: Saif Hasan Date: Fri, 21 Sep 2018 14:30:05 -0700 Subject: [PATCH 211/499] mpls: allow routes on ip6gre devices Summary: This appears to be necessary and sufficient change to enable `MPLS` on `ip6gre` tunnels (RFC4023). This diff allows IP6GRE devices to be recognized by MPLS kernel module and hence user can configure interface to accept packets with mpls headers as well setup mpls routes on them. Test Plan: Test plan consists of multiple containers connected via GRE-V6 tunnel. Then carrying out testing steps as below. 
- Carry out necessary sysctl settings on all containers ``` sysctl -w net.mpls.platform_labels=65536 sysctl -w net.mpls.ip_ttl_propagate=1 sysctl -w net.mpls.conf.lo.input=1 ``` - Establish IP6GRE tunnels ``` ip -6 tunnel add name if_1_2_1 mode ip6gre \ local 2401:db00:21:6048:feed:0::1 \ remote 2401:db00:21:6048:feed:0::2 key 1 ip link set dev if_1_2_1 up sysctl -w net.mpls.conf.if_1_2_1.input=1 ip -4 addr add 169.254.0.2/31 dev if_1_2_1 scope link ip -6 tunnel add name if_1_3_1 mode ip6gre \ local 2401:db00:21:6048:feed:0::1 \ remote 2401:db00:21:6048:feed:0::3 key 1 ip link set dev if_1_3_1 up sysctl -w net.mpls.conf.if_1_3_1.input=1 ip -4 addr add 169.254.0.4/31 dev if_1_3_1 scope link ``` - Install MPLS encap rules on node-1 towards node-2 ``` ip route add 192.168.0.11/32 nexthop encap mpls 32/64 \ via inet 169.254.0.3 dev if_1_2_1 ``` - Install MPLS forwarding rules on node-2 and node-3 ``` // node2 ip -f mpls route add 32 via inet 169.254.0.7 dev if_2_4_1 // node3 ip -f mpls route add 64 via inet 169.254.0.12 dev if_4_3_1 ``` - Ping 192.168.0.11 (node4) from 192.168.0.1 (node1) (where routing towards 192.168.0.1 is via IP route directly towards node1 from node4) ``` ping 192.168.0.11 ``` - tcpdump on interface to capture ping packets wrapped within MPLS header which inturn wrapped within IP6GRE header ``` 16:43:41.121073 IP6 2401:db00:21:6048:feed::1 > 2401:db00:21:6048:feed::2: DSTOPT GREv0, key=0x1, length 100: MPLS (label 32, exp 0, ttl 255) (label 64, exp 0, [S], ttl 255) IP 192.168.0.1 > 192.168.0.11: ICMP echo request, id 1208, seq 45, length 64 0x0000: 6000 2cdb 006c 3c3f 2401 db00 0021 6048 `.,..l Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7a4de6d618b1..8fbe6cdbe255 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1533,10 +1533,14 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, unsigned int flags; if (event == NETDEV_REGISTER) { - /* For now just support Ethernet, IPGRE, SIT and IPIP devices */ + + /* For now just support Ethernet, IPGRE, IP6GRE, SIT and + * IPIP devices + */ if (dev->type == ARPHRD_ETHER || dev->type == ARPHRD_LOOPBACK || dev->type == ARPHRD_IPGRE || + dev->type == ARPHRD_IP6GRE || dev->type == ARPHRD_SIT || dev->type == ARPHRD_TUNNEL) { mdev = mpls_add_dev(dev); From ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 24 Sep 2018 15:48:19 +0200 Subject: [PATCH 212/499] ip_tunnel: be careful when accessing the inner header Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6 ("ip6_tunnel: be careful when accessing the inner header") even for ipv4 tunnels. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Suggested-by: Cong Wang Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- net/ipv4/ip_tunnel.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c4f5602308ed..284a22154b4e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); + unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, __be32 dst; bool connected; + /* ensure we can access the inner net header, for several users below */ + if (skb->protocol == htons(ETH_P_IP)) + inner_nhdr_len = sizeof(struct iphdr); + else if (skb->protocol == htons(ETH_P_IPV6)) + inner_nhdr_len = sizeof(struct ipv6hdr); + if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) + goto tx_error; + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); From f4a518797b40e956b6e6220b42657e045dc24470 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 24 Sep 2018 16:56:13 +0200 Subject: [PATCH 213/499] net: mvneta: fix the remaining Rx descriptor unmapping issues With CONFIG_DMA_API_DEBUG enabled we get DMA unmapping warning in various places of the mvneta driver, for example when putting down an interface while traffic is passing through. The issue is when using s/w buffer management, the Rx buffers are mapped using dma_map_page but unmapped with dma_unmap_single. This patch fixes this by using the right unmapping function. Fixes: 562e2f467e71 ("net: mvneta: Improve the buffer allocation method for SWBM") Signed-off-by: Antoine Tenart Reviewed-by: Gregory CLEMENT Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 2db9708f2e24..b4ed7d394d07 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1890,8 +1890,8 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp, if (!data || !(rx_desc->buf_phys_addr)) continue; - dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, - MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); + dma_unmap_page(pp->dev->dev.parent, rx_desc->buf_phys_addr, + PAGE_SIZE, DMA_FROM_DEVICE); __free_page(data); } } @@ -2039,9 +2039,8 @@ static int mvneta_rx_swbm(struct napi_struct *napi, frag_offset, frag_size, PAGE_SIZE); - dma_unmap_single(dev->dev.parent, phys_addr, - PAGE_SIZE, - DMA_FROM_DEVICE); + dma_unmap_page(dev->dev.parent, phys_addr, + PAGE_SIZE, DMA_FROM_DEVICE); rxq->left_size -= frag_size; } From 57a489786de9ec37d6e25ef1305dc337047f0236 Mon Sep 17 00:00:00 2001 From: James Cowgill Date: Thu, 6 Sep 2018 22:57:56 +0100 Subject: [PATCH 214/499] RISC-V: include linux/ftrace.h in asm-prototypes.h Building a riscv kernel with CONFIG_FUNCTION_TRACER and CONFIG_MODVERSIONS enabled results in these two warnings: MODPOST vmlinux.o WARNING: EXPORT symbol "return_to_handler" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "_mcount" [vmlinux] version generation failed, symbol will not be versioned. When exporting symbols from an assembly file, the MODVERSIONS code requires their prototypes to be defined in asm-prototypes.h (see scripts/Makefile.build). 
Since both of these symbols have prototypes defined in linux/ftrace.h, include this header from RISC-V's asm-prototypes.h. Reported-by: Karsten Merker Signed-off-by: James Cowgill Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm-prototypes.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 arch/riscv/include/asm/asm-prototypes.h diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..c9fecd120d18 --- /dev/null +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_PROTOTYPES_H + +#include +#include + +#endif /* _ASM_RISCV_PROTOTYPES_H */ From 6697576788816985f9c79da190abfaad7c9e1738 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 20 Sep 2018 11:03:22 -0700 Subject: [PATCH 215/499] i2c: i2c-qcom-geni: Properly handle DMA safe buffers We shouldn't attempt to DMA map the message buffers passed into this driver from the i2c core unless the message we're mapping have been properly setup for DMA. The i2c core indicates such a situation by setting the I2C_M_DMA_SAFE flag, so check for that flag before using DMA mode. We can also bounce the buffer if it isn't already mapped properly by using the i2c_get_dma_safe_msg_buf() APIs, so do that when we want to use DMA for a message. This fixes a problem where the kernel oopses cleaning pages for a buffer that's mapped into the vmalloc space. The pages are returned from request_firmware() and passed down directly to the i2c master to write to the i2c touchscreen device. Mapping vmalloc buffers with dma_map_single() won't work reliably, causing an oops like below: Unable to handle kernel paging request at virtual address ffffffc01391d000 ... Reported-by: Philip Chen Signed-off-by: Stephen Boyd Reviewed-by: Douglas Anderson Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-geni.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 36732eb688a4..9f2eb02481d3 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -367,20 +367,26 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, dma_addr_t rx_dma; enum geni_se_xfer_mode mode; unsigned long time_left = XFER_TIMEOUT; + void *dma_buf; gi2c->cur = msg; - mode = msg->len > 32 ? GENI_SE_DMA : GENI_SE_FIFO; + mode = GENI_SE_FIFO; + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) + mode = GENI_SE_DMA; + geni_se_select_mode(&gi2c->se, mode); writel_relaxed(msg->len, gi2c->se.base + SE_I2C_RX_TRANS_LEN); geni_se_setup_m_cmd(&gi2c->se, I2C_READ, m_param); if (mode == GENI_SE_DMA) { int ret; - ret = geni_se_rx_dma_prep(&gi2c->se, msg->buf, msg->len, + ret = geni_se_rx_dma_prep(&gi2c->se, dma_buf, msg->len, &rx_dma); if (ret) { mode = GENI_SE_FIFO; geni_se_select_mode(&gi2c->se, mode); + i2c_put_dma_safe_msg_buf(dma_buf, msg, false); } } @@ -393,6 +399,7 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, if (gi2c->err) geni_i2c_rx_fsm_rst(gi2c); geni_se_rx_dma_unprep(&gi2c->se, rx_dma, msg->len); + i2c_put_dma_safe_msg_buf(dma_buf, msg, !gi2c->err); } return gi2c->err; } @@ -403,20 +410,26 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, dma_addr_t tx_dma; enum geni_se_xfer_mode mode; unsigned long time_left; + void *dma_buf; gi2c->cur = msg; - mode = msg->len > 32 ? 
GENI_SE_DMA : GENI_SE_FIFO; + mode = GENI_SE_FIFO; + dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); + if (dma_buf) + mode = GENI_SE_DMA; + geni_se_select_mode(&gi2c->se, mode); writel_relaxed(msg->len, gi2c->se.base + SE_I2C_TX_TRANS_LEN); geni_se_setup_m_cmd(&gi2c->se, I2C_WRITE, m_param); if (mode == GENI_SE_DMA) { int ret; - ret = geni_se_tx_dma_prep(&gi2c->se, msg->buf, msg->len, + ret = geni_se_tx_dma_prep(&gi2c->se, dma_buf, msg->len, &tx_dma); if (ret) { mode = GENI_SE_FIFO; geni_se_select_mode(&gi2c->se, mode); + i2c_put_dma_safe_msg_buf(dma_buf, msg, false); } } @@ -432,6 +445,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, if (gi2c->err) geni_i2c_tx_fsm_rst(gi2c); geni_se_tx_dma_unprep(&gi2c->se, tx_dma, msg->len); + i2c_put_dma_safe_msg_buf(dma_buf, msg, !gi2c->err); } return gi2c->err; } From ea51e17b953f391c0a7d6684141e76a5ab209fef Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Sep 2018 18:14:37 +0100 Subject: [PATCH 216/499] i2c: i2c-isch: fix spelling mistake "unitialized" -> "uninitialized" Trivial fix to spelling mistake in dev_notice message. Signed-off-by: Colin Ian King Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-isch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-isch.c b/drivers/i2c/busses/i2c-isch.c index 0cf1379f4e80..5c754bf659e2 100644 --- a/drivers/i2c/busses/i2c-isch.c +++ b/drivers/i2c/busses/i2c-isch.c @@ -164,7 +164,7 @@ static s32 sch_access(struct i2c_adapter *adap, u16 addr, * run ~75 kHz instead which should do no harm. */ dev_notice(&sch_adapter.dev, - "Clock divider unitialized. Setting defaults\n"); + "Clock divider uninitialized. Setting defaults\n"); outw(backbone_speed / (4 * 100), SMBHSTCLK); } From 74bc2abca7603c956d1e331e8b9bee7b874c1eec Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 27 Aug 2018 12:56:24 +0200 Subject: [PATCH 217/499] iommu/rockchip: Free irqs in shutdown handler In the iommu's shutdown handler we disable runtime-pm which could result in the irq-handler running unclocked and since commit 3fc7c5c0cff3 ("iommu/rockchip: Handle errors returned from PM framework") we warn about that fact. This can cause warnings on shutdown on some Rockchip machines, so free the irqs in the shutdown handler before we disable runtime-pm. 
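As an editorial aside, the ordering this fix relies on can be sketched in a few lines (illustration only, not part of the patch; "my_iommu" is a hypothetical stand-in for struct rk_iommu, and the real change below ends the IRQ walk on -ENXIO rather than on any error):

```
/* Editorial sketch: free the IRQs while the device is still powered and
 * clocked, and only then force runtime suspend, so no handler can run
 * unclocked. */
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

struct my_iommu {
	struct device *dev;
};

static void my_iommu_shutdown(struct platform_device *pdev)
{
	struct my_iommu *iommu = platform_get_drvdata(pdev);
	int i = 0, irq;

	/* 1) Unhook every IRQ first. */
	while ((irq = platform_get_irq(pdev, i++)) >= 0)
		devm_free_irq(iommu->dev, irq, iommu);

	/* 2) Only now power the device down. */
	pm_runtime_force_suspend(&pdev->dev);
}
```

The actual patch that follows uses the same two steps in the same order.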
Reported-by: Enric Balletbo i Serra Fixes: 3fc7c5c0cff3 ("iommu/rockchip: Handle errors returned from PM framework") Signed-off-by: Heiko Stuebner Tested-by: Enric Balletbo i Serra Acked-by: Marc Zyngier Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 258115b10fa9..ad3e2b97469e 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1241,6 +1241,12 @@ static int rk_iommu_probe(struct platform_device *pdev) static void rk_iommu_shutdown(struct platform_device *pdev) { + struct rk_iommu *iommu = platform_get_drvdata(pdev); + int i = 0, irq; + + while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) + devm_free_irq(iommu->dev, irq, iommu); + pm_runtime_force_suspend(&pdev->dev); } From b85bfa246efd24ea3fdb5ee949c28e3110c6d299 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Sat, 22 Sep 2018 13:58:26 -0600 Subject: [PATCH 218/499] pinctrl/amd: poll InterruptEnable bits in amd_gpio_irq_set_type From the AMD BKDG, if WAKE_INT_MASTER_REG.MaskStsEn is set, a software write to the debounce registers of *any* gpio will block wake/interrupt status generation for *all* gpios for a length of time that depends on WAKE_INT_MASTER_REG.MaskStsLength[11:0]. During this period the Interrupt Delivery bit (INTERRUPT_ENABLE) will read as 0. In commit 4c1de0414a1340 ("pinctrl/amd: poll InterruptEnable bits in enable_irq") we tried to fix this same "gpio Interrupts are blocked immediately after writing debounce registers" problem, but incorrectly assumed it only affected the gpio whose debounce was being configured and not ALL gpios. To solve this for all gpios, we move the polling loop from amd_gpio_irq_enable() to amd_gpio_irq_set_type(), while holding the gpio spinlock. This ensures that another gpio operation (e.g. amd_gpio_irq_unmask()) cannot read a temporarily disabled IRQ and incorrectly disable it while trying to modify some other register bits. Fixes: 4c1de0414a1340 ("pinctrl/amd: poll InterruptEnable bits in enable_irq") Signed-off-by: Daniel Kurtz Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 41ccc759b8b8..1425c2874d40 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -348,21 +348,12 @@ static void amd_gpio_irq_enable(struct irq_data *d) unsigned long flags; struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); - u32 mask = BIT(INTERRUPT_ENABLE_OFF) | BIT(INTERRUPT_MASK_OFF); raw_spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); pin_reg |= BIT(INTERRUPT_ENABLE_OFF); pin_reg |= BIT(INTERRUPT_MASK_OFF); writel(pin_reg, gpio_dev->base + (d->hwirq)*4); - /* - * When debounce logic is enabled it takes ~900 us before interrupts - * can be enabled. During this "debounce warm up" period the - * "INTERRUPT_ENABLE" bit will read as 0. Poll the bit here until it - * reads back as 1, signaling that interrupts are now enabled.
- */ - while ((readl(gpio_dev->base + (d->hwirq)*4) & mask) != mask) - continue; raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } @@ -426,7 +417,7 @@ static void amd_gpio_irq_eoi(struct irq_data *d) static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) { int ret = 0; - u32 pin_reg; + u32 pin_reg, pin_reg_irq_en, mask; unsigned long flags, irq_flags; struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); @@ -495,6 +486,28 @@ static int amd_gpio_irq_set_type(struct irq_data *d, unsigned int type) } pin_reg |= CLR_INTR_STAT << INTERRUPT_STS_OFF; + /* + * If WAKE_INT_MASTER_REG.MaskStsEn is set, a software write to the + * debounce registers of any GPIO will block wake/interrupt status + * generation for *all* GPIOs for a lenght of time that depends on + * WAKE_INT_MASTER_REG.MaskStsLength[11:0]. During this period the + * INTERRUPT_ENABLE bit will read as 0. + * + * We temporarily enable irq for the GPIO whose configuration is + * changing, and then wait for it to read back as 1 to know when + * debounce has settled and then disable the irq again. + * We do this polling with the spinlock held to ensure other GPIO + * access routines do not read an incorrect value for the irq enable + * bit of other GPIOs. We keep the GPIO masked while polling to avoid + * spurious irqs, and disable the irq again after polling. + */ + mask = BIT(INTERRUPT_ENABLE_OFF); + pin_reg_irq_en = pin_reg; + pin_reg_irq_en |= mask; + pin_reg_irq_en &= ~BIT(INTERRUPT_MASK_OFF); + writel(pin_reg_irq_en, gpio_dev->base + (d->hwirq)*4); + while ((readl(gpio_dev->base + (d->hwirq)*4) & mask) != mask) + continue; writel(pin_reg, gpio_dev->base + (d->hwirq)*4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); From e50d95e2ad1266f8d3fcdf0724f03dbdffd400aa Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 24 Sep 2018 17:32:11 +0300 Subject: [PATCH 219/499] pinctrl: cannonlake: Fix HOSTSW_OWN register offset of H variant It turns out the HOSTSW_OWN register offset is different between LP and H variants. The latter should use 0xc0 instead so fix that. 
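As an aside for readers, the shape of this fix is to turn the one register offset that differs between the two PCH variants into a macro parameter instead of a shared constant. A simplified sketch of that pattern (hypothetical struct and field names, not the driver's real intel_community descriptor):

```
/* Editorial sketch: per-variant register offsets as macro parameters. */
#define LP_HOSTSW_OWN	0x0b0	/* Cannon Lake-LP */
#define H_HOSTSW_OWN	0x0c0	/* Cannon Lake-H */

struct community {
	unsigned int barno;
	unsigned int hostown_offset;
};

#define COMMUNITY(b, o)	{ .barno = (b), .hostown_offset = (o) }
#define LP_COMMUNITY(b)	COMMUNITY(b, LP_HOSTSW_OWN)
#define H_COMMUNITY(b)	COMMUNITY(b, H_HOSTSW_OWN)

static const struct community h_communities[] = {
	H_COMMUNITY(0),
	H_COMMUNITY(1),
};
```

The patch below does the same thing for the real CNL_COMMUNITY() macro, so the H tables pick up 0xc0 while the LP tables keep 0x0b0.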
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199911 Fixes: a663ccf0fea1 ("pinctrl: intel: Add Intel Cannon Lake PCH-H pin controller support") Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cannonlake.c | 33 +++++++++++++--------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cannonlake.c b/drivers/pinctrl/intel/pinctrl-cannonlake.c index 8d48371caaa2..e7f45d96b0cb 100644 --- a/drivers/pinctrl/intel/pinctrl-cannonlake.c +++ b/drivers/pinctrl/intel/pinctrl-cannonlake.c @@ -15,10 +15,11 @@ #include "pinctrl-intel.h" -#define CNL_PAD_OWN 0x020 -#define CNL_PADCFGLOCK 0x080 -#define CNL_HOSTSW_OWN 0x0b0 -#define CNL_GPI_IE 0x120 +#define CNL_PAD_OWN 0x020 +#define CNL_PADCFGLOCK 0x080 +#define CNL_LP_HOSTSW_OWN 0x0b0 +#define CNL_H_HOSTSW_OWN 0x0c0 +#define CNL_GPI_IE 0x120 #define CNL_GPP(r, s, e, g) \ { \ @@ -30,12 +31,12 @@ #define CNL_NO_GPIO -1 -#define CNL_COMMUNITY(b, s, e, g) \ +#define CNL_COMMUNITY(b, s, e, o, g) \ { \ .barno = (b), \ .padown_offset = CNL_PAD_OWN, \ .padcfglock_offset = CNL_PADCFGLOCK, \ - .hostown_offset = CNL_HOSTSW_OWN, \ + .hostown_offset = (o), \ .ie_offset = CNL_GPI_IE, \ .pin_base = (s), \ .npins = ((e) - (s) + 1), \ @@ -43,6 +44,12 @@ .ngpps = ARRAY_SIZE(g), \ } +#define CNLLP_COMMUNITY(b, s, e, g) \ + CNL_COMMUNITY(b, s, e, CNL_LP_HOSTSW_OWN, g) + +#define CNLH_COMMUNITY(b, s, e, g) \ + CNL_COMMUNITY(b, s, e, CNL_H_HOSTSW_OWN, g) + /* Cannon Lake-H */ static const struct pinctrl_pin_desc cnlh_pins[] = { /* GPP_A */ @@ -442,10 +449,10 @@ static const struct intel_function cnlh_functions[] = { }; static const struct intel_community cnlh_communities[] = { - CNL_COMMUNITY(0, 0, 50, cnlh_community0_gpps), - CNL_COMMUNITY(1, 51, 154, cnlh_community1_gpps), - CNL_COMMUNITY(2, 155, 248, cnlh_community3_gpps), - CNL_COMMUNITY(3, 249, 298, cnlh_community4_gpps), + CNLH_COMMUNITY(0, 0, 50, cnlh_community0_gpps), + CNLH_COMMUNITY(1, 51, 154, cnlh_community1_gpps), + CNLH_COMMUNITY(2, 155, 248, cnlh_community3_gpps), + CNLH_COMMUNITY(3, 249, 298, cnlh_community4_gpps), }; static const struct intel_pinctrl_soc_data cnlh_soc_data = { @@ -803,9 +810,9 @@ static const struct intel_padgroup cnllp_community4_gpps[] = { }; static const struct intel_community cnllp_communities[] = { - CNL_COMMUNITY(0, 0, 67, cnllp_community0_gpps), - CNL_COMMUNITY(1, 68, 180, cnllp_community1_gpps), - CNL_COMMUNITY(2, 181, 243, cnllp_community4_gpps), + CNLLP_COMMUNITY(0, 0, 67, cnllp_community0_gpps), + CNLLP_COMMUNITY(1, 68, 180, cnllp_community1_gpps), + CNLLP_COMMUNITY(2, 181, 243, cnllp_community4_gpps), }; static const struct intel_pinctrl_soc_data cnllp_soc_data = { From 72923e5488f0604fac8ef2c7e683fabd3b4c203b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 24 Sep 2018 17:32:12 +0300 Subject: [PATCH 220/499] Revert "pinctrl: intel: Do pin translation when lock IRQ" This reverts commit 55aedef50d4d810670916d9fce4a40d5da2079e7. Commit 55aedef50d4d ("pinctrl: intel: Do pin translation when lock IRQ") added special translation from GPIO number to hardware pin number to irq_reqres/relres hooks to avoid failure when IRQs are requested. The actual failure happened inside gpiochip_lock_as_irq() because it calls gpiod_get_direction() and pinctrl-intel.c::intel_gpio_get_direction() implementation originally missed the translation so the two hooks made it work by skipping the ->get_direction() call entirely (it overwrote the default GPIOLIB provided functions). 
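For readers unfamiliar with the distinction being discussed here, a rough sketch of what "translation" means: GPIOLIB hands callbacks a 0-based GPIO offset, while the hardware registers are indexed by pin number, so every register access has to map one to the other first. This is an editorial illustration with a hypothetical range table, not the driver's actual data structures:

```
/* Editorial sketch: map a GPIO offset to a hardware pin number. */
struct range {
	unsigned int gpio_base;	/* first GPIO offset in the range */
	unsigned int pin_base;	/* first hardware pin in the range */
	unsigned int npins;
};

static int gpio_to_pin(const struct range *ranges, int nranges,
		       unsigned int offset)
{
	int i;

	for (i = 0; i < nranges; i++) {
		const struct range *r = &ranges[i];

		if (offset >= r->gpio_base &&
		    offset < r->gpio_base + r->npins)
			return r->pin_base + (offset - r->gpio_base);
	}
	return -1;	/* no such GPIO */
}
```

Skipping this translation in ->get_direction() was the original bug; the two IRQ resource hooks worked around it rather than fixing it.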
The proper fix that adds translation to GPIO callbacks was merged with commit 96147db1e1df ("pinctrl: intel: Do pin translation in other GPIO operations as well"). This allows us to use the default GPIOLIB provided functions again. In addition as find out by Benjamin Tissoires the two functions (intel_gpio_irq_reqres()/intel_gpio_irq_relres()) now cause problems of their own because they operate on pin numbers and pass that pin number to gpiochip_lock_as_irq() which actually expects a GPIO number. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199911 Fixes: 55aedef50d4d ("pinctrl: intel: Do pin translation when lock IRQ") Reported-and-tested-by: Benjamin Tissoires Signed-off-by: Mika Westerberg Acked-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-intel.c | 32 --------------------------- 1 file changed, 32 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index ec8dafc94694..1ea3438ea67e 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -887,36 +887,6 @@ static const struct gpio_chip intel_gpio_chip = { .set_config = gpiochip_generic_config, }; -static int intel_gpio_irq_reqres(struct irq_data *d) -{ - struct gpio_chip *gc = irq_data_get_irq_chip_data(d); - struct intel_pinctrl *pctrl = gpiochip_get_data(gc); - int pin; - int ret; - - pin = intel_gpio_to_pin(pctrl, irqd_to_hwirq(d), NULL, NULL); - if (pin >= 0) { - ret = gpiochip_lock_as_irq(gc, pin); - if (ret) { - dev_err(pctrl->dev, "unable to lock HW IRQ %d for IRQ\n", - pin); - return ret; - } - } - return 0; -} - -static void intel_gpio_irq_relres(struct irq_data *d) -{ - struct gpio_chip *gc = irq_data_get_irq_chip_data(d); - struct intel_pinctrl *pctrl = gpiochip_get_data(gc); - int pin; - - pin = intel_gpio_to_pin(pctrl, irqd_to_hwirq(d), NULL, NULL); - if (pin >= 0) - gpiochip_unlock_as_irq(gc, pin); -} - static void intel_gpio_irq_ack(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1132,8 +1102,6 @@ static irqreturn_t intel_gpio_irq(int irq, void *data) static struct irq_chip intel_gpio_irqchip = { .name = "intel-gpio", - .irq_request_resources = intel_gpio_irq_reqres, - .irq_release_resources = intel_gpio_irq_relres, .irq_enable = intel_gpio_irq_enable, .irq_ack = intel_gpio_irq_ack, .irq_mask = intel_gpio_irq_mask, From 8c0f9f5b309d627182d5da72a69246f58bde1026 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 24 Sep 2018 13:18:34 +0100 Subject: [PATCH 221/499] Revert "uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name" This changes UAPI, breaking iwd and libell: ell/key.c: In function 'kernel_dh_compute': ell/key.c:205:38: error: 'struct keyctl_dh_params' has no member named 'private'; did you mean 'dh_private'? struct keyctl_dh_params params = { .private = private, ^~~~~~~ dh_private This reverts commit 8a2336e549d385bb0b46880435b411df8d8200e8. Fixes: 8a2336e549d3 ("uapi/linux/keyctl.h: don't use C++ reserved keyword as a struct member name") Signed-off-by: Lubomir Rintel Signed-off-by: David Howells cc: Randy Dunlap cc: Mat Martineau cc: Stephan Mueller cc: James Morris cc: "Serge E. 
Hallyn" cc: Mat Martineau cc: Andrew Morton cc: Linus Torvalds cc: Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/keyctl.h | 2 +- security/keys/dh.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index 910cc4334b21..7b8c9e19bad1 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -65,7 +65,7 @@ /* keyctl structures */ struct keyctl_dh_params { - __s32 dh_private; + __s32 private; __s32 prime; __s32 base; }; diff --git a/security/keys/dh.c b/security/keys/dh.c index 3b602a1e27fa..711e89d8c415 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -300,7 +300,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params, } dh_inputs.g_size = dlen; - dlen = dh_data_from_key(pcopy.dh_private, &dh_inputs.key); + dlen = dh_data_from_key(pcopy.private, &dh_inputs.key); if (dlen < 0) { ret = dlen; goto out2; From be9e6598aeb0db70a7927d6b3bb4d3d6fb1c3e18 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sat, 8 Sep 2018 09:42:53 +0800 Subject: [PATCH 222/499] iommu/vt-d: Handle memory shortage on pasid table allocation Pasid table memory allocation could return failure due to memory shortage. Limit the pasid table size to 1MiB because current 8MiB contiguous physical memory allocation can be hard to come by. W/o a PASID table, the device could continue to work with only shared virtual memory impacted. So, let's go ahead with context mapping even the memory allocation for pasid table failed. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107783 Fixes: cc580e41260d ("iommu/vt-d: Per PCI device pasid table interfaces") Cc: Ashok Raj Cc: Jacob Pan Cc: Mika Westerberg Reported-and-tested-by: Pelton Kyle D Tested-by: Mika Westerberg Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 +++--- drivers/iommu/intel-pasid.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5f3f10cf9d9d..bedc801b06a0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2540,9 +2540,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev) && info->pasid_supported) { ret = intel_pasid_alloc_table(dev); if (ret) { - __dmar_remove_one_dev_info(info); - spin_unlock_irqrestore(&device_domain_lock, flags); - return NULL; + pr_warn("No pasid table for %s, pasid disabled\n", + dev_name(dev)); + info->pasid_supported = 0; } } spin_unlock_irqrestore(&device_domain_lock, flags); diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h index 1c05ed6fc5a5..1fb5e12b029a 100644 --- a/drivers/iommu/intel-pasid.h +++ b/drivers/iommu/intel-pasid.h @@ -11,7 +11,7 @@ #define __INTEL_PASID_H #define PASID_MIN 0x1 -#define PASID_MAX 0x100000 +#define PASID_MAX 0x20000 struct pasid_entry { u64 val; From cf13435b730a502e814c63c84d93db131e563f5f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 24 Sep 2018 17:27:04 +1000 Subject: [PATCH 223/499] powerpc/tm: Fix userspace r13 corruption When we treclaim we store the userspace checkpointed r13 to a scratch SPR and then later save the scratch SPR to the user thread struct. Unfortunately, this doesn't work as accessing the user thread struct can take an SLB fault and the SLB fault handler will write the same scratch SPRG that now contains the userspace r13. 
To fix this, we store r13 to the kernel stack (which can't fault) before we access the user thread struct. Found by running P8 guest + powervm + disable_1tb_segments + TM. Seen as a random userspace segfault with r13 looking like a kernel address. Signed-off-by: Michael Neuling Reviewed-by: Breno Leitao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/tm.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 6bffbc5affe7..183e8d75936f 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -176,13 +176,20 @@ _GLOBAL(tm_reclaim) std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) - /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + /* + * Store r13 away so we can free up the scratch SPR for the SLB fault + * handler (needed once we start accessing the thread_struct). + */ + GET_SCRATCH0(r11) + std r11, GPR13(r1) + /* Reset MSR RI so we can take SLB faults again */ li r11, MSR_RI mtmsrd r11, 1 + /* Store the PPR in r11 and reset to decent value */ mfspr r11, SPRN_PPR HMT_MEDIUM @@ -211,7 +218,7 @@ _GLOBAL(tm_reclaim) ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ - GET_SCRATCH0(8) /* user r13 */ + ld r8, GPR13(r1) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) std r5, GPR11(r7) From 96dc89d526ef77604376f06220e3d2931a0bfd58 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 25 Sep 2018 19:36:47 +1000 Subject: [PATCH 224/499] powerpc/tm: Avoid possible userspace r1 corruption on reclaim Current we store the userspace r1 to PACATMSCRATCH before finally saving it to the thread struct. In theory an exception could be taken here (like a machine check or SLB miss) that could write PACATMSCRATCH and hence corrupt the userspace r1. The SLB fault currently doesn't touch PACATMSCRATCH, but others do. We've never actually seen this happen but it's theoretically possible. Either way, the code is fragile as it is. This patch saves r1 to the kernel stack (which can't fault) before we turn MSR[RI] back on. PACATMSCRATCH is still used but only with MSR[RI] off. We then copy r1 from the kernel stack to the thread struct once we have MSR[RI] back on. Suggested-by: Breno Leitao Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/tm.S | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 183e8d75936f..7716374786bd 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -178,6 +178,13 @@ _GLOBAL(tm_reclaim) std r11, GPR11(r1) /* Temporary stash */ + /* + * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is + * clobbered by an exception once we turn on MSR_RI below. + */ + ld r11, PACATMSCRATCH(r13) + std r11, GPR1(r1) + /* * Store r13 away so we can free up the scratch SPR for the SLB fault * handler (needed once we start accessing the thread_struct). 
@@ -214,7 +221,7 @@ _GLOBAL(tm_reclaim) SAVE_GPR(8, r7) /* user r8 */ SAVE_GPR(9, r7) /* user r9 */ SAVE_GPR(10, r7) /* user r10 */ - ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ From 2483ef056f6e42f61cd266452e2841165dfe1b5c Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 25 Sep 2018 17:55:15 +0530 Subject: [PATCH 225/499] powerpc/numa: Use associativity if VPHN hcall is successful Currently associativity is used to lookup node-id even if the preceding VPHN hcall failed. However this can cause CPU to be made part of the wrong node, (most likely to be node 0). This is because VPHN is not enabled on KVM guests. With 2ea6263 ("powerpc/topology: Get topology for shared processors at boot"), associativity is used to set to the wrong node. Hence KVM guest topology is broken. For example : A 4 node KVM guest before would have reported. [root@localhost ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 1 2 3 node 0 size: 1746 MB node 0 free: 1604 MB node 1 cpus: 4 5 6 7 node 1 size: 2044 MB node 1 free: 1765 MB node 2 cpus: 8 9 10 11 node 2 size: 2044 MB node 2 free: 1837 MB node 3 cpus: 12 13 14 15 node 3 size: 2044 MB node 3 free: 1903 MB node distances: node 0 1 2 3 0: 10 40 40 40 1: 40 10 40 40 2: 40 40 10 40 3: 40 40 40 10 Would now report: [root@localhost ~]# numactl -H available: 4 nodes (0-3) node 0 cpus: 0 2 3 4 5 6 7 8 9 10 11 12 13 14 15 node 0 size: 1746 MB node 0 free: 1244 MB node 1 cpus: node 1 size: 2044 MB node 1 free: 2032 MB node 2 cpus: 1 node 2 size: 2044 MB node 2 free: 2028 MB node 3 cpus: node 3 size: 2044 MB node 3 free: 2032 MB node distances: node 0 1 2 3 0: 10 40 40 40 1: 40 10 40 40 2: 40 40 10 40 3: 40 40 40 10 Fix this by skipping associativity lookup if the VPHN hcall failed. Fixes: 2ea626306810 ("powerpc/topology: Get topology for shared processors at boot") Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b5a71baedbc2..59d07bd5374a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1204,7 +1204,9 @@ int find_and_online_cpu_nid(int cpu) int new_nid; /* Use associativity from first thread for all siblings */ - vphn_get_associativity(cpu, associativity); + if (vphn_get_associativity(cpu, associativity)) + return cpu_to_node(cpu); + new_nid = associativity_to_nid(associativity); if (new_nid < 0 || !node_possible(new_nid)) new_nid = first_online_node; From e1e5d8a9fe737d94ccc0ccbaf0c97f69a8f3e000 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 25 Sep 2018 08:32:50 +0200 Subject: [PATCH 226/499] net: macb: Clean 64b dma addresses if they are not detected Clear ADDR64 dma bit in DMACFG register in case that HW_DMA_CAP_64B is not detected on 64bit system. The issue was observed when bootloader(u-boot) does not check macb feature at DCFG6 register (DAW64_OFFSET) and enabling 64bit dma support by default. Then macb driver is reading DMACFG register back and only adding 64bit dma configuration but not cleaning it out. Signed-off-by: Michal Simek Acked-by: Nicolas Ferre Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f1a86b422617..58b9744c4058 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2160,6 +2160,7 @@ static void macb_configure_dma(struct macb *bp) else dmacfg &= ~GEM_BIT(TXCOEN); + dmacfg &= ~GEM_BIT(ADDR64); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (bp->hw_dma_cap & HW_DMA_CAP_64B) dmacfg |= GEM_BIT(ADDR64); From 780e83c259fc33e8959fed8dfdad17e378d72b62 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Sep 2018 02:12:30 -0600 Subject: [PATCH 227/499] xen-netback: fix input validation in xenvif_set_hash_mapping() Both len and off are frontend specified values, so we need to make sure there's no overflow when adding the two for the bounds check. We also want to avoid undefined behavior and hence use off to index into ->hash.mapping[] only after bounds checking. This at the same time allows to take care of not applying off twice for the bounds checking against vif->num_queues. It is also insufficient to bounds check copy_op.len, as this is len truncated to 16 bits. This is XSA-270 / CVE-2018-15471. Reported-by: Felix Wilhelm Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Tested-by: Paul Durrant Cc: stable@vger.kernel.org [4.7 onwards] Signed-off-by: David S. Miller --- drivers/net/xen-netback/hash.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 3c4c58b9fe76..3b6fb5b3bdb2 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -332,20 +332,22 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { - u32 *mapping = &vif->hash.mapping[off]; + u32 *mapping = vif->hash.mapping; struct gnttab_copy copy_op = { .source.u.ref = gref, .source.domid = vif->domid, - .dest.u.gmfn = virt_to_gfn(mapping), .dest.domid = DOMID_SELF, - .dest.offset = xen_offset_in_page(mapping), - .len = len * sizeof(u32), + .len = len * sizeof(*mapping), .flags = GNTCOPY_source_gref }; - if ((off + len > vif->hash.size) || copy_op.len > XEN_PAGE_SIZE) + if ((off + len < off) || (off + len > vif->hash.size) || + len > XEN_PAGE_SIZE / sizeof(*mapping)) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + copy_op.dest.u.gmfn = virt_to_gfn(mapping + off); + copy_op.dest.offset = xen_offset_in_page(mapping + off); + while (len-- != 0) if (mapping[off++] >= vif->num_queues) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; From 22f9cde3401077ea450b69bf9b0bba373e12e454 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Sep 2018 02:13:01 -0600 Subject: [PATCH 228/499] xen-netback: validate queue numbers in xenvif_set_hash_mapping() Checking them before the grant copy means nothing as to the validity of the incoming request. As we shouldn't make the new data live before having validated it, introduce a second instance of the mapping array. Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/common.h | 3 ++- drivers/net/xen-netback/hash.c | 20 ++++++++++++++------ drivers/net/xen-netback/interface.c | 3 ++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index a46a1e94505d..936c0b3e0ba2 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -241,8 +241,9 @@ struct xenvif_hash_cache { struct xenvif_hash { unsigned int alg; u32 flags; + bool mapping_sel; u8 key[XEN_NETBK_MAX_HASH_KEY_SIZE]; - u32 mapping[XEN_NETBK_MAX_HASH_MAPPING_SIZE]; + u32 mapping[2][XEN_NETBK_MAX_HASH_MAPPING_SIZE]; unsigned int size; struct xenvif_hash_cache cache; }; diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 3b6fb5b3bdb2..dc9841ea2fff 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -324,7 +324,8 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; vif->hash.size = size; - memset(vif->hash.mapping, 0, sizeof(u32) * size); + memset(vif->hash.mapping[vif->hash.mapping_sel], 0, + sizeof(u32) * size); return XEN_NETIF_CTRL_STATUS_SUCCESS; } @@ -332,7 +333,7 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { - u32 *mapping = vif->hash.mapping; + u32 *mapping = vif->hash.mapping[!vif->hash.mapping_sel]; struct gnttab_copy copy_op = { .source.u.ref = gref, .source.domid = vif->domid, @@ -348,9 +349,8 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, copy_op.dest.u.gmfn = virt_to_gfn(mapping + off); copy_op.dest.offset = xen_offset_in_page(mapping + off); - while (len-- != 0) - if (mapping[off++] >= vif->num_queues) - return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + memcpy(mapping, vif->hash.mapping[vif->hash.mapping_sel], + vif->hash.size * sizeof(*mapping)); if (copy_op.len != 0) { gnttab_batch_copy(©_op, 1); @@ -359,6 +359,12 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; } + while (len-- != 0) + if (mapping[off++] >= vif->num_queues) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + vif->hash.mapping_sel = !vif->hash.mapping_sel; + return XEN_NETIF_CTRL_STATUS_SUCCESS; } @@ -410,6 +416,8 @@ void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) } if (vif->hash.size != 0) { + const u32 *mapping = vif->hash.mapping[vif->hash.mapping_sel]; + seq_puts(m, "\nHash Mapping:\n"); for (i = 0; i < vif->hash.size; ) { @@ -422,7 +430,7 @@ void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); for (j = 0; j < n; j++, i++) - seq_printf(m, "%4u ", vif->hash.mapping[i]); + seq_printf(m, "%4u ", mapping[i]); seq_puts(m, "\n"); } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 92274c237200..f6ae23fc3f6b 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -162,7 +162,8 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, if (size == 0) return skb_get_hash_raw(skb) % dev->real_num_tx_queues; - return vif->hash.mapping[skb_get_hash_raw(skb) % size]; + return vif->hash.mapping[vif->hash.mapping_sel] + [skb_get_hash_raw(skb) % size]; } static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) From 871088bf92e11efb69bbdbd537e48c0ad4f63729 Mon Sep 17 00:00:00 2001 From: 
Jan Beulich Date: Tue, 25 Sep 2018 02:13:37 -0600 Subject: [PATCH 229/499] xen-netback: handle page straddling in xenvif_set_hash_mapping() There's no guarantee that the mapping array doesn't cross a page boundary. Use a second grant copy operation if necessary. Signed-off-by: Jan Beulich Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/hash.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index dc9841ea2fff..0ccb021f1e78 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -334,28 +334,39 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { u32 *mapping = vif->hash.mapping[!vif->hash.mapping_sel]; - struct gnttab_copy copy_op = { + unsigned int nr = 1; + struct gnttab_copy copy_op[2] = {{ .source.u.ref = gref, .source.domid = vif->domid, .dest.domid = DOMID_SELF, .len = len * sizeof(*mapping), .flags = GNTCOPY_source_gref - }; + }}; if ((off + len < off) || (off + len > vif->hash.size) || len > XEN_PAGE_SIZE / sizeof(*mapping)) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; - copy_op.dest.u.gmfn = virt_to_gfn(mapping + off); - copy_op.dest.offset = xen_offset_in_page(mapping + off); + copy_op[0].dest.u.gmfn = virt_to_gfn(mapping + off); + copy_op[0].dest.offset = xen_offset_in_page(mapping + off); + if (copy_op[0].dest.offset + copy_op[0].len > XEN_PAGE_SIZE) { + copy_op[1] = copy_op[0]; + copy_op[1].source.offset = XEN_PAGE_SIZE - copy_op[0].dest.offset; + copy_op[1].dest.u.gmfn = virt_to_gfn(mapping + off + len); + copy_op[1].dest.offset = 0; + copy_op[1].len = copy_op[0].len - copy_op[1].source.offset; + copy_op[0].len = copy_op[1].source.offset; + nr = 2; + } memcpy(mapping, vif->hash.mapping[vif->hash.mapping_sel], vif->hash.size * sizeof(*mapping)); - if (copy_op.len != 0) { - gnttab_batch_copy(&copy_op, 1); + if (copy_op[0].len != 0) { + gnttab_batch_copy(copy_op, nr); - if (copy_op.status != GNTST_okay) + if (copy_op[0].status != GNTST_okay || + copy_op[nr - 1].status != GNTST_okay) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; } From 2e9361efa707e186d91b938e44f9e326725259f7 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 25 Sep 2018 10:21:55 +0100 Subject: [PATCH 230/499] net: hns: fix for unmapping problem when SMMU is on If SMMU is on, it is more likely that skb_shinfo(skb)->frags[i] cannot be sent in a single BD. When this happens, the hns_nic_net_xmit_hw function maps the whole data of a frag using skb_frag_dma_map, but unmaps each BD's data individually when tx is done, which causes problems when SMMU is on. This patch fixes the problem by unmapping the whole frag data when tx is done. Signed-off-by: Yunsheng Lin Signed-off-by: Peng Li Reviewed-by: Yisen Zhuang Signed-off-by: Salil Mehta Signed-off-by: David S.
Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 30 ++++++++++++------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index a051e582d541..79d03f8ee7b1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -84,7 +84,7 @@ static void hnae_unmap_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb) if (cb->type == DESC_TYPE_SKB) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else + else if (cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index f56855e63c96..5ce23d4b717e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -40,9 +40,9 @@ #define SKB_TMP_LEN(SKB) \ (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) -static void fill_v2_desc(struct hnae_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu) +static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, + int send_sz, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -64,7 +64,7 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, desc_cb->type = type; desc->addr = cpu_to_le64(dma); - desc->tx.send_size = cpu_to_le16((u16)size); + desc->tx.send_size = cpu_to_le16((u16)send_sz); /* config bd buffer end */ hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); @@ -133,6 +133,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, ring_ptr_move_fw(ring, next_to_use); } +static void fill_v2_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + fill_v2_desc_hw(ring, priv, size, size, dma, frag_end, + buf_num, type, mtu); +} + static const struct acpi_device_id hns_enet_acpi_match[] = { { "HISI00C1", 0 }, { "HISI00C2", 0 }, @@ -289,15 +297,15 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv, /* when the frag size is bigger than hardware, split this frag */ for (k = 0; k < frag_buf_num; k++) - fill_v2_desc(ring, priv, - (k == frag_buf_num - 1) ? + fill_v2_desc_hw(ring, priv, k == 0 ? size : 0, + (k == frag_buf_num - 1) ? sizeoflast : BD_MAX_SEND_SIZE, - dma + BD_MAX_SEND_SIZE * k, - frag_end && (k == frag_buf_num - 1) ? 1 : 0, - buf_num, - (type == DESC_TYPE_SKB && !k) ? + dma + BD_MAX_SEND_SIZE * k, + frag_end && (k == frag_buf_num - 1) ? 1 : 0, + buf_num, + (type == DESC_TYPE_SKB && !k) ? DESC_TYPE_SKB : DESC_TYPE_PAGE, - mtu); + mtu); } netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, From 92ef12b32feab8f277b69e9fb89ede2796777f4d Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 25 Sep 2018 18:21:58 +0200 Subject: [PATCH 231/499] tipc: fix flow control accounting for implicit connect In the case of implicit connect message with data > 1K, the flow control accounting is incorrect. At this state, the socket does not know the peer nodes capability and falls back to legacy flow control by return 1, however the receiver of this message will perform the new block accounting. 
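As an aside, the accounting mismatch is easy to model: the sender charges one legacy unit per message while the receiver accounts in message blocks, so the two sides diverge for any message larger than a block. A standalone sketch follows (userspace C, not tipc code; the 1024-byte block size mirrors tipc's FLOWCTL_BLK_SZ, everything else is illustrative):

```
/* Editorial sketch of legacy vs. block flow-control accounting. */
#include <stdio.h>

#define BLK_SZ 1024

static int sender_units(int msglen, int peer_supports_blocks)
{
	return peer_supports_blocks ? (msglen / BLK_SZ) + 1 : 1;
}

static int receiver_units(int msglen)
{
	/* The receiver always uses block accounting here. */
	return (msglen / BLK_SZ) + 1;
}

int main(void)
{
	int len = 3000;	/* implicit-connect message with > 1K of data */

	printf("sender charges %d unit(s), receiver accounts %d\n",
	       sender_units(len, 0), receiver_units(len));
	return 0;
}
```

For a 3000-byte message the sender charges 1 unit while the receiver accounts 3, which is exactly the divergence described next.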
This mismatch leads to slack in the flow control window and eventually to traffic disturbance. In this commit, we perform tipc_node_get_capabilities() at implicit connect and perform accounting based on the peer's capability. Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3f03ddd0e35b..b6f99b021d09 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1419,8 +1419,10 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) /* Handle implicit connection setup */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dlen); - if (dlen && (dlen == rc)) + if (dlen && dlen == rc) { + tsk->peer_caps = tipc_node_get_capabilities(net, dnode); tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } return rc; } From 69383c5913a5d74b4aa7e2b78a5cf14ebfbba538 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 25 Sep 2018 14:57:42 +0000 Subject: [PATCH 232/499] ovl: make symbol 'ovl_aops' static Fixes the following sparse warning: fs/overlayfs/inode.c:507:39: warning: symbol 'ovl_aops' was not declared. Should it be static? Fixes: 5b910bd615ba ("ovl: fix GPF in swapfile_activate of file from overlayfs over xfs") Signed-off-by: Wei Yongjun Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b6ac545b5a32..3b7ed5d2279c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -504,7 +504,7 @@ static const struct inode_operations ovl_special_inode_operations = { .update_time = ovl_update_time, }; -const struct address_space_operations ovl_aops = { +static const struct address_space_operations ovl_aops = { /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ .direct_IO = noop_direct_IO, }; From a9360abd3de0aad745d25d003923d56afb28a04b Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 25 Sep 2018 11:23:55 +0300 Subject: [PATCH 233/499] IB/uverbs: Free uapi on destroy Make sure we free struct uverbs_api once we clean the radix tree. It was allocated by uverbs_alloc_api(). Fixes: 9ed3e5f44772 ("IB/uverbs: Build the specs into a radix tree at runtime") Reported-by: Bart Van Assche Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_uapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 73ea6f0db88f..be854628a7c6 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -248,6 +248,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi) kfree(rcu_dereference_protected(*slot, true)); radix_tree_iter_delete(&uapi->radix, &iter, slot); } + kfree(uapi); } struct uverbs_api *uverbs_alloc_api( From e8ef090a614292db01b5956a6f5467afbe6c5cf7 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 25 Sep 2018 12:11:12 +0300 Subject: [PATCH 234/499] IB/mlx5: Destroy the DEVX object upon error flow Upon DEVX object creation, the object must be destroyed if any subsequent step of the creation flow fails.
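The pattern of the fix, in a generic sketch (plain C with hypothetical helpers, not the mlx5 API): once the firmware-side object exists, a later failure must jump to a label that destroys it before the bookkeeping memory is freed, otherwise the hardware object leaks.

```
/* Editorial sketch of the unwind ordering the fix establishes. */
#include <stdlib.h>

int fw_create(void)          { return 0; }
void fw_destroy(void)        { }
int copy_out_result(void)    { return -1; /* pretend the copy fails */ }

int create_obj(void)
{
	void *obj = malloc(64);
	int err;

	if (!obj)
		return -1;

	err = fw_create();
	if (err)
		goto obj_free;		/* nothing exists in firmware yet */

	err = copy_out_result();
	if (err)
		goto obj_destroy;	/* firmware object exists: tear it down */

	return 0;

obj_destroy:
	fw_destroy();
obj_free:
	free(obj);
	return err;
}
```

The patch below adds exactly this obj_destroy step in front of the existing obj_free label.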
Fixes: 7efce3691d33 ("IB/mlx5: Add obj create and destroy functionality") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ac116d63e466..f2f11e652dcd 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -723,6 +723,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj; int err; @@ -754,10 +755,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto obj_free; + goto obj_destroy; return 0; +obj_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); obj_free: kfree(obj); return err; From 64e9e22e68512da8df3c9a7430f07621e48db3c2 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 23 Aug 2018 23:36:00 +0200 Subject: [PATCH 235/499] soc: fsl: qbman: qman: avoid allocating from non existing gen_pool If the qman driver didn't probe, calling qman_alloc_fqid_range, qman_alloc_pool_range or qman_alloc_cgrid_range (as done in dpaa_eth) will pass a NULL pointer to gen_pool_alloc, leading to a NULL pointer dereference. Signed-off-by: Alexandre Belloni Reviewed-by: Roy Pledge Signed-off-by: Li Yang (cherry picked from commit f72487a2788aa70c3aee1d0ebd5470de9bac953a) Signed-off-by: Olof Johansson --- drivers/soc/fsl/qbman/qman.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index ecb22749df0b..8cc015183043 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2729,6 +2729,9 @@ static int qman_alloc_range(struct gen_pool *p, u32 *result, u32 cnt) { unsigned long addr; + if (!p) + return -ENODEV; + addr = gen_pool_alloc(p, cnt); if (!addr) return -ENOMEM; From 96fc74333f84cfdf8d434c6c07254e215e2aad00 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Thu, 1 Feb 2018 14:54:32 +0800 Subject: [PATCH 236/499] soc: fsl: qe: Fix copy/paste bug in ucc_get_tdm_sync_shift() There is a copy and paste bug so we accidentally use the RX_ shift when we're in TX_ mode. Fixes: bb8b2062aff3 ("fsl/qe: setup clock source for TDM mode") Signed-off-by: Dan Carpenter Signed-off-by: Zhao Qiang Signed-off-by: Li Yang (cherry picked from commit 3cb31b634052ed458922e0c8e2b4b093d7fb60b9) Signed-off-by: Olof Johansson --- drivers/soc/fsl/qe/ucc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/fsl/qe/ucc.c b/drivers/soc/fsl/qe/ucc.c index c646d8713861..681f7d4b7724 100644 --- a/drivers/soc/fsl/qe/ucc.c +++ b/drivers/soc/fsl/qe/ucc.c @@ -626,7 +626,7 @@ static u32 ucc_get_tdm_sync_shift(enum comm_dir mode, u32 tdm_num) { u32 shift; - shift = (mode == COMM_DIR_RX) ? RX_SYNC_SHIFT_BASE : RX_SYNC_SHIFT_BASE; + shift = (mode == COMM_DIR_RX) ? 
RX_SYNC_SHIFT_BASE : TX_SYNC_SHIFT_BASE; shift -= tdm_num * 2; return shift; From 5c5702e259dc66e6fceed5117effab79c186e87a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 25 Sep 2018 12:10:40 +0300 Subject: [PATCH 237/499] RDMA/core: Set right entry state before releasing reference Currently add_modify_gid() for the IB link layer has the following issue in the cache update path. When a GID update event occurs, the core releases the reference to the GID table without updating its state and/or entry pointer. CPU-0 CPU-1 ------ ----- ib_cache_update() IPoIB ULP add_modify_gid() [..] put_gid_entry() refcnt = 0, but state = valid, entry is valid. (work item is not yet executed). ipoib_create_ah() rdma_create_ah() rdma_get_gid_attr() <-- Tries to acquire gid_attr which has refcnt = 0. This is incorrect. The GID entry state and entry pointer provide the accurate GID entry state. Such fields must be updated under the rwlock to protect against readers, and they must be in a sane state before the refcount can drop to zero. Otherwise the above race condition can happen, leading to a use-after-free situation. The following backtrace has been observed when a cache update for an IB port is triggered while the IPoIB ULP is creating an AH. Therefore, when updating a GID entry, first mark a valid entry as invalid through its state and set the barrier so that no callers can acquire the GID entry, and only then release the reference to it. refcount_t: increment on 0; use-after-free. WARNING: CPU: 4 PID: 29106 at lib/refcount.c:153 refcount_inc_checked+0x30/0x50 Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] RIP: 0010:refcount_inc_checked+0x30/0x50 RSP: 0018:ffff8802ad36f600 EFLAGS: 00010082 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000008 RDI: ffffffff86710100 RBP: ffff8802d6e60a30 R08: ffffed005d67bf8b R09: ffffed005d67bf8b R10: 0000000000000001 R11: ffffed005d67bf8a R12: ffff88027620cee8 R13: ffff8802d6e60988 R14: ffff8802d6e60a78 R15: 0000000000000202 FS: 0000000000000000(0000) GS:ffff8802eb200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3ab35e5c88 CR3: 00000002ce84a000 CR4: 00000000000006e0 IPv6: ADDRCONF(NETDEV_CHANGE): ib1: link becomes ready Call Trace: rdma_get_gid_attr+0x220/0x310 [ib_core] ? lock_acquire+0x145/0x3a0 rdma_fill_sgid_attr+0x32c/0x470 [ib_core] rdma_create_ah+0x89/0x160 [ib_core] ? rdma_fill_sgid_attr+0x470/0x470 [ib_core] ? ipoib_create_ah+0x52/0x260 [ib_ipoib] ipoib_create_ah+0xf5/0x260 [ib_ipoib] ipoib_mcast_join_complete+0xbbe/0x2540 [ib_ipoib] Fixes: b150c3862d21 ("IB/core: Introduce GID entry reference counts") Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 98 ++++++++++++++++----------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 0bee1f4b914e..3208ad6ad540 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -337,55 +337,6 @@ static int add_roce_gid(struct ib_gid_table_entry *entry) return 0; } -/** - * add_modify_gid - Add or modify GID table entry - * - * @table: GID table in which GID to be added or modified - * @attr: Attributes of the GID - * - * Returns 0 on success or appropriate error code. It accepts zero - * GID addition for non RoCE ports for HCA's who report them as valid - * GID. However such zero GIDs are not added to the cache.
- */ -static int add_modify_gid(struct ib_gid_table *table, - const struct ib_gid_attr *attr) -{ - struct ib_gid_table_entry *entry; - int ret = 0; - - /* - * Invalidate any old entry in the table to make it safe to write to - * this index. - */ - if (is_gid_entry_valid(table->data_vec[attr->index])) - put_gid_entry(table->data_vec[attr->index]); - - /* - * Some HCA's report multiple GID entries with only one valid GID, and - * leave other unused entries as the zero GID. Convert zero GIDs to - * empty table entries instead of storing them. - */ - if (rdma_is_zero_gid(&attr->gid)) - return 0; - - entry = alloc_gid_entry(attr); - if (!entry) - return -ENOMEM; - - if (rdma_protocol_roce(attr->device, attr->port_num)) { - ret = add_roce_gid(entry); - if (ret) - goto done; - } - - store_gid_entry(table, entry); - return 0; - -done: - put_gid_entry(entry); - return ret; -} - /** * del_gid - Delete GID table entry * @@ -419,6 +370,55 @@ static void del_gid(struct ib_device *ib_dev, u8 port, put_gid_entry_locked(entry); } +/** + * add_modify_gid - Add or modify GID table entry + * + * @table: GID table in which GID to be added or modified + * @attr: Attributes of the GID + * + * Returns 0 on success or appropriate error code. It accepts zero + * GID addition for non RoCE ports for HCA's who report them as valid + * GID. However such zero GIDs are not added to the cache. + */ +static int add_modify_gid(struct ib_gid_table *table, + const struct ib_gid_attr *attr) +{ + struct ib_gid_table_entry *entry; + int ret = 0; + + /* + * Invalidate any old entry in the table to make it safe to write to + * this index. + */ + if (is_gid_entry_valid(table->data_vec[attr->index])) + del_gid(attr->device, attr->port_num, table, attr->index); + + /* + * Some HCA's report multiple GID entries with only one valid GID, and + * leave other unused entries as the zero GID. Convert zero GIDs to + * empty table entries instead of storing them. + */ + if (rdma_is_zero_gid(&attr->gid)) + return 0; + + entry = alloc_gid_entry(attr); + if (!entry) + return -ENOMEM; + + if (rdma_protocol_roce(attr->device, attr->port_num)) { + ret = add_roce_gid(entry); + if (ret) + goto done; + } + + store_gid_entry(table, entry); + return 0; + +done: + put_gid_entry(entry); + return ret; +} + /* rwlock should be read locked, or lock should be held */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, From 974c24c5bed75b53e229a6f68a0533b6d5f48feb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Sep 2018 11:00:49 +0200 Subject: [PATCH 238/499] dma-mapping: add the missing ARCH_HAS_SYNC_DMA_FOR_CPU_ALL declaration The patch adding the infrastructure failed to actually add the symbol declaration, oops.. 
Fixes: faef87723a ("dma-noncoherent: add a arch_sync_dma_for_cpu_all hook") Signed-off-by: Christoph Hellwig Reviewed-by: Paul Burton Acked-by: Florian Fainelli Signed-off-by: Christoph Hellwig --- kernel/dma/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 9bd54304446f..1b1d63b3634b 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -23,6 +23,9 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU bool select NEED_DMA_MAP_STATE +config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL + bool + config DMA_DIRECT_OPS bool depends on HAS_DMA From bb830add192e9d8338082c0fc2c209e23b43d865 Mon Sep 17 00:00:00 2001 From: Susobhan Dey Date: Tue, 25 Sep 2018 12:29:15 -0700 Subject: [PATCH 239/499] nvme: properly propagate errors in nvme_mpath_init Signed-off-by: Susobhan Dey Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5a9562881d4e..9fe3fff818b8 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -537,8 +537,10 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) INIT_WORK(&ctrl->ana_work, nvme_ana_work); ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL); - if (!ctrl->ana_log_buf) + if (!ctrl->ana_log_buf) { + error = -ENOMEM; goto out; + } error = nvme_read_ana_log(ctrl, true); if (error) @@ -547,7 +549,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) out_free_ana_log_buf: kfree(ctrl->ana_log_buf); out: - return -ENOMEM; + return error; } void nvme_mpath_uninit(struct nvme_ctrl *ctrl) From 530ca2c9bd6949c72c9b5cfc330cb3dbccaa3f5b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 25 Sep 2018 10:36:20 -0600 Subject: [PATCH 240/499] blk-mq: Allow blocking queue tag iter callbacks A recent commit runs tag iterator callbacks under the rcu read lock, but existing callbacks do not satisfy the non-blocking requirement. The commit intended to prevent an iterator from accessing a queue that's being modified. This patch fixes the original issue by taking a queue reference instead of reading it, which allows callbacks to make blocking calls. Fixes: f5bbbbe4d6357 ("blk-mq: sync the update nr_hw_queues with blk_mq_queue_tag_busy_iter") Acked-by: Jianchao Wang Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 94e1ed667b6e..41317c50a446 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -322,16 +322,11 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, /* * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and - * queue_hw_ctx after freeze the queue. So we could use q_usage_counter - * to avoid race with it. __blk_mq_update_nr_hw_queues will users - * synchronize_rcu to ensure all of the users go out of the critical - * section below and see zeroed q_usage_counter. + * queue_hw_ctx after freeze the queue, so we use q_usage_counter + * to avoid race with it. 
*/ - rcu_read_lock(); - if (percpu_ref_is_zero(&q->q_usage_counter)) { - rcu_read_unlock(); + if (!percpu_ref_tryget(&q->q_usage_counter)) return; - } queue_for_each_hw_ctx(q, hctx, i) { struct blk_mq_tags *tags = hctx->tags; @@ -347,7 +342,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, bt_for_each(hctx, &tags->breserved_tags, fn, priv, true); bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false); } - rcu_read_unlock(); + blk_queue_exit(q); } static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, From 082dc55e3c5e559b0e6855c0de23ebd674bcc91e Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Thu, 20 Sep 2018 18:34:16 +0200 Subject: [PATCH 241/499] ARM: dts: stm32: update SPI6 dmas property on stm32mp157c Remove unused parameter from SPI6 dmas property on stm32mp157c SoC. Fixes: dc3f8c86c10d ("ARM: dts: stm32: add SPI support on stm32mp157c") Signed-off-by: Amelie Delaunay Signed-off-by: Alexandre Torgue [olof: Without this patch, SPI6 will fall back to interrupt mode with lower performance] Signed-off-by: Olof Johansson --- arch/arm/boot/dts/stm32mp157c.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 661be948ab74..185541a5b69f 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -1078,8 +1078,8 @@ spi6: spi@5c001000 { interrupts = ; clocks = <&rcc SPI6_K>; resets = <&rcc SPI6_R>; - dmas = <&mdma1 34 0x0 0x40008 0x0 0x0 0>, - <&mdma1 35 0x0 0x40002 0x0 0x0 0>; + dmas = <&mdma1 34 0x0 0x40008 0x0 0x0>, + <&mdma1 35 0x0 0x40002 0x0 0x0>; dma-names = "rx", "tx"; status = "disabled"; }; From 94b6ddce71780575fbbf9d2c36afc8440e61a281 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 25 Sep 2018 21:56:57 +0200 Subject: [PATCH 242/499] tipc: reset bearer if device carrier not ok If we detect that the underlying carrier detects errors and goes down, we reset the bearer. Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 418f03d0be90..645c16052052 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -609,16 +609,18 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, switch (evt) { case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) + if (netif_carrier_ok(dev) && netif_oper_up(dev)) { + test_and_set_bit_lock(0, &b->up); break; - /* else: fall through */ - case NETDEV_UP: - test_and_set_bit_lock(0, &b->up); - break; + } + /* fall through */ case NETDEV_GOING_DOWN: clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; case NETDEV_UP: test_and_set_bit_lock(0, &b->up); break; case NETDEV_CHANGEMTU: if (tipc_mtu_bad(dev, 0)) { bearer_disable(net, b); From 3f32d0be6c16b902b687453c962d17eea5b8ea19 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 25 Sep 2018 22:09:10 +0200 Subject: [PATCH 243/499] tipc: lock wakeup & inputq at tipc_link_reset() In tipc_link_reset() we copy the wakeup queue to input queue using skb_queue_splice_init(link->wakeupq, link->inputq). This is performed without holding any locks. The lists might be simultaneously accessed by other cpu threads in tipc_sk_rcv(), something leading to random missing packets. Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/link.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index b1f0bee54eac..26cc033ee167 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -841,9 +841,14 @@ void tipc_link_reset(struct tipc_link *l) l->in_session = false; l->session++; l->mtu = l->advertised_mtu; + spin_lock_bh(&l->wakeupq.lock); + spin_lock_bh(&l->inputq->lock); + skb_queue_splice_init(&l->wakeupq, l->inputq); + spin_unlock_bh(&l->inputq->lock); + spin_unlock_bh(&l->wakeupq.lock); + __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); - skb_queue_splice_init(&l->wakeupq, l->inputq); __skb_queue_purge(&l->backlogq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; From 5ebb1bc2d63d90dd204169e21fd7a0b4bb8c776e Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Tue, 18 Sep 2018 15:40:58 +0530 Subject: [PATCH 244/499] iommu/amd: Return devid as alias for ACPI HID devices ACPI HID devices do not actually have an alias for them in the IVRS. But dev_data->alias is still used for indexing into the IOMMU device table for devices being handled by the IOMMU. So for ACPI HID devices, we simply return the corresponding devid as an alias, as parsed from IVRS table. Signed-off-by: Arindam Nath Fixes: 2bf9a0a12749 ('iommu/amd: Add iommu support for ACPI HID devices') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4e04fff23977..73e47d93e7a0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -246,7 +246,13 @@ static u16 get_alias(struct device *dev) /* The callers make sure that get_device_id() does not fail here */ devid = get_device_id(dev); + + /* For ACPI HID devices, we simply return the devid as such */ + if (!dev_is_pci(dev)) + return devid; + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); if (ivrs_alias == pci_alias) From 8105f9b8a8879bff7f1d43d0720c993a99c9d135 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 22 Sep 2018 18:35:31 +0200 Subject: [PATCH 245/499] mac80211: allocate TXQs for active monitor interfaces Monitor mode interfaces with the active flag are passed down to the driver. Drivers using TXQ expect that all interfaces have allocated TXQs before they get added. Fixes: 79af1f866193d ("mac80211: avoid allocating TXQs that won't be used") Cc: stable@vger.kernel.org Reported-by: Catrinel Catrinescu Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5e6cf2cee965..5836ddeac9e3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1756,7 +1756,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (local->ops->wake_tx_queue && type != NL80211_IFTYPE_AP_VLAN && - type != NL80211_IFTYPE_MONITOR) + (type != NL80211_IFTYPE_MONITOR || + (params->flags & MONITOR_FLAG_ACTIVE))) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; From 0bcbf6518456f63038a290bd359237d31f6f8ac3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 23 Sep 2018 11:59:13 -0700 Subject: [PATCH 246/499] cfg80211: fix reg_query_regdb_wmm kernel-doc Drop @ptr from kernel-doc for function reg_query_regdb_wmm(). This function parameter was recently removed so update the kernel-doc to match that and remove the kernel-doc warnings. 
Removes 109 occurrences of this warning message: ../include/net/cfg80211.h:4869: warning: Excess function parameter 'ptr' description in 'reg_query_regdb_wmm' Fixes: 38cb87ee47fb ("cfg80211: make wmm_rule part of the reg_rule structure") Signed-off-by: Randy Dunlap Cc: Stanislaw Gruszka Cc: Johannes Berg Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8ebabc9873d1..4de121e24ce5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4852,8 +4852,6 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. * @freq: the freqency(in MHz) to be queried. - * @ptr: pointer where the regdb wmm data is to be stored (or %NULL if - * irrelevant). This can be used later for deduplication. * @rule: pointer to store the wmm rule from the regulatory db. * * Self-managed wireless drivers can use this function to query From 30fe6d50eb088783c8729c7d930f65296b2b3fa7 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:00 +0900 Subject: [PATCH 247/499] nl80211: Fix possible Spectre-v1 for NL80211_TXRATE_HT Use array_index_nospec() to sanitize ridx with respect to speculation. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4b8ec659e797..bd26230de63e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3756,6 +3756,7 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return false; /* check availability */ + ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else From 628980e5c8f038f730582c6ee50b7410741cd96e Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:41:13 +0200 Subject: [PATCH 248/499] mac80211_hwsim: fix locking when iterating radios during ns exit The cleanup of radios during namespace exit has recently been reworked to directly delete a radio while temporarily releasing the spinlock, fixing a race condition between the work-queue execution and namespace exits. However, the temporary unlock allows unsafe modifications on the iterated list, resulting in a potential crash when continuing the iteration of additional radios. Move radios about to destroy to a temporary list, and clean that up after releasing the spinlock once iteration is complete. 
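For readers unfamiliar with the idiom this fix relies on, the sketch below shows the same move-to-a-private-list pattern in isolation. The widget/widget_pool structures and the widget_destroy() helper are invented for illustration and are not part of the hwsim driver; only the locking shape matches the fix.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct widget {				/* hypothetical example object */
	struct list_head node;
	int group;
};

struct widget_pool {			/* hypothetical shared container */
	spinlock_t lock;
	struct list_head widgets;
};

static void widget_destroy(struct widget *w)
{
	/* stand-in for the expensive, possibly sleeping teardown */
	kfree(w);
}

static void destroy_matching_widgets(struct widget_pool *pool, int group)
{
	struct widget *w, *tmp;
	LIST_HEAD(doomed);		/* private, on-stack list head */

	spin_lock_bh(&pool->lock);
	list_for_each_entry_safe(w, tmp, &pool->widgets, node) {
		if (w->group != group)
			continue;
		list_move(&w->node, &doomed);	/* unlink from the shared list */
	}
	spin_unlock_bh(&pool->lock);

	/* The shared lock is dropped; only this thread can see &doomed. */
	list_for_each_entry_safe(w, tmp, &doomed, node) {
		list_del(&w->node);
		widget_destroy(w);
	}
}

Because the doomed entries are unlinked while the lock is held, the later teardown loop cannot race with concurrent modification of the shared list, and it is free to block.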
Fixes: 8cfd36a0b53a ("mac80211_hwsim: fix use-after-free bug in hwsim_exit_net") Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 1068757ec42e..f1150d321875 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3646,6 +3646,7 @@ static __net_init int hwsim_init_net(struct net *net) static void __net_exit hwsim_exit_net(struct net *net) { struct mac80211_hwsim_data *data, *tmp; + LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(data, tmp, &hwsim_radios, list) { @@ -3656,17 +3657,19 @@ static void __net_exit hwsim_exit_net(struct net *net) if (data->netgroup == hwsim_net_get_netgroup(&init_net)) continue; - list_del(&data->list); + list_move(&data->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &data->rht, hwsim_rht_params); hwsim_radios_generation++; - spin_unlock_bh(&hwsim_radio_lock); + } + spin_unlock_bh(&hwsim_radio_lock); + + list_for_each_entry_safe(data, tmp, &list, list) { + list_del(&data->list); mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL); - spin_lock_bh(&hwsim_radio_lock); } - spin_unlock_bh(&hwsim_radio_lock); ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net)); } From f1c47eb61d52379de5747d02bb36be20d7a2d0d3 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:41:14 +0200 Subject: [PATCH 249/499] mac80211_hwsim: fix race in radio destruction from netlink notifier The asynchronous destruction from a work-queue of radios tagged with destroy-on-close may race with the owning namespace about to exit, resulting in potential use-after-free of that namespace. Instead of using a work-queue, move radios about to destroy to a temporary list, which can be worked on synchronously after releasing the lock. This should be safe to do from the netlink socket notifier, as the namespace is guaranteed to not get released. 
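As context for why the synchronous approach is safe, the rough shape of a netlink socket notifier doing its cleanup inline is sketched below; the teardown helper is hypothetical, and only the NETLINK_URELEASE handling mirrors what the driver does.

#include <linux/netlink.h>
#include <linux/notifier.h>

/* Hypothetical synchronous teardown of everything owned by a portid. */
static void teardown_owned_objects(u32 portid)
{
	/* walk private lists, unlink and free objects, etc. */
}

static int example_netlink_notify(struct notifier_block *nb,
				  unsigned long state, void *_notify)
{
	struct netlink_notify *notify = _notify;

	if (state != NETLINK_URELEASE)
		return NOTIFY_DONE;

	/*
	 * Running the teardown here, instead of queueing a work item,
	 * keeps it ordered against namespace exit: the netns that owns
	 * the closing socket is still alive for the duration of this
	 * callback.
	 */
	teardown_owned_objects(notify->portid);

	return NOTIFY_DONE;
}

static struct notifier_block example_netlink_notifier = {
	.notifier_call = example_netlink_notify,
};

/* Registered once at init with netlink_register_notifier(&example_netlink_notifier). */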
Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index f1150d321875..6fccb4d717db 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -520,7 +520,6 @@ struct mac80211_hwsim_data { int channels, idx; bool use_chanctx; bool destroy_on_close; - struct work_struct destroy_work; u32 portid; char alpha2[2]; const struct ieee80211_regdomain *regd; @@ -3565,30 +3564,27 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; -static void destroy_radio(struct work_struct *work) -{ - struct mac80211_hwsim_data *data = - container_of(work, struct mac80211_hwsim_data, destroy_work); - - hwsim_radios_generation++; - mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL); -} - static void remove_user_radios(u32 portid) { struct mac80211_hwsim_data *entry, *tmp; + LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) { if (entry->destroy_on_close && entry->portid == portid) { - list_del(&entry->list); + list_move(&entry->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht, hwsim_rht_params); - INIT_WORK(&entry->destroy_work, destroy_radio); - queue_work(hwsim_wq, &entry->destroy_work); + hwsim_radios_generation++; } } spin_unlock_bh(&hwsim_radio_lock); + + list_for_each_entry_safe(entry, tmp, &list, list) { + list_del(&entry->list); + mac80211_hwsim_del_radio(entry, wiphy_name(entry->hw->wiphy), + NULL); + } } static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, From 28ef8b49a338dc1844e86b7954cfffc7dfa2660a Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:51:02 +0200 Subject: [PATCH 250/499] mac80211_hwsim: do not omit multicast announce of first added radio The allocation of hwsim radio identifiers uses a post-increment from 0, so the first radio has idx 0. This idx is explicitly excluded from multicast announcements ever since, but it is unclear why. Drop that idx check and announce the first radio as well. This makes userspace happy if it relies on these events. Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6fccb4d717db..07442ada6dd0 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2934,8 +2934,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, hwsim_radios_generation++; spin_unlock_bh(&hwsim_radio_lock); - if (idx > 0) - hwsim_mcast_new_radio(idx, info, param); + hwsim_mcast_new_radio(idx, info, param); return idx; From cb28c306b93b71f2741ce1a5a66289db26715f4d Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Wed, 26 Sep 2018 09:13:46 +0300 Subject: [PATCH 251/499] Bluetooth: SMP: fix crash in unpairing In case unpair_device() was called through mgmt interface at the same time when pairing was in progress, Bluetooth kernel module crash was seen. [ 600.351225] general protection fault: 0000 [#1] SMP PTI [ 600.351235] CPU: 1 PID: 11096 Comm: btmgmt Tainted: G OE 4.19.0-rc1+ #1 [ 600.351238] Hardware name: Dell Inc. 
Latitude E5440/08RCYC, BIOS A18 05/14/2017 [ 600.351272] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351276] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351279] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351282] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351285] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351287] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351290] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351292] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351295] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351300] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 [ 600.351302] Call Trace: [ 600.351325] smp_failure+0x4f/0x70 [bluetooth] [ 600.351345] smp_cancel_pairing+0x74/0x80 [bluetooth] [ 600.351370] unpair_device+0x1c1/0x330 [bluetooth] [ 600.351399] hci_sock_sendmsg+0x960/0x9f0 [bluetooth] [ 600.351409] ? apparmor_socket_sendmsg+0x1e/0x20 [ 600.351417] sock_sendmsg+0x3e/0x50 [ 600.351422] sock_write_iter+0x85/0xf0 [ 600.351429] do_iter_readv_writev+0x12b/0x1b0 [ 600.351434] do_iter_write+0x87/0x1a0 [ 600.351439] vfs_writev+0x98/0x110 [ 600.351443] ? ep_poll+0x16d/0x3d0 [ 600.351447] ? ep_modify+0x73/0x170 [ 600.351451] do_writev+0x61/0xf0 [ 600.351455] ? do_writev+0x61/0xf0 [ 600.351460] __x64_sys_writev+0x1c/0x20 [ 600.351465] do_syscall_64+0x5a/0x110 [ 600.351471] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 600.351474] RIP: 0033:0x7fb2bdb62fe0 [ 600.351477] Code: 73 01 c3 48 8b 0d b8 6e 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 69 c7 2c 00 00 75 10 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 de 80 01 00 48 89 04 24 [ 600.351479] RSP: 002b:00007ffe062cb8f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 [ 600.351484] RAX: ffffffffffffffda RBX: 000000000255b3d0 RCX: 00007fb2bdb62fe0 [ 600.351487] RDX: 0000000000000001 RSI: 00007ffe062cb920 RDI: 0000000000000004 [ 600.351490] RBP: 00007ffe062cb920 R08: 000000000255bd80 R09: 0000000000000000 [ 600.351494] R10: 0000000000000353 R11: 0000000000000246 R12: 0000000000000001 [ 600.351497] R13: 00007ffe062cbbe0 R14: 0000000000000000 R15: 0000000000000000 [ 600.351501] Modules linked in: algif_hash algif_skcipher af_alg cmac ipt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo iptable_nat nf_nat_ipv4 xt_addrtype iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c br_netfilter bridge stp llc overlay arc4 nls_iso8859_1 dm_crypt intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp dell_laptop kvm_intel crct10dif_pclmul dell_smm_hwmon crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper intel_cstate intel_rapl_perf uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common videodev media hid_multitouch input_leds joydev serio_raw dell_wmi snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic dell_smbios dcdbas sparse_keymap [ 600.351569] snd_hda_intel btusb snd_hda_codec btrtl btbcm btintel snd_hda_core bluetooth(OE) snd_hwdep snd_pcm iwlmvm ecdh_generic wmi_bmof dell_wmi_descriptor snd_seq_midi mac80211 snd_seq_midi_event lpc_ich 
iwlwifi snd_rawmidi snd_seq snd_seq_device snd_timer cfg80211 snd soundcore mei_me mei dell_rbtn dell_smo8800 mac_hid parport_pc ppdev lp parport autofs4 hid_generic usbhid hid i915 nouveau kvmgt vfio_mdev mdev vfio_iommu_type1 vfio kvm irqbypass i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt mxm_wmi psmouse ahci sdhci_pci cqhci libahci fb_sys_fops sdhci drm e1000e video wmi [ 600.351637] ---[ end trace e49e9f1df09c94fb ]--- [ 600.351664] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351666] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351669] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351672] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351674] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351676] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351679] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351681] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351684] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351689] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 Crash happened because list_del_rcu() was called twice for smp->ltk. This was possible if unpair_device was called right after ltk was generated but before keys were distributed. In this commit smp_cancel_pairing was refactored to cancel pairing if it is in progress and otherwise just removes keys. Once keys are removed from rcu list, pointers to smp context's keys are set to NULL to make sure removed list items are not accessed later. This commit also adjusts the functionality of mgmt unpair_device() little bit. Previously pairing was canceled only if pairing was in state that keys were already generated. With this commit unpair_device() cancels pairing already in earlier states. Bug was found by fuzzing kernel SMP implementation using Synopsys Defensics. Reported-by: Pekka Oikarainen Signed-off-by: Matias Karhumaa Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 7 ++----- net/bluetooth/smp.c | 29 +++++++++++++++++++++++++---- net/bluetooth/smp.h | 3 ++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3bdc8f3ca259..ccce954f8146 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2434,9 +2434,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -2450,8 +2449,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. 
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3a7b0773536b..73f7211d0431 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2422,30 +2422,51 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 0ff6247eaa6c..121edadd5f8d 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -181,7 +181,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); From 337fe9f5c1e7de1f391c6a692531379d2aa2ee11 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 26 Sep 2018 02:17:03 -0500 Subject: [PATCH 252/499] drm/syncobj: Don't leak fences when WAIT_FOR_SUBMIT is set We attempt to get fences earlier in the hopes that everything will already have fences and no callbacks will be needed. If we do succeed in getting a fence, getting one a second time will result in a duplicate ref with no unref. This is causing memory leaks in Vulkan applications that create a lot of fences; playing for a few hours can, apparently, bring down the system. 
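The underlying issue is generic reference-counting hygiene rather than anything DRM-specific: if a lookup that takes a reference runs twice against the same slot, the first reference is overwritten and can never be dropped. A minimal sketch with an invented kref-based object (not the DRM fence code) looks like this:

#include <linux/kref.h>
#include <linux/slab.h>

struct tracked {			/* hypothetical refcounted object */
	struct kref ref;
};

static void tracked_release(struct kref *ref)
{
	kfree(container_of(ref, struct tracked, ref));
}

/* Returns the object with an extra reference the caller must put. */
static struct tracked *tracked_get(struct tracked *t)
{
	kref_get(&t->ref);
	return t;
}

static void tracked_put(struct tracked *t)
{
	kref_put(&t->ref, tracked_release);
}

static void fill_slot(struct tracked *src, struct tracked **slot)
{
	/*
	 * Skip slots that already hold a reference; calling the "get"
	 * again would overwrite *slot and leak the first reference,
	 * which is the bug class the added check closes.
	 */
	if (*slot)
		return;
	*slot = tracked_get(src);
}

Each successful fill_slot() must eventually be balanced by exactly one tracked_put(); the guard makes a second invocation idempotent instead of leaky.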
Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107899 Reviewed-by: Chris Wilson Signed-off-by: Jason Ekstrand Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180926071703.15257-1-jason.ekstrand@intel.com --- drivers/gpu/drm/drm_syncobj.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index adb3cb27d31e..759278fef35a 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -97,6 +97,8 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, { int ret; + WARN_ON(*fence); + *fence = drm_syncobj_fence_get(syncobj); if (*fence) return 1; @@ -743,6 +745,9 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { for (i = 0; i < count; ++i) { + if (entries[i].fence) + continue; + drm_syncobj_fence_get_or_add_callback(syncobjs[i], &entries[i].fence, &entries[i].syncobj_cb, From f188b99f0b2d33794b4af8a225f95d1e968c0a3f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 26 Sep 2018 15:39:28 -0500 Subject: [PATCH 253/499] ACPI / hotplug / PCI: Don't scan for non-hotplug bridges if slot is not bridge HP 6730b laptop has an ethernet NIC connected to one of the PCIe root ports. The root ports themselves are native PCIe hotplug capable. Now, during boot after PCI devices are scanned the BIOS triggers ACPI bus check directly to the NIC: ACPI: \_SB_.PCI0.RP06.NIC_: Bus check in hotplug_event() It is not clear why it is sending bus check but regardless the ACPI hotplug notify handler calls enable_slot() directly (instead of going through acpiphp_check_bridge() as there is no bridge), which ends up handling special case for non-hotplug bridges with native PCIe hotplug. This results a crash of some kind but the reporter only sees black screen so it is hard to figure out the exact spot and what actually happens. Based on a few fix proposals it was tracked to crash somewhere inside pci_assign_unassigned_bridge_resources(). In any case we should not really be in that special branch at all because the ACPI notify happened to a slot that is not a PCI bridge (it is just a regular PCI device). Fix this so that we only go to that special branch if we are calling enable_slot() for a bridge (e.g., the ACPI notification was for the bridge). Link: https://bugzilla.kernel.org/show_bug.cgi?id=201127 Fixes: 84c8b58ed3ad ("ACPI / hotplug / PCI: Don't scan bridges managed by native hotplug") Reported-by: Peter Anemone Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki CC: stable@vger.kernel.org # v4.18+ --- drivers/pci/hotplug/acpiphp_glue.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index ef0b1b6ba86f..12afa7fdf77e 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -457,17 +457,18 @@ static void acpiphp_native_scan_bridge(struct pci_dev *bridge) /** * enable_slot - enable, configure a slot * @slot: slot to be enabled + * @bridge: true if enable is for the whole bridge (not a single slot) * * This function should be called per *physical slot*, * not per each slot object in ACPI namespace. 
*/ -static void enable_slot(struct acpiphp_slot *slot) +static void enable_slot(struct acpiphp_slot *slot, bool bridge) { struct pci_dev *dev; struct pci_bus *bus = slot->bus; struct acpiphp_func *func; - if (bus->self && hotplug_is_native(bus->self)) { + if (bridge && bus->self && hotplug_is_native(bus->self)) { /* * If native hotplug is used, it will take care of hotplug * slot management and resource allocation for hotplug @@ -701,7 +702,7 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge) trim_stale_devices(dev); /* configure all functions */ - enable_slot(slot); + enable_slot(slot, true); } else { disable_slot(slot); } @@ -785,7 +786,7 @@ static void hotplug_event(u32 type, struct acpiphp_context *context) if (bridge) acpiphp_check_bridge(bridge); else if (!(slot->flags & SLOT_IS_GOING_AWAY)) - enable_slot(slot); + enable_slot(slot, false); break; @@ -973,7 +974,7 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot) /* configure all functions */ if (!(slot->flags & SLOT_ENABLED)) - enable_slot(slot); + enable_slot(slot, false); pci_unlock_rescan_remove(); return 0; From 1d71926bbd59facc4bdb6f13117d3a1aee8b83ba Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 18 Sep 2018 16:16:56 -0700 Subject: [PATCH 254/499] mmc: core: Fix debounce time to use microseconds The debounce value in device tree is in milliseconds but needs to be in microseconds for mmc_gpiod_request_cd(). Fixes: bfd694d5e21c ("mmc: core: Add tunable delay before detecting card after card is inserted") Cc: Shawn Lin Signed-off-by: Tony Lindgren Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index abf9e884386c..f57f5de54206 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -235,7 +235,7 @@ int mmc_of_parse(struct mmc_host *host) host->caps |= MMC_CAP_NEEDS_POLL; ret = mmc_gpiod_request_cd(host, "cd", 0, true, - cd_debounce_delay_ms, + cd_debounce_delay_ms * 1000, &cd_gpio_invert); if (!ret) dev_info(host->parent, "Got CD GPIO\n"); From 854f31ccdd7964c9c2e68da234a3a8aedb51cf6b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 27 Sep 2018 10:55:13 +0900 Subject: [PATCH 255/499] block: fix deadline elevator drain for zoned block devices When the deadline scheduler is used with a zoned block device, writes to a zone will be dispatched one at a time. This causes the warning message: deadline: forced dispatching is broken (nr_sorted=X), please report this to be displayed when switching to another elevator with the legacy I/O path while write requests to a zone are being retained in the scheduler queue. Prevent this message from being displayed when executing elv_drain_elevator() for a zoned block device. __blk_drain_queue() will loop until all writes are dispatched and completed, resulting in the desired elevator queue drain without extensive modifications to the deadline code itself to handle forced-dispatch calls. 
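To illustrate why a zoned device legitimately holds write requests back, here is a simplified, invented sketch of per-zone write locking (hypothetical names, not the mq-deadline implementation): sequential zones only accept writes at the write pointer, so a scheduler dispatches at most one write per zone at a time, and anything else stays queued, which is why nr_sorted can be non-zero during a forced drain.

#include <linux/bitmap.h>
#include <linux/types.h>

/* Hypothetical per-device scheduler state: one lock bit per zone. */
struct zoned_sched {
	unsigned long *zone_bitmap;	/* one bit per zone		*/
	unsigned int zone_shift;	/* log2(zone size in sectors)	*/
};

static unsigned int sector_to_zone(const struct zoned_sched *zs, sector_t sec)
{
	return sec >> zs->zone_shift;
}

/*
 * A write may be dispatched only if its target zone is not already
 * owned by another in-flight write; otherwise it stays in the queue.
 */
static bool try_dispatch_write(struct zoned_sched *zs, sector_t sec)
{
	return !test_and_set_bit(sector_to_zone(zs, sec), zs->zone_bitmap);
}

/* Called on completion so the next queued write to the zone can go. */
static void complete_write(struct zoned_sched *zs, sector_t sec)
{
	clear_bit(sector_to_zone(zs, sec), zs->zone_bitmap);
}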
Signed-off-by: Damien Le Moal Fixes: 8dc8146f9c92 ("deadline-iosched: Introduce zone locking support") Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- block/elevator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/elevator.c b/block/elevator.c index 6a06b5d040e5..fae58b2f906f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -609,7 +609,7 @@ void elv_drain_elevator(struct request_queue *q) while (e->type->ops.sq.elevator_dispatch_fn(q, 1)) ; - if (q->nr_sorted && printed++ < 10) { + if (q->nr_sorted && !blk_queue_is_zoned(q) && printed++ < 10 ) { printk(KERN_ERR "%s: forced dispatching is broken " "(nr_sorted=%u), please report this\n", q->elevator->type->elevator_name, q->nr_sorted); From 36f19d5b4f99fa9fa8263877e5f8e669d7fddc14 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 26 Sep 2018 17:35:14 -0700 Subject: [PATCH 256/499] net/ipv6: Remove extra call to ip6_convert_metrics for multipath case The change to move metrics from the dst to rt6_info moved the call to ip6_convert_metrics from ip6_route_add to ip6_route_info_create. In doing so it makes the call in ip6_route_info_append redundant and actually leaks the metrics installed as part of the ip6_route_info_create. Remove the now unnecessary call. Fixes: d4ead6b34b67f ("net/ipv6: move metrics from dst to rt6_info") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 826b14de7dbb..a366c05a239d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4321,11 +4321,6 @@ static int ip6_route_info_append(struct net *net, if (!nh) return -ENOMEM; nh->fib6_info = rt; - err = ip6_convert_metrics(net, rt, r_cfg); - if (err) { - kfree(nh); - return err; - } memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); list_add_tail(&nh->next, rt6_nh_list); From e0511f6c1ccdd153cf063764e93ac177a8553c5d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 23 Sep 2018 15:38:21 +0200 Subject: [PATCH 257/499] net: phy: fix WoL handling when suspending the PHY Actually there's nothing wrong with the two changes marked as "Fixes", they just revealed a problem which has been existing before. After having switched r8169 to phylib it was reported that WoL from shutdown doesn't work any longer (WoL from suspend isn't affected). Reason is that during shutdown phy_disconnect()->phy_detach()-> phy_suspend() is called. A similar issue occurs when the phylib state machine calls phy_suspend() when handling state PHY_HALTED. Core of the problem is that phy_suspend() suspends the PHY when it should not due to WoL. phy_suspend() checks for WoL already, but this works only if the PHY driver handles WoL (what is rarely the case). Typically WoL is handled by the MAC driver. phylib knows about this and handles it in mdio_bus_phy_may_suspend(), but that's used only when suspending the system, not in other cases like shutdown. Therefore factor out the relevant check from mdio_bus_phy_may_suspend() to a new function phy_may_suspend() and use it in phy_suspend(). Last but not least change phy_detach() to call phy_suspend() before attached_dev is set to NULL. phy_suspend() accesses attached_dev when checking whether the MAC driver activated WoL. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop") Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/phy/phy_device.c | 42 ++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index db1172db1e7c..2c80d3c44cd6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -75,6 +75,26 @@ extern struct phy_driver genphy_10g_driver; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); +static bool phy_may_suspend(struct phy_device *phydev) +{ + struct net_device *netdev = phydev->attached_dev; + + if (!netdev) + return true; + + /* Don't suspend PHY if the attached netdev parent may wakeup. + * The parent may point to a PCI device, as in tg3 driver. + */ + if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) + return false; + + /* Also don't suspend PHY if the netdev itself may wakeup. This + * is the case for devices w/o underlaying pwr. mgmt. aware bus, + * e.g. SoC devices. + */ + return !device_may_wakeup(&netdev->dev); +} + #ifdef CONFIG_PM static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { @@ -93,20 +113,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) return !phydev->suspended; - /* Don't suspend PHY if the attached netdev parent may wakeup. - * The parent may point to a PCI device, as in tg3 driver. - */ - if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) - return false; - - /* Also don't suspend PHY if the netdev itself may wakeup. This - * is the case for devices w/o underlaying pwr. mgmt. aware bus, - * e.g. SoC devices. - */ - if (device_may_wakeup(&netdev->dev)) - return false; - - return true; + return phy_may_suspend(phydev); } static int mdio_bus_phy_suspend(struct device *dev) @@ -1132,9 +1139,9 @@ void phy_detach(struct phy_device *phydev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } + phy_suspend(phydev); phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; - phy_suspend(phydev); phydev->phylink = NULL; phy_led_triggers_unregister(phydev); @@ -1171,9 +1178,12 @@ int phy_suspend(struct phy_device *phydev) struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; + if (phydev->suspended) + return 0; + /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts) + if (wol.wolopts || !phy_may_suspend(phydev)) return -EBUSY; if (phydev->drv && phydrv->suspend) From d31d1d03aa909aa6257d9d581eb0eb5d0ed366e2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Sep 2018 20:01:36 -0700 Subject: [PATCH 258/499] Revert "net: phy: fix WoL handling when suspending the PHY" This reverts commit e0511f6c1ccdd153cf063764e93ac177a8553c5d. I commited the wrong version of these changes. Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 42 ++++++++++++++---------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 2c80d3c44cd6..db1172db1e7c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -75,26 +75,6 @@ extern struct phy_driver genphy_10g_driver; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); -static bool phy_may_suspend(struct phy_device *phydev) -{ - struct net_device *netdev = phydev->attached_dev; - - if (!netdev) - return true; - - /* Don't suspend PHY if the attached netdev parent may wakeup. 
- * The parent may point to a PCI device, as in tg3 driver. - */ - if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) - return false; - - /* Also don't suspend PHY if the netdev itself may wakeup. This - * is the case for devices w/o underlaying pwr. mgmt. aware bus, - * e.g. SoC devices. - */ - return !device_may_wakeup(&netdev->dev); -} - #ifdef CONFIG_PM static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { @@ -113,7 +93,20 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) return !phydev->suspended; - return phy_may_suspend(phydev); + /* Don't suspend PHY if the attached netdev parent may wakeup. + * The parent may point to a PCI device, as in tg3 driver. + */ + if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) + return false; + + /* Also don't suspend PHY if the netdev itself may wakeup. This + * is the case for devices w/o underlaying pwr. mgmt. aware bus, + * e.g. SoC devices. + */ + if (device_may_wakeup(&netdev->dev)) + return false; + + return true; } static int mdio_bus_phy_suspend(struct device *dev) @@ -1139,9 +1132,9 @@ void phy_detach(struct phy_device *phydev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } - phy_suspend(phydev); phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; + phy_suspend(phydev); phydev->phylink = NULL; phy_led_triggers_unregister(phydev); @@ -1178,12 +1171,9 @@ int phy_suspend(struct phy_device *phydev) struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; - if (phydev->suspended) - return 0; - /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts || !phy_may_suspend(phydev)) + if (wol.wolopts) return -EBUSY; if (phydev->drv && phydrv->suspend) From 6194114324139dc16f3251c67ed853bd6d4ae056 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 24 Sep 2018 21:58:59 +0200 Subject: [PATCH 259/499] net: core: add member wol_enabled to struct net_device Add flag wol_enabled to struct net_device indicating whether Wake-on-LAN is enabled. As first user phy_suspend() will use it to decide whether PHY can be suspended or not. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/ethtool.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..c7861e4b402c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1730,6 +1730,8 @@ enum netdev_priv_flags { * switch driver and used to set the phys state of the * switch port. * + * @wol_enabled: Wake-on-LAN is enabled + * * FIXME: cleanup struct net_device such that network protocol info * moves out. 
*/ @@ -2014,6 +2016,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; + unsigned wol_enabled:1; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 234a0ec2e932..0762aaf8e964 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1483,6 +1483,7 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol; + int ret; if (!dev->ethtool_ops->set_wol) return -EOPNOTSUPP; @@ -1490,7 +1491,13 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (copy_from_user(&wol, useraddr, sizeof(wol))) return -EFAULT; - return dev->ethtool_ops->set_wol(dev, &wol); + ret = dev->ethtool_ops->set_wol(dev, &wol); + if (ret) + return ret; + + dev->wol_enabled = !!wol.wolopts; + + return 0; } static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) From 93f41e67dc8ff0fd987120a6ef2717f21462c534 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 24 Sep 2018 22:01:32 +0200 Subject: [PATCH 260/499] net: phy: fix WoL handling when suspending the PHY Core of the problem is that phy_suspend() suspends the PHY when it should not because of WoL. phy_suspend() checks for WoL already, but this works only if the PHY driver handles WoL (what is rarely the case). Typically WoL is handled by the MAC driver. This patch uses new member wol_enabled of struct net_device as additional criteria in the check when not to suspend the PHY because of WoL. Last but not least change phy_detach() to call phy_suspend() before attached_dev is set to NULL. phy_suspend() accesses attached_dev when checking whether the MAC driver activated WoL. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index db1172db1e7c..19ab8a7d1e48 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -93,7 +93,12 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) return !phydev->suspended; - /* Don't suspend PHY if the attached netdev parent may wakeup. + if (netdev->wol_enabled) + return false; + + /* As long as not all affected network drivers support the + * wol_enabled flag, let's check for hints that WoL is enabled. + * Don't suspend PHY if the attached netdev parent may wake up. * The parent may point to a PCI device, as in tg3 driver. 
*/ if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) @@ -1132,9 +1137,9 @@ void phy_detach(struct phy_device *phydev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } + phy_suspend(phydev); phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; - phy_suspend(phydev); phydev->phylink = NULL; phy_led_triggers_unregister(phydev); @@ -1168,12 +1173,13 @@ EXPORT_SYMBOL(phy_detach); int phy_suspend(struct phy_device *phydev) { struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); + struct net_device *netdev = phydev->attached_dev; struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts) + if (wol.wolopts || (netdev && netdev->wol_enabled)) return -EBUSY; if (phydev->drv && phydrv->suspend) From 5a94df70d3878ae597dd8331ec0add491bdfe851 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Sep 2018 16:51:41 +0100 Subject: [PATCH 261/499] qed: fix spelling mistake "toogle" -> "toggle" Trivial fix to spelling mistake in DP_VERBOSE message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index be941cfaa2d4..c71391b9c757 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -228,7 +228,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, num_cons, "Toggle"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, - "Failed to allocate toogle bits, rc = %d\n", rc); + "Failed to allocate toggle bits, rc = %d\n", rc); goto free_cq_map; } From 079db3fd4ef164a59006425a988b1c73639fade8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Sep 2018 18:39:51 +0100 Subject: [PATCH 262/499] wimax/i2400m: fix spelling mistake "not unitialized" -> "uninitialized" Trivial fix to spelling mistake in ms_to_errno array of error messages and remove confusing "not" from the error text since the error code refers to an uninitialized error code. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c index 094cea775d0c..ef298d8525c5 100644 --- a/drivers/net/wimax/i2400m/control.c +++ b/drivers/net/wimax/i2400m/control.c @@ -257,7 +257,7 @@ static const struct [I2400M_MS_ACCESSIBILITY_ERROR] = { "accesibility error", -EIO }, [I2400M_MS_BUSY] = { "busy", -EBUSY }, [I2400M_MS_CORRUPTED_TLV] = { "corrupted TLV", -EILSEQ }, - [I2400M_MS_UNINITIALIZED] = { "not unitialized", -EILSEQ }, + [I2400M_MS_UNINITIALIZED] = { "uninitialized", -EILSEQ }, [I2400M_MS_UNKNOWN_ERROR] = { "unknown error", -EIO }, [I2400M_MS_PRODUCTION_ERROR] = { "production error", -EIO }, [I2400M_MS_NO_RF] = { "no RF", -EIO }, From a898fba32229efd5e6b6154f83fa86a7145156b9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 13:53:34 -0700 Subject: [PATCH 263/499] qed: Avoid implicit enum conversion in qed_set_tunn_cls_info Clang warns when one enumerated type is implicitly converted to another. 
drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:163:25: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->vxlan.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:165:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:167:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:169:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_geneve.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:171:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_geneve.tun_cls = type; ~ ^~~~ 5 warnings generated. Avoid this by changing type to an int. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 8de644b4721e..77b6248ad3b9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -154,7 +154,7 @@ qed_set_pf_update_tunn_mode(struct qed_tunnel_info *p_tun, static void qed_set_tunn_cls_info(struct qed_tunnel_info *p_tun, struct qed_tunnel_info *p_src) { - enum tunnel_clss type; + int type; p_tun->b_update_rx_cls = p_src->b_update_rx_cls; p_tun->b_update_tx_cls = p_src->b_update_tx_cls; From db803f36e56f23b5a2266807e190d1dc11554d54 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:05:27 -0700 Subject: [PATCH 264/499] qed: Fix mask parameter in qed_vf_prep_tunn_req_tlv Clang complains when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_vf.c:686:6: warning: implicit conversion from enumeration type 'enum qed_tunn_mode' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] QED_MODE_L2GENEVE_TUNN, ^~~~~~~~~~~~~~~~~~~~~~ Update mask's parameter to expect qed_tunn_mode, which is what was intended. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 3d4269659820..fcd8da08274f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -572,7 +572,7 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) static void __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, u8 *p_cls) + enum qed_tunn_mode mask, u8 *p_cls) { if (p_src->b_update_mode) { p_req->tun_mode_update_mask |= BIT(mask); @@ -587,7 +587,7 @@ __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, static void qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, + enum qed_tunn_mode mask, u8 *p_cls, struct qed_tunn_update_udp_port *p_port, u8 *p_update_port, u16 *p_udp_port) { From d3a315795b4ce8b105a64a90699103121bde04a8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:34:53 -0700 Subject: [PATCH 265/499] qed: Avoid implicit enum conversion in qed_roce_mode_to_flavor Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_roce.c:153:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = ROCE_V2_IPV6; ~ ^~~~~~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_roce.c:156:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = MAX_ROCE_MODE; ~ ^~~~~~~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, roce_flavor: ROCE_V2_IPV6 = RROCE_IPV6 = 2 MAX_ROCE_MODE = MAX_ROCE_FLAVOR = 3 While we're at it, ditch the local variable flavor; we can just return the value directly from the switch statement. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 7d7a64c55ff1..f9167d1354bb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -140,23 +140,16 @@ static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) { - enum roce_flavor flavor; - switch (roce_mode) { case ROCE_V1: - flavor = PLAIN_ROCE; - break; + return PLAIN_ROCE; case ROCE_V2_IPV4: - flavor = RROCE_IPV4; - break; + return RROCE_IPV4; case ROCE_V2_IPV6: - flavor = ROCE_V2_IPV6; - break; + return RROCE_IPV6; default: - flavor = MAX_ROCE_MODE; - break; + return MAX_ROCE_FLAVOR; } - return flavor; } static void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid) From 6a9e461f6fe4434e6172304b69774daff9a3ac4c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 24 Sep 2018 14:39:42 -0700 Subject: [PATCH 266/499] bonding: pass link-local packets to bonding master also. Commit b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") changed the behavior of how link-local-multicast packets are processed.
The change in the behavior broke some legacy use cases where these packets are expected to arrive on bonding master device also. This patch passes the packet to the stack with the link it arrived on as well as passes to the bonding-master device to preserve the legacy use case. Fixes: b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") Reported-by: Michal Soltys Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0d87e11e7f1d..8c0a0908875d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1170,9 +1170,26 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } } - /* don't change skb->dev for link-local packets */ - if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) + /* Link-local multicast packets should be passed to the + * stack on the link they arrive as well as pass them to the + * bond-master device. These packets are mostly usable when + * stack receives it with the link on which they arrive + * (e.g. LLDP) they also must be available on master. Some of + * the use cases include (but are not limited to): LLDP agents + * that must be able to operate both on enslaved interfaces as + * well as on bonds themselves; linux bridges that must be able + * to process/pass BPDUs from attached bonds when any kind of + * STP version is enabled on the network. + */ + if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (nskb) { + nskb->dev = bond->dev; + netif_rx(nskb); + } return RX_HANDLER_PASS; + } if (bond_should_deliver_exact_match(skb, slave, bond)) return RX_HANDLER_EXACT; From d4859d749aa7090ffb743d15648adb962a1baeae Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 24 Sep 2018 14:40:11 -0700 Subject: [PATCH 267/499] bonding: avoid possible dead-lock Syzkaller reported this on a slightly older kernel but it's still applicable to the current kernel - ====================================================== WARNING: possible circular locking dependency detected 4.18.0-next-20180823+ #46 Not tainted ------------------------------------------------------ syz-executor4/26841 is trying to acquire lock: 00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652 but task is already holding lock: 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #2 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline] bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320 process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}: process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #0 ((wq_completion)bond_dev->name){+.+.}: lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Chain exists of: (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock((work_completion)(&(&nnw->work)->work)); lock(rtnl_mutex); lock((wq_completion)bond_dev->name); *** DEADLOCK *** 1 lock held by syz-executor4/26841: stack backtrace: CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222 check_prev_add kernel/locking/lockdep.c:1862 [inline] check_prevs_add kernel/locking/lockdep.c:1975 [inline] validate_chain kernel/locking/lockdep.c:2416 [inline] __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412 lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 
drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457089 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001 Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 43 +++++++++++++-------------------- include/net/bonding.h | 7 +----- 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8c0a0908875d..c05c01a00755 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -210,6 +210,7 @@ static void bond_get_stats(struct net_device *bond_dev, static void bond_slave_arr_handler(struct work_struct *work); static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, int mod); +static void bond_netdev_notify_work(struct work_struct *work); /*---------------------------- General routines -----------------------------*/ @@ -1286,6 +1287,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond) return NULL; } } + INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); + return slave; } @@ -1293,6 +1296,7 @@ static void bond_free_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); + cancel_delayed_work_sync(&slave->notify_work); if (BOND_MODE(bond) == BOND_MODE_8023AD) kfree(SLAVE_AD_INFO(slave)); @@ -1314,39 +1318,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } -static void bond_netdev_notify(struct net_device *dev, - struct netdev_bonding_info *info) -{ - rtnl_lock(); - netdev_bonding_info_change(dev, info); - rtnl_unlock(); -} - static void bond_netdev_notify_work(struct work_struct *_work) { - struct netdev_notify_work *w = - container_of(_work, struct netdev_notify_work, work.work); + struct slave *slave = container_of(_work, struct slave, + notify_work.work); - bond_netdev_notify(w->dev, &w->bonding_info); - dev_put(w->dev); - kfree(w); + if (rtnl_trylock()) { + struct netdev_bonding_info binfo; + + bond_fill_ifslave(slave, &binfo.slave); + bond_fill_ifbond(slave->bond, &binfo.master); + netdev_bonding_info_change(slave->dev, &binfo); + rtnl_unlock(); + } else { + 
queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); + } } void bond_queue_slave_event(struct slave *slave) { - struct bonding *bond = slave->bond; - struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); - - if (!nnw) - return; - - dev_hold(slave->dev); - nnw->dev = slave->dev; - bond_fill_ifslave(slave, &nnw->bonding_info.slave); - bond_fill_ifbond(bond, &nnw->bonding_info.master); - INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); - - queue_delayed_work(slave->bond->wq, &nnw->work, 0); + queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); } void bond_lower_state_changed(struct slave *slave) diff --git a/include/net/bonding.h b/include/net/bonding.h index a2d058170ea3..b46d68acf701 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -139,12 +139,6 @@ struct bond_parm_tbl { int mode; }; -struct netdev_notify_work { - struct delayed_work work; - struct net_device *dev; - struct netdev_bonding_info bonding_info; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -172,6 +166,7 @@ struct slave { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif + struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; From 1c492a9d55ba99079210ed901dd8a5423f980487 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 15:17:03 -0700 Subject: [PATCH 268/499] qed: Avoid constant logical operation warning in qed_vf_pf_acquire Clang warns when a constant is used in a boolean context as it thinks a bitwise operation may have been intended. drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand] if (!p_iov->b_pre_fp_hsi && ^ drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: use '&' for a bitwise operation if (!p_iov->b_pre_fp_hsi && ^~ & drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: remove constant to silence this warning if (!p_iov->b_pre_fp_hsi && ~^~ 1 warning generated. This has been here since commit 1fe614d10f45 ("qed: Relax VF firmware requirements") and I am not entirely sure why since 0 isn't a special case. Just remove the statement causing Clang to warn since it isn't required. Link: https://github.com/ClangBuiltLinux/linux/issues/126 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index fcd8da08274f..be118d057b92 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -413,7 +413,6 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } if (!p_iov->b_pre_fp_hsi && - ETH_HSI_VER_MINOR && (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) { DP_INFO(p_hwfn, "PF is using older fastpath HSI; %02x.%02x is configured\n", From 77f2d753819b7d50c16abfb778caf1fe075faed0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:42:12 -0700 Subject: [PATCH 269/499] qed: Avoid implicit enum conversion in qed_iwarp_parse_rx_pkt Clang warns when one enumerated type is implicitly converted to another. 
drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1713:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV4; ~ ^~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1733:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV6; ~ ^~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, qed_tcp_ip_version: TCP_IPV4 = QED_TCP_IPV4 = 0 TCP_IPV6 = QED_TCP_IPV6 = 1 Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 17f3dfa2cc94..e860bdf0f752 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1710,7 +1710,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->local_ip[0] = ntohl(iph->daddr); cm_info->remote_ip[0] = ntohl(iph->saddr); - cm_info->ip_version = TCP_IPV4; + cm_info->ip_version = QED_TCP_IPV4; ip_hlen = (iph->ihl) * sizeof(u32); *payload_len = ntohs(iph->tot_len) - ip_hlen; @@ -1730,7 +1730,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->remote_ip[i] = ntohl(ip6h->saddr.in6_u.u6_addr32[i]); } - cm_info->ip_version = TCP_IPV6; + cm_info->ip_version = QED_TCP_IPV6; ip_hlen = sizeof(*ip6h); *payload_len = ntohs(ip6h->payload_len); From 3e322474485931e7ea6e4c5560089991a7f03cbc Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 25 Sep 2018 01:50:00 +0200 Subject: [PATCH 270/499] net: phy: sfp: Fix unregistering of HWMON SFP device A HWMON device is only registered if the SFP module supports the diagnostic page and is compliant with SFF8472. Don't unconditionally unregister the hwmon device when the SFP module is removed, otherwise we access data structures which don't exist. Reported-by: Florian Fainelli Fixes: 1323061a018a ("net: phy: sfp: Add HWMON support for module sensors") Signed-off-by: Andrew Lunn Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 52fffb98fde9..6e13b8832bc7 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1098,8 +1098,11 @@ static int sfp_hwmon_insert(struct sfp *sfp) static void sfp_hwmon_remove(struct sfp *sfp) { - hwmon_device_unregister(sfp->hwmon_dev); - kfree(sfp->hwmon_name); + if (!IS_ERR_OR_NULL(sfp->hwmon_dev)) { + hwmon_device_unregister(sfp->hwmon_dev); + sfp->hwmon_dev = NULL; + kfree(sfp->hwmon_name); + } } #else static int sfp_hwmon_insert(struct sfp *sfp) From 8fd780698745ba121530c5c20fd237aacde4c371 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 26 Sep 2018 10:35:42 +0800 Subject: [PATCH 271/499] vxlan: fill ttl inherit info When adding vxlan ttl inherit support, I forgot to fill it when dumping vxlan info. Fix it now. Fixes: 72f6d71e491e6 ("vxlan: add ttl inherit support") Signed-off-by: Hangbin Liu Signed-off-by: David S.
Miller --- drivers/net/vxlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ababba37d735..2b8da2b7e721 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3539,6 +3539,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ @@ -3603,6 +3604,8 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) } if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || + nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT, + !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || nla_put_u8(skb, IFLA_VXLAN_LEARNING, From 73f21c653f930f438d53eed29b5e4c65c8a0f906 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 26 Sep 2018 00:41:04 -0400 Subject: [PATCH 272/499] bnxt_en: Fix TX timeout during netpoll. The current netpoll implementation in the bnxt_en driver has problems that may miss TX completion events. bnxt_poll_work() in effect is only handling at most 1 TX packet before exiting. In addition, there may be in flight TX completions that ->poll() may miss even after we fix bnxt_poll_work() to handle all visible TX completions. netpoll may not call ->poll() again and HW may not generate IRQ because the driver does not ARM the IRQ when the budget (0 for netpoll) is reached. We fix it by handling all TX completions and to always ARM the IRQ when we exit ->poll() with 0 budget. Also, the logic to ACK the completion ring in case it is almost filled with TX completions need to be adjusted to take care of the 0 budget case, as discussed with Eric Dumazet Reported-by: Song Liu Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 61957b0bbd8c..0478e562abac 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1884,8 +1884,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. 
*/ - if (unlikely(tx_pkts > bp->tx_wake_thresh)) + if (unlikely(tx_pkts > bp->tx_wake_thresh)) { rx_pkts = budget; + raw_cons = NEXT_RAW_CMP(raw_cons); + break; + } } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { if (likely(budget)) rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); @@ -1913,7 +1916,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) } raw_cons = NEXT_RAW_CMP(raw_cons); - if (rx_pkts == budget) + if (rx_pkts && rx_pkts == budget) break; } @@ -2027,8 +2030,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) while (1) { work_done += bnxt_poll_work(bp, bnapi, budget - work_done); - if (work_done >= budget) + if (work_done >= budget) { + if (!budget) + BNXT_CP_DB_REARM(cpr->cp_doorbell, + cpr->cp_raw_cons); break; + } if (!bnxt_has_work(bp, cpr)) { if (napi_complete_done(napi, work_done)) From d4ce58082f206bf6e7d697380c7bc5480a8b0264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 25 Sep 2018 21:59:28 -0700 Subject: [PATCH 273/499] net-tcp: /proc/sys/net/ipv4/tcp_probe_interval is a u32 not int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (fix documentation and sysctl access to treat it as such) Tested: # zcat /proc/config.gz | egrep ^CONFIG_HZ CONFIG_HZ_1000=y CONFIG_HZ=1000 # echo $[(1<<32)/1000 + 1] | tee /proc/sys/net/ipv4/tcp_probe_interval 4294968 tee: /proc/sys/net/ipv4/tcp_probe_interval: Invalid argument # echo $[(1<<32)/1000] | tee /proc/sys/net/ipv4/tcp_probe_interval 4294967 # echo 0 | tee /proc/sys/net/ipv4/tcp_probe_interval # echo -1 | tee /proc/sys/net/ipv4/tcp_probe_interval -1 tee: /proc/sys/net/ipv4/tcp_probe_interval: Invalid argument Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 +- net/ipv4/sysctl_net_ipv4.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 8313a636dd53..960de8fe3f40 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -425,7 +425,7 @@ tcp_mtu_probing - INTEGER 1 - Disabled by default, enabled when an ICMP black hole detected 2 - Always enabled, use initial MSS of tcp_base_mss. -tcp_probe_interval - INTEGER +tcp_probe_interval - UNSIGNED INTEGER Controls how often to start TCP Packetization-Layer Path MTU Discovery reprobe. The default is reprobing every 10 minutes as per RFC4821. 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b92f422f2fa8..891ed2f91467 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,6 +48,7 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; +static u32 u32_max_div_HZ = UINT_MAX / HZ; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -745,9 +746,10 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_probe_interval", .data = &init_net.ipv4.sysctl_tcp_probe_interval, - .maxlen = sizeof(int), + .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec_minmax, + .extra2 = &u32_max_div_HZ, }, { .procname = "igmp_link_local_mcast_reports", From 1222a16014888ed9733c11e221730d4a8196222b Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:01 +0900 Subject: [PATCH 274/499] nl80211: Fix possible Spectre-v1 for CQM RSSI thresholds Use array_index_nospec() to sanitize i with respect to speculation. Note that the user doesn't control i directly, but can make it out of bounds by not finding a threshold in the array. Signed-off-by: Masashi Honma [add note about user control, as explained by Masashi] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bd26230de63e..176edfefcbaa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10231,7 +10231,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; u32 hyst; - int i, n; + int i, n, low_index; int err; /* RSSI reporting disabled? */ @@ -10268,10 +10268,19 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (last < wdev->cqm_config->rssi_thresholds[i]) break; - low = i > 0 ? - (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; - high = i < n ? - (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + low_index = i - 1; + if (low_index >= 0) { + low_index = array_index_nospec(low_index, n); + low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + } else { + low = S32_MIN; + } + if (i < n) { + i = array_index_nospec(i, n); + high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + } else { + high = S32_MAX; + } return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); } From 52bf4a900d9cede3eb14982d0f2c5e6db6d97cc3 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 25 Apr 2018 12:14:39 +0200 Subject: [PATCH 275/499] clocksource/drivers/timer-atmel-pit: Properly handle error cases The smatch utility reports a possible leak: smatch warnings: drivers/clocksource/timer-atmel-pit.c:183 at91sam926x_pit_dt_init() warn: possible memory leak of 'data' Ensure data is freed before exiting with an error. 
Reported-by: Dan Carpenter Signed-off-by: Alexandre Belloni Cc: stable@vger.kernel.org Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-atmel-pit.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index ec8a4376f74f..2fab18fae4fc 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -180,26 +180,29 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) data->base = of_iomap(node, 0); if (!data->base) { pr_err("Could not map PIT address\n"); - return -ENXIO; + ret = -ENXIO; + goto exit; } data->mck = of_clk_get(node, 0); if (IS_ERR(data->mck)) { pr_err("Unable to get mck clk\n"); - return PTR_ERR(data->mck); + ret = PTR_ERR(data->mck); + goto exit; } ret = clk_prepare_enable(data->mck); if (ret) { pr_err("Unable to enable mck\n"); - return ret; + goto exit; } /* Get the interrupts property */ data->irq = irq_of_parse_and_map(node, 0); if (!data->irq) { pr_err("Unable to get IRQ from DT\n"); - return -EINVAL; + ret = -EINVAL; + goto exit; } /* @@ -227,7 +230,7 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) ret = clocksource_register_hz(&data->clksrc, pit_rate); if (ret) { pr_err("Failed to register clocksource\n"); - return ret; + goto exit; } /* Set up irq handler */ @@ -236,7 +239,8 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) "at91_tick", data); if (ret) { pr_err("Unable to setup IRQ\n"); - return ret; + clocksource_unregister(&data->clksrc); + goto exit; } /* Set up and register clockevents */ @@ -254,6 +258,10 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) clockevents_register_device(&data->clkevt); return 0; + +exit: + kfree(data); + return ret; } TIMER_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit", at91sam926x_pit_dt_init); From 659902db7008f233e3d514c774b5ff3d46c8edd8 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 14 Sep 2018 13:04:37 +0800 Subject: [PATCH 276/499] ieee802154: ca8210: remove redundant condition check before debugfs_remove debugfs_remove has taken the IS_ERR into account. Just remove the unnecessary condition. Signed-off-by: zhong jiang Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index e21279dde85c..0ff5a403a8dc 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3043,8 +3043,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv) { struct ca8210_test *test = &priv->test; - if (!IS_ERR(test->ca8210_dfs_spi_int)) - debugfs_remove(test->ca8210_dfs_spi_int); + debugfs_remove(test->ca8210_dfs_spi_int); kfifo_free(&test->up_fifo); dev_info(&priv->spi->dev, "Test interface removed\n"); } From 092ffc51fb3f9b8369e737c9320bf0bffb2c898f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:07 +0100 Subject: [PATCH 277/499] rxrpc: Remove dup code from rxrpc_find_connection_rcu() rxrpc_find_connection_rcu() initialises variable k twice with the same information. Remove one of the initialisations. 
Signed-off-by: David Howells --- net/rxrpc/conn_object.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 77440a356b14..1746b48cb165 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -85,9 +85,6 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) goto not_found; - k.epoch = sp->hdr.epoch; - k.cid = sp->hdr.cid & RXRPC_CIDMASK; - /* We may have to handle mixing IPv4 and IPv6 */ if (srx.transport.family != local->srx.transport.family) { pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", From f151ba989d149bbdfc90e5405724bbea094f9b17 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sat, 22 Sep 2018 15:55:49 -0400 Subject: [PATCH 278/499] xen/blkfront: When purging persistent grants, keep them in the buffer Commit a46b53672b2c ("xen/blkfront: cleanup stale persistent grants") added support for purging persistent grants when they are not in use. As part of the purge, the grants were removed from the grant buffer, This eventually causes the buffer to become empty, with BUG_ON triggered in get_free_grant(). This can be observed even on an idle system, within 20-30 minutes. We should keep the grants in the buffer when purging, and only free the grant ref. Fixes: a46b53672b2c ("xen/blkfront: cleanup stale persistent grants") Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a71d817e900d..3b441fe69c0d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2667,11 +2667,9 @@ static void purge_persistent_grants(struct blkfront_info *info) gnttab_query_foreign_access(gnt_list_entry->gref)) continue; - list_del(&gnt_list_entry->node); gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); + gnt_list_entry->gref = GRANT_INVALID_REF; rinfo->persistent_gnts_c--; - __free_page(gnt_list_entry->page); - kfree(gnt_list_entry); } spin_unlock_irqrestore(&rinfo->ring_lock, flags); From d6a77ba0eb92d8ffa4b05a442fc20d0a9b11c4c4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 27 Sep 2018 14:41:30 +0200 Subject: [PATCH 279/499] Revert "drm/panel: Add device_link from panel device to DRM device" This reverts commit 0c08754b59da5557532d946599854e6df28edc22. commit 0c08754b59da ("drm/panel: Add device_link from panel device to DRM device") creates a circular dependency under these circumstances: 1. The panel depends on dsi-host because it is MIPI-DSI child device. 2. dsi-host depends on the drm parent device (connector->dev->dev) this should be allowed. 3. drm parent dev (connector->dev->dev) depends on the panel after this patch. This makes the dependency circular and while it appears it does not affect any in-tree drivers (they do not seem to have dsi hosts depending on the same parent device) this does not seem right. As noted in a response from Andrzej Hajda, the intent is likely to make the panel dependent on the DRM device (connector->dev) not its parent. But we have no way of doing that since the DRM device doesn't contain any struct device on its own (arguably it should). Revert this until a proper approach is figured out. 
Cc: Jyri Sarha Cc: Eric Anholt Cc: Andrzej Hajda Signed-off-by: Linus Walleij Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180927124130.9102-1-linus.walleij@linaro.org --- drivers/gpu/drm/drm_panel.c | 10 ---------- include/drm/drm_panel.h | 1 - 2 files changed, 11 deletions(-) diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index b902361dee6e..1d9a9d2fe0e0 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -105,13 +104,6 @@ int drm_panel_attach(struct drm_panel *panel, struct drm_connector *connector) if (panel->connector) return -EBUSY; - panel->link = device_link_add(connector->dev->dev, panel->dev, 0); - if (!panel->link) { - dev_err(panel->dev, "failed to link panel to %s\n", - dev_name(connector->dev->dev)); - return -EINVAL; - } - panel->connector = connector; panel->drm = connector->dev; @@ -133,8 +125,6 @@ EXPORT_SYMBOL(drm_panel_attach); */ int drm_panel_detach(struct drm_panel *panel) { - device_link_del(panel->link); - panel->connector = NULL; panel->drm = NULL; diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 582a0ec0aa70..777814755fa6 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -89,7 +89,6 @@ struct drm_panel { struct drm_device *drm; struct drm_connector *connector; struct device *dev; - struct device_link *link; const struct drm_panel_funcs *funcs; From 61ea6f5831974ebd1a57baffd7cc30600a2e26fc Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 27 Sep 2018 20:48:39 +0800 Subject: [PATCH 280/499] drm/amdgpu: Fix vce work queue was not cancelled when suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vce cancel_delayed_work_sync was never called, because the driver only calls it in an error path. This caused the A+A suspend hang when runtime pm is enabled: the idle work will visit the smu after the dgpu has been suspended, which hangs the smu, and the dgpu will also be woken up. As the smu has hung, the dgpu resume will then fail.
Reviewed-by: Christian König Reviewed-by: Feifei Xu Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 0cc5190f4f36..5f3f54073818 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -258,6 +258,8 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) { int i; + cancel_delayed_work_sync(&adev->vce.idle_work); + if (adev->vce.vcpu_bo == NULL) return 0; @@ -268,7 +270,6 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_VCE_HANDLES) return 0; - cancel_delayed_work_sync(&adev->vce.idle_work); /* TODO: suspending running encoding sessions isn't supported */ return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index fd654a4406db..400fc74bbae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -153,11 +153,11 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) unsigned size; void *ptr; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.vcpu_bo == NULL) return 0; - cancel_delayed_work_sync(&adev->vcn.idle_work); - size = amdgpu_bo_size(adev->vcn.vcpu_bo); ptr = adev->vcn.cpu_addr; From 599760d6d0abbab71f9726b49858d2ec45e74c0a Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 26 Sep 2018 13:42:16 -0400 Subject: [PATCH 281/499] drm/amd/display: Fix Vega10 lightup on S3 resume [Why] There have been a few reports of Vega10 display remaining blank after S3 resume. The regression is caused by workaround for mode change on Vega10 - skip set_bandwidth if stream count is 0. As a result we skipped dispclk reset on suspend, thus on resume we may skip the clock update assuming it hasn't been changed. On some systems it causes display blank or 'out of range'. [How] Revert "drm/amd/display: Fix Vega10 black screen after mode change" Verified that it hadn't cause mode change regression. 
Signed-off-by: Roman Li Reviewed-by: Sun peng Li Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 2 +- .../drm/amd/display/dc/dce110/dce110_hw_sequencer.h | 5 ----- .../drm/amd/display/dc/dce120/dce120_hw_sequencer.c | 12 ------------ 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 14384d9675a8..b2f308766a9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2560,7 +2560,7 @@ static void pplib_apply_display_requirements( dc->prev_display_config = *pp_display_cfg; } -void dce110_set_bandwidth( +static void dce110_set_bandwidth( struct dc *dc, struct dc_state *context, bool decrease_allowed) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h index e4c5db75c4c6..d6db3dbd9015 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h @@ -68,11 +68,6 @@ void dce110_fill_display_configs( const struct dc_state *context, struct dm_pp_display_configuration *pp_display_cfg); -void dce110_set_bandwidth( - struct dc *dc, - struct dc_state *context, - bool decrease_allowed); - uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context); void dp_receiver_power_ctrl(struct dc_link *link, bool on); diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c index 5853522a6182..eb0f5f9a973b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_hw_sequencer.c @@ -244,17 +244,6 @@ static void dce120_update_dchub( dh_data->dchub_info_valid = false; } -static void dce120_set_bandwidth( - struct dc *dc, - struct dc_state *context, - bool decrease_allowed) -{ - if (context->stream_count <= 0) - return; - - dce110_set_bandwidth(dc, context, decrease_allowed); -} - void dce120_hw_sequencer_construct(struct dc *dc) { /* All registers used by dce11.2 match those in dce11 in offset and @@ -263,6 +252,5 @@ void dce120_hw_sequencer_construct(struct dc *dc) dce110_hw_sequencer_construct(dc); dc->hwss.enable_display_power_gating = dce120_enable_display_power_gating; dc->hwss.update_dchub = dce120_update_dchub; - dc->hwss.set_bandwidth = dce120_set_bandwidth; } From fbbdadf2faf17cd88e9c447701495540377c5743 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Wed, 26 Sep 2018 13:42:10 -0400 Subject: [PATCH 282/499] drm/amd/display: Fix Edid emulation for linux [Why] EDID emulation didn't work properly for linux, as we stop programming if nothing is connected physically. [How] We get a flag from DRM when we want to do edid emulation. We check if this flag is true and nothing is connected physically, if so we only program the front end using VIRTUAL_SIGNAL. 
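For illustration only (this is a condensed sketch of the detection flow added by the diff below, not part of the patch; it reuses the helper names introduced there and assumes a forced connector):

	enum dc_connection_type new_connection_type = dc_connection_none;

	/* If the connector is forced on but nothing is physically attached,
	 * fall back to an emulated sink so the front end is still programmed
	 * (SIGNAL_TYPE_VIRTUAL is used for the DP case).
	 */
	if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type))
		DRM_ERROR("KMS: Failed to detect connector\n");

	if (aconnector->base.force && new_connection_type == dc_connection_none)
		emulated_link_detect(aconnector->dc_link);
	else
		dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);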
Signed-off-by: Bhawanpreet Lakha Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 139 +++++++++++++++++- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 +- drivers/gpu/drm/amd/display/dc/dc_link.h | 1 + 3 files changed, 137 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 800f481a6995..96875950845a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -641,6 +641,87 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, return NULL; } +static void emulated_link_detect(struct dc_link *link) +{ + struct dc_sink_init_data sink_init_data = { 0 }; + struct display_sink_capability sink_caps = { 0 }; + enum dc_edid_status edid_status; + struct dc_context *dc_ctx = link->ctx; + struct dc_sink *sink = NULL; + struct dc_sink *prev_sink = NULL; + + link->type = dc_connection_none; + prev_sink = link->local_sink; + + if (prev_sink != NULL) + dc_sink_retain(prev_sink); + + switch (link->connector_signal) { + case SIGNAL_TYPE_HDMI_TYPE_A: { + sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C; + sink_caps.signal = SIGNAL_TYPE_HDMI_TYPE_A; + break; + } + + case SIGNAL_TYPE_DVI_SINGLE_LINK: { + sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C; + sink_caps.signal = SIGNAL_TYPE_DVI_SINGLE_LINK; + break; + } + + case SIGNAL_TYPE_DVI_DUAL_LINK: { + sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C; + sink_caps.signal = SIGNAL_TYPE_DVI_DUAL_LINK; + break; + } + + case SIGNAL_TYPE_LVDS: { + sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C; + sink_caps.signal = SIGNAL_TYPE_LVDS; + break; + } + + case SIGNAL_TYPE_EDP: { + sink_caps.transaction_type = + DDC_TRANSACTION_TYPE_I2C_OVER_AUX; + sink_caps.signal = SIGNAL_TYPE_EDP; + break; + } + + case SIGNAL_TYPE_DISPLAY_PORT: { + sink_caps.transaction_type = + DDC_TRANSACTION_TYPE_I2C_OVER_AUX; + sink_caps.signal = SIGNAL_TYPE_VIRTUAL; + break; + } + + default: + DC_ERROR("Invalid connector type! 
signal:%d\n", + link->connector_signal); + return; + } + + sink_init_data.link = link; + sink_init_data.sink_signal = sink_caps.signal; + + sink = dc_sink_create(&sink_init_data); + if (!sink) { + DC_ERROR("Failed to create sink!\n"); + return; + } + + link->local_sink = sink; + + edid_status = dm_helpers_read_local_edid( + link->ctx, + link, + sink); + + if (edid_status != EDID_OK) + DC_ERROR("Failed to read EDID"); + +} + static int dm_resume(void *handle) { struct amdgpu_device *adev = handle; @@ -654,6 +735,7 @@ static int dm_resume(void *handle) struct drm_plane *plane; struct drm_plane_state *new_plane_state; struct dm_plane_state *dm_new_plane_state; + enum dc_connection_type new_connection_type = dc_connection_none; int ret; int i; @@ -684,7 +766,13 @@ static int dm_resume(void *handle) continue; mutex_lock(&aconnector->hpd_lock); - dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); + if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type)) + DRM_ERROR("KMS: Failed to detect connector\n"); + + if (aconnector->base.force && new_connection_type == dc_connection_none) + emulated_link_detect(aconnector->dc_link); + else + dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD); if (aconnector->fake_enable && aconnector->dc_link->local_sink) aconnector->fake_enable = false; @@ -922,6 +1010,7 @@ static void handle_hpd_irq(void *param) struct amdgpu_dm_connector *aconnector = (struct amdgpu_dm_connector *)param; struct drm_connector *connector = &aconnector->base; struct drm_device *dev = connector->dev; + enum dc_connection_type new_connection_type = dc_connection_none; /* In case of failure or MST no need to update connector status or notify the OS * since (for MST case) MST does this in it's own context. @@ -931,7 +1020,21 @@ static void handle_hpd_irq(void *param) if (aconnector->fake_enable) aconnector->fake_enable = false; - if (dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD)) { + if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type)) + DRM_ERROR("KMS: Failed to detect connector\n"); + + if (aconnector->base.force && new_connection_type == dc_connection_none) { + emulated_link_detect(aconnector->dc_link); + + + drm_modeset_lock_all(dev); + dm_restore_drm_connector_state(dev, connector); + drm_modeset_unlock_all(dev); + + if (aconnector->base.force == DRM_FORCE_UNSPECIFIED) + drm_kms_helper_hotplug_event(dev); + + } else if (dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD)) { amdgpu_dm_update_connector_after_detect(aconnector); @@ -1031,6 +1134,7 @@ static void handle_hpd_rx_irq(void *param) struct drm_device *dev = connector->dev; struct dc_link *dc_link = aconnector->dc_link; bool is_mst_root_connector = aconnector->mst_mgr.mst_state; + enum dc_connection_type new_connection_type = dc_connection_none; /* TODO:Temporary add mutex to protect hpd interrupt not have a gpio * conflict, after implement i2c helper, this mutex should be @@ -1042,7 +1146,24 @@ static void handle_hpd_rx_irq(void *param) if (dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL) && !is_mst_root_connector) { /* Downstream Port status changed. 
*/ - if (dc_link_detect(dc_link, DETECT_REASON_HPDRX)) { + if (!dc_link_detect_sink(dc_link, &new_connection_type)) + DRM_ERROR("KMS: Failed to detect connector\n"); + + if (aconnector->base.force && new_connection_type == dc_connection_none) { + emulated_link_detect(dc_link); + + if (aconnector->fake_enable) + aconnector->fake_enable = false; + + amdgpu_dm_update_connector_after_detect(aconnector); + + + drm_modeset_lock_all(dev); + dm_restore_drm_connector_state(dev, connector); + drm_modeset_unlock_all(dev); + + drm_kms_helper_hotplug_event(dev); + } else if (dc_link_detect(dc_link, DETECT_REASON_HPDRX)) { if (aconnector->fake_enable) aconnector->fake_enable = false; @@ -1433,6 +1554,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) struct amdgpu_mode_info *mode_info = &adev->mode_info; uint32_t link_cnt; int32_t total_overlay_planes, total_primary_planes; + enum dc_connection_type new_connection_type = dc_connection_none; link_cnt = dm->dc->caps.max_links; if (amdgpu_dm_mode_config_init(dm->adev)) { @@ -1499,7 +1621,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) link = dc_get_link_at_index(dm->dc, i); - if (dc_link_detect(link, DETECT_REASON_BOOT)) { + if (!dc_link_detect_sink(link, &new_connection_type)) + DRM_ERROR("KMS: Failed to detect connector\n"); + + if (aconnector->base.force && new_connection_type == dc_connection_none) { + emulated_link_detect(link); + amdgpu_dm_update_connector_after_detect(aconnector); + + } else if (dc_link_detect(link, DETECT_REASON_BOOT)) { amdgpu_dm_update_connector_after_detect(aconnector); register_backlight_device(dm, link); } @@ -2494,7 +2623,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (dm_state && dm_state->freesync_capable) stream->ignore_msa_timing_param = true; finish: - if (sink && sink->sink_signal == SIGNAL_TYPE_VIRTUAL) + if (sink && sink->sink_signal == SIGNAL_TYPE_VIRTUAL && aconnector->base.force != DRM_FORCE_ON) dc_sink_release(sink); return stream; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 37eaf72ace54..fced3c1c2ef5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -195,7 +195,7 @@ static bool program_hpd_filter( return result; } -static bool detect_sink(struct dc_link *link, enum dc_connection_type *type) +bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) { uint32_t is_hpd_high = 0; struct gpio *hpd_pin; @@ -604,7 +604,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) return false; - if (false == detect_sink(link, &new_connection_type)) { + if (false == dc_link_detect_sink(link, &new_connection_type)) { BREAK_TO_DEBUGGER(); return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index d43cefbc43d3..1b48ab9aea89 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -215,6 +215,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable); bool dc_link_is_dp_sink_present(struct dc_link *link); +bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type); /* * DPCD access interfaces */ From d6d1cd2578c4da0764ad334e3411c1c1b1557f58 Mon Sep 17 00:00:00 2001 From: Xue Liu Date: Fri, 31 Aug 2018 23:46:41 +0200 Subject: [PATCH 283/499] ieee802154: mcr20a: Replace magic number with constants 
The combination of defined constants is used to represent the state of the IRQ, so the magic numbers have been replaced. This is a simple coding style change which should have no impact on runtime code execution. Signed-off-by: Xue Liu Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index e428277781ac..04891429a554 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -903,19 +903,19 @@ mcr20a_irq_clean_complete(void *context) switch (seq_state) { /* TX IRQ, RX IRQ and SEQ IRQ */ - case (0x03): + case (DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { lp->is_tx = 0; dev_dbg(printdev(lp), "TX is done. No ACK\n"); mcr20a_handle_tx_complete(lp); } break; - case (0x05): + case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_SEQIRQ): /* rx is starting */ dev_dbg(printdev(lp), "RX is starting\n"); mcr20a_handle_rx(lp); break; - case (0x07): + case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { /* tx is done */ lp->is_tx = 0; @@ -927,7 +927,7 @@ mcr20a_irq_clean_complete(void *context) mcr20a_handle_rx(lp); } break; - case (0x01): + case (DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { dev_dbg(printdev(lp), "TX is starting\n"); mcr20a_handle_tx(lp); From 0f843e65d9eef4936929bb036c5f771fb261eea4 Mon Sep 17 00:00:00 2001 From: Guoju Fang Date: Thu, 27 Sep 2018 23:41:46 +0800 Subject: [PATCH 284/499] bcache: add separate workqueue for journal_write to avoid deadlock After a write to the SSD completes, bcache schedules journal_write work to system_wq, which is a public workqueue in the system, without the WQ_MEM_RECLAIM flag. system_wq is also a bound wq, and there may be no idle kworker on the current processor. Creating a new kworker may unfortunately need to reclaim memory first, by shrinking cache and slab used by vfs, which depends on the bcache device. That's a deadlock. This patch creates a new workqueue for journal_write with the WQ_MEM_RECLAIM flag. Its rescuer thread will work to avoid the deadlock.
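For illustration only (this sketch is not part of the patch; the queue and work names are invented): a workqueue allocated with WQ_MEM_RECLAIM gets a dedicated rescuer thread, so work queued on it can still make forward progress even when spawning a new kworker would itself need to reclaim memory through the very device the work services.

	struct workqueue_struct *journal_wq;

	/* WQ_MEM_RECLAIM keeps a rescuer thread around for this queue, so
	 * journal work does not depend on memory reclaim to get a worker.
	 */
	journal_wq = alloc_workqueue("example_journal", WQ_MEM_RECLAIM, 0);
	if (!journal_wq)
		return -ENOMEM;

	queue_work(journal_wq, &journal_work);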
Signed-off-by: Guoju Fang Cc: stable@vger.kernel.org Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/journal.c | 6 +++--- drivers/md/bcache/super.c | 8 ++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 83504dd8100a..954dad29e6e8 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -965,6 +965,7 @@ void bch_prio_write(struct cache *ca); void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent); extern struct workqueue_struct *bcache_wq; +extern struct workqueue_struct *bch_journal_wq; extern struct mutex bch_register_lock; extern struct list_head bch_cache_sets; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 6116bbf870d8..522c7426f3a0 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -485,7 +485,7 @@ static void do_journal_discard(struct cache *ca) closure_get(&ca->set->cl); INIT_WORK(&ja->discard_work, journal_discard_work); - schedule_work(&ja->discard_work); + queue_work(bch_journal_wq, &ja->discard_work); } } @@ -592,7 +592,7 @@ static void journal_write_done(struct closure *cl) : &j->w[0]; __closure_wake_up(&w->wait); - continue_at_nobarrier(cl, journal_write, system_wq); + continue_at_nobarrier(cl, journal_write, bch_journal_wq); } static void journal_write_unlock(struct closure *cl) @@ -627,7 +627,7 @@ static void journal_write_unlocked(struct closure *cl) spin_unlock(&c->journal.lock); btree_flush_write(c); - continue_at(cl, journal_write, system_wq); + continue_at(cl, journal_write, bch_journal_wq); return; } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 94c756c66bd7..30ba9aeb5ee8 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -47,6 +47,7 @@ static int bcache_major; static DEFINE_IDA(bcache_device_idx); static wait_queue_head_t unregister_wait; struct workqueue_struct *bcache_wq; +struct workqueue_struct *bch_journal_wq; #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) /* limitation of partitions number on single bcache device */ @@ -2341,6 +2342,9 @@ static void bcache_exit(void) kobject_put(bcache_kobj); if (bcache_wq) destroy_workqueue(bcache_wq); + if (bch_journal_wq) + destroy_workqueue(bch_journal_wq); + if (bcache_major) unregister_blkdev(bcache_major, "bcache"); unregister_reboot_notifier(&reboot); @@ -2370,6 +2374,10 @@ static int __init bcache_init(void) if (!bcache_wq) goto err; + bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0); + if (!bch_journal_wq) + goto err; + bcache_kobj = kobject_create_and_add("bcache", fs_kobj); if (!bcache_kobj) goto err; From bdec8d7fa55e6f5314ed72e5a0b435d90ff90548 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 27 Sep 2018 20:38:45 +0800 Subject: [PATCH 285/499] x86/boot: Fix kexec booting failure in the SEV bit detection code Commit 1958b5fc4010 ("x86/boot: Add early boot support when running with SEV active") can occasionally cause system resets when kexec-ing a second kernel even if SEV is not active. That's because get_sev_encryption_bit() uses 32-bit rIP-relative addressing to read the value of enc_bit - a variable which caches a previously detected encryption bit position - but kexec may allocate the early boot code to a higher location, beyond the 32-bit addressing limit. In this case, garbage will be read and get_sev_encryption_bit() will return the wrong value, leading to accessing memory with the wrong encryption setting. 
Therefore, remove enc_bit, and thus get rid of the need to do 32-bit rIP-relative addressing in the first place. [ bp: massage commit message heavily. ] Fixes: 1958b5fc4010 ("x86/boot: Add early boot support when running with SEV active") Suggested-by: Borislav Petkov Signed-off-by: Kairui Song Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Cc: linux-kernel@vger.kernel.org Cc: tglx@linutronix.de Cc: mingo@redhat.com Cc: hpa@zytor.com Cc: brijesh.singh@amd.com Cc: kexec@lists.infradead.org Cc: dyoung@redhat.com Cc: bhe@redhat.com Cc: ghook@redhat.com Link: https://lkml.kernel.org/r/20180927123845.32052-1-kasong@redhat.com --- arch/x86/boot/compressed/mem_encrypt.S | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index eaa843a52907..a480356e0ed8 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -25,20 +25,6 @@ ENTRY(get_sev_encryption_bit) push %ebx push %ecx push %edx - push %edi - - /* - * RIP-relative addressing is needed to access the encryption bit - * variable. Since we are running in 32-bit mode we need this call/pop - * sequence to get the proper relative addressing. - */ - call 1f 1: popl %edi - subl $1b, %edi - - movl enc_bit(%edi), %eax - cmpl $0, %eax - jge .Lsev_exit /* Check if running under a hypervisor */ movl $1, %eax @@ -69,15 +55,12 @@ ENTRY(get_sev_encryption_bit) movl %ebx, %eax andl $0x3f, %eax /* Return the encryption bit location */ - movl %eax, enc_bit(%edi) jmp .Lsev_exit .Lno_sev: xor %eax, %eax - movl %eax, enc_bit(%edi) .Lsev_exit: - pop %edi pop %edx pop %ecx pop %ebx @@ -113,8 +96,6 @@ ENTRY(set_sev_encryption_mask) ENDPROC(set_sev_encryption_mask) .data -enc_bit: - .int 0xffffffff #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 From f52afc93cd018fe6910133a05d44671192d1aeb0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 27 Sep 2018 13:23:32 +0200 Subject: [PATCH 286/499] dax: Fix deadlock in dax_lock_mapping_entry() When dax_lock_mapping_entry() has to sleep to obtain entry lock, it will fail to unlock mapping->i_pages spinlock and thus immediately deadlock against itself when retrying to grab the entry lock again. Fix the problem by unlocking mapping->i_pages before retrying. Fixes: c2a7d2a11552 ("filesystem-dax: Introduce dax_lock_mapping_entry()") Reported-by: Barret Rhoden Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/dax.c b/fs/dax.c index f32d7125ad0f..e4ef8af31aa6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -447,6 +447,7 @@ bool dax_lock_mapping_entry(struct page *page) xa_unlock_irq(&mapping->i_pages); break; } else if (IS_ERR(entry)) { + xa_unlock_irq(&mapping->i_pages); WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN); continue; } From 3baafeffa48a12b3cec9a0b6d4049fba02d53cea Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 25 Sep 2018 16:56:53 +0300 Subject: [PATCH 287/499] iwlwifi: 1000: set the TFD queue size .max_tfd_queue_size was omitted for the 1000 card series, leading to an oops in swiotlb.
Fixes: 7b3e42ea2ead ("iwlwifi: support multiple tfd queue max sizes for different devices") Tested-by: Randy Dunlap Signed-off-by: Pavel Machek Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/cfg/1000.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/1000.c b/drivers/net/wireless/intel/iwlwifi/cfg/1000.c index 591687984962..497fd766d87c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/1000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/1000.c @@ -51,6 +51,7 @@ static const struct iwl_base_params iwl1000_base_params = { .num_of_queues = IWLAGN_NUM_QUEUES, + .max_tfd_queue_size = 256, .eeprom_size = OTP_LOW_IMAGE_SIZE, .pll_cfg = true, .max_ll_items = OTP_MAX_LL_ITEMS_1000, From ce01a1575f45bf319e374592656441021a7f5823 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 27 Sep 2018 14:39:19 -0400 Subject: [PATCH 288/499] rseq/selftests: fix parametrized test with -fpie On x86-64, the parametrized selftest code for rseq crashes with a segmentation fault when compiled with -fpie. This happens when the param_test binary is loaded at an address beyond 32-bit on x86-64. The issue is caused by use of a 32-bit register to hold the address of the loop counter variable. Fix this by using a 64-bit register to calculate the address of the loop counter variables as an offset from rip. Signed-off-by: Mathieu Desnoyers Acked-by: "Paul E . McKenney" Cc: # v4.18 Cc: Shuah Khan Cc: Thomas Gleixner Cc: Joel Fernandes Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Dave Watson Cc: Will Deacon Cc: Andi Kleen Cc: linux-kselftest@vger.kernel.org Cc: "H . Peter Anvin" Cc: Chris Lameter Cc: Russell King Cc: Michael Kerrisk Cc: "Paul E . McKenney" Cc: Paul Turner Cc: Boqun Feng Cc: Josh Triplett Cc: Steven Rostedt Cc: Ben Maurer Cc: Andy Lutomirski Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/rseq/param_test.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 642d4e12abea..eec2663261f2 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -56,15 +56,13 @@ unsigned int yield_mod_cnt, nr_abort; printf(fmt, ## __VA_ARGS__); \ } while (0) -#if defined(__x86_64__) || defined(__i386__) +#ifdef __i386__ #define INJECT_ASM_REG "eax" #define RSEQ_INJECT_CLOBBER \ , INJECT_ASM_REG -#ifdef __i386__ - #define RSEQ_INJECT_ASM(n) \ "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ @@ -76,9 +74,16 @@ unsigned int yield_mod_cnt, nr_abort; #elif defined(__x86_64__) +#define INJECT_ASM_REG_P "rax" +#define INJECT_ASM_REG "eax" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG_P \ + , INJECT_ASM_REG + #define RSEQ_INJECT_ASM(n) \ - "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \ - "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ + "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ + "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ "jz 333f\n\t" \ "222:\n\t" \ @@ -86,10 +91,6 @@ unsigned int yield_mod_cnt, nr_abort; "jnz 222b\n\t" \ "333:\n\t" -#else -#error "Unsupported architecture" -#endif - #elif defined(__s390__) #define RSEQ_INJECT_INPUT \ From 587562d0c7cd6861f4f90a2eb811cccb1a376f5f Mon Sep 17 00:00:00 2001 From: 
Ilya Dryomov Date: Wed, 26 Sep 2018 14:35:50 +0200 Subject: [PATCH 289/499] blk-mq: I/O and timer unplugs are inverted in blktrace trace_block_unplug() takes true for explicit unplugs and false for implicit unplugs. schedule() unplugs are implicit and should be reported as timer unplugs. While correct in the legacy code, this has been inverted in blk-mq since 4.11. Cc: stable@vger.kernel.org Fixes: bd166ef183c2 ("blk-mq-sched: add framework for MQ capable IO schedulers") Reviewed-by: Omar Sandoval Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 85a1c1a59c72..e3c39ea8e17b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1628,7 +1628,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) BUG_ON(!rq->q); if (rq->mq_ctx != this_ctx) { if (this_ctx) { - trace_block_unplug(this_q, depth, from_schedule); + trace_block_unplug(this_q, depth, !from_schedule); blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, from_schedule); @@ -1648,7 +1648,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) * on 'ctx_list'. Do those. */ if (this_ctx) { - trace_block_unplug(this_q, depth, from_schedule); + trace_block_unplug(this_q, depth, !from_schedule); blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, from_schedule); } From 083874549fdfefa629dfa752785e20427dde1511 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 27 Sep 2018 15:47:33 -0500 Subject: [PATCH 290/499] PCI: Reprogram bridge prefetch registers on resume On 38+ Intel-based ASUS products, the NVIDIA GPU becomes unusable after S3 suspend/resume. The affected products include multiple generations of NVIDIA GPUs and Intel SoCs. After resume, nouveau logs many errors such as: fifo: fault 00 [READ] at 0000005555555000 engine 00 [GR] client 04 [HUB/FE] reason 4a [] on channel -1 [007fa91000 unknown] DRM: failed to idle channel 0 [DRM] Similarly, the NVIDIA proprietary driver also fails after resume (black screen, 100% CPU usage in Xorg process). We shipped a sample to NVIDIA for diagnosis, and their response indicated that it's a problem with the parent PCI bridge (on the Intel SoC), not the GPU. Runtime suspend/resume works fine, only S3 suspend is affected. We found a workaround: on resume, rewrite the Intel PCI bridge 'Prefetchable Base Upper 32 Bits' register (PCI_PREF_BASE_UPPER32). In the cases that I checked, this register has value 0 and we just have to rewrite that value. Linux already saves and restores PCI config space during suspend/resume, but this register was being skipped because upon resume, it already has value 0 (the correct, pre-suspend value). Intel appear to have previously acknowledged this behaviour and the requirement to rewrite this register: https://bugzilla.kernel.org/show_bug.cgi?id=116851#c23 Based on that, rewrite the prefetch register values even when that appears unnecessary. We have confirmed this solution on all the affected models we have in-hands (X542UQ, UX533FD, X530UN, V272UN). Additionally, this solves an issue where r8169 MSI-X interrupts were broken after S3 suspend/resume on ASUS X441UAR. This issue was recently worked around in commit 7bb05b85bc2d ("r8169: don't use MSI-X on RTL8106e"). It also fixes the same issue on RTL6186evl/8111evl on an Aimfor-tech laptop that we had not yet patched. I suspect it will also fix the issue that was worked around in commit 7c53a722459c ("r8169: don't use MSI-X on RTL8168g"). 
Thomas Martitz reports that this change also solves an issue where the AMD Radeon Polaris 10 GPU on the HP Zbook 14u G5 is unresponsive after S3 suspend/resume. Link: https://bugzilla.kernel.org/show_bug.cgi?id=201069 Signed-off-by: Daniel Drake Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki Reviewed-By: Peter Wu CC: stable@vger.kernel.org --- drivers/pci/pci.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1835f3a7aa8d..51b6c81671c1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1289,12 +1289,12 @@ int pci_save_state(struct pci_dev *dev) EXPORT_SYMBOL(pci_save_state); static void pci_restore_config_dword(struct pci_dev *pdev, int offset, - u32 saved_val, int retry) + u32 saved_val, int retry, bool force) { u32 val; pci_read_config_dword(pdev, offset, &val); - if (val == saved_val) + if (!force && val == saved_val) return; for (;;) { @@ -1313,25 +1313,36 @@ static void pci_restore_config_dword(struct pci_dev *pdev, int offset, } static void pci_restore_config_space_range(struct pci_dev *pdev, - int start, int end, int retry) + int start, int end, int retry, + bool force) { int index; for (index = end; index >= start; index--) pci_restore_config_dword(pdev, 4 * index, pdev->saved_config_space[index], - retry); + retry, force); } static void pci_restore_config_space(struct pci_dev *pdev) { if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) { - pci_restore_config_space_range(pdev, 10, 15, 0); + pci_restore_config_space_range(pdev, 10, 15, 0, false); /* Restore BARs before the command register. */ - pci_restore_config_space_range(pdev, 4, 9, 10); - pci_restore_config_space_range(pdev, 0, 3, 0); + pci_restore_config_space_range(pdev, 4, 9, 10, false); + pci_restore_config_space_range(pdev, 0, 3, 0, false); + } else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + pci_restore_config_space_range(pdev, 12, 15, 0, false); + + /* + * Force rewriting of prefetch registers to avoid S3 resume + * issues on Intel PCI bridges that occur when these + * registers are not explicitly written. + */ + pci_restore_config_space_range(pdev, 9, 11, 0, true); + pci_restore_config_space_range(pdev, 0, 8, 0, false); } else { - pci_restore_config_space_range(pdev, 0, 15, 0); + pci_restore_config_space_range(pdev, 0, 15, 0, false); } } From add92a817e60e308a419693413a38d9d1e663aff Mon Sep 17 00:00:00 2001 From: Harsh Jain Date: Wed, 19 Sep 2018 22:42:16 +0530 Subject: [PATCH 291/499] crypto: chelsio - Fix memory corruption in DMA Mapped buffers. Update PCI Id in "cpl_rx_phys_dsgl" header. In case pci_chan_id and tx_chan_id are not derived from same queue, H/W can send request completion indication before completing DMA Transfer. Herbert, It would be good if fix can be merge to stable tree. For 4.14 kernel, It requires some update to avoid mege conficts. 
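For illustration only (the struct and helper below are invented stand-ins, not the driver's own types): the crux of this fix is that the channel written into the cpl_rx_phys_dsgl header must be derived from the same queue index as the TX channel, so the hardware can only report completion on the queue that actually carries the DMA. A minimal sketch of that derivation:

	/* Invented stand-in for the driver context; illustrative only. */
	struct chan_map_sketch {
		unsigned int tx_qidx;		/* TX queue the request is submitted on */
		unsigned int tx_chan_id;	/* channel that owns this context */
		unsigned int pci_chan_id;	/* channel the DSGL completion is steered to */
	};

	static void map_channels_sketch(struct chan_map_sketch *m,
					unsigned int chan_id,
					unsigned int txq_perchan,
					unsigned int id)
	{
		m->tx_chan_id = chan_id;
		m->tx_qidx = chan_id * txq_perchan + id % txq_perchan;
		/*
		 * Derive the completion channel from the same queue index,
		 * so the hardware cannot signal completion on a channel
		 * other than the one performing the DMA transfer.
		 */
		m->pci_chan_id = m->tx_qidx / txq_perchan;
	}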
Cc: Signed-off-by: Harsh Jain Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 32 +++++++++++++++++++--------- drivers/crypto/chelsio/chcr_crypto.h | 2 ++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 5c539af8ed60..010bbf607797 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -367,7 +367,8 @@ static inline void dsgl_walk_init(struct dsgl_walk *walk, walk->to = (struct phys_sge_pairs *)(dsgl + 1); } -static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid) +static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid, + int pci_chan_id) { struct cpl_rx_phys_dsgl *phys_cpl; @@ -385,6 +386,7 @@ static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid) phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR; phys_cpl->rss_hdr_int.qid = htons(qid); phys_cpl->rss_hdr_int.hash_val = 0; + phys_cpl->rss_hdr_int.channel = pci_chan_id; } static inline void dsgl_walk_add_page(struct dsgl_walk *walk, @@ -718,7 +720,7 @@ static inline void create_wreq(struct chcr_context *ctx, FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid, !!lcb, ctx->tx_qidx); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id, + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, qid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) - ((sizeof(chcr_req->wreq)) >> 4))); @@ -1339,16 +1341,23 @@ static int chcr_device_init(struct chcr_context *ctx) adap->vres.ncrypto_fc); rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; - rxq_idx = ctx->dev->tx_channel_id * rxq_perchan; - rxq_idx += id % rxq_perchan; - txq_idx = ctx->dev->tx_channel_id * txq_perchan; - txq_idx += id % txq_perchan; spin_lock(&ctx->dev->lock_chcr_dev); - ctx->rx_qidx = rxq_idx; - ctx->tx_qidx = txq_idx; + ctx->tx_chan_id = ctx->dev->tx_channel_id; ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; ctx->dev->rx_channel_id = 0; spin_unlock(&ctx->dev->lock_chcr_dev); + rxq_idx = ctx->tx_chan_id * rxq_perchan; + rxq_idx += id % rxq_perchan; + txq_idx = ctx->tx_chan_id * txq_perchan; + txq_idx += id % txq_perchan; + ctx->rx_qidx = rxq_idx; + ctx->tx_qidx = txq_idx; + /* Channel Id used by SGE to forward packet to Host. + * Same value should be used in cpl_fw6_pld RSS_CH field + * by FW. Driver programs PCI channel ID to be used in fw + * at the time of queue allocation with value "pi->tx_chan" + */ + ctx->pci_chan_id = txq_idx / txq_perchan; } out: return err; @@ -2503,6 +2512,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req, struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct dsgl_walk dsgl_walk; unsigned int authsize = crypto_aead_authsize(tfm); + struct chcr_context *ctx = a_ctx(tfm); u32 temp; dsgl_walk_init(&dsgl_walk, phys_cpl); @@ -2512,7 +2522,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req, dsgl_walk_add_page(&dsgl_walk, IV, &reqctx->iv_dma); temp = req->cryptlen + (reqctx->op ? 
-authsize : authsize); dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, req->assoclen); - dsgl_walk_end(&dsgl_walk, qid); + dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); } void chcr_add_cipher_src_ent(struct ablkcipher_request *req, @@ -2544,6 +2554,8 @@ void chcr_add_cipher_dst_ent(struct ablkcipher_request *req, unsigned short qid) { struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req); + struct chcr_context *ctx = c_ctx(tfm); struct dsgl_walk dsgl_walk; dsgl_walk_init(&dsgl_walk, phys_cpl); @@ -2552,7 +2564,7 @@ void chcr_add_cipher_dst_ent(struct ablkcipher_request *req, reqctx->dstsg = dsgl_walk.last_sg; reqctx->dst_ofst = dsgl_walk.last_sg_len; - dsgl_walk_end(&dsgl_walk, qid); + dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); } void chcr_add_hash_src_ent(struct ahash_request *req, diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 54835cb109e5..0d2c70c344f3 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -255,6 +255,8 @@ struct chcr_context { struct chcr_dev *dev; unsigned char tx_qidx; unsigned char rx_qidx; + unsigned char tx_chan_id; + unsigned char pci_chan_id; struct __crypto_ctx crypto_ctx[0]; }; From d80771c08363ad7fbf0f56f5301e7ca65065c582 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 21 Sep 2018 18:03:18 +0300 Subject: [PATCH 292/499] crypto: mxs-dcp - Fix wait logic on chan threads When compiling with CONFIG_DEBUG_ATOMIC_SLEEP=y the mxs-dcp driver prints warnings such as: WARNING: CPU: 0 PID: 120 at kernel/sched/core.c:7736 __might_sleep+0x98/0x9c do not call blocking ops when !TASK_RUNNING; state=1 set at [<8081978c>] dcp_chan_thread_sha+0x3c/0x2ec The problem is that blocking ops will manipulate current->state themselves so it is not allowed to call them between set_current_state(TASK_INTERRUPTIBLE) and schedule(). Fix this by converting the per-chan mutex to a spinlock (it only protects tiny list ops anyway) and rearranging the wait logic so that callbacks are called with current->state set to TASK_RUNNING. Those callbacks will indeed call blocking ops themselves so this is required.
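For illustration only (the queue type and its callbacks below are invented stand-ins; only the kernel scheduling and locking APIs are real): a minimal sketch of the corrected per-channel wait pattern, in which work callbacks only run once the task state has been put back to TASK_RUNNING:

	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/spinlock.h>

	/* Invented stand-in for a per-channel work queue. */
	struct chan_sketch {
		spinlock_t lock;
		void *(*dequeue)(struct chan_sketch *c);	/* tiny list op only */
		void (*process)(void *item);			/* may call blocking ops */
	};

	static int chan_thread_sketch(void *data)
	{
		struct chan_sketch *c = data;
		void *item;

		while (!kthread_should_stop()) {
			set_current_state(TASK_INTERRUPTIBLE);

			spin_lock(&c->lock);			/* spinlock: no sleeping here */
			item = c->dequeue(c);
			spin_unlock(&c->lock);

			if (!item) {
				schedule();			/* nothing queued: sleep */
				continue;
			}

			set_current_state(TASK_RUNNING);	/* back to RUNNING first */
			c->process(item);			/* blocking ops are safe now */
		}
		return 0;
	}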
Cc: Signed-off-by: Leonard Crestez Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 53 +++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index a10c418d4e5c..56bd28174f52 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -63,7 +63,7 @@ struct dcp { struct dcp_coherent_block *coh; struct completion completion[DCP_MAX_CHANS]; - struct mutex mutex[DCP_MAX_CHANS]; + spinlock_t lock[DCP_MAX_CHANS]; struct task_struct *thread[DCP_MAX_CHANS]; struct crypto_queue queue[DCP_MAX_CHANS]; }; @@ -349,13 +349,20 @@ static int dcp_chan_thread_aes(void *data) int ret; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -363,11 +370,8 @@ static int dcp_chan_thread_aes(void *data) if (arq) { ret = mxs_dcp_aes_block_crypt(arq); arq->complete(arq, ret); - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -409,9 +413,9 @@ static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb) rctx->ecb = ecb; actx->chan = DCP_CHAN_CRYPTO; - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); @@ -640,13 +644,20 @@ static int dcp_chan_thread_sha(void *data) struct ahash_request *req; int ret, fini; - do { - __set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); - mutex_lock(&sdcp->mutex[chan]); + spin_lock(&sdcp->lock[chan]); backlog = crypto_get_backlog(&sdcp->queue[chan]); arq = crypto_dequeue_request(&sdcp->queue[chan]); - mutex_unlock(&sdcp->mutex[chan]); + spin_unlock(&sdcp->lock[chan]); + + if (!backlog && !arq) { + schedule(); + continue; + } + + set_current_state(TASK_RUNNING); if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -658,12 +669,8 @@ static int dcp_chan_thread_sha(void *data) ret = dcp_sha_req_to_buf(arq); fini = rctx->fini; arq->complete(arq, ret); - if (!fini) - continue; } - - schedule(); - } while (!kthread_should_stop()); + } return 0; } @@ -721,9 +728,9 @@ static int dcp_sha_update_fx(struct ahash_request *req, int fini) rctx->init = 1; } - mutex_lock(&sdcp->mutex[actx->chan]); + spin_lock(&sdcp->lock[actx->chan]); ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); - mutex_unlock(&sdcp->mutex[actx->chan]); + spin_unlock(&sdcp->lock[actx->chan]); wake_up_process(sdcp->thread[actx->chan]); mutex_unlock(&actx->mutex); @@ -997,7 +1004,7 @@ static int mxs_dcp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sdcp); for (i = 0; i < DCP_MAX_CHANS; i++) { - mutex_init(&sdcp->mutex[i]); + spin_lock_init(&sdcp->lock[i]); init_completion(&sdcp->completion[i]); crypto_init_queue(&sdcp->queue[i], 50); } From ba439a6cbfa2936a6713f64cb499de7943673fe3 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 22 Sep 2018 20:41:55 -0400 Subject: [PATCH 293/499] crypto: 
qat - Fix KASAN stack-out-of-bounds bug in adf_probe() The following KASAN warning was printed when booting a 64-bit kernel on some systems with Intel CPUs: [ 44.512826] ================================================================== [ 44.520165] BUG: KASAN: stack-out-of-bounds in find_first_bit+0xb0/0xc0 [ 44.526786] Read of size 8 at addr ffff88041e02fc50 by task kworker/0:2/124 [ 44.535253] CPU: 0 PID: 124 Comm: kworker/0:2 Tainted: G X --------- --- 4.18.0-12.el8.x86_64+debug #1 [ 44.545858] Hardware name: Intel Corporation PURLEY/PURLEY, BIOS BKVDTRL1.86B.0005.D08.1712070559 12/07/2017 [ 44.555682] Workqueue: events work_for_cpu_fn [ 44.560043] Call Trace: [ 44.562502] dump_stack+0x9a/0xe9 [ 44.565832] print_address_description+0x65/0x22e [ 44.570683] ? find_first_bit+0xb0/0xc0 [ 44.570689] kasan_report.cold.6+0x92/0x19f [ 44.578726] find_first_bit+0xb0/0xc0 [ 44.578737] adf_probe+0x9eb/0x19a0 [qat_c62x] [ 44.578751] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.591490] ? mark_held_locks+0xc8/0x140 [ 44.591498] ? _raw_spin_unlock+0x30/0x30 [ 44.591505] ? trace_hardirqs_on_caller+0x381/0x570 [ 44.604418] ? adf_remove+0x110/0x110 [qat_c62x] [ 44.604427] local_pci_probe+0xd4/0x180 [ 44.604432] ? pci_device_shutdown+0x110/0x110 [ 44.617386] work_for_cpu_fn+0x51/0xa0 [ 44.621145] process_one_work+0x8fe/0x16e0 [ 44.625263] ? pwq_dec_nr_in_flight+0x2d0/0x2d0 [ 44.629799] ? lock_acquire+0x14c/0x400 [ 44.633645] ? move_linked_works+0x12e/0x2a0 [ 44.637928] worker_thread+0x536/0xb50 [ 44.641690] ? __kthread_parkme+0xb6/0x180 [ 44.645796] ? process_one_work+0x16e0/0x16e0 [ 44.650160] kthread+0x30c/0x3d0 [ 44.653400] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 44.658457] ret_from_fork+0x3a/0x50 [ 44.663557] The buggy address belongs to the page: [ 44.668350] page:ffffea0010780bc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 44.676356] flags: 0x17ffffc0000000() [ 44.680023] raw: 0017ffffc0000000 ffffea0010780bc8 ffffea0010780bc8 0000000000000000 [ 44.687769] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 44.695510] page dumped because: kasan: bad access detected [ 44.702578] Memory state around the buggy address: [ 44.707372] ffff88041e02fb00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.714593] ffff88041e02fb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.721810] >ffff88041e02fc00: 00 00 00 00 00 00 f1 f1 f1 f1 04 f2 f2 f2 f2 f2 [ 44.729028] ^ [ 44.734864] ffff88041e02fc80: f2 f2 00 00 00 00 f3 f3 f3 f3 00 00 00 00 00 00 [ 44.742082] ffff88041e02fd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 44.749299] ================================================================== Looking into the code: int ret, bar_mask; : for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, It is casting a 32-bit integer pointer to a 64-bit unsigned long pointer. There are two problems here. First, the 32-bit pointer address may not be 64-bit aligned. Secondly, it is accessing an extra 4 bytes. This is fixed by changing the bar_mask type to unsigned long. 
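For illustration only (walk_bars_sketch() is an invented stand-in, not driver code): for_each_set_bit() dereferences its second argument as an array of unsigned long, so the mask storage handed to it must genuinely be that wide and naturally aligned. A minimal sketch of the safe pattern:

	#include <linux/bitops.h>
	#include <linux/types.h>

	static void walk_bars_sketch(u32 raw_mask)
	{
		unsigned long bar_mask = raw_mask;	/* full-width, aligned copy */
		unsigned int bar_nr;

		/*
		 * Safe: &bar_mask really points at sizeof(unsigned long) bytes.
		 * Casting &raw_mask (a 4-byte object) here instead is what KASAN
		 * reported as a stack-out-of-bounds read on 64-bit kernels.
		 */
		for_each_set_bit(bar_nr, &bar_mask, BITS_PER_LONG) {
			/* map BAR bar_nr here */
		}
	}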
Cc: Signed-off-by: Waiman Long Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_c3xxxvf/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_c62x/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_c62xvf/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 6 +++--- drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index ba197f34c252..763c2166ee0e 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C3XXX_PCI_DEVICE_ID: @@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index 24ec908eb26c..613c7d5644ce 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C3XXXIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 59a5a0df50b6..9cb832963357 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C62X_PCI_DEVICE_ID: @@ -235,8 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 
1 : 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index b9f3e0e4fde9..278452b8ef81 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_C62XIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index be5c5a988ca5..3a9708ef4ce2 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -123,7 +123,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_DH895XCC_PCI_DEVICE_ID: @@ -237,8 +238,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 26ab17bfc6da..3da0f951cb59 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -125,7 +125,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct adf_hw_device_data *hw_data; char name[ADF_DEVICE_NAME_LENGTH]; unsigned int i, bar_nr; - int ret, bar_mask; + unsigned long bar_mask; + int ret; switch (ent->device) { case ADF_DH895XCCIOV_PCI_DEVICE_ID: @@ -215,8 +216,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Find and map all the device's BARS */ i = 0; bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, - ADF_PCI_MAX_BARS * 2) { + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; bar->base_addr = pci_resource_start(pdev, bar_nr); From 7e0cf1c983b5b24426d130fd949a055d520acc9a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 28 Sep 2018 14:53:18 +1000 Subject: [PATCH 294/499] selftests/powerpc: Fix Makefiles for headers_install change Commit b2d35fa5fc80 ("selftests: add headers_install to lib.mk") 
introduced a requirement that Makefiles more than one level below the selftests directory need to define top_srcdir, but it didn't update any of the powerpc Makefiles. This broke building all the powerpc selftests with eg: make[1]: Entering directory '/src/linux/tools/testing/selftests/powerpc' BUILD_TARGET=/src/linux/tools/testing/selftests/powerpc/alignment; mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C alignment all make[2]: Entering directory '/src/linux/tools/testing/selftests/powerpc/alignment' ../../lib.mk:20: ../../../../scripts/subarch.include: No such file or directory make[2]: *** No rule to make target '../../../../scripts/subarch.include'. make[2]: Failed to remake makefile '../../../../scripts/subarch.include'. Makefile:38: recipe for target 'alignment' failed Fix it by setting top_srcdir in the affected Makefiles. Fixes: b2d35fa5fc80 ("selftests: add headers_install to lib.mk") Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/alignment/Makefile | 1 + tools/testing/selftests/powerpc/benchmarks/Makefile | 1 + tools/testing/selftests/powerpc/cache_shape/Makefile | 1 + tools/testing/selftests/powerpc/copyloops/Makefile | 1 + tools/testing/selftests/powerpc/dscr/Makefile | 1 + tools/testing/selftests/powerpc/math/Makefile | 1 + tools/testing/selftests/powerpc/mm/Makefile | 1 + tools/testing/selftests/powerpc/pmu/Makefile | 1 + tools/testing/selftests/powerpc/pmu/ebb/Makefile | 1 + tools/testing/selftests/powerpc/primitives/Makefile | 1 + tools/testing/selftests/powerpc/ptrace/Makefile | 1 + tools/testing/selftests/powerpc/signal/Makefile | 1 + tools/testing/selftests/powerpc/stringloops/Makefile | 1 + tools/testing/selftests/powerpc/switch_endian/Makefile | 1 + tools/testing/selftests/powerpc/syscalls/Makefile | 1 + tools/testing/selftests/powerpc/tm/Makefile | 1 + tools/testing/selftests/powerpc/vphn/Makefile | 1 + 17 files changed, 17 insertions(+) diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 93baacab7693..d056486f49de 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -1,5 +1,6 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index b4d7432a0ecd..d40300a65b42 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -4,6 +4,7 @@ TEST_GEN_FILES := exec_target CFLAGS += -O2 +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index 1be547434a49..ede4d3dae750 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -5,6 +5,7 @@ all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c ../utils.c +top_srcdir = ../../../../.. 
include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 1cf89a34d97c..44574f3818b3 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -17,6 +17,7 @@ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ EXTRA_SOURCES := validate.c ../harness.c stubs.S +top_srcdir = ../../../../.. include ../../lib.mk $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 55d7db7a616b..5df476364b4d 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ dscr_sysfs_thread_test +top_srcdir = ../../../../.. include ../../lib.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 0dd3a01fdab9..11a10d7a2bbd 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 8ebbe96d80a8..33ced6e0ad25 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors TEST_GEN_FILES := tempfile +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 6e1629bf5b09..19046db995fe 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c +top_srcdir = ../../../../.. include ../../lib.mk all: $(TEST_GEN_PROGS) ebb diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index c4e64bc2e265..bd5dfa509272 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -17,6 +17,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ lost_exception_test no_handler_test \ cycles_with_mmcr2_test +top_srcdir = ../../../../../.. include ../../../lib.mk $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \ diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index 175366db7be8..ea2b7bd09e36 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -2,6 +2,7 @@ CFLAGS += -I$(CURDIR) TEST_GEN_PROGS := load_unaligned_zeropad +top_srcdir = ../../../../.. 
include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 28f5b781a553..923d531265f8 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -4,6 +4,7 @@ TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ perf-hwbreak +top_srcdir = ../../../../.. include ../../lib.mk all: $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index a7cbd5082e27..1fca25c6ace0 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -8,6 +8,7 @@ $(TEST_PROGS): ../harness.c ../utils.c signal.S CFLAGS += -maltivec signal_tm: CFLAGS += -mhtm +top_srcdir = ../../../../.. include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 10b35c87a4f4..7fc0623d85c3 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -29,6 +29,7 @@ endif ASFLAGS = $(CFLAGS) +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index 30b8ff8fb82e..fcd2dcb8972b 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -5,6 +5,7 @@ ASFLAGS += -O2 -Wall -g -nostdlib -m64 EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S +top_srcdir = ../../../../.. include ../../lib.mk $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index da22ca7c38c1..161b8846336f 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS := ipc_unmuxed CFLAGS += -I../../../../../usr/include +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index c0e45d2dde25..9fc2cf6fbc92 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile index f8ced26748f8..fb82068c9fda 100644 --- a/tools/testing/selftests/powerpc/vphn/Makefile +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS := test-vphn CFLAGS += -m64 +top_srcdir = ../../../../.. 
include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c From 3cc5746e5ad7688e274e193fa71278d98aa52759 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 27 Sep 2018 05:15:35 -0700 Subject: [PATCH 295/499] scsi: qedi: Initialize the stats mutex lock Fix kernel NULL pointer dereference, Call Trace: [] __mutex_lock_slowpath+0xa6/0x1d0 [] mutex_lock+0x1f/0x2f [] qedi_get_protocol_tlv_data+0x61/0x450 [qedi] [] ? map_vm_area+0x2e/0x40 [] ? __vmalloc_node_range+0x170/0x280 [] ? qed_mfw_process_tlv_req+0x27d/0xbd0 [qed] [] qed_mfw_fill_tlv_data+0x4b/0xb0 [qed] [] qed_mfw_process_tlv_req+0x299/0xbd0 [qed] [] ? __switch_to+0xce/0x580 [] qed_slowpath_task+0x5b/0x80 [qed] Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index cc8e64dc65ad..e5bd035ebad0 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2472,6 +2472,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) /* start qedi context */ spin_lock_init(&qedi->hba_lock); spin_lock_init(&qedi->task_idx_lock); + mutex_init(&qedi->stats_lock); } qedi_ops->ll2->register_cb_ops(qedi->cdev, &qedi_ll2_cb_ops, qedi); qedi_ops->ll2->start(qedi->cdev, ¶ms); From dc71db34e4f3c06b8277c8f3c2ff014610607a8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: [PATCH 296/499] rxrpc: Fix checks as to whether we should set up a new call There's a check in rxrpc_data_ready() that's checking the CLIENT_INITIATED flag in the packet type field rather than in the packet flags field. Fix this by creating a pair of helper functions to check whether the packet is going to the client or to the server and use them generally. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 10 ++++++++++ net/rxrpc/conn_object.c | 2 +- net/rxrpc/input.c | 12 ++++-------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c97558710421..9fcb3e197b14 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -463,6 +463,16 @@ struct rxrpc_connection { u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; +static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) +{ + return sp->hdr.flags & RXRPC_CLIENT_INITIATED; +} + +static inline bool rxrpc_to_client(const struct rxrpc_skb_priv *sp) +{ + return !rxrpc_to_server(sp); +} + /* * Flags in call->flags. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 1746b48cb165..390ba50cfab4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -96,7 +96,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) { + if (rxrpc_to_server(sp)) { /* We need to look up service connections by the full protocol * parameter set. We look up the peer first as an intermediate * step and then the connection from the peer's tree. diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cfdc199c6351..ec299c627f77 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1177,10 +1177,6 @@ void rxrpc_data_ready(struct sock *udp_sk) trace_rxrpc_rx_packet(sp); - _net("Rx RxRPC %s ep=%x call=%x:%x", - sp->hdr.flags & RXRPC_CLIENT_INITIATED ? 
"ToServer" : "ToClient", - sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber); - if (sp->hdr.type >= RXRPC_N_PACKET_TYPES || !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) { _proto("Rx Bad Packet Type %u", sp->hdr.type); @@ -1189,13 +1185,13 @@ void rxrpc_data_ready(struct sock *udp_sk) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) + if (rxrpc_to_client(sp)) goto discard; rxrpc_post_packet_to_local(local, skb); goto out; case RXRPC_PACKET_TYPE_BUSY: - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + if (rxrpc_to_server(sp)) goto discard; /* Fall through */ @@ -1280,7 +1276,7 @@ void rxrpc_data_ready(struct sock *udp_sk) call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) { + if (rxrpc_to_client(sp)) { rcu_read_unlock(); goto reject_packet; } @@ -1303,7 +1299,7 @@ void rxrpc_data_ready(struct sock *udp_sk) } if (!call || atomic_read(&call->usage) == 0) { - if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || + if (rxrpc_to_client(sp) || sp->hdr.callNumber == 0 || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message_unlock; From b604dd9883f783a94020d772e4fe03160f455372 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: [PATCH 297/499] rxrpc: Fix RTT gathering Fix RTT information gathering in AF_RXRPC by the following means: (1) Enable Rx timestamping on the transport socket with SO_TIMESTAMPNS. (2) If the sk_buff doesn't have a timestamp set when rxrpc_data_ready() collects it, set it at that point. (3) Allow ACKs to be requested on the last packet of a client call, but not a service call. We need to be careful lest we undo: bf7d620abf22c321208a4da4f435e7af52551a21 Author: David Howells Date: Thu Oct 6 08:11:51 2016 +0100 rxrpc: Don't request an ACK on the last DATA packet of a call's Tx phase but that only really applies to service calls that we're handling, since the client side gets to send the final ACK (or not). (4) When about to transmit an ACK or DATA packet, record the Tx timestamp before only; don't update the timestamp afterwards. (5) Switch the ordering between recording the serial and recording the timestamp to always set the serial number first. The serial number shouldn't be seen referenced by an ACK packet until we've transmitted the packet bearing it - so in the Rx path, we don't need the timestamp until we've checked the serial number. Fixes: cf1a6474f807 ("rxrpc: Add per-peer RTT tracker") Signed-off-by: David Howells --- net/rxrpc/input.c | 8 ++++++-- net/rxrpc/local_object.c | 9 +++++++++ net/rxrpc/output.c | 31 ++++++++++++++++++------------- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ec299c627f77..7f9ed3a60b9a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -622,13 +622,14 @@ static void rxrpc_input_requested_ack(struct rxrpc_call *call, if (!skb) continue; + sent_at = skb->tstamp; + smp_rmb(); /* Read timestamp before serial. 
*/ sp = rxrpc_skb(skb); if (sp->hdr.serial != orig_serial) continue; - smp_rmb(); - sent_at = skb->tstamp; goto found; } + return; found: @@ -1143,6 +1144,9 @@ void rxrpc_data_ready(struct sock *udp_sk) return; } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 777c3ed4cfc0..81de7d889ffa 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -173,6 +173,15 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) _debug("setsockopt failed"); goto error; } + + /* We want receive timestamps. */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } break; default: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ccf5de160444..8a4da3fe96df 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -124,7 +124,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, struct kvec iov[2]; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; - ktime_t now; size_t len, n; int ret; u8 reason; @@ -196,9 +195,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, /* We need to stick a time in before we send the packet in case * the reply gets back before kernel_sendmsg() completes - but * asking UDP to send the packet can take a relatively long - * time, so we update the time after, on the assumption that - * the packet transmission is more likely to happen towards the - * end of the kernel_sendmsg() call. + * time. */ call->ping_time = ktime_get_real(); set_bit(RXRPC_CALL_PINGING, &call->flags); @@ -206,9 +203,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - now = ktime_get_real(); - if (ping) - call->ping_time = now; conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, @@ -363,8 +357,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. + * + * However, we mustn't request an ACK on the last reply packet of a + * service call, lest OpenAFS incorrectly send us an ACK with some + * soft-ACKs in it and then never follow up with a proper hard ACK. 
*/ - if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && + if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) || + rxrpc_to_server(sp) + ) && (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || @@ -390,6 +390,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, goto send_fragmentable; down_read(&conn->params.local->defrag_sem); + + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet * to go out of the interface @@ -413,12 +418,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, lost); if (ret >= 0) { - ktime_t now = ktime_get_real(); - skb->tstamp = now; - smp_wmb(); - sp->hdr.serial = serial; if (whdr.flags & RXRPC_REQUEST_ACK) { - call->peer->rtt_last_req = now; + call->peer->rtt_last_req = skb->tstamp; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); if (call->peer->rtt_usage > 1) { unsigned long nowj = jiffies, ack_lost_at; @@ -457,6 +458,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, down_write(&conn->params.local->defrag_sem); + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + switch (conn->params.local->srx.transport.family) { case AF_INET: opt = IP_PMTUDISC_DONT; From ece64fec164f523bfbe874abdef2a0e6ff376251 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: [PATCH 298/499] rxrpc: Emit BUSY packets when supposed to rather than ABORTs In the input path, a received sk_buff can be marked for rejection by setting RXRPC_SKB_MARK_* in skb->mark and, if needed, some auxiliary data (such as an abort code) in skb->priority. The rejection is handled by queueing the sk_buff up for dealing with in process context. The output code reads the mark and priority and, theoretically, generates an appropriate response packet. However, if RXRPC_SKB_MARK_BUSY is set, this isn't noticed and an ABORT message with a random abort code is generated (since skb->priority wasn't set to anything). Fix this by outputting the appropriate sort of packet. Also, whilst we're at it, most of the marks are no longer used, so remove them and rename the remaining two to something more obvious. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 13 ++++--------- net/rxrpc/call_accept.c | 6 +++--- net/rxrpc/input.c | 2 +- net/rxrpc/output.c | 23 ++++++++++++++++++----- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9fcb3e197b14..e8861cb78070 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -40,17 +40,12 @@ struct rxrpc_crypt { struct rxrpc_connection; /* - * Mark applied to socket buffers. + * Mark applied to socket buffers in skb->mark. skb->priority is used + * to pass supplementary information. 
*/ enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ + RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ + RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9d1e298b784c..e88f131c1d7f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -353,7 +353,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; _leave(" = NULL [service]"); return NULL; @@ -364,7 +364,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, rx->sk.sk_state == RXRPC_CLOSE) { trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; _leave(" = NULL [close]"); call = NULL; @@ -373,7 +373,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, call = rxrpc_alloc_incoming_call(rx, local, conn, skb); if (!call) { - skb->mark = RXRPC_SKB_MARK_BUSY; + skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; _leave(" = NULL [busy]"); call = NULL; goto out; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7f9ed3a60b9a..b0f12471f5e7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1354,7 +1354,7 @@ void rxrpc_data_ready(struct sock *udp_sk) protocol_error: skb->priority = RX_PROTOCOL_ERROR; post_abort: - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8a4da3fe96df..e8fb8922bca8 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -524,7 +524,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) struct kvec iov[2]; size_t size; __be32 code; - int ret; + int ret, ioc; _enter("%d", local->debug_id); @@ -532,7 +532,6 @@ void rxrpc_reject_packets(struct rxrpc_local *local) iov[0].iov_len = sizeof(whdr); iov[1].iov_base = &code; iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); msg.msg_name = &srx.transport; msg.msg_control = NULL; @@ -540,17 +539,31 @@ void rxrpc_reject_packets(struct rxrpc_local *local) msg.msg_flags = 0; memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); + switch (skb->mark) { + case RXRPC_SKB_MARK_REJECT_BUSY: + whdr.type = RXRPC_PACKET_TYPE_BUSY; + size = sizeof(whdr); + ioc = 1; + break; + case RXRPC_SKB_MARK_REJECT_ABORT: + whdr.type = RXRPC_PACKET_TYPE_ABORT; + code = htonl(skb->priority); + size = sizeof(whdr) + sizeof(code); + ioc = 2; + break; + default: + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + continue; + } + if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { msg.msg_namelen = 
srx.transport_len; - code = htonl(skb->priority); - whdr.epoch = htonl(sp->hdr.epoch); whdr.cid = htonl(sp->hdr.cid); whdr.callNumber = htonl(sp->hdr.callNumber); From 403fc2a138457f1071b186786a7589ef7382c8bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: [PATCH 299/499] rxrpc: Improve up-front incoming packet checking Do more up-front checking on incoming packets to weed out invalid ones and also ones aimed at services that we don't support. Whilst we're at it, replace the clearing of call and skew if we don't find a connection with just initialising the variables to zero at the top of the function. Signed-off-by: David Howells --- net/rxrpc/input.c | 63 +++++++++++++++++++++++++++++++++++--------- net/rxrpc/protocol.h | 15 ----------- 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index b0f12471f5e7..a569e9e010d1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1125,12 +1125,13 @@ void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; - struct rxrpc_call *call; + struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; struct rxrpc_local *local = udp_sk->sk_user_data; + struct rxrpc_sock *rx; struct sk_buff *skb; unsigned int channel; - int ret, skew; + int ret, skew = 0; _enter("%p", udp_sk); @@ -1181,12 +1182,6 @@ void rxrpc_data_ready(struct sock *udp_sk) trace_rxrpc_rx_packet(sp); - if (sp->hdr.type >= RXRPC_N_PACKET_TYPES || - !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) { - _proto("Rx Bad Packet Type %u", sp->hdr.type); - goto bad_message; - } - switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: if (rxrpc_to_client(sp)) @@ -1198,24 +1193,63 @@ void rxrpc_data_ready(struct sock *udp_sk) if (rxrpc_to_server(sp)) goto discard; /* Fall through */ + case RXRPC_PACKET_TYPE_ACK: + case RXRPC_PACKET_TYPE_ACKALL: + if (sp->hdr.callNumber == 0) + goto bad_message; + /* Fall through */ + case RXRPC_PACKET_TYPE_ABORT: + break; case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0) + if (sp->hdr.callNumber == 0 || + sp->hdr.seq == 0) goto bad_message; if (sp->hdr.flags & RXRPC_JUMBO_PACKET && !rxrpc_validate_jumbo(skb)) goto bad_message; break; + case RXRPC_PACKET_TYPE_CHALLENGE: + if (rxrpc_to_server(sp)) + goto discard; + break; + case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_to_client(sp)) + goto discard; + break; + /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: goto discard; + + default: + _proto("Rx Bad Packet Type %u", sp->hdr.type); + goto bad_message; } + if (sp->hdr.serviceId == 0) + goto bad_message; + rcu_read_lock(); + if (rxrpc_to_server(sp)) { + /* Weed out packets to services we're not offering. Packets + * that would begin a call are explicitly rejected and the rest + * are just discarded. 
+ */ + rx = rcu_dereference(local->service); + if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && + sp->hdr.serviceId != rx->second_service)) { + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.seq == 1) + goto unsupported_service; + goto discard_unlock; + } + } + conn = rxrpc_find_connection_rcu(local, skb); if (conn) { if (sp->hdr.securityIndex != conn->security_ix) @@ -1297,14 +1331,10 @@ void rxrpc_data_ready(struct sock *udp_sk) if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) set_bit(RXRPC_CALL_RX_HEARD, &call->flags); } - } else { - skew = 0; - call = NULL; } if (!call || atomic_read(&call->usage) == 0) { if (rxrpc_to_client(sp) || - sp->hdr.callNumber == 0 || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message_unlock; if (sp->hdr.seq != 1) @@ -1340,6 +1370,13 @@ void rxrpc_data_ready(struct sock *udp_sk) skb->priority = RXKADINCONSISTENCY; goto post_abort; +unsupported_service: + rcu_read_unlock(); + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->priority = RX_INVALID_OPERATION; + goto post_abort; + reupgrade: rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 93da73bf7098..f9cb83c938f3 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -50,7 +50,6 @@ struct rxrpc_wire_header { #define RXRPC_PACKET_TYPE_10 10 /* Ignored */ #define RXRPC_PACKET_TYPE_11 11 /* Ignored */ #define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ -#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ uint8_t flags; /* packet flags */ #define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ @@ -72,20 +71,6 @@ struct rxrpc_wire_header { } __packed; -#define RXRPC_SUPPORTED_PACKET_TYPES ( \ - (1 << RXRPC_PACKET_TYPE_DATA) | \ - (1 << RXRPC_PACKET_TYPE_ACK) | \ - (1 << RXRPC_PACKET_TYPE_BUSY) | \ - (1 << RXRPC_PACKET_TYPE_ABORT) | \ - (1 << RXRPC_PACKET_TYPE_ACKALL) | \ - (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ - (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ - /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ - (1 << RXRPC_PACKET_TYPE_PARAMS) | \ - (1 << RXRPC_PACKET_TYPE_10) | \ - (1 << RXRPC_PACKET_TYPE_11) | \ - (1 << RXRPC_PACKET_TYPE_VERSION)) - /*****************************************************************************/ /* * jumbo packet secondary header From 0099dc589bfa7caf6f2608c4cbc1181cfee22b0c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:09 +0100 Subject: [PATCH 300/499] rxrpc: Make service call handling more robust Make the following changes to improve the robustness of the code that sets up a new service call: (1) Cache the rxrpc_sock struct obtained in rxrpc_data_ready() to do a service ID check and pass that along to rxrpc_new_incoming_call(). This means that I can remove the check from rxrpc_new_incoming_call() without the need to worry about the socket attached to the local endpoint getting replaced - which would invalidate the check. (2) Cache the rxrpc_peer struct, thereby allowing the peer search to be done once. The peer is passed to rxrpc_new_incoming_call(), thereby saving the need to repeat the search. This also reduces the possibility of rxrpc_publish_service_conn() BUG()'ing due to the detection of a duplicate connection, despite the initial search done by rxrpc_find_connection_rcu() having turned up nothing. 
This BUG() shouldn't ever get hit since rxrpc_data_ready() *should* be non-reentrant and the result of the initial search should still hold true, but it has proven possible to hit. I *think* this may be due to __rxrpc_lookup_peer_rcu() cutting short the iteration over the hash table if it finds a matching peer with a zero usage count, but I don't know for sure since it's only ever been hit once that I know of. Another possibility is that a bug in rxrpc_data_ready() that checked the wrong byte in the header for the RXRPC_CLIENT_INITIATED flag might've let through a packet that caused a spurious and invalid call to be set up. That is addressed in another patch. (3) Fix __rxrpc_lookup_peer_rcu() to skip peer records that have a zero usage count rather than stopping and returning not found, just in case there's another peer record behind it in the bucket. (4) Don't search the peer records in rxrpc_alloc_incoming_call(), but rather either use the peer cached in (2) or, if one wasn't found, preemptively install a new one. Fixes: 8496af50eb38 ("rxrpc: Use RCU to access a peer's service connection tree") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 8 +++++--- net/rxrpc/call_accept.c | 41 ++++++++++++----------------------------- net/rxrpc/conn_object.c | 7 ++++++- net/rxrpc/input.c | 7 ++++--- net/rxrpc/peer_object.c | 35 +++++++++++------------------------ 5 files changed, 38 insertions(+), 60 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e8861cb78070..c72686193d83 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -722,6 +722,8 @@ extern struct workqueue_struct *rxrpc_workqueue; int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_sock *, + struct rxrpc_peer *, struct rxrpc_connection *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); @@ -913,7 +915,8 @@ extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, - struct sk_buff *); + struct sk_buff *, + struct rxrpc_peer **); void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); @@ -1049,8 +1052,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, - struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index e88f131c1d7f..9c7f26d06a52 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -249,11 +249,11 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) */ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; - struct rxrpc_peer *peer, *xpeer; struct rxrpc_call *call; unsigned short 
call_head, conn_head, peer_head; unsigned short call_tail, conn_tail, peer_tail; @@ -276,21 +276,18 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, return NULL; if (!conn) { - /* No connection. We're going to need a peer to start off - * with. If one doesn't yet exist, use a spare from the - * preallocation set. We dump the address into the spare in - * anticipation - and to save on stack space. - */ - xpeer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0) - return NULL; - - peer = rxrpc_lookup_incoming_peer(local, xpeer); - if (peer == xpeer) { + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0) + return NULL; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ @@ -335,30 +332,16 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * The call is returned with the user access mutex held. */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_sock *rx, + struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_sock *rx; struct rxrpc_call *call; - u16 service_id = sp->hdr.serviceId; _enter(""); - /* Get the socket providing the service */ - rx = rcu_dereference(local->service); - if (rx && (service_id == rx->srx.srx_service || - service_id == rx->second_service)) - goto found_service; - - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; - _leave(" = NULL [service]"); - return NULL; - -found_service: spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { @@ -371,7 +354,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, goto out; } - call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; _leave(" = NULL [busy]"); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 390ba50cfab4..b4438f98dc5c 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -69,10 +69,14 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) * If successful, a pointer to the connection is returned, but no ref is taken. * NULL is returned if there is no match. * + * When searching for a service call, if we find a peer but no connection, we + * return that through *_peer in case we need to create a new service call. + * * The caller must be holding the RCU read lock. 
*/ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct rxrpc_peer **_peer) { struct rxrpc_connection *conn; struct rxrpc_conn_proto k; @@ -104,6 +108,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, peer = rxrpc_lookup_peer_rcu(local, &srx); if (!peer) goto not_found; + *_peer = peer; conn = rxrpc_find_service_conn_rcu(peer, skb); if (!conn || atomic_read(&conn->usage) == 0) goto not_found; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a569e9e010d1..800f5b8a1baa 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1128,7 +1128,8 @@ void rxrpc_data_ready(struct sock *udp_sk) struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; struct rxrpc_local *local = udp_sk->sk_user_data; - struct rxrpc_sock *rx; + struct rxrpc_peer *peer = NULL; + struct rxrpc_sock *rx = NULL; struct sk_buff *skb; unsigned int channel; int ret, skew = 0; @@ -1250,7 +1251,7 @@ void rxrpc_data_ready(struct sock *udp_sk) } } - conn = rxrpc_find_connection_rcu(local, skb); + conn = rxrpc_find_connection_rcu(local, skb, &peer); if (conn) { if (sp->hdr.securityIndex != conn->security_ix) goto wrong_security; @@ -1339,7 +1340,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto bad_message_unlock; if (sp->hdr.seq != 1) goto discard_unlock; - call = rxrpc_new_incoming_call(local, conn, skb); + call = rxrpc_new_incoming_call(local, rx, peer, conn, skb); if (!call) { rcu_read_unlock(); goto reject_packet; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 1dc7648e3eff..70083e8fb6e5 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -124,11 +124,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( struct rxrpc_net *rxnet = local->rxnet; hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { - if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) { - if (atomic_read(&peer->usage) == 0) - return NULL; + if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && + atomic_read(&peer->usage) > 0) return peer; - } } return NULL; @@ -299,34 +297,23 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, } /* - * Set up a new incoming peer. The address is prestored in the preallocated - * peer. + * Set up a new incoming peer. There shouldn't be any other matching peers + * since we've already done a search in the list from the non-reentrant context + * (the data_ready handler) that is the only place we can add new peers. 
*/ -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, - struct rxrpc_peer *prealloc) +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) { - struct rxrpc_peer *peer; struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; - hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); - prealloc->local = local; - rxrpc_init_peer(prealloc, hash_key); + hash_key = rxrpc_peer_hash_key(local, &peer->srx); + peer->local = local; + rxrpc_init_peer(peer, hash_key); spin_lock(&rxnet->peer_hash_lock); - - /* Need to check that we aren't racing with someone else */ - peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - peer = prealloc; - hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); - list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - } - + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); + list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); spin_unlock(&rxnet->peer_hash_lock); - return peer; } /* From 37a675e768d7606fe8a53e0c459c9b53e121ac20 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:09 +0100 Subject: [PATCH 301/499] rxrpc: Fix transport sockopts to get IPv4 errors on an IPv6 socket It seems that enabling IPV6_RECVERR on an IPv6 socket doesn't also turn on IP_RECVERR, so neither local errors nor ICMP-transported remote errors from IPv4 peer addresses are returned to the AF_RXRPC protocol. Make the sockopt setting code in rxrpc_open_socket() fall through from the AF_INET6 case to the AF_INET case to turn on all the AF_INET options too in the AF_INET6 case. Fixes: f2aeed3a591f ("rxrpc: Fix error reception on AF_INET6 sockets") Signed-off-by: David Howells --- net/rxrpc/local_object.c | 43 +++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 81de7d889ffa..94d234e9c685 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -135,6 +135,29 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } switch (local->srx.transport.family) { + case AF_INET6: + /* we want to receive ICMPv6 errors */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, + (char *) &opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } + + /* we want to set the don't fragment bit */ + opt = IPV6_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, + (char *) &opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } + + /* Fall through and set IPv4 options too otherwise we don't get + * errors from IPv4 packets sent through the IPv6 socket. + */ + case AF_INET: /* we want to receive ICMP errors */ opt = 1; @@ -153,26 +176,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) _debug("setsockopt failed"); goto error; } - break; - - case AF_INET6: - /* we want to receive ICMP errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } - - /* we want to set the don't fragment bit */ - opt = IPV6_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } /* We want receive timestamps. 
*/ opt = 1; From f334430316e7fd37c4821ebec627e27714bb5d76 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:09 +0100 Subject: [PATCH 302/499] rxrpc: Fix error distribution Fix error distribution by immediately delivering the errors to all the affected calls rather than deferring them to a worker thread. The problem with the latter is that retries and things can happen in the meantime when we want to stop that sooner. To this end: (1) Stop the error distributor from removing calls from the error_targets list so that peer->lock isn't needed to synchronise against other adds and removals. (2) Require the peer's error_targets list to be accessed with RCU, thereby avoiding the need to take peer->lock over distribution. (3) Don't attempt to affect a call's state if it is already marked complete. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 4 +--- net/rxrpc/ar-internal.h | 5 ---- net/rxrpc/call_object.c | 2 +- net/rxrpc/conn_client.c | 4 ++-- net/rxrpc/conn_object.c | 2 +- net/rxrpc/peer_event.c | 46 +++++++++--------------------------- net/rxrpc/peer_object.c | 17 ------------- 7 files changed, 16 insertions(+), 64 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 196587b8f204..837393fa897b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -56,7 +56,6 @@ enum rxrpc_peer_trace { rxrpc_peer_new, rxrpc_peer_processing, rxrpc_peer_put, - rxrpc_peer_queued_error, }; enum rxrpc_conn_trace { @@ -257,8 +256,7 @@ enum rxrpc_tx_point { EM(rxrpc_peer_got, "GOT") \ EM(rxrpc_peer_new, "NEW") \ EM(rxrpc_peer_processing, "PRO") \ - EM(rxrpc_peer_put, "PUT") \ - E_(rxrpc_peer_queued_error, "QER") + E_(rxrpc_peer_put, "PUT") #define rxrpc_conn_traces \ EM(rxrpc_conn_got, "GOT") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c72686193d83..ef9554131434 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -288,7 +288,6 @@ struct rxrpc_peer { struct hlist_node hash_link; struct rxrpc_local *local; struct hlist_head error_targets; /* targets for net error distribution */ - struct work_struct error_distributor; struct rb_root service_conns; /* Service connections */ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ time64_t last_tx_at; /* Last time packet sent here */ @@ -299,8 +298,6 @@ struct rxrpc_peer { unsigned int maxdata; /* data size (MTU - hdrsize) */ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ int debug_id; /* debug ID for printks */ - int error_report; /* Net (+0) or local (+1000000) to distribute */ -#define RXRPC_LOCAL_ERROR_OFFSET 1000000 struct sockaddr_rxrpc srx; /* remote address */ /* calculated RTT cache */ @@ -1039,7 +1036,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); * peer_event.c */ void rxrpc_error_report(struct sock *); -void rxrpc_peer_error_distributor(struct work_struct *); void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); @@ -1057,7 +1053,6 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); void rxrpc_put_peer(struct rxrpc_peer *); -void __rxrpc_queue_peer_error(struct rxrpc_peer *); /* * proc.c diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9486293fef5c..799f75b6900d 100644 --- a/net/rxrpc/call_object.c +++ 
b/net/rxrpc/call_object.c @@ -400,7 +400,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(conn->channels[chan].call, call); spin_lock(&conn->params.peer->lock); - hlist_add_head(&call->error_link, &conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f8f37188a932..8acf74fe24c0 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -710,8 +710,8 @@ int rxrpc_connect_call(struct rxrpc_call *call, } spin_lock_bh(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, - &call->conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, + &call->conn->params.peer->error_targets); spin_unlock_bh(&call->conn->params.peer->lock); out: diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b4438f98dc5c..885dae829f4a 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -216,7 +216,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; spin_lock_bh(&conn->params.peer->lock); - hlist_del_init(&call->error_link); + hlist_del_rcu(&call->error_link); spin_unlock_bh(&conn->params.peer->lock); if (rxrpc_is_client_call(call)) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 4f9da2f51c69..f3e6fc670da2 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -23,6 +23,8 @@ #include "ar-internal.h" static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); +static void rxrpc_distribute_error(struct rxrpc_peer *, int, + enum rxrpc_call_completion); /* * Find the peer associated with an ICMP packet. @@ -194,8 +196,6 @@ void rxrpc_error_report(struct sock *sk) rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - /* The ref we obtained is passed off to the work item */ - __rxrpc_queue_peer_error(peer); _leave(""); } @@ -205,6 +205,7 @@ void rxrpc_error_report(struct sock *sk) static void rxrpc_store_error(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) { + enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR; struct sock_extended_err *ee; int err; @@ -255,7 +256,7 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, case SO_EE_ORIGIN_NONE: case SO_EE_ORIGIN_LOCAL: _proto("Rx Received local error { error=%d }", err); - err += RXRPC_LOCAL_ERROR_OFFSET; + compl = RXRPC_CALL_LOCAL_ERROR; break; case SO_EE_ORIGIN_ICMP6: @@ -264,48 +265,23 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, break; } - peer->error_report = err; + rxrpc_distribute_error(peer, err, compl); } /* - * Distribute an error that occurred on a peer + * Distribute an error that occurred on a peer. 
*/ -void rxrpc_peer_error_distributor(struct work_struct *work) +static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, + enum rxrpc_call_completion compl) { - struct rxrpc_peer *peer = - container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; - enum rxrpc_call_completion compl; - int error; - _enter(""); - - error = READ_ONCE(peer->error_report); - if (error < RXRPC_LOCAL_ERROR_OFFSET) { - compl = RXRPC_CALL_NETWORK_ERROR; - } else { - compl = RXRPC_CALL_LOCAL_ERROR; - error -= RXRPC_LOCAL_ERROR_OFFSET; - } - - _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error); - - spin_lock_bh(&peer->lock); - - while (!hlist_empty(&peer->error_targets)) { - call = hlist_entry(peer->error_targets.first, - struct rxrpc_call, error_link); - hlist_del_init(&call->error_link); + hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { rxrpc_see_call(call); - - if (rxrpc_set_call_completion(call, compl, 0, -error)) + if (call->state < RXRPC_CALL_COMPLETE && + rxrpc_set_call_completion(call, compl, 0, -error)) rxrpc_notify_socket(call); } - - spin_unlock_bh(&peer->lock); - - rxrpc_put_peer(peer); - _leave(""); } /* diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 70083e8fb6e5..01a9febfa367 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -220,8 +220,6 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) atomic_set(&peer->usage, 1); peer->local = local; INIT_HLIST_HEAD(&peer->error_targets); - INIT_WORK(&peer->error_distributor, - &rxrpc_peer_error_distributor); peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); @@ -402,21 +400,6 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) return peer; } -/* - * Queue a peer record. This passes the caller's ref to the workqueue. - */ -void __rxrpc_queue_peer_error(struct rxrpc_peer *peer) -{ - const void *here = __builtin_return_address(0); - int n; - - n = atomic_read(&peer->usage); - if (rxrpc_queue_work(&peer->error_distributor)) - trace_rxrpc_peer(peer, rxrpc_peer_queued_error, n, here); - else - rxrpc_put_peer(peer); -} - /* * Discard a peer record. */ From a13f814a67b12a2f29d1decf4b4f4e700658a517 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 30 Aug 2018 17:56:52 +0900 Subject: [PATCH 303/499] netfilter: nft_set_rbtree: add missing rb_erase() in GC routine The nft_set_gc_batch_check() checks whether gc buffer is full. If gc buffer is full, gc buffer is released by the nft_set_gc_batch_complete() internally. In case of rbtree, the rb_erase() should be called before calling the nft_set_gc_batch_complete(). therefore the rb_erase() should be called before calling the nft_set_gc_batch_check() too. test commands: table ip filter { set set1 { type ipv4_addr; flags interval, timeout; gc-interval 10s; timeout 1s; elements = { 1-2, 3-4, 5-6, ... 
10000-10001, } } } %nft -f test.nft splat looks like: [ 430.273885] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 430.282158] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 430.283116] CPU: 1 PID: 190 Comm: kworker/1:2 Tainted: G B 4.18.0+ #7 [ 430.283116] Workqueue: events_power_efficient nft_rbtree_gc [nf_tables_set] [ 430.313559] RIP: 0010:rb_next+0x81/0x130 [ 430.313559] Code: 08 49 bd 00 00 00 00 00 fc ff df 48 bb 00 00 00 00 00 fc ff df 48 85 c0 75 05 eb 58 48 89 d4 [ 430.313559] RSP: 0018:ffff88010cdb7680 EFLAGS: 00010207 [ 430.313559] RAX: 0000000000b84854 RBX: dffffc0000000000 RCX: ffffffff83f01973 [ 430.313559] RDX: 000000000017090c RSI: 0000000000000008 RDI: 0000000000b84864 [ 430.313559] RBP: ffff8801060d4588 R08: fffffbfff09bc349 R09: fffffbfff09bc349 [ 430.313559] R10: 0000000000000001 R11: fffffbfff09bc348 R12: ffff880100f081a8 [ 430.313559] R13: dffffc0000000000 R14: ffff880100ff8688 R15: dffffc0000000000 [ 430.313559] FS: 0000000000000000(0000) GS:ffff88011b400000(0000) knlGS:0000000000000000 [ 430.313559] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 430.313559] CR2: 0000000001551008 CR3: 000000005dc16000 CR4: 00000000001006e0 [ 430.313559] Call Trace: [ 430.313559] nft_rbtree_gc+0x112/0x5c0 [nf_tables_set] [ 430.313559] process_one_work+0xc13/0x1ec0 [ 430.313559] ? _raw_spin_unlock_irq+0x29/0x40 [ 430.313559] ? pwq_dec_nr_in_flight+0x3c0/0x3c0 [ 430.313559] ? set_load_weight+0x270/0x270 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __schedule+0x6d3/0x1f50 [ 430.313559] ? find_held_lock+0x39/0x1c0 [ 430.313559] ? __sched_text_start+0x8/0x8 [ 430.313559] ? cyc2ns_read_end+0x10/0x10 [ 430.313559] ? save_trace+0x300/0x300 [ 430.313559] ? sched_clock_local+0xd4/0x140 [ 430.313559] ? find_held_lock+0x39/0x1c0 [ 430.313559] ? worker_thread+0x353/0x1120 [ 430.313559] ? worker_thread+0x353/0x1120 [ 430.313559] ? lock_contended+0xe70/0xe70 [ 430.313559] ? __lock_acquire+0x4500/0x4500 [ 430.535635] ? do_raw_spin_unlock+0xa5/0x330 [ 430.535635] ? do_raw_spin_trylock+0x101/0x1a0 [ 430.535635] ? do_raw_spin_lock+0x1f0/0x1f0 [ 430.535635] ? _raw_spin_lock_irq+0x10/0x70 [ 430.535635] worker_thread+0x15d/0x1120 [ ... 
] Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 55e2d9215c0d..0e5ec126f6ad 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -355,12 +355,11 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, static void nft_rbtree_gc(struct work_struct *work) { + struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; struct nft_set_gc_batch *gcb = NULL; - struct rb_node *node, *prev = NULL; - struct nft_rbtree_elem *rbe; struct nft_rbtree *priv; + struct rb_node *node; struct nft_set *set; - int i; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); @@ -371,7 +370,7 @@ static void nft_rbtree_gc(struct work_struct *work) rbe = rb_entry(node, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe)) { - prev = node; + rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) @@ -379,29 +378,30 @@ static void nft_rbtree_gc(struct work_struct *work) if (nft_set_elem_mark_busy(&rbe->ext)) continue; + if (rbe_prev) { + rb_erase(&rbe_prev->node, &priv->root); + rbe_prev = NULL; + } gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); if (!gcb) break; atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, rbe); + rbe_prev = rbe; - if (prev) { - rbe = rb_entry(prev, struct nft_rbtree_elem, node); + if (rbe_end) { atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - prev = NULL; + nft_set_gc_batch_add(gcb, rbe_end); + rb_erase(&rbe_end->node, &priv->root); + rbe_end = NULL; } node = rb_next(node); if (!node) break; } - if (gcb) { - for (i = 0; i < gcb->head.cnt; i++) { - rbe = gcb->elems[i]; - rb_erase(&rbe->node, &priv->root); - } - } + if (rbe_prev) + rb_erase(&rbe_prev->node, &priv->root); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); From 421c119f558761556afca6a62ad183bc2d8659e0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Sep 2018 14:10:04 +0200 Subject: [PATCH 304/499] netfilter: avoid erronous array bounds warning Unfortunately some versions of gcc emit following warning: $ make net/xfrm/xfrm_output.o linux/compiler.h:252:20: warning: array subscript is above array bounds [-Warray-bounds] hook_head = rcu_dereference(net->nf.hooks_arp[hook]); ^~~~~~~~~~~~~~~~~~~~~ xfrm_output_resume passes skb_dst(skb)->ops->family as its 'pf' arg so compiler can't know that we'll never access hooks_arp[]. (NFPROTO_IPV4 or NFPROTO_IPV6 are only possible cases). Avoid this by adding an explicit WARN_ON_ONCE() check. This patch has no effect if the family is a compile-time constant as gcc will remove the switch() construct entirely. 
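[Illustration only, not part of this patch: a minimal standalone C sketch of the pattern being discussed. Names and array sizes are invented stand-ins for net->nf.hooks_arp and NFPROTO_ARP; the point is that an explicit bound check quiets -Warray-bounds when the protocol family is a runtime value, while a compile-time-constant family lets the compiler fold the whole switch away, check included.]

#include <stdio.h>

#define NF_MAX_HOOKS 3                      /* stand-in for the hooks_arp[] length */

static int hooks_arp[NF_MAX_HOOKS];

static inline int nf_hook_sketch(int pf, unsigned int hook)
{
        int head = 0;

        switch (pf) {
        case 3:                             /* plays the role of NFPROTO_ARP */
                /* Mirrors the added WARN_ON_ONCE(): with a runtime pf the
                 * compiler cannot prove hook < NF_MAX_HOOKS, so the check
                 * documents the invariant and silences the warning.  With a
                 * constant pf != 3 this case is dead code and the check is
                 * removed entirely. */
                if (hook >= NF_MAX_HOOKS)
                        break;
                head = hooks_arp[hook];
                break;
        default:
                break;
        }
        return head;
}

int main(void)
{
        printf("%d\n", nf_hook_sketch(3, 1));
        return 0;
}
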
Reported-by: David Ahern Signed-off-by: Florian Westphal Reviewed-by: David Ahern Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 07efffd0c759..bbe99d2b28b4 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -215,6 +215,8 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, break; case NFPROTO_ARP: #ifdef CONFIG_NETFILTER_FAMILY_ARP + if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp))) + break; hook_head = rcu_dereference(net->nf.hooks_arp[hook]); #endif break; From 40e4f26e6a14fc1496eabb8b0004a547303114e6 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Thu, 27 Sep 2018 19:36:28 -0300 Subject: [PATCH 305/499] netfilter: xt_socket: check sk before checking for netns. Only check for the network namespace if the socket is available. Fixes: f564650106a6 ("netfilter: check if the socket netns is correct.") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Flavio Leitner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 0472f3472842..ada144e5645b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -56,7 +56,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) @@ -117,7 +117,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) From 4288ea006c73e37c2a4f60dfaef20dd167b8df31 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:33:21 +0100 Subject: [PATCH 306/499] bpf: harden flags check in cgroup_storage_update_elem() cgroup_storage_update_elem() shouldn't accept any flags argument values except BPF_ANY and BPF_EXIST to guarantee the backward compatibility, had a new flag value been added. Fixes: de9cbbaadba5 ("bpf: introduce cgroup storage maps") Signed-off-by: Roman Gushchin Reported-by: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/local_storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 22ad967d1e5f..94126cbffc88 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -129,7 +129,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; - if (flags & BPF_NOEXIST) + if (flags != BPF_ANY && flags != BPF_EXIST) return -EINVAL; storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, From 15c206887603a452f13fbfde2db0f8830d37028c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 28 Sep 2018 09:40:17 -0600 Subject: [PATCH 307/499] Revert "xen/blkfront: When purging persistent grants, keep them in the buffer" Fix didn't work for all cases, reverting to add a (hopefully) better fix. This reverts commit f151ba989d149bbdfc90e5405724bbea094f9b17. 
Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3b441fe69c0d..a71d817e900d 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2667,9 +2667,11 @@ static void purge_persistent_grants(struct blkfront_info *info) gnttab_query_foreign_access(gnt_list_entry->gref)) continue; + list_del(&gnt_list_entry->node); gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); - gnt_list_entry->gref = GRANT_INVALID_REF; rinfo->persistent_gnts_c--; + __free_page(gnt_list_entry->page); + kfree(gnt_list_entry); } spin_unlock_irqrestore(&rinfo->ring_lock, flags); From 6c7678674014b4552caf0e5aa0ca34078a377482 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Sep 2018 09:28:27 +0200 Subject: [PATCH 308/499] xen/blkfront: correct purging of persistent grants Commit a46b53672b2c2e3770b38a4abf90d16364d2584b ("xen/blkfront: cleanup stale persistent grants") introduced a regression as purged persistent grants were not pu into the list of free grants again. Correct that. Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a71d817e900d..429d20131c7e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2670,8 +2670,8 @@ static void purge_persistent_grants(struct blkfront_info *info) list_del(&gnt_list_entry->node); gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); rinfo->persistent_gnts_c--; - __free_page(gnt_list_entry->page); - kfree(gnt_list_entry); + gnt_list_entry->gref = GRANT_INVALID_REF; + list_add_tail(&gnt_list_entry->node, &rinfo->grants); } spin_unlock_irqrestore(&rinfo->ring_lock, flags); From 1b09d9c232cdaea59fb50ac437d3921ed1f1eafb Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Sep 2018 14:20:40 +0200 Subject: [PATCH 309/499] mmc: slot-gpio: Fix debounce time to use miliseconds again The debounce value passed to mmc_gpiod_request_cd() function is in microseconds, but msecs_to_jiffies() requires the value to be in miliseconds to properly calculate the delay, so adjust the value stored in cd_debounce_delay_ms context entry. Fixes: 1d71926bbd59 ("mmc: core: Fix debounce time to use microseconds") Fixes: bfd694d5e21c ("mmc: core: Add tunable delay before detecting card after card is inserted") Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Marek Szyprowski Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 2a833686784b..86803a3a04dc 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -271,7 +271,7 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, if (debounce) { ret = gpiod_set_debounce(desc, debounce); if (ret < 0) - ctx->cd_debounce_delay_ms = debounce; + ctx->cd_debounce_delay_ms = debounce / 1000; } if (gpio_invert) From 065a2cdcbdf8eb9aefb66e1a24b2d684b8b8852b Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 26 Sep 2018 18:07:09 +0200 Subject: [PATCH 310/499] s390: qeth_core_mpc: Use ARRAY_SIZE instead of reimplementing its function Use the common code ARRAY_SIZE macro instead of a private implementation. 
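[Illustration only, not part of this patch: a small userspace sketch of why ARRAY_SIZE() is preferable to the open-coded sizeof division it replaces. The macro below is the simplified form; the kernel's version in include/linux/kernel.h additionally uses __must_be_array() to reject pointers, and the struct/array names here are invented.]

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))   /* simplified kernel form */

struct rc_msg {
        int rc;
        const char *msg;
};

static const struct rc_msg msgs[] = {
        { 0, "success" },
        { 1, "not supported" },
        { 2, "table full" },
};

int main(void)
{
        /* The open-coded form repeats the element type; if the element type
         * ever changes it silently computes the wrong count.  ARRAY_SIZE()
         * derives the count from the array itself. */
        size_t open_coded = sizeof(msgs) / sizeof(struct rc_msg);
        size_t with_macro = ARRAY_SIZE(msgs);

        printf("%zu %zu\n", open_coded, with_macro);        /* prints "3 3" */
        return 0;
}
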
Reviewed-by: Jean Delvare Signed-off-by: zhong jiang Signed-off-by: Martin Schwidefsky Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_mpc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 5bcb8dafc3ee..e8263ded0af0 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -222,8 +222,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) { int x = 0; - qeth_ipa_rc_msg[sizeof(qeth_ipa_rc_msg) / - sizeof(struct ipa_rc_msg) - 1].rc = rc; + qeth_ipa_rc_msg[ARRAY_SIZE(qeth_ipa_rc_msg) - 1].rc = rc; while (qeth_ipa_rc_msg[x].rc != rc) x++; return qeth_ipa_rc_msg[x].msg; @@ -270,9 +269,7 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) { int x = 0; - qeth_ipa_cmd_names[ - sizeof(qeth_ipa_cmd_names) / - sizeof(struct ipa_cmd_names)-1].cmd = cmd; + qeth_ipa_cmd_names[ARRAY_SIZE(qeth_ipa_cmd_names) - 1].cmd = cmd; while (qeth_ipa_cmd_names[x].cmd != cmd) x++; return qeth_ipa_cmd_names[x].name; From 048a7f8b4ec085d5c56ad4a3bf450389a4aed5f9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 26 Sep 2018 18:07:10 +0200 Subject: [PATCH 311/499] s390: qeth: Fix potential array overrun in cmd/rc lookup Functions qeth_get_ipa_msg and qeth_get_ipa_cmd_name are modifying the last member of global arrays without any locking that I can see. If two instances of either function are running at the same time, it could cause a race ultimately leading to an array overrun (the contents of the last entry of the array is the only guarantee that the loop will ever stop). Performing the lookups without modifying the arrays is admittedly slower (two comparisons per iteration instead of one) but these are operations which are rare (should only be needed in error cases or when debugging, not during successful operation) and it seems still less costly than introducing a mutex to protect the arrays in question. As a side bonus, it allows us to declare both arrays as const data. Signed-off-by: Jean Delvare Cc: Julian Wiedmann Cc: Ursula Braun Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core_main.c | 2 +- drivers/s390/net/qeth_core_mpc.c | 30 ++++++++++++++++-------------- drivers/s390/net/qeth_core_mpc.h | 4 ++-- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index de8282420f96..ffce6f39828a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -610,7 +610,7 @@ static void qeth_put_reply(struct qeth_reply *reply) static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc, struct qeth_card *card) { - char *ipa_name; + const char *ipa_name; int com = cmd->hdr.command; ipa_name = qeth_get_ipa_cmd_name(com); if (rc) diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index e8263ded0af0..e891c0b52f4c 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -148,10 +148,10 @@ EXPORT_SYMBOL_GPL(IPA_PDU_HEADER); struct ipa_rc_msg { enum qeth_ipa_return_codes rc; - char *msg; + const char *msg; }; -static struct ipa_rc_msg qeth_ipa_rc_msg[] = { +static const struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_SUCCESS, "success"}, {IPA_RC_NOTSUPP, "Command not supported"}, {IPA_RC_IP_TABLE_FULL, "Add Addr IP Table Full - ipv6"}, @@ -219,22 +219,23 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { -char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) +const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) { - int x = 0; - qeth_ipa_rc_msg[ARRAY_SIZE(qeth_ipa_rc_msg) - 1].rc = rc; - while (qeth_ipa_rc_msg[x].rc != rc) - x++; + int x; + + for (x = 0; x < ARRAY_SIZE(qeth_ipa_rc_msg) - 1; x++) + if (qeth_ipa_rc_msg[x].rc == rc) + return qeth_ipa_rc_msg[x].msg; return qeth_ipa_rc_msg[x].msg; } struct ipa_cmd_names { enum qeth_ipa_cmds cmd; - char *name; + const char *name; }; -static struct ipa_cmd_names qeth_ipa_cmd_names[] = { +static const struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_STARTLAN, "startlan"}, {IPA_CMD_STOPLAN, "stoplan"}, {IPA_CMD_SETVMAC, "setvmac"}, @@ -266,11 +267,12 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_UNKNOWN, "unknown"}, }; -char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) +const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) { - int x = 0; - qeth_ipa_cmd_names[ARRAY_SIZE(qeth_ipa_cmd_names) - 1].cmd = cmd; - while (qeth_ipa_cmd_names[x].cmd != cmd) - x++; + int x; + + for (x = 0; x < ARRAY_SIZE(qeth_ipa_cmd_names) - 1; x++) + if (qeth_ipa_cmd_names[x].cmd == cmd) + return qeth_ipa_cmd_names[x].name; return qeth_ipa_cmd_names[x].name; } diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index aa8b9196b089..aa5de1fe01e1 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -797,8 +797,8 @@ enum qeth_ipa_arp_return_codes { QETH_IPA_ARP_RC_Q_NO_DATA = 0x0008, }; -extern char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); -extern char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); +extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); +extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); #define QETH_SETASS_BASE_LEN (sizeof(struct qeth_ipacmd_hdr) + \ sizeof(struct qeth_ipacmd_setassparms_hdr)) From ce7d17d6c607aa0d898f4712e75e27d319816b3b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 27 Sep 2018 10:47:01 +0200 Subject: [PATCH 312/499] MAINTAINERS: change bridge maintainers I haven't been doing reviews only but not active development on bridge code for several years. 
Roopa and Nikolay have been doing most of the new features and have agreed to take over as new co-maintainers. Signed-off-by: Stephen Hemminger Acked-by: Roopa Prabhu Acked-by: Nikolay Aleksandrov --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 02a39617ec82..b0ca9e214ef0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5470,7 +5470,8 @@ S: Odd Fixes F: drivers/net/ethernet/agere/ ETHERNET BRIDGE -M: Stephen Hemminger +M: Roopa Prabhu +M: Nikolay Aleksandrov L: bridge@lists.linux-foundation.org (moderated for non-subscribers) L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net:Bridge From cb973127a793c5ade8102aa4ab7bb5e4b1e64190 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 26 Sep 2018 21:57:03 -0700 Subject: [PATCH 313/499] Update maintainers for bnx2/bnx2x/qlge/qlcnic drivers. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ameen Rahman Signed-off-by: David S. Miller --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b0ca9e214ef0..dcb0191c4f54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2956,7 +2956,6 @@ F: include/linux/bcm963xx_tag.h BROADCOM BNX2 GIGABIT ETHERNET DRIVER M: Rasesh Mody -M: Harish Patil M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported @@ -2977,6 +2976,7 @@ F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER M: Ariel Elior +M: Sudarsana Kalluru M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org S: Supported @@ -11974,7 +11974,7 @@ F: Documentation/scsi/LICENSE.qla4xxx F: drivers/scsi/qla4xxx/ QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Harish Patil +M: Shahed Shaikh M: Manish Chopra M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org @@ -11982,7 +11982,6 @@ S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Harish Patil M: Manish Chopra M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org From 5f672090e44f4951084c5e1d6b0668a5fc422af8 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 27 Sep 2018 04:12:10 -0700 Subject: [PATCH 314/499] qed: Fix shmem structure inconsistency between driver and the mfw. The structure shared between driver and the management FW (mfw) differ in sizes. This would lead to issues when driver try to access the structure members which are not-aligned with the mfw copy e.g., data_ptr usage in the case of mfw_tlv request. Align the driver structure with mfw copy, add reserved field(s) to driver structure for the members not used by the driver. Fixes: dd006921d67f ("qed: Add MFW interfaces for TLV request support.) 
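[Illustration only, not part of this patch: a hedged sketch of the failure mode. Field names and sizes are invented and much simpler than the real public_global layout; the point is that when the driver's copy of a firmware-shared structure omits a member, every later member is read from the wrong offset, which is what the reserved field added above prevents.]

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Layout as the management FW defines it (simplified). */
struct shmem_fw_view {
        uint32_t mdump_reason;
        uint64_t reserved;      /* member the driver never uses */
        uint32_t data_ptr;
        uint32_t data_size;
};

/* Driver view before the fix: the unused member is simply missing, so
 * data_ptr and data_size no longer line up with the firmware's layout. */
struct shmem_drv_view {
        uint32_t mdump_reason;
        uint32_t data_ptr;
        uint32_t data_size;
};

int main(void)
{
        printf("fw     data_ptr offset: %zu\n",
               offsetof(struct shmem_fw_view, data_ptr));
        printf("driver data_ptr offset: %zu\n",
               offsetof(struct shmem_drv_view, data_ptr));
        /* The two offsets differ, so the driver would read garbage for
         * data_ptr; adding a same-sized reserved member restores alignment. */
        return 0;
}
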
Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 9b3ef00e5782..a71382687ef2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -11987,6 +11987,7 @@ struct public_global { u32 running_bundle_id; s32 external_temperature; u32 mdump_reason; + u64 reserved; u32 data_ptr; u32 data_size; }; From c24498c6827b71f80fecc9fb1b70a792053d41a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:51 -0700 Subject: [PATCH 315/499] netpoll: do not test NAPI_STATE_SCHED in poll_one_napi() Since we do no longer require NAPI drivers to provide an ndo_poll_controller(), napi_schedule() has not been done before poll_one_napi() invocation. So testing NAPI_STATE_SCHED is likely to cause early returns. While we are at it, remove outdated comment. Note to future bisections : This change might surface prior bugs in drivers. See commit 73f21c653f93 ("bnxt_en: Fix TX timeout during netpoll.") for one occurrence. Fixes: ac3d9dd034e5 ("netpoll: make ndo_poll_controller() optional") Signed-off-by: Eric Dumazet Tested-by: Song Liu Cc: Michael Chan Signed-off-by: David S. Miller --- net/core/netpoll.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3219a2932463..3ae899805f8b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -135,27 +135,9 @@ static void queue_process(struct work_struct *work) } } -/* - * Check whether delayed processing was scheduled for our NIC. If so, - * we attempt to grab the poll lock and use ->poll() to pump the card. - * If this fails, either we've recursed in ->poll() or it's already - * running on another CPU. - * - * Note: we don't mask interrupts with this lock because we're using - * trylock here and interrupts are already disabled in the softirq - * case. Further, we test the poll_owner to avoid recursion on UP - * systems where the lock doesn't exist. - */ static void poll_one_napi(struct napi_struct *napi) { - int work = 0; - - /* net_rx_action's ->poll() invocations and our's are - * synchronized by this test which is only made while - * holding the napi->poll_lock. - */ - if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return; + int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need From e71fb423e0dea3c9f98f0101e965426edfe849cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:52 -0700 Subject: [PATCH 316/499] hinic: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. hinic uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Note that hinic_netpoll() was incorrectly scheduling NAPI on both RX and TX queues. Signed-off-by: Eric Dumazet Cc: Aviad Krawczyk Signed-off-by: David S. 
Miller --- .../net/ethernet/huawei/hinic/hinic_main.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 09e9da10b786..4a8f82938ed5 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -789,23 +789,6 @@ static void hinic_get_stats64(struct net_device *netdev, stats->tx_errors = nic_tx_stats->tx_dropped; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void hinic_netpoll(struct net_device *netdev) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - int i, num_qps; - - num_qps = hinic_hwdev_num_qps(nic_dev->hwdev); - for (i = 0; i < num_qps; i++) { - struct hinic_txq *txq = &nic_dev->txqs[i]; - struct hinic_rxq *rxq = &nic_dev->rxqs[i]; - - napi_schedule(&txq->napi); - napi_schedule(&rxq->napi); - } -} -#endif - static const struct net_device_ops hinic_netdev_ops = { .ndo_open = hinic_open, .ndo_stop = hinic_close, @@ -818,9 +801,6 @@ static const struct net_device_ops hinic_netdev_ops = { .ndo_start_xmit = hinic_xmit_frame, .ndo_tx_timeout = hinic_tx_timeout, .ndo_get_stats64 = hinic_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = hinic_netpoll, -#endif }; static void netdev_features_init(struct net_device *netdev) From 226a2dd62c5d789088fcf7804fbe5613887870a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:53 -0700 Subject: [PATCH 317/499] ehea: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ehea uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Douglas Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index ba580bfae512..03f64f40b2a3 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -921,17 +921,6 @@ static int ehea_poll(struct napi_struct *napi, int budget) return rx; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ehea_netpoll(struct net_device *dev) -{ - struct ehea_port *port = netdev_priv(dev); - int i; - - for (i = 0; i < port->num_def_qps; i++) - napi_schedule(&port->port_res[i].napi); -} -#endif - static irqreturn_t ehea_recv_irq_handler(int irq, void *param) { struct ehea_port_res *pr = param; @@ -2953,9 +2942,6 @@ static const struct net_device_ops ehea_netdev_ops = { .ndo_open = ehea_open, .ndo_stop = ehea_stop, .ndo_start_xmit = ehea_start_xmit, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ehea_netpoll, -#endif .ndo_get_stats64 = ehea_get_stats64, .ndo_set_mac_address = ehea_set_mac_addr, .ndo_validate_addr = eth_validate_addr, From 4bd2c03be707253f1157bd759fdd6971e4f70403 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:54 -0700 Subject: [PATCH 318/499] net: hns: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). 
This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. hns uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Yisen Zhuang Cc: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 5ce23d4b717e..28e907831b0e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1503,21 +1503,6 @@ static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, return phy_mii_ioctl(phy_dev, ifr, cmd); } -/* use only for netconsole to poll with the device without interrupt */ -#ifdef CONFIG_NET_POLL_CONTROLLER -static void hns_nic_poll_controller(struct net_device *ndev) -{ - struct hns_nic_priv *priv = netdev_priv(ndev); - unsigned long flags; - int i; - - local_irq_save(flags); - for (i = 0; i < priv->ae_handle->q_num * 2; i++) - napi_schedule(&priv->ring_data[i].napi); - local_irq_restore(flags); -} -#endif - static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -1970,9 +1955,6 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_set_features = hns_nic_set_features, .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = hns_nic_poll_controller, -#endif .ndo_set_rx_mode = hns_nic_set_rx_mode, .ndo_select_queue = hns_nic_select_queue, }; From 260dd2c3e2aeefbe78065f0737dceae1ceb1196a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:55 -0700 Subject: [PATCH 319/499] virtio_net: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. virto_net uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: "Michael S. Tsirkin" Cc: Jason Wang Signed-off-by: David S. 
Miller --- drivers/net/virtio_net.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765920905226..dab504ec5e50 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1699,17 +1699,6 @@ static void virtnet_stats(struct net_device *dev, tot->rx_frame_errors = dev->stats.rx_frame_errors; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void virtnet_netpoll(struct net_device *dev) -{ - struct virtnet_info *vi = netdev_priv(dev); - int i; - - for (i = 0; i < vi->curr_queue_pairs; i++) - napi_schedule(&vi->rq[i].napi); -} -#endif - static void virtnet_ack_link_announce(struct virtnet_info *vi) { rtnl_lock(); @@ -2447,9 +2436,6 @@ static const struct net_device_ops virtnet_netdev = { .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = virtnet_netpoll, -#endif .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_features_check = passthru_features_check, From 81b059b2187d77b957bf85318dbd4f36d60555e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:56 -0700 Subject: [PATCH 320/499] qlcnic: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. qlcnic uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Harish Patil Cc: Manish Chopra Signed-off-by: David S. 
Miller --- .../net/ethernet/qlogic/qlcnic/qlcnic_main.c | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 2d38d1ac2aae..dbd48012224f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -59,9 +59,6 @@ static int qlcnic_close(struct net_device *netdev); static void qlcnic_tx_timeout(struct net_device *netdev); static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work); -#ifdef CONFIG_NET_POLL_CONTROLLER -static void qlcnic_poll_controller(struct net_device *netdev); -#endif static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding); static int qlcnic_can_start_firmware(struct qlcnic_adapter *adapter); @@ -545,9 +542,6 @@ static const struct net_device_ops qlcnic_netdev_ops = { .ndo_udp_tunnel_add = qlcnic_add_vxlan_port, .ndo_udp_tunnel_del = qlcnic_del_vxlan_port, .ndo_features_check = qlcnic_features_check, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = qlcnic_poll_controller, -#endif #ifdef CONFIG_QLCNIC_SRIOV .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, .ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate, @@ -3200,45 +3194,6 @@ static irqreturn_t qlcnic_msix_tx_intr(int irq, void *data) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void qlcnic_poll_controller(struct net_device *netdev) -{ - struct qlcnic_adapter *adapter = netdev_priv(netdev); - struct qlcnic_host_sds_ring *sds_ring; - struct qlcnic_recv_context *recv_ctx; - struct qlcnic_host_tx_ring *tx_ring; - int ring; - - if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) - return; - - recv_ctx = adapter->recv_ctx; - - for (ring = 0; ring < adapter->drv_sds_rings; ring++) { - sds_ring = &recv_ctx->sds_rings[ring]; - qlcnic_disable_sds_intr(adapter, sds_ring); - napi_schedule(&sds_ring->napi); - } - - if (adapter->flags & QLCNIC_MSIX_ENABLED) { - /* Only Multi-Tx queue capable devices need to - * schedule NAPI for TX rings - */ - if ((qlcnic_83xx_check(adapter) && - (adapter->flags & QLCNIC_TX_INTR_SHARED)) || - (qlcnic_82xx_check(adapter) && - !qlcnic_check_multi_tx(adapter))) - return; - - for (ring = 0; ring < adapter->drv_tx_rings; ring++) { - tx_ring = &adapter->tx_ring[ring]; - qlcnic_disable_tx_intr(adapter, tx_ring); - napi_schedule(&tx_ring->napi); - } - } -} -#endif - static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding) { From 3548fcf7d877c682c9a5a413c51929739192156d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:57 -0700 Subject: [PATCH 321/499] qlogic: netxen: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. netxen uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Manish Chopra Cc: Rahul Verma Signed-off-by: David S. 
Miller --- .../ethernet/qlogic/netxen/netxen_nic_main.c | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 69aa7fc392c5..59c70be22a84 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -72,9 +72,6 @@ static void netxen_schedule_work(struct netxen_adapter *adapter, work_func_t func, int delay); static void netxen_cancel_fw_work(struct netxen_adapter *adapter); static int netxen_nic_poll(struct napi_struct *napi, int budget); -#ifdef CONFIG_NET_POLL_CONTROLLER -static void netxen_nic_poll_controller(struct net_device *netdev); -#endif static void netxen_create_sysfs_entries(struct netxen_adapter *adapter); static void netxen_remove_sysfs_entries(struct netxen_adapter *adapter); @@ -581,9 +578,6 @@ static const struct net_device_ops netxen_netdev_ops = { .ndo_tx_timeout = netxen_tx_timeout, .ndo_fix_features = netxen_fix_features, .ndo_set_features = netxen_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = netxen_nic_poll_controller, -#endif }; static inline bool netxen_function_zero(struct pci_dev *pdev) @@ -2402,23 +2396,6 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget) return work_done; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void netxen_nic_poll_controller(struct net_device *netdev) -{ - int ring; - struct nx_host_sds_ring *sds_ring; - struct netxen_adapter *adapter = netdev_priv(netdev); - struct netxen_recv_context *recv_ctx = &adapter->recv_ctx; - - disable_irq(adapter->irq); - for (ring = 0; ring < adapter->max_sds_rings; ring++) { - sds_ring = &recv_ctx->sds_rings[ring]; - netxen_intr(adapter->irq, sds_ring); - } - enable_irq(adapter->irq); -} -#endif - static int nx_incr_dev_ref_cnt(struct netxen_adapter *adapter) { From 21627982e4fff76a053f4d08d7fb56e532e08d52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:58 -0700 Subject: [PATCH 322/499] net: ena: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ena uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Netanel Belgazal Cc: Saeed Bishara Cc: Zorik Machulsky Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 29b5774dd32d..25621a218f20 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2185,25 +2185,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ena_netpoll(struct net_device *netdev) -{ - struct ena_adapter *adapter = netdev_priv(netdev); - int i; - - /* Dont schedule NAPI if the driver is in the middle of reset - * or netdev is down. 
- */ - - if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) || - test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) - return; - - for (i = 0; i < adapter->num_queues; i++) - napi_schedule(&adapter->ena_napi[i].napi); -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev, select_queue_fallback_t fallback) @@ -2369,9 +2350,6 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ena_netpoll, -#endif /* CONFIG_NET_POLL_CONTROLLER */ }; static int ena_device_validate_params(struct ena_adapter *adapter, From 9447a10ff607debe5e30cc438fb56925a559b9d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:59 -0700 Subject: [PATCH 323/499] sfc: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. sfc uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Edward Cree Cc: Bert Kenward Cc: Solarflare linux maintainers Acked-By: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 330233286e78..3d0dd39c289e 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2206,29 +2206,6 @@ static void efx_fini_napi(struct efx_nic *efx) efx_fini_napi_channel(channel); } -/************************************************************************** - * - * Kernel netpoll interface - * - *************************************************************************/ - -#ifdef CONFIG_NET_POLL_CONTROLLER - -/* Although in the common case interrupts will be disabled, this is not - * guaranteed. However, all our work happens inside the NAPI callback, - * so no locking is required. - */ -static void efx_netpoll(struct net_device *net_dev) -{ - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_schedule_channel(channel); -} - -#endif - /************************************************************************** * * Kernel net device interface @@ -2509,9 +2486,6 @@ static const struct net_device_ops efx_netdev_ops = { #endif .ndo_get_phys_port_id = efx_get_phys_port_id, .ndo_get_phys_port_name = efx_get_phys_port_name, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = efx_netpoll, -#endif .ndo_setup_tc = efx_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = efx_filter_rfs, From a4f570be654de779eaf626a79c4e0aa5a790505f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:32:00 -0700 Subject: [PATCH 324/499] sfc-falcon: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. 
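The handler being removed is essentially the same in every driver touched by this series; a condensed sketch, modeled on the qlcnic/netxen/ena handlers removed above (the adapter layout and the foo_* names are illustrative, not any specific driver's):

#ifdef CONFIG_NET_POLL_CONTROLLER
static void foo_netpoll(struct net_device *netdev)
{
	struct foo_adapter *adapter = netdev_priv(netdev);	/* hypothetical private data */
	int i;

	/* One CPU schedules NAPI for every queue of the NIC at once;
	 * this is the capture problem described above. */
	for (i = 0; i < adapter->num_queues; i++)
		napi_schedule(&adapter->napi[i]);
}
#endif
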
sfc-falcon uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Solarflare linux maintainers Cc: Edward Cree Cc: Bert Kenward Acked-By: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/falcon/efx.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index dd5530a4f8c8..03e2455c502e 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2052,29 +2052,6 @@ static void ef4_fini_napi(struct ef4_nic *efx) ef4_fini_napi_channel(channel); } -/************************************************************************** - * - * Kernel netpoll interface - * - *************************************************************************/ - -#ifdef CONFIG_NET_POLL_CONTROLLER - -/* Although in the common case interrupts will be disabled, this is not - * guaranteed. However, all our work happens inside the NAPI callback, - * so no locking is required. - */ -static void ef4_netpoll(struct net_device *net_dev) -{ - struct ef4_nic *efx = netdev_priv(net_dev); - struct ef4_channel *channel; - - ef4_for_each_channel(channel, efx) - ef4_schedule_channel(channel); -} - -#endif - /************************************************************************** * * Kernel net device interface @@ -2250,9 +2227,6 @@ static const struct net_device_ops ef4_netdev_ops = { .ndo_set_mac_address = ef4_set_mac_address, .ndo_set_rx_mode = ef4_set_rx_mode, .ndo_set_features = ef4_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ef4_netpoll, -#endif .ndo_setup_tc = ef4_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = ef4_filter_rfs, From 0c3b9d1b37df16ae6046a5a01f769bf3d21b838c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:32:01 -0700 Subject: [PATCH 325/499] ibmvnic: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ibmvnic uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. ibmvnic_netpoll_controller() was completely wrong anyway, as it was scheduling NAPI to service RX queues (instead of TX), so I doubt netpoll ever worked on this driver. Signed-off-by: Eric Dumazet Cc: Thomas Falcon Cc: John Allen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4f0daf67b18d..699ef942b615 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2207,19 +2207,6 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget) return frames_processed; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ibmvnic_netpoll_controller(struct net_device *dev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(dev); - int i; - - replenish_pools(netdev_priv(dev)); - for (i = 0; i < adapter->req_rx_queues; i++) - ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq, - adapter->rx_scrq[i]); -} -#endif - static int wait_for_reset(struct ibmvnic_adapter *adapter) { int rc, ret; @@ -2292,9 +2279,6 @@ static const struct net_device_ops ibmvnic_netdev_ops = { .ndo_set_mac_address = ibmvnic_set_mac, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = ibmvnic_tx_timeout, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ibmvnic_netpoll_controller, -#endif .ndo_change_mtu = ibmvnic_change_mtu, .ndo_features_check = ibmvnic_features_check, }; From befb1b3c2703897c5b8ffb0044dc5d0e5f27c5d7 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 19 Sep 2018 10:29:06 -0700 Subject: [PATCH 326/499] perf/core: Add sanity check to deal with pinned event failure It is possible that a failure can occur during the scheduling of a pinned event. The initial portion of perf_event_read_local() contains the various error checks an event should pass before it can be considered valid. Ensure that the potential scheduling failure of a pinned event is checked for and have a credible error. Suggested-by: Peter Zijlstra Signed-off-by: Reinette Chatre Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: fenghua.yu@intel.com Cc: tony.luck@intel.com Cc: acme@kernel.org Cc: gavin.hindman@intel.com Cc: jithu.joseph@intel.com Cc: dave.hansen@intel.com Cc: hpa@zytor.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/6486385d1f30336e9973b24c8c65f5079543d3d3.1537377064.git.reinette.chatre@intel.com --- kernel/events/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index c80549bf82c6..dcb093e7b377 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3935,6 +3935,12 @@ int perf_event_read_local(struct perf_event *event, u64 *value, goto out; } + /* If this is a pinned event it must be running on this CPU */ + if (event->attr.pinned && event->oncpu != smp_processor_id()) { + ret = -EBUSY; + goto out; + } + /* * If the event is currently on this CPU, its either a per-task event, * or local to this CPU. Furthermore it means its ACTIVE (otherwise From 573bcd380921b5216b62dcd072ec426f5ecbeb9d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 28 Sep 2018 17:34:21 -0500 Subject: [PATCH 327/499] MAINTAINERS: Remove obsolete drivers/pci pattern from ACPI section Prior to 256a45937093 ("PCI/AER: Squash aerdrv_acpi.c into aerdrv.c"), drivers/pci/pcie/aer/aerdrv_acpi.c contained code to parse the ACPI HEST table. That code now lives in drivers/pci/pcie/aer.c. Remove the "F: drivers/pci/*/*/*acpi*" pattern because it matches nothing. We could add a "F: drivers/pci/pcie/aer.c" pattern to the ACPI APEI section, but that file sees a lot of changes, almost none of which are of interest to the ACPI folks. Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. 
Wysocki --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 700408b7bc53..9babd8a0406b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -324,7 +324,6 @@ F: Documentation/ABI/testing/sysfs-bus-acpi F: Documentation/ABI/testing/configfs-acpi F: drivers/pci/*acpi* F: drivers/pci/*/*acpi* -F: drivers/pci/*/*/*acpi* F: tools/power/acpi/ ACPI APEI From ae29478766f4c8e16edca6fe1e25d73c47991ebe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 29 Sep 2018 13:40:40 +1000 Subject: [PATCH 328/499] xfs: don't crash the vfs on a garbage inline symlink The VFS routine that calls ->get_link blindly copies whatever's returned into the user's buffer. If we return a NULL pointer, the vfs will crash on the null pointer. Therefore, return -EFSCORRUPTED instead of blowing up the kernel. [dgc: clean up with hch's suggestions] Reported-by: wen.xu@gatech.edu Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Signed-off-by: Dave Chinner --- fs/xfs/xfs_iops.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c3e74f9128e8..f48ffd7a8d3e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -471,8 +471,18 @@ xfs_vn_get_link_inline( struct inode *inode, struct delayed_call *done) { + char *link; + ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); - return XFS_I(inode)->i_df.if_u1.if_data; + + /* + * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if + * if_data is junk. + */ + link = XFS_I(inode)->i_df.if_u1.if_data; + if (!link) + return ERR_PTR(-EFSCORRUPTED); + return link; } STATIC int From d5a2e2893da0d62c3888c91ae2da798adc17a9b9 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 29 Sep 2018 13:41:58 +1000 Subject: [PATCH 329/499] xfs: remove last of unnecessary xfs_defer_cancel() callers Now that deferred operations are completely managed via transactions, it's no longer necessary to cancel the dfops in error paths that already cancel the associated transaction. There are a few such calls lingering throughout the codebase. Remove all remaining unnecessary calls to xfs_defer_cancel(). This leaves xfs_defer_cancel() calls in two places. The first is the call in the transaction cancel path itself, which facilitates this patch. The second is made via the xfs_defer_finish() error path to provide consistent error semantics with transaction commit. For example, xfs_trans_commit() expects an xfs_defer_finish() failure to clean up the dfops structure before it returns. 
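A minimal sketch of the error-flow convention this leaves behind (do_deferred_updates() is an illustrative helper; the point is that error paths simply unwind through xfs_trans_cancel(), which also cancels any pending dfops):

	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
	if (error)
		return error;

	error = do_deferred_updates(tp);	/* illustrative helper that queues dfops */
	if (error)
		goto out_cancel;		/* no explicit xfs_defer_cancel() needed */

	return xfs_trans_commit(tp);

out_cancel:
	xfs_trans_cancel(tp);			/* cancels pending deferred ops too */
	return error;
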
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_attr.c | 28 ++++++++-------------------- fs/xfs/libxfs/xfs_attr_remote.c | 10 ++-------- fs/xfs/xfs_bmap_util.c | 12 +++++------- fs/xfs/xfs_inode.c | 10 +--------- 4 files changed, 16 insertions(+), 44 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 1e671d4eb6fa..c6299f82a6e4 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -587,7 +587,7 @@ xfs_attr_leaf_addname( */ error = xfs_attr3_leaf_to_node(args); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -675,7 +675,7 @@ xfs_attr_leaf_addname( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -693,9 +693,6 @@ xfs_attr_leaf_addname( error = xfs_attr3_leaf_clearflag(args); } return error; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -738,15 +735,12 @@ xfs_attr_leaf_removename( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; } return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -864,7 +858,7 @@ xfs_attr_node_addname( state = NULL; error = xfs_attr3_leaf_to_node(args); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -888,7 +882,7 @@ xfs_attr_node_addname( */ error = xfs_da3_split(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -984,7 +978,7 @@ xfs_attr_node_addname( if (retval && (state->path.active > 1)) { error = xfs_da3_join(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1013,9 +1007,6 @@ xfs_attr_node_addname( if (error) return error; return retval; -out_defer_cancel: - xfs_defer_cancel(args->trans); - goto out; } /* @@ -1107,7 +1098,7 @@ xfs_attr_node_removename( if (retval && (state->path.active > 1)) { error = xfs_da3_join(state); if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1138,7 +1129,7 @@ xfs_attr_node_removename( error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) - goto out_defer_cancel; + goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; @@ -1150,9 +1141,6 @@ xfs_attr_node_removename( out: xfs_da_state_free(state); return error; -out_defer_cancel: - xfs_defer_cancel(args->trans); - goto out; } /* diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index af094063e402..d89363c6b523 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -485,7 +485,7 @@ xfs_attr_rmtval_set( blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, &nmap); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -553,9 +553,6 @@ xfs_attr_rmtval_set( } ASSERT(valuelen == 0); return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } /* @@ -625,7 +622,7 @@ xfs_attr_rmtval_remove( error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 
XFS_BMAPI_ATTRFORK, 1, &done); if (error) - goto out_defer_cancel; + return error; error = xfs_defer_finish(&args->trans); if (error) return error; @@ -638,7 +635,4 @@ xfs_attr_rmtval_remove( return error; } return 0; -out_defer_cancel: - xfs_defer_cancel(args->trans); - return error; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index addbd74ecd8e..ae3cc393724f 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1584,7 +1584,7 @@ xfs_swap_extent_rmap( tirec.br_blockcount, &irec, &nimaps, 0); if (error) - goto out_defer; + goto out; ASSERT(nimaps == 1); ASSERT(tirec.br_startoff == irec.br_startoff); trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); @@ -1599,22 +1599,22 @@ xfs_swap_extent_rmap( /* Remove the mapping from the donor file. */ error = xfs_bmap_unmap_extent(tp, tip, &uirec); if (error) - goto out_defer; + goto out; /* Remove the mapping from the source file. */ error = xfs_bmap_unmap_extent(tp, ip, &irec); if (error) - goto out_defer; + goto out; /* Map the donor file's blocks into the source file. */ error = xfs_bmap_map_extent(tp, ip, &uirec); if (error) - goto out_defer; + goto out; /* Map the source file's blocks into the donor file. */ error = xfs_bmap_map_extent(tp, tip, &irec); if (error) - goto out_defer; + goto out; error = xfs_defer_finish(tpp); tp = *tpp; @@ -1636,8 +1636,6 @@ xfs_swap_extent_rmap( tip->i_d.di_flags2 = tip_flags2; return 0; -out_defer: - xfs_defer_cancel(tp); out: trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); tip->i_d.di_flags2 = tip_flags2; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d957a46dc1cb..05db9540e459 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1563,7 +1563,7 @@ xfs_itruncate_extents_flags( error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags, XFS_ITRUNC_MAX_EXTENTS, &done); if (error) - goto out_bmap_cancel; + goto out; /* * Duplicate the transaction that has the permanent @@ -1599,14 +1599,6 @@ xfs_itruncate_extents_flags( out: *tpp = tp; return error; -out_bmap_cancel: - /* - * If the bunmapi call encounters an error, return to the caller where - * the transaction can be properly aborted. We just need to make sure - * we're not holding any resources that we were not when we came in. - */ - xfs_defer_cancel(tp); - goto out; } int From d9183105caa926522a4bc8a40e162de7019f1a21 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 29 Sep 2018 13:44:40 +1000 Subject: [PATCH 330/499] xfs: don't unlock invalidated buf on aborted tx commit xfstests generic/388,475 occasionally reproduce assertion failures in xfs_buf_item_unpin() when the final bli reference is dropped on an invalidated buffer and the buffer is not locked as it is expected to be. Invalidated buffers should remain locked on transaction commit until the final unpin, at which point the buffer is removed from the AIL and the bli is freed since stale buffers are not written back. The assert failures are associated with filesystem shutdown, typically due to log I/O errors injected by the test. The problematic situation can occur if the shutdown happens to cause a race between an active transaction that has invalidated a particular buffer and an I/O error on a log buffer that contains the bli associated with the same (now stale) buffer. Both transaction and log contexts acquire a bli reference. If the transaction has already invalidated the buffer by the time the I/O error occurs and ends up aborting due to shutdown, the transaction and log hold the last two references to a stale bli. 
If the transaction cancel occurs first, it treats the buffer as non-stale due to the aborted state: the bli reference is dropped and the buffer is released/unlocked. The log buffer I/O error handling eventually calls into xfs_buf_item_unpin(), drops the final reference to the bli and treats it as stale. The buffer wasn't left locked by xfs_buf_item_unlock(), however, so the assert fails and the buffer is double unlocked. The latter problem is mitigated by the fact that the fs is shutdown and no further damage is possible. ->iop_unlock() of an invalidated buffer should behave consistently with respect to the bli refcount, regardless of aborted state. If the refcount remains elevated on commit, we know the bli is awaiting an unpin (since it can't be in another transaction) and will be handled appropriately on log buffer completion. If the final bli reference of an invalidated buffer is dropped in ->iop_unlock(), we can assume the transaction has aborted because invalidation implies a dirty transaction. In the non-abort case, the log would have acquired a bli reference in ->iop_pin() and prevented bli release at ->iop_unlock() time. In the abort case the item must be freed and buffer unlocked because it wasn't pinned by the log. Rework xfs_buf_item_unlock() to simplify the currently circuitous and duplicate logic and leave invalidated buffers locked based on bli refcount, regardless of aborted state. This ensures that a pinned, stale buffer is always found locked when eventually unpinned. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf_item.c | 91 ++++++++++++++++++++----------------------- fs/xfs/xfs_trace.h | 1 - 2 files changed, 42 insertions(+), 50 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1c9d1398980b..42fce70b474d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -556,73 +556,66 @@ xfs_buf_item_unlock( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; + bool freed; bool aborted; - bool hold = !!(bip->bli_flags & XFS_BLI_HOLD); - bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY); + bool hold = bip->bli_flags & XFS_BLI_HOLD; + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; + bool stale = bip->bli_flags & XFS_BLI_STALE; #if defined(DEBUG) || defined(XFS_WARN) - bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED); + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; #endif - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); - - /* Clear the buffer's association with this transaction. */ - bp->b_transp = NULL; - - /* - * The per-transaction state has been copied above so clear it from the - * bli. - */ - bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); - - /* - * If the buf item is marked stale, then don't do anything. We'll - * unlock the buffer and free the buf item when the buffer is unpinned - * for the last time. - */ - if (bip->bli_flags & XFS_BLI_STALE) { - trace_xfs_buf_item_unlock_stale(bip); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - if (!aborted) { - atomic_dec(&bip->bli_refcount); - return; - } - } - trace_xfs_buf_item_unlock(bip); /* - * If the buf item isn't tracking any data, free it, otherwise drop the - * reference we hold to it. If we are aborting the transaction, this may - * be the only reference to the buf item, so we free it anyway - * regardless of whether it is dirty or not. A dirty abort implies a - * shutdown, anyway. 
- * * The bli dirty state should match whether the blf has logged segments * except for ordered buffers, where only the bli should be dirty. */ ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) || (ordered && dirty && !xfs_buf_item_dirty_format(bip))); + ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); + + aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); /* - * Clean buffers, by definition, cannot be in the AIL. However, aborted - * buffers may be in the AIL regardless of dirty state. An aborted - * transaction that invalidates a buffer already in the AIL may have - * marked it stale and cleared the dirty state, for example. - * - * Therefore if we are aborting a buffer and we've just taken the last - * reference away, we have to check if it is in the AIL before freeing - * it. We need to free it in this case, because an aborted transaction - * has already shut the filesystem down and this is the last chance we - * will have to do so. + * Clear the buffer's association with this transaction and + * per-transaction state from the bli, which has been copied above. */ - if (atomic_dec_and_test(&bip->bli_refcount)) { - if (aborted) { - ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); + bp->b_transp = NULL; + bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); + + /* + * Drop the transaction's bli reference and deal with the item if we had + * the last one. We must free the item if clean or aborted since it + * wasn't pinned by the log and this is the last chance to do so. If the + * bli is freed and dirty (but non-aborted), the buffer was not dirty in + * this transaction but modified by a previous one and still awaiting + * writeback. In that case, the bli is freed on buffer writeback + * completion. + */ + freed = atomic_dec_and_test(&bip->bli_refcount); + if (freed) { + ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp)); + /* + * An aborted item may be in the AIL regardless of dirty state. + * For example, consider an aborted transaction that invalidated + * a dirty bli and cleared the dirty state. + */ + if (aborted) xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); + if (aborted || !dirty) xfs_buf_item_relse(bp); - } else if (!dirty) - xfs_buf_item_relse(bp); + } else if (stale) { + /* + * Stale buffers remain locked until final unpin unless the bli + * was freed in the branch above. A freed stale bli implies an + * abort because buffer invalidation dirties the bli and + * transaction. + */ + ASSERT(!freed); + return; } + ASSERT(!stale || (aborted && freed)); if (!hold) xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ad315e83bc02..3043e5ed6495 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -473,7 +473,6 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); From 23420d05e67d23728e116321c4afe084ebfa6427 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 29 Sep 2018 13:45:02 +1000 Subject: [PATCH 331/499] xfs: clean up xfs_trans_brelse() xfs_trans_brelse() is a bit of a historical mess, similar to xfs_buf_item_unlock(). It is unnecessarily verbose, has snippets of commented out code, inconsistency with regard to stale items, etc. 
Clean up xfs_trans_brelse() to use similar logic and flow as xfs_buf_item_unlock() with regard to bli reference count handling. This patch makes no functional changes, but facilitates further refactoring of the common bli reference count handling code. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_trans_buf.c | 112 +++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 72 deletions(-) diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 15919f67a88f..7498f87ceed3 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -322,49 +322,40 @@ xfs_trans_read_buf_map( } /* - * Release the buffer bp which was previously acquired with one of the - * xfs_trans_... buffer allocation routines if the buffer has not - * been modified within this transaction. If the buffer is modified - * within this transaction, do decrement the recursion count but do - * not release the buffer even if the count goes to 0. If the buffer is not - * modified within the transaction, decrement the recursion count and - * release the buffer if the recursion count goes to 0. + * Release a buffer previously joined to the transaction. If the buffer is + * modified within this transaction, decrement the recursion count but do not + * release the buffer even if the count goes to 0. If the buffer is not modified + * within the transaction, decrement the recursion count and release the buffer + * if the recursion count goes to 0. * - * If the buffer is to be released and it was not modified before - * this transaction began, then free the buf_log_item associated with it. + * If the buffer is to be released and it was not already dirty before this + * transaction began, then also free the buf_log_item associated with it. * - * If the transaction pointer is NULL, make this just a normal - * brelse() call. + * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call. */ void xfs_trans_brelse( - xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_trans *tp, + struct xfs_buf *bp) { - struct xfs_buf_log_item *bip; - int freed; + struct xfs_buf_log_item *bip = bp->b_log_item; + bool freed; + bool dirty; - /* - * Default to a normal brelse() call if the tp is NULL. - */ - if (tp == NULL) { - ASSERT(bp->b_transp == NULL); + ASSERT(bp->b_transp == tp); + + if (!tp) { xfs_buf_relse(bp); return; } - ASSERT(bp->b_transp == tp); - bip = bp->b_log_item; + trace_xfs_trans_brelse(bip); ASSERT(bip->bli_item.li_type == XFS_LI_BUF); - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); - ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); ASSERT(atomic_read(&bip->bli_refcount) > 0); - trace_xfs_trans_brelse(bip); - /* - * If the release is just for a recursive lock, - * then decrement the count and return. + * If the release is for a recursive lookup, then decrement the count + * and return. */ if (bip->bli_recur > 0) { bip->bli_recur--; @@ -372,63 +363,40 @@ xfs_trans_brelse( } /* - * If the buffer is dirty within this transaction, we can't + * If the buffer is invalidated or dirty in this transaction, we can't * release it until we commit. */ if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)) return; - - /* - * If the buffer has been invalidated, then we can't release - * it until the transaction commits to disk unless it is re-dirtied - * as part of this transaction. This prevents us from pulling - * the item from the AIL before we should. 
- */ if (bip->bli_flags & XFS_BLI_STALE) return; + /* + * Unlink the log item from the transaction and clear the hold flag, if + * set. We wouldn't want the next user of the buffer to get confused. + */ ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); - - /* - * Free up the log item descriptor tracking the released item. - */ xfs_trans_del_item(&bip->bli_item); + bip->bli_flags &= ~XFS_BLI_HOLD; /* - * Clear the hold flag in the buf log item if it is set. - * We wouldn't want the next user of the buffer to - * get confused. - */ - if (bip->bli_flags & XFS_BLI_HOLD) { - bip->bli_flags &= ~XFS_BLI_HOLD; - } - - /* - * Drop our reference to the buf log item. + * Drop the reference to the bli. At this point, the bli must be either + * freed or dirty (or both). If freed, there are a couple cases where we + * are responsible to free the item. If the bli is clean, we're the last + * user of it. If the fs has shut down, the bli may be dirty and AIL + * resident, but won't ever be written back. We therefore may also need + * to remove it from the AIL before freeing it. */ freed = atomic_dec_and_test(&bip->bli_refcount); - - /* - * If the buf item is not tracking data in the log, then we must free it - * before releasing the buffer back to the free pool. - * - * If the fs has shutdown and we dropped the last reference, it may fall - * on us to release a (possibly dirty) bli if it never made it to the - * AIL (e.g., the aborted unpin already happened and didn't release it - * due to our reference). Since we're already shutdown and need - * ail_lock, just force remove from the AIL and release the bli here. - */ - if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { - xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - } else if (!(bip->bli_flags & XFS_BLI_DIRTY)) { -/*** - ASSERT(bp->b_pincount == 0); -***/ - ASSERT(atomic_read(&bip->bli_refcount) == 0); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); - xfs_buf_item_relse(bp); + dirty = bip->bli_flags & XFS_BLI_DIRTY; + ASSERT(freed || dirty); + if (freed) { + bool abort = XFS_FORCED_SHUTDOWN(tp->t_mountp); + ASSERT(abort || !test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + if (abort) + xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); + if (!dirty || abort) + xfs_buf_item_relse(bp); } bp->b_transp = NULL; From 95808459b110f16b50f03a70ecfa72bb14bd8a96 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 29 Sep 2018 13:45:26 +1000 Subject: [PATCH 332/499] xfs: refactor xfs_buf_log_item reference count handling The xfs_buf_log_item structure has a reference counter with slightly tricky semantics. In the common case, a buffer is logged and committed in a transaction, committed to the on-disk log (added to the AIL) and then finally written back and removed from the AIL. The bli refcount covers two potentially overlapping timeframes: 1. the bli is held in an active transaction 2. the bli is pinned by the log The caveat to this approach is that the reference counter does not purely dictate the lifetime of the bli. IOW, when a dirty buffer is physically logged and unpinned, the bli refcount may go to zero as the log item is inserted into the AIL. Only once the buffer is written back can the bli finally be freed. The above semantics means that it is not enough for the various refcount decrementing contexts to release the bli on decrement to zero. 
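In outline, the common-case reference flow is (a simplified sketch; per the description above, the transaction and the log each hold one reference):

	buffer joined to a transaction          refcount -> 1  (transaction reference)
	->iop_pin()    at commit to the CIL     refcount -> 2  (log reference)
	->iop_unlock() at transaction commit    refcount -> 1
	->iop_unpin()  at log I/O completion    refcount -> 0, item sits in the AIL until writeback frees the bli
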
xfs_trans_brelse(), transaction commit (->iop_unlock()) and unpin (->iop_unpin()) must all drop the associated reference and make additional checks to determine if the current context is responsible for freeing the item. For example, if a transaction holds but does not dirty a particular bli, the commit may drop the refcount to zero. If the bli itself is clean, it is also not AIL resident and must be freed at this time. The same is true for xfs_trans_brelse(). If the transaction dirties a bli and then aborts or an unpin results in an abort due to a log I/O error, the last reference count holder is expected to explicitly remove the item from the AIL and release it (since an abort means filesystem shutdown and metadata writeback will never occur). This leads to fairly complex checks being replicated in a few different places. Since ->iop_unlock() and xfs_trans_brelse() are nearly identical, refactor the logic into a common helper that implements and documents the semantics in one place. This patch does not change behavior. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf_item.c | 90 +++++++++++++++++++++++++----------------- fs/xfs/xfs_buf_item.h | 1 + fs/xfs/xfs_trans_buf.c | 23 +---------- 3 files changed, 56 insertions(+), 58 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 42fce70b474d..12d8455bfbb2 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -531,6 +531,49 @@ xfs_buf_item_push( return rval; } +/* + * Drop the buffer log item refcount and take appropriate action. This helper + * determines whether the bli must be freed or not, since a decrement to zero + * does not necessarily mean the bli is unused. + * + * Return true if the bli is freed, false otherwise. + */ +bool +xfs_buf_item_put( + struct xfs_buf_log_item *bip) +{ + struct xfs_log_item *lip = &bip->bli_item; + bool aborted; + bool dirty; + + /* drop the bli ref and return if it wasn't the last one */ + if (!atomic_dec_and_test(&bip->bli_refcount)) + return false; + + /* + * We dropped the last ref and must free the item if clean or aborted. + * If the bli is dirty and non-aborted, the buffer was clean in the + * transaction but still awaiting writeback from previous changes. In + * that case, the bli is freed on buffer writeback completion. + */ + aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || + XFS_FORCED_SHUTDOWN(lip->li_mountp); + dirty = bip->bli_flags & XFS_BLI_DIRTY; + if (dirty && !aborted) + return false; + + /* + * The bli is aborted or clean. An aborted item may be in the AIL + * regardless of dirty state. For example, consider an aborted + * transaction that invalidated a dirty bli and cleared the dirty + * state. + */ + if (aborted) + xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip->bli_buf); + return true; +} + /* * Release the buffer associated with the buf log item. 
If there is no dirty * logged data associated with the buffer recorded in the buf log item, then @@ -556,13 +599,12 @@ xfs_buf_item_unlock( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool freed; - bool aborted; + bool released; bool hold = bip->bli_flags & XFS_BLI_HOLD; - bool dirty = bip->bli_flags & XFS_BLI_DIRTY; bool stale = bip->bli_flags & XFS_BLI_STALE; #if defined(DEBUG) || defined(XFS_WARN) bool ordered = bip->bli_flags & XFS_BLI_ORDERED; + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; #endif trace_xfs_buf_item_unlock(bip); @@ -575,8 +617,6 @@ xfs_buf_item_unlock( (ordered && dirty && !xfs_buf_item_dirty_format(bip))); ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); - /* * Clear the buffer's association with this transaction and * per-transaction state from the bli, which has been copied above. @@ -585,40 +625,16 @@ xfs_buf_item_unlock( bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); /* - * Drop the transaction's bli reference and deal with the item if we had - * the last one. We must free the item if clean or aborted since it - * wasn't pinned by the log and this is the last chance to do so. If the - * bli is freed and dirty (but non-aborted), the buffer was not dirty in - * this transaction but modified by a previous one and still awaiting - * writeback. In that case, the bli is freed on buffer writeback - * completion. + * Unref the item and unlock the buffer unless held or stale. Stale + * buffers remain locked until final unpin unless the bli is freed by + * the unref call. The latter implies shutdown because buffer + * invalidation dirties the bli and transaction. */ - freed = atomic_dec_and_test(&bip->bli_refcount); - if (freed) { - ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp)); - /* - * An aborted item may be in the AIL regardless of dirty state. - * For example, consider an aborted transaction that invalidated - * a dirty bli and cleared the dirty state. - */ - if (aborted) - xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); - if (aborted || !dirty) - xfs_buf_item_relse(bp); - } else if (stale) { - /* - * Stale buffers remain locked until final unpin unless the bli - * was freed in the branch above. A freed stale bli implies an - * abort because buffer invalidation dirties the bli and - * transaction. 
- */ - ASSERT(!freed); + released = xfs_buf_item_put(bip); + if (hold || (stale && !released)) return; - } - ASSERT(!stale || (aborted && freed)); - - if (!hold) - xfs_buf_relse(bp); + ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags)); + xfs_buf_relse(bp); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 3f7d7b72e7e6..90f65f891fab 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -51,6 +51,7 @@ struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_relse(struct xfs_buf *); +bool xfs_buf_item_put(struct xfs_buf_log_item *); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 7498f87ceed3..286a287ac57a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -339,8 +339,6 @@ xfs_trans_brelse( struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; - bool freed; - bool dirty; ASSERT(bp->b_transp == tp); @@ -379,25 +377,8 @@ xfs_trans_brelse( xfs_trans_del_item(&bip->bli_item); bip->bli_flags &= ~XFS_BLI_HOLD; - /* - * Drop the reference to the bli. At this point, the bli must be either - * freed or dirty (or both). If freed, there are a couple cases where we - * are responsible to free the item. If the bli is clean, we're the last - * user of it. If the fs has shut down, the bli may be dirty and AIL - * resident, but won't ever be written back. We therefore may also need - * to remove it from the AIL before freeing it. - */ - freed = atomic_dec_and_test(&bip->bli_refcount); - dirty = bip->bli_flags & XFS_BLI_DIRTY; - ASSERT(freed || dirty); - if (freed) { - bool abort = XFS_FORCED_SHUTDOWN(tp->t_mountp); - ASSERT(abort || !test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - if (abort) - xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); - if (!dirty || abort) - xfs_buf_item_relse(bp); - } + /* drop the reference to the bli */ + xfs_buf_item_put(bip); bp->b_transp = NULL; xfs_buf_relse(bp); From 8683edb7755b853f0dd92e07fe2e7a7e675a84d7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 29 Sep 2018 13:46:21 +1000 Subject: [PATCH 333/499] xfs: avoid lockdep false positives in xfs_trans_alloc We've had a few reports of lockdep tripping over memory reclaim context vs filesystem freeze "deadlocks". They all have looked to be false positives on analysis, but it seems that they are being tripped because we take freeze references before we run a GFP_KERNEL allocation for the struct xfs_trans. We can avoid this false positive vector just by re-ordering the operations in xfs_trans_alloc(). That is. we need allocate the structure before we take the freeze reference and enter the GFP_NOFS allocation context that follows the xfs_trans around. This prevents lockdep from seeing the GFP_KERNEL allocation inside the transaction context, and that prevents it from triggering the freeze level vs alloc context vs reclaim warnings. 
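The change is purely a reordering; a sketch of the relevant lines (see the diff below for the full context, including the KM_NOFS case):

	/* old: the GFP_KERNEL-capable allocation happens inside the
	 * freeze-protected section taken by sb_start_intwrite() */
	sb_start_intwrite(mp->m_super);
	...
	tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);

	/* new: allocate the handle first, then take the freeze reference */
	tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
	sb_start_intwrite(mp->m_super);
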
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_trans.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index bedc5a5133a5..912b42f5fe4a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -259,6 +259,14 @@ xfs_trans_alloc( struct xfs_trans *tp; int error; + /* + * Allocate the handle before we do our freeze accounting and setting up + * GFP_NOFS allocation context so that we avoid lockdep false positives + * by doing GFP_KERNEL allocations inside sb_start_intwrite(). + */ + tp = kmem_zone_zalloc(xfs_trans_zone, + (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); + if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); @@ -270,8 +278,6 @@ xfs_trans_alloc( mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); atomic_inc(&mp->m_active_trans); - tp = kmem_zone_zalloc(xfs_trans_zone, - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; tp->t_mountp = mp; From df307077916fde42734a49022fc8ed23cc58caa4 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 29 Sep 2018 13:47:15 +1000 Subject: [PATCH 334/499] xfs: fix transaction leak in xfs_reflink_allocate_cow() When xfs_reflink_allocate_cow() allocates a transaction, it drops the ILOCK to perform the operation. This Introduces a race condition where another thread modifying the file can perform the COW allocation operation underneath us. This result in the retry loop finding an allocated block and jumping straight to the conversion code. It does not, however, cancel the transaction it holds and so this gets leaked. This results in a lockdep warning: ================================================ WARNING: lock held when returning to user space! 4.18.5 #1 Not tainted ------------------------------------------------ worker/6123 is leaving the kernel with locks still held! 1 lock held by worker/6123: #0: 000000009eab4f1b (sb_internal#2){.+.+}, at: xfs_trans_alloc+0x17c/0x220 And eventually the filesystem deadlocks because it runs out of log space that is reserved by the leaked transaction and never gets released. The logic flow in xfs_reflink_allocate_cow() is a convoluted mess of gotos - it's no surprise that it has bug where the flow through several goto jumps then fails to clean up context from a non-obvious logic path. CLean up the logic flow and make sure every path does the right thing. Reported-by: Alexander Y. Fomichev Tested-by: Alexander Y. Fomichev Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200981 Signed-off-by: Dave Chinner [hch: slight refactor] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 133 ++++++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 53 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 38f405415b88..d60d0eeed7b9 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -352,6 +352,47 @@ xfs_reflink_convert_cow( return error; } +/* + * Find the extent that maps the given range in the COW fork. Even if the extent + * is not shared we might have a preallocation for it in the COW fork. If so we + * use it that rather than trigger a new allocation. 
+ */ +static int +xfs_find_trim_cow_extent( + struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, + bool *shared, + bool *found) +{ + xfs_fileoff_t offset_fsb = imap->br_startoff; + xfs_filblks_t count_fsb = imap->br_blockcount; + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec got; + bool trimmed; + + *found = false; + + /* + * If we don't find an overlapping extent, trim the range we need to + * allocate to fit the hole we found. + */ + if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) || + got.br_startoff > offset_fsb) + return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); + + *shared = true; + if (isnullstartblock(got.br_startblock)) { + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); + return 0; + } + + /* real extent found - no need to allocate */ + xfs_trim_extent(&got, offset_fsb, count_fsb); + *imap = got; + *found = true; + return 0; +} + /* Allocate all CoW reservations covering a range of blocks in a file. */ int xfs_reflink_allocate_cow( @@ -363,78 +404,64 @@ xfs_reflink_allocate_cow( struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_filblks_t count_fsb = imap->br_blockcount; - struct xfs_bmbt_irec got; - struct xfs_trans *tp = NULL; + struct xfs_trans *tp; int nimaps, error = 0; - bool trimmed; + bool found; xfs_filblks_t resaligned; xfs_extlen_t resblks = 0; - struct xfs_iext_cursor icur; -retry: - ASSERT(xfs_is_reflink_inode(ip)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(xfs_is_reflink_inode(ip)); + + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + if (error || !*shared) + return error; + if (found) + goto convert; + + resaligned = xfs_aligned_fsb_count(imap->br_startoff, + imap->br_blockcount, xfs_get_cowextsz_hint(ip)); + resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); + + xfs_iunlock(ip, *lockmode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + *lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, *lockmode); + + if (error) + return error; + + error = xfs_qm_dqattach_locked(ip, false); + if (error) + goto out_trans_cancel; /* - * Even if the extent is not shared we might have a preallocation for - * it in the COW fork. If so use it. + * Check for an overlapping extent again now that we dropped the ilock. */ - if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) && - got.br_startoff <= offset_fsb) { - *shared = true; - - /* If we have a real allocation in the COW fork we're done. 
*/ - if (!isnullstartblock(got.br_startblock)) { - xfs_trim_extent(&got, offset_fsb, count_fsb); - *imap = got; - goto convert; - } - - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); - } else { - error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); - if (error || !*shared) - goto out; - } - - if (!tp) { - resaligned = xfs_aligned_fsb_count(imap->br_startoff, - imap->br_blockcount, xfs_get_cowextsz_hint(ip)); - resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); - - xfs_iunlock(ip, *lockmode); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); - *lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, *lockmode); - - if (error) - return error; - - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out; - goto retry; + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); + if (error || !*shared) + goto out_trans_cancel; + if (found) { + xfs_trans_cancel(tp); + goto convert; } error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) - goto out; + goto out_trans_cancel; xfs_trans_ijoin(tp, ip, 0); - nimaps = 1; - /* Allocate the entire reservation as unwritten blocks. */ + nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, resblks, imap, &nimaps); if (error) - goto out_trans_cancel; + goto out_unreserve; xfs_inode_set_cowblocks_tag(ip); - - /* Finish up. */ error = xfs_trans_commit(tp); if (error) return error; @@ -447,12 +474,12 @@ xfs_reflink_allocate_cow( return -ENOSPC; convert: return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb); -out_trans_cancel: + +out_unreserve: xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, XFS_QMOPT_RES_REGBLKS); -out: - if (tp) - xfs_trans_cancel(tp); +out_trans_cancel: + xfs_trans_cancel(tp); return error; } From 0065b54119973c9089f8222c1c8f9237ed9787a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 29 Sep 2018 13:47:46 +1000 Subject: [PATCH 335/499] xfs: don't bring in extents in xfs_bmap_punch_delalloc_range This function is only used to punch out delayed allocations on I/O failure, which means we need to have read the extents earlier. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_bmap_util.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index ae3cc393724f..6de8d90041ff 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -702,13 +702,9 @@ xfs_bmap_punch_delalloc_range( struct xfs_iext_cursor icur; int error = 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); - if (error) - goto out_unlock; - } + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) goto out_unlock; From 2863c2ebc4f45d2bc150016f36e2f31c155597e8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 29 Sep 2018 13:48:21 +1000 Subject: [PATCH 336/499] xfs: remove duplicated include from alloc.c Remove duplicated include xfs_alloc.h Signed-off-by: YueHaibing Reviewed-by: Darrick J. 
Wong Signed-off-by: Dave Chinner --- fs/xfs/scrub/alloc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 036b5c7021eb..376bcb585ae6 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -17,7 +17,6 @@ #include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" -#include "xfs_alloc.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" From f369a13cead821e679c7415dc66a17ec48cc26bf Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 29 Sep 2018 13:49:00 +1000 Subject: [PATCH 337/499] xfs: don't treat unknown di_flags2 as corruption in scrub xchk_inode_flags2() currently treats any di_flags2 values that the running kernel doesn't recognize as corruption, and calls xchk_ino_set_corrupt() if they are set. However, it's entirely possible that these flags were set in some newer kernel and are quite valid, but ignored in this kernel. (Validators don't care one bit about unknown di_flags2.) Call xchk_ino_set_warning instead, because this may or may not actually indicate a problem. Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_format.h | 2 ++ fs/xfs/scrub/inode.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 059bc44c27e8..afbe336600e1 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1016,6 +1016,8 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ +/* Do not use bit 15, di_flags is legacy and unchanging now */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 5b3b177c0fc9..e386c9b0b4ab 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -126,6 +126,7 @@ xchk_inode_flags( { struct xfs_mount *mp = sc->mp; + /* di_flags are all taken, last bit cannot be used */ if (flags & ~XFS_DIFLAG_ANY) goto bad; @@ -172,8 +173,9 @@ xchk_inode_flags2( { struct xfs_mount *mp = sc->mp; + /* Unknown di_flags2 could be from a future kernel */ if (flags2 & ~XFS_DIFLAG2_ANY) - goto bad; + xchk_ino_set_warning(sc, ino); /* reflink flag requires reflink feature */ if ((flags2 & XFS_DIFLAG2_REFLINK) && From f5f3f959b70b272dfe2d05758e43ae52f5ff7748 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 29 Sep 2018 13:49:58 +1000 Subject: [PATCH 338/499] xfs: skip delalloc COW blocks in xfs_reflink_end_cow The iomap direct I/O code issues a single ->end_io call for the whole I/O request, and if some of the extents cowered needed a COW operation it will call xfs_reflink_end_cow over the whole range. When we do AIO writes we drop the iolock after doing the initial setup, but before the I/O completion. Between dropping the lock and completing the I/O we can have a racing buffered write create new delalloc COW fork extents in the region covered by the outstanding direct I/O write, and thus see delalloc COW fork extents in xfs_reflink_end_cow. As concurrent writes are fundamentally racy and no guarantees are given we can simply skip those. This can be easily reproduced with xfstests generic/208 in always_cow mode. 
Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index d60d0eeed7b9..5289e22cb081 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -693,14 +693,12 @@ xfs_reflink_end_cow( if (!del.br_blockcount) goto prev_extent; - ASSERT(!isnullstartblock(got.br_startblock)); - /* - * Don't remap unwritten extents; these are - * speculatively preallocated CoW extents that have been - * allocated but have not yet been involved in a write. + * Only remap real extent that contain data. With AIO + * speculatively preallocations can leak into the range we + * are called upon, and we need to skip them. */ - if (got.br_state == XFS_EXT_UNWRITTEN) + if (!xfs_bmap_is_real_extent(&got)) goto prev_extent; /* Unmap the old blocks in the data fork. */ From 339e1a3fcdd1990e5ec115325ccb4c6f4cc5ee16 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 29 Sep 2018 13:50:13 +1000 Subject: [PATCH 339/499] xfs: validate inode di_forkoff Verify the inode di_forkoff, lifted from xfs_repair's process_check_inode_forkoff(). Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_inode_buf.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 30d1d60f1d46..09d9c8cfa4a0 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -415,6 +415,31 @@ xfs_dinode_verify_fork( return NULL; } +static xfs_failaddr_t +xfs_dinode_verify_forkoff( + struct xfs_dinode *dip, + struct xfs_mount *mp) +{ + if (!XFS_DFORK_Q(dip)) + return NULL; + + switch (dip->di_format) { + case XFS_DINODE_FMT_DEV: + if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) + return __this_address; + break; + case XFS_DINODE_FMT_LOCAL: /* fall through ... */ + case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ + case XFS_DINODE_FMT_BTREE: + if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3)) + return __this_address; + break; + default: + return __this_address; + } + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -470,6 +495,11 @@ xfs_dinode_verify( if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) return __this_address; + /* check for illegal values of forkoff */ + fa = xfs_dinode_verify_forkoff(dip, mp); + if (fa) + return fa; + /* Do we have appropriate data fork formats for the mode? */ switch (mode & S_IFMT) { case S_IFIFO: From ec2ed0b5e96fea2a4539bd7f7f43499ecd45be8b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 29 Sep 2018 13:50:41 +1000 Subject: [PATCH 340/499] xfs: remove invalid log recovery first/last cycle check One of the first steps of log recovery is to check for the special case of a zeroed log. If the first cycle in the log is zero or the tail portion of the log is zeroed, the head is set to the first instance of cycle 0. xlog_find_zeroed() includes a sanity check that enforces that the first cycle in the log must be 1 if the last cycle is 0. While this is true in most cases, the check is not totally valid because it doesn't consider the case where the filesystem crashed after a partial/out of order log buffer completion that wraps around the end of the physical log. 
For example, consider a filesystem that has completed most of the first cycle of the log, reaches the end of the physical log and splits the next single log buffer write into two in order to wrap around the end of the log. If these I/Os are reordered, the second (wrapped) I/O completes and the first happens to fail, the log is left in a state where the last cycle of the log is 0 and the first cycle is 2. This causes the xlog_find_zeroed() sanity check to fail and prevents the filesystem from mounting. This situation has been reproduced on particular systems via repeated runs of generic/475. This is an expected state that log recovery already knows how to deal with, however. Since the log is still partially zeroed, the head is detected correctly and points to a valid tail. The subsequent stale block detection clears blocks beyond the head up to the tail (within a maximum range), with the express purpose of clearing such out of order writes. As expected, this removes the out of order cycle 2 blocks at the physical start of the log. In other words, the only thing that prevents a clean mount and recovery of the filesystem in this scenario is the specific (last == 0 && first != 1) sanity check in xlog_find_zeroed(). Since the log head/tail are now independently validated via cycle, log record and CRC checks, this highly specific first cycle check is of dubious value. Remove it and rely on the higher level validation to determine whether log content is sane and recoverable. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_recover.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a21dc61ec09e..1fc9e9042e0e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1570,16 +1570,6 @@ xlog_find_zeroed( if (last_cycle != 0) { /* log completely written to */ xlog_put_bp(bp); return 0; - } else if (first_cycle != 1) { - /* - * If the cycle of the last block is zero, the cycle of - * the first block must be 1. If it's not, maybe we're - * not looking at a log... Bail out. - */ - xfs_warn(log->l_mp, - "Log inconsistent or not a log (last==0, first!=1)"); - error = -EINVAL; - goto bp_err; } /* we have a partially zeroed log */ From 561295a32579e2db3b22f74d1f45d5c4c4fd9375 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sat, 29 Sep 2018 13:51:01 +1000 Subject: [PATCH 341/499] iomap: set page dirty after partial delalloc on mkwrite The iomap page fault mechanism currently dirties the associated page after the full block range of the page has been allocated. This leaves the page susceptible to delayed allocations without ever being set dirty on sub-page block sized filesystems. For example, consider a page fault on a page with one preexisting real (non-delalloc) block allocated in the middle of the page. The first iomap_apply() iteration performs delayed allocation on the range up to the preexisting block, the next iteration finds the preexisting block, and the last iteration attempts to perform delayed allocation on the range after the prexisting block to the end of the page. If the first allocation succeeds and the final allocation fails with -ENOSPC, iomap_apply() returns the error and iomap_page_mkwrite() fails to dirty the page having already performed partial delayed allocation. This eventually results in the page being invalidated without ever converting the delayed allocation to real blocks. 
This problem is reliably reproduced by generic/083 on XFS on ppc64 systems (64k page size, 4k block size). It results in leaked delalloc blocks on inode reclaim, which triggers an assert failure in xfs_fs_destroy_inode() and filesystem accounting inconsistency. Move the set_page_dirty() call from iomap_page_mkwrite() to the actor callback, similar to how the buffer head implementation works. The actor callback is called iff ->iomap_begin() returns success, so ensures the page is dirtied as soon as possible after an allocation. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/iomap.c b/fs/iomap.c index 74762b1ec233..ec15cf2ec696 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1051,6 +1051,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, } else { WARN_ON_ONCE(!PageUptodate(page)); iomap_page_create(inode, page); + set_page_dirty(page); } return length; @@ -1090,7 +1091,6 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) length -= ret; } - set_page_dirty(page); wait_for_stable_page(page); return VM_FAULT_LOCKED; out_unlock: From d51aea13dd6753186a2bea7619029c460bdf0c4c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 19 Sep 2018 17:22:21 -0700 Subject: [PATCH 342/499] cpufreq: qcom-kryo: Fix section annotations There is currently a warning when building the Kryo cpufreq driver into the kernel image: WARNING: vmlinux.o(.text+0x8aa424): Section mismatch in reference from the function qcom_cpufreq_kryo_probe() to the function .init.text:qcom_cpufreq_kryo_get_msm_id() The function qcom_cpufreq_kryo_probe() references the function __init qcom_cpufreq_kryo_get_msm_id(). This is often because qcom_cpufreq_kryo_probe lacks a __init annotation or the annotation of qcom_cpufreq_kryo_get_msm_id is wrong. Remove the '__init' annotation from qcom_cpufreq_kryo_get_msm_id so that there is no more mismatch warning. Additionally, Nick noticed that the remove function was marked as '__init' when it should really be marked as '__exit'. Fixes: 46e2856b8e18 (cpufreq: Add Kryo CPU scaling driver) Fixes: 5ad7346b4ae2 (cpufreq: kryo: Add module remove and exit) Reported-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Acked-by: Viresh Kumar Cc: 4.18+ # 4.18+ Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/qcom-cpufreq-kryo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c index a1830fa25fc5..2a3675c24032 100644 --- a/drivers/cpufreq/qcom-cpufreq-kryo.c +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c @@ -44,7 +44,7 @@ enum _msm8996_version { struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev; -static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void) +static enum _msm8996_version qcom_cpufreq_kryo_get_msm_id(void) { size_t len; u32 *msm_id; @@ -222,7 +222,7 @@ static int __init qcom_cpufreq_kryo_init(void) } module_init(qcom_cpufreq_kryo_init); -static void __init qcom_cpufreq_kryo_exit(void) +static void __exit qcom_cpufreq_kryo_exit(void) { platform_device_unregister(kryo_cpufreq_pdev); platform_driver_unregister(&qcom_cpufreq_kryo_driver); From c4ce446e33d7a0e978256ac6fea4c80e59d9de5f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:50 -0700 Subject: [PATCH 343/499] asix: Check for supported Wake-on-LAN modes The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index e95dd12edec4..023b8d0bf175 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -607,6 +607,9 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From 5ba6b4aa9a410c5e2c6417df52b5e2118ea9b467 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:51 -0700 Subject: [PATCH 344/499] ax88179_178a: Check for supported Wake-on-LAN modes The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 9e8ad372f419..2207f7a7d1ff 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -566,6 +566,9 @@ ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_MODE_RWLC; if (wolinfo->wolopts & WAKE_MAGIC) From eb9ad088f96653a26b340f7c447c44cf023d5cdc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:52 -0700 Subject: [PATCH 345/499] lan78xx: Check for supported Wake-on-LAN modes The driver supports a fair amount of Wake-on-LAN modes, but is not checking that the user specified one that is supported. 
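As an illustrative note (not part of the original patch): with a check like this in place the problem becomes visible to userspace instead of being silently ignored, i.e. asking for a wake mode the adapter cannot honour (e.g. via "ethtool -s eth0 wol <flags>") now fails with EINVAL rather than appearing to succeed while the chosen trigger never actually wakes the system.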
Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a9991c5f4736..c3c9ba44e2a1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1401,19 +1401,10 @@ static int lan78xx_set_wol(struct net_device *netdev, if (ret < 0) return ret; - pdata->wol = 0; - if (wol->wolopts & WAKE_UCAST) - pdata->wol |= WAKE_UCAST; - if (wol->wolopts & WAKE_MCAST) - pdata->wol |= WAKE_MCAST; - if (wol->wolopts & WAKE_BCAST) - pdata->wol |= WAKE_BCAST; - if (wol->wolopts & WAKE_MAGIC) - pdata->wol |= WAKE_MAGIC; - if (wol->wolopts & WAKE_PHY) - pdata->wol |= WAKE_PHY; - if (wol->wolopts & WAKE_ARP) - pdata->wol |= WAKE_ARP; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + + pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); From c5cb93e994ffb43b7b3b1ff10b9f928f54574a36 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:53 -0700 Subject: [PATCH 346/499] sr9800: Check for supported Wake-on-LAN modes The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/sr9800.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 9277a0f228df..35f39f23d881 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -421,6 +421,9 @@ sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From f2750df1548bd8a2b060eb609fc43ca82811af4c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:54 -0700 Subject: [PATCH 347/499] r8152: Check for supported Wake-on-LAN Modes The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 21ff2e8976b1 ("r8152: support WOL") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2cd71bdb6484..f1b5201cc320 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4506,6 +4506,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (!rtl_can_wakeup(tp)) return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_ANY) + return -EINVAL; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; From 9c734b2769a73eea2e9e9767c0e0bf839ff23679 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:55 -0700 Subject: [PATCH 348/499] smsc75xx: Check for Wake-on-LAN modes The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. 
Fixes: 6c636503260d ("smsc75xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 05553d252446..e5a4cbb366dc 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -731,6 +731,9 @@ static int smsc75xx_ethtool_set_wol(struct net_device *net, struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From c530c471ba37bdd9fe1c7185b01455c00ae606fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:56 -0700 Subject: [PATCH 349/499] smsc95xx: Check for Wake-on-LAN modes The driver does not check for Wake-on-LAN modes specified by a user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: e0e474a83c18 ("smsc95xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 06b4d290784d..262e7a3c23cb 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -774,6 +774,9 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From c140eb166d681f66bd7e99fb121357db1a503e7f Mon Sep 17 00:00:00 2001 From: LUU Duc Canh Date: Wed, 26 Sep 2018 21:00:54 +0200 Subject: [PATCH 350/499] tipc: fix failover problem We see the following scenario: 1) Link endpoint B on node 1 discovers that its peer endpoint is gone. Since there is a second working link, failover procedure is started. 2) Link endpoint A on node 1 sends a FAILOVER message to peer endpoint A on node 2. The node item 1->2 goes to state FAILINGOVER. 3) Link endpoint A/2 receives the failover, and is supposed to take down its parallel link endpoint B/2, while producing a FAILOVER message to send back to A/1. 4) However, B/2 has already been deleted, so no FAILOVER message can be created. 5) Node 1->2 remains in state FAILINGOVER forever, refusing to receive any messages that can bring B/1 up again. We are left with a non-redundant link between node 1 and 2. We fix this by letting endpoint A/2 build a dummy FAILOVER message to send back to A/1, so that the situation can be resolved. Signed-off-by: LUU Duc Canh Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/link.c | 35 +++++++++++++++++++++++++++++++++++ net/tipc/link.h | 3 +++ net/tipc/node.c | 11 +++++++++++ 3 files changed, 49 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 26cc033ee167..4ed650ce6e61 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -410,6 +410,11 @@ char *tipc_link_name(struct tipc_link *l) return l->name; } +u32 tipc_link_state(struct tipc_link *l) +{ + return l->state; +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -1385,6 +1390,36 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, __skb_queue_tail(xmitq, skb); } +void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 onode = tipc_own_addr(l->net); + struct tipc_msg *hdr, *ihdr; + struct sk_buff_head tnlq; + struct sk_buff *skb; + u32 dnode = l->addr; + + skb_queue_head_init(&tnlq); + skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, + INT_H_SIZE, BASIC_H_SIZE, + dnode, onode, 0, 0, 0); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; + } + + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, 1); + msg_set_bearer_id(hdr, l->peer_bearer_id); + + ihdr = (struct tipc_msg *)msg_data(hdr); + tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, dnode); + msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, xmitq); +} + /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets * with contents of the link's transmit and backlog queues. */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 7bc494a33fdf..90488c538a4e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -88,6 +88,8 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); +void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, + struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); bool tipc_link_is_up(struct tipc_link *l); @@ -107,6 +109,7 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l); u16 tipc_link_acked(struct tipc_link *l); u32 tipc_link_id(struct tipc_link *l); char *tipc_link_name(struct tipc_link *l); +u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); int tipc_link_window(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 68014f1b6976..b0ee25f1f2e6 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -111,6 +111,7 @@ struct tipc_node { int action_flags; struct list_head list; int state; + bool failover_sent; u16 sync_point; int link_cnt; u16 working_links; @@ -680,6 +681,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->failover_sent = false; n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); @@ -1615,6 +1617,15 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } + /* If parallel link was already down, and this happened before + * the tunnel link came up, FAILOVER was never sent. Ensure that + * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. 
+ */ + if (n->state != NODE_FAILINGOVER && !n->failover_sent) { + tipc_link_create_dummy_tnl_msg(l, xmitq); + n->failover_sent = true; + } + /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; From c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Wed, 26 Sep 2018 12:41:10 -0700 Subject: [PATCH 351/499] qlcnic: fix Tx descriptor corruption on 82xx devices In regular NIC transmission flow, driver always configures MAC using Tx queue zero descriptor as a part of MAC learning flow. But with multi Tx queue supported NIC, regular transmission can occur on any non-zero Tx queue and from that context it uses Tx queue zero descriptor to configure MAC, at the same time TX queue zero could be used by another CPU for regular transmission which could lead to Tx queue zero descriptor corruption and cause FW abort. This patch fixes this in such a way that driver always configures learned MAC address from the same Tx queue which is used for regular transmission. Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +++++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++++++------ 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 81312924df14..0c443ea98479 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops { int (*config_loopback) (struct qlcnic_adapter *, u8); int (*clear_loopback) (struct qlcnic_adapter *, u8); int (*config_promisc_mode) (struct qlcnic_adapter *, u32); - void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); + void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *tx_ring); int (*get_board_info) (struct qlcnic_adapter *); void (*set_mac_filter_count) (struct qlcnic_adapter *); void (*free_mac_list) (struct qlcnic_adapter *); @@ -2064,9 +2065,10 @@ static inline int qlcnic_nic_set_promisc(struct qlcnic_adapter *adapter, } static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, - u64 *addr, u16 id) + u64 *addr, u16 vlan, + struct qlcnic_host_tx_ring *tx_ring) { - adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); + adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); } static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 569d54ededec..a79d84f99102 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2135,7 +2135,8 @@ int qlcnic_83xx_sre_macaddr_change(struct qlcnic_adapter *adapter, u8 *addr, } void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, - u16 vlan_id) + u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring) { u8 mac[ETH_ALEN]; memcpy(&mac, addr, ETH_ALEN); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index b75a81246856..73fe2f64491d 100644 
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *, ulong, u32); int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); -void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); +void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *ring); int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h index 4bb33af8e2b3..56a3bd9e37dc 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h @@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, struct net_device *netdev); void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, - u64 *uaddr, u16 vlan_id); + u64 *uaddr, u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring); int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, struct ethtool_coalesce *); int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 84dd83031a1b..9647578cbe6a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, } void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, - u16 vlan_id) + u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) { struct cmd_desc_type0 *hwdesc; struct qlcnic_nic_req *req; struct qlcnic_mac_req *mac_req; struct qlcnic_vlan_req *vlan_req; - struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; u32 producer; u64 word; @@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, - struct sk_buff *skb) + struct sk_buff *skb, + struct qlcnic_host_tx_ring *tx_ring) { struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) qlcnic_change_filter(adapter, &src_addr, - vlan_id); + vlan_id, tx_ring); tmp_fil->ftime = jiffies; return; } @@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, if (!fil) return; - qlcnic_change_filter(adapter, &src_addr, vlan_id); + qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); fil->ftime = jiffies; fil->vlan_id = vlan_id; memcpy(fil->faddr, &src_addr, ETH_ALEN); @@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } if (adapter->drv_mac_learn) - qlcnic_send_filter(adapter, first_desc, skb); + qlcnic_send_filter(adapter, first_desc, skb, tx_ring); tx_ring->tx_stats.tx_bytes += skb->len; tx_ring->tx_stats.xmit_called++; From 
ac8bd9e13be22a3d24bfc80972d4688e3e50e457 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 26 Sep 2018 22:12:39 +0200 Subject: [PATCH 352/499] r8169: Disable clk during suspend / resume Disable the clk during suspend to save power. Note that tp->clk may be NULL, the clk core functions handle this without problems. Reviewed-by: Andy Shevchenko Tested-by: Carlo Caione Signed-off-by: Hans de Goede Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index ab30aaeac6d3..d6b53f53909a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6869,8 +6869,10 @@ static int rtl8169_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); + struct rtl8169_private *tp = netdev_priv(dev); rtl8169_net_suspend(dev); + clk_disable_unprepare(tp->clk); return 0; } @@ -6898,6 +6900,9 @@ static int rtl8169_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); + struct rtl8169_private *tp = netdev_priv(dev); + + clk_prepare_enable(tp->clk); if (netif_running(dev)) __rtl8169_resume(dev); From 43955a45dc0b4f3be7f0c3afc0e080ed59bb5280 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Sep 2018 22:19:42 +0200 Subject: [PATCH 353/499] netlink: fix typo in nla_parse_nested() comment Fix a simple typo: attribuets -> attributes Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 0c154f98e987..39e1d875d507 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -153,7 +153,7 @@ * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs - * nla_parse_nested() parse nested attribuets + * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= From 03d179a840ce9e694db9d69bb643fdee04cfd28f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 30 Sep 2018 13:50:05 +0200 Subject: [PATCH 354/499] MAINTAINERS: fix reference to moved drivers/{misc => auxdisplay}/panel.c Commit 51c1e9b554c9 ("auxdisplay: Move panel.c to drivers/auxdisplay folder") moved the file, but the MAINTAINERS reference was not updated. 
Link: https://lore.kernel.org/lkml/20180928220131.31075-1-joe@perches.com/ Reported-by: Joe Perches Signed-off-by: Miguel Ojeda --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 02a39617ec82..e6fde62a1d65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10949,7 +10949,7 @@ M: Willy Tarreau M: Ksenija Stanojevic S: Odd Fixes F: Documentation/auxdisplay/lcd-panel-cgram.txt -F: drivers/misc/panel.c +F: drivers/auxdisplay/panel.c PARALLEL PORT SUBSYSTEM M: Sudip Mukherjee From 17b57b1883c1285f3d0dc2266e8f79286a7bef38 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 30 Sep 2018 07:15:35 -0700 Subject: [PATCH 355/499] Linux 4.19-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0c90c4354979..6c3da3e10f07 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Merciless Moray # *DOCUMENTATION* From 41e270f6898e7502be9fd6920ee0a108ca259d36 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 17 Sep 2018 04:14:54 +0000 Subject: [PATCH 356/499] Drivers: hv: vmbus: Use get/put_cpu() in vmbus_connect() With CONFIG_DEBUG_PREEMPT=y, I always see this warning: BUG: using smp_processor_id() in preemptible [00000000] Fix the false warning by using get/put_cpu(). Here vmbus_connect() sends a message to the host and waits for the host's response. The host will deliver the response message and an interrupt on CPU msg->target_vcpu, and later the interrupt handler will wake up vmbus_connect(). vmbus_connect() doesn't really have to run on the same cpu as CPU msg->target_vcpu, so it's safe to call put_cpu() just here. Signed-off-by: Dexuan Cui Cc: stable@vger.kernel.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/connection.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index ced041899456..f4d08c8ac7f8 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -76,6 +76,7 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, __u32 version) { int ret = 0; + unsigned int cur_cpu; struct vmbus_channel_initiate_contact *msg; unsigned long flags; @@ -118,9 +119,10 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, * the CPU attempting to connect may not be CPU 0. */ if (version >= VERSION_WIN8_1) { - msg->target_vcpu = - hv_cpu_number_to_vp_number(smp_processor_id()); - vmbus_connection.connect_cpu = smp_processor_id(); + cur_cpu = get_cpu(); + msg->target_vcpu = hv_cpu_number_to_vp_number(cur_cpu); + vmbus_connection.connect_cpu = cur_cpu; + put_cpu(); } else { msg->target_vcpu = 0; vmbus_connection.connect_cpu = 0; From 34bd283396af1f05c43fa64d51378d38affb1cf1 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Wed, 12 Sep 2018 09:43:23 -0500 Subject: [PATCH 357/499] fpga: do not access region struct after fpga_region_unregister A couple drivers were accessing the region struct after it had been freed. Save off the pointer to the mgr before the region struct gets freed. 
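The problematic pattern, shown here as a minimal sketch that mirrors the hunks below, was roughly:

	fpga_region_unregister(region);	/* may free the region struct */
	fpga_mgr_put(region->mgr);	/* ...then dereferences it */

so the fix is simply to cache the manager pointer before unregistering:

	struct fpga_manager *mgr = region->mgr;

	fpga_region_unregister(region);
	fpga_mgr_put(mgr);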
Signed-off-by: Alan Tull Acked-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/dfl-fme-region.c | 4 +++- drivers/fpga/of-fpga-region.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/fpga/dfl-fme-region.c b/drivers/fpga/dfl-fme-region.c index 0b7e19c27c6d..51a5ac2293a7 100644 --- a/drivers/fpga/dfl-fme-region.c +++ b/drivers/fpga/dfl-fme-region.c @@ -14,6 +14,7 @@ */ #include +#include #include #include "dfl-fme-pr.h" @@ -66,9 +67,10 @@ static int fme_region_probe(struct platform_device *pdev) static int fme_region_remove(struct platform_device *pdev) { struct fpga_region *region = dev_get_drvdata(&pdev->dev); + struct fpga_manager *mgr = region->mgr; fpga_region_unregister(region); - fpga_mgr_put(region->mgr); + fpga_mgr_put(mgr); return 0; } diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c index 35fabb8083fb..052a1342ab7e 100644 --- a/drivers/fpga/of-fpga-region.c +++ b/drivers/fpga/of-fpga-region.c @@ -437,9 +437,10 @@ static int of_fpga_region_probe(struct platform_device *pdev) static int of_fpga_region_remove(struct platform_device *pdev) { struct fpga_region *region = platform_get_drvdata(pdev); + struct fpga_manager *mgr = region->mgr; fpga_region_unregister(region); - fpga_mgr_put(region->mgr); + fpga_mgr_put(mgr); return 0; } From c2d68afba86d1ff01e7300c68bc16a9234dcd8e9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 17 Sep 2018 04:14:55 +0000 Subject: [PATCH 358/499] tools: hv: fcopy: set 'error' in case an unknown operation was requested 'error' variable is left uninitialized in case we see an unknown operation. As we don't immediately return and proceed to pwrite() we need to set it to something, HV_E_FAIL sounds good enough. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Cc: stable Signed-off-by: Greg Kroah-Hartman --- tools/hv/hv_fcopy_daemon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index d78aed86af09..8ff8cb1a11f4 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -234,6 +234,7 @@ int main(int argc, char *argv[]) break; default: + error = HV_E_FAIL; syslog(LOG_ERR, "Unknown operation: %d", buffer.hdr.operation); From b4d9a0e5ca139640a45bcda87bc718f85d8008af Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Wed, 12 Sep 2018 09:43:24 -0500 Subject: [PATCH 359/499] fpga: bridge: fix obvious function documentation error fpga_bridge_dev_match() returns a FPGA bridge struct, not a FPGA manager struct so s/manager/bridge/. Signed-off-by: Alan Tull Acked-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/fpga-bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c index 24b8f98b73ec..c983dac97501 100644 --- a/drivers/fpga/fpga-bridge.c +++ b/drivers/fpga/fpga-bridge.c @@ -125,7 +125,7 @@ static int fpga_bridge_dev_match(struct device *dev, const void *data) * * Given a device, get an exclusive reference to a fpga bridge. * - * Return: fpga manager struct or IS_ERR() condition containing error code. + * Return: fpga bridge struct or IS_ERR() condition containing error code. 
*/ struct fpga_bridge *fpga_bridge_get(struct device *dev, struct fpga_image_info *info) From 492ecf6d6598e7c3b0b1372653ed21da50ddfb09 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Wed, 12 Sep 2018 09:43:25 -0500 Subject: [PATCH 360/499] docs: fpga: document fpga manager flags Add flags #defines to kerneldoc documentation in a useful place. Signed-off-by: Alan Tull Acked-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/fpga/fpga-mgr.rst | 5 +++++ include/linux/fpga/fpga-mgr.h | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst index 4b3825da48d9..82b6dbbd31cd 100644 --- a/Documentation/driver-api/fpga/fpga-mgr.rst +++ b/Documentation/driver-api/fpga/fpga-mgr.rst @@ -184,6 +184,11 @@ API for implementing a new FPGA Manager driver API for programming an FPGA --------------------------- +FPGA Manager flags + +.. kernel-doc:: include/linux/fpga/fpga-mgr.h + :doc: FPGA Manager flags + .. kernel-doc:: include/linux/fpga/fpga-mgr.h :functions: fpga_image_info diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 8942e61f0028..8ab5df769923 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -53,12 +53,20 @@ enum fpga_mgr_states { FPGA_MGR_STATE_OPERATING, }; -/* - * FPGA Manager flags - * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported - * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting - * FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first - * FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed +/** + * DOC: FPGA Manager flags + * + * Flags used in the &fpga_image_info->flags field + * + * %FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported + * + * %FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting + * + * %FPGA_MGR_ENCRYPTED_BITSTREAM: indicates bitstream is encrypted + * + * %FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first + * + * %FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) #define FPGA_MGR_EXTERNAL_CONFIG BIT(1) From 7012040576c6ae25a47035659ee48673612c2c27 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 19 Sep 2018 18:09:38 -0700 Subject: [PATCH 361/499] firmware: Always initialize the fw_priv list object When freeing the fw_priv the item is taken off the list. This causes an oops in the FW_OPT_NOCACHE case as the list object is not initialized. Make sure to initialize the list object regardless of this flag. Fixes: 422b3db2a503 ("firmware: Fix security issue with request_firmware_into_buf()") Cc: stable@vger.kernel.org Cc: Rishabh Bhatnagar Signed-off-by: Bjorn Andersson Reviewed-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index b3c0498ee433..8e9213b36e31 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -226,8 +226,11 @@ static int alloc_lookup_fw_priv(const char *fw_name, } tmp = __allocate_fw_priv(fw_name, fwc, dbuf, size); - if (tmp && !(opt_flags & FW_OPT_NOCACHE)) - list_add(&tmp->list, &fwc->head); + if (tmp) { + INIT_LIST_HEAD(&tmp->list); + if (!(opt_flags & FW_OPT_NOCACHE)) + list_add(&tmp->list, &fwc->head); + } spin_unlock(&fwc->lock); *fw_priv = tmp; From bac6f6cda206ad7cbe0c73c35e494377ce9c4749 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 28 Sep 2018 15:17:50 -0700 Subject: [PATCH 362/499] pstore/ram: Fix failure-path memory leak in ramoops_init As reported by nixiaoming, with some minor clarifications: 1) memory leak in ramoops_register_dummy(): dummy_data = kzalloc(sizeof(*dummy_data), GFP_KERNEL); but no kfree() if platform_device_register_data() fails. 2) memory leak in ramoops_init(): Missing platform_device_unregister(dummy) and kfree(dummy_data) if platform_driver_register(&ramoops_driver) fails. I've clarified the purpose of ramoops_register_dummy(), and added a common cleanup routine for all three failure paths to call. Reported-by: nixiaoming Cc: stable@vger.kernel.org Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: Joel Fernandes Cc: Geliang Tang Signed-off-by: Kees Cook --- fs/pstore/ram.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index bbd1e357c23d..f4fd2e72add4 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -898,8 +898,22 @@ static struct platform_driver ramoops_driver = { }, }; -static void ramoops_register_dummy(void) +static inline void ramoops_unregister_dummy(void) { + platform_device_unregister(dummy); + dummy = NULL; + + kfree(dummy_data); + dummy_data = NULL; +} + +static void __init ramoops_register_dummy(void) +{ + /* + * Prepare a dummy platform data structure to carry the module + * parameters. If mem_size isn't set, then there are no module + * parameters, and we can skip this. + */ if (!mem_size) return; @@ -932,21 +946,28 @@ static void ramoops_register_dummy(void) if (IS_ERR(dummy)) { pr_info("could not create platform device: %ld\n", PTR_ERR(dummy)); + dummy = NULL; + ramoops_unregister_dummy(); } } static int __init ramoops_init(void) { + int ret; + ramoops_register_dummy(); - return platform_driver_register(&ramoops_driver); + ret = platform_driver_register(&ramoops_driver); + if (ret != 0) + ramoops_unregister_dummy(); + + return ret; } late_initcall(ramoops_init); static void __exit ramoops_exit(void) { platform_driver_unregister(&ramoops_driver); - platform_device_unregister(dummy); - kfree(dummy_data); + ramoops_unregister_dummy(); } module_exit(ramoops_exit); From e55ec4ddbef9897199c307dfb23167e3801fdaf5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 1 Oct 2018 08:11:07 +1000 Subject: [PATCH 363/499] xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. 
This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2760314fdf7f..a47670332326 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -673,7 +673,8 @@ xfs_bmap_extents_to_btree( ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); /* - * Make space in the inode incore. + * Make space in the inode incore. This needs to be undone if we fail + * to expand the root. */ xfs_iroot_realloc(ip, 1, whichfork); ifp->if_flags |= XFS_IFBROOT; @@ -711,16 +712,15 @@ xfs_bmap_extents_to_btree( args.minlen = args.maxlen = args.prod = 1; args.wasdel = wasdel; *logflagsp = 0; - if ((error = xfs_alloc_vextent(&args))) { - ASSERT(ifp->if_broot == NULL); - goto err1; - } + error = xfs_alloc_vextent(&args); + if (error) + goto out_root_realloc; if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { - ASSERT(ifp->if_broot == NULL); error = -ENOSPC; - goto err1; + goto out_root_realloc; } + /* * Allocation can't fail, the space was reserved. */ @@ -732,9 +732,10 @@ xfs_bmap_extents_to_btree( xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); if (!abp) { - error = -ENOSPC; - goto err2; + error = -EFSCORRUPTED; + goto out_unreserve_dquot; } + /* * Fill in the child block. 
*/ @@ -775,11 +776,12 @@ xfs_bmap_extents_to_btree( *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); return 0; -err2: +out_unreserve_dquot: xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); -err1: +out_root_realloc: xfs_iroot_realloc(ip, -1, whichfork); XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + ASSERT(ifp->if_broot == NULL); xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; From 08d9db00fe0e300d6df976e6c294f974988226dd Mon Sep 17 00:00:00 2001 From: Edgar Cherkasov Date: Thu, 27 Sep 2018 11:56:03 +0300 Subject: [PATCH 364/499] i2c: i2c-scmi: fix for i2c_smbus_write_block_data The i2c-scmi driver crashes when the SMBus Write Block transaction is executed: WARNING: CPU: 9 PID: 2194 at mm/page_alloc.c:3931 __alloc_pages_slowpath+0x9db/0xec0 Call Trace: ? get_page_from_freelist+0x49d/0x11f0 ? alloc_pages_current+0x6a/0xe0 ? new_slab+0x499/0x690 __alloc_pages_nodemask+0x265/0x280 alloc_pages_current+0x6a/0xe0 kmalloc_order+0x18/0x40 kmalloc_order_trace+0x24/0xb0 ? acpi_ut_allocate_object_desc_dbg+0x62/0x10c __kmalloc+0x203/0x220 acpi_os_allocate_zeroed+0x34/0x36 acpi_ut_copy_eobject_to_iobject+0x266/0x31e acpi_evaluate_object+0x166/0x3b2 acpi_smbus_cmi_access+0x144/0x530 [i2c_scmi] i2c_smbus_xfer+0xda/0x370 i2cdev_ioctl_smbus+0x1bd/0x270 i2cdev_ioctl+0xaa/0x250 do_vfs_ioctl+0xa4/0x600 SyS_ioctl+0x79/0x90 do_syscall_64+0x73/0x130 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 ACPI Error: Evaluating _SBW: 4 (20170831/smbus_cmi-185) This problem occurs because the length of ACPI Buffer object is not defined/initialized in the code before a corresponding ACPI method is called. The obvious patch below fixes this issue. Signed-off-by: Edgar Cherkasov Acked-by: Viktor Krasnov Acked-by: Michael Brunner Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-scmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c index a01389b85f13..7e9a2bbf5ddc 100644 --- a/drivers/i2c/busses/i2c-scmi.c +++ b/drivers/i2c/busses/i2c-scmi.c @@ -152,6 +152,7 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags, mt_params[3].type = ACPI_TYPE_INTEGER; mt_params[3].integer.value = len; mt_params[4].type = ACPI_TYPE_BUFFER; + mt_params[4].buffer.length = len; mt_params[4].buffer.pointer = data->block + 1; } break; From 1feda5eb77fc279a4c4eb93b9a0ac3bf7254b57a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Sep 2018 18:09:23 +0200 Subject: [PATCH 365/499] drm/exynos: Use selected dma_dev default iommu domain instead of a fake one Instead of allocating a fake IOMMU domain for all Exynos DRM components, simply reuse the default IOMMU domain of the already selected DMA device. This allows some design changes in IOMMU framework without breaking IOMMU support in Exynos DRM. 
Signed-off-by: Marek Szyprowski Reviewed-by: Robin Murphy Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_iommu.h | 34 ++++------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h index 87f6b5672e11..797d9ee5f15a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h +++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h @@ -55,37 +55,12 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv, unsigned long start, unsigned long size) { - struct iommu_domain *domain; - int ret; - - domain = iommu_domain_alloc(priv->dma_dev->bus); - if (!domain) - return -ENOMEM; - - ret = iommu_get_dma_cookie(domain); - if (ret) - goto free_domain; - - ret = iommu_dma_init_domain(domain, start, size, NULL); - if (ret) - goto put_cookie; - - priv->mapping = domain; + priv->mapping = iommu_get_domain_for_dev(priv->dma_dev); return 0; - -put_cookie: - iommu_put_dma_cookie(domain); -free_domain: - iommu_domain_free(domain); - return ret; } static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv) { - struct iommu_domain *domain = priv->mapping; - - iommu_put_dma_cookie(domain); - iommu_domain_free(domain); priv->mapping = NULL; } @@ -94,7 +69,9 @@ static inline int __exynos_iommu_attach(struct exynos_drm_private *priv, { struct iommu_domain *domain = priv->mapping; - return iommu_attach_device(domain, dev); + if (dev != priv->dma_dev) + return iommu_attach_device(domain, dev); + return 0; } static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, @@ -102,7 +79,8 @@ static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, { struct iommu_domain *domain = priv->mapping; - iommu_detach_device(domain, dev); + if (dev != priv->dma_dev) + iommu_detach_device(domain, dev); } #else #error Unsupported architecture and IOMMU/DMA-mapping glue code From 848e616e66d4592fe9afc40743d3504deb7632b4 Mon Sep 17 00:00:00 2001 From: Stefan Seyfried Date: Sun, 30 Sep 2018 12:53:00 +0200 Subject: [PATCH 366/499] cfg80211: fix wext-compat memory leak cfg80211_wext_giwrate and sinfo.pertid might allocate sinfo.pertid via rdev_get_station(), but never release it. Fix that. Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info") Signed-off-by: Stefan Seyfried [johannes: fix error path, use cfg80211_sinfo_release_content(), add Fixes] Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 167f7025ac98..06943d9c9835 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1278,12 +1278,16 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) - return -EOPNOTSUPP; + if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { + err = -EOPNOTSUPP; + goto free; + } rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); - return 0; +free: + cfg80211_sinfo_release_content(&sinfo); + return err; } /* Get wireless statistics. 
Called by /proc/net/wireless and by SIOCGIWSTATS */ @@ -1293,7 +1297,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); /* we are under RTNL - globally locked - so can use static structs */ static struct iw_statistics wstats; - static struct station_info sinfo; + static struct station_info sinfo = {}; u8 bssid[ETH_ALEN]; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) @@ -1352,6 +1356,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; + cfg80211_sinfo_release_content(&sinfo); + return &wstats; } From 211710ca74adf790b46ab3867fcce8047b573cd1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 29 Sep 2018 16:01:58 +0200 Subject: [PATCH 367/499] mac80211: fix setting IEEE80211_KEY_FLAG_RX_MGMT for AP mode keys key->sta is only valid after ieee80211_key_link, which is called later in this function. Because of that, the IEEE80211_KEY_FLAG_RX_MGMT is never set when management frame protection is enabled. Fixes: e548c49e6dc6b ("mac80211: add key flag for management keys") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d25da0e66da1..5d22eda8a6b1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -427,7 +427,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: /* Keys without a station are used for TX only */ - if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) + if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; break; case NL80211_IFTYPE_ADHOC: From 1db58529454742f67ebd96e3588315e880b72837 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 27 Sep 2018 17:05:04 -0600 Subject: [PATCH 368/499] cfg80211: fix use-after-free in reg_process_hint() reg_process_hint_country_ie() can free regulatory_request and return REG_REQ_ALREADY_SET. We shouldn't use regulatory_request after it's called. KASAN error was observed when this happens. BUG: KASAN: use-after-free in reg_process_hint+0x839/0x8aa [cfg80211] Read of size 4 at addr ffff8800c430d434 by task kworker/1:3/89 Workqueue: events reg_todo [cfg80211] Call Trace: dump_stack+0xc1/0x10c ? _atomic_dec_and_lock+0x1ad/0x1ad ? _raw_spin_lock_irqsave+0xa0/0xd2 print_address_description+0x86/0x26f ? reg_process_hint+0x839/0x8aa [cfg80211] kasan_report+0x241/0x29b reg_process_hint+0x839/0x8aa [cfg80211] reg_todo+0x204/0x5b9 [cfg80211] process_one_work+0x55f/0x8d0 ? worker_detach_from_pool+0x1b5/0x1b5 ? _raw_spin_unlock_irq+0x65/0xdd ? _raw_spin_unlock_irqrestore+0xf3/0xf3 worker_thread+0x5dd/0x841 ? kthread_parkme+0x1d/0x1d kthread+0x270/0x285 ? pr_cont_work+0xe3/0xe3 ? 
rcu_read_unlock_sched_notrace+0xca/0xca ret_from_fork+0x22/0x40 Allocated by task 2718: set_track+0x63/0xfa __kmalloc+0x119/0x1ac regulatory_hint_country_ie+0x38/0x329 [cfg80211] __cfg80211_connect_result+0x854/0xadd [cfg80211] cfg80211_rx_assoc_resp+0x3bc/0x4f0 [cfg80211] smsc95xx v1.0.6 ieee80211_sta_rx_queued_mgmt+0x1803/0x7ed5 [mac80211] ieee80211_iface_work+0x411/0x696 [mac80211] process_one_work+0x55f/0x8d0 worker_thread+0x5dd/0x841 kthread+0x270/0x285 ret_from_fork+0x22/0x40 Freed by task 89: set_track+0x63/0xfa kasan_slab_free+0x6a/0x87 kfree+0xdc/0x470 reg_process_hint+0x31e/0x8aa [cfg80211] reg_todo+0x204/0x5b9 [cfg80211] process_one_work+0x55f/0x8d0 worker_thread+0x5dd/0x841 kthread+0x270/0x285 ret_from_fork+0x22/0x40 Signed-off-by: Yu Zhao Signed-off-by: Johannes Berg --- net/wireless/reg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 765dedb12361..24cfa2776f50 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2661,11 +2661,12 @@ static void reg_process_hint(struct regulatory_request *reg_request) { struct wiphy *wiphy = NULL; enum reg_request_treatment treatment; + enum nl80211_reg_initiator initiator = reg_request->initiator; if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - switch (reg_request->initiator) { + switch (initiator) { case NL80211_REGDOM_SET_BY_CORE: treatment = reg_process_hint_core(reg_request); break; @@ -2683,7 +2684,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) treatment = reg_process_hint_country_ie(wiphy, reg_request); break; default: - WARN(1, "invalid initiator %d\n", reg_request->initiator); + WARN(1, "invalid initiator %d\n", initiator); goto out_free; } @@ -2698,7 +2699,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) */ if (treatment == REG_REQ_ALREADY_SET && wiphy && wiphy->regulatory_flags & REGULATORY_STRICT_REG) { - wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_update_regulatory(wiphy, initiator); wiphy_all_share_dfs_chan_state(wiphy); reg_check_channels(); } From d26c25a9d19b5976b319af528886f89cf455692d Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 27 Sep 2018 16:53:21 +0100 Subject: [PATCH 369/499] arm64: KVM: Tighten guest core register access from userspace We currently allow userspace to access the core register file in about any possible way, including straddling multiple registers and doing unaligned accesses. This is not the expected use of the ABI, and nobody is actually using it that way. Let's tighten it by explicitly checking the size and alignment for each field of the register file. 
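For context, userspace reaches these accessors through KVM_GET_ONE_REG/KVM_SET_ONE_REG with core register ids. A minimal sketch of a well-formed access (illustrative only; uses the uapi macros, error handling omitted, and vcpu_fd is assumed to be an open vCPU file descriptor):

	__u64 pstate;
	struct kvm_one_reg reg = {
		.id   = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE |
			KVM_REG_ARM_CORE_REG(regs.pstate),
		.addr = (__u64)&pstate,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);

Ids that straddle registers, or whose KVM_REG_SIZE does not match the field being addressed, are now rejected with -EINVAL instead of being serviced as raw offsets into struct kvm_regs.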
Cc: Fixes: 2f4a07c5f9fe ("arm64: KVM: guest one-reg interface") Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Dave Martin [maz: rewrote Dave's initial patch to be more easily backported] Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kvm/guest.c | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..3088463bafc1 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; From 2a3f93459d689d990b3ecfbe782fec89b97d3279 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 27 Sep 2018 16:53:22 +0100 Subject: [PATCH 370/499] arm64: KVM: Sanitize PSTATE.M when being set from userspace Not all execution modes are valid for a guest, and some of them depend on what the HW actually supports. Let's verify that what userspace provides is compatible with both the VM settings and the HW capabilities. 
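Concretely (illustrative examples, not from the original commit message): writing regs.pstate with PSR_AA32_MODE_SVC on a vCPU that was not initialised as 32-bit now fails with -EINVAL, as does PSR_MODE_EL1h on a 32-bit vCPU, or PSR_AA32_MODE_USR on a system without 32-bit EL0 support.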
Cc: Fixes: 0d854a60b1d7 ("arm64: KVM: enable initialization of a 32bit vcpu") Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kvm/guest.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3088463bafc1..a6c9fbaeaefc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -152,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; + u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case PSR_AA32_MODE_FIQ: case PSR_AA32_MODE_IRQ: case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; From 5f76f6f5ff96587af5acd5930f7d9fea81e0d1a8 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Fri, 14 Sep 2018 03:25:52 +0300 Subject: [PATCH 371/499] KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled Before this commit, KVM exposes MPX VMX controls to L1 guest only based on if KVM and host processor supports MPX virtualization. However, these controls should be exposed to guest only in case guest vCPU supports MPX. Without this change, a L1 guest running with kernel which don't have commit 691bd4340bef ("kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS") asserts in QEMU on the following: qemu-kvm: error: failed to set MSR 0xd90 to 0x0 qemu-kvm: .../qemu-2.10.0/target/i386/kvm.c:1801 kvm_put_msrs: Assertion 'ret == cpu->kvm_msr_buf->nmsrs failed' This is because L1 KVM kvm_init_msr_list() will see that vmx_mpx_supported() (As it only checks MPX VMX controls support) and therefore KVM_GET_MSR_INDEX_LIST IOCTL will include MSR_IA32_BNDCFGS. However, later when L1 will attempt to set this MSR via KVM_SET_MSRS IOCTL, it will fail because !guest_cpuid_has_mpx(vcpu). Therefore, fix the issue by exposing MPX VMX controls to L1 guest only when vCPU supports MPX. Fixes: 36be0b9deb23 ("KVM: x86: Add nested virtualization support for MPX") Reported-by: Eyal Moscovici Reviewed-by: Nikita Leshchenko Reviewed-by: Darren Kenny Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8b066480224b..d32e9c5e743b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3525,9 +3525,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (kvm_mpx_supported()) - msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; @@ -3544,8 +3541,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_ENTRY_LOAD_IA32_PAT; msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (kvm_mpx_supported()) - msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. 
*/ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; @@ -11237,6 +11232,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) #undef cr4_fixed1_update } +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (kvm_mpx_supported()) { + bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); + + if (mpx_enabled) { + vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + } else { + vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; + } + } +} + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -11253,8 +11265,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (nested_vmx_allowed(vcpu)) + if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); + nested_vmx_entry_exit_ctls_update(vcpu); + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) From 503234b3fdcaa578395c07e393ea3e5d13958824 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Fri, 14 Sep 2018 03:25:53 +0300 Subject: [PATCH 372/499] KVM: x86: Do not use kvm_x86_ops->mpx_supported() directly Commit a87036add092 ("KVM: x86: disable MPX if host did not enable MPX XSAVE features") introduced kvm_mpx_supported() to return true iff MPX is enabled in the host. However, that commit seems to have missed replacing some calls to kvm_x86_ops->mpx_supported() to kvm_mpx_supported(). Complete original commit by replacing remaining calls to kvm_mpx_supported(). Fixes: a87036add092 ("KVM: x86: disable MPX if host did not enable MPX XSAVE features") Suggested-by: Sean Christopherson Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d32e9c5e743b..5bb2acd8a52c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -12036,7 +12036,7 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) set_cr4_guest_host_mask(vmx); - if (vmx_mpx_supported()) + if (kvm_mpx_supported()) vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); if (enable_vpid) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edbf00ec56b3..ca717737347e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4698,7 +4698,7 @@ static void kvm_init_msr_list(void) */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) continue; break; case MSR_TSC_AUX: From 62cf9bd8118c4009f02c477ef78c723f49e53e16 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Fri, 14 Sep 2018 03:25:54 +0300 Subject: [PATCH 373/499] KVM: nVMX: Fix emulation of VM_ENTRY_LOAD_BNDCFGS L2 IA32_BNDCFGS should be updated with vmcs12->guest_bndcfgs only when VM_ENTRY_LOAD_BNDCFGS is specified in vmcs12->vm_entry_controls. Otherwise, L2 IA32_BNDCFGS should be set to vmcs01->guest_bndcfgs which is L1 IA32_BNDCFGS. 
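In other words (sketch only, not the kernel's code), the value that should end up in L2's GUEST_BNDCFGS on nested VM-entry is:

/* Illustrative helper: which BNDCFGS value L2 should start with. */
static u64 l2_bndcfgs_on_entry(const struct vmcs12 *vmcs12, u64 vmcs01_guest_bndcfgs)
{
	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
		return vmcs12->guest_bndcfgs;	/* L1 asked to load a value */

	return vmcs01_guest_bndcfgs;		/* otherwise keep L1's own BNDCFGS */
}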
Reviewed-by: Nikita Leshchenko Reviewed-by: Darren Kenny Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5bb2acd8a52c..b16b2664cfe1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -856,6 +856,7 @@ struct nested_vmx { /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ u64 vmcs01_debugctl; + u64 vmcs01_guest_bndcfgs; u16 vpid02; u16 last_vpid; @@ -12036,8 +12037,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) set_cr4_guest_host_mask(vmx); - if (kvm_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + if (kvm_mpx_supported()) { + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + else + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); + } if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) @@ -12591,6 +12597,9 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + if (kvm_mpx_supported() && + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); From fe804cd6774938814242aa67bb7e7cbc934b1203 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Tue, 25 Sep 2018 09:19:36 +0200 Subject: [PATCH 374/499] tools/kvm_stat: cut down decimal places in update interval dialog We currently display the default number of decimal places for floats in _show_set_update_interval(), which is quite pointless. Cutting down to a single decimal place. Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 439b8a27488d..195ba486640f 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1325,7 +1325,7 @@ class Tui(object): msg = '' while True: self.screen.erase() - self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' % + self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' % DELAY_DEFAULT, curses.A_BOLD) self.screen.addstr(4, 0, msg) self.screen.addstr(2, 0, 'Change delay from %.1fs to ' % From daa07cbc9ae3da2d61b7ce900c0b9107d134f2c1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 25 Sep 2018 13:20:00 -0700 Subject: [PATCH 375/499] KVM: x86: fix L1TF's MMIO GFN calculation One defense against L1TF in KVM is to always set the upper five bits of the *legal* physical address in the SPTEs for non-present and reserved SPTEs, e.g. MMIO SPTEs. In the MMIO case, the GFN of the MMIO SPTE may overlap with the upper five bits that are being usurped to defend against L1TF. To preserve the GFN, the bits of the GFN that overlap with the repurposed bits are shifted left into the reserved bits, i.e. the GFN in the SPTE will be split into high and low parts. When retrieving the GFN from the MMIO SPTE, e.g. to check for an MMIO access, get_mmio_spte_gfn() unshifts the affected bits and restores the original GFN for comparison. 
Unfortunately, get_mmio_spte_gfn() neglects to mask off the reserved bits in the SPTE that were used to store the upper chunk of the GFN. As a result, KVM fails to detect MMIO accesses whose GPA overlaps the repurprosed bits, which in turn causes guest panics and hangs. Fix the bug by generating a mask that covers the lower chunk of the GFN, i.e. the bits that aren't shifted by the L1TF mitigation. The alternative approach would be to explicitly zero the five reserved bits that are used to store the upper chunk of the GFN, but that requires additional run-time computation and makes an already-ugly bit of code even more inscrutable. I considered adding a WARN_ON_ONCE(low_phys_bits-1 <= PAGE_SHIFT) to warn if GENMASK_ULL() generated a nonsensical value, but that seemed silly since that would mean a system that supports VMX has less than 18 bits of physical address space... Reported-by: Sakari Ailus Fixes: d9b47449c1a1 ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs") Cc: Junaid Shahid Cc: Jim Mattson Cc: stable@vger.kernel.org Reviewed-by: Junaid Shahid Tested-by: Sakari Ailus Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d7e9bce6ff61..51b953ad9d4e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; */ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +/* + * In some cases, we need to preserve the GFN of a non-present or reserved + * SPTE when we usurp the upper five bits of the physical address space to + * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll + * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask + * left into the reserved bits, i.e. the GFN in the SPTE will be split into + * high and low parts. This mask covers the lower bits of the GFN. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + + static void mmu_spte_set(u64 *sptep, u64 spte); static union kvm_mmu_page_role kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); @@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | - shadow_nonpresent_or_rsvd_mask; - u64 gpa = spte & ~mask; + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) & shadow_nonpresent_or_rsvd_mask; @@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static void kvm_mmu_reset_all_pte_masks(void) { + u8 low_phys_bits; + shadow_user_mask = 0; shadow_accessed_mask = 0; shadow_dirty_mask = 0; @@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void) * appropriate mask to guard against L1TF attacks. Otherwise, it is * assumed that the CPU is not vulnerable to L1TF. 
*/ + low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_data.x86_phys_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) + 52 - shadow_nonpresent_or_rsvd_mask_len) { shadow_nonpresent_or_rsvd_mask = rsvd_bits(boot_cpu_data.x86_phys_bits - shadow_nonpresent_or_rsvd_mask_len, boot_cpu_data.x86_phys_bits - 1); + low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; + } + shadow_nonpresent_or_rsvd_lower_gfn_mask = + GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); } static int is_cpuid_PSE36(void) From bda6b1c95751bbf1779562b8eca3c20b270c0f53 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Fri, 17 Aug 2018 10:33:30 -0700 Subject: [PATCH 376/499] drm/i915: Do not redefine the has_csr parameter. Let us reuse the already defined has_csr check and not redefine it. The main difference is that in effect this will flip .has_csr to 1 (via GEN9_FEATURES which GEN11_FEATURES pulls in). Suggested-by: Imre Deak Cc: Imre Deak Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=107382 Reviewed-by: Imre Deak Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1534527210-16841-1-git-send-email-anusha.srivatsa@intel.com (cherry picked from commit da4468a1aa75457e6134127b19761b7ba62ce945) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6a4d1388ad2d..1df3ce134cd0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -592,7 +592,6 @@ static const struct intel_device_info intel_cannonlake_info = { GEN10_FEATURES, \ GEN(11), \ .ddb_size = 2048, \ - .has_csr = 0, \ .has_logical_ring_elsq = 1 static const struct intel_device_info intel_icelake_11_info = { From 4ca8ca9fe7dc792000c3762de5081a4d6dc33667 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Sep 2018 11:47:18 +0100 Subject: [PATCH 377/499] drm/i915: Avoid compiler warning for maybe unused gu_misc_iir /kisskb/src/drivers/gpu/drm/i915/i915_irq.c: warning: 'gu_misc_iir' may be used uninitialized in this function [-Wuninitialized]: => 3120:10 Silence the compiler warning by ensuring that the local variable is initialised and removing the guard that is confusing the older gcc. 
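Reduced to a generic sketch (the bit and the register value below are placeholders, not i915's), the shape of the change is:

/* Old shape: the out-parameter is written only when the IRQ bit is set,
 * so older gcc assumes the caller's local may be read uninitialized. */
static void ack_old(u32 master_ctl, u32 *iir)
{
	if (!(master_ctl & BIT(0)))
		return;
	*iir = 0x1234;		/* stand-in for the IIR register read */
}

/* New shape: always produce a value, 0 when nothing is pending, so the
 * caller's variable is always initialized and the guard can go away. */
static u32 ack_new(u32 master_ctl)
{
	if (!(master_ctl & BIT(0)))
		return 0;
	return 0x1234;		/* stand-in for the IIR register read */
}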
Reported-by: Geert Uytterhoeven Fixes: df0d28c185ad ("drm/i915/icl: GSE interrupt moves from DE_MISC to GU_MISC") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Paulo Zanoni Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180926104718.17462-1-chris@chris-wilson.co.uk (cherry picked from commit 7a90938332d80faf973fbcffdf6e674e7b8f0914) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_irq.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 90628a47ae17..29877969310d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3091,36 +3091,27 @@ gen11_gt_irq_handler(struct drm_i915_private * const i915, spin_unlock(&i915->irq_lock); } -static void -gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl, - u32 *iir) +static u32 +gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl) { void __iomem * const regs = dev_priv->regs; + u32 iir; if (!(master_ctl & GEN11_GU_MISC_IRQ)) - return; + return 0; - *iir = raw_reg_read(regs, GEN11_GU_MISC_IIR); - if (likely(*iir)) - raw_reg_write(regs, GEN11_GU_MISC_IIR, *iir); + iir = raw_reg_read(regs, GEN11_GU_MISC_IIR); + if (likely(iir)) + raw_reg_write(regs, GEN11_GU_MISC_IIR, iir); + + return iir; } static void -gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, - const u32 master_ctl, const u32 iir) +gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, const u32 iir) { - if (!(master_ctl & GEN11_GU_MISC_IRQ)) - return; - - if (unlikely(!iir)) { - DRM_ERROR("GU_MISC iir blank!\n"); - return; - } - if (iir & GEN11_GU_MISC_GSE) intel_opregion_asle_intr(dev_priv); - else - DRM_ERROR("Unexpected GU_MISC interrupt 0x%x\n", iir); } static irqreturn_t gen11_irq_handler(int irq, void *arg) @@ -3157,12 +3148,12 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) enable_rpm_wakeref_asserts(i915); } - gen11_gu_misc_irq_ack(i915, master_ctl, &gu_misc_iir); + gu_misc_iir = gen11_gu_misc_irq_ack(i915, master_ctl); /* Acknowledge and enable interrupts. */ raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ | master_ctl); - gen11_gu_misc_irq_handler(i915, master_ctl, gu_misc_iir); + gen11_gu_misc_irq_handler(i915, gu_misc_iir); return IRQ_HANDLED; } From 4d8fcf216c90bc25e34ae2200aa8985ee3158898 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Wed, 5 Sep 2018 11:43:23 +0300 Subject: [PATCH 378/499] net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin rules If the peer device was already unbound, then do not attempt to modify it's resources, otherwise we will crash on dereferencing non-existing device. 
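The mechanism the fix builds on, as a generic sketch (the callback name and the dead-peer bookkeeping are illustrative):

#include <linux/netdevice.h>
#include <linux/notifier.h>

/* Sketch: watch for peer netdevs going away so their hardware resources
 * are never touched after unregistration. */
static int peer_netdev_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	netdev_dbg(ndev, "peer is unregistering\n");
	/* walk private state that still references ndev and flag it here */
	return NOTIFY_DONE;
}

static struct notifier_block peer_nb = {
	.notifier_call = peer_netdev_event,
};

/* register_netdevice_notifier(&peer_nb) at init,
 * unregister_netdevice_notifier(&peer_nb) on teardown. */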
Fixes: 5c65c564c962 ("net/mlx5e: Support offloading TC NIC hairpin flows") Signed-off-by: Alaa Hleihel Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/fs.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 62 ++++++++++++++++++- .../ethernet/mellanox/mlx5/core/transobj.c | 5 +- include/linux/mlx5/transobj.h | 2 + 6 files changed, 71 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index db2cfcd21d43..0f189f873859 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -54,6 +54,7 @@ #include "en_stats.h" #include "en/fs.h" +extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index bbf69e859b78..1431232c9a09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -16,6 +16,8 @@ struct mlx5e_tc_table { DECLARE_HASHTABLE(mod_hdr_tbl, 8); DECLARE_HASHTABLE(hairpin_tbl, 8); + + struct notifier_block netdevice_nb; }; struct mlx5e_flow_table { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 54118b77dc1f..f291d1bf1558 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4315,7 +4315,7 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static const struct net_device_ops mlx5e_netdev_ops = { +const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9fed54017659..52e05f3ece50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2946,14 +2946,71 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, return 0; } +static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, + struct mlx5e_priv *peer_priv) +{ + struct mlx5_core_dev *peer_mdev = peer_priv->mdev; + struct mlx5e_hairpin_entry *hpe; + u16 peer_vhca_id; + int bkt; + + if (!same_hw_devs(priv, peer_priv)) + return; + + peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { + if (hpe->peer_vhca_id == peer_vhca_id) + hpe->hp->pair->peer_gone = true; + } +} + +static int mlx5e_tc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_flow_steering *fs; + struct mlx5e_priv *peer_priv; + struct mlx5e_tc_table *tc; + struct mlx5e_priv *priv; + + if (ndev->netdev_ops != &mlx5e_netdev_ops || + event != NETDEV_UNREGISTER || + ndev->reg_state == NETREG_REGISTERED) + return NOTIFY_DONE; + + tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); + fs = container_of(tc, struct mlx5e_flow_steering, tc); + priv = container_of(fs, struct mlx5e_priv, fs); + peer_priv = netdev_priv(ndev); + if (priv == peer_priv || + !(priv->netdev->features & NETIF_F_HW_TC)) + return NOTIFY_DONE; + + mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); + + return 
NOTIFY_DONE; +} + int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + int err; hash_init(tc->mod_hdr_tbl); hash_init(tc->hairpin_tbl); - return rhashtable_init(&tc->ht, &tc_ht_params); + err = rhashtable_init(&tc->ht, &tc_ht_params); + if (err) + return err; + + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; + if (register_netdevice_notifier(&tc->netdevice_nb)) { + tc->netdevice_nb.notifier_call = NULL; + mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + } + + return err; } static void _mlx5e_tc_del_flow(void *ptr, void *arg) @@ -2969,6 +3026,9 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + if (tc->netdevice_nb.notifier_call) + unregister_netdevice_notifier(&tc->netdevice_nb); + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); if (!IS_ERR_OR_NULL(tc->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index d2f76070ea7c..a1ee9a8a769e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -475,7 +475,8 @@ static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) for (i = 0; i < hp->num_channels; i++) { mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); - mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + if (!hp->peer_gone) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); } } @@ -567,6 +568,8 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) MLX5_RQC_STATE_RST, 0, 0); /* unset peer SQs */ + if (hp->peer_gone) + return; for (i = 0; i < hp->num_channels; i++) mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_RST, 0, 0); diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 83a33a1873a6..7f5ca2cd3a32 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -90,6 +90,8 @@ struct mlx5_hairpin { u32 *rqn; u32 *sqn; + + bool peer_gone; }; struct mlx5_hairpin * From 11aa5800ed66ed0415b7509f02881c76417d212a Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 16 Sep 2018 14:45:27 +0300 Subject: [PATCH 379/499] net/mlx5: E-Switch, Fix out of bound access when setting vport rate The code that deals with eswitch vport bw guarantee was going beyond the eswitch vport array limit, fix that. This was pointed out by the kernel address sanitizer (KASAN). 
The error from KASAN log: [2018-09-15 15:04:45] BUG: KASAN: slab-out-of-bounds in mlx5_eswitch_set_vport_rate+0x8c1/0xae0 [mlx5_core] Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Signed-off-by: Eran Ben Elisha Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2b252cde5cc2..ea7dedc2d5ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2000,7 +2000,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; @@ -2020,7 +2020,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled) continue; From cee26487620bc9bc3c7db21b6984d91f7bae12ae Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Sat, 25 Aug 2018 03:29:58 +0000 Subject: [PATCH 380/499] net/mlx5e: Set vlan masks for all offloaded TC rules In flow steering, if asked to, the hardware matches on the first ethertype which is not vlan. It's possible to set a rule as follows, which is meant to match on untagged packet, but will match on a vlan packet: tc filter add dev eth0 parent ffff: protocol ip flower ... To avoid this for packets with single tag, we set vlan masks to tell hardware to check the tags for every matched packet. Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') Signed-off-by: Jianbo Liu Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 52e05f3ece50..85796727093e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1368,6 +1368,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); } if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { From 95375f2ab2960c135484d83ea9f8f357cb1be26a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 1 Oct 2018 14:49:34 +0200 Subject: [PATCH 381/499] PCI: mvebu: Fix PCI I/O mapping creation sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit ee1604381a371 ("PCI: mvebu: Only remap I/O space if configured") had the side effect that the PCI I/O mapping was created much earlier than before, at a point where the probe() of the driver could still fail. This is for example a problem if one gets an -EPROBE_DEFER at some point during probe(), after pci_ioremap_io() has been called. Indeed, there is currently no function to undo what pci_ioremap_io() did, and switching to pci_remap_iospace() is not an option in pci-mvebu due to the need for special memory attributes on Armada 38x. 
Reverting ee1604381a371 ("PCI: mvebu: Only remap I/O space if configured") would be a possibility, but it would require also reverting 42342073e38b5 ("PCI: mvebu: Convert to use pci_host_bridge directly"). So instead, we use an open-coded version of pci_host_probe() that creates the PCI I/O mapping at a point where we are guaranteed not to fail anymore. Fixes: ee1604381a371 ("PCI: mvebu: Only remap I/O space if configured") Reported-by: Jan Kundrát Tested-by: Jan Kundrát Signed-off-by: Thomas Petazzoni Signed-off-by: Bjorn Helgaas Acked-by: Lorenzo Pieralisi --- drivers/pci/controller/pci-mvebu.c | 54 +++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 50eb0729385b..a41d79b8d46a 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1145,7 +1145,6 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie) { struct device *dev = &pcie->pdev->dev; struct device_node *np = dev->of_node; - unsigned int i; int ret; INIT_LIST_HEAD(&pcie->resources); @@ -1179,15 +1178,60 @@ static int mvebu_pcie_parse_request_resources(struct mvebu_pcie *pcie) resource_size(&pcie->io) - 1); pcie->realio.name = "PCI I/O"; - for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K) - pci_ioremap_io(i, pcie->io.start + i); - pci_add_resource(&pcie->resources, &pcie->realio); } return devm_request_pci_bus_resources(dev, &pcie->resources); } +/* + * This is a copy of pci_host_probe(), except that it does the I/O + * remap as the last step, once we are sure we won't fail. + * + * It should be removed once the I/O remap error handling issue has + * been sorted out. + */ +static int mvebu_pci_host_probe(struct pci_host_bridge *bridge) +{ + struct mvebu_pcie *pcie; + struct pci_bus *bus, *child; + int ret; + + ret = pci_scan_root_bus_bridge(bridge); + if (ret < 0) { + dev_err(bridge->dev.parent, "Scanning root bridge failed"); + return ret; + } + + pcie = pci_host_bridge_priv(bridge); + if (resource_size(&pcie->io) != 0) { + unsigned int i; + + for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K) + pci_ioremap_io(i, pcie->io.start + i); + } + + bus = bridge->bus; + + /* + * We insert PCI resources into the iomem_resource and + * ioport_resource trees in either pci_bus_claim_resources() + * or pci_bus_assign_resources(). + */ + if (pci_has_flag(PCI_PROBE_ONLY)) { + pci_bus_claim_resources(bus); + } else { + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + } + + pci_bus_add_devices(bus); + return 0; +} + static int mvebu_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1268,7 +1312,7 @@ static int mvebu_pcie_probe(struct platform_device *pdev) bridge->align_resource = mvebu_pcie_align_resource; bridge->msi = pcie->msi; - return pci_host_probe(bridge); + return mvebu_pci_host_probe(bridge); } static const struct of_device_id mvebu_pcie_of_match_table[] = { From 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Oct 2018 15:02:26 -0700 Subject: [PATCH 382/499] tcp/dccp: fix lockdep issue when SYN is backlogged In normal SYN processing, packets are handled without listener lock and in RCU protected ingress path. But syzkaller is known to be able to trick us and SYN packets might be processed in process context, after being queued into socket backlog. 
In commit 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") I made a very stupid fix, that happened to work mostly because of the regular path being RCU protected. Really the thing protecting ireq->ireq_opt is RCU read lock, and the pseudo request refcnt is not relevant. This patch extends what I did in commit 449809a66c1d ("tcp/dccp: block BH for SYN processing") by adding an extra rcu_read_{lock|unlock} pair in the paths that might be taken when processing SYN from socket backlog (thus possibly in process context) Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/inet_sock.h | 3 +-- net/dccp/input.c | 4 +++- net/ipv4/tcp_input.c | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e03b93360f33..a8cd5cf9ff5b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -132,8 +132,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) { - return rcu_dereference_check(ireq->ireq_opt, - refcount_read(&ireq->req.rsk_refcnt) > 0); + return rcu_dereference(ireq->ireq_opt); } struct inet_cork { diff --git a/net/dccp/input.c b/net/dccp/input.c index d28d46bff6ab..85d6c879383d 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,11 +606,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; consume_skb(skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4cf2f7bb2802..47e08c1b5bc3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6009,11 +6009,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->fin) goto discard; /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; From 242cdad873a75652f97c35aad61270581e0f3a2e Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 21 Sep 2018 12:24:31 +0930 Subject: [PATCH 383/499] lib/xz: Put CRC32_POLY_LE in xz_private.h This fixes a regression introduced by faa16bc404d72a5 ("lib: Use existing define with polynomial"). The cleanup added a dependency on include/linux, which broke the PowerPC boot wrapper/decompresser when KERNEL_XZ is enabled: BOOTCC arch/powerpc/boot/decompress.o In file included from arch/powerpc/boot/../../../lib/decompress_unxz.c:233, from arch/powerpc/boot/decompress.c:42: arch/powerpc/boot/../../../lib/xz/xz_crc32.c:18:10: fatal error: linux/crc32poly.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~ The powerpc decompresser is a hairy corner of the kernel. Even while building a 64-bit kernel it needs to build a 32-bit binary and therefore avoid including files from include/linux. 
This allows users of the xz library to avoid including headers from 'include/linux/' while still achieving the cleanup of the magic number. Fixes: faa16bc404d72a5 ("lib: Use existing define with polynomial") Reported-by: Meelis Roos Reported-by: kbuild test robot Suggested-by: Christophe LEROY Signed-off-by: Joel Stanley Tested-by: Meelis Roos Signed-off-by: Michael Ellerman --- lib/xz/xz_crc32.c | 1 - lib/xz/xz_private.h | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 25a5d87e2e4c..912aae5fa09e 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -15,7 +15,6 @@ * but they are bigger and use more memory for the lookup table. */ -#include #include "xz_private.h" /* diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index 482b90f363fe..09360ebb510e 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -102,6 +102,10 @@ # endif #endif +#ifndef CRC32_POLY_LE +#define CRC32_POLY_LE 0xedb88320 +#endif + /* * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used * before calling xz_dec_lzma2_run(). From f2924d4b16ae138c2de6a0e73f526fb638330858 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Thu, 20 Sep 2018 16:49:04 +0200 Subject: [PATCH 384/499] usb: cdc_acm: Do not leak URB buffers When the ACM TTY port is disconnected, the URBs it uses must be killed, and then the buffers must be freed. Unfortunately a previous refactor removed the code freeing the buffers because it looked extremely similar to the code killing the URBs. As a result, there were many new leaks for each plug/unplug cycle of a CDC-ACM device, that were detected by kmemleak. Restore the missing code, and the memory leak is removed. Fixes: ba8c931ded8d ("cdc-acm: refactor killing urbs") Signed-off-by: Romain Izard Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f9b40a9dc4d3..bc03b0a690b4 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1514,6 +1514,7 @@ static void acm_disconnect(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct tty_struct *tty; + int i; /* sibling interface is already cleaning up */ if (!acm) @@ -1544,6 +1545,11 @@ static void acm_disconnect(struct usb_interface *intf) tty_unregister_device(acm_tty_driver, acm->minor); + usb_free_urb(acm->ctrlurb); + for (i = 0; i < ACM_NW; i++) + usb_free_urb(acm->wb[i].urb); + for (i = 0; i < acm->rx_buflimit; i++) + usb_free_urb(acm->read_urbs[i]); acm_write_buffers_free(acm); usb_free_coherent(acm->dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); acm_read_buffers_free(acm); From ffe84e01bb1b38c7eb9c6b6da127a6c136d251df Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 1 Oct 2018 18:36:07 +0300 Subject: [PATCH 385/499] xhci: Add missing CAS workaround for Intel Sunrise Point xHCI The workaround for missing CAS bit is also needed for xHC on Intel sunrisepoint PCH. 
For more details see: Intel 100/c230 series PCH specification update Doc #332692-006 Errata #8 Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 6372edf339d9..722860eb5a91 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -185,6 +185,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) xhci->quirks |= XHCI_MISSING_CAS; From 555df5820e733cded7eb8d0bf78b2a791be51d75 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Mon, 1 Oct 2018 18:36:08 +0300 Subject: [PATCH 386/499] usb: xhci-mtk: resume USB3 roothub first Give USB3 devices a better chance to enumerate at USB3 speeds if they are connected to a suspended host. Porting from "671ffdff5b13 xhci: resume USB 3 roothub first" Cc: Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 7334da9e9779..71d0d33c3286 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -642,10 +642,10 @@ static int __maybe_unused xhci_mtk_resume(struct device *dev) xhci_mtk_host_enable(mtk); xhci_dbg(xhci, "%s: restart port polling\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &hcd->flags); - usb_hcd_poll_rh_status(hcd); set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); usb_hcd_poll_rh_status(xhci->shared_hcd); + set_bit(HCD_FLAG_POLL_RH, &hcd->flags); + usb_hcd_poll_rh_status(hcd); return 0; } From aeadd93f2b0a609f603ac33e574b97a9832d1b90 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 22 Sep 2018 16:46:48 +0300 Subject: [PATCH 387/499] net: sched: act_ipt: check for underflow in __tcf_ipt_init() If "td->u.target_size" is larger than sizeof(struct xt_entry_target) we return -EINVAL. But we don't check whether it's smaller than sizeof(struct xt_entry_target) and that could lead to an out of bounds read. Fixes: 7ba699c604ab ("[NET_SCHED]: Convert actions from rtnetlink to new netlink API") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 23273b5303fd..8525de811616 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -135,7 +135,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { if (exists) tcf_idr_release(*a, bind); else From d949cfedbcbab4e91590576cbace2671924ad69c Mon Sep 17 00:00:00 2001 From: LUU Duc Canh Date: Wed, 26 Sep 2018 22:28:52 +0200 Subject: [PATCH 388/499] tipc: ignore STATE_MSG on wrong link session The initial session number when a link is created is based on a random value, taken from struct tipc_net->random. It is then incremented for each link reset to avoid mixing protocol messages from different link sessions. 
However, when a bearer is reset all its links are deleted, and will later be re-created using the same random value as the first time. This means that if the link never went down between creation and deletion we will still sometimes have two subsequent sessions with the same session number. In virtual environments with potentially long transmission times this has turned out to be a real problem. We now fix this by randomizing the session number each time a link is created. With a session number size of 16 bits this gives a risk of session collision of 1/64k. To reduce this further, we also introduce a sanity check on the very first STATE message arriving at a link. If this has an acknowledge value differing from 0, which is logically impossible, we ignore the message. The final risk for session collision is hence reduced to 1/4G, which should be sufficient. Signed-off-by: LUU Duc Canh Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 3 +++ net/tipc/node.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 4ed650ce6e61..fb886b525d95 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1516,6 +1516,9 @@ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) return false; if (session != curr_session) return false; + /* Extra sanity check */ + if (!link_is_up(l) && msg_ack(hdr)) + return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; /* Accept only STATE with new sequence number */ diff --git a/net/tipc/node.c b/net/tipc/node.c index b0ee25f1f2e6..2afc4f8c37a7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -913,6 +913,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool reset = true; char *if_name; unsigned long intv; + u16 session; *dupl_addr = false; *respond = false; @@ -999,9 +1000,10 @@ void tipc_node_check_dest(struct net *net, u32 addr, goto exit; if_name = strchr(b->name, ':') + 1; + get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, - b->window, mod(tipc_net(net)->random), + b->window, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, @@ -1625,7 +1627,6 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_link_create_dummy_tnl_msg(l, xmitq); n->failover_sent = true; } - /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; From 7f6d6558ae44bc193eb28df3617c364d3bb6df39 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 28 Sep 2018 14:55:34 -0300 Subject: [PATCH 389/499] Revert "openvswitch: Fix template leak in error cases." This reverts commit 90c7afc96cbbd77f44094b5b651261968e97de67. When the commit was merged, the code used nf_ct_put() to free the entry, but later on commit 76644232e612 ("openvswitch: Free tmpl with tmpl_free.") replaced that with nf_ct_tmpl_free which is a more appropriate. Now the original problem is removed. Then 44d6e2f27328 ("net: Replace NF_CT_ASSERT() with WARN_ON().") replaced a debug assert with a WARN_ON() which is trigged now. Signed-off-by: Flavio Leitner Acked-by: Joe Stringer Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 86a75105af1a..0aeb34c6389d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1624,10 +1624,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, OVS_NLERR(log, "Failed to allocate conntrack template"); return -ENOMEM; } - - __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); - nf_conntrack_get(&ct_info.ct->ct_general); - if (helper) { err = ovs_ct_add_helper(&ct_info, helper, key, log); if (err) @@ -1639,6 +1635,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (err) goto err_free_ct; + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); return 0; err_free_ct: __ovs_ct_free_action(&ct_info); From 893626d6a353d1356528f94e081246ecf233d77a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 28 Sep 2018 12:28:41 -0700 Subject: [PATCH 390/499] rtnetlink: Fail dump if target netnsid is invalid Link dumps can return results from a target namespace. If the namespace id is invalid, then the dump request should fail if get_target_net fails rather than continuing with a dump of the current namespace. Fixes: 79e1ad148c844 ("rtnetlink: use netnsid to query interface") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 63ce2283a456..7f37fe9c65a5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1898,10 +1898,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); tgt_net = get_target_net(skb->sk, netnsid); - if (IS_ERR(tgt_net)) { - tgt_net = net; - netnsid = -1; - } + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); } if (tb[IFLA_EXT_MASK]) From 6fe9487892b32cb1c8b8b0d552ed7222a527fe30 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 28 Sep 2018 16:26:08 -0400 Subject: [PATCH 391/499] bond: take rcu lock in netpoll_send_skb_on_dev The bonding driver lacks the rcu lock when it calls down into netdev_lower_get_next_private_rcu from bond_poll_controller, which results in a trace like: WARNING: CPU: 2 PID: 179 at net/core/dev.c:6567 netdev_lower_get_next_private_rcu+0x34/0x40 CPU: 2 PID: 179 Comm: kworker/u16:15 Not tainted 4.19.0-rc5-backup+ #1 Workqueue: bond0 bond_mii_monitor RIP: 0010:netdev_lower_get_next_private_rcu+0x34/0x40 Code: 48 89 fb e8 fe 29 63 ff 85 c0 74 1e 48 8b 45 00 48 81 c3 c0 00 00 00 48 8b 00 48 39 d8 74 0f 48 89 45 00 48 8b 40 f8 5b 5d c3 <0f> 0b eb de 31 c0 eb f5 0f 1f 40 00 0f 1f 44 00 00 48 8> RSP: 0018:ffffc9000087fa68 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880429614560 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 00000000ffffffff RDI: ffffffffa184ada0 RBP: ffffc9000087fa80 R08: 0000000000000001 R09: 0000000000000000 R10: ffffc9000087f9f0 R11: ffff880429798040 R12: ffff8804289d5980 R13: ffffffffa1511f60 R14: 00000000000000c8 R15: 00000000ffffffff FS: 0000000000000000(0000) GS:ffff88042f880000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4b78fce180 CR3: 000000018180f006 CR4: 00000000001606e0 Call Trace: bond_poll_controller+0x52/0x170 netpoll_poll_dev+0x79/0x290 netpoll_send_skb_on_dev+0x158/0x2c0 netpoll_send_udp+0x2d5/0x430 write_ext_msg+0x1e0/0x210 
console_unlock+0x3c4/0x630 vprintk_emit+0xfa/0x2f0 printk+0x52/0x6e ? __netdev_printk+0x12b/0x220 netdev_info+0x64/0x80 ? bond_3ad_set_carrier+0xe9/0x180 bond_select_active_slave+0x1fc/0x310 bond_mii_monitor+0x709/0x9b0 process_one_work+0x221/0x5e0 worker_thread+0x4f/0x3b0 kthread+0x100/0x140 ? process_one_work+0x5e0/0x5e0 ? kthread_delayed_work_timer_fn+0x90/0x90 ret_from_fork+0x24/0x30 We're also doing rcu dereferences a layer up in netpoll_send_skb_on_dev before we call down into netpoll_poll_dev, so just take the lock there. Suggested-by: Cong Wang Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3ae899805f8b..de1d1ba92f2d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -312,6 +312,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; + rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -356,6 +357,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } + rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); From 06e55addd3f40b5294e448c2cb7605ca4f28c2e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Sep 2018 14:51:47 -0700 Subject: [PATCH 392/499] tun: remove unused parameters tun_napi_disable() and tun_napi_del() do not need a pointer to the tun_struct Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e2648b5a3861..71d10fb59849 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -324,13 +324,13 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } -static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) +static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) napi_disable(&tfile->napi); } -static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) +static void tun_napi_del(struct tun_file *tfile) { if (tfile->napi_enabled) netif_napi_del(&tfile->napi); @@ -690,8 +690,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tun, tfile); - tun_napi_del(tun, tfile); + tun_napi_disable(tfile); + tun_napi_del(tfile); } if (tun && !tfile->detached) { @@ -758,7 +758,7 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); - tun_napi_disable(tun, tfile); + tun_napi_disable(tfile); tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); @@ -774,7 +774,7 @@ static void tun_detach_all(struct net_device *dev) synchronize_net(); for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - tun_napi_del(tun, tfile); + tun_napi_del(tfile); /* Drop read queue */ tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); From c7256f579f8302ce2c038181c30060d0b40017b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Sep 2018 14:51:48 -0700 Subject: [PATCH 393/499] tun: initialize napi_mutex unconditionally This is the first part to fix following syzbot report : console output: 
https://syzkaller.appspot.com/x/log.txt?x=145378e6400000 kernel config: https://syzkaller.appspot.com/x/.config?x=443816db871edd66 dashboard link: https://syzkaller.appspot.com/bug?extid=e662df0ac1d753b57e80 Following patch is fixing the race condition, but it seems safer to initialize this mutex at tfile creation anyway. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet Reported-by: syzbot+e662df0ac1d753b57e80@syzkaller.appspotmail.com Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 71d10fb59849..729686babbf3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -320,7 +320,6 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); napi_enable(&tfile->napi); - mutex_init(&tfile->napi_mutex); } } @@ -3199,6 +3198,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) return -ENOMEM; } + mutex_init(&tfile->napi_mutex); RCU_INIT_POINTER(tfile->tun, NULL); tfile->flags = 0; tfile->ifindex = 0; From af3fb24eecb2c59246e03c99386037fd5ad84ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Sep 2018 14:51:49 -0700 Subject: [PATCH 394/499] tun: napi flags belong to tfile Since tun->flags might be shared by multiple tfile structures, it is better to make sure tun_get_user() is using the flags for the current tfile. Presence of the READ_ONCE() in tun_napi_frags_enabled() gave a hint of what could happen, but we need something stronger to please syzbot. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 13647 Comm: syz-executor5 Not tainted 4.19.0-rc5+ #59 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427 Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4 RSP: 0018:ffff8801c400f410 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325 RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0 RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000 R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358 R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004 FS: 00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: napi_gro_frags+0x3f4/0xc90 net/core/dev.c:5715 tun_get_user+0x31d5/0x42a0 drivers/net/tun.c:1922 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1967 call_write_iter include/linux/fs.h:1808 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6b8/0x9f0 fs/read_write.c:487 vfs_write+0x1fc/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457579 
Code: 1d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe003614c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457579 RDX: 0000000000000012 RSI: 0000000020000000 RDI: 000000000000000a RBP: 000000000072c040 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0036156d4 R13: 00000000004c5574 R14: 00000000004d8e98 R15: 00000000ffffffff Modules linked in: RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427 Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4 RSP: 0018:ffff8801c400f410 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325 RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0 RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000 R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358 R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004 FS: 00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/tun.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 729686babbf3..50e9cc19023a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -181,6 +181,7 @@ struct tun_file { }; struct napi_struct napi; bool napi_enabled; + bool napi_frags_enabled; struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; @@ -313,9 +314,10 @@ static int tun_napi_poll(struct napi_struct *napi, int budget) } static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, - bool napi_en) + bool napi_en, bool napi_frags) { tfile->napi_enabled = napi_en; + tfile->napi_frags_enabled = napi_en && napi_frags; if (napi_en) { netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); @@ -335,9 +337,9 @@ static void tun_napi_del(struct tun_file *tfile) netif_napi_del(&tfile->napi); } -static bool tun_napi_frags_enabled(const struct tun_struct *tun) +static bool tun_napi_frags_enabled(const struct tun_file *tfile) { - return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS; + return tfile->napi_frags_enabled; } #ifdef CONFIG_TUN_VNET_CROSS_LE @@ -792,7 +794,7 @@ static void tun_detach_all(struct net_device *dev) } static int tun_attach(struct tun_struct *tun, struct file *file, - bool skip_filter, bool napi) + bool skip_filter, bool napi, bool napi_frags) { struct tun_file *tfile = file->private_data; struct net_device *dev = tun->dev; @@ -865,7 +867,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, tun_enable_queue(tfile); } else { sock_hold(&tfile->sk); - tun_napi_init(tun, tfile, napi); + tun_napi_init(tun, tfile, napi, napi_frags); } tun_set_real_num_queues(tun); @@ -1708,7 +1710,7 @@ static ssize_t tun_get_user(struct tun_struct 
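For readability, here is the corrected 64-bit clock_gettime fallback pulled out of the patch below: the "=m" output tells gcc that *ts is written, and rcx/r11 are listed because the syscall instruction clobbers them.

static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm ("syscall"
	     : "=a" (ret), "=m" (*ts)
	     : "0" (__NR_clock_gettime), "D" (clock), "S" (ts)
	     : "memory", "rcx", "r11");
	return ret;
}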
*tun, struct tun_file *tfile, int err; u32 rxhash = 0; int skb_xdp = 1; - bool frags = tun_napi_frags_enabled(tun); + bool frags = tun_napi_frags_enabled(tfile); if (!(tun->dev->flags & IFF_UP)) return -EIO; @@ -2533,7 +2535,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) return err; err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, - ifr->ifr_flags & IFF_NAPI); + ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS); if (err < 0) return err; @@ -2631,7 +2634,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) (ifr->ifr_flags & TUN_FEATURES); INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); + err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS); if (err < 0) goto err_free_flow; @@ -2780,7 +2784,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ret = security_tun_dev_attach_queue(tun->security); if (ret < 0) goto unlock; - ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI); + ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, + tun->flags & IFF_NAPI_FRAGS); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) From 715bd9d12f84d8f5cc8ad21d888f9bc304a8eb0b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 1 Oct 2018 12:52:15 -0700 Subject: [PATCH 395/499] x86/vdso: Fix asm constraints on vDSO syscall fallbacks The syscall fallbacks in the vDSO have incorrect asm constraints. They are not marked as writing to their outputs -- instead, they are marked as clobbering "memory", which is useless. In particular, gcc is smart enough to know that the timespec parameter hasn't escaped, so a memory clobber doesn't clobber it. And passing a pointer as an asm *input* does not tell gcc that the pointed-to value is changed. Add in the fact that the asm instructions weren't volatile, and gcc was free to omit them entirely unless their sole output (the return value) is used. Which it is (phew!), but that stops happening with some upcoming patches. As a trivial example, the following code: void test_fallback(struct timespec *ts) { vdso_fallback_gettime(CLOCK_MONOTONIC, ts); } compiles to: 00000000000000c0 : c0: c3 retq To add insult to injury, the RCX and R11 clobbers on 64-bit builds were missing. The "memory" clobber is also unnecessary -- no ordering with respect to other memory operations is needed, but that's going to be fixed in a separate not-for-stable patch. 
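As a rough user-space illustration of the difference (not part of this patch; the raw syscall number and the function names below are only for demonstration), the two constraint styles look like this:

	#include <time.h>

	static long fragile_clock_gettime(long clock, struct timespec *ts)
	{
		long ret;

		/* Old-style constraints: *ts is not an output and the asm is
		 * not volatile, so gcc may treat *ts as unchanged and may drop
		 * the asm entirely when ret is unused. */
		asm("syscall" : "=a" (ret) :
		    "0" (228 /* __NR_clock_gettime on x86-64 */), "D" (clock), "S" (ts) :
		    "memory");
		return ret;
	}

	static long robust_clock_gettime(long clock, struct timespec *ts)
	{
		long ret;

		/* New-style constraints: *ts is a declared output, and the
		 * registers the syscall instruction itself clobbers (rcx, r11)
		 * are listed. */
		asm("syscall" : "=a" (ret), "=m" (*ts) :
		    "0" (228), "D" (clock), "S" (ts) :
		    "memory", "rcx", "r11");
		return ret;
	}

Compiling the first variant with optimizations, from a caller that ignores the return value, should reproduce the empty-function disassembly shown above; the second variant is kept because the store to *ts is visible to the compiler.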
Fixes: 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/2c0231690551989d2fafa60ed0e7b5cc8b403908.1538422295.git.luto@kernel.org --- arch/x86/entry/vdso/vclock_gettime.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index f19856d95c60..134e2d2e8add 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -43,8 +43,9 @@ extern u8 hvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -64,12 +66,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" "mov %2, %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) + : "=a" (ret), "=m" (*ts) : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) : "memory", "edx"); return ret; @@ -79,12 +81,12 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" "mov %2, %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) + : "=a" (ret), "=m" (*tv), "=m" (*tz) : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) : "memory", "edx"); return ret; From ad5f97faff4231e72b96bd96adbe1b6e977a9b86 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 28 Sep 2018 23:51:54 +0200 Subject: [PATCH 396/499] r8169: fix network stalls due to missing bit TXCFG_AUTO_FIFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the chip-specific hw_start functions set bit TXCFG_AUTO_FIFO in register TxConfig. The original patch changed the order of some calls resulting in these changes being overwritten by rtl_set_tx_config_registers() in rtl_hw_start(). This eventually resulted in network stalls especially under high load. Analyzing the chip-specific hw_start functions all chip version from 34, with the exception of version 39, need this bit set. This patch moves setting this bit to rtl_set_tx_config_registers(). Fixes: 4fd48c4ac0a0 ("r8169: move common initializations to tp->hw_start") Reported-by: Ortwin Glück Reported-by: David Arendt Root-caused-by: Maciej S. Szmigiero Tested-by: Tony Atkinson Tested-by: David Arendt Tested-by: Ortwin Glück Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index d6b53f53909a..a94b874982dc 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4536,9 +4536,14 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp) static void rtl_set_tx_config_registers(struct rtl8169_private *tp) { - /* Set DMA burst size and Interframe Gap Time */ - RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) | - (InterFrameGap << TxInterFrameGapShift)); + u32 val = TX_DMA_BURST << TxDMAShift | + InterFrameGap << TxInterFrameGapShift; + + if (tp->mac_version >= RTL_GIGA_MAC_VER_34 && + tp->mac_version != RTL_GIGA_MAC_VER_39) + val |= TXCFG_AUTO_FIFO; + + RTL_W32(tp, TxConfig, val); } static void rtl_set_rx_max_size(struct rtl8169_private *tp) @@ -5033,7 +5038,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); /* Adjust EEE LED frequency */ @@ -5067,7 +5071,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); @@ -5112,8 +5115,6 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) static void rtl_hw_start_8168g(struct rtl8169_private *tp) { - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); @@ -5211,8 +5212,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_hw_aspm_clkreq_enable(tp, false); rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1)); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); @@ -5295,8 +5294,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) { rtl8168ep_stop_cmac(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC); @@ -5618,7 +5615,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) /* Force LAN exit from ASPM if Rx/Tx are not idle */ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); From 7c03e7035ac1cf2a6165754e4f3a49c2f1977838 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 1 Oct 2018 12:52:16 -0700 Subject: [PATCH 397/499] selftests/x86: Add clock_gettime() tests to test_vdso Now that the vDSO implementation of clock_gettime() is getting reworked, add a selftest for it. This tests that its output is consistent with the syscall version. 
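Concretely, the consistency property being checked is a "sandwich" ordering between syscall and vDSO readings of the same clock. In outline (using the test's own helpers, error handling omitted):

	struct timespec start, vdso, end;

	sys_clock_gettime(clock, &start);
	vdso_clock_gettime(clock, &vdso);
	sys_clock_gettime(clock, &end);
	/* expect start <= vdso <= end, compared on (tv_sec, tv_nsec) */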
This is marked for stable to serve as a test for commit 715bd9d12f84 ("x86/vdso: Fix asm constraints on vDSO syscall fallbacks") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/082399674de2619b2befd8c0dde49b260605b126.1538422295.git.luto@kernel.org --- tools/testing/selftests/x86/test_vdso.c | 99 +++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 235259011704..49f7294fb382 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifndef SYS_getcpu # ifdef __x86_64__ @@ -31,6 +32,10 @@ int nerrs = 0; +typedef int (*vgettime_t)(clockid_t, struct timespec *); + +vgettime_t vdso_clock_gettime; + typedef long (*getcpu_t)(unsigned *, unsigned *, void *); getcpu_t vgetcpu; @@ -95,6 +100,10 @@ static void fill_function_pointers() printf("Warning: failed to find getcpu in vDSO\n"); vgetcpu = (getcpu_t) vsyscall_getcpu(); + + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + printf("Warning: failed to find clock_gettime in vDSO\n"); } static long sys_getcpu(unsigned * cpu, unsigned * node, @@ -103,6 +112,11 @@ static long sys_getcpu(unsigned * cpu, unsigned * node, return syscall(__NR_getcpu, cpu, node, cache); } +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(__NR_clock_gettime, id, ts); +} + static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); @@ -155,10 +169,95 @@ static void test_getcpu(void) } } +static bool ts_leq(const struct timespec *a, const struct timespec *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_nsec <= b->tv_nsec; +} + +static char const * const clocknames[] = { + [0] = "CLOCK_REALTIME", + [1] = "CLOCK_MONOTONIC", + [2] = "CLOCK_PROCESS_CPUTIME_ID", + [3] = "CLOCK_THREAD_CPUTIME_ID", + [4] = "CLOCK_MONOTONIC_RAW", + [5] = "CLOCK_REALTIME_COARSE", + [6] = "CLOCK_MONOTONIC_COARSE", + [7] = "CLOCK_BOOTTIME", + [8] = "CLOCK_REALTIME_ALARM", + [9] = "CLOCK_BOOTTIME_ALARM", + [10] = "CLOCK_SGI_CYCLE", + [11] = "CLOCK_TAI", +}; + +static void test_one_clock_gettime(int clock, const char *name) +{ + struct timespec start, vdso, end; + int vdso_ret, end_ret; + + printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); + + if (sys_clock_gettime(clock, &start) < 0) { + if (errno == EINVAL) { + vdso_ret = vdso_clock_gettime(clock, &vdso); + if (vdso_ret == -EINVAL) { + printf("[OK]\tNo such clock.\n"); + } else { + printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); + nerrs++; + } + } else { + printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); + } + return; + } + + vdso_ret = vdso_clock_gettime(clock, &vdso); + end_ret = sys_clock_gettime(clock, &end); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + (unsigned long long)start.tv_sec, start.tv_nsec, + (unsigned long long)vdso.tv_sec, vdso.tv_nsec, + (unsigned long long)end.tv_sec, end.tv_nsec); + + if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } +} + +static void test_clock_gettime(void) +{ + for (int clock = 0; clock < 
sizeof(clocknames) / sizeof(clocknames[0]); + clock++) { + test_one_clock_gettime(clock, clocknames[clock]); + } + + /* Also test some invalid clock ids */ + test_one_clock_gettime(-1, "invalid"); + test_one_clock_gettime(INT_MIN, "invalid"); + test_one_clock_gettime(INT_MAX, "invalid"); +} + int main(int argc, char **argv) { fill_function_pointers(); + test_clock_gettime(); + + /* + * Test getcpu() last so that, if something goes wrong setting affinity, + * we still run the other tests. + */ test_getcpu(); return nerrs ? 1 : 0; From a9f9772114c8b07ae75bcb3654bd017461248095 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Sep 2018 17:58:35 +0200 Subject: [PATCH 398/499] perf/core: Fix perf_pmu_unregister() locking When we unregister a PMU, we fail to serialize the @pmu_idr properly. Fix that by doing the entire thing under pmu_lock. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 2e80a82a49c4 ("perf: Dynamic pmu types") Signed-off-by: Ingo Molnar --- kernel/events/core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index dcb093e7b377..dfb1d951789e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9431,9 +9431,7 @@ static void free_pmu_context(struct pmu *pmu) if (pmu->task_ctx_nr > perf_invalid_context) return; - mutex_lock(&pmus_lock); free_percpu(pmu->pmu_cpu_context); - mutex_unlock(&pmus_lock); } /* @@ -9689,12 +9687,8 @@ EXPORT_SYMBOL_GPL(perf_pmu_register); void perf_pmu_unregister(struct pmu *pmu) { - int remove_device; - mutex_lock(&pmus_lock); - remove_device = pmu_bus_running; list_del_rcu(&pmu->entry); - mutex_unlock(&pmus_lock); /* * We dereference the pmu list under both SRCU and regular RCU, so @@ -9706,13 +9700,14 @@ void perf_pmu_unregister(struct pmu *pmu) free_percpu(pmu->pmu_disable_count); if (pmu->type >= PERF_TYPE_MAX) idr_remove(&pmu_idr, pmu->type); - if (remove_device) { + if (pmu_bus_running) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); device_del(pmu->dev); put_device(pmu->dev); } free_pmu_context(pmu); + mutex_unlock(&pmus_lock); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); From 6265adb9726098b7f4f7ca70bc51992b25fdd9d6 Mon Sep 17 00:00:00 2001 From: Masayoshi Mizuma Date: Mon, 10 Sep 2018 10:47:50 -0400 Subject: [PATCH 399/499] perf/x86/intel/uncore: Use boot_cpu_data.phys_proc_id instead of hardcorded physical package ID 0 Physical package id 0 doesn't always exist, we should use boot_cpu_data.phys_proc_id here. Signed-off-by: Masayoshi Mizuma Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masayoshi Mizuma Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20180910144750.6782-1-msys.mizuma@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 51d7c117e3c7..53b981dcdb42 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(0); + int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; From cd6fb677ce7e460c25bdd66f689734102ec7d642 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 23 Sep 2018 18:13:43 +0200 Subject: [PATCH 400/499] perf/ring_buffer: Prevent concurrent ring buffer access Some of the scheduling tracepoints allow the perf_tp_event code to write to the ring buffer on a different CPU than the one the code is running on. This results in corrupted ring buffer data, as demonstrated by the following perf commands: # perf record -e 'sched:sched_switch,sched:sched_wakeup' perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.383 [sec] [ perf record: Woken up 8 times to write data ] 0x42b890 [0]: failed to process type: -1765585640 [ perf record: Captured and wrote 4.825 MB perf.data (29669 samples) ] # perf report --stdio 0x42b890 [0]: failed to process type: -1765585640 The reason for the corruption is that some of the scheduling tracepoints have __perf_task defined and thus allow storing data to another CPU's ring buffer: sched_waking sched_wakeup sched_wakeup_new sched_stat_wait sched_stat_sleep sched_stat_iowait sched_stat_blocked The perf_tp_event function first stores samples for the current-CPU events defined for the tracepoint: hlist_for_each_entry_rcu(event, head, hlist_entry) perf_swevent_event(event, count, &data, regs); It then iterates the 'task' events and stores the sample for any of the task's events that pass the tracepoint checks: ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) continue; perf_swevent_event(event, count, &data, regs); } The above code can race with the same code running on another CPU, ending up with two CPUs trying to store into the same ring buffer, which is specifically not allowed. This patch prevents the problem by allowing only events bound to the current CPU to receive the event. NOTE: this requires the use of (per-task-)per-cpu buffers for this feature to work; perf-record does this.
Signed-off-by: Jiri Olsa [peterz: small edits to Changelog] Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Vagin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: e6dab5ffab59 ("perf/trace: Add ability to set a target task for events") Link: http://lkml.kernel.org/r/20180923161343.GB15054@krava Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index dfb1d951789e..5a97f34bc14c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8314,6 +8314,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, goto unlock; list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (event->cpu != smp_processor_id()) + continue; if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) From 9d92cfeaf5215158d26d2991be7f7ff865cb98f3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 21 Sep 2018 07:07:06 -0700 Subject: [PATCH 401/499] perf/x86/intel/uncore: Fix PCI BDF address of M3UPI on SKX The counters on M3UPI Link 0 and Link 3 don't count properly, and writing 0 to these counters may causes system crash on some machines. The PCI BDF addresses of the M3UPI in the current code are incorrect. The correct addresses should be: D18:F1 0x204D D18:F2 0x204E D18:F5 0x204D Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: cd34cd97b7b4 ("perf/x86/intel/uncore: Add Skylake server uncore support") Link: http://lkml.kernel.org/r/1537538826-55489-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 53b981dcdb42..c07bee31abe8 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), }, { /* M3UPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), }, { /* M3UPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), }, { /* M3UPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), }, { /* end: all zeroes */ } }; From d7cbbe49a9304520181fb8c9272d1327deec8453 Mon Sep 17 00:00:00 2001 From: "Natarajan, Janakarajan" Date: Thu, 27 Sep 2018 15:51:55 +0000 Subject: [PATCH 402/499] perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events In Family 17h, some L3 Cache Performance events require the ThreadMask and 
SliceMask to be set. For other events, these fields do not affect the count either way. Set ThreadMask and SliceMask to 0xFF and 0xF respectively. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H . Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/Message-ID: Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 10 ++++++++++ arch/x86/include/asm/perf_event.h | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 981ba5e8241b..8671de126eac 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -36,6 +36,7 @@ static int num_counters_llc; static int num_counters_nb; +static bool l3_mask; static HLIST_HEAD(uncore_unused_list); @@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; + /* + * SliceMask and ThreadMask need to be set for certain L3 events in + * Family 17h. For other events, the two fields do not affect the count. + */ + if (l3_mask) + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (event->cpu < 0) return -EINVAL; @@ -525,6 +533,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l3"; format_attr_event_df.show = &event_show_df; format_attr_event_l3.show = &event_show_l3; + l3_mask = true; } else { num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; @@ -532,6 +541,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l2"; format_attr_event_df = format_attr_event; format_attr_event_l3 = format_attr_event; + l3_mask = false; } amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8..78241b736f2a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ From a4739eca4456e3d140cc656c5331d42b7465f91d Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 21 Sep 2018 23:18:56 +0530 Subject: [PATCH 403/499] sched/numa: Stop multiple tasks from moving to the CPU at the same time Task migration under NUMA balancing can happen in parallel. More than one task might choose to migrate to the same CPU at the same time. This can result in: - During task swap, choosing a task that was not part of the evaluation. - During task swap, task which just got moved into its preferred node, moving to a completely different node. - During task swap, task failing to move to the preferred node, will have to wait an extra interval for the next migrate opportunity. - During task movement, multiple task movements can cause load imbalance. This problem is more likely if there are more cores per node or more nodes in the system. Use a per run-queue variable to check if NUMA-balance is active on the run-queue. 
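In outline, the diff below wraps the NUMA migration decision in a per-runqueue claim/release (a condensed sketch of the pattern only; the real code also hands the claim over to a new runqueue when a better destination CPU is found):

	/* claim: only one NUMA-balancing decision may target this rq at a time */
	if (xchg(&rq->numa_migrate_on, 1))
		return;		/* rq already claimed by another task, back off */

	/* ... evaluate and perform the task move or swap ... */

	/* release the claim once the migration attempt has been resolved */
	WRITE_ONCE(rq->numa_migrate_on, 0);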
Specjbb2005 results (8 warehouses) Higher bops are better 2 Socket - 2 Node Haswell - X86 JVMS Prev Current %Change 4 200194 203353 1.57797 1 311331 328205 5.41995 2 Socket - 4 Node Power8 - PowerNV JVMS Prev Current %Change 1 197654 214384 8.46429 2 Socket - 2 Node Power9 - PowerNV JVMS Prev Current %Change 4 192605 188553 -2.10379 1 213402 196273 -8.02664 4 Socket - 4 Node Power7 - PowerVM JVMS Prev Current %Change 8 52227.1 57581.2 10.2516 1 102529 103468 0.915838 There is a regression on power 9 box. If we look at the details, that box has a sudden jump in cache-misses with this patch. All other parameters seem to be pointing towards NUMA consolidation. perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 13,345,784 13,941,377 migrations 1,127,820 1,157,323 faults 374,736 382,175 cache-misses 55,132,054,603 54,993,823,500 sched:sched_move_numa 1,923 2,005 sched:sched_stick_numa 52 14 sched:sched_swap_numa 595 529 migrate:mm_migrate_pages 1,932 1,573 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 60605 67099 numa_hint_faults_local 51804 58456 numa_hit 239945 240416 numa_huge_pte_updates 14 18 numa_interleave 60 65 numa_local 239865 240339 numa_other 80 77 numa_pages_migrated 1931 1574 numa_pte_updates 67823 77182 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 3,016,467 3,176,453 migrations 37,326 30,238 faults 115,342 87,869 cache-misses 11,692,155,554 12,544,479,391 sched:sched_move_numa 965 23 sched:sched_stick_numa 8 0 sched:sched_swap_numa 35 6 migrate:mm_migrate_pages 1,168 10 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 16286 236 numa_hint_faults_local 11863 201 numa_hit 112482 72293 numa_huge_pte_updates 33 0 numa_interleave 20 26 numa_local 112419 72233 numa_other 63 60 numa_pages_migrated 1144 8 numa_pte_updates 32859 0 perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 8,629,724 8,478,820 migrations 221,052 171,323 faults 308,661 307,499 cache-misses 135,574,913 240,353,599 sched:sched_move_numa 147 214 sched:sched_stick_numa 0 0 sched:sched_swap_numa 2 4 migrate:mm_migrate_pages 64 89 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 11481 5301 numa_hint_faults_local 10968 4745 numa_hit 89773 92943 numa_huge_pte_updates 0 0 numa_interleave 1116 899 numa_local 89220 92345 numa_other 553 598 numa_pages_migrated 62 88 numa_pte_updates 11694 5505 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 2,272,887 2,066,172 migrations 12,206 11,076 faults 163,704 149,544 cache-misses 4,801,186 10,398,067 sched:sched_move_numa 44 43 sched:sched_stick_numa 0 0 sched:sched_swap_numa 0 0 migrate:mm_migrate_pages 17 6 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 2261 3552 numa_hint_faults_local 1993 3347 numa_hit 25726 25611 numa_huge_pte_updates 0 0 numa_interleave 239 213 numa_local 25498 25583 numa_other 228 28 numa_pages_migrated 17 6 numa_pte_updates 2266 3535 perf stats 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 117,980,962 99,358,136 migrations 3,950,220 4,041,607 faults 736,979 749,653 cache-misses 224,976,072,879 225,562,543,251 sched:sched_move_numa 504 771 sched:sched_stick_numa 50 14 sched:sched_swap_numa 239 204 migrate:mm_migrate_pages 1,260 1,180 vmstat 8th warehouse 
Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 18293 27409 numa_hint_faults_local 11969 20677 numa_hit 240854 239988 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 240851 239983 numa_other 3 5 numa_pages_migrated 1190 1016 numa_pte_updates 18106 27916 perf stats 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 61,053,158 60,899,307 migrations 551,586 544,668 faults 244,174 270,834 cache-misses 74,326,766,973 74,543,455,635 sched:sched_move_numa 344 735 sched:sched_stick_numa 24 25 sched:sched_swap_numa 140 174 migrate:mm_migrate_pages 568 816 vmstat 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 6461 11059 numa_hint_faults_local 2283 4733 numa_hit 35661 41384 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 35661 41383 numa_other 0 1 numa_pages_migrated 568 815 numa_pte_updates 6518 11323 Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Jirka Hladky Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1537552141-27815-2-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 22 ++++++++++++++++++++++ kernel/sched/sched.h | 1 + 2 files changed, 23 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f808ddf2a868..3b0b75de1141 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1514,6 +1514,21 @@ struct task_numa_env { static void task_numa_assign(struct task_numa_env *env, struct task_struct *p, long imp) { + struct rq *rq = cpu_rq(env->dst_cpu); + + /* Bail out if run-queue part of active NUMA balance. */ + if (xchg(&rq->numa_migrate_on, 1)) + return; + + /* + * Clear previous best_cpu/rq numa-migrate flag, since task now + * found a better CPU to move/swap. 
+ */ + if (env->best_cpu != -1) { + rq = cpu_rq(env->best_cpu); + WRITE_ONCE(rq->numa_migrate_on, 0); + } + if (env->best_task) put_task_struct(env->best_task); if (p) @@ -1569,6 +1584,9 @@ static void task_numa_compare(struct task_numa_env *env, long moveimp = imp; int dist = env->dist; + if (READ_ONCE(dst_rq->numa_migrate_on)) + return; + rcu_read_lock(); cur = task_rcu_dereference(&dst_rq->curr); if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur))) @@ -1710,6 +1728,7 @@ static int task_numa_migrate(struct task_struct *p) .best_cpu = -1, }; struct sched_domain *sd; + struct rq *best_rq; unsigned long taskweight, groupweight; int nid, ret, dist; long taskimp, groupimp; @@ -1811,14 +1830,17 @@ static int task_numa_migrate(struct task_struct *p) */ p->numa_scan_period = task_scan_start(p); + best_rq = cpu_rq(env.best_cpu); if (env.best_task == NULL) { ret = migrate_task_to(p, env.best_cpu); + WRITE_ONCE(best_rq->numa_migrate_on, 0); if (ret != 0) trace_sched_stick_numa(p, env.src_cpu, env.best_cpu); return ret; } ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu); + WRITE_ONCE(best_rq->numa_migrate_on, 0); if (ret != 0) trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4a2e8cae63c4..0b9161241bda 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -783,6 +783,7 @@ struct rq { #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; + unsigned int numa_migrate_on; #endif #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; From 1327237a5978b00bcc665c33046c9bae75da1154 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 21 Sep 2018 23:18:57 +0530 Subject: [PATCH 404/499] sched/numa: Pass destination CPU as a parameter to migrate_task_rq This additional parameter (new_cpu) is used later for identifying if task migration is across nodes. No functional change. Specjbb2005 results (8 warehouses) Higher bops are better 2 Socket - 2 Node Haswell - X86 JVMS Prev Current %Change 4 203353 200668 -1.32036 1 328205 321791 -1.95427 2 Socket - 4 Node Power8 - PowerNV JVMS Prev Current %Change 1 214384 204848 -4.44809 2 Socket - 2 Node Power9 - PowerNV JVMS Prev Current %Change 4 188553 188098 -0.241311 1 196273 200351 2.07772 4 Socket - 4 Node Power7 - PowerVM JVMS Prev Current %Change 8 57581.2 58145.9 0.980702 1 103468 103798 0.318939 Brings out the variance between different specjbb2005 runs. Some events stats before and after applying the patch. 
perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 13,941,377 13,912,183 migrations 1,157,323 1,155,931 faults 382,175 367,139 cache-misses 54,993,823,500 54,240,196,814 sched:sched_move_numa 2,005 1,571 sched:sched_stick_numa 14 9 sched:sched_swap_numa 529 463 migrate:mm_migrate_pages 1,573 703 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 67099 50155 numa_hint_faults_local 58456 45264 numa_hit 240416 239652 numa_huge_pte_updates 18 36 numa_interleave 65 68 numa_local 240339 239576 numa_other 77 76 numa_pages_migrated 1574 680 numa_pte_updates 77182 71146 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 3,176,453 3,156,720 migrations 30,238 30,354 faults 87,869 97,261 cache-misses 12,544,479,391 12,400,026,826 sched:sched_move_numa 23 4 sched:sched_stick_numa 0 0 sched:sched_swap_numa 6 1 migrate:mm_migrate_pages 10 20 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 236 272 numa_hint_faults_local 201 186 numa_hit 72293 71362 numa_huge_pte_updates 0 0 numa_interleave 26 23 numa_local 72233 71299 numa_other 60 63 numa_pages_migrated 8 2 numa_pte_updates 0 0 perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 8,478,820 8,606,824 migrations 171,323 155,352 faults 307,499 301,409 cache-misses 240,353,599 157,759,224 sched:sched_move_numa 214 168 sched:sched_stick_numa 0 0 sched:sched_swap_numa 4 3 migrate:mm_migrate_pages 89 125 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 5301 4650 numa_hint_faults_local 4745 3946 numa_hit 92943 90489 numa_huge_pte_updates 0 0 numa_interleave 899 892 numa_local 92345 90034 numa_other 598 455 numa_pages_migrated 88 124 numa_pte_updates 5505 4818 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 2,066,172 2,113,167 migrations 11,076 10,533 faults 149,544 142,727 cache-misses 10,398,067 5,594,192 sched:sched_move_numa 43 10 sched:sched_stick_numa 0 0 sched:sched_swap_numa 0 0 migrate:mm_migrate_pages 6 6 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 3552 744 numa_hint_faults_local 3347 584 numa_hit 25611 25551 numa_huge_pte_updates 0 0 numa_interleave 213 263 numa_local 25583 25302 numa_other 28 249 numa_pages_migrated 6 6 numa_pte_updates 3535 744 perf stats 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 99,358,136 101,227,352 migrations 4,041,607 4,151,829 faults 749,653 745,233 cache-misses 225,562,543,251 224,669,561,766 sched:sched_move_numa 771 617 sched:sched_stick_numa 14 2 sched:sched_swap_numa 204 187 migrate:mm_migrate_pages 1,180 316 vmstat 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 27409 24195 numa_hint_faults_local 20677 21639 numa_hit 239988 238331 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 239983 238331 numa_other 5 0 numa_pages_migrated 1016 204 numa_pte_updates 27916 24561 perf stats 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 60,899,307 62,738,978 migrations 544,668 562,702 faults 270,834 228,465 cache-misses 74,543,455,635 75,778,067,952 sched:sched_move_numa 735 648 sched:sched_stick_numa 25 13 sched:sched_swap_numa 174 137 migrate:mm_migrate_pages 816 733 vmstat 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event 
Before After numa_hint_faults 11059 10281 numa_hint_faults_local 4733 3242 numa_hit 41384 36338 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 41383 36338 numa_other 1 0 numa_pages_migrated 815 706 numa_pte_updates 11323 10176 Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Cc: Jirka Hladky Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1537552141-27815-3-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/sched.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 625bc9897f62..ad97f3ba5ec5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1167,7 +1167,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_cpu(p) != new_cpu) { if (p->sched_class->migrate_task_rq) - p->sched_class->migrate_task_rq(p); + p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; rseq_migrate(p); perf_event_task_migrate(p); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 997ea7b839fa..91e4202b0634 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1607,7 +1607,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) return cpu; } -static void migrate_task_rq_dl(struct task_struct *p) +static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused) { struct rq *rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b0b75de1141..bc768156239f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6297,7 +6297,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); * cfs_rq_of(p) references at time of call are still valid and identify the * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held. */ -static void migrate_task_rq_fair(struct task_struct *p) +static void migrate_task_rq_fair(struct task_struct *p, int new_cpu __maybe_unused) { /* * As blocked tasks retain absolute vruntime the migration needs to diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0b9161241bda..455fa330de04 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1524,7 +1524,7 @@ struct sched_class { #ifdef CONFIG_SMP int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); - void (*migrate_task_rq)(struct task_struct *p); + void (*migrate_task_rq)(struct task_struct *p, int new_cpu); void (*task_woken)(struct rq *this_rq, struct task_struct *task); From 3f9672baaa70fc62765857f13f007feb01f9ad33 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 21 Sep 2018 23:18:58 +0530 Subject: [PATCH 405/499] sched/numa: Reset scan rate whenever task moves across nodes Currently task scan rate is reset when NUMA balancer migrates the task to a different node. If NUMA balancer initiates a swap, reset is only applicable to the task that initiates the swap. Similarly no scan rate reset is done if the task is migrated across nodes by traditional load balancer. Instead move the scan reset to the migrate_task_rq. This ensures the task moved out of its preferred node, either gets back to its preferred node quickly or finds a new preferred node. Doing so, would be fair to all tasks migrating across nodes. 
Specjbb2005 results (8 warehouses) Higher bops are better 2 Socket - 2 Node Haswell - X86 JVMS Prev Current %Change 4 200668 203370 1.3465 1 321791 328431 2.06345 2 Socket - 4 Node Power8 - PowerNV JVMS Prev Current %Change 1 204848 206070 0.59654 2 Socket - 2 Node Power9 - PowerNV JVMS Prev Current %Change 4 188098 188386 0.153112 1 200351 201566 0.606436 4 Socket - 4 Node Power7 - PowerVM JVMS Prev Current %Change 8 58145.9 59157.4 1.73959 1 103798 105495 1.63491 Some events stats before and after applying the patch. perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 13,912,183 13,825,492 migrations 1,155,931 1,152,509 faults 367,139 371,948 cache-misses 54,240,196,814 55,654,206,041 sched:sched_move_numa 1,571 1,856 sched:sched_stick_numa 9 4 sched:sched_swap_numa 463 428 migrate:mm_migrate_pages 703 898 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 50155 57146 numa_hint_faults_local 45264 51612 numa_hit 239652 238164 numa_huge_pte_updates 36 16 numa_interleave 68 63 numa_local 239576 238085 numa_other 76 79 numa_pages_migrated 680 883 numa_pte_updates 71146 67540 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 3,156,720 3,288,525 migrations 30,354 38,652 faults 97,261 111,678 cache-misses 12,400,026,826 12,111,197,376 sched:sched_move_numa 4 900 sched:sched_stick_numa 0 0 sched:sched_swap_numa 1 5 migrate:mm_migrate_pages 20 714 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 272 18572 numa_hint_faults_local 186 14850 numa_hit 71362 73197 numa_huge_pte_updates 0 11 numa_interleave 23 25 numa_local 71299 73138 numa_other 63 59 numa_pages_migrated 2 712 numa_pte_updates 0 24021 perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 8,606,824 8,451,543 migrations 155,352 202,804 faults 301,409 310,024 cache-misses 157,759,224 253,522,507 sched:sched_move_numa 168 213 sched:sched_stick_numa 0 0 sched:sched_swap_numa 3 2 migrate:mm_migrate_pages 125 88 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 4650 11830 numa_hint_faults_local 3946 11301 numa_hit 90489 90038 numa_huge_pte_updates 0 0 numa_interleave 892 855 numa_local 90034 89796 numa_other 455 242 numa_pages_migrated 124 88 numa_pte_updates 4818 12039 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 2,113,167 2,049,153 migrations 10,533 11,405 faults 142,727 162,309 cache-misses 5,594,192 7,203,343 sched:sched_move_numa 10 22 sched:sched_stick_numa 0 0 sched:sched_swap_numa 0 0 migrate:mm_migrate_pages 6 1 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 744 1693 numa_hint_faults_local 584 1669 numa_hit 25551 25177 numa_huge_pte_updates 0 0 numa_interleave 263 194 numa_local 25302 24993 numa_other 249 184 numa_pages_migrated 6 1 numa_pte_updates 744 1577 perf stats 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 101,227,352 94,515,937 migrations 4,151,829 4,203,554 faults 745,233 832,697 cache-misses 224,669,561,766 226,248,698,331 sched:sched_move_numa 617 1,730 sched:sched_stick_numa 2 14 sched:sched_swap_numa 187 432 migrate:mm_migrate_pages 316 1,398 vmstat 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 24195 80079 numa_hint_faults_local 21639 68620 numa_hit 238331 241187 
numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 238331 241186 numa_other 0 1 numa_pages_migrated 204 1347 numa_pte_updates 24561 80729 perf stats 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 62,738,978 63,704,961 migrations 562,702 573,404 faults 228,465 230,878 cache-misses 75,778,067,952 76,568,222,781 sched:sched_move_numa 648 509 sched:sched_stick_numa 13 31 sched:sched_swap_numa 137 182 migrate:mm_migrate_pages 733 541 vmstat 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 10281 8501 numa_hint_faults_local 3242 2960 numa_hit 36338 35526 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 36338 35526 numa_other 0 0 numa_pages_migrated 706 539 numa_pte_updates 10176 8433 Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Cc: Jirka Hladky Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1537552141-27815-4-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bc768156239f..5cbfb3068bc6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1824,12 +1824,6 @@ static int task_numa_migrate(struct task_struct *p) if (env.best_cpu == -1) return -EAGAIN; - /* - * Reset the scan period if the task is being rescheduled on an - * alternative node to recheck if the tasks is now properly placed. - */ - p->numa_scan_period = task_scan_start(p); - best_rq = cpu_rq(env.best_cpu); if (env.best_task == NULL) { ret = migrate_task_to(p, env.best_cpu); @@ -2618,6 +2612,18 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr) } } +static void update_scan_period(struct task_struct *p, int new_cpu) +{ + int src_nid = cpu_to_node(task_cpu(p)); + int dst_nid = cpu_to_node(new_cpu); + + if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING)) + return; + + if (src_nid != dst_nid) + p->numa_scan_period = task_scan_start(p); +} + #else static void task_tick_numa(struct rq *rq, struct task_struct *curr) { @@ -2631,6 +2637,10 @@ static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p) { } +static inline void update_scan_period(struct task_struct *p, int new_cpu) +{ +} + #endif /* CONFIG_NUMA_BALANCING */ static void @@ -6297,7 +6307,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se); * cfs_rq_of(p) references at time of call are still valid and identify the * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held. */ -static void migrate_task_rq_fair(struct task_struct *p, int new_cpu __maybe_unused) +static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) { /* * As blocked tasks retain absolute vruntime the migration needs to @@ -6350,6 +6360,8 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu __maybe_unus /* We have migrated, no longer consider this task hot */ p->se.exec_start = 0; + + update_scan_period(p, new_cpu); } static void task_dead_fair(struct task_struct *p) From 05cbdf4f5c191ff378c47bbf66d7230beb725bdb Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 21 Sep 2018 23:18:59 +0530 Subject: [PATCH 406/499] sched/numa: Limit the conditions where scan period is reset migrate_task_rq_fair() resets the scan rate for NUMA balancing on every cross-node migration. 
In the event of excessive load balancing due to saturation, this may result in the scan rate being pegged at maximum and further overloading the machine. This patch only resets the scan if NUMA balancing is active, a preferred node has been selected and the task is being migrated from the preferred node as these are the most harmful. For example, a migration to the preferred node does not justify a faster scan rate. Similarly, a migration between two nodes that are not preferred is probably bouncing due to over-saturation of the machine. In that case, scanning faster and trapping more NUMA faults will further overload the machine. Specjbb2005 results (8 warehouses) Higher bops are better 2 Socket - 2 Node Haswell - X86 JVMS Prev Current %Change 4 203370 205332 0.964744 1 328431 319785 -2.63252 2 Socket - 4 Node Power8 - PowerNV JVMS Prev Current %Change 1 206070 206585 0.249915 2 Socket - 2 Node Power9 - PowerNV JVMS Prev Current %Change 4 188386 189162 0.41192 1 201566 213760 6.04963 4 Socket - 4 Node Power7 - PowerVM JVMS Prev Current %Change 8 59157.4 58736.8 -0.710985 1 105495 105419 -0.0720413 Some events stats before and after applying the patch. perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 13,825,492 14,285,708 migrations 1,152,509 1,180,621 faults 371,948 339,114 cache-misses 55,654,206,041 55,205,631,894 sched:sched_move_numa 1,856 843 sched:sched_stick_numa 4 6 sched:sched_swap_numa 428 219 migrate:mm_migrate_pages 898 365 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 57146 26907 numa_hint_faults_local 51612 24279 numa_hit 238164 239771 numa_huge_pte_updates 16 0 numa_interleave 63 68 numa_local 238085 239688 numa_other 79 83 numa_pages_migrated 883 363 numa_pte_updates 67540 27415 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 3,288,525 3,202,779 migrations 38,652 37,186 faults 111,678 106,076 cache-misses 12,111,197,376 12,024,873,744 sched:sched_move_numa 900 931 sched:sched_stick_numa 0 0 sched:sched_swap_numa 5 1 migrate:mm_migrate_pages 714 637 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 18572 17409 numa_hint_faults_local 14850 14367 numa_hit 73197 73953 numa_huge_pte_updates 11 20 numa_interleave 25 25 numa_local 73138 73892 numa_other 59 61 numa_pages_migrated 712 668 numa_pte_updates 24021 27276 perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 8,451,543 8,474,013 migrations 202,804 254,934 faults 310,024 320,506 cache-misses 253,522,507 110,580,458 sched:sched_move_numa 213 725 sched:sched_stick_numa 0 0 sched:sched_swap_numa 2 7 migrate:mm_migrate_pages 88 145 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 11830 22797 numa_hint_faults_local 11301 21539 numa_hit 90038 89308 numa_huge_pte_updates 0 0 numa_interleave 855 865 numa_local 89796 88955 numa_other 242 353 numa_pages_migrated 88 149 numa_pte_updates 12039 22930 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 2,049,153 2,195,628 migrations 11,405 11,179 faults 162,309 149,656 cache-misses 7,203,343 8,117,515 sched:sched_move_numa 22 49 sched:sched_stick_numa 0 0 sched:sched_swap_numa 0 0 migrate:mm_migrate_pages 1 5 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 1693 3577 numa_hint_faults_local 1669 3476 numa_hit 25177 26142 
numa_huge_pte_updates 0 0 numa_interleave 194 358 numa_local 24993 26042 numa_other 184 100 numa_pages_migrated 1 5 numa_pte_updates 1577 3587 perf stats 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 94,515,937 100,602,296 migrations 4,203,554 4,135,630 faults 832,697 789,256 cache-misses 226,248,698,331 226,160,621,058 sched:sched_move_numa 1,730 1,366 sched:sched_stick_numa 14 16 sched:sched_swap_numa 432 374 migrate:mm_migrate_pages 1,398 1,350 vmstat 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 80079 47857 numa_hint_faults_local 68620 39768 numa_hit 241187 240165 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 241186 240165 numa_other 1 0 numa_pages_migrated 1347 1224 numa_pte_updates 80729 48354 perf stats 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 63,704,961 58,515,496 migrations 573,404 564,845 faults 230,878 245,807 cache-misses 76,568,222,781 73,603,757,976 sched:sched_move_numa 509 996 sched:sched_stick_numa 31 10 sched:sched_swap_numa 182 193 migrate:mm_migrate_pages 541 646 vmstat 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 8501 13422 numa_hint_faults_local 2960 5619 numa_hit 35526 36118 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 35526 36116 numa_other 0 2 numa_pages_migrated 539 616 numa_pte_updates 8433 13374 Signed-off-by: Mel Gorman Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Cc: Jirka Hladky Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1537552141-27815-5-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5cbfb3068bc6..3529bf61826b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2617,11 +2617,32 @@ static void update_scan_period(struct task_struct *p, int new_cpu) int src_nid = cpu_to_node(task_cpu(p)); int dst_nid = cpu_to_node(new_cpu); + if (!static_branch_likely(&sched_numa_balancing)) + return; + if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING)) return; - if (src_nid != dst_nid) - p->numa_scan_period = task_scan_start(p); + if (src_nid == dst_nid) + return; + + /* + * Allow resets if faults have been trapped before one scan + * has completed. This is most likely due to a new task that + * is pulled cross-node due to wakeups or load balancing. + */ + if (p->numa_scan_seq) { + /* + * Avoid scan adjustments if moving to the preferred + * node or if the task was not previously running on + * the preferred node. + */ + if (dst_nid == p->numa_preferred_nid || + (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid)) + return; + } + + p->numa_scan_period = task_scan_start(p); } #else From 7534612123e0f5d020aba1076a6bb505db0e6bfe Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 21 Sep 2018 23:19:00 +0530 Subject: [PATCH 407/499] mm/migrate: Use spin_trylock() while resetting rate limit Since this spinlock will only serialize the migrate rate limiting, convert the spin_lock() to a spin_trylock(). If another thread is updating, this task can move on. 
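In outline, the window update becomes opportunistic rather than blocking (condensed from the hunk in the diff below; the real code advances next_window by whole intervals until it passes jiffies):

	if (time_after(jiffies, next_window) &&
	    spin_trylock(&pgdat->numabalancing_migrate_lock)) {
		/* the trylock winner resets the counter and advances the window */
		pgdat->numabalancing_migrate_nr_pages = 0;
		WRITE_ONCE(pgdat->numabalancing_migrate_next_window,
			   next_window + interval);
		spin_unlock(&pgdat->numabalancing_migrate_lock);
	}
	/* a task that loses the trylock skips the update and carries on */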
Specjbb2005 results (8 warehouses) Higher bops are better 2 Socket - 2 Node Haswell - X86 JVMS Prev Current %Change 4 205332 198512 -3.32145 1 319785 313559 -1.94693 2 Socket - 4 Node Power8 - PowerNV JVMS Prev Current %Change 8 74912 74761.9 -0.200368 1 206585 214874 4.01239 2 Socket - 2 Node Power9 - PowerNV JVMS Prev Current %Change 4 189162 180536 -4.56011 1 213760 210281 -1.62753 4 Socket - 4 Node Power7 - PowerVM JVMS Prev Current %Change 8 58736.8 56511.4 -3.78877 1 105419 104899 -0.49327 Avoiding stretching of window intervals may be the reason for the regression. Also code now uses READ_ONCE/WRITE_ONCE. That may also be hurting performance to some extent. Some events stats before and after applying the patch. perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 14,285,708 13,818,546 migrations 1,180,621 1,149,960 faults 339,114 385,583 cache-misses 55,205,631,894 55,259,546,768 sched:sched_move_numa 843 2,257 sched:sched_stick_numa 6 9 sched:sched_swap_numa 219 512 migrate:mm_migrate_pages 365 2,225 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 26907 72692 numa_hint_faults_local 24279 62270 numa_hit 239771 238762 numa_huge_pte_updates 0 48 numa_interleave 68 75 numa_local 239688 238676 numa_other 83 86 numa_pages_migrated 363 2225 numa_pte_updates 27415 98557 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 3,202,779 3,173,490 migrations 37,186 36,966 faults 106,076 108,776 cache-misses 12,024,873,744 12,200,075,320 sched:sched_move_numa 931 1,264 sched:sched_stick_numa 0 0 sched:sched_swap_numa 1 0 migrate:mm_migrate_pages 637 899 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 17409 21109 numa_hint_faults_local 14367 17120 numa_hit 73953 72934 numa_huge_pte_updates 20 42 numa_interleave 25 33 numa_local 73892 72866 numa_other 61 68 numa_pages_migrated 668 915 numa_pte_updates 27276 42326 perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 8,474,013 8,312,022 migrations 254,934 231,705 faults 320,506 310,242 cache-misses 110,580,458 402,324,573 sched:sched_move_numa 725 193 sched:sched_stick_numa 0 0 sched:sched_swap_numa 7 3 migrate:mm_migrate_pages 145 93 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 22797 11838 numa_hint_faults_local 21539 11216 numa_hit 89308 90689 numa_huge_pte_updates 0 0 numa_interleave 865 1579 numa_local 88955 89634 numa_other 353 1055 numa_pages_migrated 149 92 numa_pte_updates 22930 12109 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 2,195,628 2,170,481 migrations 11,179 10,126 faults 149,656 160,962 cache-misses 8,117,515 10,834,845 sched:sched_move_numa 49 10 sched:sched_stick_numa 0 0 sched:sched_swap_numa 0 0 migrate:mm_migrate_pages 5 2 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 3577 403 numa_hint_faults_local 3476 358 numa_hit 26142 25898 numa_huge_pte_updates 0 0 numa_interleave 358 207 numa_local 26042 25860 numa_other 100 38 numa_pages_migrated 5 2 numa_pte_updates 3587 400 perf stats 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 100,602,296 110,339,633 migrations 4,135,630 4,139,812 faults 789,256 863,622 cache-misses 226,160,621,058 231,838,045,660 sched:sched_move_numa 1,366 2,196 sched:sched_stick_numa 16 33 
sched:sched_swap_numa 374 544 migrate:mm_migrate_pages 1,350 2,469 vmstat 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 47857 85748 numa_hint_faults_local 39768 66831 numa_hit 240165 242213 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 240165 242211 numa_other 0 2 numa_pages_migrated 1224 2376 numa_pte_updates 48354 86233 perf stats 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 58,515,496 59,331,057 migrations 564,845 552,019 faults 245,807 266,586 cache-misses 73,603,757,976 73,796,312,990 sched:sched_move_numa 996 981 sched:sched_stick_numa 10 54 sched:sched_swap_numa 193 286 migrate:mm_migrate_pages 646 713 vmstat 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 13422 14807 numa_hint_faults_local 5619 5738 numa_hit 36118 36230 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 36116 36228 numa_other 2 2 numa_pages_migrated 616 703 numa_pte_updates 13374 14742 Suggested-by: Peter Zijlstra Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Cc: Jirka Hladky Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1537552141-27815-6-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- mm/migrate.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index d6a2e89b086a..4f1d894835b5 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1867,16 +1867,24 @@ static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); static bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages) { + unsigned long next_window, interval; + + next_window = READ_ONCE(pgdat->numabalancing_migrate_next_window); + interval = msecs_to_jiffies(migrate_interval_millisecs); + /* * Rate-limit the amount of data that is being migrated to a node. * Optimal placement is no good if the memory bus is saturated and * all the time is being spent migrating! */ - if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) { - spin_lock(&pgdat->numabalancing_migrate_lock); + if (time_after(jiffies, next_window) && + spin_trylock(&pgdat->numabalancing_migrate_lock)) { pgdat->numabalancing_migrate_nr_pages = 0; - pgdat->numabalancing_migrate_next_window = jiffies + - msecs_to_jiffies(migrate_interval_millisecs); + do { + next_window += interval; + } while (unlikely(time_after(jiffies, next_window))); + + WRITE_ONCE(pgdat->numabalancing_migrate_next_window, next_window); spin_unlock(&pgdat->numabalancing_migrate_lock); } if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) { From 6fd98e775f24fd41520928d345f5db3ff52bb35d Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 21 Sep 2018 23:19:01 +0530 Subject: [PATCH 408/499] sched/numa: Avoid task migration for small NUMA improvement If NUMA improvement from the task migration is going to be very minimal, then avoid task migration. 
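The threshold check that implements this is small; restated in isolation it looks like the sketch below (the SMALLIMP value and the comparison mirror the hunk further down in this patch, the helper name is made up):

#define SMALLIMP	30	/* roughly 1998/64, where 1998 (2*999) is the maximum NUMA importance */

static int improvement_worth_migrating(long imp, long best_imp)
{
	/* Reject tiny gains, and gains that only marginally beat the current best. */
	if (imp < SMALLIMP || imp <= best_imp + SMALLIMP / 2)
		return 0;
	return 1;
}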
Specjbb2005 results (8 warehouses) Higher bops are better 2 Socket - 2 Node Haswell - X86 JVMS Prev Current %Change 4 198512 205910 3.72673 1 313559 318491 1.57291 2 Socket - 4 Node Power8 - PowerNV JVMS Prev Current %Change 8 74761.9 74935.9 0.232739 1 214874 226796 5.54837 2 Socket - 2 Node Power9 - PowerNV JVMS Prev Current %Change 4 180536 189780 5.12031 1 210281 205695 -2.18089 4 Socket - 4 Node Power7 - PowerVM JVMS Prev Current %Change 8 56511.4 60370 6.828 1 104899 108100 3.05151 1/7 cases is regressing, if we look at events migrate_pages seem to vary the most especially in the regressing case. Also some amount of variance is expected between different runs of Specjbb2005. Some events stats before and after applying the patch. perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 13,818,546 13,801,554 migrations 1,149,960 1,151,541 faults 385,583 433,246 cache-misses 55,259,546,768 55,168,691,835 sched:sched_move_numa 2,257 2,551 sched:sched_stick_numa 9 24 sched:sched_swap_numa 512 904 migrate:mm_migrate_pages 2,225 1,571 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 72692 113682 numa_hint_faults_local 62270 102163 numa_hit 238762 240181 numa_huge_pte_updates 48 36 numa_interleave 75 64 numa_local 238676 240103 numa_other 86 78 numa_pages_migrated 2225 1564 numa_pte_updates 98557 134080 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After cs 3,173,490 3,079,150 migrations 36,966 31,455 faults 108,776 99,081 cache-misses 12,200,075,320 11,588,126,740 sched:sched_move_numa 1,264 1 sched:sched_stick_numa 0 0 sched:sched_swap_numa 0 0 migrate:mm_migrate_pages 899 36 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Haswell - X86 Event Before After numa_hint_faults 21109 430 numa_hint_faults_local 17120 77 numa_hit 72934 71277 numa_huge_pte_updates 42 0 numa_interleave 33 22 numa_local 72866 71218 numa_other 68 59 numa_pages_migrated 915 23 numa_pte_updates 42326 0 perf stats 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 8,312,022 8,707,565 migrations 231,705 171,342 faults 310,242 310,820 cache-misses 402,324,573 136,115,400 sched:sched_move_numa 193 215 sched:sched_stick_numa 0 6 sched:sched_swap_numa 3 24 migrate:mm_migrate_pages 93 162 vmstat 8th warehouse Multi JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 11838 8985 numa_hint_faults_local 11216 8154 numa_hit 90689 93819 numa_huge_pte_updates 0 0 numa_interleave 1579 882 numa_local 89634 93496 numa_other 1055 323 numa_pages_migrated 92 169 numa_pte_updates 12109 9217 perf stats 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After cs 2,170,481 2,152,072 migrations 10,126 10,704 faults 160,962 164,376 cache-misses 10,834,845 3,818,437 sched:sched_move_numa 10 16 sched:sched_stick_numa 0 0 sched:sched_swap_numa 0 7 migrate:mm_migrate_pages 2 199 vmstat 8th warehouse Single JVM 2 Socket - 2 Node Power9 - PowerNV Event Before After numa_hint_faults 403 2248 numa_hint_faults_local 358 1666 numa_hit 25898 25704 numa_huge_pte_updates 0 0 numa_interleave 207 200 numa_local 25860 25679 numa_other 38 25 numa_pages_migrated 2 197 numa_pte_updates 400 2234 perf stats 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 110,339,633 93,330,595 migrations 4,139,812 4,122,061 faults 863,622 865,979 cache-misses 231,838,045,660 225,395,083,479 sched:sched_move_numa 2,196 2,372 sched:sched_stick_numa 33 24 
sched:sched_swap_numa 544 769 migrate:mm_migrate_pages 2,469 1,677 vmstat 8th warehouse Multi JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 85748 91638 numa_hint_faults_local 66831 78096 numa_hit 242213 242225 numa_huge_pte_updates 0 0 numa_interleave 0 2 numa_local 242211 242219 numa_other 2 6 numa_pages_migrated 2376 1515 numa_pte_updates 86233 92274 perf stats 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After cs 59,331,057 51,487,271 migrations 552,019 537,170 faults 266,586 256,921 cache-misses 73,796,312,990 70,073,831,187 sched:sched_move_numa 981 576 sched:sched_stick_numa 54 24 sched:sched_swap_numa 286 327 migrate:mm_migrate_pages 713 726 vmstat 8th warehouse Single JVM 4 Socket - 4 Node Power7 - PowerVM Event Before After numa_hint_faults 14807 12000 numa_hint_faults_local 5738 5024 numa_hit 36230 36470 numa_huge_pte_updates 0 0 numa_interleave 0 0 numa_local 36228 36465 numa_other 2 5 numa_pages_migrated 703 726 numa_pte_updates 14742 11930 Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Cc: Jirka Hladky Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1537552141-27815-7-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3529bf61826b..25c7c7e09cbd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1567,6 +1567,13 @@ static bool load_too_imbalanced(long src_load, long dst_load, return (imb > old_imb); } +/* + * Maximum NUMA importance can be 1998 (2*999); + * SMALLIMP @ 30 would be close to 1998/64. + * Used to deter task migration. + */ +#define SMALLIMP 30 + /* * This checks if the overall compute and NUMA accesses of the system would * be improved if the source tasks was migrated to the target dst_cpu taking @@ -1600,7 +1607,7 @@ static void task_numa_compare(struct task_numa_env *env, goto unlock; if (!cur) { - if (maymove || imp > env->best_imp) + if (maymove && moveimp >= env->best_imp) goto assign; else goto unlock; @@ -1643,15 +1650,21 @@ static void task_numa_compare(struct task_numa_env *env, task_weight(cur, env->dst_nid, dist); } - if (imp <= env->best_imp) - goto unlock; - if (maymove && moveimp > imp && moveimp > env->best_imp) { - imp = moveimp - 1; + imp = moveimp; cur = NULL; goto assign; } + /* + * If the NUMA importance is less than SMALLIMP, + * task migration might only result in ping pong + * of tasks and also hurt performance due to cache + * misses. + */ + if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2) + goto unlock; + /* * In the overloaded case, try and keep the load balanced. */ From 12d43deb1ee639d01a2a8d2a7a4cc8ad31224475 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 1 Oct 2018 17:31:17 +0200 Subject: [PATCH 409/499] drm: fix use-after-free read in drm_mode_create_lease_ioctl() fd_install() moves the reference given to it into the file descriptor table of the current process. If the current process is multithreaded, then immediately after fd_install(), another thread can close() the file descriptor and cause the file's resources to be cleaned up. Since the reference to "lessee" is held by the file, we must not access "lessee" after the fd_install() call. As far as I can tell, to reach this codepath, the caller must have an open file descriptor to a DRI device in master mode. 
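The rule being applied here can be shown with a tiny userspace analog (hypothetical struct and names, memory-ordering details ignored): copy out whatever is still needed before an object is published to other threads, and never touch it afterwards.

struct lease {
	int lessee_id;
};

static void publish_lease(struct lease *lessee, struct lease **shared_slot,
			  int *out_id)
{
	*out_id = lessee->lessee_id;	/* read the object first ... */
	*shared_slot = lessee;		/* ... then publish it (the fd_install() step) */
	/* no dereference of 'lessee' past this point */
}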
I'm not sure what the requirements for that are. Signed-off-by: Jann Horn Fixes: 62884cd386b8 ("drm: Add four ioctls for managing drm mode object leases [v7]") Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181001153117.216923-1-jannh@google.com --- drivers/gpu/drm/drm_lease.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index b54fb78a283c..b82da96ded5c 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -566,14 +566,14 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, lessee_priv->is_master = 1; lessee_priv->authenticated = 1; - /* Hook up the fd */ - fd_install(fd, lessee_file); - /* Pass fd back to userspace */ DRM_DEBUG_LEASE("Returning fd %d id %d\n", fd, lessee->lessee_id); cl->fd = fd; cl->lessee_id = lessee->lessee_id; + /* Hook up the fd */ + fd_install(fd, lessee_file); + DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n"); return 0; From 6d348925b306ab0cc9757a09a8cea6bf288018e4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 1 Oct 2018 15:28:56 +0100 Subject: [PATCH 410/499] MAINTAINERS: Remove dead path from LOCKING PRIMITIVES entry Since 890658b7ab48 ("locking/mutex: Kill arch specific code"), there are no mutex header files under arch/, so we can remove the redundant entry from MAINTAINERS. Reported-by: Joe Perches Signed-off-by: Will Deacon Cc: Jason Low Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181001142856.GC9716@arm.com Signed-off-by: Ingo Molnar --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a255240d1452..3bd8913b2d78 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8598,7 +8598,6 @@ F: include/linux/spinlock*.h F: arch/*/include/asm/spinlock*.h F: include/linux/rwlock*.h F: include/linux/mutex*.h -F: arch/*/include/asm/mutex*.h F: include/linux/rwsem*.h F: arch/*/include/asm/rwsem.h F: include/linux/seqlock.h From efaffc5e40aeced0bcb497ed7a0a5b8c14abfcdf Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 1 Oct 2018 11:05:24 +0100 Subject: [PATCH 411/499] mm, sched/numa: Remove rate-limiting of automatic NUMA balancing migration Rate limiting of page migrations due to automatic NUMA balancing was introduced to mitigate the worst-case scenario of migrating at high frequency due to false sharing or slowly ping-ponging between nodes. Since then, a lot of effort was spent on correctly identifying these pages and avoiding unnecessary migrations and the safety net may no longer be required. Jirka Hladky reported a regression in 4.17 due to a scheduler patch that avoids spreading STREAM tasks wide prematurely. However, once the task was properly placed, it delayed migrating the memory due to rate limiting. Increasing the limit fixed the problem for him. Currently, the limit is hard-coded and does not account for the real capabilities of the hardware. Even if an estimate was attempted, it would not properly account for the number of memory controllers and it could not account for the amount of bandwidth used for normal accesses. Rather than fudging, this patch simply eliminates the rate limiting. However, Jirka reports that a STREAM configuration using multiple processes achieved similar performance to 4.16. In local tests, this patch improved performance of STREAM relative to the baseline but it is somewhat machine-dependent. 
Most workloads show little or no performance difference implying that there is not a heavy reliance on the throttling mechanism and it is safe to remove. STREAM on 2-socket machine 4.19.0-rc5 4.19.0-rc5 numab-v1r1 noratelimit-v1r1 MB/sec copy 43298.52 ( 0.00%) 44673.38 ( 3.18%) MB/sec scale 30115.06 ( 0.00%) 31293.06 ( 3.91%) MB/sec add 32825.12 ( 0.00%) 34883.62 ( 6.27%) MB/sec triad 32549.52 ( 0.00%) 34906.60 ( 7.24%) Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: Jirka Hladky Cc: Linus Torvalds Cc: Linux-MM Cc: Srikar Dronamraju Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181001100525.29789-2-mgorman@techsingularity.net Signed-off-by: Ingo Molnar --- include/linux/mmzone.h | 6 ---- include/trace/events/migrate.h | 27 -------------- mm/migrate.c | 65 ---------------------------------- mm/page_alloc.c | 2 -- 4 files changed, 100 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1e22d96734e0..3f4c0b167333 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -671,12 +671,6 @@ typedef struct pglist_data { #ifdef CONFIG_NUMA_BALANCING /* Lock serializing the migrate rate limiting window */ spinlock_t numabalancing_migrate_lock; - - /* Rate limiting time interval */ - unsigned long numabalancing_migrate_next_window; - - /* Number of pages migrated during the rate limiting time interval */ - unsigned long numabalancing_migrate_nr_pages; #endif /* * This is a per-node reserve of pages that are not available diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 711372845945..705b33d1e395 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -70,33 +70,6 @@ TRACE_EVENT(mm_migrate_pages, __print_symbolic(__entry->mode, MIGRATE_MODE), __print_symbolic(__entry->reason, MIGRATE_REASON)) ); - -TRACE_EVENT(mm_numa_migrate_ratelimit, - - TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages), - - TP_ARGS(p, dst_nid, nr_pages), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN) - __field( pid_t, pid) - __field( int, dst_nid) - __field( unsigned long, nr_pages) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->dst_nid = dst_nid; - __entry->nr_pages = nr_pages; - ), - - TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu", - __entry->comm, - __entry->pid, - __entry->dst_nid, - __entry->nr_pages) -); #endif /* _TRACE_MIGRATE_H */ /* This part must be outside protection */ diff --git a/mm/migrate.c b/mm/migrate.c index 4f1d894835b5..5e285c1249a0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1855,54 +1855,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page, return newpage; } -/* - * page migration rate limiting control. - * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs - * window of time. Default here says do not migrate more than 1280M per second. - */ -static unsigned int migrate_interval_millisecs __read_mostly = 100; -static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); - -/* Returns true if the node is migrate rate-limited after the update */ -static bool numamigrate_update_ratelimit(pg_data_t *pgdat, - unsigned long nr_pages) - { - unsigned long next_window, interval; - - next_window = READ_ONCE(pgdat->numabalancing_migrate_next_window); - interval = msecs_to_jiffies(migrate_interval_millisecs); - - /* - * Rate-limit the amount of data that is being migrated to a node.
- * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (time_after(jiffies, next_window) && - spin_trylock(&pgdat->numabalancing_migrate_lock)) { - pgdat->numabalancing_migrate_nr_pages = 0; - do { - next_window += interval; - } while (unlikely(time_after(jiffies, next_window))); - - WRITE_ONCE(pgdat->numabalancing_migrate_next_window, next_window); - spin_unlock(&pgdat->numabalancing_migrate_lock); - } - if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) { - trace_mm_numa_migrate_ratelimit(current, pgdat->node_id, - nr_pages); - return true; - } - - /* - * This is an unlocked non-atomic update so errors are possible. - * The consequences are failing to migrate when we potentiall should - * have which is not severe enough to warrant locking. If it is ever - * a problem, it can be converted to a per-cpu counter. - */ - pgdat->numabalancing_migrate_nr_pages += nr_pages; - return false; -} - static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) { int page_lru; @@ -1975,14 +1927,6 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, if (page_is_file_cache(page) && PageDirty(page)) goto out; - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (numamigrate_update_ratelimit(pgdat, 1)) - goto out; - isolated = numamigrate_isolate_page(pgdat, page); if (!isolated) goto out; @@ -2029,14 +1973,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, unsigned long mmun_start = address & HPAGE_PMD_MASK; unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; - /* - * Rate-limit the amount of data that is being migrated to a node. - * Optimal placement is no good if the memory bus is saturated and - * all the time is being spent migrating! - */ - if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR)) - goto out_dropref; - new_page = alloc_pages_node(node, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE), HPAGE_PMD_ORDER); @@ -2133,7 +2069,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, out_fail: count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); -out_dropref: ptl = pmd_lock(mm, pmd); if (pmd_same(*pmd, entry)) { entry = pmd_modify(entry, vma->vm_page_prot); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 89d2a2ab3fe6..706a738c0aee 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6197,8 +6197,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages, static void pgdat_init_numabalancing(struct pglist_data *pgdat) { spin_lock_init(&pgdat->numabalancing_migrate_lock); - pgdat->numabalancing_migrate_nr_pages = 0; - pgdat->numabalancing_migrate_next_window = jiffies; } #else static void pgdat_init_numabalancing(struct pglist_data *pgdat) {} From 37355bdc5a129899f6b245900a8eb944a092f7fd Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 1 Oct 2018 11:05:25 +0100 Subject: [PATCH 412/499] sched/numa: Migrate pages to local nodes quicker early in the lifetime of a task Automatic NUMA Balancing uses a multi-stage pass to decide whether a page should migrate to a local node. This filter avoids excessive ping-ponging if a page is shared or used by threads that migrate cross-node frequently. Threads inherit both page tables and the preferred node ID from the parent. This means that threads can trigger hinting faults earlier than a new task which delays scanning for a number of seconds. 
As it can be load balanced very early in its lifetime there can be an unnecessary delay before it starts migrating thread-local data. This patch migrates private pages faster early in the lifetime of a thread using the sequence counter as an identifier of new tasks. With this patch applied, STREAM performance is the same as 4.17 even though processes are not spread cross-node prematurely. Other workloads showed a mix of minor gains and losses. This is somewhat expected most workloads are not very sensitive to the starting conditions of a process. 4.19.0-rc5 4.19.0-rc5 4.17.0 numab-v1r1 fastmigrate-v1r1 vanilla MB/sec copy 43298.52 ( 0.00%) 47335.46 ( 9.32%) 47219.24 ( 9.06%) MB/sec scale 30115.06 ( 0.00%) 32568.12 ( 8.15%) 32527.56 ( 8.01%) MB/sec add 32825.12 ( 0.00%) 36078.94 ( 9.91%) 35928.02 ( 9.45%) MB/sec triad 32549.52 ( 0.00%) 35935.94 ( 10.40%) 35969.88 ( 10.51%) Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Acked-by: Peter Zijlstra Cc: Jirka Hladky Cc: Linus Torvalds Cc: Linux-MM Cc: Srikar Dronamraju Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181001100525.29789-3-mgorman@techsingularity.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 25c7c7e09cbd..7fc4a371bdd2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1392,6 +1392,17 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, int last_cpupid, this_cpupid; this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid); + last_cpupid = page_cpupid_xchg_last(page, this_cpupid); + + /* + * Allow first faults or private faults to migrate immediately early in + * the lifetime of a task. The magic number 4 is based on waiting for + * two full passes of the "multi-stage node selection" test that is + * executed below. + */ + if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) && + (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid))) + return true; /* * Multi-stage node selection is used in conjunction with a periodic @@ -1410,7 +1421,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, * This quadric squishes small probabilities, making it less likely we * act on an unlikely task<->page relation. */ - last_cpupid = page_cpupid_xchg_last(page, this_cpupid); if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != dst_nid) return false; From 4d4c2d89913e2d891bd6a34b12050a2576e60525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Mon, 1 Oct 2018 21:45:36 +0200 Subject: [PATCH 413/499] drm/cma-helper: Fix crash in fbdev error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sergey Suloev reported a crash happening in drm_client_dev_hotplug() when fbdev had failed to register. [ 9.124598] vc4_hdmi 3f902000.hdmi: ASoC: Failed to create component debugfs directory [ 9.147667] vc4_hdmi 3f902000.hdmi: vc4-hdmi-hifi <-> 3f902000.hdmi mapping ok [ 9.155184] vc4_hdmi 3f902000.hdmi: ASoC: no DMI vendor name! 
[ 9.166544] vc4-drm soc:gpu: bound 3f902000.hdmi (ops vc4_hdmi_ops [vc4]) [ 9.173840] vc4-drm soc:gpu: bound 3f806000.vec (ops vc4_vec_ops [vc4]) [ 9.181029] vc4-drm soc:gpu: bound 3f004000.txp (ops vc4_txp_ops [vc4]) [ 9.188519] vc4-drm soc:gpu: bound 3f400000.hvs (ops vc4_hvs_ops [vc4]) [ 9.195690] vc4-drm soc:gpu: bound 3f206000.pixelvalve (ops vc4_crtc_ops [vc4]) [ 9.203523] vc4-drm soc:gpu: bound 3f207000.pixelvalve (ops vc4_crtc_ops [vc4]) [ 9.215032] vc4-drm soc:gpu: bound 3f807000.pixelvalve (ops vc4_crtc_ops [vc4]) [ 9.274785] vc4-drm soc:gpu: bound 3fc00000.v3d (ops vc4_v3d_ops [vc4]) [ 9.290246] [drm] Initialized vc4 0.0.0 20140616 for soc:gpu on minor 0 [ 9.297464] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013). [ 9.304600] [drm] Driver supports precise vblank timestamp query. [ 9.382856] vc4-drm soc:gpu: [drm:drm_fb_helper_fbdev_setup [drm_kms_helper]] *ERROR* Failed to set fbdev configuration [ 10.404937] Unable to handle kernel paging request at virtual address 00330a656369768a [ 10.441620] [00330a656369768a] address between user and kernel address ranges [ 10.449087] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 10.454762] Modules linked in: brcmfmac vc4 drm_kms_helper cfg80211 drm rfkill smsc95xx brcmutil usbnet drm_panel_orientation_quirks raspberrypi_hwmon bcm2835_dma crc32_ce pwm_bcm2835 bcm2835_rng virt_dma rng_core i2c_bcm2835 ip_tables x_tables ipv6 [ 10.477296] CPU: 2 PID: 45 Comm: kworker/2:1 Not tainted 4.19.0-rc5 #3 [ 10.483934] Hardware name: Raspberry Pi 3 Model B Rev 1.2 (DT) [ 10.489966] Workqueue: events output_poll_execute [drm_kms_helper] [ 10.596515] Process kworker/2:1 (pid: 45, stack limit = 0x000000007e8924dc) [ 10.603590] Call trace: [ 10.606259] drm_client_dev_hotplug+0x5c/0xb0 [drm] [ 10.611303] drm_kms_helper_hotplug_event+0x30/0x40 [drm_kms_helper] [ 10.617849] output_poll_execute+0xc4/0x1e0 [drm_kms_helper] [ 10.623616] process_one_work+0x1c8/0x318 [ 10.627695] worker_thread+0x48/0x428 [ 10.631420] kthread+0xf8/0x128 [ 10.634615] ret_from_fork+0x10/0x18 [ 10.638255] Code: 54000220 f9401261 aa1303e0 b4000141 (f9400c21) [ 10.644456] ---[ end trace c75b4a4b0e141908 ]--- The reason for this is that drm_fbdev_cma_init() removes the drm_client when fbdev registration fails, but it doesn't remove the client from the drm_device client list. So the client list now has a pointer that points into the unknown and we have a 'use after free' situation. Split drm_client_new() into drm_client_init() and drm_client_add() to fix removal in the error path. 
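The split can be illustrated with a minimal, lock-free userspace sketch (hypothetical names): everything that can fail lives in init(), publication to the global list is a separate infallible add(), so an error path can still release the half-built object without ever leaving a dangling list entry.

struct client {
	struct client *next;
};

static struct client *client_list;

static int client_init(struct client *c)
{
	c->next = NULL;
	return 0;			/* acquire resources here; may fail */
}

static void client_add(struct client *c)
{
	c->next = client_list;		/* publish only after everything succeeded */
	client_list = c;
}

static void client_release(struct client *c)
{
	(void)c;			/* undo client_init() only */
}

static int client_setup(struct client *c, int later_step_fails)
{
	int ret = client_init(c);

	if (ret)
		return ret;
	if (later_step_fails) {
		client_release(c);	/* safe: c was never put on the list */
		return -1;
	}
	client_add(c);			/* nothing is allowed to fail after this */
	return 0;
}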
Fixes: 894a677f4b3e ("drm/cma-helper: Use the generic fbdev emulation") Reported-by: Sergey Suloev Cc: Stefan Wahren Cc: Eric Anholt Cc: Daniel Vetter Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181001194536.57756-1-noralf@tronnes.org --- drivers/gpu/drm/drm_client.c | 35 +++++++++++++++++++++-------- drivers/gpu/drm/drm_fb_cma_helper.c | 4 +++- drivers/gpu/drm/drm_fb_helper.c | 4 +++- include/drm/drm_client.h | 5 +++-- 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index baff50a4c234..df31c3815092 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -63,20 +63,21 @@ static void drm_client_close(struct drm_client_dev *client) EXPORT_SYMBOL(drm_client_close); /** - * drm_client_new - Create a DRM client + * drm_client_init - Initialise a DRM client * @dev: DRM device * @client: DRM client * @name: Client name * @funcs: DRM client functions (optional) * + * This initialises the client and opens a &drm_file. Use drm_client_add() to complete the process. * The caller needs to hold a reference on @dev before calling this function. * The client is freed when the &drm_device is unregistered. See drm_client_release(). * * Returns: * Zero on success or negative error code on failure. */ -int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, - const char *name, const struct drm_client_funcs *funcs) +int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, + const char *name, const struct drm_client_funcs *funcs) { int ret; @@ -95,10 +96,6 @@ int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, if (ret) goto err_put_module; - mutex_lock(&dev->clientlist_mutex); - list_add(&client->list, &dev->clientlist); - mutex_unlock(&dev->clientlist_mutex); - drm_dev_get(dev); return 0; @@ -109,13 +106,33 @@ int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, return ret; } -EXPORT_SYMBOL(drm_client_new); +EXPORT_SYMBOL(drm_client_init); + +/** + * drm_client_add - Add client to the device list + * @client: DRM client + * + * Add the client to the &drm_device client list to activate its callbacks. + * @client must be initialized by a call to drm_client_init(). After + * drm_client_add() it is no longer permissible to call drm_client_release() + * directly (outside the unregister callback), instead cleanup will happen + * automatically on driver unload. + */ +void drm_client_add(struct drm_client_dev *client) +{ + struct drm_device *dev = client->dev; + + mutex_lock(&dev->clientlist_mutex); + list_add(&client->list, &dev->clientlist); + mutex_unlock(&dev->clientlist_mutex); +} +EXPORT_SYMBOL(drm_client_add); /** * drm_client_release - Release DRM client resources * @client: DRM client * - * Releases resources by closing the &drm_file that was opened by drm_client_new(). + * Releases resources by closing the &drm_file that was opened by drm_client_init(). * It is called automatically if the &drm_client_funcs.unregister callback is _not_ set. * * This function should only be called from the unregister callback. 
An exception diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c index 9da36a6271d3..9ac1f2e0f064 100644 --- a/drivers/gpu/drm/drm_fb_cma_helper.c +++ b/drivers/gpu/drm/drm_fb_cma_helper.c @@ -160,7 +160,7 @@ struct drm_fbdev_cma *drm_fbdev_cma_init(struct drm_device *dev, fb_helper = &fbdev_cma->fb_helper; - ret = drm_client_new(dev, &fb_helper->client, "fbdev", NULL); + ret = drm_client_init(dev, &fb_helper->client, "fbdev", NULL); if (ret) goto err_free; @@ -169,6 +169,8 @@ struct drm_fbdev_cma *drm_fbdev_cma_init(struct drm_device *dev, if (ret) goto err_client_put; + drm_client_add(&fb_helper->client); + return fbdev_cma; err_client_put: diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 16ec93b75dbf..515a7aec57ac 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -3218,12 +3218,14 @@ int drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) if (!fb_helper) return -ENOMEM; - ret = drm_client_new(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs); + ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs); if (ret) { kfree(fb_helper); return ret; } + drm_client_add(&fb_helper->client); + fb_helper->preferred_bpp = preferred_bpp; drm_fbdev_client_hotplug(&fb_helper->client); diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h index 989f8e52864d..971bb7853776 100644 --- a/include/drm/drm_client.h +++ b/include/drm/drm_client.h @@ -87,9 +87,10 @@ struct drm_client_dev { struct drm_file *file; }; -int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, - const char *name, const struct drm_client_funcs *funcs); +int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, + const char *name, const struct drm_client_funcs *funcs); void drm_client_release(struct drm_client_dev *client); +void drm_client_add(struct drm_client_dev *client); void drm_client_dev_unregister(struct drm_device *dev); void drm_client_dev_hotplug(struct drm_device *dev); From b0584ea66d73919cbf5878a3420a837f06ab8396 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 2 Oct 2018 02:41:53 +0000 Subject: [PATCH 414/499] bpf: don't accept cgroup local storage with zero value size Explicitly forbid creating cgroup local storage maps with zero value size, as it makes no sense and might even cause a panic. Reported-by: syzbot+18628320d3b14a5c459c@syzkaller.appspotmail.com Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/local_storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 94126cbffc88..830d7f095748 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -195,6 +195,9 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) return ERR_PTR(-EINVAL); + if (attr->value_size == 0) + return ERR_PTR(-EINVAL); + if (attr->value_size > PAGE_SIZE) return ERR_PTR(-E2BIG); From b45ba4a51cde29b2939365ef0c07ad34c8321789 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 1 Oct 2018 12:21:10 +0000 Subject: [PATCH 415/499] powerpc/lib: fix book3s/32 boot failure due to code patching Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") accesses 'init_mem_is_free' flag too early, before the kernel is relocated. 
This provokes early boot failure (before the console is active). As it is not necessary to do this verification that early, this patch moves the test into patch_instruction() instead of __patch_instruction(). This modification also has the advantage of avoiding unnecessary remappings. Fixes: 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections") Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/lib/code-patching.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 6ae2777c220d..5ffee298745f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -28,12 +28,6 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, { int err; - /* Make sure we aren't patching a freed init section */ - if (init_mem_is_free && init_section_contains(exec_addr, 4)) { - pr_debug("Skipping init section patching addr: 0x%px\n", exec_addr); - return 0; - } - __put_user_size(instr, patch_addr, 4, err); if (err) return err; @@ -148,7 +142,7 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { int err; unsigned int *patch_addr = NULL; @@ -188,12 +182,22 @@ int patch_instruction(unsigned int *addr, unsigned int instr) } #else /* !CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ + +int patch_instruction(unsigned int *addr, unsigned int instr) +{ + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", addr); + return 0; + } + return do_patch_instruction(addr, instr); +} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(unsigned int *addr, unsigned long target, int flags) From 86da809dda64a63fc27e05a215475325c3aaae92 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 24 Sep 2018 13:20:44 +0300 Subject: [PATCH 416/499] thunderbolt: Do not handle ICM events after domain is stopped If there is a long chain of devices connected when the driver is loaded ICM sends device connected event for each and those are put to tb->wq for later processing. Now if the driver gets unloaded in the middle, so that the work queue is not yet empty it gets flushed by tb_domain_stop(). However, by that time the root switch is already removed so the driver crashes when it tries to dereference it in ICM event handling callbacks. Fix this by checking whether the root switch is already removed. If it is we know that the domain is stopped and we should merely skip handling the event. 
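A compressed userspace sketch of that guard (hypothetical names, a pthread mutex standing in for the bus lock): queued events are dispatched only while the shared object still exists, and anything flushed after teardown is treated as canceled.

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t domain_lock = PTHREAD_MUTEX_INITIALIZER;
static void *root_switch;		/* set to NULL when the domain is stopped */

static void handle_queued_event(int code)
{
	(void)code;
	pthread_mutex_lock(&domain_lock);
	if (root_switch) {
		/* dispatch on 'code' here */
	}
	/* else: domain already stopped, drop the event */
	pthread_mutex_unlock(&domain_lock);
}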
Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/icm.c | 49 ++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index e1e264a9a4c7..28fc4ce75edb 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -738,14 +738,6 @@ icm_fr_xdomain_connected(struct tb *tb, const struct icm_pkg_header *hdr) u8 link, depth; u64 route; - /* - * After NVM upgrade adding root switch device fails because we - * initiated reset. During that time ICM might still send - * XDomain connected message which we ignore here. - */ - if (!tb->root_switch) - return; - link = pkg->link_info & ICM_LINK_INFO_LINK_MASK; depth = (pkg->link_info & ICM_LINK_INFO_DEPTH_MASK) >> ICM_LINK_INFO_DEPTH_SHIFT; @@ -1037,14 +1029,6 @@ icm_tr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr) if (pkg->hdr.packet_id) return; - /* - * After NVM upgrade adding root switch device fails because we - * initiated reset. During that time ICM might still send device - * connected message which we ignore here. - */ - if (!tb->root_switch) - return; - route = get_route(pkg->route_hi, pkg->route_lo); authorized = pkg->link_info & ICM_LINK_INFO_APPROVED; security_level = (pkg->hdr.flags & ICM_FLAGS_SLEVEL_MASK) >> @@ -1408,19 +1392,26 @@ static void icm_handle_notification(struct work_struct *work) mutex_lock(&tb->lock); - switch (n->pkg->code) { - case ICM_EVENT_DEVICE_CONNECTED: - icm->device_connected(tb, n->pkg); - break; - case ICM_EVENT_DEVICE_DISCONNECTED: - icm->device_disconnected(tb, n->pkg); - break; - case ICM_EVENT_XDOMAIN_CONNECTED: - icm->xdomain_connected(tb, n->pkg); - break; - case ICM_EVENT_XDOMAIN_DISCONNECTED: - icm->xdomain_disconnected(tb, n->pkg); - break; + /* + * When the domain is stopped we flush its workqueue but before + * that the root switch is removed. In that case we should treat + * the queued events as being canceled. + */ + if (tb->root_switch) { + switch (n->pkg->code) { + case ICM_EVENT_DEVICE_CONNECTED: + icm->device_connected(tb, n->pkg); + break; + case ICM_EVENT_DEVICE_DISCONNECTED: + icm->device_disconnected(tb, n->pkg); + break; + case ICM_EVENT_XDOMAIN_CONNECTED: + icm->xdomain_connected(tb, n->pkg); + break; + case ICM_EVENT_XDOMAIN_DISCONNECTED: + icm->xdomain_disconnected(tb, n->pkg); + break; + } } mutex_unlock(&tb->lock); From eafa717bc145963c944bb0a64d16add683861b35 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 24 Sep 2018 13:20:45 +0300 Subject: [PATCH 417/499] thunderbolt: Initialize after IOMMUs If IOMMU is enabled and Thunderbolt driver is built into the kernel image, it will be probed before IOMMUs are attached to the PCI bus. Because of this DMA mappings the driver does will not go through IOMMU and start failing right after IOMMUs are enabled. For this reason move the Thunderbolt driver initialization happen at rootfs level. 
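Initcall levels run in a fixed order during boot (core, postcore, arch, subsys, fs, rootfs, device, late), so moving the entry point from fs_initcall() to rootfs_initcall() pushes the probe one level later, past the point where, per the description above, the IOMMUs have been attached. A rough sketch of the idiom, not buildable on its own:

static int __init example_driver_init(void)
{
	return 0;	/* driver/bus registration would go here */
}
rootfs_initcall(example_driver_init);	/* was fs_initcall(example_driver_init) */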
Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/nhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 88cff05a1808..5cd6bdfa068f 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -1191,5 +1191,5 @@ static void __exit nhi_unload(void) tb_domain_exit(); } -fs_initcall(nhi_init); +rootfs_initcall(nhi_init); module_exit(nhi_unload); From 657ade07df72847f591ccdb36bd9b91ed0edbac3 Mon Sep 17 00:00:00 2001 From: Rickard x Andersson Date: Tue, 2 Oct 2018 14:49:32 +0200 Subject: [PATCH 418/499] net: fec: fix rare tx timeout During certain heavy network loads TX could time out with TX ring dump. TX is sometimes never restarted after reaching "tx_stop_threshold" because function "fec_enet_tx_queue" only tests the first queue. In addition the TX timeout callback function failed to recover because it also operated only on the first queue. Signed-off-by: Rickard x Andersson Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2708297e7795..bf9b9fd6d2a0 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1158,7 +1158,7 @@ static void fec_enet_timeout_work(struct work_struct *work) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -1273,7 +1273,7 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) /* Since we have freed up a buffer, the ring is no longer full */ - if (netif_queue_stopped(ndev)) { + if (netif_tx_queue_stopped(nq)) { entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free >= txq->tx_wake_threshold) netif_tx_wake_queue(nq); @@ -1746,7 +1746,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -2247,7 +2247,7 @@ static int fec_enet_set_pauseparam(struct net_device *ndev, napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } From fe3a83af6a50199bf250fa331e94216912f79395 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 2 Oct 2018 14:23:45 +0100 Subject: [PATCH 419/499] declance: Fix continuation with the adapter identification message Fix a commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") regression with the `declance' driver, which caused the adapter identification message to be split between two lines, e.g.: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA , addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Address that properly, by printing identification with a single call, making the messages now look like: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA, addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Signed-off-by: Maciej W. Rozycki Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/declance.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 116997a8b593..00332a1ea84b 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1031,6 +1031,7 @@ static int dec_lance_probe(struct device *bdev, const int type) int i, ret; unsigned long esar_base; unsigned char *esar; + const char *desc; if (dec_lance_debug && version_printed++ == 0) printk(version); @@ -1216,19 +1217,20 @@ static int dec_lance_probe(struct device *bdev, const int type) */ switch (type) { case ASIC_LANCE: - printk("%s: IOASIC onboard LANCE", name); + desc = "IOASIC onboard LANCE"; break; case PMAD_LANCE: - printk("%s: PMAD-AA", name); + desc = "PMAD-AA"; break; case PMAX_LANCE: - printk("%s: PMAX onboard LANCE", name); + desc = "PMAX onboard LANCE"; break; } for (i = 0; i < 6; i++) dev->dev_addr[i] = esar[i * 4]; - printk(", addr = %pM, irq = %d\n", dev->dev_addr, dev->irq); + printk("%s: %s, addr = %pM, irq = %d\n", + name, desc, dev->dev_addr, dev->irq); dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; From ff58e2df62ce29d0552278c290ae494b30fe0c6f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Oct 2018 10:10:14 -0700 Subject: [PATCH 420/499] nfp: avoid soft lockups under control message storm When FW floods the driver with control messages try to exit the cmsg processing loop every now and then to avoid soft lockups. Cmsg processing is generally very lightweight so 512 seems like a reasonable budget, which should not be exceeded under normal conditions. Fixes: 77ece8d5f196 ("nfp: add control vNIC datapath") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Tested-by: Pieter Jansen van Vuuren Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/nfp_net_common.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 8ed38fd5a852..c6d29fdbb880 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2077,14 +2077,17 @@ nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, return true; } -static void nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) { struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; struct nfp_net *nn = r_vec->nfp_net; struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; - while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring)) + while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) continue; + + return budget; } static void nfp_ctrl_poll(unsigned long arg) @@ -2096,9 +2099,13 @@ static void nfp_ctrl_poll(unsigned long arg) __nfp_ctrl_tx_queued(r_vec); spin_unlock_bh(&r_vec->lock); - nfp_ctrl_rx(r_vec); - - nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } } /* Setup and Configuration From 20a8378aa9dd108a01cb0e695599f5257a885c4b Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 2 Oct 2018 13:01:45 -0500 Subject: [PATCH 421/499] x86/platform/uv: Provide is_early_uv_system() Introduce is_early_uv_system() which uses efi.uv_systab to decide early in the boot process whether the kernel runs on a UV system. This is needed to skip other early setup/init code that might break the UV platform if done too early such as before necessary ACPI tables parsing takes place. Suggested-by: Hedi Berriche Signed-off-by: Mike Travis Signed-off-by: Thomas Gleixner Reviewed-by: Russ Anderson Reviewed-by: Dimitri Sivanich Cc: "H. 
Peter Anvin" Cc: Russ Anderson Cc: Dimitri Sivanich Cc: Borislav Petkov Cc: Kate Stewart Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Len Brown Cc: Dou Liyang Cc: Xiaoming Gao Cc: Rajvi Jingar Link: https://lkml.kernel.org/r/20181002180144.801700401@stormcage.americas.sgi.com --- arch/x86/include/asm/uv/uv.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index a80c0673798f..e60c45fd3679 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,8 +10,13 @@ struct cpumask; struct mm_struct; #ifdef CONFIG_X86_UV +#include extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); +} extern int is_uv_system(void); extern int is_uv_hubless(void); extern void uv_cpu_init(void); @@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, #else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } From 2647c43c7f3ba4b752bfce261d53b16e2f5bc9e3 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 2 Oct 2018 13:01:46 -0500 Subject: [PATCH 422/499] x86/tsc: Fix UV TSC initialization The recent rework of the TSC calibration code introduced a regression on UV systems as it added a call to tsc_early_init() which initializes the TSC ADJUST values before acpi_boot_table_init(). In the case of UV systems, that is a necessary step that calls uv_system_init(). This informs tsc_sanitize_first_cpu() that the kernel runs on a platform with async TSC resets as documented in commit 341102c3ef29 ("x86/tsc: Add option that TSC on Socket 0 being non-zero is valid") Fix it by skipping the early tsc initialization on UV systems and let TSC init tests take place later in tsc_init(). Fixes: cf7a63ef4e02 ("x86/tsc: Calibrate tsc only once") Suggested-by: Hedi Berriche Signed-off-by: Mike Travis Signed-off-by: Thomas Gleixner Reviewed-by: Russ Anderson Reviewed-by: Dimitri Sivanich Cc: "H. 
Peter Anvin" Cc: Russ Anderson Cc: Dimitri Sivanich Cc: Borislav Petkov Cc: Kate Stewart Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Len Brown Cc: Dou Liyang Cc: Xiaoming Gao Cc: Rajvi Jingar Link: https://lkml.kernel.org/r/20181002180144.923579706@stormcage.americas.sgi.com --- arch/x86/kernel/tsc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6490f618e096..b52bd2b6cdb4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -26,6 +26,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1433,6 +1434,9 @@ void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; + /* Don't change UV TSC multi-chassis synchronization */ + if (is_early_uv_system()) + return; if (!determine_cpu_tsc_frequencies(true)) return; loops_per_jiffy = get_loops_per_jiffy(); From ef1f2258748b675422ca0107e5bfb9ceeac675de Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Tue, 11 Sep 2018 11:30:18 -0700 Subject: [PATCH 423/499] RISCV: Fix end PFN for low memory Use memblock_end_of_DRAM which provides correct last low memory PFN. Without that, DMA32 region becomes empty resulting in zero pages being allocated for DMA32. This patch is based on earlier patch from palmer which never merged into 4.19. I just edited the commit text to make more sense. Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index aee603123030..b2d26d9d8489 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -186,7 +186,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = pfn_base + PFN_DOWN(mem_size); + max_low_pfn = memblock_end_of_DRAM(); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); From beeeac43b6fae5f5eaf707b6fcc2bf1e09deb785 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 1 Oct 2018 21:42:37 -0700 Subject: [PATCH 424/499] Revert "serial: 8250_dw: Fix runtime PM handling" This reverts commit d76c74387e1c978b6c5524a146ab0f3f72206f98. While commit d76c74387e1c ("serial: 8250_dw: Fix runtime PM handling") fixes runtime PM handling when using kgdb, it introduces a traceback for everyone else. 
BUG: sleeping function called from invalid context at /mnt/host/source/src/third_party/kernel/next/drivers/base/power/runtime.c:1034 in_atomic(): 1, irqs_disabled(): 1, pid: 1, name: swapper/0 7 locks held by swapper/0/1: #0: 000000005ec5bc72 (&dev->mutex){....}, at: __driver_attach+0xb5/0x12b #1: 000000005d5fa9e5 (&dev->mutex){....}, at: __device_attach+0x3e/0x15b #2: 0000000047e93286 (serial_mutex){+.+.}, at: serial8250_register_8250_port+0x51/0x8bb #3: 000000003b328f07 (port_mutex){+.+.}, at: uart_add_one_port+0xab/0x8b0 #4: 00000000fa313d4d (&port->mutex){+.+.}, at: uart_add_one_port+0xcc/0x8b0 #5: 00000000090983ca (console_lock){+.+.}, at: vprintk_emit+0xdb/0x217 #6: 00000000c743e583 (console_owner){-...}, at: console_unlock+0x211/0x60f irq event stamp: 735222 __down_trylock_console_sem+0x4a/0x84 console_unlock+0x338/0x60f __do_softirq+0x4a4/0x50d irq_exit+0x64/0xe2 CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.19.0-rc5 #6 Hardware name: Google Caroline/Caroline, BIOS Google_Caroline.7820.286.0 03/15/2017 Call Trace: dump_stack+0x7d/0xbd ___might_sleep+0x238/0x259 __pm_runtime_resume+0x4e/0xa4 ? serial8250_rpm_get+0x2e/0x44 serial8250_console_write+0x44/0x301 ? lock_acquire+0x1b8/0x1fa console_unlock+0x577/0x60f vprintk_emit+0x1f0/0x217 printk+0x52/0x6e register_console+0x43b/0x524 uart_add_one_port+0x672/0x8b0 ? set_io_from_upio+0x150/0x162 serial8250_register_8250_port+0x825/0x8bb dw8250_probe+0x80c/0x8b0 ? dw8250_serial_inq+0x8e/0x8e ? dw8250_check_lcr+0x108/0x108 ? dw8250_runtime_resume+0x5b/0x5b ? dw8250_serial_outq+0xa1/0xa1 ? dw8250_remove+0x115/0x115 platform_drv_probe+0x76/0xc5 really_probe+0x1f1/0x3ee ? driver_allows_async_probing+0x5d/0x5d driver_probe_device+0xd6/0x112 ? driver_allows_async_probing+0x5d/0x5d bus_for_each_drv+0xbe/0xe5 __device_attach+0xdd/0x15b bus_probe_device+0x5a/0x10b device_add+0x501/0x894 ? _raw_write_unlock+0x27/0x3a platform_device_add+0x224/0x2b7 mfd_add_device+0x718/0x75b ? __kmalloc+0x144/0x16a ? mfd_add_devices+0x38/0xdb mfd_add_devices+0x9b/0xdb intel_lpss_probe+0x7d4/0x8ee intel_lpss_pci_probe+0xac/0xd4 pci_device_probe+0x101/0x18e ... Revert the offending patch until a more comprehensive solution is available. Cc: Tony Lindgren Cc: Andy Shevchenko Cc: Phil Edworthy Fixes: d76c74387e1c ("serial: 8250_dw: Fix runtime PM handling") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index fa8dcb470640..d31b975dd3fd 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -630,10 +630,6 @@ static int dw8250_probe(struct platform_device *pdev) if (!data->skip_autocfg) dw8250_setup_port(p); -#ifdef CONFIG_PM - uart.capabilities |= UART_CAP_RPM; -#endif - /* If we have a valid fifosize, try hooking up DMA */ if (p->fifosize) { data->dma.rxconf.src_maxburst = p->fifosize / 4; From 10653022456dc77b398777fd8e95126c77954b49 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Aug 2018 14:54:03 +0200 Subject: [PATCH 425/499] Revert "serial: sh-sci: Remove SCIx_RZ_SCIFA_REGTYPE" This reverts commit 7acece71a517cad83a0842a94d94c13f271b680c. 
Signed-off-by: Geert Uytterhoeven Acked-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 31 +++++++++++++++++++++++++++++++ include/linux/serial_sci.h | 1 + 2 files changed, 32 insertions(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index ac4424bf6b13..5d42c9a63001 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -291,6 +291,33 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .error_clear = SCIF_ERROR_CLEAR, }, + /* + * The "SCIFA" that is in RZ/T and RZ/A2. + * It looks like a normal SCIF with FIFO data, but with a + * compressed address space. Also, the break out of interrupts + * are different: ERI/BRI, RXI, TXI, TEI, DRI. + */ + [SCIx_RZ_SCIFA_REGTYPE] = { + .regs = { + [SCSMR] = { 0x00, 16 }, + [SCBRR] = { 0x02, 8 }, + [SCSCR] = { 0x04, 16 }, + [SCxTDR] = { 0x06, 8 }, + [SCxSR] = { 0x08, 16 }, + [SCxRDR] = { 0x0A, 8 }, + [SCFCR] = { 0x0C, 16 }, + [SCFDR] = { 0x0E, 16 }, + [SCSPTR] = { 0x10, 16 }, + [SCLSR] = { 0x12, 16 }, + }, + .fifosize = 16, + .overrun_reg = SCLSR, + .overrun_mask = SCLSR_ORER, + .sampling_rate_mask = SCI_SR(32), + .error_mask = SCIF_DEFAULT_ERROR_MASK, + .error_clear = SCIF_ERROR_CLEAR, + }, + /* * Common SH-3 SCIF definitions. */ @@ -3110,6 +3137,10 @@ static const struct of_device_id of_sci_match[] = { .compatible = "renesas,scif-r7s72100", .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE), }, + { + .compatible = "renesas,scif-r7s9210", + .data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE), + }, /* Family-specific types */ { .compatible = "renesas,rcar-gen1-scif", diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index c0e795d95477..1c89611e0e06 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -36,6 +36,7 @@ enum { SCIx_SH4_SCIF_FIFODATA_REGTYPE, SCIx_SH7705_SCIF_REGTYPE, SCIx_HSCIF_REGTYPE, + SCIx_RZ_SCIFA_REGTYPE, SCIx_NR_REGTYPES, }; From 5b162cc4ac27ba76e576abc9090c54cf90a17980 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Aug 2018 14:54:04 +0200 Subject: [PATCH 426/499] Revert "serial: sh-sci: Allow for compressed SCIF address" This reverts commit 2d4dd0da45401c7ae7332b4d1eb7bbb1348edde9. This broke earlycon on all Renesas ARM platforms using a SCIF port for the serial console (R-Car, RZ/A1, RZ/G1, RZ/G2 SoCs), due to an incorrect value of port->regshift. 
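The role of regshift in the serial core can be illustrated generically (this is not the sh-sci code itself): register offsets are shifted left by port->regshift before being added to the base address, so a stale value of 1 doubles every offset and register accesses land on the wrong locations.

#include <stdint.h>

static inline uintptr_t serial_reg_addr(uintptr_t membase, unsigned int offset,
					unsigned int regshift)
{
	return membase + (offset << regshift);
}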
Signed-off-by: Geert Uytterhoeven Acked-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 5d42c9a63001..ab3f6e91853d 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -346,15 +346,15 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { [SCIx_SH4_SCIF_REGTYPE] = { .regs = { [SCSMR] = { 0x00, 16 }, - [SCBRR] = { 0x02, 8 }, - [SCSCR] = { 0x04, 16 }, - [SCxTDR] = { 0x06, 8 }, - [SCxSR] = { 0x08, 16 }, - [SCxRDR] = { 0x0a, 8 }, - [SCFCR] = { 0x0c, 16 }, - [SCFDR] = { 0x0e, 16 }, - [SCSPTR] = { 0x10, 16 }, - [SCLSR] = { 0x12, 16 }, + [SCBRR] = { 0x04, 8 }, + [SCSCR] = { 0x08, 16 }, + [SCxTDR] = { 0x0c, 8 }, + [SCxSR] = { 0x10, 16 }, + [SCxRDR] = { 0x14, 8 }, + [SCFCR] = { 0x18, 16 }, + [SCFDR] = { 0x1c, 16 }, + [SCSPTR] = { 0x20, 16 }, + [SCLSR] = { 0x24, 16 }, }, .fifosize = 16, .overrun_reg = SCLSR, @@ -2837,7 +2837,7 @@ static int sci_init_single(struct platform_device *dev, { struct uart_port *port = &sci_port->port; const struct resource *res; - unsigned int i, regtype; + unsigned int i; int ret; sci_port->cfg = p; @@ -2874,7 +2874,6 @@ static int sci_init_single(struct platform_device *dev, if (unlikely(sci_port->params == NULL)) return -EINVAL; - regtype = sci_port->params - sci_port_params; switch (p->type) { case PORT_SCIFB: sci_port->rx_trigger = 48; @@ -2929,10 +2928,6 @@ static int sci_init_single(struct platform_device *dev, port->regshift = 1; } - if (regtype == SCIx_SH4_SCIF_REGTYPE) - if (sci_port->reg_size >= 0x20) - port->regshift = 1; - /* * The UART port needs an IRQ value, so we peg this to the RX IRQ * for the multi-IRQ ports, which is where we are primarily From 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Oct 2018 12:35:05 -0700 Subject: [PATCH 427/499] inet: make sure to grab rcu_read_lock before using ireq->ireq_opt Timer handlers do not imply rcu_read_lock(), so my recent fix triggered a LOCKDEP warning when SYNACK is retransmit. Lets add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt usages instead of guessing what is done by callers, since it is not worth the pain. Get rid of ireq_opt_deref() helper since it hides the logic without real benefit, since it is now a standard rcu_dereference(). Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged") Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/net/inet_sock.h | 5 ----- net/dccp/ipv4.c | 4 +++- net/ipv4/inet_connection_sock.c | 5 ++++- net/ipv4/tcp_ipv4.c | 4 +++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a8cd5cf9ff5b..a80fd0ac4563 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -130,11 +130,6 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } -static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) -{ - return rcu_dereference(ireq->ireq_opt); -} - struct inet_cork { unsigned int flags; __be32 addr; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b08feb219b44..8e08cea6f178 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -493,9 +493,11 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dfd5009f96ef..15e7f7915a21 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -544,7 +544,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = ireq_opt_deref(ireq); + rcu_read_lock(); + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, @@ -558,11 +559,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c09eddbb78..cd426313a298 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -943,9 +943,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } From 0f3b914c9cfcd7bbedd445dc4ac5dd999fa213c2 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Tue, 2 Oct 2018 12:14:34 -0700 Subject: [PATCH 428/499] bonding: fix warning message RX queue config for bonding master could be different from its slave device(s). With the commit 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also."), the packet is reinjected into stack with skb->dev as bonding master. This potentially triggers the message: "bondX received packet on queue Y, but number of RX queues is Z" whenever the queue that packet is received on is higher than the numrxqueues on bonding master (Y > Z). Fixes: 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also.") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Signed-off-by: Mahesh Bandewar Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c05c01a00755..ee28ec9e0aba 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1187,6 +1187,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) if (nskb) { nskb->dev = bond->dev; + nskb->queue_mapping = 0; netif_rx(nskb); } return RX_HANDLER_PASS; From 0e1d6eca5113858ed2caea61a5adc03c595f6096 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Oct 2018 15:47:35 -0700 Subject: [PATCH 429/499] rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 We have an impressive number of syzkaller bugs that are linked to the fact that syzbot was able to create a networking device with millions of TX (or RX) queues. Let's limit the number of RX/TX queues to 4096, this really should cover all known cases. A separate patch will add various cond_resched() in the loops handling sysfs entries at device creation and dismantle. Tested: lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap RTNETLINK answers: Invalid argument lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap real 0m0.180s user 0m0.000s sys 0m0.107s Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7f37fe9c65a5..448703312fed 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2835,6 +2835,12 @@ struct net_device *rtnl_create_link(struct net *net, else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); + if (num_tx_queues < 1 || num_tx_queues > 4096) + return ERR_PTR(-EINVAL); + + if (num_rx_queues < 1 || num_rx_queues > 4096) + return ERR_PTR(-EINVAL); + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, ops->setup, num_tx_queues, num_rx_queues); if (!dev) From 0595751f267994c3c7027377058e4185b3a28e75 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 17 May 2018 16:35:07 +0200 Subject: [PATCH 430/499] smb2: fix missing files in root share directory listing When mounting a Windows share that is the root of a drive (eg. C$) the server does not return . and .. directory entries. This results in the smb2 code path erroneously skipping the 2 first entries. Pseudo-code of the readdir() code path: cifs_readdir(struct file, struct dir_context) initiate_cifs_search <-- if no reponse cached yet server->ops->query_dir_first dir_emit_dots dir_emit <-- adds "." and ".." if we're at pos=0 find_cifs_entry initiate_cifs_search <-- if pos < start of current response (restart search) server->ops->query_dir_next <-- if pos > end of current response (fetch next search res) for(...) <-- loops over cur response entries starting at pos cifs_filldir <-- skip . and .., emit entry cifs_fill_dirent dir_emit pos++ A) dir_emit_dots() always adds . & .. and sets the current dir pos to 2 (0 and 1 are done). Therefore we always want the index_to_find to be 2 regardless of if the response has . and .. B) smb1 code initializes index_of_last_entry with a +2 offset in cifssmb.c CIFSFindFirst(): psrch_inf->index_of_last_entry = 2 /* skip . and .. 
*/ + psrch_inf->entries_in_buffer; Later in find_cifs_entry() we want to find the next dir entry at pos=2 as a result of (A) first_entry_in_buffer = cfile->srch_inf.index_of_last_entry - cfile->srch_inf.entries_in_buffer; This var is the dir pos that the first entry in the buffer will have therefore it must be 2 in the first call. If we don't offset index_of_last_entry by 2 (like in (B)), first_entry_in_buffer=0 but we were instructed to get pos=2 so this code in find_cifs_entry() skips the 2 first which is ok for non-root shares, as it skips . and .. from the response but is not ok for root shares where the 2 first are actual files pos_in_buf = index_to_find - first_entry_in_buffer; // pos_in_buf=2 // we skip 2 first response entries :( for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) { /* go entry by entry figuring out which is first */ cur_ent = nxt_dir_entry(cur_ent, end_of_smb, cfile->srch_inf.info_level); } C) cifs_filldir() skips . and .. so we can safely ignore them for now. Sample program: int main(int argc, char **argv) { const char *path = argc >= 2 ? argv[1] : "."; DIR *dh; struct dirent *de; printf("listing path <%s>\n", path); dh = opendir(path); if (!dh) { printf("opendir error %d\n", errno); return 1; } while (1) { de = readdir(dh); if (!de) { if (errno) { printf("readdir error %d\n", errno); return 1; } printf("end of listing\n"); break; } printf("off=%lu <%s>\n", de->d_off, de->d_name); } return 0; } Before the fix with SMB1 on root shares: <.> off=1 <..> off=2 <$Recycle.Bin> off=3 off=4 and on non-root shares: <.> off=1 <..> off=4 <-- after adding .., the offsets jumps to +2 because <2536> off=5 we skipped . and .. from response buffer (C) <411> off=6 but still incremented pos off=7 off=8 Therefore the fix for smb2 is to mimic smb1 behaviour and offset the index_of_last_entry by 2. Test results comparing smb1 and smb2 before/after the fix on root share, non-root shares and on large directories (ie. multi-response dir listing): PRE FIX ======= pre-1-root VS pre-2-root: ERR pre-2-root is missing [bootmgr, $Recycle.Bin] pre-1-nonroot VS pre-2-nonroot: OK~ same files, same order, different offsets pre-1-nonroot-large VS pre-2-nonroot-large: OK~ same files, same order, different offsets POST FIX ======== post-1-root VS post-2-root: OK same files, same order, same offsets post-1-nonroot VS post-2-nonroot: OK same files, same order, same offsets post-1-nonroot-large VS post-2-nonroot-large: OK same files, same order, same offsets REGRESSION? 
=========== pre-1-root VS post-1-root: OK same files, same order, same offsets pre-1-nonroot VS post-1-nonroot: OK same files, same order, same offsets BugLink: https://bugzilla.samba.org/show_bug.cgi?id=13107 Signed-off-by: Aurelien Aptel Signed-off-by: Paulo Alcantara Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d954ce36b473..89985a0a6819 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1477,7 +1477,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, } srch_inf->entries_in_buffer = 0; - srch_inf->index_of_last_entry = 0; + srch_inf->index_of_last_entry = 2; rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, fid->volatile_fid, 0, srch_inf); From ddf83afb9f60ee58cdec30bb615eff65d00ba05e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 30 Aug 2018 10:12:59 +1000 Subject: [PATCH 431/499] cifs: add a warning if we try to dequeue a deleted mid cifs_delete_mid() is called once we are finished handling a mid and we expect no more work done on this mid. Needed to fix recent commit: commit 730928c8f4be88e9d6a027a16b1e8fa9c59fc077 ("cifs: update smb2_queryfs() to use compounding") Add a warning if someone tries to dequeue a mid that has already been flagged to be deleted. Also change list_del() to list_del_init() so that if we have similar bugs resurface in the future we will not oops. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/cifsglob.h | 1 + fs/cifs/connect.c | 10 +++++++++- fs/cifs/transport.c | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0c9ab62c3df4..9dcaed031843 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1553,6 +1553,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, /* Flags */ #define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ +#define MID_DELETED 2 /* Mid has been dequeued/deleted */ /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7aa08dba4719..e9d64c92b8da 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -659,7 +659,15 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed) mid->mid_state = MID_RESPONSE_RECEIVED; else mid->mid_state = MID_RESPONSE_MALFORMED; - list_del_init(&mid->qhead); + /* + * Trying to handle/dequeue a mid after the send_recv() + * function has finished processing it is a bug. + */ + if (mid->mid_flags & MID_DELETED) + printk_once(KERN_WARNING + "trying to dequeue a deleted mid\n"); + else + list_del_init(&mid->qhead); spin_unlock(&GlobalMid_Lock); } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 78f96fa3d7d9..9cc9a127749e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -142,7 +142,8 @@ void cifs_delete_mid(struct mid_q_entry *mid) { spin_lock(&GlobalMid_Lock); - list_del(&mid->qhead); + list_del_init(&mid->qhead); + mid->mid_flags |= MID_DELETED; spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(mid); From 4e34feb5e96328df6a362c88098502b07c1b1e69 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 30 Aug 2018 10:13:00 +1000 Subject: [PATCH 432/499] cifs: only wake the thread for the very last PDU in a compound For compounded PDUs we should only wake the waiting thread for the very last PDU of the compound.
We do this so that we are guaranteed that the demultiplex_thread will not process or access any of those MIDs any more once the send/recv thread starts processing. Else there is a race where at the end of the send/recv processing we will try to delete all the mids of the compound. If the demultiplex thread still has other mids to process at this point for this compound, this can lead to an oops. Needed to fix recent commit: commit 730928c8f4be88e9d6a027a16b1e8fa9c59fc077 ("cifs: update smb2_queryfs() to use compounding") Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/transport.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 9cc9a127749e..b48f43963da6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -773,6 +773,11 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst) return mid; } +static void +cifs_noop_callback(struct mid_q_entry *mid) +{ +} + int compound_send_recv(const unsigned int xid, struct cifs_ses *ses, const int flags, const int num_rqst, struct smb_rqst *rqst, @@ -827,8 +832,13 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, } midQ[i]->mid_state = MID_REQUEST_SUBMITTED; + /* + * We don't invoke the callback for compounds unless it is the last + * request. + */ + if (i < num_rqst - 1) + midQ[i]->callback = cifs_noop_callback; } - cifs_in_send_inc(ses->server); rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); cifs_in_send_dec(ses->server); @@ -909,6 +919,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, midQ[i]->resp_buf = NULL; } out: + /* + * This will dequeue all mids. After this it is important that the + * demultiplex_thread will not process any of these mids any further. + * This is prevented above by using a noop callback that will not + * wake this thread except for the very last PDU. + */ for (i = 0; i < num_rqst; i++) cifs_delete_mid(midQ[i]); add_credits(ses->server, credits, optype); From 7af929d6d05ba5564139718e30d5bc96bdbc716a Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 2 Oct 2018 18:54:09 -0500 Subject: [PATCH 433/499] smb3: fix lease break problem introduced by compounding Fixes a problem (discovered by Aurelien) introduced by a recent commit: commit b24df3e30cbf48255db866720fb71f14bf9d2f39 ("cifs: update receive_encrypted_standard to handle compounded responses") which broke the ability to respond to some lease breaks (lease breaks being ignored is a problem since they can block the server response for the duration of the lease break timeout). Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky --- fs/cifs/connect.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e9d64c92b8da..52d71b64c0c6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -946,8 +946,7 @@ cifs_demultiplex_thread(void *p) } else { mids[0] = server->ops->find_mid(server, buf); bufs[0] = buf; - if (mids[0]) - num_mids = 1; + num_mids = 1; if (!mids[0] || !mids[0]->receive) length = standard_receive3(server, mids[0]); From 45ec318578c0c22a11f5b9927d064418e1ab1905 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 2 Oct 2018 16:52:03 -0700 Subject: [PATCH 434/499] net: systemport: Fix wake-up interrupt race during resume The AON_PM_L2 is normally used to trigger and identify the source of a wake-up event.
Since the RX_SYS clock is no longer turned off, we also have an interrupt being sent to the SYSTEMPORT INTRL_2_0 controller, and that interrupt remains active up until the magic packet detector is disabled which happens much later during the driver resumption. The race happens if we have a CPU that is entering the SYSTEMPORT INTRL2_0 handler during resume, and another CPU has managed to clear the wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we have the first CPU stuck in the interrupt handler with an interrupt cause that has been cleared under its feet, and so we keep returning IRQ_NONE and we never make any progress. This was not a problem before because we would always turn off the RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would also be turned off as well, thus not latching the interrupt. The fix is to make sure we do not enable either the MPD or BRCM_TAG_MATCH interrupts since those are redundant with what the AON_PM_L2 interrupt controller already processes and they would cause such a race to occur. Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER") Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 28 +++++++++------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 147045757b10..c57238fce863 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1069,9 +1069,6 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) { u32 reg; - /* Stop monitoring MPD interrupt */ - intrl2_0_mask_set(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); - /* Disable RXCHK, active filters and Broadcom tag matching */ reg = rxchk_readl(priv, RXCHK_CONTROL); reg &= ~(RXCHK_BRCM_TAG_MATCH_MASK << @@ -1081,6 +1078,17 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) /* Clear the MagicPacket detection logic */ mpd_enable_set(priv, false); + reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); + if (reg & INTRL2_0_MPD) + netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); + + if (reg & INTRL2_0_BRCM_MATCH_TAG) { + reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & + RXCHK_BRCM_TAG_MATCH_MASK; + netdev_info(priv->netdev, + "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); + } + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); } @@ -1105,7 +1113,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) struct bcm_sysport_priv *priv = netdev_priv(dev); struct bcm_sysport_tx_ring *txr; unsigned int ring, ring_bit; - u32 reg; priv->irq0_stat = intrl2_0_readl(priv, INTRL2_CPU_STATUS) & ~intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); @@ -1131,16 +1138,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) bcm_sysport_tx_reclaim_all(priv); - if (priv->irq0_stat & INTRL2_0_MPD) - netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); - - if (priv->irq0_stat & INTRL2_0_BRCM_MATCH_TAG) { - reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & - RXCHK_BRCM_TAG_MATCH_MASK; - netdev_info(priv->netdev, - "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); - } - if (!priv->is_lite) goto out; @@ -2641,9 +2638,6 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) /* UniMAC receive needs to be turned on */ umac_enable_set(priv, CMD_RX_EN, 1); - /* 
Enable the interrupt wake-up source */ - intrl2_0_mask_clear(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); - netif_dbg(priv, wol, ndev, "entered WOL mode\n"); return 0; From a07f388e2cde2be74b263f85df6f672fea0305a1 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 2 Oct 2018 18:52:01 -0600 Subject: [PATCH 435/499] net: qualcomm: rmnet: Skip processing loopback packets RMNET RX handler was processing invalid packets that were originally sent on the real device and were looped back via dev_loopback_xmit(). This was detected using syzkaller. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 7fd86d40a337..6908b26feb9e 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -189,6 +189,9 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) if (!skb) goto done; + if (skb->pkt_type == PACKET_LOOPBACK) + return RX_HANDLER_PASS; + dev = skb->dev; port = rmnet_get_port(dev); From 6392ff3c8e4c23d0a09b0ae9f94feb3effed490b Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 2 Oct 2018 18:52:02 -0600 Subject: [PATCH 436/499] net: qualcomm: rmnet: Fix incorrect allocation flag in transmit The incoming skb needs to be reallocated in case the headroom is not sufficient to add the MAP header. This allocation needs to be atomic otherwise it results in the following splat [32805.801456] BUG: sleeping function called from invalid context [32805.841141] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [32805.904773] task: ffffffd7c5f62280 task.stack: ffffff80464a8000 [32805.910851] pc : ___might_sleep+0x180/0x188 [32805.915143] lr : ___might_sleep+0x180/0x188 [32806.131520] Call trace: [32806.134041] ___might_sleep+0x180/0x188 [32806.137980] __might_sleep+0x50/0x84 [32806.141653] __kmalloc_track_caller+0x80/0x3bc [32806.146215] __kmalloc_reserve+0x3c/0x88 [32806.150241] pskb_expand_head+0x74/0x288 [32806.154269] rmnet_egress_handler+0xb0/0x1d8 [32806.162239] rmnet_vnd_start_xmit+0xc8/0x13c [32806.166627] dev_hard_start_xmit+0x148/0x280 [32806.181181] sch_direct_xmit+0xa4/0x198 [32806.185125] __qdisc_run+0x1f8/0x310 [32806.188803] net_tx_action+0x23c/0x26c [32806.192655] __do_softirq+0x220/0x408 [32806.196420] do_softirq+0x4c/0x70 Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 6908b26feb9e..1f98d65473cf 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -147,7 +147,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, } if (skb_headroom(skb) < required_headroom) { - if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL)) + if (pskb_expand_head(skb, required_headroom, 0, GFP_ATOMIC)) return -ENOMEM; } From ec405641e2b73160e26ef17580d0cf28565d146c Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 2 Oct 2018 18:52:03 -0600 Subject: [PATCH 437/499] net: qualcomm: rmnet: Fix incorrect allocation flag in receive path The incoming skb needs to be reallocated in case the headroom is not sufficient to adjust the ethernet header. This allocation needs to be atomic otherwise it results in this splat [<600601bb>] ___might_sleep+0x185/0x1a3 [<603f6314>] ? _raw_spin_unlock_irqrestore+0x0/0x27 [<60069bb0>] ? __wake_up_common_lock+0x95/0xd1 [<600602b0>] __might_sleep+0xd7/0xe2 [<60065598>] ? enqueue_task_fair+0x112/0x209 [<600eea13>] __kmalloc_track_caller+0x5d/0x124 [<600ee9b6>] ? __kmalloc_track_caller+0x0/0x124 [<602696d5>] __kmalloc_reserve.isra.34+0x30/0x7e [<603f629b>] ? _raw_spin_lock_irqsave+0x0/0x3d [<6026b744>] pskb_expand_head+0xbf/0x310 [<6025ca6a>] rmnet_rx_handler+0x7e/0x16b [<6025c9ec>] ? rmnet_rx_handler+0x0/0x16b [<6027ad0c>] __netif_receive_skb_core+0x301/0x96f [<60033c17>] ? set_signals+0x0/0x40 [<6027bbcb>] __netif_receive_skb+0x24/0x8e Fixes: 74692caf1b0b ("net: qualcomm: rmnet: Process packets over ethernet") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 1f98d65473cf..11167abe5934 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -113,7 +113,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb, struct sk_buff *skbn; if (skb->dev->type == ARPHRD_ETHER) { - if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_KERNEL)) { + if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_ATOMIC)) { kfree_skb(skb); return; } From 64199fc0a46ba211362472f7f942f900af9492fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 30 Sep 2018 11:33:39 -0700 Subject: [PATCH 438/499] ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy, do not do it. Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Acked-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/ipv4/ip_sockglue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c0fe5ad996f2..26c36cccabdc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -149,7 +149,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - const struct iphdr *iph = ip_hdr(skb); __be16 *ports; int end; @@ -164,7 +163,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = iph->daddr; + sin.sin_addr.s_addr = ip_hdr(skb)->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); From 9003b369499b3320d3f16145145b729fb6ff3bd4 Mon Sep 17 00:00:00 2001 From: "Alex Xu (Hello71)" Date: Sun, 30 Sep 2018 11:06:39 -0400 Subject: [PATCH 439/499] r8169: always autoneg on resume This affects at least versions 25 and 33, so assume all cards are broken and just renegotiate by default. Fixes: 10bc6a6042c9 ("r8169: fix autoneg issue on resume with RTL8168E") Signed-off-by: Alex Xu (Hello71) Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index a94b874982dc..9a5e2969df61 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4072,13 +4072,12 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) genphy_soft_reset(dev->phydev); - /* It was reported that chip version 33 ends up with 10MBit/Half on a + /* It was reported that several chips end up with 10MBit/Half on a * 1GBit link after resuming from S3. For whatever reason the PHY on - * this chip doesn't properly start a renegotiation when soft-reset. + * these chips doesn't properly start a renegotiation when soft-reset. * Explicitly requesting a renegotiation fixes this. */ - if (tp->mac_version == RTL_GIGA_MAC_VER_33 && - dev->phydev->autoneg == AUTONEG_ENABLE) + if (dev->phydev->autoneg == AUTONEG_ENABLE) phy_restart_aneg(dev->phydev); } From 4f166564014aba65ad6f15b612f6711fd0f117ee Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 2 Oct 2018 21:26:50 -0700 Subject: [PATCH 440/499] x86/vdso: Only enable vDSO retpolines when enabled and supported When I fixed the vDSO build to use inline retpolines, I messed up the Makefile logic and made it unconditional. It should have depended on CONFIG_RETPOLINE and on the availability of compiler support. This broke the build on some older compilers. 
Reported-by: nikola.ciprich@linuxbox.cz Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: David Woodhouse Cc: Linus Torvalds Cc: Matt Rickard Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: jason.vas.dias@gmail.com Cc: stable@vger.kernel.org Fixes: 2e549b2ee0e3 ("x86/vdso: Fix vDSO build if a retpoline is emitted") Link: http://lkml.kernel.org/r/08a1f29f2c238dd1f493945e702a521f8a5aa3ae.1538540801.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/Makefile | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fa3f439f0a92..141d415a8c80 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) @@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ From 88296bd42b4e9d24e138a68b337c235b5cac89a7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Oct 2018 15:45:11 -0700 Subject: [PATCH 441/499] x86/cpu/amd: Remove unnecessary parentheses Clang warns when multiple pairs of parentheses are used for a single conditional statement. arch/x86/kernel/cpu/amd.c:925:14: warning: equality comparison with extraneous parentheses [-Wparentheses-equality] if ((c->x86 == 6)) { ~~~~~~~^~~~ arch/x86/kernel/cpu/amd.c:925:14: note: remove extraneous parentheses around the comparison to silence this warning if ((c->x86 == 6)) { ~ ^ ~ arch/x86/kernel/cpu/amd.c:925:14: note: use '=' to turn this equality comparison into an assignment if ((c->x86 == 6)) { ^~ = 1 warning generated. 
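Clang singles out the extra parentheses because doubled parentheses are the accepted idiom for marking an assignment inside a condition as intentional, so seeing them around '==' looks like a mistyped assignment. A small stand-alone example (not from the kernel tree) that reproduces both cases:

	#include <stdio.h>

	int main(void)
	{
		int x = 6;

		if ((x == 6))	/* clang -Wparentheses-equality fires here */
			puts("comparison");

		if ((x = 7))	/* doubled parens mark the assignment as intentional */
			puts("assignment");

		return 0;
	}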
Signed-off-by: Nathan Chancellor Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20181002224511.14929-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/187 Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22ab408177b2..eeea634bee0a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c) static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; From e4a02ed2aaf447fa849e3254bfdb3b9b01e1e520 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 2 Oct 2018 14:48:49 -0700 Subject: [PATCH 442/499] locking/ww_mutex: Fix runtime warning in the WW mutex selftest If CONFIG_WW_MUTEX_SELFTEST=y is enabled, booting an image in an arm64 virtual machine results in the following traceback if 8 CPUs are enabled: DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current) WARNING: CPU: 2 PID: 537 at kernel/locking/mutex.c:1033 __mutex_unlock_slowpath+0x1a8/0x2e0 ... Call trace: __mutex_unlock_slowpath() ww_mutex_unlock() test_cycle_work() process_one_work() worker_thread() kthread() ret_from_fork() If requesting b_mutex fails with -EDEADLK, the error variable is reassigned to the return value from calling ww_mutex_lock on a_mutex again. If this call fails, a_mutex is not locked. It is, however, unconditionally unlocked subsequently, causing the reported warning. Fix the problem by using two error variables. With this change, the selftest still fails as follows: cyclic deadlock not resolved, ret[7/8] = -35 However, the traceback is gone. 
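The failure mode is easier to see with the locking reduced to a toy model. The sketch below only mimics the control flow of the old test_cycle_work() with flags standing in for the mutexes and a simulated failing relock (-35 is -EDEADLK); it is not kernel code:

	#include <stdio.h>

	#define EDEADLK 35

	static int a_held, b_held;

	static int lock_a_again(void) { return -EDEADLK; /* simulate the relock failing */ }

	int main(void)
	{
		int err;

		a_held = 1;			/* ww_mutex_lock(&cycle->a_mutex) */
		err = -EDEADLK;			/* ww_mutex_lock(cycle->b_mutex) deadlocked */
		if (err == -EDEADLK) {
			a_held = 0;		/* ww_mutex_unlock(&cycle->a_mutex) */
			b_held = 1;		/* ww_mutex_lock_slow(cycle->b_mutex) */
			err = lock_a_again();	/* old code reused "err" for this result */
			if (!err)
				a_held = 1;
		}
		if (!err)
			b_held = 0;		/* ww_mutex_unlock(cycle->b_mutex) */
		if (!a_held)			/* old code then unlocked a_mutex unconditionally */
			puts("unlocking a_mutex that is not held -> DEBUG_LOCKS_WARN_ON");
		return 0;
	}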
Signed-off-by: Guenter Roeck Cc: Chris Wilson Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Fixes: d1b42b800e5d0 ("locking/ww_mutex: Add kselftests for resolving ww_mutex cyclic deadlocks") Link: http://lkml.kernel.org/r/1538516929-9734-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- kernel/locking/test-ww_mutex.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 0be047dbd897..65a3b7e55b9f 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -260,7 +260,7 @@ static void test_cycle_work(struct work_struct *work) { struct test_cycle *cycle = container_of(work, typeof(*cycle), work); struct ww_acquire_ctx ctx; - int err; + int err, erra = 0; ww_acquire_init(&ctx, &ww_class); ww_mutex_lock(&cycle->a_mutex, &ctx); @@ -270,17 +270,19 @@ static void test_cycle_work(struct work_struct *work) err = ww_mutex_lock(cycle->b_mutex, &ctx); if (err == -EDEADLK) { + err = 0; ww_mutex_unlock(&cycle->a_mutex); ww_mutex_lock_slow(cycle->b_mutex, &ctx); - err = ww_mutex_lock(&cycle->a_mutex, &ctx); + erra = ww_mutex_lock(&cycle->a_mutex, &ctx); } if (!err) ww_mutex_unlock(cycle->b_mutex); - ww_mutex_unlock(&cycle->a_mutex); + if (!erra) + ww_mutex_unlock(&cycle->a_mutex); ww_acquire_fini(&ctx); - cycle->result = err; + cycle->result = err ?: erra; } static int __test_cycle(unsigned int nthreads) From ad608fbcf166fec809e402d548761768f602702c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 11 Sep 2018 05:32:37 -0400 Subject: [PATCH 443/499] media: v4l: event: Prevent freeing event subscriptions while accessed The event subscriptions are added to the subscribed event list while holding a spinlock, but that lock is subsequently released while still accessing the subscription object. This makes it possible to unsubscribe the event --- and freeing the subscription object's memory --- while the subscription object is simultaneously accessed. Prevent this by adding a mutex to serialise the event subscription and unsubscription. This also gives a guarantee to the callback ops that the add op has returned before the del op is called. This change also results in making the elems field less special: subscriptions are only added to the event list once they are fully initialised. Signed-off-by: Sakari Ailus Reviewed-by: Hans Verkuil Reviewed-by: Laurent Pinchart Cc: stable@vger.kernel.org # for 4.14 and up Fixes: c3b5b0241f62 ("V4L/DVB: V4L: Events: Add backend") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-event.c | 38 +++++++++++++++------------- drivers/media/v4l2-core/v4l2-fh.c | 2 ++ include/media/v4l2-fh.h | 4 +++ 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c index 127fe6eb91d9..a3ef1f50a4b3 100644 --- a/drivers/media/v4l2-core/v4l2-event.c +++ b/drivers/media/v4l2-core/v4l2-event.c @@ -115,14 +115,6 @@ static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *e if (sev == NULL) return; - /* - * If the event has been added to the fh->subscribed list, but its - * add op has not completed yet elems will be 0, treat this as - * not being subscribed. - */ - if (!sev->elems) - return; - /* Increase event sequence number on fh. 
*/ fh->sequence++; @@ -208,6 +200,7 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, struct v4l2_subscribed_event *sev, *found_ev; unsigned long flags; unsigned i; + int ret = 0; if (sub->type == V4L2_EVENT_ALL) return -EINVAL; @@ -225,31 +218,36 @@ int v4l2_event_subscribe(struct v4l2_fh *fh, sev->flags = sub->flags; sev->fh = fh; sev->ops = ops; + sev->elems = elems; + + mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); - if (!found_ev) - list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { + /* Already listening */ kvfree(sev); - return 0; /* Already listening */ + goto out_unlock; } if (sev->ops && sev->ops->add) { - int ret = sev->ops->add(sev, elems); + ret = sev->ops->add(sev, elems); if (ret) { - sev->ops = NULL; - v4l2_event_unsubscribe(fh, sub); - return ret; + kvfree(sev); + goto out_unlock; } } - /* Mark as ready for use */ - sev->elems = elems; + spin_lock_irqsave(&fh->vdev->fh_lock, flags); + list_add(&sev->list, &fh->subscribed); + spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); - return 0; +out_unlock: + mutex_unlock(&fh->subscribe_lock); + + return ret; } EXPORT_SYMBOL_GPL(v4l2_event_subscribe); @@ -288,6 +286,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, return 0; } + mutex_lock(&fh->subscribe_lock); + spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); @@ -305,6 +305,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh, if (sev && sev->ops && sev->ops->del) sev->ops->del(sev); + mutex_unlock(&fh->subscribe_lock); + kvfree(sev); return 0; diff --git a/drivers/media/v4l2-core/v4l2-fh.c b/drivers/media/v4l2-core/v4l2-fh.c index 3895999bf880..c91a7bd3ecfc 100644 --- a/drivers/media/v4l2-core/v4l2-fh.c +++ b/drivers/media/v4l2-core/v4l2-fh.c @@ -45,6 +45,7 @@ void v4l2_fh_init(struct v4l2_fh *fh, struct video_device *vdev) INIT_LIST_HEAD(&fh->available); INIT_LIST_HEAD(&fh->subscribed); fh->sequence = -1; + mutex_init(&fh->subscribe_lock); } EXPORT_SYMBOL_GPL(v4l2_fh_init); @@ -90,6 +91,7 @@ void v4l2_fh_exit(struct v4l2_fh *fh) return; v4l_disable_media_source(fh->vdev); v4l2_event_unsubscribe_all(fh); + mutex_destroy(&fh->subscribe_lock); fh->vdev = NULL; } EXPORT_SYMBOL_GPL(v4l2_fh_exit); diff --git a/include/media/v4l2-fh.h b/include/media/v4l2-fh.h index ea73fef8bdc0..8586cfb49828 100644 --- a/include/media/v4l2-fh.h +++ b/include/media/v4l2-fh.h @@ -38,10 +38,13 @@ struct v4l2_ctrl_handler; * @prio: priority of the file handler, as defined by &enum v4l2_priority * * @wait: event' s wait queue + * @subscribe_lock: serialise changes to the subscribed list; guarantee that + * the add and del event callbacks are orderly called * @subscribed: list of subscribed events * @available: list of events waiting to be dequeued * @navailable: number of available events at @available list * @sequence: event sequence number + * * @m2m_ctx: pointer to &struct v4l2_m2m_ctx */ struct v4l2_fh { @@ -52,6 +55,7 @@ struct v4l2_fh { /* Events */ wait_queue_head_t wait; + struct mutex subscribe_lock; struct list_head subscribed; struct list_head available; unsigned int navailable; From 4c9613ce556fdeb671e779668b627ea3a2b61728 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 Oct 2018 09:24:22 +0100 Subject: [PATCH 444/499] drm/i915: Handle incomplete Z_FINISH for compressed error states The final call to zlib_deflate(Z_FINISH) may require more output space to be allocated and so needs to 
re-invoked. Failure to do so in the current code leads to incomplete zlib streams (albeit intact due to the use of Z_SYNC_FLUSH) resulting in the occasional short object capture. v2: Check against overrunning our pre-allocated page array v3: Drop Z_SYNC_FLUSH entirely Testcase: igt/i915-error-capture.js Fixes: 0a97015d45ee ("drm/i915: Compress GPU objects in error state") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: # v4.10+ Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181003082422.23214-1-chris@chris-wilson.co.uk (cherry picked from commit 83bc0f5b432f60394466deef16fc753e27371d0b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gpu_error.c | 88 +++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gpu_error.h | 1 + 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f7f2aa71d8d9..a262a64f5625 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -232,6 +232,20 @@ static bool compress_init(struct compress *c) return true; } +static void *compress_next_page(struct drm_i915_error_object *dst) +{ + unsigned long page; + + if (dst->page_count >= dst->num_pages) + return ERR_PTR(-ENOSPC); + + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) + return ERR_PTR(-ENOMEM); + + return dst->pages[dst->page_count++] = (void *)page; +} + static int compress_page(struct compress *c, void *src, struct drm_i915_error_object *dst) @@ -245,19 +259,14 @@ static int compress_page(struct compress *c, do { if (zstream->avail_out == 0) { - unsigned long page; + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out); - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); - if (!page) - return -ENOMEM; - - dst->pages[dst->page_count++] = (void *)page; - - zstream->next_out = (void *)page; zstream->avail_out = PAGE_SIZE; } - if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK) + if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; } while (zstream->avail_in); @@ -268,19 +277,42 @@ static int compress_page(struct compress *c, return 0; } +static int compress_flush(struct compress *c, + struct drm_i915_error_object *dst) +{ + struct z_stream_s *zstream = &c->zstream; + + do { + switch (zlib_deflate(zstream, Z_FINISH)) { + case Z_OK: /* more space requested */ + zstream->next_out = compress_next_page(dst); + if (IS_ERR(zstream->next_out)) + return PTR_ERR(zstream->next_out); + + zstream->avail_out = PAGE_SIZE; + break; + + case Z_STREAM_END: + goto end; + + default: /* any error */ + return -EIO; + } + } while (1); + +end: + memset(zstream->next_out, 0, zstream->avail_out); + dst->unused = zstream->avail_out; + return 0; +} + static void compress_fini(struct compress *c, struct drm_i915_error_object *dst) { struct z_stream_s *zstream = &c->zstream; - if (dst) { - zlib_deflate(zstream, Z_FINISH); - dst->unused = zstream->avail_out; - } - zlib_deflateEnd(zstream); kfree(zstream->workspace); - if (c->tmp) free_page((unsigned long)c->tmp); } @@ -319,6 +351,12 @@ static int compress_page(struct compress *c, return 0; } +static int compress_flush(struct compress *c, + struct drm_i915_error_object *dst) +{ + return 0; +} + static void compress_fini(struct compress *c, struct drm_i915_error_object *dst) { @@ -917,6 +955,7 @@ i915_error_object_create(struct drm_i915_private *i915, unsigned long num_pages; struct sgt_iter iter; dma_addr_t dma; + 
int ret; if (!vma) return NULL; @@ -930,6 +969,7 @@ i915_error_object_create(struct drm_i915_private *i915, dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0; @@ -938,28 +978,26 @@ i915_error_object_create(struct drm_i915_private *i915, return NULL; } + ret = -EINVAL; for_each_sgt_dma(dma, iter, vma->pages) { void __iomem *s; - int ret; ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); - if (ret) - goto unwind; + break; } - goto out; -unwind: - while (dst->page_count--) - free_page((unsigned long)dst->pages[dst->page_count]); - kfree(dst); - dst = NULL; + if (ret || compress_flush(&compress, dst)) { + while (dst->page_count--) + free_page((unsigned long)dst->pages[dst->page_count]); + kfree(dst); + dst = NULL; + } -out: compress_fini(&compress, dst); ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); return dst; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index f893a4e8b783..8710fb18ed74 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -135,6 +135,7 @@ struct i915_gpu_state { struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; + int num_pages; int page_count; int unused; u32 *pages[0]; From 2cc543f5cd6deda27ef463686fa08c16c8c0990b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 3 Oct 2018 12:45:56 +0200 Subject: [PATCH 445/499] sctp: fix fall-through annotation Replace "fallthru" with a proper "fall through" annotation. This fix is part of the ongoing efforts to enabling -Wimplicit-fallthrough Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d74d00b29942..42191ed9902b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1048,7 +1048,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, if (!ctx->packet || !ctx->packet->has_cookie_echo) return; - /* fallthru */ + /* fall through */ case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: case SCTP_STATE_SHUTDOWN_RECEIVED: From 4233cfe6ec4683497d7318f55ce7617e97f2e610 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 3 Oct 2018 11:30:35 -0700 Subject: [PATCH 446/499] ixgbe: check return value of napi_complete_done() The NIC driver should only enable interrupts when napi_complete_done() returns true. This patch adds the check for ixgbe. Cc: stable@vger.kernel.org # 4.10+ Suggested-by: Eric Dumazet Signed-off-by: Song Liu Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f27d73a7bf16..6cdd58d9d461 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3196,11 +3196,13 @@ int ixgbe_poll(struct napi_struct *napi, int budget) return budget; /* all work done, exit the polling mode */ - napi_complete_done(napi, work_done); - if (adapter->rx_itr_setting & 1) - ixgbe_set_itr(q_vector); - if (!test_bit(__IXGBE_DOWN, &adapter->state)) - ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx)); + if (likely(napi_complete_done(napi, work_done))) { + if (adapter->rx_itr_setting & 1) + ixgbe_set_itr(q_vector); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) + ixgbe_irq_enable_queues(adapter, + BIT_ULL(q_vector->v_idx)); + } return min(work_done, budget - 1); } From 6579804c431712d56956a63b1a01509441cc6800 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 4 Oct 2018 14:51:11 +1000 Subject: [PATCH 447/499] KVM: PPC: Book3S HV: Avoid crash from THP collapse during radix page fault Commit 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size", 2018-09-11) added a call to __find_linux_pte() and a dereference of the returned PTE pointer to the radix page fault path in the common case where the page is normal system memory. Previously, __find_linux_pte() was only called for mappings to physical addresses which don't have a page struct (e.g. memory-mapped I/O) or where the page struct is marked as reserved memory. This exposes us to the possibility that the returned PTE pointer could be NULL, for example in the case of a concurrent THP collapse operation. Dereferencing the returned NULL pointer causes a host crash. To fix this, we check for NULL, and if it is NULL, we retry the operation by returning to the guest, with the expectation that it will generate the same page fault again (unless of course it has been fixed up by another CPU in the meantime). Fixes: 71d29f43b633 ("KVM: PPC: Book3S HV: Don't use compound_order to determine host mapping size") Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 933c574e1cf7..998f8d089ac7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -646,6 +646,16 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ local_irq_disable(); ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + /* + * If the PTE disappeared temporarily due to a THP + * collapse, just return and let the guest try again. + */ + if (!ptep) { + local_irq_enable(); + if (page) + put_page(page); + return RESUME_GUEST; + } pte = *ptep; local_irq_enable(); From 709ae62e8e6d9ac4df7dadb3b8ae432675c45ef9 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 4 Oct 2018 11:39:42 +0800 Subject: [PATCH 448/499] ALSA: hda/realtek - Cannot adjust speaker's volume on Dell XPS 27 7760 The issue is the same as commit dd9aa335c880 ("ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO"), the output requires to connect to a node with Amp-out capability. Applying the same fixup ALC298_FIXUP_SPK_VOLUME can fix the issue. 
BugLink: https://bugs.launchpad.net/bugs/1775068 Signed-off-by: Kai-Heng Feng Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1d117f00d04d..3ac7ba9b342d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6409,6 +6409,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), From 02e425668f5c9deb42787d10001a3b605993ad15 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 3 Oct 2018 16:23:49 -0700 Subject: [PATCH 449/499] x86/vdso: Fix vDSO syscall fallback asm constraint regression When I added the missing memory outputs, I failed to update the index of the first argument (ebx) on 32-bit builds, which broke the fallbacks. Somehow I must have screwed up my testing or gotten lucky. Add another test to cover gettimeofday() as well. Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 715bd9d12f84 ("x86/vdso: Fix asm constraints on vDSO syscall fallbacks") Link: http://lkml.kernel.org/r/21bd45ab04b6d838278fa5bebfa9163eceffa13c.1538608971.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 8 +-- tools/testing/selftests/x86/test_vdso.c | 73 +++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 134e2d2e8add..e48ca3afa091 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -68,11 +68,11 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -83,11 +83,11 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" : "=a" (ret), "=m" (*tv), "=m" (*tz) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 49f7294fb382..35edd61d1663 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -36,6 +36,10 @@ typedef int (*vgettime_t)(clockid_t, struct timespec *); vgettime_t vdso_clock_gettime; +typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); + +vgtod_t vdso_gettimeofday; + typedef long (*getcpu_t)(unsigned *, unsigned *, void *); getcpu_t vgetcpu; @@ -104,6 +108,11 @@ static 
void fill_function_pointers() vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); if (!vdso_clock_gettime) printf("Warning: failed to find clock_gettime in vDSO\n"); + + vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gettimeofday) + printf("Warning: failed to find gettimeofday in vDSO\n"); + } static long sys_getcpu(unsigned * cpu, unsigned * node, @@ -117,6 +126,11 @@ static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) return syscall(__NR_clock_gettime, id, ts); } +static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return syscall(__NR_gettimeofday, tv, tz); +} + static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); @@ -177,6 +191,14 @@ static bool ts_leq(const struct timespec *a, const struct timespec *b) return a->tv_nsec <= b->tv_nsec; } +static bool tv_leq(const struct timeval *a, const struct timeval *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_usec <= b->tv_usec; +} + static char const * const clocknames[] = { [0] = "CLOCK_REALTIME", [1] = "CLOCK_MONOTONIC", @@ -248,11 +270,62 @@ static void test_clock_gettime(void) test_one_clock_gettime(INT_MAX, "invalid"); } +static void test_gettimeofday(void) +{ + struct timeval start, vdso, end; + struct timezone sys_tz, vdso_tz; + int vdso_ret, end_ret; + + if (!vdso_gettimeofday) + return; + + printf("[RUN]\tTesting gettimeofday...\n"); + + if (sys_gettimeofday(&start, &sys_tz) < 0) { + printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); + nerrs++; + return; + } + + vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); + end_ret = sys_gettimeofday(&end, NULL); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", + (unsigned long long)start.tv_sec, start.tv_usec, + (unsigned long long)vdso.tv_sec, vdso.tv_usec, + (unsigned long long)end.tv_sec, end.tv_usec); + + if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } + + if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && + sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { + printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", + sys_tz.tz_minuteswest, sys_tz.tz_dsttime); + } else { + printf("[FAIL]\ttimezones do not match\n"); + nerrs++; + } + + /* And make sure that passing NULL for tz doesn't crash. */ + vdso_gettimeofday(&vdso, NULL); +} + int main(int argc, char **argv) { fill_function_pointers(); test_clock_gettime(); + test_gettimeofday(); /* * Test getcpu() last so that, if something goes wrong setting affinity, From fd6b6d9b82f97a851fb0078201ddc38fe9728cda Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 1 Oct 2018 14:25:34 -0700 Subject: [PATCH 450/499] KVM: VMX: check for existence of secondary exec controls before accessing Return early from vmx_set_virtual_apic_mode() if the processor doesn't support VIRTUALIZE_APIC_ACCESSES or VIRTUALIZE_X2APIC_MODE, both of which reside in SECONDARY_VM_EXEC_CONTROL. This eliminates warnings due to VMWRITEs to SECONDARY_VM_EXEC_CONTROL (VMCS field 401e) failing on processors without secondary exec controls. Remove the similar check for TPR shadowing as it is incorporated in the flexpriority_enabled check and the APIC-related code in vmx_update_msr_bitmap() is further gated by VIRTUALIZE_X2APIC_MODE. 
Reported-by: Gerhard Wiesinger Fixes: 8d860bbeedef ("kvm: vmx: Basic APIC virtualization controls have three settings") Cc: Jim Mattson Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b16b2664cfe1..764ae031054f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10214,15 +10214,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + if (!flexpriority_enabled && + !cpu_has_vmx_virtualize_x2apic_mode()) + return; + /* Postpone execution until vmcs01 is the current VMCS. */ if (is_guest_mode(vcpu)) { to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; return; } - if (!cpu_need_tpr_shadow(vcpu)) - return; - sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); From 2cf7ea9f40fabee0f8b40db4eb2d1e85cc6c0a95 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 Oct 2018 10:34:00 +0200 Subject: [PATCH 451/499] KVM: VMX: hide flexpriority from guest when disabled at the module level As of commit 8d860bbeedef ("kvm: vmx: Basic APIC virtualization controls have three settings"), KVM will disable VIRTUALIZE_APIC_ACCESSES when a nested guest writes APIC_BASE MSR and kvm-intel.flexpriority=0, whereas previously KVM would allow a nested guest to enable VIRTUALIZE_APIC_ACCESSES so long as it's supported in hardware. That is, KVM now advertises VIRTUALIZE_APIC_ACCESSES to a guest but doesn't (always) allow setting it when kvm-intel.flexpriority=0, and may even initially allow the control and then clear it when the nested guest writes APIC_BASE MSR, which is decidedly odd even if it doesn't cause functional issues. Hide the control completely when the module parameter is cleared. reported-by: Sean Christopherson Fixes: 8d860bbeedef ("kvm: vmx: Basic APIC virtualization controls have three settings") Cc: Jim Mattson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 764ae031054f..55b62760b694 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3589,12 +3589,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high); msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING; + /* * We can emulate "VMCS shadowing," even if the hardware * doesn't support it. @@ -3651,6 +3651,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (flexpriority_enabled) + msrs->secondary_ctls_high |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, msrs->misc_low, From 601350ff58d5415a001769532f6b8333820e5786 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 28 Sep 2018 21:00:48 +0300 Subject: [PATCH 452/499] ovl: fix access beyond unterminated strings KASAN detected slab-out-of-bounds access in printk from overlayfs, because string format used %*s instead of %.*s. 
> BUG: KASAN: slab-out-of-bounds in string+0x298/0x2d0 lib/vsprintf.c:604 > Read of size 1 at addr ffff8801c36c66ba by task syz-executor2/27811 > > CPU: 0 PID: 27811 Comm: syz-executor2 Not tainted 4.19.0-rc5+ #36 ... > printk+0xa7/0xcf kernel/printk/printk.c:1996 > ovl_lookup_index.cold.15+0xe8/0x1f8 fs/overlayfs/namei.c:689 Reported-by: syzbot+376cea2b0ef340db3dd4@syzkaller.appspotmail.com Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 359f392ca53e ("ovl: lookup index entry for copy up origin") Cc: # v4.13 --- fs/overlayfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f28711846dd6..9c0ca6a7becf 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -686,7 +686,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper, index = NULL; goto out; } - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" + pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, err); From 1a8f8d2a443ef9ad9a3065ba8c8119df714240fa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 4 Oct 2018 14:49:10 +0200 Subject: [PATCH 453/499] ovl: fix format of setxattr debug Format has a typo: it was meant to be "%.*s", not "%*s". But at some point callers grew nonprintable values as well, so use "%*pE" instead with a maximized length. Reported-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 3a1e819b4e80 ("ovl: store file handle of lower inode on copy up") Cc: # v4.12 --- fs/overlayfs/overlayfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index f61839e1054c..a3c0d9584312 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -152,8 +152,8 @@ static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { int err = vfs_setxattr(dentry, name, value, size, flags); - pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", - dentry, name, (int) size, (char *) value, flags, err); + pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n", + dentry, name, min((int)size, 48), value, size, flags, err); return err; } From 7e7126846c95a34f98a1524d5c473af1f0783735 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 Oct 2018 13:44:26 +0200 Subject: [PATCH 454/499] kvm: nVMX: fix entry with pending interrupt if APICv is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 introduced a check on the interrupt-window and NMI-window CPU execution controls in order to inject an external interrupt vmexit before the first guest instruction executes. However, when APIC virtualization is enabled the host does not need a vmexit in order to inject an interrupt at the next interrupt window; instead, it just places the interrupt vector in RVI and the processor will inject it as soon as possible. Therefore, on machines with APICv it is not enough to check the CPU execution controls: the same scenario can also happen if RVI>vPPR. 
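The extra condition only compares the priority class, i.e. the upper four bits of each value, mirroring how the APIC decides whether a pending vector is actually deliverable. A minimal sketch of the check, assuming rvi has been read from GUEST_INTR_STATUS and vppr from the virtual APIC page as in the diff below:

  static bool apicv_interrupt_deliverable(u8 rvi, u8 vppr)
  {
  	/* Deliverable only if RVI's priority class exceeds vPPR's. */
  	return (rvi & 0xf0) > (vppr & 0xf0);
  }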
Fixes: b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 Reviewed-by: Nikita Leshchenko Cc: Sean Christopherson Cc: Liran Alon Cc: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 55b62760b694..612fd17be635 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6162,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) nested_mark_vmcs12_pages_dirty(vcpu); } +static u8 vmx_get_rvi(void) +{ + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; +} + static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6174,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) return false; - rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff; + rvi = vmx_get_rvi(); vapic_page = kmap(vmx->nested.virtual_apic_page); vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); @@ -10349,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) +{ + u8 rvi = vmx_get_rvi(); + u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -12593,10 +12606,13 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); bool from_vmentry = !!exit_qual; u32 dummy_exit_qual; - u32 vmcs01_cpu_exec_ctrl; + bool evaluate_pending_interrupts; int r = 0; - vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); + if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) + evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); enter_guest_mode(vcpu); @@ -12644,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) * to L1 or delivered directly to L2 (e.g. In case L1 don't * intercept EXTERNAL_INTERRUPT). * - * Usually this would be handled by L0 requesting a - * IRQ/NMI window by setting VMCS accordingly. However, - * this setting was done on VMCS01 and now VMCS02 is active - * instead. Thus, we force L0 to perform pending event - * evaluation by requesting a KVM_REQ_EVENT. + * Usually this would be handled by the processor noticing an + * IRQ/NMI window request, or checking RVI during evaluation of + * pending virtual interrupts. However, this setting was done + * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 + * to perform pending event evaluation by requesting a KVM_REQ_EVENT. */ - if (vmcs01_cpu_exec_ctrl & - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) { + if (unlikely(evaluate_pending_interrupts)) kvm_make_request(KVM_REQ_EVENT, vcpu); - } /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point From 987bf116445db5d63a5c2ed94c4479687d9c9973 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Mon, 24 Sep 2018 19:01:47 +0530 Subject: [PATCH 455/499] drm/amd/display: Signal hw_done() after waiting for flip_done() In amdgpu_dm_commit_tail(), wait until flip_done() is signaled before we signal hw_done(). 
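Concretely, the tail of the commit sequence changes from "signal hw_done(), then wait for flip_done()" to the reverse order. A rough sketch of the reordered tail is shown here; the [Why]/[How] sections below explain the race it closes:

  	/* Wait for all pending flips on the updated CRTCs first ... */
  	if (wait_for_vblank)
  		drm_atomic_helper_wait_for_flip_done(dev, state);

  	/*
  	 * ... and only then signal hw_done(), so that no other commit can
  	 * swap and free the CRTC state this worker still dereferences.
  	 */
  	drm_atomic_helper_commit_hw_done(state);

  	drm_atomic_helper_cleanup_planes(dev, state);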
[Why] This is to temporarily address a paging error that occurs when a nonblocking commit contends with another commit, particularly in a mirrored display configuration where at least 2 CRTCs are updated. The error occurs in drm_atomic_helper_wait_for_flip_done(), when we attempt to access the contents of new_crtc_state->commit. Here's the sequence for a mirrored 2 display setup (irrelevant steps left out for clarity): **THREAD 1** | **THREAD 2** | Initialize atomic state for flip | | Queue worker | ... | Do work for flip | | Signal hw_done() on CRTC 1 | Signal hw_done() on CRTC 2 | | Wait for flip_done() on CRTC 1 <---- **PREEMPTED BY THREAD 1** Initialize atomic state for cursor | update (1) | | Do cursor update work on both CRTCs | | Clear atomic state (2) | **DONE** | ... | | Wait for flip_done() on CRTC 2 | *ERROR* | The issue starts with (1). When the atomic state is initialized, the current CRTC states are duplicated to be the new_crtc_states, and referenced to be the old_crtc_states. (The new_crtc_states are to be filled with update data.) Some things to note: * Due to the mirrored configuration, the cursor updates on both CRTCs. * At this point, the pflip IRQ has already been handled, and flip_done signaled on all CRTCs. The cursor commit can therefore continue. * The old_crtc_states used by the cursor update are the **same states** as the new_crtc_states used by the flip worker. At (2), the old_crtc_state is freed (*), and the cursor commit completes. We then context switch back to the flip worker, where we attempt to access the new_crtc_state->commit object. This is problematic, as this state has already been freed. (*) Technically, 'state->crtcs[i].state' is freed, which was made to reference old_crtc_state in drm_atomic_helper_swap_state() [How] By moving hw_done() after wait_for_flip_done(), we're guaranteed that the new_crtc_state (from the flip worker's perspective) still exists. This is because any other commit will be blocked, waiting for the hw_done() signal. Note that both the i915 and imx drivers have this sequence flipped already, masking this problem. Signed-off-by: Shirish S Signed-off-by: Leo Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 96875950845a..6903fe6c894b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4633,12 +4633,18 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } spin_unlock_irqrestore(&adev->ddev->event_lock, flags); - /* Signal HW programming completion */ - drm_atomic_helper_commit_hw_done(state); if (wait_for_vblank) drm_atomic_helper_wait_for_flip_done(dev, state); + /* + * FIXME: + * Delay hw_done() until flip_done() is signaled. This is to block + * another commit from freeing the CRTC state while we're still + * waiting on flip_done. 
+ */ + drm_atomic_helper_commit_hw_done(state); + drm_atomic_helper_cleanup_planes(dev, state); /* Finally, drop a runtime PM reference for each newly disabled CRTC, From 11b29c9e25788d0afb2ddb67bcd89424bd25f2f7 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 2 Oct 2018 18:41:12 -0400 Subject: [PATCH 456/499] drm/amdkfd: Fix incorrect use of process->mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mm_struct pointer should never be dereferenced. If running in a user thread, just use current->mm. If running in a kernel worker use get_task_mm to get a safe reference to the mm_struct. Reviewed-by: Oded Gabbay Acked-by: Christian König Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ec0d62a16e53..4f22e745df51 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { - int retval; struct mqd_manager *mqd_mgr; + int retval; mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); if (!mqd_mgr) @@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (!q->properties.is_active) return 0; - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, - &q->properties, q->process->mm); + if (WARN(q->process->mm != current->mm, + "should only run in user thread")) + retval = -EFAULT; + else + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, + &q->properties, current->mm); if (retval) goto out_uninit_mqd; @@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, - &q->properties, q->process->mm); + q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + if (WARN(q->process->mm != current->mm, + "should only run in user thread")) + retval = -EFAULT; + else + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, + q->pipe, q->queue, + &q->properties, current->mm); + } out_unlock: dqm_unlock(dqm); @@ -653,6 +663,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { + struct mm_struct *mm = NULL; struct queue *q; struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; @@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, kfd_flush_tlb(pdd); } + /* Take a safe reference to the mm_struct, which may otherwise + * disappear even while the kfd_process is still referenced. 
+ */ + mm = get_task_mm(pdd->process->lead_thread); + if (!mm) { + retval = -EFAULT; + goto out; + } + /* activate all active queues on the qpd */ list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_evicted) @@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, q->properties.is_evicted = false; q->properties.is_active = true; retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, - q->queue, &q->properties, - q->process->mm); + q->queue, &q->properties, mm); if (retval) goto out; dqm->queue_count++; } qpd->evicted = 0; out: + if (mm) + mmput(mm); dqm_unlock(dqm); return retval; } From f3c84a8e3e922afdcbc55f04df8fdf8a548f5a21 Mon Sep 17 00:00:00 2001 From: Nir Dotan Date: Thu, 4 Oct 2018 15:48:02 +0000 Subject: [PATCH 457/499] mlxsw: pci: Derive event type from event queue number Due to a hardware issue in Spectrum-2, the field event_type of the event queue element (EQE) has become reserved. It was used to distinguish between command interface completion events and completion events. Use queue number to determine event type, as command interface completion events are always received on EQ0 and mlxsw driver maps completion events to EQ1. Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") Signed-off-by: Nir Dotan Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 4d271fb3de3d..5890fdfd62c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -718,14 +718,17 @@ static void mlxsw_pci_eq_tasklet(unsigned long data) memset(&active_cqns, 0, sizeof(active_cqns)); while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) { - u8 event_type = mlxsw_pci_eqe_event_type_get(eqe); - switch (event_type) { - case MLXSW_PCI_EQE_EVENT_TYPE_CMD: + /* Command interface completion events are always received on + * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events + * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1). + */ + switch (q->num) { + case MLXSW_PCI_EQ_ASYNC_NUM: mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe); q->u.eq.ev_cmd_count++; break; - case MLXSW_PCI_EQE_EVENT_TYPE_COMP: + case MLXSW_PCI_EQ_COMP_NUM: cqn = mlxsw_pci_eqe_cqn_get(eqe); set_bit(cqn, active_cqns); cq_handle = true; From c360867ec46a4ec5cb19e5c329d65dff522cc69d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 4 Oct 2018 15:48:03 +0000 Subject: [PATCH 458/499] mlxsw: spectrum: Delete RIF when VLAN device is removed In commit 602b74eda813 ("mlxsw: spectrum_switchdev: Do not leak RIFs when removing bridge") I handled the case where RIFs created for VLAN devices were not properly cleaned up when their real device (a bridge) was removed. However, I forgot to handle the case of the VLAN device itself being removed. Do so now when the VLAN device is being unlinked from its real device. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reported-by: Artem Shvorin Tested-by: Artem Shvorin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index b492152c8881..30bb2c533cec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4845,6 +4845,8 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, upper_dev = info->upper_dev; if (info->linking) break; + if (is_vlan_dev(upper_dev)) + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev); if (netif_is_macvlan(upper_dev)) mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); break; From 69e445ab8b66a9f30519842ef18be555d3ee9b51 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Oct 2018 11:08:12 +0200 Subject: [PATCH 459/499] PM / core: Clear the direct_complete flag on errors If __device_suspend() runs asynchronously (in which case the device passed to it is in dpm_suspended_list at that point) and it returns early on an error or pending wakeup, and the power.direct_complete flag has been set for the device already, the subsequent device_resume() will be confused by that and it will call pm_runtime_enable() incorrectly, as runtime PM has not been disabled for the device by __device_suspend(). To avoid that, clear power.direct_complete if __device_suspend() is not going to disable runtime PM for the device before returning. Fixes: aae4518b3124 (PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily) Reported-by: Al Cooper Tested-by: Al Cooper Reviewed-by: Ulf Hansson Cc: 3.16+ # 3.16+ Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 3f68e2919dc5..a690fd400260 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1713,8 +1713,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_wait_for_subordinate(dev, async); - if (async_error) + if (async_error) { + dev->power.direct_complete = false; goto Complete; + } /* * If a device configured to wake up the system from sleep states @@ -1726,6 +1728,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_wakeup_event(dev, 0); if (pm_wakeup_pending()) { + dev->power.direct_complete = false; async_error = -EBUSY; goto Complete; } From 4561ffca88c546f96367f94b8f1e4715a9c62314 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 24 Sep 2018 16:19:30 -0400 Subject: [PATCH 460/499] dm cache metadata: ignore hints array being too small during resize Commit fd2fa9541 ("dm cache metadata: save in-core policy_hint_size to on-disk superblock") enabled previously written policy hints to be used after a cache is reactivated. But in doing so the cache metadata's hint array was left exposed to out of bounds access because on resize the metadata's on-disk hint array wasn't ever extended. Fix this by ignoring that there are no on-disk hints associated with the newly added cache blocks. An expanded on-disk hint array is later rewritten upon the next clean shutdown of the cache. 
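In code terms, running the hint cursor off the end of the (smaller) on-disk array is no longer a fatal load error; the cursor is closed and the remaining, newly added cache blocks are simply loaded without hints. An annotated sketch of that pattern, using the same dm_array cursor helpers as the hunk below:

  	if (hints_valid) {
  		r = dm_array_cursor_next(&cmd->hint_cursor);
  		if (r) {
  			/*
  			 * The on-disk hint array is shorter than the resized
  			 * cache: stop consuming hints but keep loading the
  			 * mappings.  A full-size hint array is written back
  			 * on the next clean shutdown.
  			 */
  			dm_array_cursor_end(&cmd->hint_cursor);
  			hints_valid = false;
  		}
  	}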
Fixes: fd2fa9541 ("dm cache metadata: save in-core policy_hint_size to on-disk superblock") Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 69dddeab124c..5936de71883f 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1455,8 +1455,8 @@ static int __load_mappings(struct dm_cache_metadata *cmd, if (hints_valid) { r = dm_array_cursor_next(&cmd->hint_cursor); if (r) { - DMERR("dm_array_cursor_next for hint failed"); - goto out; + dm_array_cursor_end(&cmd->hint_cursor); + hints_valid = false; } } From 5d07384a666d4b2f781dc056bfeec2c27fbdf383 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 25 Sep 2018 20:56:02 -0400 Subject: [PATCH 461/499] dm cache: fix resize crash if user doesn't reload cache table A reload of the cache's DM table is needed during resize because otherwise a crash will occur when attempting to access smq policy entries associated with the portion of the cache that was recently extended. The reason is cache-size based data structures in the policy will not be resized, the only way to safely extend the cache is to allow for a proper cache policy initialization that occurs when the cache table is loaded. For example the smq policy's space_init(), init_allocator(), calc_hotspot_params() must be sized based on the extended cache size. The fix for this is to disallow cache resizes of this pattern: 1) suspend "cache" target's device 2) resize the fast device used for the cache 3) resume "cache" target's device Instead, the last step must be a full reload of the cache's DM table. Fixes: 66a636356 ("dm cache: add stochastic-multi-queue (smq) policy") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index a53413371725..e13d991e9fb5 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3009,8 +3009,13 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { - if (from_cblock(new_size) > from_cblock(cache->cache_size)) - return true; + if (from_cblock(new_size) > from_cblock(cache->cache_size)) { + if (cache->sized) { + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; + } + } /* * We can't drop a dirty block when shrinking the cache. From f7b2a56e1f3dcbdb4cf09b2b63e859ffe0e09df8 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 28 Sep 2018 17:04:30 -0600 Subject: [PATCH 462/499] net/usb: cancel pending work when unbinding smsc75xx Cancel pending work before freeing smsc75xx private data structure during binding. This fixes the following crash in the driver: BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 IP: mutex_lock+0x2b/0x3f Workqueue: events smsc75xx_deferred_multicast_write [smsc75xx] task: ffff8caa83e85700 task.stack: ffff948b80518000 RIP: 0010:mutex_lock+0x2b/0x3f Call Trace: smsc75xx_deferred_multicast_write+0x40/0x1af [smsc75xx] process_one_work+0x18d/0x2fc worker_thread+0x1a2/0x269 ? pr_cont_work+0x58/0x58 kthread+0xfa/0x10a ? pr_cont_work+0x58/0x58 ? rcu_read_unlock_sched_notrace+0x48/0x48 ret_from_fork+0x22/0x40 Signed-off-by: Yu Zhao Signed-off-by: David S. 
Miller --- drivers/net/usb/smsc75xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index e5a4cbb366dc..ec287c9741e8 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -1520,6 +1520,7 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) { struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); if (pdata) { + cancel_work_sync(&pdata->set_multicast); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); pdata = NULL; From 471b83bd8bbe4e89743683ef8ecb78f7029d8288 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 1 Oct 2018 12:21:59 +0300 Subject: [PATCH 463/499] team: Forbid enslaving team device to itself team's ndo_add_slave() acquires 'team->lock' and later tries to open the newly enslaved device via dev_open(). This emits a 'NETDEV_UP' event that causes the VLAN driver to add VLAN 0 on the team device. team's ndo_vlan_rx_add_vid() will also try to acquire 'team->lock' and deadlock. Fix this by checking early at the enslavement function that a team device is not being enslaved to itself. A similar check was added to the bond driver in commit 09a89c219baf ("bonding: disallow enslaving a bond to itself"). WARNING: possible recursive locking detected 4.18.0-rc7+ #176 Not tainted -------------------------------------------- syz-executor4/6391 is trying to acquire lock: (____ptrval____) (&team->lock){+.+.}, at: team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 but task is already holding lock: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&team->lock); lock(&team->lock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by syz-executor4/6391: #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] #0: (____ptrval____) (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4662 #1: (____ptrval____) (&team->lock){+.+.}, at: team_add_slave+0xdb/0x1c30 drivers/net/team/team.c:1947 stack backtrace: CPU: 1 PID: 6391 Comm: syz-executor4 Not tainted 4.18.0-rc7+ #176 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_deadlock_bug kernel/locking/lockdep.c:1765 [inline] check_deadlock kernel/locking/lockdep.c:1809 [inline] validate_chain kernel/locking/lockdep.c:2405 [inline] __lock_acquire.cold.64+0x1fb/0x486 kernel/locking/lockdep.c:3435 lock_acquire+0x1e4/0x540 kernel/locking/lockdep.c:3924 __mutex_lock_common kernel/locking/mutex.c:757 [inline] __mutex_lock+0x176/0x1820 kernel/locking/mutex.c:894 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:909 team_vlan_rx_add_vid+0x3b/0x1e0 drivers/net/team/team.c:1868 vlan_add_rx_filter_info+0x14a/0x1d0 net/8021q/vlan_core.c:210 __vlan_vid_add net/8021q/vlan_core.c:278 [inline] vlan_vid_add+0x63e/0x9d0 net/8021q/vlan_core.c:308 vlan_device_event.cold.12+0x2a/0x2f net/8021q/vlan.c:381 notifier_call_chain+0x180/0x390 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x3f/0x90 net/core/dev.c:1735 call_netdevice_notifiers net/core/dev.c:1753 [inline] dev_open+0x173/0x1b0 net/core/dev.c:1433 team_port_add drivers/net/team/team.c:1219 [inline] 
team_add_slave+0xa8b/0x1c30 drivers/net/team/team.c:1948 do_set_master+0x1c9/0x220 net/core/rtnetlink.c:2248 do_setlink+0xba4/0x3e10 net/core/rtnetlink.c:2382 rtnl_setlink+0x2a9/0x400 net/core/rtnetlink.c:2636 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4665 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2455 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4683 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfd0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:642 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:652 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2126 __sys_sendmsg+0x11d/0x290 net/socket.c:2164 __do_sys_sendmsg net/socket.c:2173 [inline] __se_sys_sendmsg net/socket.c:2171 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2171 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x456b29 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f9706bf8c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f9706bf96d4 RCX: 0000000000456b29 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000004 RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d3548 R14: 00000000004c8227 R15: 0000000000000000 Fixes: 87002b03baab ("net: introduce vlan_vid_[add/del] and use them instead of direct [add/kill]_vid ndo calls") Signed-off-by: Ido Schimmel Reported-and-tested-by: syzbot+bd051aba086537515cdb@syzkaller.appspotmail.com Signed-off-by: David S. Miller --- drivers/net/team/team.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6a047d30e8c6..d887016e54b6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1167,6 +1167,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return -EBUSY; } + if (dev == port_dev) { + NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself"); + netdev_err(dev, "Cannot enslave team device to itself\n"); + return -EINVAL; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); From dbe80d446c859873820eedfff4abc61c71f1927b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 5 Oct 2018 00:26:00 -0400 Subject: [PATCH 464/499] bnxt_en: Fix VNIC reservations on the PF. The enables bit for VNIC was set wrong when calling the HWRM_FUNC_CFG firmware call to reserve VNICs. This has the effect that the firmware will keep a large number of VNICs for the PF, and having very few for VFs. DPDK driver running on the VFs, which requires more VNICs, may not work properly as a result. Fixes: 674f50a5b026 ("bnxt_en: Implement new method to reserve rings.") Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0478e562abac..2564a92dcb02 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4650,7 +4650,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req, FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; enables |= ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; - enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; + enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0; req->num_rx_rings = cpu_to_le16(rx_rings); req->num_hw_ring_grps = cpu_to_le16(ring_grps); From 5db0e0969af6501ad45fe0494039d3b9c797822b Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 5 Oct 2018 00:26:01 -0400 Subject: [PATCH 465/499] bnxt_en: Fix enables field in HWRM_QUEUE_COS2BW_CFG request In HWRM_QUEUE_COS2BW_CFG request, enables field should have the bits set only for the queue ids which are having the valid parameters. This causes firmware to return error when the TC to hardware CoS queue mapping is not 1:1 during DCBNL ETS setup. Fixes: 2e8ef77ee0ff ("bnxt_en: Add TC to hardware QoS queue mapping logic.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index ddc98c359488..a85d2be986af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -98,13 +98,13 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets, bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1); for (i = 0; i < max_tc; i++) { - u8 qidx; + u8 qidx = bp->tc_to_qidx[i]; req.enables |= cpu_to_le32( - QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i); + QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << + qidx); memset(&cos2bw, 0, sizeof(cos2bw)); - qidx = bp->tc_to_qidx[i]; cos2bw.queue_id = bp->q_info[qidx].queue_id; if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) { cos2bw.tsa = From a2bf74f4e1b82395dad2b08d2a911d9151db71c1 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Fri, 5 Oct 2018 00:26:02 -0400 Subject: [PATCH 466/499] bnxt_en: free hwrm resources, if driver probe fails. When the driver probe fails, all the resources that were allocated prior to the failure must be freed. However, hwrm dma response memory is not getting freed. This patch fixes the problem described above. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2564a92dcb02..3718984a8185 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3017,10 +3017,11 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp) { struct pci_dev *pdev = bp->pdev; - dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, - bp->hwrm_cmd_resp_dma_addr); - - bp->hwrm_cmd_resp_addr = NULL; + if (bp->hwrm_cmd_resp_addr) { + dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, + bp->hwrm_cmd_resp_dma_addr); + bp->hwrm_cmd_resp_addr = NULL; + } } static int bnxt_alloc_hwrm_resources(struct bnxt *bp) @@ -9057,6 +9058,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_clear_int_mode(bp); init_err_pci_clean: + bnxt_free_hwrm_resources(bp); bnxt_cleanup_pci(bp); init_err_free: From c78fe058879bdea919d44f23e21da26f603e9166 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 5 Oct 2018 00:26:03 -0400 Subject: [PATCH 467/499] bnxt_en: get the reduced max_irqs by the ones used by RDMA When getting the max rings supported, get the reduced max_irqs by the ones used by RDMA. If the number MSIX is the limiting factor, this bug may cause the max ring count to be higher than it should be when RDMA driver is loaded and may result in ring allocation failures. Fixes: 30f529473ec9 ("bnxt_en: Do not modify max IRQ count after RDMA driver requests/frees IRQs.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3718984a8185..e2d92548226a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8622,7 +8622,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, *max_tx = hw_resc->max_tx_rings; *max_rx = hw_resc->max_rx_rings; *max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp), - hw_resc->max_irqs); + hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp)); *max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs); max_ring_grps = hw_resc->max_hw_ring_grps; if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { From 17c357efe5eceebdc3971a48b3d4d61a03c1178b Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 28 Sep 2018 14:51:28 -0300 Subject: [PATCH 468/499] openvswitch: load NAT helper Load the respective NAT helper module if the flow uses it. Signed-off-by: Flavio Leitner Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0aeb34c6389d..35ae64cbef33 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1312,6 +1312,10 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, rcu_assign_pointer(help->helper, helper); info->helper = helper; + + if (info->nat) + request_module("ip_nat_%s", name); + return 0; } From ca8931948344c485569b04821d1f6bcebccd376b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 4 Oct 2018 20:24:13 -0700 Subject: [PATCH 469/499] net: dsa: b53: Keep CPU port as tagged in all VLANs Commit c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port incorrectly") was a bit too trigger happy in removing the CPU port from the VLAN membership because we rely on DSA to program the CPU port VLAN, which it does, except it does not bother itself with tagged/untagged and just usese untagged. Having the CPU port "follow" the user ports tagged/untagged is not great and does not allow for properly differentiating, so keep the CPU port tagged in all VLANs. Reported-by: Gerhard Wiesinger Fixes: c499696e7901 ("net: dsa: b53: Stop using dev->cpu_port incorrectly") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d93c790bfbe8..ad534b90ef21 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1107,7 +1107,7 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); vl->members |= BIT(port); - if (untagged) + if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag |= BIT(port); else vl->untag &= ~BIT(port); @@ -1149,7 +1149,7 @@ int b53_vlan_del(struct dsa_switch *ds, int port, pvid = 0; } - if (untagged) + if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag &= ~(BIT(port)); b53_set_vlan_entry(dev, vid, vl); From 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Sat, 29 Sep 2018 15:41:27 +0000 Subject: [PATCH 470/499] net/packet: fix packet drop as of virtio gso When we use raw socket as the vhost backend, a packet from virito with gso offloading information, cannot be sent out in later validaton at xmit path, as we did not set correct skb->protocol which is further used for looking up the gso function. To fix this, we set this field according to virito hdr information. Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion") Signed-off-by: Jianfeng Tan Signed-off-by: David S. 
Miller --- include/linux/virtio_net.h | 18 ++++++++++++++++++ net/packet/af_packet.c | 11 +++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 9397628a1967..cb462f9ab7dd 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -5,6 +5,24 @@ #include #include +static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_UDP: + skb->protocol = cpu_to_be16(ETH_P_IP); + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + skb->protocol = cpu_to_be16(ETH_P_IPV6); + break; + default: + return -EINVAL; + } + + return 0; +} + static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 75c92a87e7b2..d6e94dc7e290 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2715,10 +2715,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } } - if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, - vio_le())) { - tp_len = -EINVAL; - goto tpacket_error; + if (po->has_vnet_hdr) { + if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { + tp_len = -EINVAL; + goto tpacket_error; + } + virtio_net_hdr_set_proto(skb, vnet_hdr); } skb->destructor = tpacket_destruct_skb; @@ -2915,6 +2917,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (err) goto out_free; len += sizeof(vnet_hdr); + virtio_net_hdr_set_proto(skb, &vnet_hdr); } skb_probe_transport_header(skb, reserve); From 2d52527e80c2dc0c5f43f50adf183781262ec565 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 3 Oct 2018 15:20:58 +0200 Subject: [PATCH 471/499] be2net: don't flip hw_features when VXLANs are added/deleted the be2net implementation of .ndo_tunnel_{add,del}() changes the value of NETIF_F_GSO_UDP_TUNNEL bit in 'features' and 'hw_features', but it forgets to call netdev_features_change(). Moreover, ethtool setting for that bit can potentially be reverted after a tunnel is added or removed. GSO already does software segmentation when 'hw_enc_features' is 0, even if VXLAN offload is turned on. In addition, commit 096de2f83ebc ("benet: stricter vxlan offloading check in be_features_check") avoids hardware segmentation of non-VXLAN tunneled packets, or VXLAN packets having wrong destination port. So, it's safe to avoid flipping the above feature on addition/deletion of VXLAN tunnels. Fixes: 630f4b70567f ("be2net: Export tunnel offloads only when a VxLAN tunnel is created") Signed-off-by: Davide Caratti Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 74d122616e76..534787291b44 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4002,8 +4002,6 @@ static int be_enable_vxlan_offloads(struct be_adapter *adapter) netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->features |= NETIF_F_GSO_UDP_TUNNEL; dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); @@ -4025,8 +4023,6 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter) adapter->vxlan_port = 0; netdev->hw_enc_features = 0; - netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL); - netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL); } static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs, @@ -5320,6 +5316,7 @@ static void be_netdev_init(struct net_device *netdev) struct be_adapter *adapter = netdev_priv(netdev); netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX; if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) From 7e4183752735deb7543e179a44f4f4b44917cd6f Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 3 Oct 2018 19:04:49 +0300 Subject: [PATCH 472/499] net: phy: phylink: fix SFP interface autodetection When connecting SFP PHY to phylink use the detected interface. Otherwise, the link fails to come up when the configured 'phy-mode' differs from the SFP detected mode. Move most of phylink_connect_phy() into __phylink_connect_phy(), and leave phylink_connect_phy() as a wrapper. phylink_sfp_connect_phy() can now pass the SFP detected PHY interface to __phylink_connect_phy(). This fixes 1GB SFP module link up on eth3 of the Macchiatobin board that is configured in the DT to "2500base-x" phy-mode. Fixes: 9525ae83959b6 ("phylink: add phylink infrastructure") Suggested-by: Russell King Signed-off-by: Baruch Siach Signed-off-by: David S. 
Miller --- drivers/net/phy/phylink.c | 48 +++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 3ba5cf2a8a5f..7abca86c3aa9 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -717,6 +717,30 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) return 0; } +static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, + phy_interface_t interface) +{ + int ret; + + if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || + (pl->link_an_mode == MLO_AN_INBAND && + phy_interface_mode_is_8023z(interface)))) + return -EINVAL; + + if (pl->phydev) + return -EBUSY; + + ret = phy_attach_direct(pl->netdev, phy, 0, interface); + if (ret) + return ret; + + ret = phylink_bringup_phy(pl, phy); + if (ret) + phy_detach(phy); + + return ret; +} + /** * phylink_connect_phy() - connect a PHY to the phylink instance * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -734,31 +758,13 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) */ int phylink_connect_phy(struct phylink *pl, struct phy_device *phy) { - int ret; - - if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || - (pl->link_an_mode == MLO_AN_INBAND && - phy_interface_mode_is_8023z(pl->link_interface)))) - return -EINVAL; - - if (pl->phydev) - return -EBUSY; - /* Use PHY device/driver interface */ if (pl->link_interface == PHY_INTERFACE_MODE_NA) { pl->link_interface = phy->interface; pl->link_config.interface = pl->link_interface; } - ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface); - if (ret) - return ret; - - ret = phylink_bringup_phy(pl, phy); - if (ret) - phy_detach(phy); - - return ret; + return __phylink_connect_phy(pl, phy, pl->link_interface); } EXPORT_SYMBOL_GPL(phylink_connect_phy); @@ -1672,7 +1678,9 @@ static void phylink_sfp_link_up(void *upstream) static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) { - return phylink_connect_phy(upstream, phy); + struct phylink *pl = upstream; + + return __phylink_connect_phy(upstream, phy, pl->link_config.interface); } static void phylink_sfp_disconnect_phy(void *upstream) From b3e9b515b08e407ab3a026dc2e4d935c48d05f69 Mon Sep 17 00:00:00 2001 From: "Singh, Brijesh" Date: Thu, 4 Oct 2018 21:40:23 +0000 Subject: [PATCH 473/499] iommu/amd: Clear memory encryption mask from physical address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boris Ostrovsky reported a memory leak with device passthrough when SME is active. The VFIO driver uses iommu_iova_to_phys() to get the physical address for an iova. This physical address is later passed into vfio_unmap_unpin() to unpin the memory. The vfio_unmap_unpin() uses pfn_valid() before unpinning the memory. The pfn_valid() check was failing because encryption mask was part of the physical address returned. This resulted in the memory not being unpinned and therefore leaked after the guest terminates. The memory encryption mask must be cleared from the physical address in iommu_iova_to_phys(). 
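To see why the stale encryption bit matters to VFIO, note that pfn_valid() is applied to the pfn derived from the address returned by iommu_iova_to_phys(); with the C-bit (a high physical-address bit) still set, that pfn lies far outside real memory, the check fails, and the pages are silently left pinned. A simplified, hypothetical sketch of the caller-side effect (put_page() stands in for the driver's actual unpin path):

  	phys_addr_t phys = iommu_iova_to_phys(domain, iova);
  	unsigned long pfn = phys >> PAGE_SHIFT;

  	/* With sme_me_mask still set in phys, this test fails ... */
  	if (pfn_valid(pfn))
  		put_page(pfn_to_page(pfn));	/* ... so the page is never released */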
Fixes: 2543a786aa25 ("iommu/amd: Allow the AMD IOMMU to work with memory encryption") Reported-by: Boris Ostrovsky Cc: Tom Lendacky Cc: Joerg Roedel Cc: Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: kvm@vger.kernel.org Cc: Boris Ostrovsky Cc: # 4.14+ Signed-off-by: Brijesh Singh Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 73e47d93e7a0..bee0dfb7b93b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3069,7 +3069,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return 0; offset_mask = pte_pgsize - 1; - __pte = *pte & PM_ADDR_MASK; + __pte = __sme_clr(*pte & PM_ADDR_MASK); return (__pte & ~offset_mask) | (iova & offset_mask); } From 9ce7610e6d201e3923c0b2f454f2e1d54f5da49e Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 1 Oct 2018 14:49:05 +0300 Subject: [PATCH 474/499] i2c: designware: Call i2c_dw_clk_rate() only when calculating timings There are platforms which don't provide input clock rate but provide I2C timing parameters. Commit 3bd4f277274b ("i2c: designware: Call i2c_dw_clk_rate() only once in i2c_dw_init_master()") causes needless warning during probe on those platforms since i2c_dw_clk_rate(), which causes the warning when input clock is unknown, is called even when there is no need to calculate timing parameters. Fixes: 3bd4f277274b ("i2c: designware: Call i2c_dw_clk_rate() only once in i2c_dw_init_master()") Reported-by: Ard Biesheuvel Cc: # 4.19 Signed-off-by: Jarkko Nikula Tested-by: Ard Biesheuvel Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index 94d94b4a9a0d..18cc324f3ca9 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -34,11 +34,11 @@ static void i2c_dw_configure_fifo_master(struct dw_i2c_dev *dev) static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) { - u32 ic_clk = i2c_dw_clk_rate(dev); const char *mode_str, *fp_str = ""; u32 comp_param1; u32 sda_falling_time, scl_falling_time; struct i2c_timings *t = &dev->timings; + u32 ic_clk; int ret; ret = i2c_dw_acquire_lock(dev); @@ -53,6 +53,7 @@ static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) /* Calculate SCL timing parameters for standard mode if not set */ if (!dev->ss_hcnt || !dev->ss_lcnt) { + ic_clk = i2c_dw_clk_rate(dev); dev->ss_hcnt = i2c_dw_scl_hcnt(ic_clk, 4000, /* tHD;STA = tHIGH = 4.0 us */ @@ -89,6 +90,7 @@ static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) * needed also in high speed mode. */ if (!dev->fs_hcnt || !dev->fs_lcnt) { + ic_clk = i2c_dw_clk_rate(dev); dev->fs_hcnt = i2c_dw_scl_hcnt(ic_clk, 600, /* tHD;STA = tHIGH = 0.6 us */ From a932ed3b718147c6537da290b7a91e990fdedb43 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 5 Oct 2018 16:43:55 +1000 Subject: [PATCH 475/499] powerpc: Don't print kernel instructions in show_user_instructions() Recently we implemented show_user_instructions() which dumps the code around the NIP when a user space process dies with an unhandled signal. This was modelled on the x86 code, and we even went so far as to implement the exact same bug, namely that if the user process crashed with its NIP pointing into the kernel we will dump kernel text to dmesg. 
eg: bad-bctr[2996]: segfault (11) at c000000000010000 nip c000000000010000 lr 12d0b0894 code 1 bad-bctr[2996]: code: fbe10068 7cbe2b78 7c7f1b78 fb610048 38a10028 38810020 fb810050 7f8802a6 bad-bctr[2996]: code: 3860001c f8010080 48242371 60000000 <7c7b1b79> 4082002c e8010080 eb610048 This was discovered on x86 by Jann Horn and fixed in commit 342db04ae712 ("x86/dumpstack: Don't dump kernel memory based on usermode RIP"). Fix it by checking the adjusted NIP value (pc) and number of instructions against USER_DS, and bail if we fail the check, eg: bad-bctr[2969]: segfault (11) at c000000000010000 nip c000000000010000 lr 107930894 code 1 bad-bctr[2969]: Bad NIP, not dumping instructions. Fixes: 88b0fe175735 ("powerpc: Add show_user_instructions()") Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 913c5725cdb2..bb6ac471a784 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1306,6 +1306,16 @@ void show_user_instructions(struct pt_regs *regs) pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); + /* + * Make sure the NIP points at userspace, not kernel text/data or + * elsewhere. + */ + if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) { + pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", + current->comm, current->pid); + return; + } + pr_info("%s[%d]: code: ", current->comm, current->pid); for (i = 0; i < instructions_to_print; i++) { From ac1788cc7da4ce54edcfd2e499afdb0a23d5c41d Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Fri, 28 Sep 2018 09:17:32 +0530 Subject: [PATCH 476/499] powerpc/numa: Skip onlining a offline node in kdump path With commit 2ea626306810 ("powerpc/topology: Get topology for shared processors at boot"), kdump kernel on shared LPAR may crash. The necessary conditions are - Shared LPAR with at least 2 nodes having memory and CPUs. - Memory requirement for kdump kernel must be met by the first N-1 nodes where there are at least N nodes with memory and CPUs. Example numactl of such a machine. $ numactl -H available: 5 nodes (0,2,5-7) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 2 cpus: node 2 size: 255 MB node 2 free: 189 MB node 5 cpus: 24 25 26 27 28 29 30 31 node 5 size: 4095 MB node 5 free: 4024 MB node 6 cpus: 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23 node 6 size: 6353 MB node 6 free: 5998 MB node 7 cpus: 8 9 10 11 12 13 14 15 32 33 34 35 36 37 38 39 node 7 size: 7640 MB node 7 free: 7164 MB node distances: node 0 2 5 6 7 0: 10 40 40 40 40 2: 40 10 40 40 40 5: 40 40 10 40 40 6: 40 40 40 10 20 7: 40 40 40 20 10 Steps to reproduce. 1. Load / start kdump service. 2. Trigger a kdump (for example : echo c > /proc/sysrq-trigger) When booting a kdump kernel with 2048M: kexec: Starting switchover sequence. 
I'm in purgatory Using 1TB segments hash-mmu: Initializing hash mmu with SLB Linux version 4.19.0-rc5-master+ (srikar@linux-xxu6) (gcc version 4.8.5 (SUSE Linux)) #1 SMP Thu Sep 27 19:45:00 IST 2018 Found initrd at 0xc000000009e70000:0xc00000000ae554b4 Using pSeries machine description ----------------------------------------------------- ppc64_pft_size = 0x1e phys_mem_size = 0x88000000 dcache_bsize = 0x80 icache_bsize = 0x80 cpu_features = 0x000000ff8f5d91a7 possible = 0x0000fbffcf5fb1a7 always = 0x0000006f8b5c91a1 cpu_user_features = 0xdc0065c2 0xef000000 mmu_features = 0x7c006001 firmware_features = 0x00000007c45bfc57 htab_hash_mask = 0x7fffff physical_start = 0x8000000 ----------------------------------------------------- numa: NODE_DATA [mem 0x87d5e300-0x87d67fff] numa: NODE_DATA(0) on node 6 numa: NODE_DATA [mem 0x87d54600-0x87d5e2ff] Top of RAM: 0x88000000, Total RAM: 0x88000000 Memory hole size: 0MB Zone ranges: DMA [mem 0x0000000000000000-0x0000000087ffffff] DMA32 empty Normal empty Movable zone start for each node Early memory node ranges node 6: [mem 0x0000000000000000-0x0000000087ffffff] Could not find start_pfn for node 0 Initmem setup node 0 [mem 0x0000000000000000-0x0000000000000000] On node 0 totalpages: 0 Initmem setup node 6 [mem 0x0000000000000000-0x0000000087ffffff] On node 6 totalpages: 34816 Unable to handle kernel paging request for data at address 0x00000060 Faulting instruction address: 0xc000000008703a54 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 11 PID: 1 Comm: swapper/11 Not tainted 4.19.0-rc5-master+ #1 NIP: c000000008703a54 LR: c000000008703a38 CTR: 0000000000000000 REGS: c00000000b673440 TRAP: 0380 Not tainted (4.19.0-rc5-master+) MSR: 8000000002009033 CR: 24022022 XER: 20000002 CFAR: c0000000086fc238 IRQMASK: 0 GPR00: c000000008703a38 c00000000b6736c0 c000000009281900 0000000000000000 GPR04: 0000000000000000 0000000000000000 fffffffffffff001 c00000000b660080 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000220 GPR12: 0000000000002200 c000000009e51400 0000000000000000 0000000000000008 GPR16: 0000000000000000 c000000008c152e8 c000000008c152a8 0000000000000000 GPR20: c000000009422fd8 c000000009412fd8 c000000009426040 0000000000000008 GPR24: 0000000000000000 0000000000000000 c000000009168bc8 c000000009168c78 GPR28: c00000000b126410 0000000000000000 c00000000916a0b8 c00000000b126400 NIP [c000000008703a54] bus_add_device+0x84/0x1e0 LR [c000000008703a38] bus_add_device+0x68/0x1e0 Call Trace: [c00000000b6736c0] [c000000008703a38] bus_add_device+0x68/0x1e0 (unreliable) [c00000000b673740] [c000000008700194] device_add+0x454/0x7c0 [c00000000b673800] [c00000000872e660] __register_one_node+0xb0/0x240 [c00000000b673860] [c00000000839a6bc] __try_online_node+0x12c/0x180 [c00000000b673900] [c00000000839b978] try_online_node+0x58/0x90 [c00000000b673930] [c0000000080846d8] find_and_online_cpu_nid+0x158/0x190 [c00000000b673a10] [c0000000080848a0] numa_update_cpu_topology+0x190/0x580 [c00000000b673c00] [c000000008d3f2e4] smp_cpus_done+0x94/0x108 [c00000000b673c70] [c000000008d5c00c] smp_init+0x174/0x19c [c00000000b673d00] [c000000008d346b8] kernel_init_freeable+0x1e0/0x450 [c00000000b673dc0] [c0000000080102e8] kernel_init+0x28/0x160 [c00000000b673e30] [c00000000800b65c] ret_from_kernel_thread+0x5c/0x80 Instruction dump: 60000000 60000000 e89e0020 7fe3fb78 4bff87d5 60000000 7c7d1b79 4082008c e8bf0050 e93e0098 3b9f0010 2fa50000 38630018 419e0114 7f84e378 ---[ end trace 593577668c2daa65 ]--- However a 
regular kernel with 4096M (2048 gets reserved for crash kernel) boots properly. Unlike regular kernels, which mark all available nodes as online, kdump kernel only marks just enough nodes as online and marks the rest as offline at boot. However kdump kernel boots with all available CPUs. With Commit 2ea626306810 ("powerpc/topology: Get topology for shared processors at boot"), all CPUs are onlined on their respective nodes at boot time. try_online_node() tries to online the offline nodes but fails as all needed subsystems are not yet initialized. As part of fix, detect and skip early onlining of a offline node. Fixes: 2ea626306810 ("powerpc/topology: Get topology for shared processors at boot") Reported-by: Pavithra Prakash Signed-off-by: Srikar Dronamraju Tested-by: Hari Bathini Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 59d07bd5374a..055b211b7126 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1217,9 +1217,10 @@ int find_and_online_cpu_nid(int cpu) * Need to ensure that NODE_DATA is initialized for a node from * available memory (see memblock_alloc_try_nid). If unable to * init the node, then default to nearest node that has memory - * installed. + * installed. Skip onlining a node if the subsystems are not + * yet initialized. */ - if (try_online_node(new_nid)) + if (!topology_inited || try_online_node(new_nid)) new_nid = first_online_node; #else /* From b799207e1e1816b09e7a5920fbb2d5fcf6edd681 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 18:17:59 +0200 Subject: [PATCH 477/499] bpf: 32-bit RSH verification must truncate input before the ALU op When I wrote commit 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification"), I assumed that, in order to emulate 64-bit arithmetic with 32-bit logic, it is sufficient to just truncate the output to 32 bits; and so I just moved the register size coercion that used to be at the start of the function to the end of the function. That assumption is true for almost every op, but not for 32-bit right shifts, because those can propagate information towards the least significant bit. Fix it by always truncating inputs for 32-bit ops to 32 bits. Also get rid of the coerce_reg_to_size() after the ALU op, since that has no effect. Fixes: 468f6eafa6c4 ("bpf: fix 32-bit ALU op verification") Acked-by: Daniel Borkmann Signed-off-by: Jann Horn Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb07e74b34a2..465952a8e465 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2896,6 +2896,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, u64 umin_val, umax_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; + if (insn_bitness == 32) { + /* Relevant for 32-bit RSH: Information can propagate towards + * LSB, so it isn't sufficient to only truncate the output to + * 32 bits. 
+ */ + coerce_reg_to_size(dst_reg, 4); + coerce_reg_to_size(&src_reg, 4); + } + smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; umin_val = src_reg.umin_value; @@ -3131,7 +3140,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops are (32,32)->32 */ coerce_reg_to_size(dst_reg, 4); - coerce_reg_to_size(&src_reg, 4); } __reg_deduce_bounds(dst_reg); From 2c05d88818ab6571816b93edce4d53703870d7ae Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 08:48:27 -0500 Subject: [PATCH 478/499] net: cxgb3_main: fix a missing-check bug In cxgb_extension_ioctl(), the command of the ioctl is firstly copied from the user-space buffer 'useraddr' to 'cmd' and checked through the switch statement. If the command is not as expected, an error code EOPNOTSUPP is returned. In the following execution, i.e., the cases of the switch statement, the whole buffer of 'useraddr' is copied again to a specific data structure, according to what kind of command is requested. However, after the second copy, there is no re-check on the newly-copied command. Given that the buffer 'useraddr' is in the user space, a malicious user can race to change the command between the two copies. By doing so, the attacker can supply malicious data to the kernel and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL is returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index a19172dbe6be..c34ea385fe4a 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2159,6 +2159,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_QSET_PARAMS) + return -EINVAL; if (t.qset_idx >= SGE_QSETS) return -EINVAL; if (!in_range(t.intr_lat, 0, M_NEWTIMER) || @@ -2258,6 +2260,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_QSET_PARAMS) + return -EINVAL; + /* Display qsets for all ports when offload enabled */ if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { q1 = 0; @@ -2303,6 +2308,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + if (edata.cmd != CHELSIO_SET_QSET_NUM) + return -EINVAL; if (edata.val < 1 || (edata.val > 1 && !(adapter->flags & USING_MSIX))) return -EINVAL; @@ -2343,6 +2350,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_LOAD_FW) + return -EINVAL; /* Check t.len sanity ? 
*/ fw_data = memdup_user(useraddr + sizeof(t), t.len); if (IS_ERR(fw_data)) @@ -2366,6 +2375,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SETMTUTAB) + return -EINVAL; if (m.nmtus != NMTUS) return -EINVAL; if (m.mtus[0] < 81) /* accommodate SACK */ @@ -2407,6 +2418,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SET_PM) + return -EINVAL; if (!is_power_of_2(m.rx_pg_sz) || !is_power_of_2(m.tx_pg_sz)) return -EINVAL; /* not power of 2 */ @@ -2440,6 +2453,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_MEM) + return -EINVAL; if ((t.addr & 7) || (t.len & 7)) return -EINVAL; if (t.mem_id == MEM_CM) @@ -2492,6 +2507,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EAGAIN; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_TRACE_FILTER) + return -EINVAL; tp = (const struct trace_params *)&t.sip; if (t.config_tx) From 33aa8da1f8a7dc050b9d68f1db761ab787621065 Mon Sep 17 00:00:00 2001 From: Shanthosh RK Date: Fri, 5 Oct 2018 20:57:48 +0530 Subject: [PATCH 479/499] net: bpfilter: Fix type cast and pointer warnings Fixes the following Sparse warnings: net/bpfilter/bpfilter_kern.c:62:21: warning: cast removes address space of expression net/bpfilter/bpfilter_kern.c:101:49: warning: Using plain integer as NULL pointer Signed-off-by: Shanthosh RK Signed-off-by: David S. Miller --- net/bpfilter/bpfilter_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index f0fc182d3db7..b64e1649993b 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -59,7 +59,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.is_set = is_set; req.pid = current->pid; req.cmd = optname; - req.addr = (long)optval; + req.addr = (long __force __user)optval; req.len = optlen; mutex_lock(&bpfilter_lock); if (!info.pid) @@ -98,7 +98,7 @@ static int __init load_umh(void) pr_info("Loaded bpfilter_umh pid %d\n", info.pid); /* health check that usermode process started correctly */ - if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) { + if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { stop_umh(); return -EFAULT; } From 0781168e23a2fc8dceb989f11fc5b39b3ccacc35 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 10:59:36 -0500 Subject: [PATCH 480/499] yam: fix a missing-check bug In yam_ioctl(), the concrete ioctl command is firstly copied from the user-space buffer 'ifr->ifr_data' to 'ioctl_cmd' and checked through the following switch statement. If the command is not as expected, an error code EINVAL is returned. In the following execution the buffer 'ifr->ifr_data' is copied again in the cases of the switch statement to specific data structures according to what kind of ioctl command is requested. However, after the second copy, no re-check is enforced on the newly-copied command. Given that the buffer 'ifr->ifr_data' is in the user space, a malicious user can race to change the command between the two copies. 
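In rough form, the double-fetch pattern being closed here looks like this (a simplified sketch, not the exact yam.c code; only the added ym->cmd re-check is taken from the patch):

	int ioctl_cmd;
	struct yamdrv_ioctl_mcs *ym;

	/* first fetch: only the command word is read and validated */
	if (copy_from_user(&ioctl_cmd, ifr->ifr_data, sizeof(int)))
		return -EFAULT;

	switch (ioctl_cmd) {
	case SIOCYAMSMCS:
		/* second fetch: the whole structure, command word included,
		 * is copied again; user space may have changed cmd in the
		 * meantime, so it must be re-checked after this copy.
		 */
		ym = memdup_user(ifr->ifr_data, sizeof(struct yamdrv_ioctl_mcs));
		if (IS_ERR(ym))
			return PTR_ERR(ym);
		if (ym->cmd != SIOCYAMSMCS) {	/* the re-check added by this patch */
			kfree(ym);
			return -EINVAL;
		}
		/* ... program the device with ym ... */
		kfree(ym);
		break;
	}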
This way, the attacker can inject inconsistent data and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL will be returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- drivers/net/hamradio/yam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 16ec7af6ab7b..ba9df430fca6 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -966,6 +966,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); + if (ym->cmd != SIOCYAMSMCS) + return -EINVAL; if (ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; @@ -981,6 +983,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg))) return -EFAULT; + if (yi.cmd != SIOCYAMSCFG) + return -EINVAL; if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev)) return -EINVAL; /* Cannot change this parameter when up */ if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev)) From bd961c9bc66497f0c63f4ba1d02900bb85078366 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 1 Oct 2018 22:46:40 -0300 Subject: [PATCH 481/499] rtnetlink: fix rtnl_fdb_dump() for ndmsg header Currently, rtnl_fdb_dump() assumes the family header is 'struct ifinfomsg', which is not always true -- 'struct ndmsg' is used by iproute2 ('ip neigh'). The problem is, the function bails out early if nlmsg_parse() fails, which does occur for iproute2 usage of 'struct ndmsg' because the payload length is shorter than the family header alone (as 'struct ifinfomsg' is assumed). This breaks backward compatibility with userspace -- nothing is sent back. Some examples with iproute2 and netlink library for go [1]: 1) $ bridge fdb show 33:33:00:00:00:01 dev ens3 self permanent 01:00:5e:00:00:01 dev ens3 self permanent 33:33:ff:15:98:30 dev ens3 self permanent This one works, as it uses 'struct ifinfomsg'. fdb_show() @ iproute2/bridge/fdb.c """ .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), ... if (rtnl_dump_request(&rth, RTM_GETNEIGH, [...] """ 2) $ ip --family bridge neigh RTNETLINK answers: Invalid argument Dump terminated This one fails, as it uses 'struct ndmsg'. do_show_or_flush() @ iproute2/ip/ipneigh.c """ .n.nlmsg_type = RTM_GETNEIGH, .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), """ 3) $ ./neighlist < no output > This one fails, as it uses 'struct ndmsg'-based. neighList() @ netlink/neigh_linux.go """ req := h.newNetlinkRequest(unix.RTM_GETNEIGH, [...] msg := Ndmsg{ """ The actual breakage was introduced by commit 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error"), because nlmsg_parse() fails if the payload length (with the _actual_ family header) is less than the family header length alone (which is assumed, in parameter 'hdrlen'). This is true in the examples above with struct ndmsg, with size and payload length shorter than struct ifinfomsg. However, that commit just intends to fix something under the assumption the family header is indeed an 'struct ifinfomsg' - by preventing access to the payload as such (via 'ifm' pointer) if the payload length is not sufficient to actually contain it. 
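For reference, the header-size mismatch driving that failure can be checked from user space (a minimal sketch; the 12- and 16-byte figures are what common builds produce):

	#include <stdio.h>
	#include <linux/rtnetlink.h>	/* struct ifinfomsg */
	#include <linux/neighbour.h>	/* struct ndmsg */

	int main(void)
	{
		/* Typically prints 12 and 16: an ndmsg-sized payload is shorter
		 * than the ifinfomsg header nlmsg_parse() is told to expect,
		 * so the parse fails before any attributes are even looked at.
		 */
		printf("ndmsg: %zu, ifinfomsg: %zu\n",
		       sizeof(struct ndmsg), sizeof(struct ifinfomsg));
		return 0;
	}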
The assumption was introduced by commit 5e6d24358799 ("bridge: netlink dump interface at par with brctl"), to support iproute2's 'bridge fdb' command (not 'ip neigh') which indeed uses 'struct ifinfomsg', thus is not broken. So, in order to unbreak the 'struct ndmsg' family headers and still allow 'struct ifinfomsg' to continue to work, check for the known message sizes used with 'struct ndmsg' in iproute2 (with zero or one attribute which is not used in this function anyway) then do not parse the data as ifinfomsg. Same examples with this patch applied (or revert/before the original fix): $ bridge fdb show 33:33:00:00:00:01 dev ens3 self permanent 01:00:5e:00:00:01 dev ens3 self permanent 33:33:ff:15:98:30 dev ens3 self permanent $ ip --family bridge neigh dev ens3 lladdr 33:33:00:00:00:01 PERMANENT dev ens3 lladdr 01:00:5e:00:00:01 PERMANENT dev ens3 lladdr 33:33:ff:15:98:30 PERMANENT $ ./neighlist netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0x0, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x1, 0x0, 0x5e, 0x0, 0x0, 0x1}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} netlink.Neigh{LinkIndex:2, Family:7, State:128, Type:0, Flags:2, IP:net.IP(nil), HardwareAddr:net.HardwareAddr{0x33, 0x33, 0xff, 0x15, 0x98, 0x30}, LLIPAddr:net.IP(nil), Vlan:0, VNI:0} Tested on mainline (v4.19-rc6) and net-next (3bd09b05b068). References: [1] netlink library for go (test-case) https://github.com/vishvananda/netlink $ cat ~/go/src/neighlist/main.go package main import ("fmt"; "syscall"; "github.com/vishvananda/netlink") func main() { neighs, _ := netlink.NeighList(0, syscall.AF_BRIDGE) for _, neigh := range neighs { fmt.Printf("%#v\n", neigh) } } $ export GOPATH=~/go $ go get github.com/vishvananda/netlink $ go build neighlist $ ~/go/src/neighlist/neighlist Thanks to David Ahern for suggestions to improve this patch. Fixes: 0ff50e83b512 ("net: rtnetlink: bail out from rtnl_fdb_dump() on parse error") Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl") Reported-by: Aidan Obley Signed-off-by: Mauricio Faria de Oliveira Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 448703312fed..37c7936124e6 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3748,16 +3748,27 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; - err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, - IFLA_MAX, ifla_policy, NULL); - if (err < 0) { - return -EINVAL; - } else if (err == 0) { - if (tb[IFLA_MASTER]) - br_idx = nla_get_u32(tb[IFLA_MASTER]); - } + /* A hack to preserve kernel<->userspace interface. + * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. + * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. + * So, check for ndmsg with an optional u32 attribute (not used here). + * Fortunately these sizes don't conflict with the size of ifinfomsg + * with an optional attribute. 
+ */ + if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) && + (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) + + nla_attr_size(sizeof(u32)))) { + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, + IFLA_MAX, ifla_policy, NULL); + if (err < 0) { + return -EINVAL; + } else if (err == 0) { + if (tb[IFLA_MASTER]) + br_idx = nla_get_u32(tb[IFLA_MASTER]); + } - brport_idx = ifm->ifi_index; + brport_idx = ifm->ifi_index; + } if (br_idx) { br_dev = __dev_get_by_index(net, br_idx); From 8b4c3cdd9dd8290343ce959a132d3b334062c5b9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 3 Oct 2018 15:05:36 -0700 Subject: [PATCH 482/499] net: sched: Add policy validation for tc attributes A number of TC attributes are processed without proper validation (e.g., length checks). Add a tca policy for all input attributes and use when invoking nlmsg_parse. The 2 Fixes tags below cover the latest additions. The other attributes are a string (KIND), nested attribute (OPTIONS which does seem to have validation in most cases), for dumps only or a flag. Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters") Fixes: d47a6b0e7c492 ("net: sched: introduce ingress/egress block index attributes for qdisc") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/sched/sch_api.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 98541c6399db..85e73f48e48f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1311,6 +1311,18 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ +const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { + [TCA_KIND] = { .type = NLA_STRING }, + [TCA_OPTIONS] = { .type = NLA_NESTED }, + [TCA_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct tc_estimator) }, + [TCA_STAB] = { .type = NLA_NESTED }, + [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, + [TCA_CHAIN] = { .type = NLA_U32 }, + [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, + [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, +}; + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1327,7 +1339,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1411,7 +1424,8 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, replay: /* Reinit, just in case something touches this. 
*/ - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; @@ -1645,7 +1659,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, + rtm_tca_policy, NULL); if (err < 0) return err; @@ -1864,7 +1879,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, + extack); if (err < 0) return err; From a688caa34beb2fd2a92f1b6d33e40cde433ba160 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 4 Oct 2018 10:12:37 -0700 Subject: [PATCH 483/499] ipv6: take rcu lock in rawv6_send_hdrinc() In rawv6_send_hdrinc(), in order to avoid an extra dst_hold(), we directly assign the dst to skb and set passed in dst to NULL to avoid double free. However, in error case, we free skb and then do stats update with the dst pointer passed in. This causes use-after-free on the dst. Fix it by taking rcu read lock right before dst could get released to make sure dst does not get freed until the stats update is done. Note: we don't have this issue in ipv4 cause dst is not used for stats update in v4. Syzkaller reported following crash: BUG: KASAN: use-after-free in rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] BUG: KASAN: use-after-free in rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 Read of size 8 at addr ffff8801d95ba730 by task syz-executor0/32088 CPU: 1 PID: 32088 Comm: syz-executor0 Not tainted 4.19.0-rc2+ #93 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 rawv6_send_hdrinc net/ipv6/raw.c:692 [inline] rawv6_sendmsg+0x4421/0x4630 net/ipv6/raw.c:921 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 __sys_sendmsg+0x11d/0x280 net/socket.c:2152 __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg net/socket.c:2159 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457099 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f83756edc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f83756ee6d4 RCX: 0000000000457099 RDX: 0000000000000000 RSI: 0000000020003840 RDI: 0000000000000004 RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d4b30 R14: 00000000004c90b1 R15: 0000000000000000 Allocated by task 32088: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 
kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc+0x12e/0x730 mm/slab.c:3554 dst_alloc+0xbb/0x1d0 net/core/dst.c:105 ip6_dst_alloc+0x35/0xa0 net/ipv6/route.c:353 ip6_rt_cache_alloc+0x247/0x7b0 net/ipv6/route.c:1186 ip6_pol_route+0x8f8/0xd90 net/ipv6/route.c:1895 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2093 fib6_rule_lookup+0x277/0x860 net/ipv6/fib6_rules.c:122 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2121 ip6_route_output include/net/ip6_route.h:88 [inline] ip6_dst_lookup_tail+0xe27/0x1d60 net/ipv6/ip6_output.c:951 ip6_dst_lookup_flow+0xc8/0x270 net/ipv6/ip6_output.c:1079 rawv6_sendmsg+0x12d9/0x4630 net/ipv6/raw.c:905 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114 __sys_sendmsg+0x11d/0x280 net/socket.c:2152 __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg net/socket.c:2159 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 5356: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x83/0x290 mm/slab.c:3756 dst_destroy+0x267/0x3c0 net/core/dst.c:141 dst_destroy_rcu+0x16/0x19 net/core/dst.c:154 __rcu_reclaim kernel/rcu/rcu.h:236 [inline] rcu_do_batch kernel/rcu/tree.c:2576 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2880 [inline] __rcu_process_callbacks kernel/rcu/tree.c:2847 [inline] rcu_process_callbacks+0xf23/0x2670 kernel/rcu/tree.c:2864 __do_softirq+0x30b/0xad8 kernel/softirq.c:292 Fixes: 1789a640f556 ("raw: avoid two atomics in xmit") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 413d98bf24f4..5e0efd3954e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -651,8 +651,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; skb->tstamp = sockc->transmit_time; - skb_dst_set(skb, &rt->dst); - *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -665,8 +663,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); - if (err) - goto error_fault; + if (err) { + err = -EFAULT; + kfree_skb(skb); + goto error; + } + + skb_dst_set(skb, &rt->dst); + *dstp = NULL; /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -675,21 +679,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (unlikely(!skb)) return 0; + /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev + * in the error path. Since skb has been freed, the dst could + * have been queued for deletion. 
+ */ + rcu_read_lock(); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); - if (err) - goto error; + if (err) { + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); + goto error_check; + } + rcu_read_unlock(); out: return 0; -error_fault: - err = -EFAULT; - kfree_skb(skb); error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); +error_check: if (err == -ENOBUFS && !np->recverr) err = 0; return err; From 35f3625c21852ad839f20c91c7d81c4c1101e207 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 5 Oct 2018 09:04:40 +0200 Subject: [PATCH 484/499] net: mvpp2: Extract the correct ethtype from the skb for tx csum offload When offloading the L3 and L4 csum computation on TX, we need to extract the l3_proto from the ethtype, independently of the presence of a vlan tag. The actual driver uses skb->protocol as-is, resulting in packets with the wrong L4 checksum being sent when there's a vlan tag in the packet header and checksum offloading is enabled. This commit makes use of vlan_protocol_get() to get the correct ethtype regardless the presence of a vlan tag. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 38cc01beea79..a74002b43b51 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1725,7 +1725,7 @@ static void mvpp2_txq_desc_put(struct mvpp2_tx_queue *txq) } /* Set Tx descriptors fields relevant for CSUM calculation */ -static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, +static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, int ip_hdr_len, int l4_proto) { u32 command; @@ -2600,14 +2600,15 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { int ip_hdr_len = 0; u8 l4_proto; + __be16 l3_proto = vlan_get_protocol(skb); - if (skb->protocol == htons(ETH_P_IP)) { + if (l3_proto == htons(ETH_P_IP)) { struct iphdr *ip4h = ip_hdr(skb); /* Calculate IPv4 checksum and L4 checksum */ ip_hdr_len = ip4h->ihl; l4_proto = ip4h->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (l3_proto == htons(ETH_P_IPV6)) { struct ipv6hdr *ip6h = ipv6_hdr(skb); /* Read l4_protocol from one of IPv6 extra headers */ @@ -2619,7 +2620,7 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb) } return mvpp2_txq_desc_csum(skb_network_offset(skb), - skb->protocol, ip_hdr_len, l4_proto); + l3_proto, ip_hdr_len, l4_proto); } return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE; From 017b1660df89f5fb4bfe66c34e35f7d2031100c7 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Oct 2018 15:51:29 -0700 Subject: [PATCH 485/499] mm: migration: fix migration of huge PMD shared pages The page migration code employs try_to_unmap() to try and unmap the source page. This is accomplished by using rmap_walk to find all vmas where the page is mapped. This search stops when page mapcount is zero. For shared PMD huge pages, the page map count is always 1 no matter the number of mappings. 
Shared mappings are tracked via the reference count of the PMD page. Therefore, try_to_unmap stops prematurely and does not completely unmap all mappings of the source page. This problem can result is data corruption as writes to the original source page can happen after contents of the page are copied to the target page. Hence, data is lost. This problem was originally seen as DB corruption of shared global areas after a huge page was soft offlined due to ECC memory errors. DB developers noticed they could reproduce the issue by (hotplug) offlining memory used to back huge pages. A simple testcase can reproduce the problem by creating a shared PMD mapping (note that this must be at least PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using migrate_pages() to migrate process pages between nodes while continually writing to the huge pages being migrated. To fix, have the try_to_unmap_one routine check for huge PMD sharing by calling huge_pmd_unshare for hugetlbfs huge pages. If it is a shared mapping it will be 'unshared' which removes the page table entry and drops the reference on the PMD page. After this, flush caches and TLB. mmu notifiers are called before locking page tables, but we can not be sure of PMD sharing until page tables are locked. Therefore, check for the possibility of PMD sharing before locking so that notifiers can prepare for the worst possible case. Link: http://lkml.kernel.org/r/20180823205917.16297-2-mike.kravetz@oracle.com [mike.kravetz@oracle.com: make _range_in_vma() a static inline] Link: http://lkml.kernel.org/r/6063f215-a5c8-2f0c-465a-2c515ddc952d@oracle.com Fixes: 39dde65c9940 ("shared page table for hugetlb page") Signed-off-by: Mike Kravetz Acked-by: Kirill A. Shutemov Reviewed-by: Naoya Horiguchi Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: Davidlohr Bueso Cc: Jerome Glisse Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/hugetlb.h | 14 ++++++++++++++ include/linux/mm.h | 6 ++++++ mm/hugetlb.c | 37 ++++++++++++++++++++++++++++++++++-- mm/rmap.c | 42 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 94 insertions(+), 5 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b68e345f0ca..087fd5f48c91 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pd(struct vm_area_struct *vma, @@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, + pte_t *ptep) +{ + return 0; +} + +static inline void adjust_range_if_pmd_sharing_possible( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} + #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) diff --git a/include/linux/mm.h b/include/linux/mm.h index a61ebe8ad4ca..0416a7204be3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2455,6 +2455,12 @@ static inline struct vm_area_struct 
*find_exact_vma(struct mm_struct *mm, return vma; } +static inline bool range_in_vma(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + return (vma && vma->vm_start <= start && end <= vma->vm_end); +} + #ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); void vma_set_page_prot(struct vm_area_struct *vma); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3c21775f196b..b903d746e132 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4545,12 +4545,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) /* * check on proper vm_flags and page table alignment */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) + if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end)) return true; return false; } +/* + * Determine if start,end range within vma could be mapped by shared pmd. + * If yes, adjust start and end to cover range associated with possible + * shared pmd mappings. + */ +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + unsigned long check_addr = *start; + + if (!(vma->vm_flags & VM_MAYSHARE)) + return; + + for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { + unsigned long a_start = check_addr & PUD_MASK; + unsigned long a_end = a_start + PUD_SIZE; + + /* + * If sharing is possible, adjust start/end if necessary. + */ + if (range_in_vma(vma, a_start, a_end)) { + if (a_start < *start) + *start = a_start; + if (a_end > *end) + *end = a_end; + } + } +} + /* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the @@ -4648,6 +4676,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } + +void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} #define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ diff --git a/mm/rmap.c b/mm/rmap.c index eb477809a5c0..1e79fac3186b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* - * We have to assume the worse case ie pmd for invalidation. Note that - * the page can not be free in this function as call of try_to_unmap() - * must hold a reference on the page. + * For THP, we have to assume the worse case ie pmd for invalidation. + * For hugetlb, it could be much worse if we need to do pud + * invalidation in the case of pmd sharing. + * + * Note that the page can not be free in this function as call of + * try_to_unmap() must hold a reference on the page. */ end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); + if (PageHuge(page)) { + /* + * If sharing is possible, start and end will be adjusted + * accordingly. + */ + adjust_range_if_pmd_sharing_possible(vma, &start, &end); + } mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); while (page_vma_mapped_walk(&pvmw)) { @@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; + if (PageHuge(page)) { + if (huge_pmd_unshare(mm, &address, pvmw.pte)) { + /* + * huge_pmd_unshare unmapped an entire PMD + * page. There is no way of knowing exactly + * which PMDs may be cached for this mm, so + * we must flush them all. start/end were + * already adjusted above to cover this range. 
+ */ + flush_cache_range(vma, start, end); + flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); + + /* + * The ref count of the PMD page was dropped + * which is part of the way map counting + * is done for shared PMDs. Return 'true' + * here. When there is no other sharing, + * huge_pmd_unshare returns false and we will + * unmap the actual page and drop map count + * to zero. + */ + page_vma_mapped_walk_done(&pvmw); + break; + } + } if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && From dff11abe280b47c21b804a8ace318e0638bb9a49 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 5 Oct 2018 15:51:33 -0700 Subject: [PATCH 486/499] hugetlb: take PMD sharing into account when flushing tlb/caches When fixing an issue with PMD sharing and migration, it was discovered via code inspection that other callers of huge_pmd_unshare potentially have an issue with cache and tlb flushing. Use the routine adjust_range_if_pmd_sharing_possible() to calculate worst case ranges for mmu notifiers. Ensure that this range is flushed if huge_pmd_unshare succeeds and unmaps a PUD_SUZE area. Link: http://lkml.kernel.org/r/20180823205917.16297-3-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Kirill A. Shutemov Reviewed-by: Naoya Horiguchi Cc: Vlastimil Babka Cc: Davidlohr Bueso Cc: Michal Hocko Cc: Jerome Glisse Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 53 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b903d746e132..5c390f5a5207 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct page *page; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); - const unsigned long mmun_start = start; /* For mmu_notifiers */ - const unsigned long mmun_end = end; /* For mmu_notifiers */ + unsigned long mmun_start = start; /* For mmu_notifiers */ + unsigned long mmun_end = end; /* For mmu_notifiers */ WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, */ tlb_remove_check_page_size_change(tlb, sz); tlb_start_vma(tlb, vma); + + /* + * If sharing possible, alert mmu notifiers of worst case. + */ + adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); address = start; for (; address < end; address += sz) { @@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { spin_unlock(ptl); + /* + * We just unmapped a page of PMDs by clearing a PUD. + * The caller's TLB flush range should cover this area. + */ continue; } @@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm; struct mmu_gather tlb; + unsigned long tlb_start = start; + unsigned long tlb_end = end; + + /* + * If shared PMDs were possibly used within this vma range, adjust + * start/end for worst case tlb flushing. + * Note that we can not be sure if PMDs are shared until we try to + * unmap pages. However, we want to make sure TLB flushing covers + * the largest possible range. 
+ */ + adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end); mm = vma->vm_mm; - tlb_gather_mmu(&tlb, mm, start, end); + tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end); __unmap_hugepage_range(&tlb, vma, start, end, ref_page); - tlb_finish_mmu(&tlb, start, end); + tlb_finish_mmu(&tlb, tlb_start, tlb_end); } /* @@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, pte_t pte; struct hstate *h = hstate_vma(vma); unsigned long pages = 0; + unsigned long f_start = start; + unsigned long f_end = end; + bool shared_pmd = false; + + /* + * In the case of shared PMDs, the area to flush could be beyond + * start/end. Set f_start/f_end to cover the maximum possible + * range if PMD sharing is possible. + */ + adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end); BUG_ON(address >= end); - flush_cache_range(vma, address, end); + flush_cache_range(vma, f_start, f_end); - mmu_notifier_invalidate_range_start(mm, start, end); + mmu_notifier_invalidate_range_start(mm, f_start, f_end); i_mmap_lock_write(vma->vm_file->f_mapping); for (; address < end; address += huge_page_size(h)) { spinlock_t *ptl; @@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (huge_pmd_unshare(mm, &address, ptep)) { pages++; spin_unlock(ptl); + shared_pmd = true; continue; } pte = huge_ptep_get(ptep); @@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare * may have cleared our pud entry and done put_page on the page table: * once we release i_mmap_rwsem, another task can do the final put_page - * and that page table be reused and filled with junk. + * and that page table be reused and filled with junk. If we actually + * did unshare a page of pmds, flush the range corresponding to the pud. */ - flush_hugetlb_tlb_range(vma, start, end); + if (shared_pmd) + flush_hugetlb_tlb_range(vma, f_start, f_end); + else + flush_hugetlb_tlb_range(vma, start, end); /* * No need to call mmu_notifier_invalidate_range() we are downgrading * page table protection not changing it to point to a new page. @@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * See Documentation/vm/mmu_notifier.rst */ i_mmap_unlock_write(vma->vm_file->f_mapping); - mmu_notifier_invalidate_range_end(mm, start, end); + mmu_notifier_invalidate_range_end(mm, f_start, f_end); return pages << h->order; } From 69eb7765b9c6902444c89c54e7043242faf981e5 Mon Sep 17 00:00:00 2001 From: Larry Chen Date: Fri, 5 Oct 2018 15:51:37 -0700 Subject: [PATCH 487/499] ocfs2: fix crash in ocfs2_duplicate_clusters_by_page() ocfs2_duplicate_clusters_by_page() may crash if one of the extent's pages is dirty. When a page has not been written back, it is still in dirty state. If ocfs2_duplicate_clusters_by_page() is called against the dirty page, the crash happens. To fix this bug, we can just unlock the page and wait until the page until its not dirty. The following is the backtrace: kernel BUG at /root/code/ocfs2/refcounttree.c:2961! [exception RIP: ocfs2_duplicate_clusters_by_page+822] __ocfs2_move_extent+0x80/0x450 [ocfs2] ? __ocfs2_claim_clusters+0x130/0x250 [ocfs2] ocfs2_defrag_extent+0x5b8/0x5e0 [ocfs2] __ocfs2_move_extents_range+0x2a4/0x470 [ocfs2] ocfs2_move_extents+0x180/0x3b0 [ocfs2] ? 
ocfs2_wait_for_recovery+0x13/0x70 [ocfs2] ocfs2_ioctl_move_extents+0x133/0x2d0 [ocfs2] ocfs2_ioctl+0x253/0x640 [ocfs2] do_vfs_ioctl+0x90/0x5f0 SyS_ioctl+0x74/0x80 do_syscall_64+0x74/0x140 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Once we find the page is dirty, we do not wait until it's clean, rather we use write_one_page() to write it back Link: http://lkml.kernel.org/r/20180829074740.9438-1-lchen@suse.com [lchen@suse.com: update comments] Link: http://lkml.kernel.org/r/20180830075041.14879-1-lchen@suse.com [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Larry Chen Acked-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/refcounttree.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 7869622af22a..7a5ee145c733 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (map_end & (PAGE_SIZE - 1)) to = map_end & (PAGE_SIZE - 1); +retry: page = find_or_create_page(mapping, page_index, GFP_NOFS); if (!page) { ret = -ENOMEM; @@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, } /* - * In case PAGE_SIZE <= CLUSTER_SIZE, This page - * can't be dirtied before we CoW it out. + * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty + * page, so write it back. */ - if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) - BUG_ON(PageDirty(page)); + if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { + if (PageDirty(page)) { + /* + * write_on_page will unlock the page on return + */ + ret = write_one_page(page); + goto retry; + } + } if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); From e125fe405abedc1dc8a5b2229b80ee91c1434015 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 5 Oct 2018 15:51:41 -0700 Subject: [PATCH 488/499] mm, thp: fix mlocking THP page with migration enabled A transparent huge page is represented by a single entry on an LRU list. Therefore, we can only make unevictable an entire compound page, not individual subpages. If a user tries to mlock() part of a huge page, we want the rest of the page to be reclaimable. We handle this by keeping PTE-mapped huge pages on normal LRU lists: the PMD on border of VM_LOCKED VMA will be split into PTE table. Introduction of THP migration breaks[1] the rules around mlocking THP pages. If we had a single PMD mapping of the page in mlocked VMA, the page will get mlocked, regardless of PTE mappings of the page. For tmpfs/shmem it's easy to fix by checking PageDoubleMap() in remove_migration_pmd(). Anon THP pages can only be shared between processes via fork(). Mlocked page can only be shared if parent mlocked it before forking, otherwise CoW will be triggered on mlock(). For Anon-THP, we can fix the issue by munlocking the page on removing PTE migration entry for the page. PTEs for the page will always come after mlocked PMD: rmap walks VMAs from oldest to newest. 
Test-case: #include #include #include #include #include int main(void) { unsigned long nodemask = 4; void *addr; addr = mmap((void *)0x20000000UL, 2UL << 20, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0); if (fork()) { wait(NULL); return 0; } mlock(addr, 4UL << 10); mbind(addr, 2UL << 20, MPOL_PREFERRED | MPOL_F_RELATIVE_NODES, &nodemask, 4, MPOL_MF_MOVE); return 0; } [1] https://lkml.kernel.org/r/CAOMGZ=G52R-30rZvhGxEbkTw7rLLwBGadVYeo--iizcD3upL3A@mail.gmail.com Link: http://lkml.kernel.org/r/20180917133816.43995-1-kirill.shutemov@linux.intel.com Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path") Signed-off-by: Kirill A. Shutemov Reported-by: Vegard Nossum Reviewed-by: Zi Yan Cc: Naoya Horiguchi Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 +- mm/migrate.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 533f9b00147d..00704060b7f7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2931,7 +2931,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) else page_add_file_rmap(new, true); set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); - if (vma->vm_flags & VM_LOCKED) + if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new)) mlock_vma_page(new); update_mmu_cache_pmd(vma, address, pvmw->pmd); } diff --git a/mm/migrate.c b/mm/migrate.c index d6a2e89b086a..9d374011c244 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) mlock_vma_page(new); + if (PageTransHuge(page) && PageMlocked(page)) + clear_page_mlock(page); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, pvmw.address, pvmw.pte); } From 51896864579d5a3349740847083f4db5c6487164 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 5 Oct 2018 15:51:44 -0700 Subject: [PATCH 489/499] mm/gup_benchmark: fix unsigned comparison to zero in __gup_benchmark_ioctl get_user_pages_fast() will return negative value if no pages were pinned, then be converted to a unsigned, which is compared to zero, giving the wrong result. Link: http://lkml.kernel.org/r/20180921095015.26088-1-yuehaibing@huawei.com Fixes: 09e35a4a1ca8 ("mm/gup_benchmark: handle gup failures") Signed-off-by: YueHaibing Reviewed-by: Andrew Morton Cc: Michael S. 
Tsirkin Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/gup_benchmark.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c index 6a473709e9b6..7405c9d89d65 100644 --- a/mm/gup_benchmark.c +++ b/mm/gup_benchmark.c @@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd, struct gup_benchmark *gup) { ktime_t start_time, end_time; - unsigned long i, nr, nr_pages, addr, next; + unsigned long i, nr_pages, addr, next; + int nr; struct page **pages; nr_pages = gup->size / PAGE_SIZE; From 59cf0a9339ca4b0226e9b4329dfb0a5b8b0b9bc2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 5 Oct 2018 15:51:48 -0700 Subject: [PATCH 490/499] ipc/shm.c: use ERR_CAST() for shm_lock() error return This uses ERR_CAST() instead of an open-coded cast, as it is casting across structure pointers, which upsets __randomize_layout: ipc/shm.c: In function `shm_lock': ipc/shm.c:209:9: note: randstruct: casting between randomized structure pointer types (ssa): `struct shmid_kernel' and `struct kern_ipc_perm' return (void *)ipcp; ^~~~~~~~~~~~ Link: http://lkml.kernel.org/r/20180919180722.GA15073@beast Fixes: 82061c57ce93 ("ipc: drop ipc_lock()") Signed-off-by: Kees Cook Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- ipc/shm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/shm.c b/ipc/shm.c index 4cd402e4cfeb..1c65fb357395 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -206,7 +206,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) * Callers of shm_lock() must validate the status of the returned ipc * object pointer and error out as appropriate. */ - return (void *)ipcp; + return ERR_CAST(ipcp); } static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) From e6112fc300702f96374f34368513d57795fc6d23 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Oct 2018 15:51:51 -0700 Subject: [PATCH 491/499] mm/migrate.c: split only transparent huge pages when allocation fails split_huge_page_to_list() fails on HugeTLB pages. I was experimenting with moving 32MB contig HugeTLB pages on arm64 (with a debug patch applied) and hit the following stack trace when the kernel crashed. [ 3732.462797] Call trace: [ 3732.462835] split_huge_page_to_list+0x3b0/0x858 [ 3732.462913] migrate_pages+0x728/0xc20 [ 3732.462999] soft_offline_page+0x448/0x8b0 [ 3732.463097] __arm64_sys_madvise+0x724/0x850 [ 3732.463197] el0_svc_handler+0x74/0x110 [ 3732.463297] el0_svc+0x8/0xc [ 3732.463347] Code: d1000400 f90b0e60 f2fbd5a2 a94982a1 (f9000420) When unmap_and_move[_huge_page]() fails due to lack of memory, the splitting should happen only for transparent huge pages, not for HugeTLB pages. PageTransHuge() returns true for both THP and HugeTLB pages. Hence the conditional check should test the PageHuge() flag to make sure that the given page is not a HugeTLB one. Link: http://lkml.kernel.org/r/1537798495-4996-1-git-send-email-anshuman.khandual@arm.com Fixes: 94723aafb9 ("mm: unclutter THP migration") Signed-off-by: Anshuman Khandual Acked-by: Michal Hocko Acked-by: Naoya Horiguchi Cc: Kirill A.
Shutemov Cc: Zi Yan Cc: Mike Kravetz Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9d374011c244..9ba72c2365e6 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1414,7 +1414,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, * we encounter them after the rest of the list * is processed. */ - if (PageTransHuge(page)) { + if (PageTransHuge(page) && !PageHuge(page)) { lock_page(page); rc = split_huge_page_to_list(page, from); unlock_page(page); From 20916d4636a9b3c1bf562b305f91d126771edaf9 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Oct 2018 15:51:54 -0700 Subject: [PATCH 492/499] mm/hugetlb: add mmap() encodings for 32MB and 512MB page sizes ARM64 architecture also supports 32MB and 512MB HugeTLB page sizes. This just adds mmap() system call argument encoding for them. Link: http://lkml.kernel.org/r/1537841300-6979-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Punit Agrawal Acked-by: Mike Kravetz Cc: Michal Hocko Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/uapi/asm-generic/hugetlb_encode.h | 2 ++ include/uapi/linux/memfd.h | 2 ++ include/uapi/linux/mman.h | 2 ++ include/uapi/linux/shm.h | 2 ++ 4 files changed, 8 insertions(+) diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h index e4732d3c2998..b0f8e87235bd 100644 --- a/include/uapi/asm-generic/hugetlb_encode.h +++ b/include/uapi/asm-generic/hugetlb_encode.h @@ -26,7 +26,9 @@ #define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h index 015a4c0bbb47..7a8a26751c23 100644 --- a/include/uapi/linux/memfd.h +++ b/include/uapi/linux/memfd.h @@ -25,7 +25,9 @@ #define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index bfd5938fede6..d0f515d53299 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -28,7 +28,9 @@ #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB diff --git 
a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index dde1344f047c..6507ad0afc81 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -65,7 +65,9 @@ struct shmid_ds { #define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define SHM_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB #define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define SHM_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB #define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB From f8a00cef17206ecd1b30d3d9f99e10d9fa707aa7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:51:58 -0700 Subject: [PATCH 493/499] proc: restrict kernel stack dumps to root Currently, you can use /proc/self/task/*/stack to cause a stack walk on a task you control while it is running on another CPU. That means that the stack can change under the stack walker. The stack walker does have guards against going completely off the rails and into random kernel memory, but it can interpret random data from your kernel stack as instruction pointers and stack pointers. This can cause exposure of kernel stack contents to userspace. Restrict the ability to inspect kernel stacks of arbitrary tasks to root in order to prevent a local attacker from exploiting racy stack unwinding to leak kernel task stack contents. See the added comment for a longer rationale. There don't seem to be any users of this userspace API that can't gracefully bail out if reading from the file fails. Therefore, I believe that this change is unlikely to break things. In the case that this patch does end up needing a revert, the next-best solution might be to fake a single-entry stack based on wchan. Link: http://lkml.kernel.org/r/20180927153316.200286-1-jannh@google.com Fixes: 2ec220e27f50 ("proc: add /proc/*/stack") Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Alexey Dobriyan Cc: Ken Chen Cc: Will Deacon Cc: Laura Abbott Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Josh Poimboeuf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H . Peter Anvin" Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index ccf86f16d9f0..7e9f07bf260d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -407,6 +407,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, unsigned long *entries; int err; + /* + * The ability to racily run the kernel stack unwinder on a running task + * and then observe the unwinder output is scary; while it is useful for + * debugging kernel issues, it can also allow an attacker to leak kernel + * stack contents. + * Doing this in a manner that is at least safe from races would require + * some work to ensure that the remote task can not be scheduled; and + * even then, this would still expose the unwinder as local attack + * surface. + * Therefore, this interface is restricted to root. 
+ */ + if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) + return -EACCES; + entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), GFP_KERNEL); if (!entries) From 28e2c4bb99aa40f9d5f07ac130cbc4da0ea93079 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:52:03 -0700 Subject: [PATCH 494/499] mm/vmstat.c: fix outdated vmstat_text 7a9cdebdcc17 ("mm: get rid of vmacache_flush_all() entirely") removed the VMACACHE_FULL_FLUSHES statistics, but didn't remove the corresponding entry in vmstat_text. This causes an out-of-bounds access in vmstat_show(). Luckily this only affects kernels with CONFIG_DEBUG_VM_VMACACHE=y, which is probably very rare. Link: http://lkml.kernel.org/r/20181001143138.95119-1-jannh@google.com Fixes: 7a9cdebdcc17 ("mm: get rid of vmacache_flush_all() entirely") Signed-off-by: Jann Horn Reviewed-by: Kees Cook Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Kemi Wang Cc: Andy Lutomirski Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 8ba0870ecddd..4cea7b8f519d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1283,7 +1283,6 @@ const char * const vmstat_text[] = { #ifdef CONFIG_DEBUG_VM_VMACACHE "vmacache_find_calls", "vmacache_find_hits", - "vmacache_full_flushes", #endif #ifdef CONFIG_SWAP "swap_ra", From 58bc4c34d249bf1bc50730a9a209139347cfacfe Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 5 Oct 2018 15:52:07 -0700 Subject: [PATCH 495/499] mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly 5dd0b16cdaff ("mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP") made the availability of the NR_TLB_REMOTE_FLUSH* counters inside the kernel unconditional to reduce #ifdef soup, but (either to avoid showing dummy zero counters to userspace, or because that code was missed) didn't update the vmstat_array, meaning that all following counters would be shown with incorrect values. This only affects kernel builds with CONFIG_VM_EVENT_COUNTERS=y && CONFIG_DEBUG_TLBFLUSH=y && CONFIG_SMP=n. Link: http://lkml.kernel.org/r/20181001143138.95119-2-jannh@google.com Fixes: 5dd0b16cdaff ("mm/vmstat: Make NR_TLB_REMOTE_FLUSH_RECEIVED available even on UP") Signed-off-by: Jann Horn Reviewed-by: Kees Cook Reviewed-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Roman Gushchin Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Kemi Wang Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmstat.c b/mm/vmstat.c index 4cea7b8f519d..7878da76abf2 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1275,6 +1275,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", +#else + "", /* nr_tlb_remote_flush */ + "", /* nr_tlb_remote_flush_received */ #endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", From b8e57efa2c98cc56c49461c4950cf026422c29e9 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 5 Oct 2018 15:52:10 -0700 Subject: [PATCH 496/499] mm/vmscan.c: fix int overflow in callers of do_shrink_slab() do_shrink_slab() returns unsigned long value, and the placing into int variable cuts high bytes off. Then we compare ret and 0xfffffffe (since SHRINK_EMPTY is converted to ret type). 
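A minimal user-space sketch of that truncation (assuming 64-bit unsigned long; the constant mirrors the kernel's SHRINK_EMPTY, the count is illustrative):

	#include <stdio.h>

	#define SHRINK_EMPTY (~0UL - 1)		/* 0xfffffffffffffffe */

	static unsigned long fake_do_shrink_slab(void)
	{
		return 0x1fffffffeUL;		/* a large, legitimate object count */
	}

	int main(void)
	{
		int ret = fake_do_shrink_slab();	/* high bits cut off: ret == 0xfffffffe, i.e. -2 */

		/* After the usual conversions -2 compares equal to SHRINK_EMPTY,
		 * so a real freeing result is misread as the "empty" marker.
		 */
		if (ret == SHRINK_EMPTY)
			printf("large result misread as SHRINK_EMPTY\n");
		return 0;
	}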
From b8e57efa2c98cc56c49461c4950cf026422c29e9 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Fri, 5 Oct 2018 15:52:10 -0700
Subject: [PATCH 496/499] mm/vmscan.c: fix int overflow in callers of do_shrink_slab()

do_shrink_slab() returns an unsigned long value, and placing it into an
int variable cuts the high bytes off.  Then we compare ret and
0xfffffffe (since SHRINK_EMPTY is converted to ret's type).

Thus a large number of objects returned by do_shrink_slab() may be
interpreted as SHRINK_EMPTY, if the low bytes of their value are equal
to 0xfffffffe.  Fix that by declaring ret as unsigned long in these
functions.

Link: http://lkml.kernel.org/r/153813407177.17544.14888305435570723973.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai
Reported-by: Cyrill Gorcunov
Acked-by: Cyrill Gorcunov
Reviewed-by: Josef Bacik
Cc: Michal Hocko
Cc: Andrey Ryabinin
Cc: Johannes Weiner
Cc: Tetsuo Handa
Cc: Shakeel Butt
Signed-off-by: Andrew Morton
Signed-off-by: Greg Kroah-Hartman
---
 mm/vmscan.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index c7ce2c161225..c5ef7240cbcb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -580,8 +580,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			struct mem_cgroup *memcg, int priority)
 {
 	struct memcg_shrinker_map *map;
-	unsigned long freed = 0;
-	int ret, i;
+	unsigned long ret, freed = 0;
+	int i;
 
 	if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
 		return 0;
@@ -677,9 +677,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 				 struct mem_cgroup *memcg,
 				 int priority)
 {
+	unsigned long ret, freed = 0;
 	struct shrinker *shrinker;
-	unsigned long freed = 0;
-	int ret;
 
 	if (!mem_cgroup_is_root(memcg))
 		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
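For illustration, a minimal userspace sketch of the truncation this fix
removes (fake_do_shrink_slab() is invented for the example; the
behaviour assumes a typical LP64 system, where converting an
out-of-range value to int is implementation-defined but in practice
keeps the low 32 bits):

  #include <stdio.h>

  #define SHRINK_EMPTY (~0UL - 1)

  static unsigned long fake_do_shrink_slab(void)
  {
          /* A legitimate count whose low 32 bits happen to be 0xfffffffe. */
          return 0x1fffffffeUL;
  }

  int main(void)
  {
          int ret = fake_do_shrink_slab();              /* old, truncating */
          unsigned long fixed = fake_do_shrink_slab();  /* new declaration */

          if (ret == SHRINK_EMPTY)
                  printf("int ret: real count misread as SHRINK_EMPTY\n");
          if (fixed != SHRINK_EMPTY)
                  printf("unsigned long ret: count preserved (%lu)\n", fixed);
          return 0;
  }
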
From cbe355f57c8074bc4f452e5b6e35509044c6fa23 Mon Sep 17 00:00:00 2001
From: Ashish Samant
Date: Fri, 5 Oct 2018 15:52:15 -0700
Subject: [PATCH 497/499] ocfs2: fix locking for res->tracking and dlm->tracking_list

In dlm_init_lockres() we access and modify res->tracking and
dlm->tracking_list without holding dlm->track_lock.  This can cause
list corruptions and can end up in a kernel panic.

Fix this by locking res->tracking and dlm->tracking_list with
dlm->track_lock instead of dlm->spinlock.

Link: http://lkml.kernel.org/r/1529951192-4686-1-git-send-email-ashish.samant@oracle.com
Signed-off-by: Ashish Samant
Reviewed-by: Changwei Ge
Acked-by: Joseph Qi
Acked-by: Jun Piao
Cc: Mark Fasheh
Cc: Joel Becker
Cc: Junxiao Bi
Cc: Changwei Ge
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Greg Kroah-Hartman
---
 fs/ocfs2/dlm/dlmmaster.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index aaca0949fe53..826f0567ec43 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -584,9 +584,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
 
 	res->last_used = 0;
 
-	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->track_lock);
 	list_add_tail(&res->tracking, &dlm->tracking_list);
-	spin_unlock(&dlm->spinlock);
+	spin_unlock(&dlm->track_lock);
 
 	memset(res->lvb, 0, DLM_LVB_LEN);
 	memset(res->refmap, 0, sizeof(res->refmap));
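For illustration, a minimal userspace sketch of the rule the ocfs2 fix
restores (pthread mutexes stand in for kernel spinlocks; all names are
invented): a shared list must be modified only under the lock that is
designated to protect it, not under whichever other lock the caller
already happens to hold:

  #include <pthread.h>

  struct node {
          struct node *next;
  };

  static struct node *tracking_list;   /* protected by track_lock only */
  static pthread_mutex_t track_lock = PTHREAD_MUTEX_INITIALIZER;

  /* Correct pattern: take the list's own lock around every update. */
  static void track_node(struct node *n)
  {
          pthread_mutex_lock(&track_lock);
          n->next = tracking_list;
          tracking_list = n;
          pthread_mutex_unlock(&track_lock);
  }

  int main(void)
  {
          struct node a = { 0 }, b = { 0 };

          track_node(&a);
          track_node(&b);
          return 0;
  }
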
From d41aa5252394c065d1f04d1ceea885b70d00c9c6 Mon Sep 17 00:00:00 2001
From: Daniel Black
Date: Fri, 5 Oct 2018 15:52:19 -0700
Subject: [PATCH 498/499] mm: madvise(MADV_DODUMP): allow hugetlbfs pages

Reproducer, assuming 2M of hugetlbfs available:

Hugetlbfs mounted, size=2M and option user=testuser

  # mount | grep ^hugetlbfs
  hugetlbfs on /dev/hugepages type hugetlbfs (rw,pagesize=2M,user=dan)
  # sysctl vm.nr_hugepages=1
  vm.nr_hugepages = 1
  # grep Huge /proc/meminfo
  AnonHugePages:         0 kB
  ShmemHugePages:        0 kB
  HugePages_Total:       1
  HugePages_Free:        1
  HugePages_Rsvd:        0
  HugePages_Surp:        0
  Hugepagesize:       2048 kB
  Hugetlb:            2048 kB

Code:

  #include <sys/mman.h>
  #include <stdio.h>

  #define SIZE 2*1024*1024

  int main()
  {
    void *ptr;

    ptr = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_HUGETLB | MAP_ANONYMOUS, -1, 0);
    madvise(ptr, SIZE, MADV_DONTDUMP);
    madvise(ptr, SIZE, MADV_DODUMP);
  }

Compile and strace:

  mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0) = 0x7ff7c9200000
  madvise(0x7ff7c9200000, 2097152, MADV_DONTDUMP) = 0
  madvise(0x7ff7c9200000, 2097152, MADV_DODUMP)   = -1 EINVAL (Invalid argument)

hugetlbfs pages have VM_DONTEXPAND set in their VmFlags, like driver
pages, based on author testing with analysis from Florian Weimer[1].

The inclusion of VM_DONTEXPAND in the VM_SPECIAL definition was a
consequence of the widespread use of VM_DONTEXPAND in device drivers.

A consequence of [2] is that VM_DONTEXPAND-marked pages are unable to
be marked DODUMP.

A user could quite legitimately madvise(MADV_DONTDUMP) their hugetlbfs
memory for a while and later request madvise(MADV_DODUMP) on the same
memory.  We correct this omission by allowing madvise(MADV_DODUMP) on
hugetlbfs pages.

[1] https://stackoverflow.com/questions/52548260/madvisedodump-on-the-same-ptr-size-as-a-successful-madvisedontdump-fails-wit
[2] commit 0103bd16fb90 ("mm: prepare VM_DONTDUMP for using in drivers")

Link: http://lkml.kernel.org/r/20180930054629.29150-1-daniel@linux.ibm.com
Link: https://lists.launchpad.net/maria-discuss/msg05245.html
Fixes: 0103bd16fb90 ("mm: prepare VM_DONTDUMP for using in drivers")
Reported-by: Kenneth Penza
Signed-off-by: Daniel Black
Reviewed-by: Mike Kravetz
Cc: Konstantin Khlebnikov
Signed-off-by: Andrew Morton
Signed-off-by: Greg Kroah-Hartman
---
 mm/madvise.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 972a9eaa898b..71d21df2a3f3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
 		new_flags |= VM_DONTDUMP;
 		break;
 	case MADV_DODUMP:
-		if (new_flags & VM_SPECIAL) {
+		if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
 			error = -EINVAL;
 			goto out;
 		}

From 0238df646e6224016a45505d2c111a24669ebe21 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sun, 7 Oct 2018 17:26:02 +0200
Subject: [PATCH 499/499] Linux 4.19-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6c3da3e10f07..9b2df076885a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Merciless Moray
 
 # *DOCUMENTATION*