From 4bbc84ffd137fe43d68aa633d317b0a96de8a828 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Mon, 19 Dec 2016 19:17:08 -0800
Subject: [PATCH 001/322] sparc: use symbolic names for tsb indexing

Use symbolic names MM_TSB_BASE and MM_TSB_HUGE instead of numeric values
0 and 1 in __tsb_context_switch.  Code cleanup only, no functional change.

Suggested-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/mmu_context_64.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index b84be675e507..d0317993e947 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -35,15 +35,15 @@ void __tsb_context_switch(unsigned long pgd_pa,
 static inline void tsb_context_switch(struct mm_struct *mm)
 {
 	__tsb_context_switch(__pa(mm->pgd),
-			     &mm->context.tsb_block[0],
+			     &mm->context.tsb_block[MM_TSB_BASE],
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-			     (mm->context.tsb_block[1].tsb ?
-			      &mm->context.tsb_block[1] :
+			     (mm->context.tsb_block[MM_TSB_HUGE].tsb ?
+			      &mm->context.tsb_block[MM_TSB_HUGE] :
 			      NULL)
 #else
 			     NULL
 #endif
-			     , __pa(&mm->context.tsb_descr[0]));
+			     , __pa(&mm->context.tsb_descr[MM_TSB_BASE]));
 }
 
 void tsb_grow(struct mm_struct *mm,

From 2f884e6e688a0deb69e6c9552e51aef8b7e3f5f1 Mon Sep 17 00:00:00 2001
From: "Strashko, Grygorii" <grygorii.strashko@ti.com>
Date: Thu, 8 Dec 2016 17:33:10 -0600
Subject: [PATCH 002/322] irqchip/keystone: Fix "scheduling while atomic" on rt

The below call chain generates "scheduling while atomic" backtrace and
causes system crash when Keystone 2 IRQ chip driver is used with RT-kernel:

gic_handle_irq()
 |-__handle_domain_irq()
  |-generic_handle_irq()
   |-keystone_irq_handler()
    |-regmap_read()
     |-regmap_lock_spinlock()
      |-rt_spin_lock()

The reason is that Keystone driver dispatches IRQ using chained IRQ handler
and accesses I/O memory through syscon->regmap(mmio) which is implemented
as fast_io regmap and uses regular spinlocks for synchronization, but
spinlocks transformed to rt_mutexes on RT.

Hence, convert Keystone 2 IRQ driver to use generic irq handler instead of
chained IRQ handler. This way it will be compatible with RT kernel where it
will be forced thread IRQ handler while in non-RT kernel it still will be
executed in HW IRQ context.

Cc: Suman Anna <s-anna@ti.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Tested-by: Suman Anna <s-anna@ti.com>
Link: https://lkml.kernel.org/r/20161208233310.10329-1-grygorii.strashko@ti.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-keystone.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index 54a5e870a8f5..efbcf8435185 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -19,9 +19,9 @@
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
-#include <linux/irqchip/chained_irq.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/mfd/syscon.h>
@@ -39,6 +39,7 @@ struct keystone_irq_device {
 	struct irq_domain	*irqd;
 	struct regmap		*devctrl_regs;
 	u32			devctrl_offset;
+	raw_spinlock_t		wa_lock;
 };
 
 static inline u32 keystone_irq_readl(struct keystone_irq_device *kirq)
@@ -83,17 +84,15 @@ static void keystone_irq_ack(struct irq_data *d)
 	/* nothing to do here */
 }
 
-static void keystone_irq_handler(struct irq_desc *desc)
+static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq)
 {
-	unsigned int irq = irq_desc_get_irq(desc);
-	struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc);
+	struct keystone_irq_device *kirq = keystone_irq;
+	unsigned long wa_lock_flags;
 	unsigned long pending;
 	int src, virq;
 
 	dev_dbg(kirq->dev, "start irq %d\n", irq);
 
-	chained_irq_enter(irq_desc_get_chip(desc), desc);
-
 	pending = keystone_irq_readl(kirq);
 	keystone_irq_writel(kirq, pending);
 
@@ -111,13 +110,15 @@ static void keystone_irq_handler(struct irq_desc *desc)
 			if (!virq)
 				dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n",
 					 src, virq);
+			raw_spin_lock_irqsave(&kirq->wa_lock, wa_lock_flags);
 			generic_handle_irq(virq);
+			raw_spin_unlock_irqrestore(&kirq->wa_lock,
+						   wa_lock_flags);
 		}
 	}
 
-	chained_irq_exit(irq_desc_get_chip(desc), desc);
-
 	dev_dbg(kirq->dev, "end irq %d\n", irq);
+	return IRQ_HANDLED;
 }
 
 static int keystone_irq_map(struct irq_domain *h, unsigned int virq,
@@ -182,9 +183,16 @@ static int keystone_irq_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
+	raw_spin_lock_init(&kirq->wa_lock);
+
 	platform_set_drvdata(pdev, kirq);
 
-	irq_set_chained_handler_and_data(kirq->irq, keystone_irq_handler, kirq);
+	ret = request_irq(kirq->irq, keystone_irq_handler,
+			  0, dev_name(dev), kirq);
+	if (ret) {
+		irq_domain_remove(kirq->irqd);
+		return ret;
+	}
 
 	/* clear all source bits */
 	keystone_irq_writel(kirq, ~0x0);
@@ -199,6 +207,8 @@ static int keystone_irq_remove(struct platform_device *pdev)
 	struct keystone_irq_device *kirq = platform_get_drvdata(pdev);
 	int hwirq;
 
+	free_irq(kirq->irq, kirq);
+
 	for (hwirq = 0; hwirq < KEYSTONE_N_IRQ; hwirq++)
 		irq_dispose_mapping(irq_find_mapping(kirq->irqd, hwirq));
 

From 88e20c74ee020f9e0c99dfce0dd9aa61c3f0cca0 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Tue, 27 Dec 2016 18:29:57 +0000
Subject: [PATCH 003/322] irqchip/mxs: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND

The ICOLL controller doesn't provide any facility to configure the
wakeup sources. That's the reason why this implementation lacks
the irq_set_wake implementation. But this prevent us from properly
entering power management states like "suspend to idle".

So enable the flags IRQCHIP_SKIP_SET_WAKE and
IRQCHIP_MASK_ON_SUSPEND to let the irqchip core allows and handles
the power management.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Link: https://lkml.kernel.org/r/1482863397-11400-1-git-send-email-stefan.wahren@i2se.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-mxs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index 17304705f2cf..05fa9f7af53c 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -131,12 +131,16 @@ static struct irq_chip mxs_icoll_chip = {
 	.irq_ack = icoll_ack_irq,
 	.irq_mask = icoll_mask_irq,
 	.irq_unmask = icoll_unmask_irq,
+	.flags = IRQCHIP_MASK_ON_SUSPEND |
+		 IRQCHIP_SKIP_SET_WAKE,
 };
 
 static struct irq_chip asm9260_icoll_chip = {
 	.irq_ack = icoll_ack_irq,
 	.irq_mask = asm9260_mask_irq,
 	.irq_unmask = asm9260_unmask_irq,
+	.flags = IRQCHIP_MASK_ON_SUSPEND |
+		 IRQCHIP_SKIP_SET_WAKE,
 };
 
 asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs)

From e0edc8c546463f268d41d064d855bcff994c52fa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 6 Jan 2017 11:48:50 -0500
Subject: [PATCH 004/322] libata: apply MAX_SEC_1024 to all CX1-JB*-HP devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Marko reports that CX1-JB512-HP shows the same timeout issues as
CX1-JB256-HP.  Let's apply MAX_SEC_128 to all devices in the series.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Marko Koski-Vähälä <marko@koski-vahala.com>
Cc: stable@vger.kernel.org # v3.19+
---
 drivers/ata/libata-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 9cd0a2d41816..a2bd5acc8a89 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4356,10 +4356,10 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "ST380013AS",		"3.20",		ATA_HORKAGE_MAX_SEC_1024 },
 
 	/*
-	 * Device times out with higher max sects.
+	 * These devices time out with higher max sects.
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=121671
 	 */
-	{ "LITEON CX1-JB256-HP", NULL,		ATA_HORKAGE_MAX_SEC_1024 },
+	{ "LITEON CX1-JB*-HP",	NULL,		ATA_HORKAGE_MAX_SEC_1024 },
 
 	/* Devices we expect to fail diagnostics */
 

From 2dae99558e86894e9e5dbf097477baaa5eb70134 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 19 Dec 2016 10:17:40 +0900
Subject: [PATCH 005/322] libata: Fix ATA request sense

For an ATA device supporting the sense data reporting feature set, a
failed command will trigger the execution of ata_eh_request_sense if
the result task file of the failed command has the ATA_SENSE bit set
(sense data available bit). ata_eh_request_sense executes the REQUEST
SENSE DATA EXT command to retrieve the sense data of the failed
command. On success of REQUEST SENSE DATA EXT, the ATA_SENSE bit will
NOT be set (the command succeeded) but ata_eh_request_sense
nevertheless tests the availability of sense data by testing that bit
presence in the result tf of the REQUEST SENSE DATA EXT command.  This
leads us to falsely assume that request sense data failed and to the
warning message:

atax.xx: request sense failed stat 50 emask 0

Upon success of REQUEST SENSE DATA EXT, set the ATA_SENSE bit in the
result task file command so that sense data can be returned by
ata_eh_request_sense.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/ata/libata-core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index a2bd5acc8a89..c2d3785ec227 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1702,6 +1702,8 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 
 		if (qc->err_mask & ~AC_ERR_OTHER)
 			qc->err_mask &= ~AC_ERR_OTHER;
+	} else if (qc->tf.command == ATA_CMD_REQ_SENSE_DATA) {
+		qc->result_tf.command |= ATA_SENSE;
 	}
 
 	/* finish up */

From 064c3db9c564cc5be514ac21fb4aa26cc33db746 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Mon, 12 Dec 2016 23:13:27 +0530
Subject: [PATCH 006/322] ata: sata_mv:- Handle return value of devm_ioremap.

Here, If devm_ioremap will fail. It will return NULL.
Then hpriv->base = NULL - 0x20000; Kernel can run into
a NULL-pointer dereference. This error check will avoid
NULL pointer dereference.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/ata/sata_mv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 823e938c9a78..2f32782cea6d 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -4132,6 +4132,9 @@ static int mv_platform_probe(struct platform_device *pdev)
 	host->iomap = NULL;
 	hpriv->base = devm_ioremap(&pdev->dev, res->start,
 				   resource_size(res));
+	if (!hpriv->base)
+		return -ENOMEM;
+
 	hpriv->base -= SATAHC0_REG_BASE;
 
 	hpriv->clk = clk_get(&pdev->dev, NULL);

From 81ddd8c0c5e1cb41184d66567140cb48c53eb3d1 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Fri, 13 Jan 2017 15:00:16 +0100
Subject: [PATCH 007/322] cifs: initialize file_info_lock

Reviewed-by: Jeff Layton <jlayton@redhat.com>
CC: Stable <stable@vger.kernel.org>

file_info_lock is not initalized in initiate_cifs_search(), leading to the
following splat after a simple "mount.cifs ... dir && ls dir/":

 BUG: spinlock bad magic on CPU#0, ls/486
  lock: 0xffff880009301110, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0
 CPU: 0 PID: 486 Comm: ls Not tainted 4.9.0 #27
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
  ffffc900042f3db0 ffffffff81327533 0000000000000000 ffff880009301110
  ffffc900042f3dd0 ffffffff810baf75 ffff880009301110 ffffffff817ae077
  ffffc900042f3df0 ffffffff810baff6 ffff880009301110 ffff880008d69900
 Call Trace:
  [<ffffffff81327533>] dump_stack+0x65/0x92
  [<ffffffff810baf75>] spin_dump+0x85/0xe0
  [<ffffffff810baff6>] spin_bug+0x26/0x30
  [<ffffffff810bb159>] do_raw_spin_lock+0xe9/0x130
  [<ffffffff8159ad2f>] _raw_spin_lock+0x1f/0x30
  [<ffffffff8127e50d>] cifs_closedir+0x4d/0x100
  [<ffffffff81181cfd>] __fput+0x5d/0x160
  [<ffffffff81181e3e>] ____fput+0xe/0x10
  [<ffffffff8109410e>] task_work_run+0x7e/0xa0
  [<ffffffff81002512>] exit_to_usermode_loop+0x92/0xa0
  [<ffffffff810026f9>] syscall_return_slowpath+0x49/0x50
  [<ffffffff8159b484>] entry_SYSCALL_64_fastpath+0xa7/0xa9

Fixes: 3afca265b5f53a0 ("Clarify locking of cifs file and tcon structures and make more granular")
Signed-off-by: Rabin Vincent <rabinv@axis.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/readdir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 8f6a2a5863b9..a27fc8791551 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -285,6 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file)
 			rc = -ENOMEM;
 			goto error_exit;
 		}
+		spin_lock_init(&cifsFile->file_info_lock);
 		file->private_data = cifsFile;
 		cifsFile->tlink = cifs_get_tlink(tlink);
 		tcon = tlink_tcon(tlink);

From 5d0e7705774dd412a465896d08d59a81a345c1e4 Mon Sep 17 00:00:00 2001
From: Tom Hromatka <tom.hromatka@oracle.com>
Date: Tue, 10 Jan 2017 10:57:56 -0700
Subject: [PATCH 008/322] sparc: Fixed typo in sstate.c. Replaced panicing with
 panicking

Signed-off-by: Tom Hromatka <tom.hromatka@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/sstate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/kernel/sstate.c b/arch/sparc/kernel/sstate.c
index c59af546f522..3caed4023589 100644
--- a/arch/sparc/kernel/sstate.c
+++ b/arch/sparc/kernel/sstate.c
@@ -43,8 +43,8 @@ static const char poweroff_msg[32] __attribute__((aligned(32))) =
 	"Linux powering off";
 static const char rebooting_msg[32] __attribute__((aligned(32))) =
 	"Linux rebooting";
-static const char panicing_msg[32] __attribute__((aligned(32))) =
-	"Linux panicing";
+static const char panicking_msg[32] __attribute__((aligned(32))) =
+	"Linux panicking";
 
 static int sstate_reboot_call(struct notifier_block *np, unsigned long type, void *_unused)
 {
@@ -76,7 +76,7 @@ static struct notifier_block sstate_reboot_notifier = {
 
 static int sstate_panic_event(struct notifier_block *n, unsigned long event, void *ptr)
 {
-	do_set_sstate(HV_SOFT_STATE_TRANSITION, panicing_msg);
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, panicking_msg);
 
 	return NOTIFY_DONE;
 }

From 7195439d1d71bc4a6c33cfb57bc669a7cd041041 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 13 Jan 2017 12:23:35 +0100
Subject: [PATCH 009/322] Revert "bcma: init serial console directly from
 ChipCommon code"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 4c81acab3816 ("bcma: init serial console directly
from ChipCommon code") as it broke IRQ assignment. Getting IRQ with
bcma_core_irq helper on SoC requires MIPS core to be set. It happens
*after* ChipCommon initialization so we can't do this so early.

This fixes a user reported regression. It wasn't critical as serial was
still somehow working but lack of IRQs was making in unreliable.

Fixes: 4c81acab3816 ("bcma: init serial console directly from ChipCommon code")
Reported-by: Felix Fietkau <nbd@nbd.name>
Cc: stable@vger.kernel.org # 4.6+
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/bcma_private.h      |  3 +++
 drivers/bcma/driver_chipcommon.c | 11 +++--------
 drivers/bcma/driver_mips.c       |  3 +++
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index f642c4264c27..168fa175d65a 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -45,6 +45,9 @@ int bcma_sprom_get(struct bcma_bus *bus);
 void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc);
 void bcma_core_chipcommon_init(struct bcma_drv_cc *cc);
 void bcma_chipco_bcm4331_ext_pa_lines_ctl(struct bcma_drv_cc *cc, bool enable);
+#ifdef CONFIG_BCMA_DRIVER_MIPS
+void bcma_chipco_serial_init(struct bcma_drv_cc *cc);
+#endif /* CONFIG_BCMA_DRIVER_MIPS */
 
 /* driver_chipcommon_b.c */
 int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb);
diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index b4f6520e74f0..62f5bfa5065d 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -15,8 +15,6 @@
 #include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
 
-static void bcma_chipco_serial_init(struct bcma_drv_cc *cc);
-
 static inline u32 bcma_cc_write32_masked(struct bcma_drv_cc *cc, u16 offset,
 					 u32 mask, u32 value)
 {
@@ -186,9 +184,6 @@ void bcma_core_chipcommon_early_init(struct bcma_drv_cc *cc)
 	if (cc->capabilities & BCMA_CC_CAP_PMU)
 		bcma_pmu_early_init(cc);
 
-	if (IS_BUILTIN(CONFIG_BCM47XX) && bus->hosttype == BCMA_HOSTTYPE_SOC)
-		bcma_chipco_serial_init(cc);
-
 	if (bus->hosttype == BCMA_HOSTTYPE_SOC)
 		bcma_core_chipcommon_flash_detect(cc);
 
@@ -378,9 +373,9 @@ u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value)
 	return res;
 }
 
-static void bcma_chipco_serial_init(struct bcma_drv_cc *cc)
+#ifdef CONFIG_BCMA_DRIVER_MIPS
+void bcma_chipco_serial_init(struct bcma_drv_cc *cc)
 {
-#if IS_BUILTIN(CONFIG_BCM47XX)
 	unsigned int irq;
 	u32 baud_base;
 	u32 i;
@@ -422,5 +417,5 @@ static void bcma_chipco_serial_init(struct bcma_drv_cc *cc)
 		ports[i].baud_base = baud_base;
 		ports[i].reg_shift = 0;
 	}
-#endif /* CONFIG_BCM47XX */
 }
+#endif /* CONFIG_BCMA_DRIVER_MIPS */
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 96f171328200..89af807cf29c 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -278,9 +278,12 @@ static void bcma_core_mips_nvram_init(struct bcma_drv_mips *mcore)
 
 void bcma_core_mips_early_init(struct bcma_drv_mips *mcore)
 {
+	struct bcma_bus *bus = mcore->core->bus;
+
 	if (mcore->early_setup_done)
 		return;
 
+	bcma_chipco_serial_init(&bus->drv_cc);
 	bcma_core_mips_nvram_init(mcore);
 
 	mcore->early_setup_done = true;

From b98acbff9a05b371c5f0ca6e44a3af8ce9274379 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 11 Jan 2017 15:36:20 +0000
Subject: [PATCH 010/322] regulator: twl6030: fix range comparison, allowing
 vsel = 59

The range min_uV > 1350000 && min_uV <= 150000 is never reachable
because of a typo in the previous range check and hence vsel = 59
is never reached.  Fix the previous range check to enable the
vsel = 59 setting.

Fixes CoverityScan CID#728454 ("Logially dead code")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/twl6030-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c
index 4864b9d742c0..716191046a70 100644
--- a/drivers/regulator/twl6030-regulator.c
+++ b/drivers/regulator/twl6030-regulator.c
@@ -452,7 +452,7 @@ static int twl6030smps_map_voltage(struct regulator_dev *rdev, int min_uV,
 			vsel = 62;
 		else if ((min_uV > 1800000) && (min_uV <= 1900000))
 			vsel = 61;
-		else if ((min_uV > 1350000) && (min_uV <= 1800000))
+		else if ((min_uV > 1500000) && (min_uV <= 1800000))
 			vsel = 60;
 		else if ((min_uV > 1350000) && (min_uV <= 1500000))
 			vsel = 59;

From b5b46c4740aed1538544f0fa849c5b76c7823469 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 18 Jan 2017 14:29:21 +0100
Subject: [PATCH 011/322] objtool: Fix IRET's opcode

The IRET opcode is 0xcf according to the Intel manual and also to objdump of my
vmlinux:

    1ea8:       48 cf                   iretq

Fix the opcode in arch_decode_instruction().

The previous value (0xc5) seems to correspond to LDS.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170118132921.19319-1-jslaby@suse.cz
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/arch/x86/decode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 5e0dea2cdc01..039636ffb6c8 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -150,9 +150,9 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		*type = INSN_RETURN;
 		break;
 
-	case 0xc5: /* iret */
 	case 0xca: /* retf */
 	case 0xcb: /* retf */
+	case 0xcf: /* iret */
 		*type = INSN_CONTEXT_SWITCH;
 		break;
 

From 24d615a694d649aa2e167c3f97f62bdad07e3f84 Mon Sep 17 00:00:00 2001
From: Aleksander Morgado <aleksander@aleksander.es>
Date: Wed, 18 Jan 2017 21:31:31 +0100
Subject: [PATCH 012/322] USB: serial: qcserial: add Dell DW5570 QDL

The Dell DW5570 is a re-branded Sierra Wireless MC8805 which will by
default boot with vid 0x413c and pid 0x81a3. When triggered QDL download
mode, the device switches to pid 0x81a6 and provides the standard TTY
used for firmware upgrade.

Cc: <stable@vger.kernel.org>
Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/qcserial.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 1bc6089b9008..696458db7e3c 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = {
 	{USB_DEVICE(0x1410, 0xa021)},	/* Novatel Gobi 3000 Composite */
 	{USB_DEVICE(0x413c, 0x8193)},	/* Dell Gobi 3000 QDL */
 	{USB_DEVICE(0x413c, 0x8194)},	/* Dell Gobi 3000 Composite */
+	{USB_DEVICE(0x413c, 0x81a6)},	/* Dell DW5570 QDL (MC8805) */
 	{USB_DEVICE(0x1199, 0x68a4)},	/* Sierra Wireless QDL */
 	{USB_DEVICE(0x1199, 0x68a5)},	/* Sierra Wireless Modem */
 	{USB_DEVICE(0x1199, 0x68a8)},	/* Sierra Wireless QDL */

From 773c7220e22d193e5667c352fcbf8d47eefc817f Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.vnet.ibm.com>
Date: Fri, 13 Jan 2017 12:48:06 -0500
Subject: [PATCH 013/322] scsi: virtio_scsi: Reject commands when virtqueue is
 broken

In the case of a graceful set of detaches, where the virtio-scsi-ccw
disk is removed from the guest prior to the controller, the guest
behaves quite normally.  Specifically, the detach gets us into
sd_sync_cache to issue a Synchronize Cache(10) command, which
immediately fails (and is retried a couple of times) because the device
has been removed.  Later, the removal of the controller sees two CRWs
presented, but there's no further indication of the removal from the
guest viewpoint.

 [   17.217458] sd 0:0:0:0: [sda] Synchronizing SCSI cache
 [   17.219257] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK
 [   21.449400] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2
 [   21.449406] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0

However, on s390, the SCSI disks can be removed "by surprise" when an
entire controller (host) is removed and all associated disks are removed
via the loop in scsi_forget_host.  The same call to sd_sync_cache is
made, but because the controller has already been removed, the
Synchronize Cache(10) command is neither issued (and then failed) nor
rejected.

That the I/O isn't returned means the guest cannot have other devices
added nor removed, and other tasks (such as shutdown or reboot) issued
by the guest will not complete either.  The virtio ring has already been
marked as broken (via virtio_break_device in virtio_ccw_remove), but we
still attempt to queue the command only to have it remain there.  The
calling sequence provides a bit of distinction for us:

  virtscsi_queuecommand()
   -> virtscsi_kick_cmd()
    -> virtscsi_add_cmd()
     -> virtqueue_add_sgs()
      -> virtqueue_add()
         if success
           return 0
         elseif vq->broken or vring_mapping_error()
           return -EIO
         else
           return -ENOSPC

A return of ENOSPC is generally a temporary condition, so returning
"host busy" from virtscsi_queuecommand makes sense here, to have it
redriven in a moment or two.  But the EIO return code is more of a
permanent error and so it would be wise to return the I/O itself and
allow the calling thread to finish gracefully.  The result is these four
kernel messages in the guest (the fourth one does not occur prior to
this patch):

 [   22.921562] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2
 [   22.921580] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0
 [   22.921978] sd 0:0:0:0: [sda] Synchronizing SCSI cache
 [   22.921993] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK

I opted to fill in the same response data that is returned from the more
graceful device detach, where the disk device is removed prior to the
controller device.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/virtio_scsi.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index ec91bd07f00a..c680d7641311 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -534,7 +534,9 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 {
 	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
 	struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc);
+	unsigned long flags;
 	int req_size;
+	int ret;
 
 	BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
 
@@ -562,8 +564,15 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 		req_size = sizeof(cmd->req.cmd);
 	}
 
-	if (virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)) != 0)
+	ret = virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd));
+	if (ret == -EIO) {
+		cmd->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET;
+		spin_lock_irqsave(&req_vq->vq_lock, flags);
+		virtscsi_complete_cmd(vscsi, cmd);
+		spin_unlock_irqrestore(&req_vq->vq_lock, flags);
+	} else if (ret != 0) {
 		return SCSI_MLQUEUE_HOST_BUSY;
+	}
 	return 0;
 }
 

From 92549cdc288f47f3a98cf80ac5890c91f5876a06 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 17 Jan 2017 14:22:24 -0800
Subject: [PATCH 014/322] iwlwifi: fix kernel crash when unregistering thermal
 zone

A recent firmware change seems to have enabled thermal zones on the
iwlwifi driver. Unfortunately, my device fails when registering the
thermal zone. This doesn't stop the driver from attempting to unregister
the thermal zone at unload time, triggering a NULL pointer deference in
strlen() off the thermal_zone_device_unregister() path.

Don't unregister if name is NULL, for that case we failed registering.
Do the same for the cooling zone.

Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index 63a051be832e..bec7d9c46087 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -843,8 +843,10 @@ static void iwl_mvm_thermal_zone_unregister(struct iwl_mvm *mvm)
 		return;
 
 	IWL_DEBUG_TEMP(mvm, "Thermal zone device unregister\n");
-	thermal_zone_device_unregister(mvm->tz_device.tzone);
-	mvm->tz_device.tzone = NULL;
+	if (mvm->tz_device.tzone) {
+		thermal_zone_device_unregister(mvm->tz_device.tzone);
+		mvm->tz_device.tzone = NULL;
+	}
 }
 
 static void iwl_mvm_cooling_device_unregister(struct iwl_mvm *mvm)
@@ -853,8 +855,10 @@ static void iwl_mvm_cooling_device_unregister(struct iwl_mvm *mvm)
 		return;
 
 	IWL_DEBUG_TEMP(mvm, "Cooling device unregister\n");
-	thermal_cooling_device_unregister(mvm->cooling_dev.cdev);
-	mvm->cooling_dev.cdev = NULL;
+	if (mvm->cooling_dev.cdev) {
+		thermal_cooling_device_unregister(mvm->cooling_dev.cdev);
+		mvm->cooling_dev.cdev = NULL;
+	}
 }
 #endif /* CONFIG_THERMAL */
 

From a5badd1e97e6caeca78ad74191f12fc923c403a8 Mon Sep 17 00:00:00 2001
From: Alison Schofield <amsfield22@gmail.com>
Date: Sat, 14 Jan 2017 19:51:52 -0800
Subject: [PATCH 015/322] iio: health: afe4403: retrieve a valid iio_dev in
 suspend/resume

The suspend/resume functions were using dev_to_iio_dev() to get
the iio_dev. That only works on IIO dev's.  Replace it with spi
functions to get the correct iio_dev.

Signed-off-by: Alison Schofield <amsfield22@gmail.com>
Acked-by: Andrew F. Davis <afd@ti.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/health/afe4403.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c
index 9a081465c42f..6bb23a49e81e 100644
--- a/drivers/iio/health/afe4403.c
+++ b/drivers/iio/health/afe4403.c
@@ -422,7 +422,7 @@ MODULE_DEVICE_TABLE(of, afe4403_of_match);
 
 static int __maybe_unused afe4403_suspend(struct device *dev)
 {
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev));
 	struct afe4403_data *afe = iio_priv(indio_dev);
 	int ret;
 
@@ -443,7 +443,7 @@ static int __maybe_unused afe4403_suspend(struct device *dev)
 
 static int __maybe_unused afe4403_resume(struct device *dev)
 {
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev *indio_dev = spi_get_drvdata(to_spi_device(dev));
 	struct afe4403_data *afe = iio_priv(indio_dev);
 	int ret;
 

From 802ecfc113df1e15af1d028427cbbe785ae9cc4a Mon Sep 17 00:00:00 2001
From: Alison Schofield <amsfield22@gmail.com>
Date: Sat, 14 Jan 2017 19:52:50 -0800
Subject: [PATCH 016/322] iio: health: afe4404: retrieve a valid iio_dev in
 suspend/resume

The suspend/resume functions were using dev_to_iio_dev() to get
the iio_dev. That only works on IIO dev's.  Replace it with i2c
functions to get the correct iio_dev.

Signed-off-by: Alison Schofield <amsfield22@gmail.com>
Acked-by: Andrew F. Davis <afd@ti.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/health/afe4404.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c
index 45266404f7e3..964f5231a831 100644
--- a/drivers/iio/health/afe4404.c
+++ b/drivers/iio/health/afe4404.c
@@ -428,7 +428,7 @@ MODULE_DEVICE_TABLE(of, afe4404_of_match);
 
 static int __maybe_unused afe4404_suspend(struct device *dev)
 {
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev));
 	struct afe4404_data *afe = iio_priv(indio_dev);
 	int ret;
 
@@ -449,7 +449,7 @@ static int __maybe_unused afe4404_suspend(struct device *dev)
 
 static int __maybe_unused afe4404_resume(struct device *dev)
 {
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev));
 	struct afe4404_data *afe = iio_priv(indio_dev);
 	int ret;
 

From 828f84ee8f84710ea1818b3565add268bcb824c8 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <matt@ranostay.consulting>
Date: Mon, 16 Jan 2017 18:04:18 -0800
Subject: [PATCH 017/322] iio: health: max30100: fixed parenthesis around FIFO
 count check

FIFO was being read every sample after the "almost full" state was
reached. This was due to an incorrect placement of the parenthesis
in the while condition check.

Note - the fixes tag is not actually correct, but the fix in this patch
would also be needed for it to function correctly so we'll go with that
one.  Backports should pick up both.

Signed-off-by: Matt Ranostay <matt@ranostay.consulting>
Fixes: b74fccad7 ("iio: health: max30100: correct FIFO check condition")
Cc: Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/health/max30100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c
index 90ab8a2d2846..183c14329d6e 100644
--- a/drivers/iio/health/max30100.c
+++ b/drivers/iio/health/max30100.c
@@ -238,7 +238,7 @@ static irqreturn_t max30100_interrupt_handler(int irq, void *private)
 
 	mutex_lock(&data->lock);
 
-	while (cnt || (cnt = max30100_fifo_count(data) > 0)) {
+	while (cnt || (cnt = max30100_fifo_count(data)) > 0) {
 		ret = max30100_read_measurement(data);
 		if (ret)
 			break;

From d1aaf20ee655888c227d5137b7a63551f8d15416 Mon Sep 17 00:00:00 2001
From: Alison Schofield <amsfield22@gmail.com>
Date: Mon, 16 Jan 2017 11:27:52 -0800
Subject: [PATCH 018/322] iio: adc: palmas_gpadc: retrieve a valid iio_dev in
 suspend/resume

The suspend/resume functions were using dev_to_iio_dev() to get
the iio_dev. That only works on IIO dev's.  Use dev_get_drvdata()
for a platform device to get the correct iio_dev.

Signed-off-by: Alison Schofield <amsfield22@gmail.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/palmas_gpadc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c
index 2bbf0c521beb..7d61b566e148 100644
--- a/drivers/iio/adc/palmas_gpadc.c
+++ b/drivers/iio/adc/palmas_gpadc.c
@@ -775,7 +775,7 @@ static int palmas_adc_wakeup_reset(struct palmas_gpadc *adc)
 
 static int palmas_gpadc_suspend(struct device *dev)
 {
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct palmas_gpadc *adc = iio_priv(indio_dev);
 	int wakeup = adc->wakeup1_enable || adc->wakeup2_enable;
 	int ret;
@@ -798,7 +798,7 @@ static int palmas_gpadc_suspend(struct device *dev)
 
 static int palmas_gpadc_resume(struct device *dev)
 {
-	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct palmas_gpadc *adc = iio_priv(indio_dev);
 	int wakeup = adc->wakeup1_enable || adc->wakeup2_enable;
 	int ret;

From 5c113b5e0082e90d2e1c7b12e96a7b8cf0623e27 Mon Sep 17 00:00:00 2001
From: John Brooks <john@fastquake.com>
Date: Wed, 18 Jan 2017 21:50:39 +0000
Subject: [PATCH 019/322] iio: dht11: Use usleep_range instead of msleep for
 start signal

The DHT22 (AM2302) datasheet specifies that the LOW start pulse should not
exceed 20ms. However, observations with an oscilloscope of an RPi Model 2B
(rev 1.1) communicating with a DHT22 sensor showed that the driver was
consistently sending start pulses longer than 20ms:

Kernel 4.7.10-v7+ (n=132):
    Minimum pulse length: 20.20ms
    Maximum:              29.84ms
    Mean:                 24.96ms
    StDev:                2.82ms
    Sensor response rate: 100%
    Read success rate:    76%

On kernel 4.8, the start pulse was so long that the sensor would not even
respond 97% of the time:

Kernel 4.8.16-v7+ (n=100):
    Minimum pulse length: 30.4ms
    Maximum:              74.4ms
    Mean:                 39.3ms
    StDev:                10.2ms
    Sensor response rate: 3%
    Read success rate:    3%

The driver would return ETIMEDOUT and write log messages like this:

[   51.430987] dht11 dht11@0: Only 1 signal edges detected
[   66.311019] dht11 dht11@0: Only 0 signal edges detected

Replacing msleep(18) with usleep_range(18000, 20000) made the pulse length
sane again and restored responsiveness:

Kernel 4.8.16-v7+ with usleep_range (n=123):
    Minimum pulse length: 18.16ms
    Maximum:              20.20ms
    Mean:                 19.85ms
    StDev:                0.51ms
    Sensor response rate: 100%
    Read success rate:    84%

Cc: stable@vger.kernel.org
Signed-off-by: John Brooks <john@fastquake.com>
Reviewed-by: Harald Geyer <harald@ccbib.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/humidity/dht11.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c
index 9c47bc98f3ac..2a22ad920333 100644
--- a/drivers/iio/humidity/dht11.c
+++ b/drivers/iio/humidity/dht11.c
@@ -71,7 +71,8 @@
  * a) select an implementation using busy loop polling on those systems
  * b) use the checksum to do some probabilistic decoding
  */
-#define DHT11_START_TRANSMISSION	18  /* ms */
+#define DHT11_START_TRANSMISSION_MIN	18000  /* us */
+#define DHT11_START_TRANSMISSION_MAX	20000  /* us */
 #define DHT11_MIN_TIMERES	34000  /* ns */
 #define DHT11_THRESHOLD		49000  /* ns */
 #define DHT11_AMBIG_LOW		23000  /* ns */
@@ -228,7 +229,8 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
 		ret = gpio_direction_output(dht11->gpio, 0);
 		if (ret)
 			goto err;
-		msleep(DHT11_START_TRANSMISSION);
+		usleep_range(DHT11_START_TRANSMISSION_MIN,
+			     DHT11_START_TRANSMISSION_MAX);
 		ret = gpio_direction_input(dht11->gpio);
 		if (ret)
 			goto err;

From 7f1931b35f0909695543a8c12f72ccd2d20ff241 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Jan 2017 13:19:05 +0100
Subject: [PATCH 020/322] ARM: imx: hide unused variable in #ifdef

A bugfix added a new local variable that is only used inside of an #ifdef
section, and unused if CONFIG_PERF_EVENTS is disabled:

arch/arm/mach-imx/mmdc.c:63:25: warning: 'cpuhp_mmdc_state' defined but not used [-Wunused-variable]

This moves the variable down inside that same ifdef.

Fixes: a051f220d6b9 ("ARM/imx/mmcd: Fix broken cpu hotplug handling")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/mmdc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index 699157759120..c03bf28d8bbc 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -60,7 +60,6 @@
 
 #define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu)
 
-static enum cpuhp_state cpuhp_mmdc_state;
 static int ddr_type;
 
 struct fsl_mmdc_devtype_data {
@@ -82,6 +81,7 @@ static const struct of_device_id imx_mmdc_dt_ids[] = {
 
 #ifdef CONFIG_PERF_EVENTS
 
+static enum cpuhp_state cpuhp_mmdc_state;
 static DEFINE_IDA(mmdc_ida);
 
 PMU_EVENT_ATTR_STRING(total-cycles, mmdc_pmu_total_cycles, "event=0x00")

From c26665ab5c49ad3e142e0f054ca3204f259ba09c Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 20 Jan 2017 21:29:40 +0100
Subject: [PATCH 021/322] x86/microcode/intel: Drop stashed AP patch pointer
 optimization

This was meant to save us the scanning of the microcode containter in
the initrd since the first AP had already done that but it can also hurt
us:

Imagine a single hyperthreaded CPU (Intel(R) Atom(TM) CPU N270, for
example) which updates the microcode on the BSP but since the microcode
engine is shared between the two threads, the update on CPU1 doesn't
happen because it has already happened on CPU0 and we don't find a newer
microcode revision on CPU1.

Which doesn't set the intel_ucode_patch pointer and at initrd
jettisoning time we don't save the microcode patch for later
application.

Now, when we suspend to RAM, the loaded microcode gets cleared so we
need to reload but there's no patch saved in the cache.

Removing the optimization fixes this issue and all is fine and dandy.

Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading")
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170120202955.4091-2-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/microcode/intel.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 3f329b74e040..8325d8a09ab0 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -41,7 +41,7 @@
 
 static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
 
-/* Current microcode patch used in early patching */
+/* Current microcode patch used in early patching on the APs. */
 struct microcode_intel *intel_ucode_patch;
 
 static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
@@ -607,12 +607,6 @@ int __init save_microcode_in_initrd_intel(void)
 	struct ucode_cpu_info uci;
 	struct cpio_data cp;
 
-	/*
-	 * AP loading didn't find any microcode patch, no need to save anything.
-	 */
-	if (!intel_ucode_patch || IS_ERR(intel_ucode_patch))
-		return 0;
-
 	if (!load_builtin_intel_microcode(&cp))
 		cp = find_microcode_in_initrd(ucode_path, false);
 
@@ -628,7 +622,6 @@ int __init save_microcode_in_initrd_intel(void)
 	return 0;
 }
 
-
 /*
  * @res_patch, output: a pointer to the patch we found.
  */

From 4c833368f0bf748d4147bf301b1f95bc8eccb3c0 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Sun, 22 Jan 2017 16:50:23 +0800
Subject: [PATCH 022/322] x86/fpu: Set the xcomp_bv when we fake up a XSAVES
 area

I got the following calltrace on a Apollo Lake SoC with 32-bit kernel:

  WARNING: CPU: 2 PID: 261 at arch/x86/include/asm/fpu/internal.h:363 fpu__restore+0x1f5/0x260
  [...]
  Hardware name: Intel Corp. Broxton P/NOTEBOOK, BIOS APLIRVPA.X64.0138.B35.1608091058 08/09/2016
  Call Trace:
   dump_stack()
   __warn()
   ? fpu__restore()
   warn_slowpath_null()
   fpu__restore()
   __fpu__restore_sig()
   fpu__restore_sig()
   restore_sigcontext.isra.9()
   sys_sigreturn()
   do_int80_syscall_32()
   entry_INT80_32()

The reason is that a #GP occurs when executing XRSTORS. The root cause
is that we forget to set the xcomp_bv when we fake up the XSAVES area
in the copyin_to_xsaves() function.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@intel.com>
Link: http://lkml.kernel.org/r/1485075023-30161-1-git-send-email-haokexin@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/fpu/xstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1d7770447b3e..e287b9075527 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1070,6 +1070,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
 	 * Add back in the features that came in from userspace:
 	 */
 	xsave->header.xfeatures |= xfeatures;
+	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures;
 
 	return 0;
 }

From 7f59b319111bbc3a5f32730c8a43b201e9522f52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?=
 <sebastien.szymanski@armadeus.com>
Date: Wed, 18 Jan 2017 11:09:51 +0100
Subject: [PATCH 023/322] ARM: dts: imx6dl: fix GPIO4 range
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GPIO4_11 is on pin 152(MX6DL_PAD_KEY_ROW2) and not on pin
151(MX6DL_PAD_KEY_ROW1).

I found the error while booting a mainline kernel on APF6S SoM and
noticed the following message:

[    2.609337] imx6dl-pinctrl 20e0000.iomuxc: pin MX6DL_PAD_KEY_ROW1
already requested by 20a8000.gpio:105; cannot claim for 20a8000.gpio:107
[    2.621884] imx6dl-pinctrl 20e0000.iomuxc: pin-151 (20a8000.gpio:107)
status -22
[    2.629303] spi_imx 2008000.ecspi: Can't get CS GPIO 107

With this patch, the message is gone and spi_imx driver probes correctly.

Fixes: bb728d662bed ("ARM: dts: add gpio-ranges property to iMX GPIO controllers")
Signed-off-by: Sébastien Szymanski <sebastien.szymanski@armadeus.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6dl.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi
index 1ade1951e620..7aa120fbdc71 100644
--- a/arch/arm/boot/dts/imx6dl.dtsi
+++ b/arch/arm/boot/dts/imx6dl.dtsi
@@ -137,7 +137,7 @@ &gpio3 {
 &gpio4 {
 	gpio-ranges = <&iomuxc  5 136 1>, <&iomuxc  6 145 1>, <&iomuxc  7 150 1>,
 		      <&iomuxc  8 146 1>, <&iomuxc  9 151 1>, <&iomuxc 10 147 1>,
-		      <&iomuxc 11 151 1>, <&iomuxc 12 148 1>, <&iomuxc 13 153 1>,
+		      <&iomuxc 11 152 1>, <&iomuxc 12 148 1>, <&iomuxc 13 153 1>,
 		      <&iomuxc 14 149 1>, <&iomuxc 15 154 1>, <&iomuxc 16  39 7>,
 		      <&iomuxc 23  56 1>, <&iomuxc 24  61 7>, <&iomuxc 31  46 1>;
 };

From 7941c59e45f3b6d30e07375e9b6713427e0a9f98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=BCrg=20Billeter?= <j@bitron.ch>
Date: Mon, 10 Oct 2016 18:30:01 +0200
Subject: [PATCH 024/322] iwlwifi: fix double hyphen in MODULE_FIRMWARE for
 8000
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mistakenly, the driver is trying to load the 8000C firmware with an
incorrect name (i.e. with two hyphens where there should be only one)
and that fails.  Fix that by removing the hyphen from the format
macro.

Fixes: e1ba684f762b ("iwlwifi: 8000: fix MODULE_FIRMWARE input")
Signed-off-by: Jürg Billeter <j@bitron.ch>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
index d02ca1491d16..8d3e53fac1da 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
@@ -91,7 +91,7 @@
 
 #define IWL8000_FW_PRE "iwlwifi-8000C-"
 #define IWL8000_MODULE_FIRMWARE(api) \
-	IWL8000_FW_PRE "-" __stringify(api) ".ucode"
+	IWL8000_FW_PRE __stringify(api) ".ucode"
 
 #define IWL8265_FW_PRE "iwlwifi-8265-"
 #define IWL8265_MODULE_FIRMWARE(api) \

From 03c902bff524e0cf664737a33f2365f7837040bf Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 2 Dec 2016 12:03:36 +0100
Subject: [PATCH 025/322] iwlwifi: mvm: avoid crash on restart w/o reserved
 queues

When the firmware restarts in a situation in which any station
has no queue reserved anymore because that queue was used, the
code will crash trying to access the queue_info array at the
offset 255, which is far too big. Fix this by checking that a
queue is actually reserved before writing its status.

Fixes: 8d98ae6eb0d5 ("iwlwifi: mvm: re-assign old queues after hw restart in dqa mode")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 636c8b03e318..09e9e2e3ed04 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1164,9 +1164,10 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm,
 		.frame_limit = IWL_FRAME_LIMIT,
 	};
 
-	/* Make sure reserved queue is still marked as such (or allocated) */
-	mvm->queue_info[mvm_sta->reserved_queue].status =
-		IWL_MVM_QUEUE_RESERVED;
+	/* Make sure reserved queue is still marked as such (if allocated) */
+	if (mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE)
+		mvm->queue_info[mvm_sta->reserved_queue].status =
+			IWL_MVM_QUEUE_RESERVED;
 
 	for (i = 0; i <= IWL_MAX_TID_COUNT; i++) {
 		struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i];

From d6040764adcb5cb6de1489422411d701c158bb69 Mon Sep 17 00:00:00 2001
From: Salvatore Benedetto <salvatore.benedetto@intel.com>
Date: Fri, 13 Jan 2017 11:54:08 +0000
Subject: [PATCH 026/322] crypto: api - Clear CRYPTO_ALG_DEAD bit before
 registering an alg

Make sure CRYPTO_ALG_DEAD bit is cleared before proceeding with
the algorithm registration. This fixes qat-dh registration when
driver is restarted

Cc: <stable@vger.kernel.org>
Signed-off-by: Salvatore Benedetto <salvatore.benedetto@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index df939b54b09f..1fad2a6b3bbb 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -356,6 +356,7 @@ int crypto_register_alg(struct crypto_alg *alg)
 	struct crypto_larval *larval;
 	int err;
 
+	alg->cra_flags &= ~CRYPTO_ALG_DEAD;
 	err = crypto_check_alg(alg);
 	if (err)
 		return err;

From 11e3b725cfc282efe9d4a354153e99d86a16af08 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 17 Jan 2017 13:46:29 +0000
Subject: [PATCH 027/322] crypto: arm64/aes-blk - honour iv_out requirement in
 CBC and CTR modes

Update the ARMv8 Crypto Extensions and the plain NEON AES implementations
in CBC and CTR modes to return the next IV back to the skcipher API client.
This is necessary for chaining to work correctly.

Note that for CTR, this is only done if the request is a round multiple of
the block size, since otherwise, chaining is impossible anyway.

Cc: <stable@vger.kernel.org> # v3.16+
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-modes.S | 90 +++++++++++++++++------------------
 1 file changed, 43 insertions(+), 47 deletions(-)

diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index c53dbeae79f2..838dad5c209f 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt)
 	cbz		w6, .Lcbcencloop
 
 	ld1		{v0.16b}, [x5]			/* get iv */
-	enc_prepare	w3, x2, x5
+	enc_prepare	w3, x2, x6
 
 .Lcbcencloop:
 	ld1		{v1.16b}, [x1], #16		/* get next pt block */
 	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
-	encrypt_block	v0, w3, x2, x5, w6
+	encrypt_block	v0, w3, x2, x6, w7
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
 	bne		.Lcbcencloop
+	st1		{v0.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
 
@@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt)
 	cbz		w6, .LcbcdecloopNx
 
 	ld1		{v7.16b}, [x5]			/* get iv */
-	dec_prepare	w3, x2, x5
+	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
 #if INTERLEAVE >= 2
@@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt)
 .Lcbcdecloop:
 	ld1		{v1.16b}, [x1], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
-	decrypt_block	v0, w3, x2, x5, w6
+	decrypt_block	v0, w3, x2, x6, w7
 	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
 	mov		v7.16b, v1.16b			/* ct is next iv */
 	st1		{v0.16b}, [x0], #16
@@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt)
 	bne		.Lcbcdecloop
 .Lcbcdecout:
 	FRAME_POP
+	st1		{v7.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
 
@@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt)
 
 AES_ENTRY(aes_ctr_encrypt)
 	FRAME_PUSH
-	cbnz		w6, .Lctrfirst		/* 1st time around? */
-	umov		x5, v4.d[1]		/* keep swabbed ctr in reg */
-	rev		x5, x5
-#if INTERLEAVE >= 2
-	cmn		w5, w4			/* 32 bit overflow? */
-	bcs		.Lctrinc
-	add		x5, x5, #1		/* increment BE ctr */
-	b		.LctrincNx
-#else
-	b		.Lctrinc
-#endif
-.Lctrfirst:
+	cbz		w6, .Lctrnotfirst	/* 1st time around? */
 	enc_prepare	w3, x2, x6
 	ld1		{v4.16b}, [x5]
-	umov		x5, v4.d[1]		/* keep swabbed ctr in reg */
-	rev		x5, x5
+
+.Lctrnotfirst:
+	umov		x8, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x8, x8
 #if INTERLEAVE >= 2
-	cmn		w5, w4			/* 32 bit overflow? */
+	cmn		w8, w4			/* 32 bit overflow? */
 	bcs		.Lctrloop
 .LctrloopNx:
 	subs		w4, w4, #INTERLEAVE
@@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt)
 #if INTERLEAVE == 2
 	mov		v0.8b, v4.8b
 	mov		v1.8b, v4.8b
-	rev		x7, x5
-	add		x5, x5, #1
+	rev		x7, x8
+	add		x8, x8, #1
 	ins		v0.d[1], x7
-	rev		x7, x5
-	add		x5, x5, #1
+	rev		x7, x8
+	add		x8, x8, #1
 	ins		v1.d[1], x7
 	ld1		{v2.16b-v3.16b}, [x1], #32	/* get 2 input blocks */
 	do_encrypt_block2x
@@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt)
 	st1		{v0.16b-v1.16b}, [x0], #32
 #else
 	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
-	dup		v7.4s, w5
+	dup		v7.4s, w8
 	mov		v0.16b, v4.16b
 	add		v7.4s, v7.4s, v8.4s
 	mov		v1.16b, v4.16b
@@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt)
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-	add		x5, x5, #INTERLEAVE
+	add		x8, x8, #INTERLEAVE
 #endif
-	cbz		w4, .LctroutNx
-.LctrincNx:
-	rev		x7, x5
+	rev		x7, x8
 	ins		v4.d[1], x7
+	cbz		w4, .Lctrout
 	b		.LctrloopNx
-.LctroutNx:
-	sub		x5, x5, #1
-	rev		x7, x5
-	ins		v4.d[1], x7
-	b		.Lctrout
 .Lctr1x:
 	adds		w4, w4, #INTERLEAVE
 	beq		.Lctrout
@@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt)
 .Lctrloop:
 	mov		v0.16b, v4.16b
 	encrypt_block	v0, w3, x2, x6, w7
+
+	adds		x8, x8, #1		/* increment BE ctr */
+	rev		x7, x8
+	ins		v4.d[1], x7
+	bcs		.Lctrcarry		/* overflow? */
+
+.Lctrcarrydone:
 	subs		w4, w4, #1
 	bmi		.Lctrhalfblock		/* blocks < 0 means 1/2 block */
 	ld1		{v3.16b}, [x1], #16
 	eor		v3.16b, v0.16b, v3.16b
 	st1		{v3.16b}, [x0], #16
-	beq		.Lctrout
-.Lctrinc:
-	adds		x5, x5, #1		/* increment BE ctr */
-	rev		x7, x5
-	ins		v4.d[1], x7
-	bcc		.Lctrloop		/* no overflow? */
+	bne		.Lctrloop
+
+.Lctrout:
+	st1		{v4.16b}, [x5]		/* return next CTR value */
+	FRAME_POP
+	ret
+
+.Lctrhalfblock:
+	ld1		{v3.8b}, [x1]
+	eor		v3.8b, v0.8b, v3.8b
+	st1		{v3.8b}, [x0]
+	FRAME_POP
+	ret
+
+.Lctrcarry:
 	umov		x7, v4.d[0]		/* load upper word of ctr  */
 	rev		x7, x7			/* ... to handle the carry */
 	add		x7, x7, #1
 	rev		x7, x7
 	ins		v4.d[0], x7
-	b		.Lctrloop
-.Lctrhalfblock:
-	ld1		{v3.8b}, [x1]
-	eor		v3.8b, v0.8b, v3.8b
-	st1		{v3.8b}, [x0]
-.Lctrout:
-	FRAME_POP
-	ret
+	b		.Lctrcarrydone
 AES_ENDPROC(aes_ctr_encrypt)
 	.ltorg
 

From d0e287a401d9acf67b75180b26e2d62b7d482652 Mon Sep 17 00:00:00 2001
From: Rask Ingemann Lambertsen <rask@formelder.dk>
Date: Sat, 21 Jan 2017 17:11:43 +0100
Subject: [PATCH 028/322] regulator: axp20x: AXP806: Fix dcdcb being set
 instead of dcdce

A typo or copy-paste bug means that the register access intended for
regulator dcdce goes to dcdcb instead. This patch corrects it.

Fixes: 2ca342d391e3 (regulator: axp20x: Support AXP806 variant)
Signed-off-by: Rask Ingemann Lambertsen <rask@formelder.dk>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/regulator/axp20x-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index e6a512ebeae2..a3ade9e4ef47 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -272,7 +272,7 @@ static const struct regulator_desc axp806_regulators[] = {
 			64, AXP806_DCDCD_V_CTRL, 0x3f, AXP806_PWR_OUT_CTRL1,
 			BIT(3)),
 	AXP_DESC(AXP806, DCDCE, "dcdce", "vine", 1100, 3400, 100,
-		 AXP806_DCDCB_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)),
+		 AXP806_DCDCE_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(4)),
 	AXP_DESC(AXP806, ALDO1, "aldo1", "aldoin", 700, 3300, 100,
 		 AXP806_ALDO1_V_CTRL, 0x1f, AXP806_PWR_OUT_CTRL1, BIT(5)),
 	AXP_DESC(AXP806, ALDO2, "aldo2", "aldoin", 700, 3400, 100,

From 8ac092519ad91931c96d306c4bfae2c6587c325f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 23 Jan 2017 22:44:12 -0500
Subject: [PATCH 029/322] NFSv4.1: Fix a deadlock in layoutget

We cannot call nfs4_handle_exception() without first ensuring that the
slot has been freed. If not, we end up deadlocking with the process
waiting for recovery to complete, and recovery waiting for the slot
table to drain.

Fixes: 2e80dbe7ac51 ("NFSv4.1: Close callback races for OPEN, LAYOUTGET...")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ecc151697fd4..59bb574d7d7c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8490,6 +8490,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 		goto out;
 	}
 
+	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	err = nfs4_handle_exception(server, nfs4err, exception);
 	if (!status) {
 		if (exception->retry)

From af2b7fa17eb92e52b65f96604448ff7a2a89ee99 Mon Sep 17 00:00:00 2001
From: Darren Stevens <darren@stevens-zone.net>
Date: Mon, 23 Jan 2017 19:42:54 +0000
Subject: [PATCH 030/322] powerpc: Add missing error check to
 prom_find_boot_cpu()

prom_init.c calls 'instance-to-package' twice, but the return
is not checked during prom_find_boot_cpu(). The result is then
passed to prom_getprop(), which could be PROM_ERROR. Add a return check
to prevent this.

This was found on a pasemi system, where CFE doesn't have a working
'instance-to package' prom call.

Before Commit 5c0484e25ec0 ('powerpc: Endian safe trampoline') the area
around addr 0 was mostly 0's and this doesn't cause a problem. Once the
macro 'FIXUP_ENDIAN' has been added to head_64.S, the low memory area
now has non-zero values, which cause the prom_getprop() call
to hang.

mpe: Also confirmed that under SLOF if 'instance-to-package' did fail
with PROM_ERROR we would crash in SLOF. So the bug is not specific to
CFE, it's just that other open firmwares don't trigger it because they
have a working 'instance-to-package'.

Fixes: 5c0484e25ec0 ("powerpc: Endian safe trampoline")
Cc: stable@vger.kernel.org # v3.13+
Signed-off-by: Darren Stevens <darren@stevens-zone.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ec47a939cbdd..ac83eb04a8b8 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2834,6 +2834,9 @@ static void __init prom_find_boot_cpu(void)
 
 	cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);
 
+	if (!PHANDLE_VALID(cpu_pkg))
+		return;
+
 	prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval));
 	prom.cpu = be32_to_cpu(rval);
 

From dffba9a31c7769be3231c420d4b364c92ba3f1ac Mon Sep 17 00:00:00 2001
From: Yu-cheng Yu <yu-cheng.yu@intel.com>
Date: Mon, 23 Jan 2017 14:54:44 -0800
Subject: [PATCH 031/322] x86/fpu/xstate: Fix xcomp_bv in XSAVES header

The compacted-format XSAVES area is determined at boot time and
never changed after.  The field xsave.header.xcomp_bv indicates
which components are in the fixed XSAVES format.

In fpstate_init() we did not set xcomp_bv to reflect the XSAVES
format since at the time there is no valid data.

However, after we do copy_init_fpstate_to_fpregs() in fpu__clear(),
as in commit:

  b22cbe404a9c x86/fpu: Fix invalid FPU ptrace state after execve()

and when __fpu_restore_sig() does fpu__restore() for a COMPAT-mode
app, a #GP occurs.  This can be easily triggered by doing valgrind on
a COMPAT-mode "Hello World," as reported by Joakim Tjernlund and
others:

	https://bugzilla.kernel.org/show_bug.cgi?id=190061

Fix it by setting xcomp_bv correctly.

This patch also moves the xcomp_bv initialization to the proper
place, which was in copyin_to_xsaves() as of:

  4c833368f0bf x86/fpu: Set the xcomp_bv when we fake up a XSAVES area

which fixed the bug too, but it's more efficient and cleaner to
initialize things once per boot, not for every signal handling
operation.

Reported-by: Kevin Hao <haokexin@gmail.com>
Reported-by: Joakim Tjernlund <Joakim.Tjernlund@infinera.com>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi V. Shankar <ravi.v.shankar@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: haokexin@gmail.com
Link: http://lkml.kernel.org/r/1485212084-4418-1-git-send-email-yu-cheng.yu@intel.com
[ Combined it with 4c833368f0bf. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c   | 4 +++-
 arch/x86/kernel/fpu/xstate.c | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e4e97a5355ce..de7234401275 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -9,6 +9,7 @@
 #include <asm/fpu/regset.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/types.h>
+#include <asm/fpu/xstate.h>
 #include <asm/traps.h>
 
 #include <linux/hardirq.h>
@@ -183,7 +184,8 @@ void fpstate_init(union fpregs_state *state)
 	 * it will #GP. Make sure it is replaced after the memset().
 	 */
 	if (static_cpu_has(X86_FEATURE_XSAVES))
-		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
+		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
+					       xfeatures_mask;
 
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index e287b9075527..1d7770447b3e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1070,7 +1070,6 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
 	 * Add back in the features that came in from userspace:
 	 */
 	xsave->header.xfeatures |= xfeatures;
-	xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures;
 
 	return 0;
 }

From f05fea5b3574a5926c53865eea27139bb40b2f2b Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Thu, 19 Jan 2017 10:10:16 +1100
Subject: [PATCH 032/322] powerpc/eeh: Fix wrong flag passed to
 eeh_unfreeze_pe()

In __eeh_clear_pe_frozen_state(), we should pass the flag's value
instead of its address to eeh_unfreeze_pe(). The isolated flag is
cleared if no error returned from __eeh_clear_pe_frozen_state(). We
never observed the error from the function. So the isolated flag should
have been always cleared, no real issue is caused because of the misused
@flag.

This fixes the code by passing the value of @flag to eeh_unfreeze_pe().

Fixes: 5cfb20b96f6 ("powerpc/eeh: Emulate EEH recovery for VFIO devices")
Cc: stable@vger.kernel.org # v3.18+
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/eeh_driver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index d88573bdd090..b94887165a10 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -545,7 +545,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
 static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
-	bool *clear_sw_state = flag;
+	bool clear_sw_state = *(bool *)flag;
 	int i, rc = 1;
 
 	for (i = 0; rc && i < 3; i++)

From f2574030b0e33263b8a1c28fa3c4fa9292283799 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Jan 2017 21:37:20 +1100
Subject: [PATCH 033/322] powerpc: Revert the initial stack protector support

Unfortunately the stack protector support we merged recently only works
on some toolchains. If the toolchain is built without glibc support
everything works fine, but if glibc is built then it leads to a panic
at boot.

The solution is not rc5 material, so revert the support for now. This
reverts commits:

6533b7c16ee5 ("powerpc: Initial stack protector (-fstack-protector) support")
902e06eb86cd ("powerpc/32: Change the stack protector canary value per task")

Fixes: 6533b7c16ee5 ("powerpc: Initial stack protector (-fstack-protector) support")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig                      |  1 -
 arch/powerpc/include/asm/stackprotector.h | 40 -----------------------
 arch/powerpc/kernel/Makefile              |  4 ---
 arch/powerpc/kernel/asm-offsets.c         |  3 --
 arch/powerpc/kernel/entry_32.S            |  6 +---
 arch/powerpc/kernel/process.c             |  6 ----
 6 files changed, 1 insertion(+), 59 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/stackprotector.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a8ee573fe610..a46d1c0d14d3 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -164,7 +164,6 @@ config PPC
 	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
 	select HAVE_ARCH_HARDENED_USERCOPY
 	select HAVE_KERNEL_GZIP
-	select HAVE_CC_STACKPROTECTOR
 
 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
deleted file mode 100644
index 6720190eabec..000000000000
--- a/arch/powerpc/include/asm/stackprotector.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * GCC stack protector support.
- *
- * Stack protector works by putting predefined pattern at the start of
- * the stack frame and verifying that it hasn't been overwritten when
- * returning from the function.  The pattern is called stack canary
- * and gcc expects it to be defined by a global variable called
- * "__stack_chk_guard" on PPC.  This unfortunately means that on SMP
- * we cannot have a different canary value per task.
- */
-
-#ifndef _ASM_STACKPROTECTOR_H
-#define _ASM_STACKPROTECTOR_H
-
-#include <linux/random.h>
-#include <linux/version.h>
-#include <asm/reg.h>
-
-extern unsigned long __stack_chk_guard;
-
-/*
- * Initialize the stackprotector canary value.
- *
- * NOTE: this must only be called from functions that never return,
- * and it must always be inlined.
- */
-static __always_inline void boot_init_stack_canary(void)
-{
-	unsigned long canary;
-
-	/* Try to get a semi random initial value. */
-	get_random_bytes(&canary, sizeof(canary));
-	canary ^= mftb();
-	canary ^= LINUX_VERSION_CODE;
-
-	current->stack_canary = canary;
-	__stack_chk_guard = current->stack_canary;
-}
-
-#endif	/* _ASM_STACKPROTECTOR_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 23f8082d7bfa..f4c2b52e58b3 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -19,10 +19,6 @@ CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 
-# -fstack-protector triggers protection checks in this code,
-# but it is being used too early to link to meaningful stack_chk logic.
-CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
-
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
 CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 0601e6a7297c..195a9fc8f81c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -91,9 +91,6 @@ int main(void)
 	DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp));
 #endif
 
-#ifdef CONFIG_CC_STACKPROTECTOR
-	DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
-#endif
 	DEFINE(KSP, offsetof(struct thread_struct, ksp));
 	DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
 #ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 5742dbdbee46..3841d749a430 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -674,11 +674,7 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_SPEFSCR,r0		/* restore SPEFSCR reg */
 END_FTR_SECTION_IFSET(CPU_FTR_SPE)
 #endif /* CONFIG_SPE */
-#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
-	lwz	r0,TSK_STACK_CANARY(r2)
-	lis	r4,__stack_chk_guard@ha
-	stw	r0,__stack_chk_guard@l(r4)
-#endif
+
 	lwz	r0,_CCR(r1)
 	mtcrf	0xFF,r0
 	/* r3-r12 are destroyed -- Cort */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 04885cec24df..5dd056df0baa 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -64,12 +64,6 @@
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 
-#ifdef CONFIG_CC_STACKPROTECTOR
-#include <linux/stackprotector.h>
-unsigned long __stack_chk_guard __read_mostly;
-EXPORT_SYMBOL(__stack_chk_guard);
-#endif
-
 /* Transactional Memory debug */
 #ifdef TM_DEBUG_SW
 #define TM_DEBUG(x...) printk(KERN_INFO x)

From 5d03a2fd2292e71936c4235885c35ccc3c94695b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Tue, 24 Jan 2017 10:31:18 +0100
Subject: [PATCH 034/322] USB: serial: option: add device ID for HP lt2523
 (Novatel E371)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Yet another laptop vendor rebranded Novatel E371.

Cc: stable@vger.kernel.org
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 7ce31a4c7e7f..42cc72e54c05 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -2007,6 +2007,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, 0xff, 0xff, 0xff) }, /* HP lt2523 (Novatel E371) */
 	{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);

From a971c5545c3d45a1e33fda6e57913bb75aaa20c9 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Mon, 23 Jan 2017 14:54:10 -0200
Subject: [PATCH 035/322] ARM: dts: imx: Pass 'chosen' and 'memory' nodes

Commit 7f107887d199 ("ARM: dts: imx: Remove skeleton.dtsi") causes boot
issues when the bootloader does not create a 'chosen' node if such node
is not present in the dtb.

The reason for the boot failure is well explained by Javier Martinez
Canillas: "the decompressor relies on a pre-existing chosen node to be
available to insert the command line and merge other ATAGS info."

, so pass an empty 'chosen' node to fix the boot problem.

This issue has been seen in the kernelci reports with Barebox as
bootloader.

Also pass the 'memory' node in order to fix boot issues on the SolidRun
iMX6 platforms.

Fixes: 7f107887d199 ("ARM: dts: imx: Remove skeleton.dtsi")
Reported-by: kernelci.org bot <bot@kernelci.org>
Reported-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx1.dtsi    | 8 ++++++++
 arch/arm/boot/dts/imx23.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx25.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx27.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx28.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx31.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx35.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx50.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx51.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx53.dtsi   | 8 ++++++++
 arch/arm/boot/dts/imx6qdl.dtsi | 8 ++++++++
 arch/arm/boot/dts/imx6sl.dtsi  | 8 ++++++++
 arch/arm/boot/dts/imx6sx.dtsi  | 8 ++++++++
 arch/arm/boot/dts/imx6ul.dtsi  | 8 ++++++++
 arch/arm/boot/dts/imx7s.dtsi   | 8 ++++++++
 15 files changed, 120 insertions(+)

diff --git a/arch/arm/boot/dts/imx1.dtsi b/arch/arm/boot/dts/imx1.dtsi
index b792eee3899b..2ee40bc9ec21 100644
--- a/arch/arm/boot/dts/imx1.dtsi
+++ b/arch/arm/boot/dts/imx1.dtsi
@@ -18,6 +18,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		gpio0 = &gpio1;
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index ac2a9da62b6c..43ccbbf754a3 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -16,6 +16,14 @@ / {
 	#size-cells = <1>;
 
 	interrupt-parent = <&icoll>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		gpio0 = &gpio0;
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index 831d09a28155..acd475659156 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -14,6 +14,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index 9d8b5969ee3b..b397384248f4 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -19,6 +19,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 3aabf65a6a52..d6a2190b60ef 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -17,6 +17,14 @@ / {
 	#size-cells = <1>;
 
 	interrupt-parent = <&icoll>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &mac0;
diff --git a/arch/arm/boot/dts/imx31.dtsi b/arch/arm/boot/dts/imx31.dtsi
index 85cd8be22f71..23b0d2cf9acd 100644
--- a/arch/arm/boot/dts/imx31.dtsi
+++ b/arch/arm/boot/dts/imx31.dtsi
@@ -12,6 +12,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		serial0 = &uart1;
diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi
index 9f40e6229189..d0496c65cea2 100644
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi
@@ -13,6 +13,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx50.dtsi b/arch/arm/boot/dts/imx50.dtsi
index fe0221e4cbf7..ceae909e2201 100644
--- a/arch/arm/boot/dts/imx50.dtsi
+++ b/arch/arm/boot/dts/imx50.dtsi
@@ -17,6 +17,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi
index 33526cade735..1ee1d542d9ad 100644
--- a/arch/arm/boot/dts/imx51.dtsi
+++ b/arch/arm/boot/dts/imx51.dtsi
@@ -19,6 +19,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index ca51dc03e327..2e516f4985e4 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -19,6 +19,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index 89b834f3fa17..e7d30f45b161 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -16,6 +16,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi
index 19cbd879c448..cc9572ea2860 100644
--- a/arch/arm/boot/dts/imx6sl.dtsi
+++ b/arch/arm/boot/dts/imx6sl.dtsi
@@ -14,6 +14,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec;
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index 10f333016197..dd4ec85ecbaa 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -15,6 +15,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		can0 = &flexcan1;
diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index 39845a7e0463..53d3f8e41e9b 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi
@@ -15,6 +15,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		ethernet0 = &fec1;
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index 8ff2cbdd8f0d..be33dfc86838 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -50,6 +50,14 @@
 / {
 	#address-cells = <1>;
 	#size-cells = <1>;
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+	 * Also for U-Boot there must be a pre-existing /memory node.
+	 */
+	chosen {};
+	memory { device_type = "memory"; reg = <0 0>; };
 
 	aliases {
 		gpio0 = &gpio1;

From 059aa734824165507c65fd30a55ff000afd14983 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 22 Jan 2017 14:04:29 -0500
Subject: [PATCH 036/322] nfs: Don't increment lock sequence ID after
 NFS4ERR_MOVED

Xuan Qi reports that the Linux NFSv4 client failed to lock a file
that was migrated. The steps he observed on the wire:

1. The client sent a LOCK request to the source server
2. The source server replied NFS4ERR_MOVED
3. The client switched to the destination server
4. The client sent the same LOCK request to the destination
   server with a bumped lock sequence ID
5. The destination server rejected the LOCK request with
   NFS4ERR_BAD_SEQID

RFC 3530 section 8.1.5 provides a list of NFS errors which do not
bump a lock sequence ID.

However, RFC 3530 is now obsoleted by RFC 7530. In RFC 7530 section
9.1.7, this list has been updated by the addition of NFS4ERR_MOVED.

Reported-by: Xuan Qi <xuan.qi@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@vger.kernel.org # v3.7+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/nfs4.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index bca536341d1a..1b1ca04820a3 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -282,7 +282,7 @@ enum nfsstat4 {
 
 static inline bool seqid_mutating_err(u32 err)
 {
-	/* rfc 3530 section 8.1.5: */
+	/* See RFC 7530, section 9.1.7 */
 	switch (err) {
 	case NFS4ERR_STALE_CLIENTID:
 	case NFS4ERR_STALE_STATEID:
@@ -291,6 +291,7 @@ static inline bool seqid_mutating_err(u32 err)
 	case NFS4ERR_BADXDR:
 	case NFS4ERR_RESOURCE:
 	case NFS4ERR_NOFILEHANDLE:
+	case NFS4ERR_MOVED:
 		return false;
 	};
 	return true;

From a430607b2ef7c3be090f88c71cfcb1b3988aa7c0 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 24 Jan 2017 11:34:20 -0500
Subject: [PATCH 037/322] NFSv4.0: always send mode in SETATTR after EXCLUSIVE4

Some nfsv4.0 servers may return a mode for the verifier following an open
with EXCLUSIVE4 createmode, but this does not mean the client should skip
setting the mode in the following SETATTR.  It should only do that for
EXCLUSIVE4_1 or UNGAURDED createmode.

Fixes: 5334c5bdac92 ("NFS: Send attributes in OPEN request for NFS4_CREATE_EXCLUSIVE4_1")
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Cc: stable@vger.kernel.org # v4.3+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 59bb574d7d7c..0a0eaecf9676 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2700,7 +2700,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
 		sattr->ia_valid |= ATTR_MTIME;
 
 	/* Except MODE, it seems harmless of setting twice. */
-	if ((attrset[1] & FATTR4_WORD1_MODE))
+	if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
+		attrset[1] & FATTR4_WORD1_MODE)
 		sattr->ia_valid &= ~ATTR_MODE;
 
 	if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)

From 92fdb527eecff7e5eb945a3fbf4743110f5c1171 Mon Sep 17 00:00:00 2001
From: Yuriy Kolerov <yuriy.kolerov@synopsys.com>
Date: Wed, 28 Dec 2016 11:46:26 +0300
Subject: [PATCH 038/322] ARCv2: MCIP: Deprecate setting of affinity in Device
 Tree

Ignore value of interrupt distribution mode for common interrupts in
IDU since setting of affinity using value from Device Tree is deprecated
in ARC. Originally it is done in idu_irq_xlate() function and it is
semantically wrong and does not guaranty that an affinity value will be
set properly. idu_irq_enable() function is better place for
initialization of common interrupts.

By default send all common interrupts to all available online CPUs.
The affinity of common interrupts in IDU must be set manually since
in some cases the kernel will not call irq_set_affinity() by itself:

  1. When the kernel is not configured with support of SMP.
  2. When the kernel is configured with support of SMP but upper
     interrupt controllers does not support setting of the affinity
     and cannot propagate it to IDU.

Signed-off-by: Yuriy Kolerov <yuriy.kolerov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 .../snps,archs-idu-intc.txt                   |  3 ++
 arch/arc/kernel/mcip.c                        | 52 ++++++++-----------
 2 files changed, 25 insertions(+), 30 deletions(-)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt
index 0dcb7c7d3e40..944657684d73 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/snps,archs-idu-intc.txt
@@ -15,6 +15,9 @@ Properties:
   Second cell specifies the irq distribution mode to cores
      0=Round Robin; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3
 
+  The second cell in interrupts property is deprecated and may be ignored by
+  the kernel.
+
   intc accessed via the special ARC AUX register interface, hence "reg" property
   is not specified.
 
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 9274f8ade8c7..9988b427a1e0 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -175,7 +175,6 @@ static void idu_irq_unmask(struct irq_data *data)
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
 }
 
-#ifdef CONFIG_SMP
 static int
 idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
 		     bool force)
@@ -205,12 +204,27 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
 
 	return IRQ_SET_MASK_OK;
 }
-#endif
+
+static void idu_irq_enable(struct irq_data *data)
+{
+	/*
+	 * By default send all common interrupts to all available online CPUs.
+	 * The affinity of common interrupts in IDU must be set manually since
+	 * in some cases the kernel will not call irq_set_affinity() by itself:
+	 *   1. When the kernel is not configured with support of SMP.
+	 *   2. When the kernel is configured with support of SMP but upper
+	 *      interrupt controllers does not support setting of the affinity
+	 *      and cannot propagate it to IDU.
+	 */
+	idu_irq_set_affinity(data, cpu_online_mask, false);
+	idu_irq_unmask(data);
+}
 
 static struct irq_chip idu_irq_chip = {
 	.name			= "MCIP IDU Intc",
 	.irq_mask		= idu_irq_mask,
 	.irq_unmask		= idu_irq_unmask,
+	.irq_enable		= idu_irq_enable,
 #ifdef CONFIG_SMP
 	.irq_set_affinity       = idu_irq_set_affinity,
 #endif
@@ -243,36 +257,14 @@ static int idu_irq_xlate(struct irq_domain *d, struct device_node *n,
 			 const u32 *intspec, unsigned int intsize,
 			 irq_hw_number_t *out_hwirq, unsigned int *out_type)
 {
-	irq_hw_number_t hwirq = *out_hwirq = intspec[0];
-	int distri = intspec[1];
-	unsigned long flags;
-
+	/*
+	 * Ignore value of interrupt distribution mode for common interrupts in
+	 * IDU which resides in intspec[1] since setting an affinity using value
+	 * from Device Tree is deprecated in ARC.
+	 */
+	*out_hwirq = intspec[0];
 	*out_type = IRQ_TYPE_NONE;
 
-	/* XXX: validate distribution scheme again online cpu mask */
-	if (distri == 0) {
-		/* 0 - Round Robin to all cpus, otherwise 1 bit per core */
-		raw_spin_lock_irqsave(&mcip_lock, flags);
-		idu_set_dest(hwirq, BIT(num_online_cpus()) - 1);
-		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
-		raw_spin_unlock_irqrestore(&mcip_lock, flags);
-	} else {
-		/*
-		 * DEST based distribution for Level Triggered intr can only
-		 * have 1 CPU, so generalize it to always contain 1 cpu
-		 */
-		int cpu = ffs(distri);
-
-		if (cpu != fls(distri))
-			pr_warn("IDU irq %lx distri mode set to cpu %x\n",
-				hwirq, cpu);
-
-		raw_spin_lock_irqsave(&mcip_lock, flags);
-		idu_set_dest(hwirq, cpu);
-		idu_set_mode(hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_DEST);
-		raw_spin_unlock_irqrestore(&mcip_lock, flags);
-	}
-
 	return 0;
 }
 

From 3f5c34c6d4688b3b7e1dbc7bbc68a2f03a0d6b0c Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Tue, 24 Jan 2017 10:12:00 -0800
Subject: [PATCH 039/322] Input: wm97xx - make missing platform data non-fatal

Commit 6480af4915d6 ("power_supply: wm97xx_battery: use
power_supply_get_drvdata") made wm97xx platform data mandatory, although
it's still optional.

This patch fixes an oops during driver probe on one of my MIPS boards with
a wm9712.

Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Reviewed-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/wm97xx-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c
index 83cf11312fd9..c9d1c91e1887 100644
--- a/drivers/input/touchscreen/wm97xx-core.c
+++ b/drivers/input/touchscreen/wm97xx-core.c
@@ -682,7 +682,7 @@ static int wm97xx_probe(struct device *dev)
 	}
 	platform_set_drvdata(wm->battery_dev, wm);
 	wm->battery_dev->dev.parent = dev;
-	wm->battery_dev->dev.platform_data = pdata->batt_pdata;
+	wm->battery_dev->dev.platform_data = pdata ? pdata->batt_pdata : NULL;
 	ret = platform_device_add(wm->battery_dev);
 	if (ret < 0)
 		goto batt_reg_err;

From 36425cd67052e3becf325fd4d3ba5691791ef7e4 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 24 Jan 2017 10:23:42 -0800
Subject: [PATCH 040/322] ARC: udelay: fix inline assembler by adding LP_COUNT
 to clobber list

commit 3c7c7a2fc8811bc ("ARC: Don't use "+l" inline asm constraint")
modified the inline assembly to setup LP_COUNT register manually and NOT
rely on gcc to do it (with the +l inline assembler contraint hint, now
being retired in the compiler)

However the fix was flawed as we didn't add LP_COUNT to asm clobber list,
meaning gcc doesn't know that LP_COUNT or zero-delay-loops are in action
in the inline asm.

This resulted in some fun - as nested ZOL loops were being generared

| mov lp_count,250000 ;16 # tmp235,
| lp .L__GCC__LP14 #		<======= OUTER LOOP (gcc generated)
|   .L14:
|   ld r2, [r5] # MEM[(volatile u32 *)prephitmp_43], w
|   dmb 1
|   breq r2, -1, @.L21 #, w,,
|   bbit0 r2,1,@.L13 # w,,
|   ld r4,[r7] ;25 # loops_per_jiffy, loops_per_jiffy
|   mpymu r3,r4,r6 #, loops_per_jiffy, tmp234
|
|   mov lp_count, r3 #		 <====== INNER LOOP (from inline asm)
|   lp 1f
| 	 nop
|   1:
|   nop_s
| .L__GCC__LP14: ; loop end, start is @.L14 #,

This caused issues with drivers relying on sane behaviour of udelay
friends.

With LP_COUNT added to clobber list, gcc doesn't generate the outer
loop in say above case.

Addresses STAR 9001146134

Reported-by: Joao Pinto <jpinto@synopsys.com>
Fixes: 3c7c7a2fc8811bc ("ARC: Don't use "+l" inline asm constraint")
Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/delay.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index a36e8601114d..d5da2115d78a 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -26,7 +26,9 @@ static inline void __delay(unsigned long loops)
 	"	lp  1f			\n"
 	"	nop			\n"
 	"1:				\n"
-	: : "r"(loops));
+	:
+        : "r"(loops)
+        : "lp_count");
 }
 
 extern void __bad_udelay(void);

From 517e7610d2ce04d1b8d8b6c6d1a36dcce5cac6ab Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 19 Jan 2017 17:05:00 -0800
Subject: [PATCH 041/322] ARCv2: MCIP: update the BCR per current changes

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/mcip.c |  3 +--
 include/soc/arc/mcip.h | 16 ++++++++--------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 9988b427a1e0..9f6b68fd4f3b 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -93,11 +93,10 @@ static void mcip_probe_n_setup(void)
 	READ_BCR(ARC_REG_MCIP_BCR, mp);
 
 	sprintf(smp_cpuinfo_buf,
-		"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n",
+		"Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
 		mp.ver, mp.num_cores,
 		IS_AVAIL1(mp.ipi, "IPI "),
 		IS_AVAIL1(mp.idu, "IDU "),
-		IS_AVAIL1(mp.llm, "LLM "),
 		IS_AVAIL1(mp.dbg, "DEBUG "),
 		IS_AVAIL1(mp.gfrc, "GFRC"));
 
diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h
index 6902c2a8bd23..4b6b489a8d7c 100644
--- a/include/soc/arc/mcip.h
+++ b/include/soc/arc/mcip.h
@@ -55,17 +55,17 @@ struct mcip_cmd {
 
 struct mcip_bcr {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad3:8,
-			     idu:1, llm:1, num_cores:6,
-			     iocoh:1,  gfrc:1, dbg:1, pad2:1,
-			     msg:1, sem:1, ipi:1, pad:1,
+		unsigned int pad4:6, pw_dom:1, pad3:1,
+			     idu:1, pad2:1, num_cores:6,
+			     pad:1,  gfrc:1, dbg:1, pw:1,
+			     msg:1, sem:1, ipi:1, slv:1,
 			     ver:8;
 #else
 		unsigned int ver:8,
-			     pad:1, ipi:1, sem:1, msg:1,
-			     pad2:1, dbg:1, gfrc:1, iocoh:1,
-			     num_cores:6, llm:1, idu:1,
-			     pad3:8;
+			     slv:1, ipi:1, sem:1, msg:1,
+			     pw:1, dbg:1, gfrc:1, pad:1,
+			     num_cores:6, pad2:1, idu:1,
+			     pad3:1, pw_dom:1, pad4:6;
 #endif
 };
 

From bf02454a741b58682a82c314a9a46bed930ed2f7 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 12 Jan 2017 14:30:29 -0800
Subject: [PATCH 042/322] ARC: smp-boot: Decouple Non masters waiting API from
 jump to entry point

For run-on-reset SMP configs, non master cores call a routine which
waits until Master gives it a "go" signal (currently using a shared
mem flag). The same routine then jumps off the well known entry point of
all non Master cores i.e. @first_lines_of_secondary

This patch moves out the last part into one single place in early boot
code.

This is better in terms of absraction (the wait API only waits) and
returns, leaving out the "jump off to" part.

In actual implementation this requires some restructuring of the early
boot code as well as Master now jumps to BSS setup explicitly,
vs. falling thru into it before.

Technically this patch doesn't cause any functional change, it just
moves the ugly #ifdef'ry from assembly code to "C"

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/head.S | 14 +++++++-------
 arch/arc/kernel/smp.c  |  6 ++++--
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 689dd867fdff..8b90d25a15cc 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -71,14 +71,14 @@ ENTRY(stext)
 	GET_CPU_ID  r5
 	cmp	r5, 0
 	mov.nz	r0, r5
-#ifdef CONFIG_ARC_SMP_HALT_ON_RESET
-	; Non-Master can proceed as system would be booted sufficiently
-	jnz	first_lines_of_secondary
-#else
+	bz	.Lmaster_proceed
+
 	; Non-Masters wait for Master to boot enough and bring them up
-	jnz	arc_platform_smp_wait_to_boot
-#endif
-	; Master falls thru
+	; when they resume, tail-call to entry point
+	mov	blink, @first_lines_of_secondary
+	j	arc_platform_smp_wait_to_boot
+
+.Lmaster_proceed:
 #endif
 
 	; Clear BSS before updating any globals
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 88674d972c9d..44a0d21ed342 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -98,14 +98,16 @@ static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
 
 void arc_platform_smp_wait_to_boot(int cpu)
 {
+	/* for halt-on-reset, we've waited already */
+	if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET))
+		return;
+
 	while (wake_flag != cpu)
 		;
 
 	wake_flag = 0;
-	__asm__ __volatile__("j @first_lines_of_secondary	\n");
 }
 
-
 const char *arc_platform_smp_cpuinfo(void)
 {
 	return plat_smp_ops.info ? : "";

From d46d29f072accb069cb42b5fbebcc77d9094a785 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 11 Jan 2017 13:38:52 -0800
Subject: [PATCH 043/322] md/raid5-cache: delete meaningless code

sector_t is unsigned long, it's never < 0

Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0e8ed2c327b0..e0d6dd1835d0 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1393,8 +1393,6 @@ static void r5l_do_reclaim(struct r5l_log *log)
 	next_checkpoint = r5c_calculate_new_cp(conf);
 	spin_unlock_irq(&log->io_list_lock);
 
-	BUG_ON(reclaimable < 0);
-
 	if (reclaimable == 0 || !write_super)
 		return;
 

From 86aa1397ddfde563b3692adadb8b8e32e97b4e5e Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 12 Jan 2017 17:22:41 -0800
Subject: [PATCH 044/322] md/r5cache: read data into orig_page for prexor of
 cached data

With write back cache, we use orig_page to do prexor. This patch
makes sure we read data into orig_page for it.

Flag R5_OrigPageUPTDODATE is added to show whether orig_page
has the latest data from raid disk.

We introduce a helper function uptodate_for_rmw() to simplify
the a couple conditions in handle_stripe_dirtying().

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c |  2 ++
 drivers/md/raid5.c       | 44 ++++++++++++++++++++++++++++++++--------
 drivers/md/raid5.h       |  5 +++++
 3 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index e0d6dd1835d0..95dcaa022e1f 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2349,6 +2349,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
 			struct page *p = sh->dev[i].orig_page;
 
 			sh->dev[i].orig_page = sh->dev[i].page;
+			clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
+
 			if (!using_disk_info_extra_page)
 				put_page(p);
 		}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 36c13e4be9c9..7780ae44f355 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1015,7 +1015,17 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 
 			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
 				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
-			sh->dev[i].vec.bv_page = sh->dev[i].page;
+
+			if (!op_is_write(op) &&
+			    test_bit(R5_InJournal, &sh->dev[i].flags))
+				/*
+				 * issuing read for a page in journal, this
+				 * must be preparing for prexor in rmw; read
+				 * the data into orig_page
+				 */
+				sh->dev[i].vec.bv_page = sh->dev[i].orig_page;
+			else
+				sh->dev[i].vec.bv_page = sh->dev[i].page;
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			bi->bi_io_vec[0].bv_offset = 0;
@@ -2380,6 +2390,13 @@ static void raid5_end_read_request(struct bio * bi)
 		} else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 			clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
 
+		if (test_bit(R5_InJournal, &sh->dev[i].flags))
+			/*
+			 * end read for a page in journal, this
+			 * must be preparing for prexor in rmw
+			 */
+			set_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);
+
 		if (atomic_read(&rdev->read_errors))
 			atomic_set(&rdev->read_errors, 0);
 	} else {
@@ -3594,6 +3611,21 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 		break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
 }
 
+/*
+ * For RMW in write back cache, we need extra page in prexor to store the
+ * old data. This page is stored in dev->orig_page.
+ *
+ * This function checks whether we have data for prexor. The exact logic
+ * is:
+ *       R5_UPTODATE && (!R5_InJournal || R5_OrigPageUPTDODATE)
+ */
+static inline bool uptodate_for_rmw(struct r5dev *dev)
+{
+	return (test_bit(R5_UPTODATE, &dev->flags)) &&
+		(!test_bit(R5_InJournal, &dev->flags) ||
+		 test_bit(R5_OrigPageUPTDODATE, &dev->flags));
+}
+
 static int handle_stripe_dirtying(struct r5conf *conf,
 				  struct stripe_head *sh,
 				  struct stripe_head_state *s,
@@ -3625,9 +3657,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 		if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
 		     test_bit(R5_InJournal, &dev->flags)) &&
 		    !test_bit(R5_LOCKED, &dev->flags) &&
-		    !((test_bit(R5_UPTODATE, &dev->flags) &&
-		       (!test_bit(R5_InJournal, &dev->flags) ||
-			dev->page != dev->orig_page)) ||
+		    !(uptodate_for_rmw(dev) ||
 		      test_bit(R5_Wantcompute, &dev->flags))) {
 			if (test_bit(R5_Insync, &dev->flags))
 				rmw++;
@@ -3639,7 +3669,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 		    i != sh->pd_idx && i != sh->qd_idx &&
 		    !test_bit(R5_LOCKED, &dev->flags) &&
 		    !(test_bit(R5_UPTODATE, &dev->flags) ||
-		      test_bit(R5_InJournal, &dev->flags) ||
 		      test_bit(R5_Wantcompute, &dev->flags))) {
 			if (test_bit(R5_Insync, &dev->flags))
 				rcw++;
@@ -3693,9 +3722,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 			     i == sh->pd_idx || i == sh->qd_idx ||
 			     test_bit(R5_InJournal, &dev->flags)) &&
 			    !test_bit(R5_LOCKED, &dev->flags) &&
-			    !((test_bit(R5_UPTODATE, &dev->flags) &&
-			       (!test_bit(R5_InJournal, &dev->flags) ||
-				dev->page != dev->orig_page)) ||
+			    !(uptodate_for_rmw(dev) ||
 			      test_bit(R5_Wantcompute, &dev->flags)) &&
 			    test_bit(R5_Insync, &dev->flags)) {
 				if (test_bit(STRIPE_PREREAD_ACTIVE,
@@ -3722,7 +3749,6 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 			    i != sh->pd_idx && i != sh->qd_idx &&
 			    !test_bit(R5_LOCKED, &dev->flags) &&
 			    !(test_bit(R5_UPTODATE, &dev->flags) ||
-			      test_bit(R5_InJournal, &dev->flags) ||
 			      test_bit(R5_Wantcompute, &dev->flags))) {
 				rcw++;
 				if (test_bit(R5_Insync, &dev->flags) &&
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index ed8e1362ab36..461df197d157 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -322,6 +322,11 @@ enum r5dev_flags {
 			 * data and parity being written are in the journal
 			 * device
 			 */
+	R5_OrigPageUPTDODATE,	/* with write back cache, we read old data into
+				 * dev->orig_page for prexor. When this flag is
+				 * set, orig_page contains latest data in the
+				 * raid disk.
+				 */
 };
 
 /*

From ba02684daf7fb4a827580f909b7c7db61c05ae7d Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 12 Jan 2017 17:22:42 -0800
Subject: [PATCH 045/322] md/raid5: move comment of fetch_block to right
 location

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7780ae44f355..13d76767c2cf 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3312,13 +3312,6 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
 	return rv;
 }
 
-/* fetch_block - checks the given member device to see if its data needs
- * to be read or computed to satisfy a request.
- *
- * Returns 1 when no more member devices need to be checked, otherwise returns
- * 0 to tell the loop in handle_stripe_fill to continue
- */
-
 static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
 			   int disk_idx, int disks)
 {
@@ -3409,6 +3402,12 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
 	return 0;
 }
 
+/* fetch_block - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill to continue
+ */
 static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
 		       int disk_idx, int disks)
 {

From a85dd7b8df52e35d8ee3794c65cac5c39128fd80 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 23 Jan 2017 17:12:57 -0800
Subject: [PATCH 046/322] md/r5cache: flush data only stripes in
 r5l_recovery_log()

For safer operation, all arrays start in write-through mode, which has been
better tested and is more mature. And actually the write-through/write-mode
isn't persistent after array restarted, so we always start array in
write-through mode. However, if recovery found data-only stripes before the
shutdown (from previous write-back mode), it is not safe to start the array in
write-through mode, as write-through mode can not handle stripes with data in
write-back cache. To solve this problem, we flush all data-only stripes in
r5l_recovery_log(). When r5l_recovery_log() returns, the array starts with
empty cache in write-through mode.

This logic is implemented in r5c_recovery_flush_data_only_stripes():

1. enable write back cache
2. flush all stripes
3. wake up conf->mddev->thread
4. wait for all stripes get flushed (reuse wait_for_quiescent)
5. disable write back cache

The wait in 4 will be waked up in release_inactive_stripe_list()
when conf->active_stripes reaches 0.

It is safe to wake up mddev->thread here because all the resource
required for the thread has been initialized.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c          |  5 ++++
 drivers/md/raid5-cache.c | 56 ++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 82821ee0d57f..01175dac0db6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev)
 	if (start_readonly && mddev->ro == 0)
 		mddev->ro = 2; /* read-only, but switch on first write */
 
+	/*
+	 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
+	 * up mddev->thread. It is important to initialize critical
+	 * resources for mddev->thread BEFORE calling pers->run().
+	 */
 	err = pers->run(mddev);
 	if (err)
 		pr_warn("md: pers->run() failed ...\n");
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 95dcaa022e1f..3d7dda85494c 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2060,7 +2060,7 @@ static int
 r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 				       struct r5l_recovery_ctx *ctx)
 {
-	struct stripe_head *sh, *next;
+	struct stripe_head *sh;
 	struct mddev *mddev = log->rdev->mddev;
 	struct page *page;
 	sector_t next_checkpoint = MaxSector;
@@ -2074,7 +2074,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 
 	WARN_ON(list_empty(&ctx->cached_list));
 
-	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+	list_for_each_entry(sh, &ctx->cached_list, lru) {
 		struct r5l_meta_block *mb;
 		int i;
 		int offset;
@@ -2124,14 +2124,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 		ctx->pos = write_pos;
 		ctx->seq += 1;
 		next_checkpoint = sh->log_start;
-		list_del_init(&sh->lru);
-		raid5_release_stripe(sh);
 	}
 	log->next_checkpoint = next_checkpoint;
 	__free_page(page);
 	return 0;
 }
 
+static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
+						 struct r5l_recovery_ctx *ctx)
+{
+	struct mddev *mddev = log->rdev->mddev;
+	struct r5conf *conf = mddev->private;
+	struct stripe_head *sh, *next;
+
+	if (ctx->data_only_stripes == 0)
+		return;
+
+	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;
+
+	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
+		r5c_make_stripe_write_out(sh);
+		set_bit(STRIPE_HANDLE, &sh->state);
+		list_del_init(&sh->lru);
+		raid5_release_stripe(sh);
+	}
+
+	md_wakeup_thread(conf->mddev->thread);
+	/* reuse conf->wait_for_quiescent in recovery */
+	wait_event(conf->wait_for_quiescent,
+		   atomic_read(&conf->active_stripes) == 0);
+
+	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+}
+
 static int r5l_recovery_log(struct r5l_log *log)
 {
 	struct mddev *mddev = log->rdev->mddev;
@@ -2158,32 +2183,31 @@ static int r5l_recovery_log(struct r5l_log *log)
 	pos = ctx.pos;
 	ctx.seq += 10000;
 
-	if (ctx.data_only_stripes == 0) {
-		log->next_checkpoint = ctx.pos;
-		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
-		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
-	}
 
 	if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
 		pr_debug("md/raid:%s: starting from clean shutdown\n",
 			 mdname(mddev));
-	else {
+	else
 		pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
 			 mdname(mddev), ctx.data_only_stripes,
 			 ctx.data_parity_stripes);
 
-		if (ctx.data_only_stripes > 0)
-			if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
-				pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
-				       mdname(mddev));
-				return -EIO;
-			}
+	if (ctx.data_only_stripes == 0) {
+		log->next_checkpoint = ctx.pos;
+		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
+		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
+	} else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
+		pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
+		       mdname(mddev));
+		return -EIO;
 	}
 
 	log->log_start = ctx.pos;
 	log->seq = ctx.seq;
 	log->last_checkpoint = pos;
 	r5l_write_super(log, pos);
+
+	r5c_recovery_flush_data_only_stripes(log, &ctx);
 	return 0;
 }
 

From 07e83364845e1e1c7e189a01206a9d7d33831568 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 23 Jan 2017 17:12:58 -0800
Subject: [PATCH 047/322] md/r5cache: shift complex rmw from read path to write
 path

Write back cache requires a complex RMW mechanism, where old data is
read into dev->orig_page for prexor, and then xor is done with
dev->page. This logic is already implemented in the write path.

However, current read path is not awared of this requirement. When
the array is optimal, the RMW is not required, as the data are
read from raid disks. However, when the target stripe is degraded,
complex RMW is required to generate right data.

To keep read path as clean as possible, we handle read path by
flushing degraded, in-journal stripes before processing reads to
missing dev.

Specifically, when there is read requests to a degraded stripe
with data in journal, handle_stripe_fill() calls
r5c_make_stripe_write_out() and exits. Then handle_stripe_dirtying()
will do the complex RMW and flush the stripe to RAID disks. After
that, read requests are handled.

There is one more corner case when there is non-overwrite bio for
the missing (or out of sync) dev. handle_stripe_dirtying() will not
be able to process the non-overwrite bios without constructing the
data in handle_stripe_fill(). This is fixed by delaying non-overwrite
bios in handle_stripe_dirtying(). So handle_stripe_fill() works on
these bios after the stripe is flushed to raid disks.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5.c | 49 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 13d76767c2cf..dc83da69ca7c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2897,6 +2897,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
 	return r_sector;
 }
 
+/*
+ * There are cases where we want handle_stripe_dirtying() and
+ * schedule_reconstruction() to delay towrite to some dev of a stripe.
+ *
+ * This function checks whether we want to delay the towrite. Specifically,
+ * we delay the towrite when:
+ *
+ *   1. degraded stripe has a non-overwrite to the missing dev, AND this
+ *      stripe has data in journal (for other devices).
+ *
+ *      In this case, when reading data for the non-overwrite dev, it is
+ *      necessary to handle complex rmw of write back cache (prexor with
+ *      orig_page, and xor with page). To keep read path simple, we would
+ *      like to flush data in journal to RAID disks first, so complex rmw
+ *      is handled in the write patch (handle_stripe_dirtying).
+ *
+ */
+static inline bool delay_towrite(struct r5dev *dev,
+				   struct stripe_head_state *s)
+{
+	return !test_bit(R5_OVERWRITE, &dev->flags) &&
+		!test_bit(R5_Insync, &dev->flags) && s->injournal;
+}
+
 static void
 schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 			 int rcw, int expand)
@@ -2917,7 +2941,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 
-			if (dev->towrite) {
+			if (dev->towrite && !delay_towrite(dev, s)) {
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantdrain, &dev->flags);
 				if (!expand)
@@ -3494,10 +3518,26 @@ static void handle_stripe_fill(struct stripe_head *sh,
 	 * midst of changing due to a write
 	 */
 	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
-	    !sh->reconstruct_state)
+	    !sh->reconstruct_state) {
+
+		/*
+		 * For degraded stripe with data in journal, do not handle
+		 * read requests yet, instead, flush the stripe to raid
+		 * disks first, this avoids handling complex rmw of write
+		 * back cache (prexor with orig_page, and then xor with
+		 * page) in the read path
+		 */
+		if (s->injournal && s->failed) {
+			if (test_bit(STRIPE_R5C_CACHING, &sh->state))
+				r5c_make_stripe_write_out(sh);
+			goto out;
+		}
+
 		for (i = disks; i--; )
 			if (fetch_block(sh, s, i, disks))
 				break;
+	}
+out:
 	set_bit(STRIPE_HANDLE, &sh->state);
 }
 
@@ -3653,7 +3693,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 	} else for (i = disks; i--; ) {
 		/* would I have to read this buffer for read_modify_write */
 		struct r5dev *dev = &sh->dev[i];
-		if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
+		if (((dev->towrite && !delay_towrite(dev, s)) ||
+		     i == sh->pd_idx || i == sh->qd_idx ||
 		     test_bit(R5_InJournal, &dev->flags)) &&
 		    !test_bit(R5_LOCKED, &dev->flags) &&
 		    !(uptodate_for_rmw(dev) ||
@@ -3717,7 +3758,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
 
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
-			if ((dev->towrite ||
+			if (((dev->towrite && !delay_towrite(dev, s)) ||
 			     i == sh->pd_idx || i == sh->qd_idx ||
 			     test_bit(R5_InJournal, &dev->flags)) &&
 			    !test_bit(R5_LOCKED, &dev->flags) &&

From 2e38a37f23c98d7fad87ff022670060b8a0e2bf5 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Tue, 24 Jan 2017 10:45:30 -0800
Subject: [PATCH 048/322] md/r5cache: disable write back for degraded array

write-back cache in degraded mode introduces corner cases to the array.
Although we try to cover all these corner cases, it is safer to just
disable write-back cache when the array is in degraded mode.

In this patch, we disable writeback cache for degraded mode:
1. On device failure, if the array enters degraded mode, raid5_error()
   will submit async job r5c_disable_writeback_async to disable
   writeback;
2. In r5c_journal_mode_store(), it is invalid to enable writeback in
   degraded mode;
3. In r5c_try_caching_write(), stripes with s->failed>0 will be handled
   in write-through mode.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-cache.c | 46 ++++++++++++++++++++++++++++++++++++++++
 drivers/md/raid5.c       | 15 +++++++------
 drivers/md/raid5.h       |  2 ++
 3 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 3d7dda85494c..302dea3296ba 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -162,6 +162,8 @@ struct r5l_log {
 
 	/* to submit async io_units, to fulfill ordering of flush */
 	struct work_struct deferred_io_work;
+	/* to disable write back during in degraded mode */
+	struct work_struct disable_writeback_work;
 };
 
 /*
@@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work)
 		r5l_do_submit_io(log, io);
 }
 
+static void r5c_disable_writeback_async(struct work_struct *work)
+{
+	struct r5l_log *log = container_of(work, struct r5l_log,
+					   disable_writeback_work);
+	struct mddev *mddev = log->rdev->mddev;
+
+	if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
+		return;
+	pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
+		mdname(mddev));
+	mddev_suspend(mddev);
+	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+	mddev_resume(mddev);
+}
+
 static void r5l_submit_current_io(struct r5l_log *log)
 {
 	struct r5l_io_unit *io = log->current_io;
@@ -2269,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
 	    val > R5C_JOURNAL_MODE_WRITE_BACK)
 		return -EINVAL;
 
+	if (raid5_calc_degraded(conf) > 0 &&
+	    val == R5C_JOURNAL_MODE_WRITE_BACK)
+		return -EINVAL;
+
 	mddev_suspend(mddev);
 	conf->log->r5c_journal_mode = val;
 	mddev_resume(mddev);
@@ -2323,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf,
 		set_bit(STRIPE_R5C_CACHING, &sh->state);
 	}
 
+	/*
+	 * When run in degraded mode, array is set to write-through mode.
+	 * This check helps drain pending write safely in the transition to
+	 * write-through mode.
+	 */
+	if (s->failed) {
+		r5c_make_stripe_write_out(sh);
+		return -EAGAIN;
+	}
+
 	for (i = disks; i--; ) {
 		dev = &sh->dev[i];
 		/* if non-overwrite, use writing-out phase */
@@ -2579,6 +2610,19 @@ static int r5l_load_log(struct r5l_log *log)
 	return ret;
 }
 
+void r5c_update_on_rdev_error(struct mddev *mddev)
+{
+	struct r5conf *conf = mddev->private;
+	struct r5l_log *log = conf->log;
+
+	if (!log)
+		return;
+
+	if (raid5_calc_degraded(conf) > 0 &&
+	    conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
+		schedule_work(&log->disable_writeback_work);
+}
+
 int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 {
 	struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -2651,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 	spin_lock_init(&log->no_space_stripes_lock);
 
 	INIT_WORK(&log->deferred_io_work, r5l_submit_io_async);
+	INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async);
 
 	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
 	INIT_LIST_HEAD(&log->stripe_in_journal_list);
@@ -2683,6 +2728,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 
 void r5l_exit_log(struct r5l_log *log)
 {
+	flush_work(&log->disable_writeback_work);
 	md_unregister_thread(&log->reclaim_thread);
 	mempool_destroy(log->meta_pool);
 	bioset_free(log->bs);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dc83da69ca7c..3c7e106c12a2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -556,7 +556,7 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
  * of the two sections, and some non-in_sync devices may
  * be insync in the section most affected by failed devices.
  */
-static int calc_degraded(struct r5conf *conf)
+int raid5_calc_degraded(struct r5conf *conf)
 {
 	int degraded, degraded2;
 	int i;
@@ -619,7 +619,7 @@ static int has_failed(struct r5conf *conf)
 	if (conf->mddev->reshape_position == MaxSector)
 		return conf->mddev->degraded > conf->max_degraded;
 
-	degraded = calc_degraded(conf);
+	degraded = raid5_calc_degraded(conf);
 	if (degraded > conf->max_degraded)
 		return 1;
 	return 0;
@@ -2555,7 +2555,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 	clear_bit(In_sync, &rdev->flags);
-	mddev->degraded = calc_degraded(conf);
+	mddev->degraded = raid5_calc_degraded(conf);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 
@@ -2569,6 +2569,7 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
 		bdevname(rdev->bdev, b),
 		mdname(mddev),
 		conf->raid_disks - mddev->degraded);
+	r5c_update_on_rdev_error(mddev);
 }
 
 /*
@@ -7091,7 +7092,7 @@ static int raid5_run(struct mddev *mddev)
 	/*
 	 * 0 for a fully functional array, 1 or 2 for a degraded array.
 	 */
-	mddev->degraded = calc_degraded(conf);
+	mddev->degraded = raid5_calc_degraded(conf);
 
 	if (has_failed(conf)) {
 		pr_crit("md/raid:%s: not enough operational devices (%d/%d failed)\n",
@@ -7338,7 +7339,7 @@ static int raid5_spare_active(struct mddev *mddev)
 		}
 	}
 	spin_lock_irqsave(&conf->device_lock, flags);
-	mddev->degraded = calc_degraded(conf);
+	mddev->degraded = raid5_calc_degraded(conf);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 	print_raid5_conf(conf);
 	return count;
@@ -7698,7 +7699,7 @@ static int raid5_start_reshape(struct mddev *mddev)
 		 * pre and post number of devices.
 		 */
 		spin_lock_irqsave(&conf->device_lock, flags);
-		mddev->degraded = calc_degraded(conf);
+		mddev->degraded = raid5_calc_degraded(conf);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 	mddev->raid_disks = conf->raid_disks;
@@ -7786,7 +7787,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
 		} else {
 			int d;
 			spin_lock_irq(&conf->device_lock);
-			mddev->degraded = calc_degraded(conf);
+			mddev->degraded = raid5_calc_degraded(conf);
 			spin_unlock_irq(&conf->device_lock);
 			for (d = conf->raid_disks ;
 			     d < conf->raid_disks - mddev->delta_disks;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 461df197d157..1440fa26e296 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -758,6 +758,7 @@ extern sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector,
 extern struct stripe_head *
 raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 			int previous, int noblock, int noquiesce);
+extern int raid5_calc_degraded(struct r5conf *conf);
 extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
 extern void r5l_exit_log(struct r5l_log *log);
 extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *head_sh);
@@ -786,4 +787,5 @@ extern void r5c_flush_cache(struct r5conf *conf, int num);
 extern void r5c_check_stripe_cache_usage(struct r5conf *conf);
 extern void r5c_check_cached_full_stripe(struct r5conf *conf);
 extern struct md_sysfs_entry r5c_journal_mode;
+extern void r5c_update_on_rdev_error(struct mddev *mddev);
 #endif

From c929ea0b910355e1876c64431f3d5802f95b3d75 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Fri, 20 Jan 2017 16:48:39 +0800
Subject: [PATCH 049/322] SUNRPC: cleanup ida information when removing sunrpc
 module

After removing sunrpc module, I get many kmemleak information as,
unreferenced object 0xffff88003316b1e0 (size 544):
  comm "gssproxy", pid 2148, jiffies 4294794465 (age 4200.081s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffffb0cfb58a>] kmemleak_alloc+0x4a/0xa0
    [<ffffffffb03507fe>] kmem_cache_alloc+0x15e/0x1f0
    [<ffffffffb0639baa>] ida_pre_get+0xaa/0x150
    [<ffffffffb0639cfd>] ida_simple_get+0xad/0x180
    [<ffffffffc06054fb>] nlmsvc_lookup_host+0x4ab/0x7f0 [lockd]
    [<ffffffffc0605e1d>] lockd+0x4d/0x270 [lockd]
    [<ffffffffc06061e5>] param_set_timeout+0x55/0x100 [lockd]
    [<ffffffffc06cba24>] svc_defer+0x114/0x3f0 [sunrpc]
    [<ffffffffc06cbbe7>] svc_defer+0x2d7/0x3f0 [sunrpc]
    [<ffffffffc06c71da>] rpc_show_info+0x8a/0x110 [sunrpc]
    [<ffffffffb044a33f>] proc_reg_write+0x7f/0xc0
    [<ffffffffb038e41f>] __vfs_write+0xdf/0x3c0
    [<ffffffffb0390f1f>] vfs_write+0xef/0x240
    [<ffffffffb0392fbd>] SyS_write+0xad/0x130
    [<ffffffffb0d06c37>] entry_SYSCALL_64_fastpath+0x1a/0xa9
    [<ffffffffffffffff>] 0xffffffffffffffff

I found, the ida information (dynamic memory) isn't cleanup.

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Fixes: 2f048db4680a ("SUNRPC: Add an identifier for struct rpc_clnt")
Cc: stable@vger.kernel.org # v3.12+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/clnt.h | 1 +
 net/sunrpc/clnt.c           | 5 +++++
 net/sunrpc/sunrpc_syms.c    | 1 +
 3 files changed, 7 insertions(+)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 85cc819676e8..333ad11b3dd9 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -216,5 +216,6 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *);
 void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *);
 bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
 			const struct sockaddr *sap);
+void rpc_cleanup_clids(void);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1efbe48e794f..1dc9f3bac099 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -336,6 +336,11 @@ static int rpc_client_register(struct rpc_clnt *clnt,
 
 static DEFINE_IDA(rpc_clids);
 
+void rpc_cleanup_clids(void)
+{
+	ida_destroy(&rpc_clids);
+}
+
 static int rpc_alloc_clid(struct rpc_clnt *clnt)
 {
 	int clid;
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index d1c330a7953a..c73de181467a 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -119,6 +119,7 @@ init_sunrpc(void)
 static void __exit
 cleanup_sunrpc(void)
 {
+	rpc_cleanup_clids();
 	rpcauth_remove_module();
 	cleanup_socket_xprt();
 	svc_cleanup_xprt_sock();

From 27873de99f2fecca0f6b257316489ef2a1d86ffd Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Mon, 23 Jan 2017 08:34:45 -0800
Subject: [PATCH 050/322] scsi: qla2xxx: Fix a recently introduced memory leak

qla2x00_probe_one() allocates IRQs before it initializes rsp_q_map so
IRQs must be freed even if rsp_q_map allocation did not occur.  This was
detected by kmemleak.

Fixes: 4fa183455988 ("scsi: qla2xxx: Utilize pci_alloc_irq_vectors/pci_free_irq_vectors calls")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Michael Hernandez <michael.hernandez@cavium.com>
Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <stable@vger.kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-By: Himanshu Madhani <himanshu.madhani@cavium.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_isr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 5815403d1d65..3116bf390b06 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3237,7 +3237,7 @@ qla2x00_free_irqs(scsi_qla_host_t *vha)
 	 * from a probe failure context.
 	 */
 	if (!ha->rsp_q_map || !ha->rsp_q_map[0])
-		return;
+		goto free_irqs;
 	rsp = ha->rsp_q_map[0];
 
 	if (ha->flags.msix_enabled) {
@@ -3257,6 +3257,7 @@ qla2x00_free_irqs(scsi_qla_host_t *vha)
 		free_irq(pci_irq_vector(ha->pdev, 0), rsp);
 	}
 
+free_irqs:
 	pci_free_irq_vectors(ha->pdev);
 }
 

From 78f824d4312a8944f5340c6b161bba3bf2c81096 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 21 Jun 2016 14:24:33 +0530
Subject: [PATCH 051/322] ARCv2: smp-boot: wake_flag polling by non-Masters
 needs to be uncached

This is needed on HS38 cores, for setting up IO-Coherency aperture properly

The polling could perturb the caches and coherecy fabric which could be
wrong in the small window when Master is setting up IOC aperture etc
in arc_cache_init()

We do it only for ARCv2 based builds to not affect EZChip ARCompact
based platform.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/smp.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 44a0d21ed342..2afbafadb6ab 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -90,10 +90,23 @@ void __init smp_cpus_done(unsigned int max_cpus)
  */
 static volatile int wake_flag;
 
+#ifdef CONFIG_ISA_ARCOMPACT
+
+#define __boot_read(f)		f
+#define __boot_write(f, v)	f = v
+
+#else
+
+#define __boot_read(f)		arc_read_uncached_32(&f)
+#define __boot_write(f, v)	arc_write_uncached_32(&f, v)
+
+#endif
+
 static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
 {
 	BUG_ON(cpu == 0);
-	wake_flag = cpu;
+
+	__boot_write(wake_flag, cpu);
 }
 
 void arc_platform_smp_wait_to_boot(int cpu)
@@ -102,10 +115,10 @@ void arc_platform_smp_wait_to_boot(int cpu)
 	if (IS_ENABLED(CONFIG_ARC_SMP_HALT_ON_RESET))
 		return;
 
-	while (wake_flag != cpu)
+	while (__boot_read(wake_flag) != cpu)
 		;
 
-	wake_flag = 0;
+	__boot_write(wake_flag, 0);
 }
 
 const char *arc_platform_smp_cpuinfo(void)

From b5fa0f7f88edcde37df1807fdf9ff10ec787a60e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Jan 2017 16:36:57 +1100
Subject: [PATCH 052/322] powerpc: Fix build failure with clang due to
 BUILD_BUG_ON()

Anton says: In commit 4db7327194db ("powerpc: Add option to use jump
label for cpu_has_feature()") and commit c12e6f24d413 ("powerpc: Add
option to use jump label for mmu_has_feature()") we added:

  BUILD_BUG_ON(!__builtin_constant_p(feature))

to cpu_has_feature() and mmu_has_feature() in order to catch usage
issues (such as cpu_has_feature(cpu_has_feature(X), which has happened
once in the past). Unfortunately LLVM isn't smart enough to resolve
this, and it errors out.

I work around it in my clang/LLVM builds of the kernel, but I have just
discovered that it causes a lot of issues for the bcc (eBPF) trace tool
(which uses LLVM).

For now just #ifdef it away for clang builds.

Fixes: 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()")
Fixes: c12e6f24d413 ("powerpc: Add option to use jump label for mmu_has_feature()")
Cc: stable@vger.kernel.org # v4.8+
Reported-by: Anton Blanchard <anton@samba.org>
Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cpu_has_feature.h | 2 ++
 arch/powerpc/include/asm/mmu.h             | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
index b312b152461b..6e834caa3720 100644
--- a/arch/powerpc/include/asm/cpu_has_feature.h
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -23,7 +23,9 @@ static __always_inline bool cpu_has_feature(unsigned long feature)
 {
 	int i;
 
+#ifndef __clang__ /* clang can't cope with this */
 	BUILD_BUG_ON(!__builtin_constant_p(feature));
+#endif
 
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
 	if (!static_key_initialized) {
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index a34c764ca8dd..233a7e8cc8e3 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -160,7 +160,9 @@ static __always_inline bool mmu_has_feature(unsigned long feature)
 {
 	int i;
 
+#ifndef __clang__ /* clang can't cope with this */
 	BUILD_BUG_ON(!__builtin_constant_p(feature));
+#endif
 
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
 	if (!static_key_initialized) {

From ae4a3e028bb8b59e7cfeb0cc9ef03d885182ce8b Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 19 Jan 2017 08:49:07 -0800
Subject: [PATCH 053/322] dmaengine: cppi41: Fix runtime PM timeouts with USB
 mass storage

Commit fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support")
added runtime PM support for cppi41, but had corner case issues. Some of
the issues were fixed with commit 098de42ad670 ("dmaengine: cppi41: Fix
unpaired pm runtime when only a USB hub is connected"). That fix however
caused a new regression where we can get error -115 messages with USB on
BeagleBone when connecting a USB mass storage device to a hub.

This is because when connecting a USB mass storage device to a hub, the
initial DMA transfers can take over 200ms to complete and cppi41
autosuspend delay times out.

To fix the issue, we want to implement refcounting for chan_busy array
that contains the active dma transfers. Increasing the autosuspend delay
won't help as that the delay could be potentially seconds, and it's best
to let the USB subsystem to deal with the timeouts on errors.

The earlier attempt for runtime PM was buggy as the pm_runtime_get/put()
calls could get unpaired easily as they did not follow the state of
the chan_busy array as described in commit 098de42ad670 ("dmaengine:
cppi41: Fix unpaired pm runtime when only a USB hub is connected".

Let's fix the issue by adding pm_runtime_get() to where a new transfer
is added to the chan_busy array, and calls to pm_runtime_put() where
chan_busy array entry is cleared. This prevents any autosuspend timeouts
from happening while dma transfers are active.

Fixes: 098de42ad670 ("dmaengine: cppi41: Fix unpaired pm runtime when
only a USB hub is connected")
Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support")
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Bin Liu <b-liu@ti.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: Patrick Titiano <ptitiano@baylibre.com>
Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Tested-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index d5ba43a87a68..7de4fdf86a6a 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -257,6 +257,10 @@ static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
 	BUG_ON(desc_num >= ALLOC_DECS_NUM);
 	c = cdd->chan_busy[desc_num];
 	cdd->chan_busy[desc_num] = NULL;
+
+	/* Usecount for chan_busy[], paired with push_desc_queue() */
+	pm_runtime_put(cdd->ddev.dev);
+
 	return c;
 }
 
@@ -447,6 +451,15 @@ static void push_desc_queue(struct cppi41_channel *c)
 	 */
 	__iowmb();
 
+	/*
+	 * DMA transfers can take at least 200ms to complete with USB mass
+	 * storage connected. To prevent autosuspend timeouts, we must use
+	 * pm_runtime_get/put() when chan_busy[] is modified. This will get
+	 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
+	 * outcome of the transfer.
+	 */
+	pm_runtime_get(cdd->ddev.dev);
+
 	desc_phys = lower_32_bits(c->desc_phys);
 	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
 	WARN_ON(cdd->chan_busy[desc_num]);
@@ -705,6 +718,9 @@ static int cppi41_stop_chan(struct dma_chan *chan)
 	WARN_ON(!cdd->chan_busy[desc_num]);
 	cdd->chan_busy[desc_num] = NULL;
 
+	/* Usecount for chan_busy[], paired with push_desc_queue() */
+	pm_runtime_put(cdd->ddev.dev);
+
 	return 0;
 }
 

From 362f4562466c3b9490e733e06999025638310d4a Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 19 Jan 2017 08:49:08 -0800
Subject: [PATCH 054/322] dmaengine: cppi41: Fix oops in cppi41_runtime_resume

Commit fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support")
together with recent MUSB changes allowed USB and DMA on BeagleBone to idle
when no cable is connected. But looks like few corner case issues still
remain.

Looks like just by re-plugging USB cable about ten or so times on BeagleBone
when configured in USB peripheral mode we can get warnings and eventually
trigger an oops in cppi41 DMA:

WARNING: CPU: 0 PID: 14 at drivers/dma/cppi41.c:1154 cppi41_runtime_suspend+
x28/0x38 [cppi41]
...

WARNING: CPU: 0 PID: 14 at drivers/dma/cppi41.c:452
push_desc_queue+0x94/0x9c [cppi41]
...

Unable to handle kernel NULL pointer dereference at virtual
address 00000104
pgd = c0004000
[00000104] *pgd=00000000
Internal error: Oops: 805 [#1] SMP ARM
...
[<bf0d92cc>] (cppi41_runtime_resume [cppi41]) from [<c0589838>]
(__rpm_callback+0xc0/0x214)
[<c0589838>] (__rpm_callback) from [<c05899ac>] (rpm_callback+0x20/0x80)
[<c05899ac>] (rpm_callback) from [<c0589460>] (rpm_resume+0x504/0x78c)
[<c0589460>] (rpm_resume) from [<c058a1a0>] (pm_runtime_work+0x60/0xa8)
[<c058a1a0>] (pm_runtime_work) from [<c0156120>] (process_one_work+0x2b4/0x808)

This is because of a race with runtime PM and cppi41_dma_issue_pending()
as reported by Alexandre Bailon <abailon@baylibre.com> in earlier
set of patches. Based on mailing list discussions we however came to the
conclusion that a different fix from Alexandre's fix is needed in order
to guarantee that DMA is really active when we try to use it.

To fix the issue, we need to add a driver specific flag as we otherwise
can have -EINPROGRESS state set by runtime PM and can't rely on
pm_runtime_active() to tell us when we can use the DMA.

And we need to make sure the DMA transfers get triggered in the queued
order. So let's always queue the transfers, then flush the queue
from both cppi41_dma_issue_pending() and cppi41_runtime_resume()
as suggested by Grygorii Strashko <grygorii.strashko@ti.com> in an
earlier example patch.

For reference, this is also documented in Documentation/power/runtime_pm.txt
in the example at the end of the file as pointed out by Grygorii Strashko
<grygorii.strashko@ti.com>.

Based on earlier patches from Alexandre Bailon <abailon@baylibre.com>
and Grygorii Strashko <grygorii.strashko@ti.com> modified based on
testing and what was discussed on the mailing lists.

Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support")
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Bin Liu <b-liu@ti.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: Patrick Titiano <ptitiano@baylibre.com>
Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reported-by: Alexandre Bailon <abailon@baylibre.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Tested-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 7de4fdf86a6a..55c1782e3623 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -153,6 +153,8 @@ struct cppi41_dd {
 
 	/* context for suspend/resume */
 	unsigned int dma_tdfdq;
+
+	bool is_suspended;
 };
 
 #define FIST_COMPLETION_QUEUE	93
@@ -470,20 +472,26 @@ static void push_desc_queue(struct cppi41_channel *c)
 	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
 }
 
-static void pending_desc(struct cppi41_channel *c)
+/*
+ * Caller must hold cdd->lock to prevent push_desc_queue()
+ * getting called out of order. We have both cppi41_dma_issue_pending()
+ * and cppi41_runtime_resume() call this function.
+ */
+static void cppi41_run_queue(struct cppi41_dd *cdd)
 {
-	struct cppi41_dd *cdd = c->cdd;
-	unsigned long flags;
+	struct cppi41_channel *c, *_c;
 
-	spin_lock_irqsave(&cdd->lock, flags);
-	list_add_tail(&c->node, &cdd->pending);
-	spin_unlock_irqrestore(&cdd->lock, flags);
+	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
+		push_desc_queue(c);
+		list_del(&c->node);
+	}
 }
 
 static void cppi41_dma_issue_pending(struct dma_chan *chan)
 {
 	struct cppi41_channel *c = to_cpp41_chan(chan);
 	struct cppi41_dd *cdd = c->cdd;
+	unsigned long flags;
 	int error;
 
 	error = pm_runtime_get(cdd->ddev.dev);
@@ -495,10 +503,11 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan)
 		return;
 	}
 
-	if (likely(pm_runtime_active(cdd->ddev.dev)))
-		push_desc_queue(c);
-	else
-		pending_desc(c);
+	spin_lock_irqsave(&cdd->lock, flags);
+	list_add_tail(&c->node, &cdd->pending);
+	if (!cdd->is_suspended)
+		cppi41_run_queue(cdd);
+	spin_unlock_irqrestore(&cdd->lock, flags);
 
 	pm_runtime_mark_last_busy(cdd->ddev.dev);
 	pm_runtime_put_autosuspend(cdd->ddev.dev);
@@ -1166,8 +1175,12 @@ static int __maybe_unused cppi41_resume(struct device *dev)
 static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
 {
 	struct cppi41_dd *cdd = dev_get_drvdata(dev);
+	unsigned long flags;
 
+	spin_lock_irqsave(&cdd->lock, flags);
+	cdd->is_suspended = true;
 	WARN_ON(!list_empty(&cdd->pending));
+	spin_unlock_irqrestore(&cdd->lock, flags);
 
 	return 0;
 }
@@ -1175,14 +1188,11 @@ static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
 static int __maybe_unused cppi41_runtime_resume(struct device *dev)
 {
 	struct cppi41_dd *cdd = dev_get_drvdata(dev);
-	struct cppi41_channel *c, *_c;
 	unsigned long flags;
 
 	spin_lock_irqsave(&cdd->lock, flags);
-	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
-		push_desc_queue(c);
-		list_del(&c->node);
-	}
+	cdd->is_suspended = false;
+	cppi41_run_queue(cdd);
 	spin_unlock_irqrestore(&cdd->lock, flags);
 
 	return 0;

From 6610d0edf6dc7ee97e46ab3a538a565c79d26199 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 20 Jan 2017 12:07:53 -0800
Subject: [PATCH 055/322] dmaengine: cppi41: Clean up pointless warnings

With patches "dmaengine: cppi41: Fix runtime PM timeouts with USB mass
storage", and "dmaengine: cppi41: Fix oops in cppi41_runtime_resume",
the pm_runtime_get/put() in cppi41_irq() is no longer needed. We now
guarantee that cppi41 is enabled when dma is in use.

We can still get pointless error -115 when musb is configured as a
usb peripheral. That's because we should now check for the state of
is_suspended instead.

Let's just remove the now useless code and replace it with a WARN().

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/cppi41.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 55c1782e3623..200828c60db9 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -323,12 +323,12 @@ static irqreturn_t cppi41_irq(int irq, void *data)
 
 		while (val) {
 			u32 desc, len;
-			int error;
 
-			error = pm_runtime_get(cdd->ddev.dev);
-			if (error < 0)
-				dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
-					__func__, error);
+			/*
+			 * This should never trigger, see the comments in
+			 * push_desc_queue()
+			 */
+			WARN_ON(cdd->is_suspended);
 
 			q_num = __fls(val);
 			val &= ~(1 << q_num);
@@ -349,9 +349,6 @@ static irqreturn_t cppi41_irq(int irq, void *data)
 			c->residue = pd_trans_len(c->desc->pd6) - len;
 			dma_cookie_complete(&c->txd);
 			dmaengine_desc_get_callback_invoke(&c->txd, NULL);
-
-			pm_runtime_mark_last_busy(cdd->ddev.dev);
-			pm_runtime_put_autosuspend(cdd->ddev.dev);
 		}
 	}
 	return IRQ_HANDLED;

From 407788b51db6f6aab499d02420082f436abf3238 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 24 Jan 2017 09:18:57 -0600
Subject: [PATCH 056/322] usb: musb: Fix host mode error -71 regression

Commit 467d5c980709 ("usb: musb: Implement session bit based runtime PM for
musb-core") started implementing musb generic runtime PM support by
introducing devctl register session bit based state control.

This caused a regression where if a USB mass storage device is connected
to a USB hub, we can get:

usb 1-1: reset high-speed USB device number 2 using musb-hdrc
usb 1-1: device descriptor read/64, error -71
usb 1-1.1: new high-speed USB device number 4 using musb-hdrc

This is because before the USB storage device is connected, musb is
in OTG_STATE_A_SUSPEND. And we currently only set need_finish_resume
in musb_stage0_irq() and the related code calling finish_resume_work
in musb_resume() and musb_runtime_resume() never gets called.

To fix the issue, we can call schedule_delayed_work() directly in
musb_stage0_irq() to have finish_resume_work run.

And we should no longer never get interrupts when when suspended.
We have changed musb to no longer need pm_runtime_irqsafe().
The need_finish_resume flag was added in commit 9298b4aad37e ("usb:
musb: fix device hotplug behind hub") and no longer applies as far
as I can tell. So let's just remove the earlier code that no longer
is needed.

Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core")
Reported-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c | 15 ++-------------
 drivers/usb/musb/musb_core.h |  1 -
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index fca288bbc800..cd40467be3c5 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -594,11 +594,11 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb,
 						| MUSB_PORT_STAT_RESUME;
 				musb->rh_timer = jiffies
 					+ msecs_to_jiffies(USB_RESUME_TIMEOUT);
-				musb->need_finish_resume = 1;
-
 				musb->xceiv->otg->state = OTG_STATE_A_HOST;
 				musb->is_active = 1;
 				musb_host_resume_root_hub(musb);
+				schedule_delayed_work(&musb->finish_resume_work,
+					msecs_to_jiffies(USB_RESUME_TIMEOUT));
 				break;
 			case OTG_STATE_B_WAIT_ACON:
 				musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
@@ -2710,11 +2710,6 @@ static int musb_resume(struct device *dev)
 	mask = MUSB_DEVCTL_BDEVICE | MUSB_DEVCTL_FSDEV | MUSB_DEVCTL_LSDEV;
 	if ((devctl & mask) != (musb->context.devctl & mask))
 		musb->port1_status = 0;
-	if (musb->need_finish_resume) {
-		musb->need_finish_resume = 0;
-		schedule_delayed_work(&musb->finish_resume_work,
-				      msecs_to_jiffies(USB_RESUME_TIMEOUT));
-	}
 
 	/*
 	 * The USB HUB code expects the device to be in RPM_ACTIVE once it came
@@ -2766,12 +2761,6 @@ static int musb_runtime_resume(struct device *dev)
 
 	musb_restore_context(musb);
 
-	if (musb->need_finish_resume) {
-		musb->need_finish_resume = 0;
-		schedule_delayed_work(&musb->finish_resume_work,
-				msecs_to_jiffies(USB_RESUME_TIMEOUT));
-	}
-
 	spin_lock_irqsave(&musb->lock, flags);
 	error = musb_run_resume_work(musb);
 	if (error)
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index ade902ea1221..ce5a18c98c6d 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -410,7 +410,6 @@ struct musb {
 
 	/* is_suspended means USB B_PERIPHERAL suspend */
 	unsigned		is_suspended:1;
-	unsigned		need_finish_resume :1;
 
 	/* may_wakeup means remote wakeup is enabled */
 	unsigned		may_wakeup:1;

From 3ba7b7795b7e8889af1377904c55c7fae9e0c775 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 24 Jan 2017 09:18:58 -0600
Subject: [PATCH 057/322] usb: musb: Fix external abort on non-linefetch for
 musb_irq_work()

While testing musb host mode cable plugging on a BeagleBone, I came across this
error:

Unhandled fault: external abort on non-linefetch (0x1008) at 0xd1dcfc60
...
[<bf668390>] (musb_default_readb [musb_hdrc]) from [<bf668578>] (musb_irq_work+0x1c/0x180 [musb_hdrc])
[<bf668578>] (musb_irq_work [musb_hdrc]) from [<c0156554>] (process_one_work+0x2b4/0x808)
[<c0156554>] (process_one_work) from [<c015767c>] (worker_thread+0x3c/0x550)
[<c015767c>] (worker_thread) from [<c015d568>] (kthread+0x104/0x148)
[<c015d568>] (kthread) from [<c01078d0>] (ret_from_fork+0x14/0x24)

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Bin Liu <b-liu@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index cd40467be3c5..772f15821242 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1925,6 +1925,14 @@ static void musb_pm_runtime_check_session(struct musb *musb)
 static void musb_irq_work(struct work_struct *data)
 {
 	struct musb *musb = container_of(data, struct musb, irq_work.work);
+	int error;
+
+	error = pm_runtime_get_sync(musb->controller);
+	if (error < 0) {
+		dev_err(musb->controller, "Could not enable: %i\n", error);
+
+		return;
+	}
 
 	musb_pm_runtime_check_session(musb);
 
@@ -1932,6 +1940,9 @@ static void musb_irq_work(struct work_struct *data)
 		musb->xceiv_old_state = musb->xceiv->otg->state;
 		sysfs_notify(&musb->controller->kobj, NULL, "mode");
 	}
+
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 }
 
 static void musb_recover_from_babble(struct musb *musb)

From d9b2997e4a0a874e452df7cdd7de5a54502bd0aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk>
Date: Fri, 20 Jan 2017 19:46:34 +0100
Subject: [PATCH 058/322] USB: Add quirk for WORLDE easykey.25 MIDI keyboard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a quirk for WORLDE easykey.25 MIDI keyboard (idVendor=0218,
idProduct=0401). The device reports that it has config string
descriptor at index 3, but when the system selects the configuration
and tries to get the description, it returns a -EPROTO error,
the communication restarts and this keeps repeating over and over again.
Not requesting the string descriptor makes the device work correctly.

Relevant info from Wireshark:

[...]

CONFIGURATION DESCRIPTOR
    bLength: 9
    bDescriptorType: 0x02 (CONFIGURATION)
    wTotalLength: 101
    bNumInterfaces: 2
    bConfigurationValue: 1
    iConfiguration: 3
    Configuration bmAttributes: 0xc0  SELF-POWERED  NO REMOTE-WAKEUP
        1... .... = Must be 1: Must be 1 for USB 1.1 and higher
        .1.. .... = Self-Powered: This device is SELF-POWERED
        ..0. .... = Remote Wakeup: This device does NOT support remote wakeup
    bMaxPower: 50  (100mA)

[...]

     45 0.369104       host                  2.38.0                USB      64     GET DESCRIPTOR Request STRING

[...]

URB setup
    bmRequestType: 0x80
        1... .... = Direction: Device-to-host
        .00. .... = Type: Standard (0x00)
        ...0 0000 = Recipient: Device (0x00)
    bRequest: GET DESCRIPTOR (6)
    Descriptor Index: 0x03
    bDescriptorType: 0x03
    Language Id: English (United States) (0x0409)
    wLength: 255

     46 0.369255       2.38.0                host                  USB      64     GET DESCRIPTOR Response STRING[Malformed Packet]

[...]

Frame 46: 64 bytes on wire (512 bits), 64 bytes captured (512 bits) on interface 0
USB URB
    [Source: 2.38.0]
    [Destination: host]
    URB id: 0xffff88021f62d480
    URB type: URB_COMPLETE ('C')
    URB transfer type: URB_CONTROL (0x02)
    Endpoint: 0x80, Direction: IN
    Device: 38
    URB bus id: 2
    Device setup request: not relevant ('-')
    Data: present (0)
    URB sec: 1484896277
    URB usec: 455031
    URB status: Protocol error (-EPROTO) (-71)
    URB length [bytes]: 0
    Data length [bytes]: 0
    [Request in: 45]
    [Time from request: 0.000151000 seconds]
    Unused Setup Header
    Interval: 0
    Start frame: 0
    Copy of Transfer Flags: 0x00000200
    Number of ISO descriptors: 0
[Malformed Packet: USB]
    [Expert Info (Error/Malformed): Malformed Packet (Exception occurred)]
        [Malformed Packet (Exception occurred)]
        [Severity level: Error]
        [Group: Malformed]

Signed-off-by: Lukáš Lalinský <lukas@oxygene.sk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index d2e50a27140c..24f9f98968a5 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -37,6 +37,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* CBM - Flash disk */
 	{ USB_DEVICE(0x0204, 0x6025), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* WORLDE easy key (easykey.25) MIDI controller  */
+	{ USB_DEVICE(0x0218, 0x0401), .driver_info =
+			USB_QUIRK_CONFIG_INTF_STRINGS },
+
 	/* HP 5300/5370C scanner */
 	{ USB_DEVICE(0x03f0, 0x0701), .driver_info =
 			USB_QUIRK_STRING_FETCH_255 },

From 91539eb1fda2d530d3b268eef542c5414e54bf1a Mon Sep 17 00:00:00 2001
From: Iago Abal <mail@iagoabal.eu>
Date: Wed, 11 Jan 2017 14:00:21 +0100
Subject: [PATCH 059/322] dmaengine: pl330: fix double lock

The static bug finder EBA (http://www.iagoabal.eu/eba/) reported the
following double-lock bug:

    Double lock:
    1. spin_lock_irqsave(pch->lock, flags) at pl330_free_chan_resources:2236;
    2. call to function `pl330_release_channel' immediately after;
    3. call to function `dma_pl330_rqcb' in line 1753;
    4. spin_lock_irqsave(pch->lock, flags) at dma_pl330_rqcb:1505.

I have fixed it as suggested by Marek Szyprowski.

First, I have replaced `pch->lock' with `pl330->lock' in functions
`pl330_alloc_chan_resources' and `pl330_free_chan_resources'. This avoids
the double-lock by acquiring a different lock than `dma_pl330_rqcb'.

NOTE that, as a result, `pl330_free_chan_resources' executes
`list_splice_tail_init' on `pch->work_list' under lock `pl330->lock',
whereas in the rest of the code `pch->work_list' is protected by
`pch->lock'. I don't know if this may cause race conditions. Similarly
`pch->cyclic' is written by `pl330_alloc_chan_resources' under
`pl330->lock' but read by `pl330_tx_submit' under `pch->lock'.

Second, I have removed locking from `pl330_request_channel' and
`pl330_release_channel' functions. Function `pl330_request_channel' is
only called from `pl330_alloc_chan_resources', so the lock is already
held. Function `pl330_release_channel' is called from
`pl330_free_chan_resources', which already holds the lock, and from
`pl330_del'. Function `pl330_del' is called in an error path of
`pl330_probe' and at the end of `pl330_remove', but I assume that there
cannot be concurrent accesses to the protected data at those points.

Signed-off-by: Iago Abal <mail@iagoabal.eu>
Reviewed-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/pl330.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 740bbb942594..7539f73df9e0 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1699,7 +1699,6 @@ static bool _chan_ns(const struct pl330_dmac *pl330, int i)
 static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
 {
 	struct pl330_thread *thrd = NULL;
-	unsigned long flags;
 	int chans, i;
 
 	if (pl330->state == DYING)
@@ -1707,8 +1706,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
 
 	chans = pl330->pcfg.num_chan;
 
-	spin_lock_irqsave(&pl330->lock, flags);
-
 	for (i = 0; i < chans; i++) {
 		thrd = &pl330->channels[i];
 		if ((thrd->free) && (!_manager_ns(thrd) ||
@@ -1726,8 +1723,6 @@ static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
 		thrd = NULL;
 	}
 
-	spin_unlock_irqrestore(&pl330->lock, flags);
-
 	return thrd;
 }
 
@@ -1745,7 +1740,6 @@ static inline void _free_event(struct pl330_thread *thrd, int ev)
 static void pl330_release_channel(struct pl330_thread *thrd)
 {
 	struct pl330_dmac *pl330;
-	unsigned long flags;
 
 	if (!thrd || thrd->free)
 		return;
@@ -1757,10 +1751,8 @@ static void pl330_release_channel(struct pl330_thread *thrd)
 
 	pl330 = thrd->dmac;
 
-	spin_lock_irqsave(&pl330->lock, flags);
 	_free_event(thrd, thrd->ev);
 	thrd->free = true;
-	spin_unlock_irqrestore(&pl330->lock, flags);
 }
 
 /* Initialize the structure for PL330 configuration, that can be used
@@ -2122,20 +2114,20 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
 	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pch->lock, flags);
+	spin_lock_irqsave(&pl330->lock, flags);
 
 	dma_cookie_init(chan);
 	pch->cyclic = false;
 
 	pch->thread = pl330_request_channel(pl330);
 	if (!pch->thread) {
-		spin_unlock_irqrestore(&pch->lock, flags);
+		spin_unlock_irqrestore(&pl330->lock, flags);
 		return -ENOMEM;
 	}
 
 	tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch);
 
-	spin_unlock_irqrestore(&pch->lock, flags);
+	spin_unlock_irqrestore(&pl330->lock, flags);
 
 	return 1;
 }
@@ -2238,12 +2230,13 @@ static int pl330_pause(struct dma_chan *chan)
 static void pl330_free_chan_resources(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned long flags;
 
 	tasklet_kill(&pch->task);
 
 	pm_runtime_get_sync(pch->dmac->ddma.dev);
-	spin_lock_irqsave(&pch->lock, flags);
+	spin_lock_irqsave(&pl330->lock, flags);
 
 	pl330_release_channel(pch->thread);
 	pch->thread = NULL;
@@ -2251,7 +2244,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
 	if (pch->cyclic)
 		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
 
-	spin_unlock_irqrestore(&pch->lock, flags);
+	spin_unlock_irqrestore(&pl330->lock, flags);
 	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
 	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
 }

From b17c1bba9cec1727451b906d9a0c209774624873 Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rmfrfs@gmail.com>
Date: Mon, 23 Jan 2017 16:32:57 +0000
Subject: [PATCH 060/322] staging: greybus: timesync: validate platform state
 callback

When tearingdown timesync, and not in arche platform, the state platform
callback is not initialized. That will trigger the following NULL
dereferencing.
CallTrace:

 ? gb_timesync_platform_unlock_bus+0x11/0x20 [greybus]
 gb_timesync_teardown+0x85/0xc0 [greybus]
 gb_timesync_svc_remove+0xab/0x190 [greybus]
 gb_svc_del+0x29/0x110 [greybus]
 gb_hd_del+0x14/0x20 [greybus]
 ap_disconnect+0x24/0x60 [gb_es2]
 usb_unbind_interface+0x7a/0x2c0
 __device_release_driver+0x96/0x150
 device_release_driver+0x1e/0x30
 bus_remove_device+0xe7/0x130
 device_del+0x116/0x230
 usb_disable_device+0x97/0x1f0
 usb_disconnect+0x80/0x260
 hub_event+0x5ca/0x10e0
 process_one_work+0x126/0x3b0
 worker_thread+0x55/0x4c0
 ? process_one_work+0x3b0/0x3b0
 kthread+0xc4/0xe0
 ? kthread_park+0xb0/0xb0
 ret_from_fork+0x22/0x30

So, fix that by adding checks before use the callback.

Fixes: 970dc85bd95d ("greybus: timesync: Add timesync core driver")
Cc: <stable@vger.kernel.org> # 4.9.x
Signed-off-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/greybus/timesync_platform.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/staging/greybus/timesync_platform.c b/drivers/staging/greybus/timesync_platform.c
index 113f3d6c4b3a..27f75b17679b 100644
--- a/drivers/staging/greybus/timesync_platform.c
+++ b/drivers/staging/greybus/timesync_platform.c
@@ -45,12 +45,18 @@ u32 gb_timesync_platform_get_clock_rate(void)
 
 int gb_timesync_platform_lock_bus(struct gb_timesync_svc *pdata)
 {
+	if (!arche_platform_change_state_cb)
+		return 0;
+
 	return arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_TIME_SYNC,
 					      pdata);
 }
 
 void gb_timesync_platform_unlock_bus(void)
 {
+	if (!arche_platform_change_state_cb)
+		return;
+
 	arche_platform_change_state_cb(ARCHE_PLATFORM_STATE_ACTIVE, NULL);
 }
 

From 83e526f2a2fa4b2e82b6bd3ddbb26b70acfa8947 Mon Sep 17 00:00:00 2001
From: Vincent Pelletier <plr.vincent@gmail.com>
Date: Wed, 18 Jan 2017 00:57:44 +0000
Subject: [PATCH 061/322] usb: gadget: f_fs: Assorted buffer overflow checks.

OS descriptor head, when flagged as provided, is accessed without
checking if it fits in provided buffer. Verify length before access.
Also, there are other places where buffer length it checked
after accessing offsets which are potentially past the end. Check
buffer length before as well to fail cleanly.

Signed-off-by: Vincent Pelletier <plr.vincent@gmail.com>
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 5490fc51638e..fd80c1b9c823 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2269,6 +2269,8 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
 		if (len < sizeof(*d) || h->interface >= ffs->interfaces_count)
 			return -EINVAL;
 		length = le32_to_cpu(d->dwSize);
+		if (len < length)
+			return -EINVAL;
 		type = le32_to_cpu(d->dwPropertyDataType);
 		if (type < USB_EXT_PROP_UNICODE ||
 		    type > USB_EXT_PROP_UNICODE_MULTI) {
@@ -2277,6 +2279,11 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
 			return -EINVAL;
 		}
 		pnl = le16_to_cpu(d->wPropertyNameLength);
+		if (length < 14 + pnl) {
+			pr_vdebug("invalid os descriptor length: %d pnl:%d (descriptor %d)\n",
+				  length, pnl, type);
+			return -EINVAL;
+		}
 		pdl = le32_to_cpu(*(u32 *)((u8 *)data + 10 + pnl));
 		if (length != 14 + pnl + pdl) {
 			pr_vdebug("invalid os descriptor length: %d pnl:%d pdl:%d (descriptor %d)\n",
@@ -2363,6 +2370,9 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 		}
 	}
 	if (flags & (1 << i)) {
+		if (len < 4) {
+			goto error;
+		}
 		os_descs_count = get_unaligned_le32(data);
 		data += 4;
 		len -= 4;
@@ -2435,7 +2445,8 @@ static int __ffs_data_got_strings(struct ffs_data *ffs,
 
 	ENTER();
 
-	if (unlikely(get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC ||
+	if (unlikely(len < 16 ||
+		     get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC ||
 		     get_unaligned_le32(data + 4) != len))
 		goto error;
 	str_count  = get_unaligned_le32(data + 8);

From 1372cef1c697d8aac0cc923f8aa2c37d790ec9ed Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 25 Jan 2017 19:30:09 +0000
Subject: [PATCH 062/322] regulator: fixed: Revert support for ACPI interface

This reverts commit 13bed58ce874 (regulator: fixed: add support for ACPI
interface).

While there does appear to be a practical need to manage regulators on ACPI
systems, using ad-hoc properties to describe regulators to the kernel presents
a number of problems (especially should ACPI gain first class support for such
things), and there are ongoing discussions as to how to manage this.

Until there is a rough consensus, revert commit 13bed58ce8748d43, which hasn't
been in a released kernel yet as discussed in [1] and the surrounding thread.

[1] http://lkml.kernel.org/r/20170125184949.x2wkoo7kbaaajkjk@sirena.org.uk

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fixed.c | 46 ---------------------------------------
 1 file changed, 46 deletions(-)

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index a43b0e8a438d..988a7472c2ab 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -30,9 +30,6 @@
 #include <linux/of_gpio.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/machine.h>
-#include <linux/acpi.h>
-#include <linux/property.h>
-#include <linux/gpio/consumer.h>
 
 struct fixed_voltage_data {
 	struct regulator_desc desc;
@@ -97,44 +94,6 @@ of_get_fixed_voltage_config(struct device *dev,
 	return config;
 }
 
-/**
- * acpi_get_fixed_voltage_config - extract fixed_voltage_config structure info
- * @dev: device requesting for fixed_voltage_config
- * @desc: regulator description
- *
- * Populates fixed_voltage_config structure by extracting data through ACPI
- * interface, returns a pointer to the populated structure of NULL if memory
- * alloc fails.
- */
-static struct fixed_voltage_config *
-acpi_get_fixed_voltage_config(struct device *dev,
-			      const struct regulator_desc *desc)
-{
-	struct fixed_voltage_config *config;
-	const char *supply_name;
-	struct gpio_desc *gpiod;
-	int ret;
-
-	config = devm_kzalloc(dev, sizeof(*config), GFP_KERNEL);
-	if (!config)
-		return ERR_PTR(-ENOMEM);
-
-	ret = device_property_read_string(dev, "supply-name", &supply_name);
-	if (!ret)
-		config->supply_name = supply_name;
-
-	gpiod = gpiod_get(dev, "gpio", GPIOD_ASIS);
-	if (IS_ERR(gpiod))
-		return ERR_PTR(-ENODEV);
-
-	config->gpio = desc_to_gpio(gpiod);
-	config->enable_high = device_property_read_bool(dev,
-							"enable-active-high");
-	gpiod_put(gpiod);
-
-	return config;
-}
-
 static struct regulator_ops fixed_voltage_ops = {
 };
 
@@ -155,11 +114,6 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
 						     &drvdata->desc);
 		if (IS_ERR(config))
 			return PTR_ERR(config);
-	} else if (ACPI_HANDLE(&pdev->dev)) {
-		config = acpi_get_fixed_voltage_config(&pdev->dev,
-						       &drvdata->desc);
-		if (IS_ERR(config))
-			return PTR_ERR(config);
 	} else {
 		config = dev_get_platdata(&pdev->dev);
 	}

From 0e1929dedea36781e25902118c93edd8d8f09af1 Mon Sep 17 00:00:00 2001
From: Mike Looijmans <mike.looijmans@topic.nl>
Date: Mon, 16 Jan 2017 15:49:38 +0100
Subject: [PATCH 063/322] i2c: i2c-cadence: Initialize configuration before
 probing devices

The cadence I2C driver calls cdns_i2c_writereg(..) to setup a workaround
in the controller, but did so after calling i2c_add_adapter() which starts
probing devices on the bus. Change the order so that the configuration is
completely finished before using the adapter.

Signed-off-by: Mike Looijmans <mike.looijmans@topic.nl>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-cadence.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 686971263bef..45d6771fac8c 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -962,10 +962,6 @@ static int cdns_i2c_probe(struct platform_device *pdev)
 		goto err_clk_dis;
 	}
 
-	ret = i2c_add_adapter(&id->adap);
-	if (ret < 0)
-		goto err_clk_dis;
-
 	/*
 	 * Cadence I2C controller has a bug wherein it generates
 	 * invalid read transaction after HW timeout in master receiver mode.
@@ -975,6 +971,10 @@ static int cdns_i2c_probe(struct platform_device *pdev)
 	 */
 	cdns_i2c_writereg(CDNS_I2C_TIMEOUT_MAX, CDNS_I2C_TIME_OUT_OFFSET);
 
+	ret = i2c_add_adapter(&id->adap);
+	if (ret < 0)
+		goto err_clk_dis;
+
 	dev_info(&pdev->dev, "%u kHz mmio %08lx irq %d\n",
 		 id->i2c_clk / 1000, (unsigned long)r_mem->start, id->irq);
 

From e13fe92bb58cf9b8f709ec18267ffc9e6ffeb016 Mon Sep 17 00:00:00 2001
From: Gao Pan <pandy.gao@nxp.com>
Date: Tue, 17 Jan 2017 18:20:55 +0800
Subject: [PATCH 064/322] i2c: imx-lpi2c: add VLLS mode support

When system enters VLLS mode, module power is turned off. As a result,
all registers are reset to HW default value. After exiting VLLS mode,
registers are still in default mode. As a result, the pinctrl settings
are incorrect, which will affect the module function.

The patch recovers the pinctrl setting when exit VLLS mode.

Signed-off-by: Gao Pan <pandy.gao@nxp.com>
Reviewed-by: Vladimir Zapolskiy <vz@mleia.com>
[wsa: added missing include]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-imx-lpi2c.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index c62b7cd475f8..3310f2e0dbd3 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -636,12 +637,31 @@ static int lpi2c_imx_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int lpi2c_imx_suspend(struct device *dev)
+{
+	pinctrl_pm_select_sleep_state(dev);
+
+	return 0;
+}
+
+static int lpi2c_imx_resume(struct device *dev)
+{
+	pinctrl_pm_select_default_state(dev);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(imx_lpi2c_pm, lpi2c_imx_suspend, lpi2c_imx_resume);
+
 static struct platform_driver lpi2c_imx_driver = {
 	.probe = lpi2c_imx_probe,
 	.remove = lpi2c_imx_remove,
 	.driver = {
 		.name = DRIVER_NAME,
 		.of_match_table = lpi2c_imx_of_match,
+		.pm = &imx_lpi2c_pm,
 	},
 };
 

From 406dab8450ec76eca88a1af2fc15d18a2b36ca49 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 26 Jan 2017 15:14:52 -0500
Subject: [PATCH 065/322] nfs: Fix "Don't increment lock sequence ID after
 NFS4ERR_MOVED"

Lock sequence IDs are bumped in decode_lock by calling
nfs_increment_seqid(). nfs_increment_sequid() does not use the
seqid_mutating_err() function fixed in commit 059aa7348241 ("Don't
increment lock sequence ID after NFS4ERR_MOVED").

Fixes: 059aa7348241 ("Don't increment lock sequence ID after ...")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Xuan Qi <xuan.qi@oracle.com>
Cc: stable@vger.kernel.org # v3.7+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 90e6193ce6be..daeb94e3acd4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1091,6 +1091,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 		case -NFS4ERR_BADXDR:
 		case -NFS4ERR_RESOURCE:
 		case -NFS4ERR_NOFILEHANDLE:
+		case -NFS4ERR_MOVED:
 			/* Non-seqid mutating errors */
 			return;
 	};

From ee6625a948d2e47267ec8fd97307fdd67d0f8a5b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Thu, 26 Jan 2017 15:50:41 -0500
Subject: [PATCH 066/322] pNFS: Fix a reference leak in _pnfs_return_layout

IF NFS_LAYOUT_RETURN_REQUESTED is not set, then we currently exit
without freeing the list of invalidated layout segments, leading
to a reference leak.

Reported-by: Olga Kornievskaia <aglo@umich.edu>
Fixes: 24408f5282 ("pNFS: Fix bugs in _pnfs_return_layout")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pnfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 59554f3adf29..dd042498ce7c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1200,10 +1200,10 @@ _pnfs_return_layout(struct inode *ino)
 
 	send = pnfs_prepare_layoutreturn(lo, &stateid, NULL);
 	spin_unlock(&ino->i_lock);
-	pnfs_free_lseg_list(&tmp_list);
 	if (send)
 		status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
 out_put_layout_hdr:
+	pnfs_free_lseg_list(&tmp_list);
 	pnfs_put_layout_hdr(lo);
 out:
 	dprintk("<-- %s status: %d\n", __func__, status);

From 282e4637bc1c0b338708bcebd09d31c69abec070 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Thu, 26 Jan 2017 09:06:22 -0800
Subject: [PATCH 067/322] HID: wacom: Fix poor prox handling in 'wacom_pl_irq'

Commit 025bcc1 performed cleanup work on the 'wacom_pl_irq' function, making
it follow the standards used in the rest of the codebase. The change
unintiontionally allowed the function to send input events from reports
that are not marked as being in prox. This can cause problems as the
report values for X, Y, etc. are not guaranteed to be correct. In
particular, occasionally the tablet will send a report with these values
set to zero. If such a report is received it can caus an unexpected jump
in the XY position.

This patch surrounds more of the processing code with a proximity check,
preventing these zeroed reports from overwriting the current state. To
be safe, only the tool type and ABS_MISC events should be reported when
the pen is marked as being out of prox.

Fixes: 025bcc1540 ("HID: wacom: Simplify 'wacom_pl_irq'")
Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Reviewed-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 0884dc9554fd..672145b0d8f5 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -166,19 +166,21 @@ static int wacom_pl_irq(struct wacom_wac *wacom)
 		wacom->id[0] = STYLUS_DEVICE_ID;
 	}
 
-	pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1));
-	if (features->pressure_max > 255)
-		pressure = (pressure << 1) | ((data[4] >> 6) & 1);
-	pressure += (features->pressure_max + 1) / 2;
+	if (prox) {
+		pressure = (signed char)((data[7] << 1) | ((data[4] >> 2) & 1));
+		if (features->pressure_max > 255)
+			pressure = (pressure << 1) | ((data[4] >> 6) & 1);
+		pressure += (features->pressure_max + 1) / 2;
 
-	input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14));
-	input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14));
-	input_report_abs(input, ABS_PRESSURE, pressure);
+		input_report_abs(input, ABS_X, data[3] | (data[2] << 7) | ((data[1] & 0x03) << 14));
+		input_report_abs(input, ABS_Y, data[6] | (data[5] << 7) | ((data[4] & 0x03) << 14));
+		input_report_abs(input, ABS_PRESSURE, pressure);
 
-	input_report_key(input, BTN_TOUCH, data[4] & 0x08);
-	input_report_key(input, BTN_STYLUS, data[4] & 0x10);
-	/* Only allow the stylus2 button to be reported for the pen tool. */
-	input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20));
+		input_report_key(input, BTN_TOUCH, data[4] & 0x08);
+		input_report_key(input, BTN_STYLUS, data[4] & 0x10);
+		/* Only allow the stylus2 button to be reported for the pen tool. */
+		input_report_key(input, BTN_STYLUS2, (wacom->tool[0] == BTN_TOOL_PEN) && (data[4] & 0x20));
+	}
 
 	if (!prox)
 		wacom->id[0] = 0;

From ed9ab4287f96e66340e0390e2c583f2f9110cba0 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 26 Jan 2017 17:34:40 +0000
Subject: [PATCH 068/322] HID: usbhid: Quirk a AMI virtual mouse and keyboard
 with ALWAYS_POLL

Quirking the following AMI USB device with ALWAYS_POLL fixes an AMI
virtual keyboard and mouse from not responding and timing out when
it is attached to a ppc64el Power 8 system and when we have some
rapid open/closes on the mouse device.

 usb 1-3: new high-speed USB device number 2 using xhci_hcd
 usb 1-3: New USB device found, idVendor=046b, idProduct=ff01
 usb 1-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3
 usb 1-3: Product: Virtual Hub
 usb 1-3: Manufacturer: American Megatrends Inc.
 usb 1-3: SerialNumber: serial
 usb 1-3.3: new high-speed USB device number 3 using xhci_hcd
 usb 1-3.3: New USB device found, idVendor=046b, idProduct=ff31
 usb 1-3.3: New USB device strings: Mfr=1, Product=2, SerialNumber=3
 usb 1-3.3: Product: Virtual HardDisk Device
 usb 1-3.3: Manufacturer: American Megatrends Inc.
 usb 1-3.4: new low-speed USB device number 4 using xhci_hcd
 usb 1-3.4: New USB device found, idVendor=046b, idProduct=ff10
 usb 1-3.4: New USB device strings: Mfr=1, Product=2, SerialNumber=0
 usb 1-3.4: Product: Virtual Keyboard and Mouse
 usb 1-3.4: Manufacturer: American Megatrends Inc.

With the quirk I have not been able to trigger the issue with
half an hour of saturation soak testing.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h           | 3 +++
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index f46f2c5117fa..350accfee8e8 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -76,6 +76,9 @@
 #define USB_VENDOR_ID_ALPS_JP		0x044E
 #define HID_DEVICE_ID_ALPS_U1_DUAL	0x120B
 
+#define USB_VENDOR_ID_AMI		0x046b
+#define USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE	0xff10
+
 #define USB_VENDOR_ID_ANTON		0x1130
 #define USB_DEVICE_ID_ANTON_TOUCH_PAD	0x3101
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index e9d6cc7cdfc5..30a2977e2645 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -57,6 +57,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX, HID_QUIRK_NO_INIT_REPORTS },
+	{ USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET },

From 877a021e08ccb6434718c0cc781fdf943c884cc0 Mon Sep 17 00:00:00 2001
From: Ardinartsev Nikita <pinguin255@gmail.com>
Date: Thu, 26 Jan 2017 16:54:42 +0300
Subject: [PATCH 069/322] HID: hid-lg: Fix immediate disconnection of Logitech
 Rumblepad 2

With NOGET quirk Logitech F510 is now fully workable in dinput mode including
rumble effects (according to fftest).

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117091

[jkosina@suse.cz: fix patch format]
Signed-off-by: Ardinartsev Nikita <ardinar23@gmail.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-lg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index c5c5fbe9d605..52026dc94d5c 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -872,7 +872,7 @@ static const struct hid_device_id lg_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_WINGMAN_FFG),
 		.driver_data = LG_NOGET | LG_FF4 },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_RUMBLEPAD2),
-		.driver_data = LG_FF2 },
+		.driver_data = LG_NOGET | LG_FF2 },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_FLIGHT_SYSTEM_G940),
 		.driver_data = LG_FF3 },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_SPACENAVIGATOR),

From 07cd12945551b63ecb1a349d50a6d69d1d6feb4a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 26 Jan 2017 16:47:28 -0500
Subject: [PATCH 070/322] cgroup: don't online subsystems before
 cgroup_name/path() are operational

While refactoring cgroup creation, a5bca2152036 ("cgroup: factor out
cgroup_create() out of cgroup_mkdir()") incorrectly onlined subsystems
before the new cgroup is associated with it kernfs_node.  This is fine
for cgroup proper but cgroup_name/path() depend on the associated
kernfs_node and if a subsystem makes the new cgroup_subsys_state
visible, which they're allowed to after onlining, it can lead to NULL
dereference.

The current code performs cgroup creation and subsystem onlining in
cgroup_create() and cgroup_mkdir() makes the cgroup and subsystems
visible afterwards.  There's no reason to online the subsystems early
and we can simply drop cgroup_apply_control_enable() call from
cgroup_create() so that the subsystems are onlined and made visible at
the same time.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: a5bca2152036 ("cgroup: factor out cgroup_create() out of cgroup_mkdir()")
Cc: stable@vger.kernel.org # v4.6+
---
 kernel/cgroup.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2ee9ec3051b2..688dd02af985 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5221,6 +5221,11 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 	return ERR_PTR(err);
 }
 
+/*
+ * The returned cgroup is fully initialized including its control mask, but
+ * it isn't associated with its kernfs_node and doesn't have the control
+ * mask applied.
+ */
 static struct cgroup *cgroup_create(struct cgroup *parent)
 {
 	struct cgroup_root *root = parent->root;
@@ -5288,11 +5293,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 
 	cgroup_propagate_control(cgrp);
 
-	/* @cgrp doesn't have dir yet so the following will only create csses */
-	ret = cgroup_apply_control_enable(cgrp);
-	if (ret)
-		goto out_destroy;
-
 	return cgrp;
 
 out_cancel_ref:
@@ -5300,9 +5300,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 out_free_cgrp:
 	kfree(cgrp);
 	return ERR_PTR(ret);
-out_destroy:
-	cgroup_destroy_locked(cgrp);
-	return ERR_PTR(ret);
 }
 
 static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,

From 586655d278ba08af7b198b93217746f9a506ee8a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Wed, 25 Jan 2017 00:44:16 +0100
Subject: [PATCH 071/322] rtc: jz4740: make the driver buildable as a module
 again

By using kernel_halt() instead of machine_halt(), we can make the driver
build as a module.
However, jz4740 platforms not loading this module will not be able to power
off.

Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>

Revert "rtc: jz4740: make the driver builtin only"

This reverts commit b9168c539c0b2de756aaffd380384dbde8adbe07.
---
 drivers/rtc/Kconfig      |  5 ++++-
 drivers/rtc/rtc-jz4740.c | 12 ++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index c93c5a8fba32..5dc673dc9487 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1551,12 +1551,15 @@ config RTC_DRV_MPC5121
 	  will be called rtc-mpc5121.
 
 config RTC_DRV_JZ4740
-	bool "Ingenic JZ4740 SoC"
+	tristate "Ingenic JZ4740 SoC"
 	depends on MACH_INGENIC || COMPILE_TEST
 	help
 	  If you say yes here you get support for the Ingenic JZ47xx SoCs RTC
 	  controllers.
 
+	  This driver can also be buillt as a module. If so, the module
+	  will be called rtc-jz4740.
+
 config RTC_DRV_LPC24XX
 	tristate "NXP RTC for LPC178x/18xx/408x/43xx"
 	depends on ARCH_LPC18XX || COMPILE_TEST
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index 72918c1ba092..64989afffa3d 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -17,6 +17,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
@@ -294,7 +295,7 @@ static void jz4740_rtc_power_off(void)
 			     JZ_REG_RTC_RESET_COUNTER, reset_counter_ticks);
 
 	jz4740_rtc_poweroff(dev_for_power_off);
-	machine_halt();
+	kernel_halt();
 }
 
 static const struct of_device_id jz4740_rtc_of_match[] = {
@@ -302,6 +303,7 @@ static const struct of_device_id jz4740_rtc_of_match[] = {
 	{ .compatible = "ingenic,jz4780-rtc", .data = (void *)ID_JZ4780 },
 	{},
 };
+MODULE_DEVICE_TABLE(of, jz4740_rtc_of_match);
 
 static int jz4740_rtc_probe(struct platform_device *pdev)
 {
@@ -429,6 +431,7 @@ static const struct platform_device_id jz4740_rtc_ids[] = {
 	{ "jz4780-rtc", ID_JZ4780 },
 	{}
 };
+MODULE_DEVICE_TABLE(platform, jz4740_rtc_ids);
 
 static struct platform_driver jz4740_rtc_driver = {
 	.probe	 = jz4740_rtc_probe,
@@ -440,4 +443,9 @@ static struct platform_driver jz4740_rtc_driver = {
 	.id_table = jz4740_rtc_ids,
 };
 
-builtin_platform_driver(jz4740_rtc_driver);
+module_platform_driver(jz4740_rtc_driver);
+
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RTC driver for the JZ4740 SoC\n");
+MODULE_ALIAS("platform:jz4740-rtc");

From 242ef5d483594a2bed6b8a2685849c83e7810d17 Mon Sep 17 00:00:00 2001
From: Sinclair Yeh <syeh@vmware.com>
Date: Wed, 18 Jan 2017 14:14:01 -0800
Subject: [PATCH 072/322] drm/vmwgfx: Fix depth input into
 drm_mode_legacy_fb_format

Currently the pitch is passed in as depth.  This causes
drm_mode_legacy_fb_format() to return the wrong pixel format.

The wrong pixel format will be rejected by vmw_kms_new_framebuffer(),
thus leaving par->set_fb to NULL.

This eventually causes a crash in vmw_fb_setcolreg() when the code
tries to dereference par->set_fb.

Signed-off-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 723fd763da8e..7a96798b9c0a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -481,8 +481,7 @@ static int vmw_fb_kms_framebuffer(struct fb_info *info)
 	mode_cmd.height = var->yres;
 	mode_cmd.pitches[0] = ((var->bits_per_pixel + 7) / 8) * mode_cmd.width;
 	mode_cmd.pixel_format =
-		drm_mode_legacy_fb_format(var->bits_per_pixel,
-			((var->bits_per_pixel + 7) / 8) * mode_cmd.width);
+		drm_mode_legacy_fb_format(var->bits_per_pixel, depth);
 
 	cur_fb = par->set_fb;
 	if (cur_fb && cur_fb->width == mode_cmd.width &&

From 191e885a2e130e639bb0c8ee350d7047294f2ce6 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 25 Jan 2017 10:31:52 -0800
Subject: [PATCH 073/322] firmware: fix NULL pointer dereference in
 __fw_load_abort()

Since commit 5d47ec02c37ea6 ("firmware: Correct handling of
fw_state_wait() return value") fw_load_abort() could be called twice and
lead us to a kernel crash. This happens only when the firmware fallback
mechanism (regular or custom) is used. The fallback mechanism exposes a
sysfs interface for userspace to upload a file and notify the kernel
when the file is loaded and ready, or to cancel an upload by echo'ing -1
into on the loading file:

echo -n "-1" > /sys/$DEVPATH/loading

This will call fw_load_abort(). Some distributions actually have a udev
rule in place to *always* immediately cancel all firmware fallback
mechanism requests (Debian), they have:

  $ cat /lib/udev/rules.d/50-firmware.rules
  # stub for immediately telling the kernel that userspace firmware loading
  # failed; necessary to avoid long timeouts with CONFIG_FW_LOADER_USER_HELPER=y
  SUBSYSTEM=="firmware", ACTION=="add", ATTR{loading}="-1

Distributions with this udev rule would run into this crash only if the
fallback mechanism is used. Since most distributions disable by default
using the fallback mechanism (CONFIG_FW_LOADER_USER_HELPER_FALLBACK),
this would typicaly mean only 2 drivers which *require* the fallback
mechanism could typically incur a crash: drivers/firmware/dell_rbu.c and
the drivers/leds/leds-lp55xx-common.c driver. Distributions enabling
CONFIG_FW_LOADER_USER_HELPER_FALLBACK by default are obviously more
exposed to this crash.

The crash happens because after commit 5b029624948d ("firmware: do not
use fw_lock for fw_state protection") and subsequent fix commit
5d47ec02c37ea6 ("firmware: Correct handling of fw_state_wait() return
value") a race can happen between this cancelation and the firmware
fw_state_wait_timeout() being woken up after a state change with which
fw_load_abort() as that calls swake_up(). Upon error
fw_state_wait_timeout() will also again call fw_load_abort() and trigger
a null reference.

At first glance we could just fix this with a !buf check on
fw_load_abort() before accessing buf->fw_st, however there is a logical
issue in having a state machine used for the fallback mechanism and
preventing access from it once we abort as its inside the buf
(buf->fw_st).

The firmware_class.c code is setting the buf to NULL to annotate an
abort has occurred. Replace this mechanism by simply using the state
check instead. All the other code in place already uses similar checks
for aborting as well so no further changes are needed.

An oops can be reproduced with the new fw_fallback.sh fallback mechanism
cancellation test. Either cancelling the fallback mechanism or the
custom fallback mechanism triggers a crash.

mcgrof@piggy ~/linux-next/tools/testing/selftests/firmware
(git::20170111-fw-fixes)$ sudo ./fw_fallback.sh

./fw_fallback.sh: timeout works
./fw_fallback.sh: firmware comparison works
./fw_fallback.sh: fallback mechanism works

[ this then sits here when it is trying the cancellation test ]

Kernel log:

test_firmware: loading 'nope-test-firmware.bin'
misc test_firmware: Direct firmware load for nope-test-firmware.bin failed with error -2
misc test_firmware: Falling back to user helper
BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
IP: _request_firmware+0xa27/0xad0
PGD 0

Oops: 0000 [#1] SMP
Modules linked in: test_firmware(E) ... etc ...
CPU: 1 PID: 1396 Comm: fw_fallback.sh Tainted: G        W E   4.10.0-rc3-next-20170111+ #30
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
task: ffff9740b27f4340 task.stack: ffffbb15c0bc8000
RIP: 0010:_request_firmware+0xa27/0xad0
RSP: 0018:ffffbb15c0bcbd10 EFLAGS: 00010246
RAX: 00000000fffffffe RBX: ffff9740afe5aa80 RCX: 0000000000000000
RDX: ffff9740b27f4340 RSI: 0000000000000283 RDI: 0000000000000000
RBP: ffffbb15c0bcbd90 R08: ffffbb15c0bcbcd8 R09: 0000000000000000
R10: 0000000894a0d4b1 R11: 000000000000008c R12: ffffffffc0312480
R13: 0000000000000005 R14: ffff9740b1c32400 R15: 00000000000003e8
FS:  00007f8604422700(0000) GS:ffff9740bfc80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000038 CR3: 000000012164c000 CR4: 00000000000006e0
Call Trace:
 request_firmware+0x37/0x50
 trigger_request_store+0x79/0xd0 [test_firmware]
 dev_attr_store+0x18/0x30
 sysfs_kf_write+0x37/0x40
 kernfs_fop_write+0x110/0x1a0
 __vfs_write+0x37/0x160
 ? _cond_resched+0x1a/0x50
 vfs_write+0xb5/0x1a0
 SyS_write+0x55/0xc0
 ? trace_do_page_fault+0x37/0xd0
 entry_SYSCALL_64_fastpath+0x1e/0xad
RIP: 0033:0x7f8603f49620
RSP: 002b:00007fff6287b788 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 000055c307b110a0 RCX: 00007f8603f49620
RDX: 0000000000000016 RSI: 000055c3084d8a90 RDI: 0000000000000001
RBP: 0000000000000016 R08: 000000000000c0ff R09: 000055c3084d6336
R10: 000055c307b108b0 R11: 0000000000000246 R12: 000055c307b13c80
R13: 000055c3084d6320 R14: 0000000000000000 R15: 00007fff6287b950
Code: 9f 64 84 e8 9c 61 fe ff b8 f4 ff ff ff e9 6b f9 ff
ff 48 c7 c7 40 6b 8d 84 89 45 a8 e8 43 84 18 00 49 8b be 00 03 00 00 8b
45 a8 <83> 7f 38 02 74 08 e8 6e ec ff ff 8b 45 a8 49 c7 86 00 03 00 00
RIP: _request_firmware+0xa27/0xad0 RSP: ffffbb15c0bcbd10
CR2: 0000000000000038
---[ end trace 6d94ac339c133e6f ]---

Fixes: 5d47ec02c37e ("firmware: Correct handling of fw_state_wait() return value")
Reported-and-Tested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reported-and-Tested-by: Patrick Bruenn <p.bruenn@beckhoff.com>
Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
CC: <stable@vger.kernel.org>    [3.10+]
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_class.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 4497d263209f..ac350c518e0c 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -558,9 +558,6 @@ static void fw_load_abort(struct firmware_priv *fw_priv)
 	struct firmware_buf *buf = fw_priv->buf;
 
 	__fw_load_abort(buf);
-
-	/* avoid user action after loading abort */
-	fw_priv->buf = NULL;
 }
 
 static LIST_HEAD(pending_fw_head);
@@ -713,7 +710,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 
 	mutex_lock(&fw_lock);
 	fw_buf = fw_priv->buf;
-	if (!fw_buf)
+	if (fw_state_is_aborted(&fw_buf->fw_st))
 		goto out;
 
 	switch (loading) {

From 9b02c54bc951fca884ba5719f42a27e8240965bf Mon Sep 17 00:00:00 2001
From: Markus Mayer <mmayer@broadcom.com>
Date: Mon, 19 Dec 2016 12:10:27 -0800
Subject: [PATCH 074/322] cpufreq: brcmstb-avs-cpufreq: extend sysfs entry
 brcm_avs_pmap

We extend the brcm_avs_pmap sysfs entry (which issues the GET_PMAP
command to AVS) to include all fields from struct pmap. This means
adding mode (AVS, DVS, DVFS) and state (the P-state) to the output.

Signed-off-by: Markus Mayer <mmayer@broadcom.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/brcmstb-avs-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 4fda623e55bb..2c6e3253ba64 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -954,9 +954,9 @@ static ssize_t show_brcm_avs_pmap(struct cpufreq_policy *policy, char *buf)
 	brcm_avs_parse_p1(pmap.p1, &mdiv_p0, &pdiv, &ndiv);
 	brcm_avs_parse_p2(pmap.p2, &mdiv_p1, &mdiv_p2, &mdiv_p3, &mdiv_p4);
 
-	return sprintf(buf, "0x%08x 0x%08x %u %u %u %u %u %u %u\n",
+	return sprintf(buf, "0x%08x 0x%08x %u %u %u %u %u %u %u %u %u\n",
 		pmap.p1, pmap.p2, ndiv, pdiv, mdiv_p0, mdiv_p1, mdiv_p2,
-		mdiv_p3, mdiv_p4);
+		mdiv_p3, mdiv_p4, pmap.mode, pmap.state);
 }
 
 static ssize_t show_brcm_avs_voltage(struct cpufreq_policy *policy, char *buf)

From 3c223c19aea85d3dda1416c187915f4a30b04b1f Mon Sep 17 00:00:00 2001
From: Markus Mayer <mmayer@broadcom.com>
Date: Mon, 19 Dec 2016 12:10:28 -0800
Subject: [PATCH 075/322] cpufreq: brcmstb-avs-cpufreq: properly retrieve
 P-state upon suspend

The AVS GET_PMAP command does return a P-state along with the P-map
information. However, that P-state is the initial P-state when the
P-map was first downloaded to AVS. It is *not* the current P-state.

Therefore, we explicitly retrieve the P-state using the GET_PSTATE
command.

Signed-off-by: Markus Mayer <mmayer@broadcom.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/brcmstb-avs-cpufreq.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 2c6e3253ba64..c94360671f41 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -784,8 +784,19 @@ static int brcm_avs_target_index(struct cpufreq_policy *policy,
 static int brcm_avs_suspend(struct cpufreq_policy *policy)
 {
 	struct private_data *priv = policy->driver_data;
+	int ret;
 
-	return brcm_avs_get_pmap(priv, &priv->pmap);
+	ret = brcm_avs_get_pmap(priv, &priv->pmap);
+	if (ret)
+		return ret;
+
+	/*
+	 * We can't use the P-state returned by brcm_avs_get_pmap(), since
+	 * that's the initial P-state from when the P-map was downloaded to the
+	 * AVS co-processor, not necessarily the P-state we are running at now.
+	 * So, we get the current P-state explicitly.
+	 */
+	return brcm_avs_get_pstate(priv, &priv->pmap.state);
 }
 
 static int brcm_avs_resume(struct cpufreq_policy *policy)

From 606f42265d384b9149bfb953c5dfc6d4710fef4c Mon Sep 17 00:00:00 2001
From: Prashanth Prakash <pprakash@codeaurora.org>
Date: Thu, 26 Jan 2017 11:08:32 -0700
Subject: [PATCH 076/322] arm64: skip register_cpufreq_notifier on ACPI-based
 systems

On ACPI based systems where the topology is setup using the API
store_cpu_topology, at the moment we do not have necessary code
to parse cpu capacity and handle cpufreq notifier, thus
resulting in a kernel panic.

Stack:
        init_cpu_capacity_callback+0xb4/0x1c8
        notifier_call_chain+0x5c/0xa0
        __blocking_notifier_call_chain+0x58/0xa0
        blocking_notifier_call_chain+0x3c/0x50
        cpufreq_set_policy+0xe4/0x328
        cpufreq_init_policy+0x80/0x100
        cpufreq_online+0x418/0x710
        cpufreq_add_dev+0x118/0x180
        subsys_interface_register+0xa4/0xf8
        cpufreq_register_driver+0x1c0/0x298
        cppc_cpufreq_init+0xdc/0x1000 [cppc_cpufreq]
        do_one_initcall+0x5c/0x168
        do_init_module+0x64/0x1e4
        load_module+0x130c/0x14d0
        SyS_finit_module+0x108/0x120
        el0_svc_naked+0x24/0x28

Fixes: 7202bde8b7ae ("arm64: parse cpu capacity-dmips-mhz from DT")
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Prashanth Prakash <pprakash@codeaurora.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/topology.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 23e9e13bd2aa..655e65f38f31 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -11,6 +11,7 @@
  * for more details.
  */
 
+#include <linux/acpi.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
@@ -209,7 +210,12 @@ static struct notifier_block init_cpu_capacity_notifier = {
 
 static int __init register_cpufreq_notifier(void)
 {
-	if (cap_parsing_failed)
+	/*
+	 * on ACPI-based systems we need to use the default cpu capacity
+	 * until we have the necessary code to parse the cpu capacity, so
+	 * skip registering cpufreq notifier.
+	 */
+	if (!acpi_disabled || cap_parsing_failed)
 		return -EINVAL;
 
 	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {

From 8413299cb3933dade6186bbee8363f190032107e Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@st.com>
Date: Fri, 27 Jan 2017 15:45:11 +0100
Subject: [PATCH 077/322] ARM: dts: STiH407-family: set
 snps,dis_u3_susphy_quirk

Since v4.10-rc1, the following logs appears in loop :
[  801.953836] usb usb6-port1: Cannot enable. Maybe the USB cable is bad?
[  801.960455] xhci-hcd xhci-hcd.0.auto: Cannot set link state.
[  801.966611] usb usb6-port1: cannot disable (err = -32)
[  806.083772] usb usb6-port1: Cannot enable. Maybe the USB cable is bad?
[  806.090370] xhci-hcd xhci-hcd.0.auto: Cannot set link state.
[  806.096494] usb usb6-port1: cannot disable (err = -32)

After analysis, xhci try to set link in U3 and returns an error.
Using snps,dis_u3_susphy_quirk fix this issue.

Signed-off-by: Patrice Chotard <patrice.chotard@st.com>
---
 arch/arm/boot/dts/stih407-family.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
index c8b2944e304a..ace97e8576db 100644
--- a/arch/arm/boot/dts/stih407-family.dtsi
+++ b/arch/arm/boot/dts/stih407-family.dtsi
@@ -680,6 +680,7 @@ dwc3: dwc3@9900000 {
 				phy-names	= "usb2-phy", "usb3-phy";
 				phys		= <&usb2_picophy0>,
 						  <&phy_port2 PHY_TYPE_USB3>;
+				snps,dis_u3_susphy_quirk;
 			};
 		};
 

From feb3cbea0946c67060e2d5bcb7499b0a6f6700fe Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Fri, 20 Jan 2017 08:20:24 -0800
Subject: [PATCH 078/322] ARM64: dts: meson-gxbb-odroidc2: fix GbE tx link
 breakage

OdroidC2 GbE link breaks under heavy tx transfer. This happens even if the
MAC does not enable Energy Efficient Ethernet (No Low Power state Idle on
the Tx path). The problem seems to come from the phy Rx path, entering the
LPI state.

Disabling EEE advertisement on the phy prevent this feature to be
negociated with the link partner and solve the issue.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
index 5d28e1cdc998..c59403adb387 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts
@@ -151,6 +151,18 @@ &ethmac {
 	status = "okay";
 	pinctrl-0 = <&eth_rgmii_pins>;
 	pinctrl-names = "default";
+	phy-handle = <&eth_phy0>;
+
+	mdio {
+		compatible = "snps,dwmac-mdio";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		eth_phy0: ethernet-phy@0 {
+			reg = <0>;
+			eee-broken-1000t;
+		};
+	};
 };
 
 &ir {

From bba8e3f42736cf7f974968a818e53b128286ad1d Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Fri, 20 Jan 2017 08:20:25 -0800
Subject: [PATCH 079/322] ARM64: dts: meson-gx: Add firmware reserved memory
 zones
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Amlogic Meson GXBB/GXL/GXM secure monitor uses part of the memory space,
this patch adds these reserved zones.

Without such reserved memory zones, running the following stress command :
$ stress-ng --vm 16 --vm-bytes 128M --timeout 10s
multiple times:

Could lead to the following kernel crashes :
[   46.937975] Bad mode in Error handler detected on CPU1, code 0xbf000000 -- SError
...
[   47.058536] Internal error: Attempting to execute userspace memory: 8600000f [#3] PREEMPT SMP
...
Instead of the OOM killer.

Fixes: 4f24eda8401f ("ARM64: dts: Prepare configs for Amlogic Meson GXBaby")
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
[khilman: added Fixes tag, added _reserved and unit addresses]
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
index eada0b58ba1c..0cbe24b49710 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
@@ -55,6 +55,24 @@ / {
 	#address-cells = <2>;
 	#size-cells = <2>;
 
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		/* 16 MiB reserved for Hardware ROM Firmware */
+		hwrom_reserved: hwrom@0 {
+			reg = <0x0 0x0 0x0 0x1000000>;
+			no-map;
+		};
+
+		/* 2 MiB reserved for ARM Trusted Firmware (BL31) */
+		secmon_reserved: secmon@10000000 {
+			reg = <0x0 0x10000000 0x0 0x200000>;
+			no-map;
+		};
+	};
+
 	cpus {
 		#address-cells = <0x2>;
 		#size-cells = <0x0>;

From 9aed02feae57bf7a40cb04ea0e3017cb7a998db4 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 27 Jan 2017 10:45:27 -0800
Subject: [PATCH 080/322] ARC: [arcompact] handle unaligned access delay slot
 corner case

After emulating an unaligned access in delay slot of a branch, we
pretend as the delay slot never happened - so return back to actual
branch target (or next PC if branch was not taken).

Curently we did this by handling STATUS32.DE, we also need to clear the
BTA.T bit, which is disregarded when returning from original misaligned
exception, but could cause weirdness if it took the interrupt return
path (in case interrupt was acive too)

One ARC700 customer ran into this when enabling unaligned access fixup
for kernel mode accesses as well

Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/unaligned.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index abd961f3e763..91ebe382147f 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -241,8 +241,9 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
 	if (state.fault)
 		goto fault;
 
+	/* clear any remanants of delay slot */
 	if (delay_mode(regs)) {
-		regs->ret = regs->bta;
+		regs->ret = regs->bta ~1U;
 		regs->status32 &= ~STATUS_DE_MASK;
 	} else {
 		regs->ret += state.instr_len;

From 030305d69fc6963c16003f50d7e8d74b02d0a143 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 27 Jan 2017 15:00:45 -0600
Subject: [PATCH 081/322] PCI/ASPM: Handle PCI-to-PCIe bridges as roots of PCIe
 hierarchies

In a struct pcie_link_state, link->root points to the pcie_link_state of
the root of the PCIe hierarchy.  For the topmost link, this points to
itself (link->root = link).  For others, we copy the pointer from the
parent (link->root = link->parent->root).

Previously we recognized that Root Ports originated PCIe hierarchies, but
we treated PCI/PCI-X to PCIe Bridges as being in the middle of the
hierarchy, and when we tried to copy the pointer from link->parent->root,
there was no parent, and we dereferenced a NULL pointer:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000090
  IP: [<ffffffff9e424350>] pcie_aspm_init_link_state+0x170/0x820

Recognize that PCI/PCI-X to PCIe Bridges originate PCIe hierarchies just
like Root Ports do, so link->root for these devices should also point to
itself.

Fixes: 51ebfc92b72b ("PCI: Enumerate switches below PCI-to-PCIe bridges")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=193411
Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1022181
Tested-by: lists@ssl-mail.com
Tested-by: Jayachandran C. <jnair@caviumnetworks.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org	# v4.2+
---
 drivers/pci/pcie/aspm.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 17ac1dce3286..3dd8bcbb3011 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -532,25 +532,32 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev)
 	link = kzalloc(sizeof(*link), GFP_KERNEL);
 	if (!link)
 		return NULL;
+
 	INIT_LIST_HEAD(&link->sibling);
 	INIT_LIST_HEAD(&link->children);
 	INIT_LIST_HEAD(&link->link);
 	link->pdev = pdev;
-	if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) {
+
+	/*
+	 * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe
+	 * hierarchies.
+	 */
+	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT ||
+	    pci_pcie_type(pdev) == PCI_EXP_TYPE_PCIE_BRIDGE) {
+		link->root = link;
+	} else {
 		struct pcie_link_state *parent;
+
 		parent = pdev->bus->parent->self->link_state;
 		if (!parent) {
 			kfree(link);
 			return NULL;
 		}
+
 		link->parent = parent;
+		link->root = link->parent->root;
 		list_add(&link->link, &parent->children);
 	}
-	/* Setup a pointer to the root port link */
-	if (!link->parent)
-		link->root = link;
-	else
-		link->root = link->parent->root;
 
 	list_add(&link->sibling, &link_list);
 	pdev->link_state = link;

From 2b1d530cb3157f828fcaadd259613f59db3c6d1c Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Fri, 27 Jan 2017 14:19:25 +0200
Subject: [PATCH 082/322] MAINTAINERS: ath9k-devel is closed

ath9k-devel list is now closed, only linux-wireless should be used.

Reported-by: Michael Renzmann <mrenzmann@madwifi-project.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cfff2c9e3d94..d7699d5b5863 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10169,7 +10169,6 @@ F:	drivers/media/tuners/qt1010*
 QUALCOMM ATHEROS ATH9K WIRELESS DRIVER
 M:	QCA ath9k Development <ath9k-devel@qca.qualcomm.com>
 L:	linux-wireless@vger.kernel.org
-L:	ath9k-devel@lists.ath9k.org
 W:	http://wireless.kernel.org/en/users/Drivers/ath9k
 S:	Supported
 F:	drivers/net/wireless/ath/ath9k/

From bf29bddf0417a4783da3b24e8c9e017ac649326f Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 27 Jan 2017 22:25:52 +0000
Subject: [PATCH 083/322] x86/efi: Always map the first physical page into the
 EFI pagetables

Commit:

  129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode")

stopped creating 1:1 mappings for all RAM, when running in native 64-bit mode.

It turns out though that there are 64-bit EFI implementations in the wild
(this particular problem has been reported on a Lenovo Yoga 710-11IKB),
which still make use of the first physical page for their own private use,
even though they explicitly mark it EFI_CONVENTIONAL_MEMORY in the memory
map.

In case there is no mapping for this particular frame in the EFI pagetables,
as soon as firmware tries to make use of it, a triple fault occurs and the
system reboots (in case of the Yoga 710-11IKB this is very early during bootup).

Fix that by always mapping the first page of physical memory into the EFI
pagetables. We're free to hand this page to the BIOS, as trim_bios_range()
will reserve the first page and isolate it away from memory allocators anyway.

Note that just reverting 129766708 alone is not enough on v4.9-rc1+ to fix the
regression on affected hardware, as this commit:

   ab72a27da ("x86/efi: Consolidate region mapping logic")

later made the first physical frame not to be mapped anyway.

Reported-by: Hanka Pavlikova <hanka@ucw.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vojtech Pavlik <vojtech@ucw.cz>
Cc: Waiman Long <waiman.long@hpe.com>
Cc: linux-efi@vger.kernel.org
Cc: stable@kernel.org # v4.8+
Fixes: 129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode")
Link: http://lkml.kernel.org/r/20170127222552.22336-1-matt@codeblueprint.co.uk
[ Tidied up the changelog and the comment. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/efi_64.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 319148bd4b05..2f25a363068c 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -268,6 +268,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 
 	efi_scratch.use_pgd = true;
 
+	/*
+	 * Certain firmware versions are way too sentimential and still believe
+	 * they are exclusive and unquestionable owners of the first physical page,
+	 * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY
+	 * (but then write-access it later during SetVirtualAddressMap()).
+	 *
+	 * Create a 1:1 mapping for this page, to avoid triple faults during early
+	 * boot with such firmware. We are free to hand this page to the BIOS,
+	 * as trim_bios_range() will reserve the first page and isolate it away
+	 * from memory allocators anyway.
+	 */
+	if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) {
+		pr_err("Failed to create 1:1 mapping for the first page!\n");
+		return 1;
+	}
+
 	/*
 	 * When making calls to the firmware everything needs to be 1:1
 	 * mapped and addressable with 32-bit pointers. Map the kernel

From 966d2b04e070bc040319aaebfec09e0144dc3341 Mon Sep 17 00:00:00 2001
From: Douglas Miller <dougmill@linux.vnet.ibm.com>
Date: Sat, 28 Jan 2017 06:42:20 -0600
Subject: [PATCH 084/322] percpu-refcount: fix reference leak during
 percpu-atomic transition

percpu_ref_tryget() and percpu_ref_tryget_live() should return
"true" IFF they acquire a reference. But the return value from
atomic_long_inc_not_zero() is a long and may have high bits set,
e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines
is bool so the reference may actually be acquired but the routines
return "false" which results in a reference leak since the caller
assumes it does not need to do a corresponding percpu_ref_put().

This was seen when performing CPU hotplug during I/O, as hangs in
blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start)
raced with percpu_ref_tryget (blk_mq_timeout_work).
Sample stack trace:

__switch_to+0x2c0/0x450
__schedule+0x2f8/0x970
schedule+0x48/0xc0
blk_mq_freeze_queue_wait+0x94/0x120
blk_mq_queue_reinit_work+0xb8/0x180
blk_mq_queue_reinit_prepare+0x84/0xa0
cpuhp_invoke_callback+0x17c/0x600
cpuhp_up_callbacks+0x58/0x150
_cpu_up+0xf0/0x1c0
do_cpu_up+0x120/0x150
cpu_subsys_online+0x64/0xe0
device_online+0xb4/0x120
online_store+0xb4/0xc0
dev_attr_store+0x68/0xa0
sysfs_kf_write+0x80/0xb0
kernfs_fop_write+0x17c/0x250
__vfs_write+0x6c/0x1e0
vfs_write+0xd0/0x270
SyS_write+0x6c/0x110
system_call+0x38/0xe0

Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS,
and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests.
However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0
and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set.

The fix is to make the tryget routines use an actual boolean internally instead
of the atomic long result truncated to a int.

Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints
Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751
Signed-off-by: Douglas Miller <dougmill@linux.vnet.ibm.com>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints")
Cc: stable@vger.kernel.org # v3.18+
---
 include/linux/percpu-refcount.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 1c7eec09e5eb..3a481a49546e 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -204,7 +204,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count;
-	int ret;
+	bool ret;
 
 	rcu_read_lock_sched();
 
@@ -238,7 +238,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
 	unsigned long __percpu *percpu_count;
-	int ret = false;
+	bool ret = false;
 
 	rcu_read_lock_sched();
 

From 83b5d1e3d3013dbf90645a5d07179d018c8243fa Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 3 Jan 2017 22:55:50 +0100
Subject: [PATCH 085/322] parisc, parport_gsc: Fixes for printk continuation
 lines

Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/parport/parport_gsc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c
index dd6d4ccb41e4..3858b87fd0bb 100644
--- a/drivers/parport/parport_gsc.c
+++ b/drivers/parport/parport_gsc.c
@@ -293,7 +293,7 @@ struct parport *parport_gsc_probe_port(unsigned long base,
 		p->irq = PARPORT_IRQ_NONE;
 	}
 	if (p->irq != PARPORT_IRQ_NONE) {
-		printk(", irq %d", p->irq);
+		pr_cont(", irq %d", p->irq);
 
 		if (p->dma == PARPORT_DMA_AUTO) {
 			p->dma = PARPORT_DMA_NONE;
@@ -303,8 +303,8 @@ struct parport *parport_gsc_probe_port(unsigned long base,
                                            is mandatory (see above) */
 		p->dma = PARPORT_DMA_NONE;
 
-	printk(" [");
-#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}}
+	pr_cont(" [");
+#define printmode(x) {if(p->modes&PARPORT_MODE_##x){pr_cont("%s%s",f?",":"",#x);f++;}}
 	{
 		int f = 0;
 		printmode(PCSPP);
@@ -315,7 +315,7 @@ struct parport *parport_gsc_probe_port(unsigned long base,
 //		printmode(DMA);
 	}
 #undef printmode
-	printk("]\n");
+	pr_cont("]\n");
 
 	if (p->irq != PARPORT_IRQ_NONE) {
 		if (request_irq (p->irq, parport_irq_handler,

From 2ad5d52d42810bed95100a3d912679d8864421ec Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 28 Jan 2017 11:52:02 +0100
Subject: [PATCH 086/322] parisc: Don't use BITS_PER_LONG in userspace-exported
 swab.h header

In swab.h the "#if BITS_PER_LONG > 32" breaks compiling userspace programs if
BITS_PER_LONG is #defined by userspace with the sizeof() compiler builtin.

Solve this problem by using __BITS_PER_LONG instead.  Since we now
#include asm/bitsperlong.h avoid further potential userspace pollution
by moving the #define of SHIFT_PER_LONG to bitops.h which is not
exported to userspace.

This patch unbreaks compiling qemu on hppa/parisc.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org>
---
 arch/parisc/include/asm/bitops.h           | 8 +++++++-
 arch/parisc/include/uapi/asm/bitsperlong.h | 2 --
 arch/parisc/include/uapi/asm/swab.h        | 5 +++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
index 3f9406d9b9d6..da87943328a5 100644
--- a/arch/parisc/include/asm/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
@@ -6,7 +6,7 @@
 #endif
 
 #include <linux/compiler.h>
-#include <asm/types.h>		/* for BITS_PER_LONG/SHIFT_PER_LONG */
+#include <asm/types.h>
 #include <asm/byteorder.h>
 #include <asm/barrier.h>
 #include <linux/atomic.h>
@@ -17,6 +17,12 @@
  * to include/asm-i386/bitops.h or kerneldoc
  */
 
+#if __BITS_PER_LONG == 64
+#define SHIFT_PER_LONG 6
+#else
+#define SHIFT_PER_LONG 5
+#endif
+
 #define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1))
 
 
diff --git a/arch/parisc/include/uapi/asm/bitsperlong.h b/arch/parisc/include/uapi/asm/bitsperlong.h
index e0a23c7bdd43..07fa7e50bdc0 100644
--- a/arch/parisc/include/uapi/asm/bitsperlong.h
+++ b/arch/parisc/include/uapi/asm/bitsperlong.h
@@ -3,10 +3,8 @@
 
 #if defined(__LP64__)
 #define __BITS_PER_LONG 64
-#define SHIFT_PER_LONG 6
 #else
 #define __BITS_PER_LONG 32
-#define SHIFT_PER_LONG 5
 #endif
 
 #include <asm-generic/bitsperlong.h>
diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h
index e78403b129ef..928e1bbac98f 100644
--- a/arch/parisc/include/uapi/asm/swab.h
+++ b/arch/parisc/include/uapi/asm/swab.h
@@ -1,6 +1,7 @@
 #ifndef _PARISC_SWAB_H
 #define _PARISC_SWAB_H
 
+#include <asm/bitsperlong.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
 
@@ -38,7 +39,7 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 }
 #define __arch_swab32 __arch_swab32
 
-#if BITS_PER_LONG > 32
+#if __BITS_PER_LONG > 32
 /*
 ** From "PA-RISC 2.0 Architecture", HP Professional Books.
 ** See Appendix I page 8 , "Endian Byte Swapping".
@@ -61,6 +62,6 @@ static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
 	return x;
 }
 #define __arch_swab64 __arch_swab64
-#endif /* BITS_PER_LONG > 32 */
+#endif /* __BITS_PER_LONG > 32 */
 
 #endif /* _PARISC_SWAB_H */

From 9eb7892351a3a3b403d879b41c4e6efb2c96516f Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 11 Jan 2017 19:35:41 +0200
Subject: [PATCH 087/322] net/mlx5: Change ENOTSUPP to EOPNOTSUPP

As ENOTSUPP is specific to NFS, change the return error value to
EOPNOTSUPP in various places in the mlx5 driver.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Suggested-by: Yotam Gigi <yotamg@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c          |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h           |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c     |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 10 +++++-----
 .../net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c      |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c      |  4 ++--
 .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/port.c         |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/vport.c        |  2 +-
 12 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 3797cc7c1288..caa837e5e2b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1728,7 +1728,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 	if (cmd->cmdif_rev > CMD_IF_REV) {
 		dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
 			CMD_IF_REV, cmd->cmdif_rev);
-		err = -ENOTSUPP;
+		err = -EOPNOTSUPP;
 		goto err_free_page;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 951dbd58594d..1619147a63e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -863,12 +863,12 @@ static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
 
 static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 {
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
 }
 
 static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv)
 {
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
 }
 #else
 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index f0b460f47f29..35f9ae037ba0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -89,7 +89,7 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
 	int i;
 
 	if (!MLX5_CAP_GEN(priv->mdev, ets))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
 	for (i = 0; i < ets->ets_cap; i++) {
@@ -236,7 +236,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
 	int err;
 
 	if (!MLX5_CAP_GEN(priv->mdev, ets))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	err = mlx5e_dbcnl_validate_ets(netdev, ets);
 	if (err)
@@ -402,7 +402,7 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct ieee_ets ets;
 	struct ieee_pfc pfc;
-	int err = -ENOTSUPP;
+	int err = -EOPNOTSUPP;
 	int i;
 
 	if (!MLX5_CAP_GEN(mdev, ets))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 5197817e4b2f..ffbdf9ee5a9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -595,7 +595,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
 	if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	coal->rx_coalesce_usecs       = priv->params.rx_cq_moderation.usec;
 	coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts;
@@ -620,7 +620,7 @@ static int mlx5e_set_coalesce(struct net_device *netdev,
 	int i;
 
 	if (!MLX5_CAP_GEN(mdev, cq_moderation))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	mutex_lock(&priv->state_lock);
 
@@ -1296,7 +1296,7 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 	u32 mlx5_wol_mode;
 
 	if (!wol_supported)
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (wol->wolopts & ~wol_supported)
 		return -EINVAL;
@@ -1426,7 +1426,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
 
 	if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
 	    !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (!rx_mode_changed)
 		return 0;
@@ -1452,7 +1452,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
 	bool reset;
 
 	if (!MLX5_CAP_GEN(mdev, cqe_compression))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) {
 		netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index d088effd7160..f33f72d0237c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -92,7 +92,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
 	ns = mlx5_get_flow_namespace(priv->mdev,
 				     MLX5_FLOW_NAMESPACE_ETHTOOL);
 	if (!ns)
-		return ERR_PTR(-ENOTSUPP);
+		return ERR_PTR(-EOPNOTSUPP);
 
 	table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev,
 						       flow_table_properties_nic_receive.log_max_ft_size)),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2b7dd315020c..948351ae5bd2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3331,7 +3331,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 {
 	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	if (!MLX5_CAP_GEN(mdev, eth_net_offloads) ||
 	    !MLX5_CAP_GEN(mdev, nic_flow_table) ||
 	    !MLX5_CAP_ETH(mdev, csum_cap) ||
@@ -3343,7 +3343,7 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 			       < 3) {
 		mlx5_core_warn(mdev,
 			       "Not creating net device, some required device capabilities are missing\n");
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	}
 	if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
 		mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index f14d9c9ba773..bb712139b36e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -133,7 +133,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
 
 	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
 	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n",
 		  vport, vlan, qos, set_flags);
@@ -1630,7 +1630,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
 	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
 		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	}
 
 	if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 03293ed1cc22..657d319fc4c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -166,7 +166,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
 	return 0;
 
 out_notsupp:
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
 }
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index c4478ecd8056..b53fc85a2375 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -322,7 +322,7 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
 						flow_table_properties_nic_receive.
 						flow_modify_en);
 	if (!atomic_mod_cap)
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	opmod = 1;
 
 	return	mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index d01e9f21d469..3c315eb8d270 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -807,7 +807,7 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 		return 0;
 	}
 
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
 }
 
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index d2ec9d232a70..fd12e0a377a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -620,7 +620,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
 	u32 out[MLX5_ST_SZ_DW(qtct_reg)];
 
 	if (!MLX5_CAP_GEN(mdev, ets))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out),
 				    MLX5_REG_QETCR, 0, 1);
@@ -632,7 +632,7 @@ static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
 	u32 in[MLX5_ST_SZ_DW(qtct_reg)];
 
 	if (!MLX5_CAP_GEN(mdev, ets))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	memset(in, 0, sizeof(in));
 	return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 269e4401c342..7129c30a2ab4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -532,7 +532,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
 		return -EACCES;
 	if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify))
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 
 	in = mlx5_vzalloc(inlen);
 	if (!in)

From eff596da48784316ccb83bef82bc1213b512d5e0 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 12 Jan 2017 13:04:01 +0200
Subject: [PATCH 088/322] net/mlx5: Return EOPNOTSUPP when failing to get
 steering name-space

When we fail to retrieve a hardware steering name-space, the returned error
code should say that this operation is not supported. Align the various
places in the driver where this call is made to this convention.

Also, make sure to warn when we fail to retrieve a SW (ANCHOR) name-space.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c            | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c          | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c          | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 1fe80de5d68f..a0e5a69402b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1089,7 +1089,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
 					       MLX5_FLOW_NAMESPACE_KERNEL);
 
 	if (!priv->fs.ns)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	err = mlx5e_arfs_create_tables(priv);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index bb712139b36e..d0c8bf014453 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -353,7 +353,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get FDB flow namespace\n");
-		return -ENOMEM;
+		return -EOPNOTSUPP;
 	}
 
 	flow_group_in = mlx5_vzalloc(inlen);
@@ -962,7 +962,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch egress flow namespace\n");
-		return -EIO;
+		return -EOPNOTSUPP;
 	}
 
 	flow_group_in = mlx5_vzalloc(inlen);
@@ -1079,7 +1079,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n");
-		return -EIO;
+		return -EOPNOTSUPP;
 	}
 
 	flow_group_in = mlx5_vzalloc(inlen);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 657d319fc4c6..5803216157cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -535,7 +535,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS);
 	if (!ns) {
 		esw_warn(esw->dev, "Failed to get offloads flow namespace\n");
-		return -ENOMEM;
+		return -EOPNOTSUPP;
 	}
 
 	ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 0ac7a2fc916c..6346a8f5883b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1822,7 +1822,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
 	struct mlx5_flow_table *ft;
 
 	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
-	if (!ns)
+	if (WARN_ON(!ns))
 		return -EINVAL;
 	ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0);
 	if (IS_ERR(ft)) {

From 5403dc703ff277f8a2a12a83ac820750485f13b3 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 11 Jan 2017 19:39:42 +0200
Subject: [PATCH 089/322] net/mlx5: E-Switch, Err when retrieving steering
 name-space fails

Make sure to return error when we failed retrieving the FDB steering
name space. Also, while around, correctly print the error when mode
change revert fails in the warning message.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 5803216157cf..c61bca138e65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -424,6 +424,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		err = -EOPNOTSUPP;
 		goto ns_err;
 	}
 
@@ -655,7 +656,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw)
 		esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
 		err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
 		if (err1)
-			esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err);
+			esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1);
 	}
 	if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
 		if (mlx5_eswitch_inline_mode_get(esw,

From 5bae8c031053c69b4aa74b7f1ba15d4ec8426208 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 15 Jan 2017 19:05:38 +0200
Subject: [PATCH 090/322] net/mlx5: E-Switch, Re-enable RoCE on mode change
 only after FDB destroy

We must re-enable RoCE on the e-switch management port (PF) only after destroying
the FDB in its switchdev/offloaded mode. Otherwise, when encapsulation is supported,
this re-enablement will fail.

Also, it's more natural and symmetric to disable RoCE on the PF before we create
the FDB under switchdev mode, so do that as well and revert if getting into error
during the mode change later.

Fixes: 9da34cd34e85 ('net/mlx5: Disable RoCE on the e-switch management [..]')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/eswitch_offloads.c     | 29 ++++++++++++-------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index c61bca138e65..595f7c7383b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -675,9 +675,14 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 	int vport;
 	int err;
 
+	/* disable PF RoCE so missed packets don't go through RoCE steering */
+	mlx5_dev_list_lock();
+	mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+	mlx5_dev_list_unlock();
+
 	err = esw_create_offloads_fdb_table(esw, nvports);
 	if (err)
-		return err;
+		goto create_fdb_err;
 
 	err = esw_create_offloads_table(esw);
 	if (err)
@@ -697,11 +702,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 			goto err_reps;
 	}
 
-	/* disable PF RoCE so missed packets don't go through RoCE steering */
-	mlx5_dev_list_lock();
-	mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-	mlx5_dev_list_unlock();
-
 	return 0;
 
 err_reps:
@@ -718,6 +718,13 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 
 create_ft_err:
 	esw_destroy_offloads_fdb_table(esw);
+
+create_fdb_err:
+	/* enable back PF RoCE */
+	mlx5_dev_list_lock();
+	mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+	mlx5_dev_list_unlock();
+
 	return err;
 }
 
@@ -725,11 +732,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
 {
 	int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
 
-	/* enable back PF RoCE */
-	mlx5_dev_list_lock();
-	mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-	mlx5_dev_list_unlock();
-
 	mlx5_eswitch_disable_sriov(esw);
 	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
 	if (err) {
@@ -739,6 +741,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
 			esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
 	}
 
+	/* enable back PF RoCE */
+	mlx5_dev_list_lock();
+	mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+	mlx5_dev_list_unlock();
+
 	return err;
 }
 

From 3e621b19b0bb1f5bea34f1fbc5fb5629191eda2b Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Thu, 12 Jan 2017 11:07:40 +0200
Subject: [PATCH 091/322] net/mlx5e: Support TC encapsulation offloads with
 upper devices

When tunneling is used, some virtualizations systems set the (mlx5e) uplink
device to be stacked under upper devices such as bridge or ovs internal
port, where the VTEP IP address used for the encapsulation is set on
that upper device.

In order to support such use-cases, we also deal with a setup where the
egress mirred device isn't representing a port on the HW e-switch to where
the ingress device belongs. We use eswitch service function which returns
the uplink and set it as the egress device of the tc encap rule.

Fixes: a54e20b4fcae ("net/mlx5e: Add basic TC tunnel set action for SRIOV offloads")
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 46bef6a26a8c..c5282b6aba8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -663,6 +663,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 				   __be32 *saddr,
 				   int *out_ttl)
 {
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct rtable *rt;
 	struct neighbour *n = NULL;
 	int ttl;
@@ -677,12 +678,11 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 #else
 	return -EOPNOTSUPP;
 #endif
-
-	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
-		pr_warn("%s: can't offload, devices not on same HW e-switch\n", __func__);
-		ip_rt_put(rt);
-		return -EOPNOTSUPP;
-	}
+	/* if the egress device isn't on the same HW e-switch, we use the uplink */
+	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
+		*out_dev = mlx5_eswitch_get_uplink_netdev(esw);
+	else
+		*out_dev = rt->dst.dev;
 
 	ttl = ip4_dst_hoplimit(&rt->dst);
 	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
@@ -693,7 +693,6 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 	*out_n = n;
 	*saddr = fl4->saddr;
 	*out_ttl = ttl;
-	*out_dev = rt->dst.dev;
 
 	return 0;
 }

From 1d3398facd08a7fd4202f269317a95668eb880b9 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 11 Jan 2017 14:32:26 +0200
Subject: [PATCH 092/322] net/mlx5e: Modify TIRs hash only when it's needed

We don't need to modify our TIRs unless the user requested a change in
the hash function/key, for example when changing indirection only.

Tested:
 # Modify TIRs hash is needed
ethtool -X ethX hkey  <new key>
ethtool -X ethX hfunc <new func>

 # Modify TIRs hash is not needed
ethtool -X ethX equal <new indirection table>

All cases are verified with TCP Multi-Stream traffic over IPv4 & IPv6.

Fixes: bdfc028de1b3 ("net/mlx5e: Fix ethtool RX hash func configuration change")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_ethtool.c   | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ffbdf9ee5a9b..6f4eb34259f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -996,6 +996,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	bool hash_changed = false;
 	void *in;
 
 	if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
@@ -1017,14 +1018,21 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 		mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0);
 	}
 
-	if (key)
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE &&
+	    hfunc != priv->params.rss_hfunc) {
+		priv->params.rss_hfunc = hfunc;
+		hash_changed = true;
+	}
+
+	if (key) {
 		memcpy(priv->params.toeplitz_hash_key, key,
 		       sizeof(priv->params.toeplitz_hash_key));
+		hash_changed = hash_changed ||
+			       priv->params.rss_hfunc == ETH_RSS_HASH_TOP;
+	}
 
-	if (hfunc != ETH_RSS_HASH_NO_CHANGE)
-		priv->params.rss_hfunc = hfunc;
-
-	mlx5e_modify_tirs_hash(priv, in, inlen);
+	if (hash_changed)
+		mlx5e_modify_tirs_hash(priv, in, inlen);
 
 	mutex_unlock(&priv->state_lock);
 

From a100ff3eef193d2d79daf98dcd97a54776ffeb78 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Thu, 12 Jan 2017 16:25:46 +0200
Subject: [PATCH 093/322] net/mlx5e: Fix update of hash function/key via
 ethtool

Modifying TIR hash should change selected fields bitmask in addition to
the function and key.

Formerly, Only on ethool mlx5e_set_rxfh "ethtoo -X" we would not set this
field resulting in zeroing of its value, which means no packet fields are
used for RX RSS hash calculation thus causing all traffic to arrive in
RQ[0].

On driver load out of the box we don't have this issue, since the TIR
hash is fully created from scratch.

Tested:
ethtool -X ethX hkey  <new key>
ethtool -X ethX hfunc <new func>
ethtool -X ethX equal <new indirection table>

All cases are verified with TCP Multi-Stream traffic over IPv4 & IPv6.

Fixes: bdfc028de1b3 ("net/mlx5e: Fix ethtool RX hash func configuration change")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   3 +-
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  |  13 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 198 +++++++++---------
 3 files changed, 109 insertions(+), 105 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1619147a63e8..d5ecb8f53fd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -791,7 +791,8 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
 int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd);
 
 int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix);
-void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv);
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc,
+				    enum mlx5e_traffic_types tt);
 
 int mlx5e_open_locked(struct net_device *netdev);
 int mlx5e_close_locked(struct net_device *netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 6f4eb34259f0..bb67863aa361 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -980,15 +980,18 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 
 static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
 	void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
-	int i;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int ctxlen = MLX5_ST_SZ_BYTES(tirc);
+	int tt;
 
 	MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
-	mlx5e_build_tir_ctx_hash(tirc, priv);
 
-	for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
-		mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen);
+	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+		memset(tirc, 0, ctxlen);
+		mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt);
+		mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
+	}
 }
 
 static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 948351ae5bd2..f14ca3385fdd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2022,8 +2022,23 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv)
 	MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout);
 }
 
-void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv)
+void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc,
+				    enum mlx5e_traffic_types tt)
 {
+	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+
+#define MLX5_HASH_IP            (MLX5_HASH_FIELD_SEL_SRC_IP   |\
+				 MLX5_HASH_FIELD_SEL_DST_IP)
+
+#define MLX5_HASH_IP_L4PORTS    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
+				 MLX5_HASH_FIELD_SEL_DST_IP   |\
+				 MLX5_HASH_FIELD_SEL_L4_SPORT |\
+				 MLX5_HASH_FIELD_SEL_L4_DPORT)
+
+#define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
+				 MLX5_HASH_FIELD_SEL_DST_IP   |\
+				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
 	MLX5_SET(tirc, tirc, rx_hash_fn,
 		 mlx5e_rx_hash_fn(priv->params.rss_hfunc));
 	if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
@@ -2035,6 +2050,88 @@ void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv)
 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
 		memcpy(rss_key, priv->params.toeplitz_hash_key, len);
 	}
+
+	switch (tt) {
+	case MLX5E_TT_IPV4_TCP:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV4);
+		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+			 MLX5_L4_PROT_TYPE_TCP);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_L4PORTS);
+		break;
+
+	case MLX5E_TT_IPV6_TCP:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV6);
+		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+			 MLX5_L4_PROT_TYPE_TCP);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_L4PORTS);
+		break;
+
+	case MLX5E_TT_IPV4_UDP:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV4);
+		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+			 MLX5_L4_PROT_TYPE_UDP);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_L4PORTS);
+		break;
+
+	case MLX5E_TT_IPV6_UDP:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV6);
+		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+			 MLX5_L4_PROT_TYPE_UDP);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_L4PORTS);
+		break;
+
+	case MLX5E_TT_IPV4_IPSEC_AH:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV4);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_IPSEC_SPI);
+		break;
+
+	case MLX5E_TT_IPV6_IPSEC_AH:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV6);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_IPSEC_SPI);
+		break;
+
+	case MLX5E_TT_IPV4_IPSEC_ESP:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV4);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_IPSEC_SPI);
+		break;
+
+	case MLX5E_TT_IPV6_IPSEC_ESP:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV6);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP_IPSEC_SPI);
+		break;
+
+	case MLX5E_TT_IPV4:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV4);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP);
+		break;
+
+	case MLX5E_TT_IPV6:
+		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+			 MLX5_L3_PROT_TYPE_IPV6);
+		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			 MLX5_HASH_IP);
+		break;
+	default:
+		WARN_ONCE(true, "%s: bad traffic type!\n", __func__);
+	}
 }
 
 static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
@@ -2404,110 +2501,13 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,
 				      enum mlx5e_traffic_types tt)
 {
-	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
-
 	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
 
-#define MLX5_HASH_IP            (MLX5_HASH_FIELD_SEL_SRC_IP   |\
-				 MLX5_HASH_FIELD_SEL_DST_IP)
-
-#define MLX5_HASH_IP_L4PORTS    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
-				 MLX5_HASH_FIELD_SEL_DST_IP   |\
-				 MLX5_HASH_FIELD_SEL_L4_SPORT |\
-				 MLX5_HASH_FIELD_SEL_L4_DPORT)
-
-#define MLX5_HASH_IP_IPSEC_SPI  (MLX5_HASH_FIELD_SEL_SRC_IP   |\
-				 MLX5_HASH_FIELD_SEL_DST_IP   |\
-				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-
 	mlx5e_build_tir_ctx_lro(tirc, priv);
 
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
 	MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
-	mlx5e_build_tir_ctx_hash(tirc, priv);
-
-	switch (tt) {
-	case MLX5E_TT_IPV4_TCP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_TCP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
-
-	case MLX5E_TT_IPV6_TCP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_TCP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
-
-	case MLX5E_TT_IPV4_UDP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_UDP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
-
-	case MLX5E_TT_IPV6_UDP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-			 MLX5_L4_PROT_TYPE_UDP);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_L4PORTS);
-		break;
-
-	case MLX5E_TT_IPV4_IPSEC_AH:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV6_IPSEC_AH:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV4_IPSEC_ESP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV6_IPSEC_ESP:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP_IPSEC_SPI);
-		break;
-
-	case MLX5E_TT_IPV4:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV4);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP);
-		break;
-
-	case MLX5E_TT_IPV6:
-		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-			 MLX5_L3_PROT_TYPE_IPV6);
-		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-			 MLX5_HASH_IP);
-		break;
-	default:
-		WARN_ONCE(true,
-			  "mlx5e_build_indir_tir_ctx: bad traffic type!\n");
-	}
+	mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt);
 }
 
 static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc,

From d15118af268324ecfc968dd90396e966f4f9b3ff Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 19 Jan 2017 14:53:07 +0200
Subject: [PATCH 094/322] net/mlx5e: Check ets capability before ets query FW
 command

On dcbnl callback getpgtccfgtx, the driver should check the ets
capability before ets query command is sent to firmware.
It is valid to return from this void function without changing in/out
parameters, as these parameters are initialized to
DCB_ATTR_VALUE_UNDEFINED.

Fixes: 3a6a931dfb8e ("net/mlx5e: Support DCBX CEE API")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 35f9ae037ba0..0523ed47f597 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -511,6 +511,11 @@ static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+	if (!MLX5_CAP_GEN(priv->mdev, ets)) {
+		netdev_err(netdev, "%s, ets is not supported\n", __func__);
+		return;
+	}
+
 	if (priority >= CEE_DCBX_MAX_PRIO) {
 		netdev_err(netdev,
 			   "%s, priority is out of range\n", __func__);

From 39cb2c9a316e77f6dfba96c543e55b6672d5a37e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 29 Jan 2017 13:50:06 -0800
Subject: [PATCH 095/322] drm/i915: Check for NULL i915_vma in
 intel_unpin_fb_obj()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've seen this trigger twice now, where the i915_gem_object_to_ggtt()
call in intel_unpin_fb_obj() returns NULL, resulting in an oops
immediately afterwards as the (inlined) call to i915_vma_unpin_fence()
tries to dereference it.

It seems to be some race condition where the object is going away at
shutdown time, since both times happened when shutting down the X
server.  The call chains were different:

 - VT ioctl(KDSETMODE, KD_TEXT):

    intel_cleanup_plane_fb+0x5b/0xa0 [i915]
    drm_atomic_helper_cleanup_planes+0x6f/0x90 [drm_kms_helper]
    intel_atomic_commit_tail+0x749/0xfe0 [i915]
    intel_atomic_commit+0x3cb/0x4f0 [i915]
    drm_atomic_commit+0x4b/0x50 [drm]
    restore_fbdev_mode+0x14c/0x2a0 [drm_kms_helper]
    drm_fb_helper_restore_fbdev_mode_unlocked+0x34/0x80 [drm_kms_helper]
    drm_fb_helper_set_par+0x2d/0x60 [drm_kms_helper]
    intel_fbdev_set_par+0x18/0x70 [i915]
    fb_set_var+0x236/0x460
    fbcon_blank+0x30f/0x350
    do_unblank_screen+0xd2/0x1a0
    vt_ioctl+0x507/0x12a0
    tty_ioctl+0x355/0xc30
    do_vfs_ioctl+0xa3/0x5e0
    SyS_ioctl+0x79/0x90
    entry_SYSCALL_64_fastpath+0x13/0x94

 - i915 unpin_work workqueue:

    intel_unpin_work_fn+0x58/0x140 [i915]
    process_one_work+0x1f1/0x480
    worker_thread+0x48/0x4d0
    kthread+0x101/0x140

and this patch purely papers over the issue by adding a NULL pointer
check and a WARN_ON_ONCE() to avoid the oops that would then generally
make the machine unresponsive.

Other callers of i915_gem_object_to_ggtt() seem to also check for the
returned pointer being NULL and warn about it, so this clearly has
happened before in other places.

[ Reported it originally to the i915 developers on Jan 8, applying the
  ugly workaround on my own now after triggering the problem for the
  second time with no feedback.

  This is likely to be the same bug reported as

     https://bugs.freedesktop.org/show_bug.cgi?id=98829
     https://bugs.freedesktop.org/show_bug.cgi?id=99134

  which has a patch for the underlying problem, but it hasn't gotten to
  me, so I'm applying the workaround. ]

Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/intel_display.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 77f7b1d849a4..f0b9aa7a0483 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2251,6 +2251,9 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	intel_fill_fb_ggtt_view(&view, fb, rotation);
 	vma = i915_gem_object_to_ggtt(obj, &view);
 
+	if (WARN_ON_ONCE(!vma))
+		return;
+
 	i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
 }

From 566cf877a1fcb6d6dc0126b076aad062054c2637 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 29 Jan 2017 14:25:17 -0800
Subject: [PATCH 096/322] Linux 4.10-rc6

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 098840012b9b..96b27a888285 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 VERSION = 4
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
-NAME = Anniversary Edition
+EXTRAVERSION = -rc6
+NAME = Fearless Coyote
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"

From 0a764db103376cf69d04449b10688f3516cc0b88 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
Date: Fri, 27 Jan 2017 15:24:43 +0300
Subject: [PATCH 097/322] stmmac: Discard masked flags in interrupt status
 register

DW GMAC databook says the following about bits in "Register 15 (Interrupt
Mask Register)":
--------------------------->8-------------------------
When set, this bit __disables_the_assertion_of_the_interrupt_signal__
because of the setting of XXX bit in Register 14 (Interrupt
Status Register).
--------------------------->8-------------------------

In fact even if we mask one bit in the mask register it doesn't prevent
corresponding bit to appear in the status register, it only disables
interrupt generation for corresponding event.

But currently we expect a bit different behavior: status bits to be in
sync with their masks, i.e. if mask for bit A is set in the mask
register then bit A won't appear in the interrupt status register.

This was proven to be incorrect assumption, see discussion here [1].
That misunderstanding causes unexpected behaviour of the GMAC, for
example we were happy enough to just see bogus messages about link
state changes.

So from now on we'll be only checking bits that really may trigger an
interrupt.

[1] https://lkml.org/lkml/2016/11/3/413

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Fabrice Gasnier <fabrice.gasnier@st.com>
Cc: Joachim Eastwood <manabian@gmail.com>
Cc: Phil Reid <preid@electromag.com.au>
Cc: David Miller <davem@davemloft.net>
Cc: Alexandre Torgue <alexandre.torgue@gmail.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index be3c91c7f211..5484fd726d5a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -305,8 +305,12 @@ static int dwmac1000_irq_status(struct mac_device_info *hw,
 {
 	void __iomem *ioaddr = hw->pcsr;
 	u32 intr_status = readl(ioaddr + GMAC_INT_STATUS);
+	u32 intr_mask = readl(ioaddr + GMAC_INT_MASK);
 	int ret = 0;
 
+	/* Discard masked bits */
+	intr_status &= ~intr_mask;
+
 	/* Not used events (e.g. MMC interrupts) are not handled. */
 	if ((intr_status & GMAC_INT_STATUS_MMCTIS))
 		x->mmc_tx_irq_n++;

From dc97a89e726c4e1830320d1db8215ef77ecebae0 Mon Sep 17 00:00:00 2001
From: Rafal Ozieblo <rafalo@cadence.com>
Date: Fri, 27 Jan 2017 15:08:20 +0000
Subject: [PATCH 098/322] net: macb: Fix 64 bit addressing support for GEM

This patch adds support for 32 bit GEM in
64 bit system. It checks capability at runtime
and uses appropriate buffer descriptor.

Signed-off-by: Rafal Ozieblo <rafalo@cadence.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb.c | 188 +++++++++++++++++++---------
 drivers/net/ethernet/cadence/macb.h |  22 +++-
 2 files changed, 148 insertions(+), 62 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index c0fb80acc2da..baba2db9d9c2 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -43,13 +43,13 @@
 #define DEFAULT_RX_RING_SIZE	512 /* must be power of 2 */
 #define MIN_RX_RING_SIZE	64
 #define MAX_RX_RING_SIZE	8192
-#define RX_RING_BYTES(bp)	(sizeof(struct macb_dma_desc)	\
+#define RX_RING_BYTES(bp)	(macb_dma_desc_get_size(bp)	\
 				 * (bp)->rx_ring_size)
 
 #define DEFAULT_TX_RING_SIZE	512 /* must be power of 2 */
 #define MIN_TX_RING_SIZE	64
 #define MAX_TX_RING_SIZE	4096
-#define TX_RING_BYTES(bp)	(sizeof(struct macb_dma_desc)	\
+#define TX_RING_BYTES(bp)	(macb_dma_desc_get_size(bp)	\
 				 * (bp)->tx_ring_size)
 
 /* level of occupied TX descriptors under which we wake up TX process */
@@ -78,6 +78,37 @@
  */
 #define MACB_HALT_TIMEOUT	1230
 
+/* DMA buffer descriptor might be different size
+ * depends on hardware configuration.
+ */
+static unsigned int macb_dma_desc_get_size(struct macb *bp)
+{
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+		return sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64);
+#endif
+	return sizeof(struct macb_dma_desc);
+}
+
+static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int idx)
+{
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	/* Dma buffer descriptor is 4 words length (instead of 2 words)
+	 * for 64b GEM.
+	 */
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+		idx <<= 1;
+#endif
+	return idx;
+}
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
+{
+	return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
+}
+#endif
+
 /* Ring buffer accessors */
 static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index)
 {
@@ -87,7 +118,9 @@ static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index)
 static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue,
 					  unsigned int index)
 {
-	return &queue->tx_ring[macb_tx_ring_wrap(queue->bp, index)];
+	index = macb_tx_ring_wrap(queue->bp, index);
+	index = macb_adj_dma_desc_idx(queue->bp, index);
+	return &queue->tx_ring[index];
 }
 
 static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue,
@@ -101,7 +134,7 @@ static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index)
 	dma_addr_t offset;
 
 	offset = macb_tx_ring_wrap(queue->bp, index) *
-		 sizeof(struct macb_dma_desc);
+			macb_dma_desc_get_size(queue->bp);
 
 	return queue->tx_ring_dma + offset;
 }
@@ -113,7 +146,9 @@ static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index)
 
 static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index)
 {
-	return &bp->rx_ring[macb_rx_ring_wrap(bp, index)];
+	index = macb_rx_ring_wrap(bp, index);
+	index = macb_adj_dma_desc_idx(bp, index);
+	return &bp->rx_ring[index];
 }
 
 static void *macb_rx_buffer(struct macb *bp, unsigned int index)
@@ -560,12 +595,32 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb)
 	}
 }
 
-static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr)
+static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_t addr)
 {
-	desc->addr = (u32)addr;
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	desc->addrh = (u32)(addr >> 32);
+	struct macb_dma_desc_64 *desc_64;
+
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+		desc_64 = macb_64b_desc(bp, desc);
+		desc_64->addrh = upper_32_bits(addr);
+	}
 #endif
+	desc->addr = lower_32_bits(addr);
+}
+
+static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
+{
+	dma_addr_t addr = 0;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	struct macb_dma_desc_64 *desc_64;
+
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+		desc_64 = macb_64b_desc(bp, desc);
+		addr = ((u64)(desc_64->addrh) << 32);
+	}
+#endif
+	addr |= MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+	return addr;
 }
 
 static void macb_tx_error_task(struct work_struct *work)
@@ -649,16 +704,17 @@ static void macb_tx_error_task(struct work_struct *work)
 
 	/* Set end of TX queue */
 	desc = macb_tx_desc(queue, 0);
-	macb_set_addr(desc, 0);
+	macb_set_addr(bp, desc, 0);
 	desc->ctrl = MACB_BIT(TX_USED);
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
 	/* Reinitialize the TX desc queue */
-	queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+	queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+		queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
 #endif
 	/* Make TX ring reflect state of hardware */
 	queue->tx_head = 0;
@@ -750,6 +806,7 @@ static void gem_rx_refill(struct macb *bp)
 	unsigned int		entry;
 	struct sk_buff		*skb;
 	dma_addr_t		paddr;
+	struct macb_dma_desc *desc;
 
 	while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail,
 			  bp->rx_ring_size) > 0) {
@@ -759,6 +816,7 @@ static void gem_rx_refill(struct macb *bp)
 		rmb();
 
 		bp->rx_prepared_head++;
+		desc = macb_rx_desc(bp, entry);
 
 		if (!bp->rx_skbuff[entry]) {
 			/* allocate sk_buff for this free entry in ring */
@@ -782,14 +840,14 @@ static void gem_rx_refill(struct macb *bp)
 
 			if (entry == bp->rx_ring_size - 1)
 				paddr |= MACB_BIT(RX_WRAP);
-			macb_set_addr(&(bp->rx_ring[entry]), paddr);
-			bp->rx_ring[entry].ctrl = 0;
+			macb_set_addr(bp, desc, paddr);
+			desc->ctrl = 0;
 
 			/* properly align Ethernet header */
 			skb_reserve(skb, NET_IP_ALIGN);
 		} else {
-			bp->rx_ring[entry].addr &= ~MACB_BIT(RX_USED);
-			bp->rx_ring[entry].ctrl = 0;
+			desc->addr &= ~MACB_BIT(RX_USED);
+			desc->ctrl = 0;
 		}
 	}
 
@@ -835,16 +893,13 @@ static int gem_rx(struct macb *bp, int budget)
 		bool rxused;
 
 		entry = macb_rx_ring_wrap(bp, bp->rx_tail);
-		desc = &bp->rx_ring[entry];
+		desc = macb_rx_desc(bp, entry);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
 		rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
-		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		addr |= ((u64)(desc->addrh) << 32);
-#endif
+		addr = macb_get_addr(bp, desc);
 		ctrl = desc->ctrl;
 
 		if (!rxused)
@@ -987,15 +1042,17 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 static inline void macb_init_rx_ring(struct macb *bp)
 {
 	dma_addr_t addr;
+	struct macb_dma_desc *desc = NULL;
 	int i;
 
 	addr = bp->rx_buffers_dma;
 	for (i = 0; i < bp->rx_ring_size; i++) {
-		bp->rx_ring[i].addr = addr;
-		bp->rx_ring[i].ctrl = 0;
+		desc = macb_rx_desc(bp, i);
+		macb_set_addr(bp, desc, addr);
+		desc->ctrl = 0;
 		addr += bp->rx_buffer_size;
 	}
-	bp->rx_ring[bp->rx_ring_size - 1].addr |= MACB_BIT(RX_WRAP);
+	desc->addr |= MACB_BIT(RX_WRAP);
 	bp->rx_tail = 0;
 }
 
@@ -1008,15 +1065,14 @@ static int macb_rx(struct macb *bp, int budget)
 
 	for (tail = bp->rx_tail; budget > 0; tail++) {
 		struct macb_dma_desc *desc = macb_rx_desc(bp, tail);
-		u32 addr, ctrl;
+		u32 ctrl;
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		addr = desc->addr;
 		ctrl = desc->ctrl;
 
-		if (!(addr & MACB_BIT(RX_USED)))
+		if (!(desc->addr & MACB_BIT(RX_USED)))
 			break;
 
 		if (ctrl & MACB_BIT(RX_SOF)) {
@@ -1336,7 +1392,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 	i = tx_head;
 	entry = macb_tx_ring_wrap(bp, i);
 	ctrl = MACB_BIT(TX_USED);
-	desc = &queue->tx_ring[entry];
+	desc = macb_tx_desc(queue, entry);
 	desc->ctrl = ctrl;
 
 	if (lso_ctrl) {
@@ -1358,7 +1414,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 		i--;
 		entry = macb_tx_ring_wrap(bp, i);
 		tx_skb = &queue->tx_skb[entry];
-		desc = &queue->tx_ring[entry];
+		desc = macb_tx_desc(queue, entry);
 
 		ctrl = (u32)tx_skb->size;
 		if (eof) {
@@ -1379,7 +1435,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 			ctrl |= MACB_BF(MSS_MFS, mss_mfs);
 
 		/* Set TX buffer descriptor */
-		macb_set_addr(desc, tx_skb->mapping);
+		macb_set_addr(bp, desc, tx_skb->mapping);
 		/* desc->addr must be visible to hardware before clearing
 		 * 'TX_USED' bit in desc->ctrl.
 		 */
@@ -1586,11 +1642,9 @@ static void gem_free_rx_buffers(struct macb *bp)
 		if (!skb)
 			continue;
 
-		desc = &bp->rx_ring[i];
-		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		addr |= ((u64)(desc->addrh) << 32);
-#endif
+		desc = macb_rx_desc(bp, i);
+		addr = macb_get_addr(bp, desc);
+
 		dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
 				 DMA_FROM_DEVICE);
 		dev_kfree_skb_any(skb);
@@ -1711,15 +1765,17 @@ static int macb_alloc_consistent(struct macb *bp)
 static void gem_init_rings(struct macb *bp)
 {
 	struct macb_queue *queue;
+	struct macb_dma_desc *desc = NULL;
 	unsigned int q;
 	int i;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		for (i = 0; i < bp->tx_ring_size; i++) {
-			queue->tx_ring[i].addr = 0;
-			queue->tx_ring[i].ctrl = MACB_BIT(TX_USED);
+			desc = macb_tx_desc(queue, i);
+			macb_set_addr(bp, desc, 0);
+			desc->ctrl = MACB_BIT(TX_USED);
 		}
-		queue->tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP);
+		desc->ctrl |= MACB_BIT(TX_WRAP);
 		queue->tx_head = 0;
 		queue->tx_tail = 0;
 	}
@@ -1733,16 +1789,18 @@ static void gem_init_rings(struct macb *bp)
 static void macb_init_rings(struct macb *bp)
 {
 	int i;
+	struct macb_dma_desc *desc = NULL;
 
 	macb_init_rx_ring(bp);
 
 	for (i = 0; i < bp->tx_ring_size; i++) {
-		bp->queues[0].tx_ring[i].addr = 0;
-		bp->queues[0].tx_ring[i].ctrl = MACB_BIT(TX_USED);
+		desc = macb_tx_desc(&bp->queues[0], i);
+		macb_set_addr(bp, desc, 0);
+		desc->ctrl = MACB_BIT(TX_USED);
 	}
 	bp->queues[0].tx_head = 0;
 	bp->queues[0].tx_tail = 0;
-	bp->queues[0].tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP);
+	desc->ctrl |= MACB_BIT(TX_WRAP);
 }
 
 static void macb_reset_hw(struct macb *bp)
@@ -1863,7 +1921,8 @@ static void macb_configure_dma(struct macb *bp)
 			dmacfg &= ~GEM_BIT(TXCOEN);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		dmacfg |= GEM_BIT(ADDR64);
+		if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+			dmacfg |= GEM_BIT(ADDR64);
 #endif
 		netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
@@ -1910,14 +1969,16 @@ static void macb_init_hw(struct macb *bp)
 	macb_configure_dma(bp);
 
 	/* Initialize TX and RX buffers */
-	macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma));
+	macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma));
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32));
+	if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+		macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma));
 #endif
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+		queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+		if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+			queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
 #endif
 
 		/* Enable interrupts */
@@ -2627,7 +2688,8 @@ static int macb_init(struct platform_device *pdev)
 			queue->IMR  = GEM_IMR(hw_q - 1);
 			queue->TBQP = GEM_TBQP(hw_q - 1);
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			queue->TBQPH = GEM_TBQPH(hw_q -1);
+			if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+				queue->TBQPH = GEM_TBQPH(hw_q - 1);
 #endif
 		} else {
 			/* queue0 uses legacy registers */
@@ -2637,7 +2699,8 @@ static int macb_init(struct platform_device *pdev)
 			queue->IMR  = MACB_IMR;
 			queue->TBQP = MACB_TBQP;
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			queue->TBQPH = MACB_TBQPH;
+			if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+				queue->TBQPH = MACB_TBQPH;
 #endif
 		}
 
@@ -2730,13 +2793,14 @@ static int macb_init(struct platform_device *pdev)
 static int at91ether_start(struct net_device *dev)
 {
 	struct macb *lp = netdev_priv(dev);
+	struct macb_dma_desc *desc;
 	dma_addr_t addr;
 	u32 ctl;
 	int i;
 
 	lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev,
 					 (AT91ETHER_MAX_RX_DESCR *
-					  sizeof(struct macb_dma_desc)),
+					  macb_dma_desc_get_size(lp)),
 					 &lp->rx_ring_dma, GFP_KERNEL);
 	if (!lp->rx_ring)
 		return -ENOMEM;
@@ -2748,7 +2812,7 @@ static int at91ether_start(struct net_device *dev)
 	if (!lp->rx_buffers) {
 		dma_free_coherent(&lp->pdev->dev,
 				  AT91ETHER_MAX_RX_DESCR *
-				  sizeof(struct macb_dma_desc),
+				  macb_dma_desc_get_size(lp),
 				  lp->rx_ring, lp->rx_ring_dma);
 		lp->rx_ring = NULL;
 		return -ENOMEM;
@@ -2756,13 +2820,14 @@ static int at91ether_start(struct net_device *dev)
 
 	addr = lp->rx_buffers_dma;
 	for (i = 0; i < AT91ETHER_MAX_RX_DESCR; i++) {
-		lp->rx_ring[i].addr = addr;
-		lp->rx_ring[i].ctrl = 0;
+		desc = macb_rx_desc(lp, i);
+		macb_set_addr(lp, desc, addr);
+		desc->ctrl = 0;
 		addr += AT91ETHER_MAX_RBUFF_SZ;
 	}
 
 	/* Set the Wrap bit on the last descriptor */
-	lp->rx_ring[AT91ETHER_MAX_RX_DESCR - 1].addr |= MACB_BIT(RX_WRAP);
+	desc->addr |= MACB_BIT(RX_WRAP);
 
 	/* Reset buffer index */
 	lp->rx_tail = 0;
@@ -2834,7 +2899,7 @@ static int at91ether_close(struct net_device *dev)
 
 	dma_free_coherent(&lp->pdev->dev,
 			  AT91ETHER_MAX_RX_DESCR *
-			  sizeof(struct macb_dma_desc),
+			  macb_dma_desc_get_size(lp),
 			  lp->rx_ring, lp->rx_ring_dma);
 	lp->rx_ring = NULL;
 
@@ -2885,13 +2950,15 @@ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static void at91ether_rx(struct net_device *dev)
 {
 	struct macb *lp = netdev_priv(dev);
+	struct macb_dma_desc *desc;
 	unsigned char *p_recv;
 	struct sk_buff *skb;
 	unsigned int pktlen;
 
-	while (lp->rx_ring[lp->rx_tail].addr & MACB_BIT(RX_USED)) {
+	desc = macb_rx_desc(lp, lp->rx_tail);
+	while (desc->addr & MACB_BIT(RX_USED)) {
 		p_recv = lp->rx_buffers + lp->rx_tail * AT91ETHER_MAX_RBUFF_SZ;
-		pktlen = MACB_BF(RX_FRMLEN, lp->rx_ring[lp->rx_tail].ctrl);
+		pktlen = MACB_BF(RX_FRMLEN, desc->ctrl);
 		skb = netdev_alloc_skb(dev, pktlen + 2);
 		if (skb) {
 			skb_reserve(skb, 2);
@@ -2905,17 +2972,19 @@ static void at91ether_rx(struct net_device *dev)
 			lp->stats.rx_dropped++;
 		}
 
-		if (lp->rx_ring[lp->rx_tail].ctrl & MACB_BIT(RX_MHASH_MATCH))
+		if (desc->ctrl & MACB_BIT(RX_MHASH_MATCH))
 			lp->stats.multicast++;
 
 		/* reset ownership bit */
-		lp->rx_ring[lp->rx_tail].addr &= ~MACB_BIT(RX_USED);
+		desc->addr &= ~MACB_BIT(RX_USED);
 
 		/* wrap after last buffer */
 		if (lp->rx_tail == AT91ETHER_MAX_RX_DESCR - 1)
 			lp->rx_tail = 0;
 		else
 			lp->rx_tail++;
+
+		desc = macb_rx_desc(lp, lp->rx_tail);
 	}
 }
 
@@ -3211,8 +3280,11 @@ static int macb_probe(struct platform_device *pdev)
 	device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32)
+	if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
 		dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+		bp->hw_dma_cap = HW_DMA_CAP_64B;
+	} else
+		bp->hw_dma_cap = HW_DMA_CAP_32B;
 #endif
 
 	spin_lock_init(&bp->lock);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index d67adad67be1..fc8550a5d47f 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -385,6 +385,8 @@
 /* Bitfields in DCFG6. */
 #define GEM_PBUF_LSO_OFFSET			27
 #define GEM_PBUF_LSO_SIZE			1
+#define GEM_DAW64_OFFSET			23
+#define GEM_DAW64_SIZE				1
 
 /* Constants for CLK */
 #define MACB_CLK_DIV8				0
@@ -487,12 +489,20 @@
 struct macb_dma_desc {
 	u32	addr;
 	u32	ctrl;
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	u32     addrh;
-	u32     resvd;
-#endif
 };
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+enum macb_hw_dma_cap {
+	HW_DMA_CAP_32B,
+	HW_DMA_CAP_64B,
+};
+
+struct macb_dma_desc_64 {
+	u32 addrh;
+	u32 resvd;
+};
+#endif
+
 /* DMA descriptor bitfields */
 #define MACB_RX_USED_OFFSET			0
 #define MACB_RX_USED_SIZE			1
@@ -874,6 +884,10 @@ struct macb {
 	unsigned int		jumbo_max_len;
 
 	u32			wol;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	enum macb_hw_dma_cap hw_dma_cap;
+#endif
 };
 
 static inline bool macb_is_gem(struct macb *bp)

From f1712c73714088a7252d276a57126d56c7d37e64 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 27 Jan 2017 08:11:44 -0800
Subject: [PATCH 099/322] can: Fix kernel panic at security_sock_rcv_skb

Zhang Yanmin reported crashes [1] and provided a patch adding a
synchronize_rcu() call in can_rx_unregister()

The main problem seems that the sockets themselves are not RCU
protected.

If CAN uses RCU for delivery, then sockets should be freed only after
one RCU grace period.

Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's
ease stable backports with the following fix instead.

[1]
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff81495e25>] selinux_socket_sock_rcv_skb+0x65/0x2a0

Call Trace:
 <IRQ>
 [<ffffffff81485d8c>] security_sock_rcv_skb+0x4c/0x60
 [<ffffffff81d55771>] sk_filter+0x41/0x210
 [<ffffffff81d12913>] sock_queue_rcv_skb+0x53/0x3a0
 [<ffffffff81f0a2b3>] raw_rcv+0x2a3/0x3c0
 [<ffffffff81f06eab>] can_rcv_filter+0x12b/0x370
 [<ffffffff81f07af9>] can_receive+0xd9/0x120
 [<ffffffff81f07beb>] can_rcv+0xab/0x100
 [<ffffffff81d362ac>] __netif_receive_skb_core+0xd8c/0x11f0
 [<ffffffff81d36734>] __netif_receive_skb+0x24/0xb0
 [<ffffffff81d37f67>] process_backlog+0x127/0x280
 [<ffffffff81d36f7b>] net_rx_action+0x33b/0x4f0
 [<ffffffff810c88d4>] __do_softirq+0x184/0x440
 [<ffffffff81f9e86c>] do_softirq_own_stack+0x1c/0x30
 <EOI>
 [<ffffffff810c76fb>] do_softirq.part.18+0x3b/0x40
 [<ffffffff810c8bed>] do_softirq+0x1d/0x20
 [<ffffffff81d30085>] netif_rx_ni+0xe5/0x110
 [<ffffffff8199cc87>] slcan_receive_buf+0x507/0x520
 [<ffffffff8167ef7c>] flush_to_ldisc+0x21c/0x230
 [<ffffffff810e3baf>] process_one_work+0x24f/0x670
 [<ffffffff810e44ed>] worker_thread+0x9d/0x6f0
 [<ffffffff810e4450>] ? rescuer_thread+0x480/0x480
 [<ffffffff810ebafc>] kthread+0x12c/0x150
 [<ffffffff81f9ccef>] ret_from_fork+0x3f/0x70

Reported-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/can/core.h |  7 +++----
 net/can/af_can.c         | 12 ++++++++++--
 net/can/af_can.h         |  3 ++-
 net/can/bcm.c            |  4 ++--
 net/can/gw.c             |  2 +-
 net/can/raw.c            |  4 ++--
 6 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index a0875001b13c..df08a41d5be5 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -45,10 +45,9 @@ struct can_proto {
 extern int  can_proto_register(const struct can_proto *cp);
 extern void can_proto_unregister(const struct can_proto *cp);
 
-extern int  can_rx_register(struct net_device *dev, canid_t can_id,
-			    canid_t mask,
-			    void (*func)(struct sk_buff *, void *),
-			    void *data, char *ident);
+int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
+		    void (*func)(struct sk_buff *, void *),
+		    void *data, char *ident, struct sock *sk);
 
 extern void can_rx_unregister(struct net_device *dev, canid_t can_id,
 			      canid_t mask,
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 1108079d934f..5488e4a6ccd0 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -445,6 +445,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
  * @func: callback function on filter match
  * @data: returned parameter for callback function
  * @ident: string for calling module identification
+ * @sk: socket pointer (might be NULL)
  *
  * Description:
  *  Invokes the callback function with the received sk_buff and the given
@@ -468,7 +469,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
  */
 int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
 		    void (*func)(struct sk_buff *, void *), void *data,
-		    char *ident)
+		    char *ident, struct sock *sk)
 {
 	struct receiver *r;
 	struct hlist_head *rl;
@@ -496,6 +497,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
 		r->func    = func;
 		r->data    = data;
 		r->ident   = ident;
+		r->sk      = sk;
 
 		hlist_add_head_rcu(&r->list, rl);
 		d->entries++;
@@ -520,8 +522,11 @@ EXPORT_SYMBOL(can_rx_register);
 static void can_rx_delete_receiver(struct rcu_head *rp)
 {
 	struct receiver *r = container_of(rp, struct receiver, rcu);
+	struct sock *sk = r->sk;
 
 	kmem_cache_free(rcv_cache, r);
+	if (sk)
+		sock_put(sk);
 }
 
 /**
@@ -596,8 +601,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
 	spin_unlock(&can_rcvlists_lock);
 
 	/* schedule the receiver item for deletion */
-	if (r)
+	if (r) {
+		if (r->sk)
+			sock_hold(r->sk);
 		call_rcu(&r->rcu, can_rx_delete_receiver);
+	}
 }
 EXPORT_SYMBOL(can_rx_unregister);
 
diff --git a/net/can/af_can.h b/net/can/af_can.h
index fca0fe9fc45a..b86f5129e838 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -50,13 +50,14 @@
 
 struct receiver {
 	struct hlist_node list;
-	struct rcu_head rcu;
 	canid_t can_id;
 	canid_t mask;
 	unsigned long matches;
 	void (*func)(struct sk_buff *, void *);
 	void *data;
 	char *ident;
+	struct sock *sk;
+	struct rcu_head rcu;
 };
 
 #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 21ac75390e3d..5c9407181918 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1216,7 +1216,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 				err = can_rx_register(dev, op->can_id,
 						      REGMASK(op->can_id),
 						      bcm_rx_handler, op,
-						      "bcm");
+						      "bcm", sk);
 
 				op->rx_reg_dev = dev;
 				dev_put(dev);
@@ -1225,7 +1225,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		} else
 			err = can_rx_register(NULL, op->can_id,
 					      REGMASK(op->can_id),
-					      bcm_rx_handler, op, "bcm");
+					      bcm_rx_handler, op, "bcm", sk);
 		if (err) {
 			/* this bcm rx op is broken -> remove it */
 			list_del(&op->list);
diff --git a/net/can/gw.c b/net/can/gw.c
index a54ab0c82104..7056a1a2bb70 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -442,7 +442,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj)
 {
 	return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
 			       gwj->ccgw.filter.can_mask, can_can_gw_rcv,
-			       gwj, "gw");
+			       gwj, "gw", NULL);
 }
 
 static inline void cgw_unregister_filter(struct cgw_job *gwj)
diff --git a/net/can/raw.c b/net/can/raw.c
index b075f028d7e2..6dc546a06673 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -190,7 +190,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk,
 	for (i = 0; i < count; i++) {
 		err = can_rx_register(dev, filter[i].can_id,
 				      filter[i].can_mask,
-				      raw_rcv, sk, "raw");
+				      raw_rcv, sk, "raw", sk);
 		if (err) {
 			/* clean up successfully registered filters */
 			while (--i >= 0)
@@ -211,7 +211,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
 
 	if (err_mask)
 		err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
-				      raw_rcv, sk, "raw");
+				      raw_rcv, sk, "raw", sk);
 
 	return err;
 }

From cf626c3b252b2c9d131be0dd66096ec3bf729e54 Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Date: Fri, 27 Jan 2017 21:39:03 +0100
Subject: [PATCH 100/322] net: phy: micrel: KSZ8795 do not set
 SUPPORTED_[Asym_]Pause

As pr commit "net: phy: phy drivers should not set SUPPORTED_[Asym_]Pause"
this phy driver should not set these feature bits.

Signed-off-by: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Fixes: 9d162ed69f51 ("net: phy: micrel: add support for KSZ8795")
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index e55809c5beb7..6742070ca676 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1012,7 +1012,7 @@ static struct phy_driver ksphy_driver[] = {
 	.phy_id		= PHY_ID_KSZ8795,
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
 	.name		= "Micrel KSZ8795",
-	.features	= (SUPPORTED_Pause | SUPPORTED_Asym_Pause),
+	.features	= PHY_BASIC_FEATURES,
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,

From d1156b489fa734d1af763d6a07b1637c01bb0aed Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Sat, 28 Jan 2017 01:07:30 +0300
Subject: [PATCH 101/322] net: adaptec: starfire: add checks for dma mapping
 errors

init_ring(), refill_rx_ring() and start_tx() don't check
if mapping dma memory succeed.
The patch adds the checks and failure handling.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/adaptec/starfire.c | 45 +++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index c12d2618eebf..3872ab96b80a 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -1152,6 +1152,12 @@ static void init_ring(struct net_device *dev)
 		if (skb == NULL)
 			break;
 		np->rx_info[i].mapping = pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE);
+		if (pci_dma_mapping_error(np->pci_dev,
+					  np->rx_info[i].mapping)) {
+			dev_kfree_skb(skb);
+			np->rx_info[i].skb = NULL;
+			break;
+		}
 		/* Grrr, we cannot offset to correctly align the IP header. */
 		np->rx_ring[i].rxaddr = cpu_to_dma(np->rx_info[i].mapping | RxDescValid);
 	}
@@ -1182,8 +1188,9 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
 {
 	struct netdev_private *np = netdev_priv(dev);
 	unsigned int entry;
+	unsigned int prev_tx;
 	u32 status;
-	int i;
+	int i, j;
 
 	/*
 	 * be cautious here, wrapping the queue has weird semantics
@@ -1201,6 +1208,7 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
 	}
 #endif /* ZEROCOPY && HAS_BROKEN_FIRMWARE */
 
+	prev_tx = np->cur_tx;
 	entry = np->cur_tx % TX_RING_SIZE;
 	for (i = 0; i < skb_num_frags(skb); i++) {
 		int wrap_ring = 0;
@@ -1234,6 +1242,11 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
 					       skb_frag_size(this_frag),
 					       PCI_DMA_TODEVICE);
 		}
+		if (pci_dma_mapping_error(np->pci_dev,
+					  np->tx_info[entry].mapping)) {
+			dev->stats.tx_dropped++;
+			goto err_out;
+		}
 
 		np->tx_ring[entry].addr = cpu_to_dma(np->tx_info[entry].mapping);
 		np->tx_ring[entry].status = cpu_to_le32(status);
@@ -1268,8 +1281,30 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
 		netif_stop_queue(dev);
 
 	return NETDEV_TX_OK;
-}
 
+err_out:
+	entry = prev_tx % TX_RING_SIZE;
+	np->tx_info[entry].skb = NULL;
+	if (i > 0) {
+		pci_unmap_single(np->pci_dev,
+				 np->tx_info[entry].mapping,
+				 skb_first_frag_len(skb),
+				 PCI_DMA_TODEVICE);
+		np->tx_info[entry].mapping = 0;
+		entry = (entry + np->tx_info[entry].used_slots) % TX_RING_SIZE;
+		for (j = 1; j < i; j++) {
+			pci_unmap_single(np->pci_dev,
+					 np->tx_info[entry].mapping,
+					 skb_frag_size(
+						&skb_shinfo(skb)->frags[j-1]),
+					 PCI_DMA_TODEVICE);
+			entry++;
+		}
+	}
+	dev_kfree_skb_any(skb);
+	np->cur_tx = prev_tx;
+	return NETDEV_TX_OK;
+}
 
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
@@ -1569,6 +1604,12 @@ static void refill_rx_ring(struct net_device *dev)
 				break;	/* Better luck next round. */
 			np->rx_info[entry].mapping =
 				pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE);
+			if (pci_dma_mapping_error(np->pci_dev,
+						np->rx_info[entry].mapping)) {
+				dev_kfree_skb(skb);
+				np->rx_info[entry].skb = NULL;
+				break;
+			}
 			np->rx_ring[entry].rxaddr =
 				cpu_to_dma(np->rx_info[entry].mapping | RxDescValid);
 		}

From a0615a16f7d0ceb5804d295203c302d496d8ee91 Mon Sep 17 00:00:00 2001
From: Reza Arbab <arbab@linux.vnet.ibm.com>
Date: Wed, 25 Jan 2017 09:54:33 -0600
Subject: [PATCH 102/322] powerpc/mm: Use the correct pointer when setting a
 2MB pte

When setting a 2MB pte, radix__map_kernel_page() is using the address

	ptep = (pte_t *)pudp;

Fix this conversion to use pmdp instead. Use pmdp_ptep() to do this
instead of casting the pointer.

Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines")
Cc: stable@vger.kernel.org # v4.7+
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/pgtable-radix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index cfa53ccc8baf..34f1a0dbc898 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -65,7 +65,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 		if (!pmdp)
 			return -ENOMEM;
 		if (map_page_size == PMD_SIZE) {
-			ptep = (pte_t *)pudp;
+			ptep = pmdp_ptep(pmdp);
 			goto set_the_pte;
 		}
 		ptep = pte_alloc_kernel(pmdp, ea);
@@ -90,7 +90,7 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 		}
 		pmdp = pmd_offset(pudp, ea);
 		if (map_page_size == PMD_SIZE) {
-			ptep = (pte_t *)pudp;
+			ptep = pmdp_ptep(pmdp);
 			goto set_the_pte;
 		}
 		if (!pmd_present(*pmdp)) {

From e82d02580af45663fad6d3596e4344c606e81e10 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Mon, 23 Jan 2017 15:15:32 +0800
Subject: [PATCH 103/322] pinctrl: berlin-bg4ct: fix the value for "sd1a" of
 pin SCRD0_CRD_PRES

This should be a typo.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/berlin/berlin-bg4ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/berlin/berlin-bg4ct.c b/drivers/pinctrl/berlin/berlin-bg4ct.c
index 09172043d589..c617ec49e9ed 100644
--- a/drivers/pinctrl/berlin/berlin-bg4ct.c
+++ b/drivers/pinctrl/berlin/berlin-bg4ct.c
@@ -217,7 +217,7 @@ static const struct berlin_desc_group berlin4ct_soc_pinctrl_groups[] = {
 	BERLIN_PINCTRL_GROUP("SCRD0_CRD_PRES", 0xc, 0x3, 0x15,
 			BERLIN_PINCTRL_FUNCTION(0x0, "gpio"), /* GPIO20 */
 			BERLIN_PINCTRL_FUNCTION(0x1, "scrd0"), /* crd pres */
-			BERLIN_PINCTRL_FUNCTION(0x1, "sd1a")), /* DAT3 */
+			BERLIN_PINCTRL_FUNCTION(0x3, "sd1a")), /* DAT3 */
 	BERLIN_PINCTRL_GROUP("SPI1_SS0n", 0xc, 0x3, 0x18,
 			BERLIN_PINCTRL_FUNCTION(0x0, "spi1"), /* SS0n */
 			BERLIN_PINCTRL_FUNCTION(0x1, "gpio"), /* GPIO37 */

From 2154d94b40ea2a5de05245521371d0461bb0d669 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Mon, 23 Jan 2017 09:21:30 +0100
Subject: [PATCH 104/322] pinctrl: sunxi: Don't enforce bias disable (for now)

Commit 07fe64ba213f ("pinctrl: sunxi: Handle bias disable") actually
enforced enforced the disabling of the pull up/down resistors instead of
ignoring it like it was done before.

This was part of a wider rework to switch to the generic pinconf bindings,
and was meant to be merged together with DT patches that were switching to
it, and removing what was considered default values by both the binding and
the boards. This included no bias on a pin.

However, those DT patches were delayed to 4.11, which would be fine only
for a significant number boards having the bias setup wrong, which in turns
break the MMC on those boards (and possibly other devices too).

In order to avoid conflicts as much as possible, bring back the old
behaviour for 4.10, and we'll revert that commit once all the DT bits will
have landed.

Tested-by: Priit Laes <plaes@plaes.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/sunxi/pinctrl-sunxi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
index 0eb51e33cb1b..207a8de4e1ed 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c
@@ -564,8 +564,7 @@ static int sunxi_pconf_group_set(struct pinctrl_dev *pctldev,
 			val = arg / 10 - 1;
 			break;
 		case PIN_CONFIG_BIAS_DISABLE:
-			val = 0;
-			break;
+			continue;
 		case PIN_CONFIG_BIAS_PULL_UP:
 			if (arg == 0)
 				return -EINVAL;

From 19b26d92dfb70f56440c187a20c49102ab648b97 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 24 Jan 2017 17:28:22 +0200
Subject: [PATCH 105/322] pinctrl: intel: merrifield: Add missed check in
 mrfld_config_set()

Not every pin can be configured. Add missed check to prevent access
violation.

Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support")
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-merrifield.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c
index b21896126f76..4d4ef42a39b5 100644
--- a/drivers/pinctrl/intel/pinctrl-merrifield.c
+++ b/drivers/pinctrl/intel/pinctrl-merrifield.c
@@ -794,6 +794,9 @@ static int mrfld_config_set(struct pinctrl_dev *pctldev, unsigned int pin,
 	unsigned int i;
 	int ret;
 
+	if (!mrfld_buf_available(mp, pin))
+		return -ENOTSUPP;
+
 	for (i = 0; i < nconfigs; i++) {
 		switch (pinconf_to_config_param(configs[i])) {
 		case PIN_CONFIG_BIAS_DISABLE:

From 24c2503255d35c269b67162c397a1a1c1e02f6ce Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 25 Jan 2017 21:00:48 +0100
Subject: [PATCH 106/322] x86/microcode: Do not access the initrd after it has
 been freed

When we look for microcode blobs, we first try builtin and if that
doesn't succeed, we fallback to the initrd supplied to the kernel.

However, at some point doing boot, that initrd gets jettisoned and we
shouldn't access it anymore. But we do, as the below KASAN report shows.
That's because find_microcode_in_initrd() doesn't check whether the
initrd is still valid or not.

So do that.

  ==================================================================
  BUG: KASAN: use-after-free in find_cpio_data
  Read of size 1 by task swapper/1/0
  page:ffffea0000db9d40 count:0 mapcount:0 mapping:          (null) index:0x1
  flags: 0x100000000000000()
  raw: 0100000000000000 0000000000000000 0000000000000001 00000000ffffffff
  raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000
  page dumped because: kasan: bad access detected
  CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W       4.10.0-rc5-debug-00075-g2dbde22 #3
  Hardware name: Dell Inc. XPS 13 9360/0839Y6, BIOS 1.2.3 12/01/2016
  Call Trace:
   dump_stack
   ? _atomic_dec_and_lock
   ? __dump_page
   kasan_report_error
   ? pointer
   ? find_cpio_data
   __asan_report_load1_noabort
   ? find_cpio_data
   find_cpio_data
   ? vsprintf
   ? dump_stack
   ? get_ucode_user
   ? print_usage_bug
   find_microcode_in_initrd
   __load_ucode_intel
   ? collect_cpu_info_early
   ? debug_check_no_locks_freed
   load_ucode_intel_ap
   ? collect_cpu_info
   ? trace_hardirqs_on
   ? flat_send_IPI_mask_allbutself
   load_ucode_ap
   ? get_builtin_firmware
   ? flush_tlb_func
   ? do_raw_spin_trylock
   ? cpumask_weight
   cpu_init
   ? trace_hardirqs_off
   ? play_dead_common
   ? native_play_dead
   ? hlt_play_dead
   ? syscall_init
   ? arch_cpu_idle_dead
   ? do_idle
   start_secondary
   start_cpu
  Memory state around the buggy address:
   ffff880036e74f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
   ffff880036e74f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  >ffff880036e75000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
                     ^
   ffff880036e75080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
   ffff880036e75100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
  ==================================================================

Reported-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Tested-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170126165833.evjemhbqzaepirxo@pd.tnic
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/microcode.h     |  1 +
 arch/x86/kernel/cpu/microcode/amd.c  |  5 +++--
 arch/x86/kernel/cpu/microcode/core.c | 22 +++++++++++++++++-----
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 38711df3bcb5..2266f864b747 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -140,6 +140,7 @@ extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 void reload_early_microcode(void);
 extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
+extern bool initrd_gone;
 #else
 static inline int __init microcode_init(void)			{ return 0; };
 static inline void __init load_ucode_bsp(void)			{ }
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 6a31e2691f3a..079e81733a58 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -384,8 +384,9 @@ void load_ucode_amd_ap(unsigned int family)
 reget:
 		if (!get_builtin_microcode(&cp, family)) {
 #ifdef CONFIG_BLK_DEV_INITRD
-			cp = find_cpio_data(ucode_path, (void *)initrd_start,
-					    initrd_end - initrd_start, NULL);
+			if (!initrd_gone)
+				cp = find_cpio_data(ucode_path, (void *)initrd_start,
+						    initrd_end - initrd_start, NULL);
 #endif
 			if (!(cp.data && cp.size)) {
 				/*
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 2af69d27da62..73102d932760 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -46,6 +46,8 @@
 static struct microcode_ops	*microcode_ops;
 static bool dis_ucode_ldr = true;
 
+bool initrd_gone;
+
 LIST_HEAD(microcode_cache);
 
 /*
@@ -190,21 +192,24 @@ void load_ucode_ap(void)
 static int __init save_microcode_in_initrd(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
+	int ret = -EINVAL;
 
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		if (c->x86 >= 6)
-			return save_microcode_in_initrd_intel();
+			ret = save_microcode_in_initrd_intel();
 		break;
 	case X86_VENDOR_AMD:
 		if (c->x86 >= 0x10)
-			return save_microcode_in_initrd_amd(c->x86);
+			ret = save_microcode_in_initrd_amd(c->x86);
 		break;
 	default:
 		break;
 	}
 
-	return -EINVAL;
+	initrd_gone = true;
+
+	return ret;
 }
 
 struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
@@ -247,9 +252,16 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 	 * has the virtual address of the beginning of the initrd. It also
 	 * possibly relocates the ramdisk. In either case, initrd_start contains
 	 * the updated address so use that instead.
+	 *
+	 * initrd_gone is for the hotplug case where we've thrown out initrd
+	 * already.
 	 */
-	if (!use_pa && initrd_start)
-		start = initrd_start;
+	if (!use_pa) {
+		if (initrd_gone)
+			return (struct cpio_data){ NULL, 0, "" };
+		if (initrd_start)
+			start = initrd_start;
+	}
 
 	return find_cpio_data(path, (void *)start, size, NULL);
 #else /* !CONFIG_BLK_DEV_INITRD */

From 4e5b54f127426c82dc2816340c26d951a5bb3429 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 18 Dec 2016 14:35:45 +0100
Subject: [PATCH 107/322] drm: prevent double-(un)registration for connectors

If we're unlucky then the registration from a hotplugged connector
might race with the final registration step on driver load. And since
MST topology discover is asynchronous that's even somewhat likely.

v2: Also update the kerneldoc for @registered!

v3: Review from Chris:
- Improve kerneldoc for late_register/early_unregister callbacks.
- Use mutex_destroy.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sean Paul <seanpaul@chromium.org>
Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161218133545.2106-1-daniel.vetter@ffwll.ch
(cherry picked from commit e73ab00e9a0f1731f34d0620a9c55f5c30c4ad4e)
---
 drivers/gpu/drm/drm_connector.c | 20 +++++++++++++++-----
 include/drm/drm_connector.h     | 16 +++++++++++++++-
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 5a4526289392..8be60a4576b3 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -225,6 +225,7 @@ int drm_connector_init(struct drm_device *dev,
 
 	INIT_LIST_HEAD(&connector->probed_modes);
 	INIT_LIST_HEAD(&connector->modes);
+	mutex_init(&connector->mutex);
 	connector->edid_blob_ptr = NULL;
 	connector->status = connector_status_unknown;
 
@@ -359,6 +360,8 @@ void drm_connector_cleanup(struct drm_connector *connector)
 		connector->funcs->atomic_destroy_state(connector,
 						       connector->state);
 
+	mutex_destroy(&connector->mutex);
+
 	memset(connector, 0, sizeof(*connector));
 }
 EXPORT_SYMBOL(drm_connector_cleanup);
@@ -374,14 +377,15 @@ EXPORT_SYMBOL(drm_connector_cleanup);
  */
 int drm_connector_register(struct drm_connector *connector)
 {
-	int ret;
+	int ret = 0;
 
+	mutex_lock(&connector->mutex);
 	if (connector->registered)
-		return 0;
+		goto unlock;
 
 	ret = drm_sysfs_connector_add(connector);
 	if (ret)
-		return ret;
+		goto unlock;
 
 	ret = drm_debugfs_connector_add(connector);
 	if (ret) {
@@ -397,12 +401,14 @@ int drm_connector_register(struct drm_connector *connector)
 	drm_mode_object_register(connector->dev, &connector->base);
 
 	connector->registered = true;
-	return 0;
+	goto unlock;
 
 err_debugfs:
 	drm_debugfs_connector_remove(connector);
 err_sysfs:
 	drm_sysfs_connector_remove(connector);
+unlock:
+	mutex_unlock(&connector->mutex);
 	return ret;
 }
 EXPORT_SYMBOL(drm_connector_register);
@@ -415,8 +421,11 @@ EXPORT_SYMBOL(drm_connector_register);
  */
 void drm_connector_unregister(struct drm_connector *connector)
 {
-	if (!connector->registered)
+	mutex_lock(&connector->mutex);
+	if (!connector->registered) {
+		mutex_unlock(&connector->mutex);
 		return;
+	}
 
 	if (connector->funcs->early_unregister)
 		connector->funcs->early_unregister(connector);
@@ -425,6 +434,7 @@ void drm_connector_unregister(struct drm_connector *connector)
 	drm_debugfs_connector_remove(connector);
 
 	connector->registered = false;
+	mutex_unlock(&connector->mutex);
 }
 EXPORT_SYMBOL(drm_connector_unregister);
 
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index a9b95246e26e..045a97cbeba2 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -381,6 +381,8 @@ struct drm_connector_funcs {
 	 * core drm connector interfaces. Everything added from this callback
 	 * should be unregistered in the early_unregister callback.
 	 *
+	 * This is called while holding drm_connector->mutex.
+	 *
 	 * Returns:
 	 *
 	 * 0 on success, or a negative error code on failure.
@@ -395,6 +397,8 @@ struct drm_connector_funcs {
 	 * late_register(). It is called from drm_connector_unregister(),
 	 * early in the driver unload sequence to disable userspace access
 	 * before data structures are torndown.
+	 *
+	 * This is called while holding drm_connector->mutex.
 	 */
 	void (*early_unregister)(struct drm_connector *connector);
 
@@ -559,7 +563,6 @@ struct drm_cmdline_mode {
  * @interlace_allowed: can this connector handle interlaced modes?
  * @doublescan_allowed: can this connector handle doublescan?
  * @stereo_allowed: can this connector handle stereo modes?
- * @registered: is this connector exposed (registered) with userspace?
  * @modes: modes available on this connector (from fill_modes() + user)
  * @status: one of the drm_connector_status enums (connected, not, or unknown)
  * @probed_modes: list of modes derived directly from the display
@@ -607,6 +610,13 @@ struct drm_connector {
 
 	char *name;
 
+	/**
+	 * @mutex: Lock for general connector state, but currently only protects
+	 * @registered. Most of the connector state is still protected by the
+	 * mutex in &drm_mode_config.
+	 */
+	struct mutex mutex;
+
 	/**
 	 * @index: Compacted connector index, which matches the position inside
 	 * the mode_config.list for drivers not supporting hot-add/removing. Can
@@ -620,6 +630,10 @@ struct drm_connector {
 	bool interlace_allowed;
 	bool doublescan_allowed;
 	bool stereo_allowed;
+	/**
+	 * @registered: Is this connector exposed (registered) with userspace?
+	 * Protected by @mutex.
+	 */
 	bool registered;
 	struct list_head modes; /* list of modes on this connector */
 

From e6e7b48b295afa5a5ab440de0a94d9ad8b3ce2d0 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 12 Jan 2017 17:15:56 +0100
Subject: [PATCH 108/322] drm: Don't race connector registration

I was under the misconception that the sysfs dev stuff can be fully
set up, and then registered all in one step with device_add. That's
true for properties and property groups, but not for parents and child
devices. Those must be fully registered before you can register a
child.

Add a bit of tracking to make sure that asynchronous mst connector
hotplugging gets this right. For consistency we rely upon the implicit
barriers of the connector->mutex, which is taken anyway, to ensure
that at least either the connector or device registration call will
work out.

Mildly tested since I can't reliably reproduce this on my mst box
here.

Reported-by: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1484237756-2720-1-git-send-email-daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/drm_connector.c | 3 +++
 drivers/gpu/drm/drm_drv.c       | 4 ++++
 include/drm/drmP.h              | 1 +
 3 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 8be60a4576b3..7a7019ac9388 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -379,6 +379,9 @@ int drm_connector_register(struct drm_connector *connector)
 {
 	int ret = 0;
 
+	if (!connector->dev->registered)
+		return 0;
+
 	mutex_lock(&connector->mutex);
 	if (connector->registered)
 		goto unlock;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index a525751b4559..6594b4088f11 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -745,6 +745,8 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 	if (ret)
 		goto err_minors;
 
+	dev->registered = true;
+
 	if (dev->driver->load) {
 		ret = dev->driver->load(dev, flags);
 		if (ret)
@@ -785,6 +787,8 @@ void drm_dev_unregister(struct drm_device *dev)
 
 	drm_lastclose(dev);
 
+	dev->registered = false;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_modeset_unregister_all(dev);
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 192016e2b518..9c4ee144b5f6 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -517,6 +517,7 @@ struct drm_device {
 	struct drm_minor *control;		/**< Control node */
 	struct drm_minor *primary;		/**< Primary node */
 	struct drm_minor *render;		/**< Render node */
+	bool registered;
 
 	/* currently active master for this device. Protected by master_mutex */
 	struct drm_master *master;

From a06393ed03167771246c4c43192d9c264bc48412 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 18 Jan 2017 21:30:51 +0100
Subject: [PATCH 109/322] can: bcm: fix hrtimer/tasklet termination in bcm op
 removal

When removing a bcm tx operation either a hrtimer or a tasklet might run.
As the hrtimer triggers its associated tasklet and vice versa we need to
take care to mutually terminate both handlers.

Reported-by: Michael Josenhans <michael.josenhans@web.de>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Tested-by: Michael Josenhans <michael.josenhans@web.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/bcm.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index 5c9407181918..95d13b233c65 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -734,14 +734,23 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
 
 static void bcm_remove_op(struct bcm_op *op)
 {
-	hrtimer_cancel(&op->timer);
-	hrtimer_cancel(&op->thrtimer);
+	if (op->tsklet.func) {
+		while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
+		       test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
+		       hrtimer_active(&op->timer)) {
+			hrtimer_cancel(&op->timer);
+			tasklet_kill(&op->tsklet);
+		}
+	}
 
-	if (op->tsklet.func)
-		tasklet_kill(&op->tsklet);
-
-	if (op->thrtsklet.func)
-		tasklet_kill(&op->thrtsklet);
+	if (op->thrtsklet.func) {
+		while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
+		       test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
+		       hrtimer_active(&op->thrtimer)) {
+			hrtimer_cancel(&op->thrtimer);
+			tasklet_kill(&op->thrtsklet);
+		}
+	}
 
 	if ((op->frames) && (op->frames != &op->sframe))
 		kfree(op->frames);

From a76a82a3e38c8d3fb6499e3dfaeb0949241ab588 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Jan 2017 16:39:55 +0100
Subject: [PATCH 110/322] perf/core: Fix use-after-free bug

Dmitry reported a KASAN use-after-free on event->group_leader.

It turns out there's a hole in perf_remove_from_context() due to
event_function_call() not calling its function when the task
associated with the event is already dead.

In this case the event will have been detached from the task, but the
grouping will have been retained, such that group operations might
still work properly while there are live child events etc.

This does however mean that we can miss a perf_group_detach() call
when the group decomposes, this in turn can then lead to
use-after-free.

Fix it by explicitly doing the group detach if its still required.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org # v4.5+
Cc: syzkaller <syzkaller@googlegroups.com>
Fixes: 63b6da39bb38 ("perf: Fix perf_event_exit_task() race")
Link: http://lkml.kernel.org/r/20170126153955.GD6515@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 110b38a58493..4e1f4c0070ce 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1469,7 +1469,6 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-
 	lockdep_assert_held(&ctx->lock);
 
 	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1624,6 +1623,8 @@ static void perf_group_attach(struct perf_event *event)
 {
 	struct perf_event *group_leader = event->group_leader, *pos;
 
+	lockdep_assert_held(&event->ctx->lock);
+
 	/*
 	 * We can have double attach due to group movement in perf_event_open.
 	 */
@@ -1697,6 +1698,8 @@ static void perf_group_detach(struct perf_event *event)
 	struct perf_event *sibling, *tmp;
 	struct list_head *list = NULL;
 
+	lockdep_assert_held(&event->ctx->lock);
+
 	/*
 	 * We can have double detach due to exit/hot-unplug + close.
 	 */
@@ -1895,9 +1898,29 @@ __perf_remove_from_context(struct perf_event *event,
  */
 static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-	lockdep_assert_held(&event->ctx->mutex);
+	struct perf_event_context *ctx = event->ctx;
+
+	lockdep_assert_held(&ctx->mutex);
 
 	event_function_call(event, __perf_remove_from_context, (void *)flags);
+
+	/*
+	 * The above event_function_call() can NO-OP when it hits
+	 * TASK_TOMBSTONE. In that case we must already have been detached
+	 * from the context (by perf_event_exit_event()) but the grouping
+	 * might still be in-tact.
+	 */
+	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+	if ((flags & DETACH_GROUP) &&
+	    (event->attach_state & PERF_ATTACH_GROUP)) {
+		/*
+		 * Since in that case we cannot possibly be scheduled, simply
+		 * detach now.
+		 */
+		raw_spin_lock_irq(&ctx->lock);
+		perf_group_detach(event);
+		raw_spin_unlock_irq(&ctx->lock);
+	}
 }
 
 /*

From 0b3589be9b98994ce3d5aeca52445d1f5627c4ba Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Jan 2017 23:15:08 +0100
Subject: [PATCH 111/322] perf/core: Fix PERF_RECORD_MMAP2 prot/flags for
 anonymous memory

Andres reported that MMAP2 records for anonymous memory always have
their protection field 0.

Turns out, someone daft put the prot/flags generation code in the file
branch, leaving them unset for anonymous memory.

Reported-by: Andres Freund <andres@anarazel.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Don Zickus <dzickus@redhat.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@gmail.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: anton@ozlabs.org
Cc: namhyung@kernel.org
Cc: stable@vger.kernel.org # v3.16+
Fixes: f972eb63b100 ("perf: Pass protection and flags bits through mmap2 interface")
Link: http://lkml.kernel.org/r/20170126221508.GF6536@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4e1f4c0070ce..e5aaa806702d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6632,6 +6632,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	char *buf = NULL;
 	char *name;
 
+	if (vma->vm_flags & VM_READ)
+		prot |= PROT_READ;
+	if (vma->vm_flags & VM_WRITE)
+		prot |= PROT_WRITE;
+	if (vma->vm_flags & VM_EXEC)
+		prot |= PROT_EXEC;
+
+	if (vma->vm_flags & VM_MAYSHARE)
+		flags = MAP_SHARED;
+	else
+		flags = MAP_PRIVATE;
+
+	if (vma->vm_flags & VM_DENYWRITE)
+		flags |= MAP_DENYWRITE;
+	if (vma->vm_flags & VM_MAYEXEC)
+		flags |= MAP_EXECUTABLE;
+	if (vma->vm_flags & VM_LOCKED)
+		flags |= MAP_LOCKED;
+	if (vma->vm_flags & VM_HUGETLB)
+		flags |= MAP_HUGETLB;
+
 	if (file) {
 		struct inode *inode;
 		dev_t dev;
@@ -6658,27 +6679,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		maj = MAJOR(dev);
 		min = MINOR(dev);
 
-		if (vma->vm_flags & VM_READ)
-			prot |= PROT_READ;
-		if (vma->vm_flags & VM_WRITE)
-			prot |= PROT_WRITE;
-		if (vma->vm_flags & VM_EXEC)
-			prot |= PROT_EXEC;
-
-		if (vma->vm_flags & VM_MAYSHARE)
-			flags = MAP_SHARED;
-		else
-			flags = MAP_PRIVATE;
-
-		if (vma->vm_flags & VM_DENYWRITE)
-			flags |= MAP_DENYWRITE;
-		if (vma->vm_flags & VM_MAYEXEC)
-			flags |= MAP_EXECUTABLE;
-		if (vma->vm_flags & VM_LOCKED)
-			flags |= MAP_LOCKED;
-		if (vma->vm_flags & VM_HUGETLB)
-			flags |= MAP_HUGETLB;
-
 		goto got_name;
 	} else {
 		if (vma->vm_ops && vma->vm_ops->name) {

From 97a98ae5b8acf08d07d972c087b2def060bc9b73 Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Date: Tue, 17 Jan 2017 21:10:11 +0100
Subject: [PATCH 112/322] ARM: 8642/1: LPAE: catch pending imprecise abort on
 unmask

Asynchronous external abort is coded differently in DFSR with LPAE enabled.

Fixes: 9254970c "ARM: 8447/1: catch pending imprecise abort on unmask".
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/fault.c | 4 ++--
 arch/arm/mm/fault.h | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 3a2e678b8d30..0122ad1a6027 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -610,9 +610,9 @@ static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
 
 void __init early_abt_enable(void)
 {
-	fsr_info[22].fn = early_abort_handler;
+	fsr_info[FSR_FS_AEA].fn = early_abort_handler;
 	local_abt_enable();
-	fsr_info[22].fn = do_bad;
+	fsr_info[FSR_FS_AEA].fn = do_bad;
 }
 
 #ifndef CONFIG_ARM_LPAE
diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
index 67532f242271..afc1f84e763b 100644
--- a/arch/arm/mm/fault.h
+++ b/arch/arm/mm/fault.h
@@ -11,11 +11,15 @@
 #define FSR_FS5_0		(0x3f)
 
 #ifdef CONFIG_ARM_LPAE
+#define FSR_FS_AEA		17
+
 static inline int fsr_fs(unsigned int fsr)
 {
 	return fsr & FSR_FS5_0;
 }
 #else
+#define FSR_FS_AEA		22
+
 static inline int fsr_fs(unsigned int fsr)
 {
 	return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6;

From 228dbbfb5d77f8e047b2a1d78da14b7158433027 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Wed, 18 Jan 2017 17:11:56 +0100
Subject: [PATCH 113/322] ARM: 8643/3: arm/ptrace: Preserve previous registers
 for short regset write

Ensure that if userspace supplies insufficient data to
PTRACE_SETREGSET to fill all the registers, the thread's old
registers are preserved.

Cc: <stable@vger.kernel.org> # 3.0.x-
Fixes: 5be6f62b0059 ("ARM: 6883/1: ptrace: Migrate to regsets framework")
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index ce131ed5939d..ae738a6319f6 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -600,7 +600,7 @@ static int gpr_set(struct task_struct *target,
 		   const void *kbuf, const void __user *ubuf)
 {
 	int ret;
-	struct pt_regs newregs;
+	struct pt_regs newregs = *task_pt_regs(target);
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				 &newregs,

From ed72b81bb7a49e8bfaa8dd6ab0b0e103ff0771ae Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Mon, 2 Jan 2017 09:41:40 -0200
Subject: [PATCH 114/322] [media] cec rst: remove "This API is not yet
 finalized" notice

The API is now finalized, so this notice should be dropped.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/cec/cec-func-close.rst           | 5 -----
 Documentation/media/uapi/cec/cec-func-ioctl.rst           | 5 -----
 Documentation/media/uapi/cec/cec-func-open.rst            | 5 -----
 Documentation/media/uapi/cec/cec-func-poll.rst            | 5 -----
 Documentation/media/uapi/cec/cec-intro.rst                | 5 -----
 Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst      | 5 -----
 Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst | 5 -----
 Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst | 5 -----
 Documentation/media/uapi/cec/cec-ioc-dqevent.rst          | 5 -----
 Documentation/media/uapi/cec/cec-ioc-g-mode.rst           | 5 -----
 Documentation/media/uapi/cec/cec-ioc-receive.rst          | 5 -----
 11 files changed, 55 deletions(-)

diff --git a/Documentation/media/uapi/cec/cec-func-close.rst b/Documentation/media/uapi/cec/cec-func-close.rst
index 8267c31b317d..895d9c2d1c04 100644
--- a/Documentation/media/uapi/cec/cec-func-close.rst
+++ b/Documentation/media/uapi/cec/cec-func-close.rst
@@ -33,11 +33,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 Closes the cec device. Resources associated with the file descriptor are
 freed. The device configuration remain unchanged.
 
diff --git a/Documentation/media/uapi/cec/cec-func-ioctl.rst b/Documentation/media/uapi/cec/cec-func-ioctl.rst
index 9e8dbb118d6a..7dcfd178fb24 100644
--- a/Documentation/media/uapi/cec/cec-func-ioctl.rst
+++ b/Documentation/media/uapi/cec/cec-func-ioctl.rst
@@ -39,11 +39,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 The :c:func:`ioctl()` function manipulates cec device parameters. The
 argument ``fd`` must be an open file descriptor.
 
diff --git a/Documentation/media/uapi/cec/cec-func-open.rst b/Documentation/media/uapi/cec/cec-func-open.rst
index af3f5b5c24c6..0304388cd159 100644
--- a/Documentation/media/uapi/cec/cec-func-open.rst
+++ b/Documentation/media/uapi/cec/cec-func-open.rst
@@ -46,11 +46,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 To open a cec device applications call :c:func:`open()` with the
 desired device name. The function has no side effects; the device
 configuration remain unchanged.
diff --git a/Documentation/media/uapi/cec/cec-func-poll.rst b/Documentation/media/uapi/cec/cec-func-poll.rst
index cfb73e6027a5..6a863cfda6e0 100644
--- a/Documentation/media/uapi/cec/cec-func-poll.rst
+++ b/Documentation/media/uapi/cec/cec-func-poll.rst
@@ -39,11 +39,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 With the :c:func:`poll()` function applications can wait for CEC
 events.
 
diff --git a/Documentation/media/uapi/cec/cec-intro.rst b/Documentation/media/uapi/cec/cec-intro.rst
index 4a19ea5323a9..7d31d37b0642 100644
--- a/Documentation/media/uapi/cec/cec-intro.rst
+++ b/Documentation/media/uapi/cec/cec-intro.rst
@@ -3,11 +3,6 @@
 Introduction
 ============
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 HDMI connectors provide a single pin for use by the Consumer Electronics
 Control protocol. This protocol allows different devices connected by an
 HDMI cable to communicate. The protocol for CEC version 1.4 is defined
diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst
index 2b0ddb14b280..a0e961f11017 100644
--- a/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-caps.rst
@@ -29,11 +29,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 All cec devices must support :ref:`ioctl CEC_ADAP_G_CAPS <CEC_ADAP_G_CAPS>`. To query
 device information, applications call the ioctl with a pointer to a
 struct :c:type:`cec_caps`. The driver fills the structure and
diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
index b878637e91b3..09f09bbe28d4 100644
--- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
@@ -35,11 +35,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 To query the current CEC logical addresses, applications call
 :ref:`ioctl CEC_ADAP_G_LOG_ADDRS <CEC_ADAP_G_LOG_ADDRS>` with a pointer to a
 struct :c:type:`cec_log_addrs` where the driver stores the logical addresses.
diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst
index 3357deb43c85..a3cdc75cec3e 100644
--- a/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-phys-addr.rst
@@ -35,11 +35,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 To query the current physical address applications call
 :ref:`ioctl CEC_ADAP_G_PHYS_ADDR <CEC_ADAP_G_PHYS_ADDR>` with a pointer to a __u16 where the
 driver stores the physical address.
diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
index e256c6605de7..6e589a1fae17 100644
--- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
@@ -30,11 +30,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 CEC devices can send asynchronous events. These can be retrieved by
 calling :c:func:`CEC_DQEVENT`. If the file descriptor is in
 non-blocking mode and no event is pending, then it will return -1 and
diff --git a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst
index 4f5818b9d277..e4ded9df0a84 100644
--- a/Documentation/media/uapi/cec/cec-ioc-g-mode.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-g-mode.rst
@@ -31,11 +31,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 By default any filehandle can use :ref:`CEC_TRANSMIT`, but in order to prevent
 applications from stepping on each others toes it must be possible to
 obtain exclusive access to the CEC adapter. This ioctl sets the
diff --git a/Documentation/media/uapi/cec/cec-ioc-receive.rst b/Documentation/media/uapi/cec/cec-ioc-receive.rst
index bdf015b1d1dc..dc2adb391c0a 100644
--- a/Documentation/media/uapi/cec/cec-ioc-receive.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-receive.rst
@@ -34,11 +34,6 @@ Arguments
 Description
 ===========
 
-.. note::
-
-   This documents the proposed CEC API. This API is not yet finalized
-   and is currently only available as a staging kernel module.
-
 To receive a CEC message the application has to fill in the
 ``timeout`` field of struct :c:type:`cec_msg` and pass it to
 :ref:`ioctl CEC_RECEIVE <CEC_RECEIVE>`.

From 8015d6b83cadc8f9f94c7bc8430255090ddf43d4 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Mon, 2 Jan 2017 09:54:24 -0200
Subject: [PATCH 115/322] [media] cec-intro.rst: mention the v4l-utils package
 and CEC utilities

Mention where to find the CEC utilities.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 Documentation/media/uapi/cec/cec-intro.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/Documentation/media/uapi/cec/cec-intro.rst b/Documentation/media/uapi/cec/cec-intro.rst
index 7d31d37b0642..07ee2b8f89d6 100644
--- a/Documentation/media/uapi/cec/cec-intro.rst
+++ b/Documentation/media/uapi/cec/cec-intro.rst
@@ -26,3 +26,15 @@ control just the CEC pin.
 Drivers that support CEC will create a CEC device node (/dev/cecX) to
 give userspace access to the CEC adapter. The
 :ref:`CEC_ADAP_G_CAPS` ioctl will tell userspace what it is allowed to do.
+
+In order to check the support and test it, it is suggested to download
+the `v4l-utils <https://git.linuxtv.org/v4l-utils.git/>`_ package. It
+provides three tools to handle CEC:
+
+- cec-ctl: the Swiss army knife of CEC. Allows you to configure, transmit
+  and monitor CEC messages.
+
+- cec-compliance: does a CEC compliance test of a remote CEC device to
+  determine how compliant the CEC implementation is.
+
+- cec-follower: emulates a CEC follower.

From f9f96fc10c09ca16e336854c08bc1563eed97985 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 10 Jan 2017 09:44:54 -0200
Subject: [PATCH 116/322] [media] cec: fix wrong last_la determination

Due to an incorrect condition the last_la used for the initial attempt at
claiming a logical address could be wrong.

The last_la wasn't converted to a mask when ANDing with type2mask, so that
test was broken.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/cec/cec-adap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index ebb5e391b800..87a6b65ed3af 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -1206,7 +1206,7 @@ static int cec_config_thread_func(void *arg)
 		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
 		if (last_la == CEC_LOG_ADDR_INVALID ||
 		    last_la == CEC_LOG_ADDR_UNREGISTERED ||
-		    !(last_la & type2mask[type]))
+		    !((1 << last_la) & type2mask[type]))
 			last_la = la_list[0];
 
 		err = cec_config_log_addr(adap, i, last_la);

From 08d85f3ea99f1eeafc4e8507936190e86a16ee8c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 17 Jan 2017 16:00:48 +0000
Subject: [PATCH 117/322] irqdomain: Avoid activating interrupts more than once

Since commit f3b0946d629c ("genirq/msi: Make sure PCI MSIs are
activated early"), we can end-up activating a PCI/MSI twice (once
at allocation time, and once at startup time).

This is normally of no consequences, except that there is some
HW out there that may misbehave if activate is used more than once
(the GICv3 ITS, for example, uses the activate callback
to issue the MAPVI command, and the architecture spec says that
"If there is an existing mapping for the EventID-DeviceID
combination, behavior is UNPREDICTABLE").

While this could be worked around in each individual driver, it may
make more sense to tackle the issue at the core level. In order to
avoid getting in that situation, let's have a per-interrupt flag
to remember if we have already activated that interrupt or not.

Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early")
Reported-and-tested-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1484668848-24361-1-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h    | 17 ++++++++++++++++
 kernel/irq/irqdomain.c | 44 ++++++++++++++++++++++++++++--------------
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index e79875574b39..39e3254e5769 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -184,6 +184,7 @@ struct irq_data {
  *
  * IRQD_TRIGGER_MASK		- Mask for the trigger type bits
  * IRQD_SETAFFINITY_PENDING	- Affinity setting is pending
+ * IRQD_ACTIVATED		- Interrupt has already been activated
  * IRQD_NO_BALANCING		- Balancing disabled for this IRQ
  * IRQD_PER_CPU			- Interrupt is per cpu
  * IRQD_AFFINITY_SET		- Interrupt affinity was set
@@ -202,6 +203,7 @@ struct irq_data {
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
 	IRQD_SETAFFINITY_PENDING	= (1 <<  8),
+	IRQD_ACTIVATED			= (1 <<  9),
 	IRQD_NO_BALANCING		= (1 << 10),
 	IRQD_PER_CPU			= (1 << 11),
 	IRQD_AFFINITY_SET		= (1 << 12),
@@ -312,6 +314,21 @@ static inline bool irqd_affinity_is_managed(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED;
 }
 
+static inline bool irqd_is_activated(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_ACTIVATED;
+}
+
+static inline void irqd_set_activated(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_ACTIVATED;
+}
+
+static inline void irqd_clr_activated(struct irq_data *d)
+{
+	__irqd_to_state(d) &= ~IRQD_ACTIVATED;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8c0a0ae43521..b59e6768c5e9 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1346,6 +1346,30 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain,
 }
 EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
 
+static void __irq_domain_activate_irq(struct irq_data *irq_data)
+{
+	if (irq_data && irq_data->domain) {
+		struct irq_domain *domain = irq_data->domain;
+
+		if (irq_data->parent_data)
+			__irq_domain_activate_irq(irq_data->parent_data);
+		if (domain->ops->activate)
+			domain->ops->activate(domain, irq_data);
+	}
+}
+
+static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
+{
+	if (irq_data && irq_data->domain) {
+		struct irq_domain *domain = irq_data->domain;
+
+		if (domain->ops->deactivate)
+			domain->ops->deactivate(domain, irq_data);
+		if (irq_data->parent_data)
+			__irq_domain_deactivate_irq(irq_data->parent_data);
+	}
+}
+
 /**
  * irq_domain_activate_irq - Call domain_ops->activate recursively to activate
  *			     interrupt
@@ -1356,13 +1380,9 @@ EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
  */
 void irq_domain_activate_irq(struct irq_data *irq_data)
 {
-	if (irq_data && irq_data->domain) {
-		struct irq_domain *domain = irq_data->domain;
-
-		if (irq_data->parent_data)
-			irq_domain_activate_irq(irq_data->parent_data);
-		if (domain->ops->activate)
-			domain->ops->activate(domain, irq_data);
+	if (!irqd_is_activated(irq_data)) {
+		__irq_domain_activate_irq(irq_data);
+		irqd_set_activated(irq_data);
 	}
 }
 
@@ -1376,13 +1396,9 @@ void irq_domain_activate_irq(struct irq_data *irq_data)
  */
 void irq_domain_deactivate_irq(struct irq_data *irq_data)
 {
-	if (irq_data && irq_data->domain) {
-		struct irq_domain *domain = irq_data->domain;
-
-		if (domain->ops->deactivate)
-			domain->ops->deactivate(domain, irq_data);
-		if (irq_data->parent_data)
-			irq_domain_deactivate_irq(irq_data->parent_data);
+	if (irqd_is_activated(irq_data)) {
+		__irq_domain_deactivate_irq(irq_data);
+		irqd_clr_activated(irq_data);
 	}
 }
 

From 827e1579e1d5cb66e340e7be1944b825b542bbdf Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 26 Jan 2017 19:24:07 +0200
Subject: [PATCH 118/322] pinctrl: baytrail: Rectify debounce support (part 2)

The commit 04ff5a095d66 ("pinctrl: baytrail: Rectify debounce support")
almost fixes the logic of debuonce but missed couple of things, i.e.
typo in mask when disabling debounce and lack of enabling it back.

This patch addresses above issues.

Reported-by: Jean Delvare <jdelvare@suse.de>
Fixes: 04ff5a095d66 ("pinctrl: baytrail: Rectify debounce support")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-baytrail.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index c123488266ce..3eb7c2bde87e 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -1243,10 +1243,12 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev,
 			debounce = readl(db_reg);
 			debounce &= ~BYT_DEBOUNCE_PULSE_MASK;
 
+			if (arg)
+				conf |= BYT_DEBOUNCE_EN;
+			else
+				conf &= ~BYT_DEBOUNCE_EN;
+
 			switch (arg) {
-			case 0:
-				conf &= BYT_DEBOUNCE_EN;
-				break;
 			case 375:
 				debounce |= BYT_DEBOUNCE_PULSE_375US;
 				break;
@@ -1269,7 +1271,9 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev,
 				debounce |= BYT_DEBOUNCE_PULSE_24MS;
 				break;
 			default:
-				ret = -EINVAL;
+				if (arg)
+					ret = -EINVAL;
+				break;
 			}
 
 			if (!ret)

From 1b89970d81bbd52720fc64a3fe9572ee33588363 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 26 Jan 2017 19:24:08 +0200
Subject: [PATCH 119/322] pinctrl: baytrail: Debounce register is one per
 community

Debounce value is set globally per community. Otherwise user will easily
get a kernel crash when they start using the feature:

BUG: unable to handle kernel paging request at ffffc900003be000
IP: byt_gpio_dbg_show+0xa9/0x430

Make it clear in byt_gpio_reg().

Note that this fix just prevents kernel to crash, but doesn't make any
difference to the existing logic. It means the last caller will win the
trade and debounce value will be configured accordingly. The actual
logic fix needs to be thought about and it's not as important as crash
fix. That's why the latter goes separately and right now.

Fixes: 658b476c742f ("pinctrl: baytrail: Add debounce configuration")
Cc: Cristina Ciocan <cristina.ciocan@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-baytrail.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 3eb7c2bde87e..05ba052ff6b9 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -731,16 +731,23 @@ static void __iomem *byt_gpio_reg(struct byt_gpio *vg, unsigned int offset,
 				  int reg)
 {
 	struct byt_community *comm = byt_get_community(vg, offset);
-	u32 reg_offset = 0;
+	u32 reg_offset;
 
 	if (!comm)
 		return NULL;
 
 	offset -= comm->pin_base;
-	if (reg == BYT_INT_STAT_REG)
+	switch (reg) {
+	case BYT_INT_STAT_REG:
 		reg_offset = (offset / 32) * 4;
-	else
+		break;
+	case BYT_DEBOUNCE_REG:
+		reg_offset = 0;
+		break;
+	default:
 		reg_offset = comm->pad_map[offset] * 16;
+		break;
+	}
 
 	return comm->reg_base + reg_offset + reg;
 }

From cdca06e4e85974d8a3503ab15709dbbaf90d3dd1 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@systec-electronic.com>
Date: Mon, 30 Jan 2017 12:35:28 +0100
Subject: [PATCH 120/322] pinctrl: baytrail: Add missing spinlock usage in
 byt_gpio_irq_handler

According to VLI64 Intel Atom E3800 Specification Update (#329901)
concurrent read accesses may result in returning 0xffffffff and write
accesses may be dropped silently.
To workaround all accesses must be protected by locks.

Cc: stable@vger.kernel.org
Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-baytrail.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 05ba052ff6b9..d94aef17348b 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -1623,7 +1623,9 @@ static void byt_gpio_irq_handler(struct irq_desc *desc)
 			continue;
 		}
 
+		raw_spin_lock(&vg->lock);
 		pending = readl(reg);
+		raw_spin_unlock(&vg->lock);
 		for_each_set_bit(pin, &pending, 32) {
 			virq = irq_find_mapping(vg->chip.irqdomain, base + pin);
 			generic_handle_irq(virq);

From 94842b4fc4d6b1691cfc86c6f5251f299d27f4ba Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.s.belous@gmail.com>
Date: Sat, 28 Jan 2017 22:53:28 +0300
Subject: [PATCH 121/322] net: ethtool: add support for 2500BaseT and 5000BaseT
 link modes

This patch introduce support for 2500BaseT and 5000BaseT link modes.
These modes are included in the new IEEE 802.3bz standard.

Signed-off-by: Pavel Belous <pavel.s.belous@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index f0db7788f887..3dc91a46e8b8 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1384,6 +1384,8 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_10000baseLR_Full_BIT	= 44,
 	ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT	= 45,
 	ETHTOOL_LINK_MODE_10000baseER_Full_BIT	= 46,
+	ETHTOOL_LINK_MODE_2500baseT_Full_BIT	= 47,
+	ETHTOOL_LINK_MODE_5000baseT_Full_BIT	= 48,
 
 
 	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
@@ -1393,7 +1395,7 @@ enum ethtool_link_mode_bit_indices {
 	 */
 
 	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+	  = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\

From 4af0e5bb95ee3ba5ea4bd7dbb94e1648a5279cc9 Mon Sep 17 00:00:00 2001
From: Arseny Solokha <asolokha@kb.kras.ru>
Date: Sun, 29 Jan 2017 19:52:20 +0700
Subject: [PATCH 122/322] gianfar: synchronize DMA API usage by
 free_skb_rx_queue w/ gfar_new_page

In spite of switching to paged allocation of Rx buffers, the driver still
called dma_unmap_single() in the Rx queues tear-down path.

The DMA region unmapping code in free_skb_rx_queue() basically predates
the introduction of paged allocation to the driver. While being refactored,
it apparently hasn't reflected the change in the DMA API usage by its
counterpart gfar_new_page().

As a result, setting an interface to the DOWN state now yields the following:

  # ip link set eth2 down
  fsl-gianfar ffe24000.ethernet: DMA-API: device driver frees DMA memory with wrong function [device address=0x000000001ecd0000] [size=40]
  ------------[ cut here ]------------
  WARNING: CPU: 1 PID: 189 at lib/dma-debug.c:1123 check_unmap+0x8e0/0xa28
  CPU: 1 PID: 189 Comm: ip Tainted: G           O    4.9.5 #1
  task: dee73400 task.stack: dede2000
  NIP: c02101e8 LR: c02101e8 CTR: c0260d74
  REGS: dede3bb0 TRAP: 0700   Tainted: G           O     (4.9.5)
  MSR: 00021000 <CE,ME>  CR: 28002222  XER: 00000000

  GPR00: c02101e8 dede3c60 dee73400 000000b6 dfbd033c dfbd36c4 1f622000 dede2000
  GPR08: 00000007 c05b1634 1f622000 00000000 22002484 100a9904 00000000 00000000
  GPR16: 00000000 db4c849c 00000002 db4c8480 00000001 df142240 db4c84bc 00000000
  GPR24: c0706148 c0700000 00029000 c07552e8 c07323b4 dede3cb8 c07605e0 db535540
  NIP [c02101e8] check_unmap+0x8e0/0xa28
  LR [c02101e8] check_unmap+0x8e0/0xa28
  Call Trace:
  [dede3c60] [c02101e8] check_unmap+0x8e0/0xa28 (unreliable)
  [dede3cb0] [c02103b8] debug_dma_unmap_page+0x88/0x9c
  [dede3d30] [c02dffbc] free_skb_resources+0x2c4/0x404
  [dede3d80] [c02e39b4] gfar_close+0x24/0xc8
  [dede3da0] [c0361550] __dev_close_many+0xa0/0xf8
  [dede3dd0] [c03616f0] __dev_close+0x2c/0x4c
  [dede3df0] [c036b1b8] __dev_change_flags+0xa0/0x174
  [dede3e10] [c036b2ac] dev_change_flags+0x20/0x60
  [dede3e30] [c03e130c] devinet_ioctl+0x540/0x824
  [dede3e90] [c0347dcc] sock_ioctl+0x134/0x298
  [dede3eb0] [c0111814] do_vfs_ioctl+0xac/0x854
  [dede3f20] [c0111ffc] SyS_ioctl+0x40/0x74
  [dede3f40] [c000f290] ret_from_syscall+0x0/0x3c
  --- interrupt: c01 at 0xff45da0
      LR = 0xff45cd0
  Instruction dump:
  811d001c 7c66482e 813d0020 9061000c 807f000c 5463103a 7cc6182e 3c60c052
  386309ac 90c10008 4cc63182 4826b845 <0fe00000> 4bfffa60 3c80c052 388402c4
  ---[ end trace 695ae6d7ac1d0c47 ]---
  Mapped at:
   [<c02e22a8>] gfar_alloc_rx_buffs+0x178/0x248
   [<c02e3ef0>] startup_gfar+0x368/0x570
   [<c036aeb4>] __dev_open+0xdc/0x150
   [<c036b1b8>] __dev_change_flags+0xa0/0x174
   [<c036b2ac>] dev_change_flags+0x20/0x60

Even though the issue was discovered in 4.9 kernel, the code in question
is identical in the current net and net-next trees.

Fixes: 75354148ce69 ("gianfar: Add paged allocation and Rx S/G")
Signed-off-by: Arseny Solokha <asolokha@kb.kras.ru>
Acked-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index c1b671667920..957bfc220978 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2010,8 +2010,8 @@ static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
 		if (!rxb->page)
 			continue;
 
-		dma_unmap_single(rx_queue->dev, rxb->dma,
-				 PAGE_SIZE, DMA_FROM_DEVICE);
+		dma_unmap_page(rx_queue->dev, rxb->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
 		__free_page(rxb->page);
 
 		rxb->page = NULL;

From d585df1c5ccf995fcee910705ad7a9cdd11d4152 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Mon, 30 Jan 2017 15:11:45 +0200
Subject: [PATCH 123/322] net/mlx4_core: Avoid command timeouts during VF
 driver device shutdown

Some Hypervisors detach VFs from VMs by instantly causing an FLR event
to be generated for a VF.

In the mlx4 case, this will cause that VF's comm channel to be disabled
before the VM has an opportunity to invoke the VF device's "shutdown"
method.

The result is that the VF driver on the VM will experience a command
timeout during the shutdown process when the Hypervisor does not deliver
a command-completion event to the VM.

To avoid FW command timeouts on the VM when the driver's shutdown method
is invoked, we detect the absence of the VF's comm channel at the very
start of the shutdown process. If the comm-channel has already been
disabled, we cause all FW commands during the device shutdown process to
immediately return success (and thus avoid all command timeouts).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/catas.c |  2 +-
 drivers/net/ethernet/mellanox/mlx4/intf.c  | 12 ++++++++++++
 drivers/net/ethernet/mellanox/mlx4/mlx4.h  |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index c7e939945259..53daa6ca5d83 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -158,7 +158,7 @@ static int mlx4_reset_slave(struct mlx4_dev *dev)
 	return -ETIMEDOUT;
 }
 
-static int mlx4_comm_internal_err(u32 slave_read)
+int mlx4_comm_internal_err(u32 slave_read)
 {
 	return (u32)COMM_CHAN_EVENT_INTERNAL_ERR ==
 		(slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index 0e8b7c44931f..8258d08acd8c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -222,6 +222,18 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
 		return;
 
 	mlx4_stop_catas_poll(dev);
+	if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION &&
+	    mlx4_is_slave(dev)) {
+		/* In mlx4_remove_one on a VF */
+		u32 slave_read =
+			swab32(readl(&mlx4_priv(dev)->mfunc.comm->slave_read));
+
+		if (mlx4_comm_internal_err(slave_read)) {
+			mlx4_dbg(dev, "%s: comm channel is down, entering error state.\n",
+				 __func__);
+			mlx4_enter_error_state(dev->persist);
+		}
+	}
 	mutex_lock(&intf_mutex);
 
 	list_for_each_entry(intf, &intf_list, list)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 88ee7d8a5923..086920b615af 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1220,6 +1220,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
 void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 
 void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
+int mlx4_comm_internal_err(u32 slave_read);
 
 int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
 		    enum mlx4_port_type *type);

From 2b89ed65a6f201a6a4f0450ad289aa4bf491608c Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Sun, 29 Jan 2017 22:52:53 -0500
Subject: [PATCH 124/322] ipv6: Paritially checksum full MTU frames

IPv6 will mark data that is smaller that mtu - headersize as
CHECKSUM_PARTIAL, but if the data will completely fill the mtu,
the packet checksum will be computed in software instead.
Extend the conditional to include the data that fills the mtu
as well.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2c0df09e9036..b6a94ff0bbd0 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1344,7 +1344,7 @@ static int __ip6_append_data(struct sock *sk,
 	 */
 	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
 	    headersize == sizeof(struct ipv6hdr) &&
-	    length < mtu - headersize &&
+	    length <= mtu - headersize &&
 	    !(flags & MSG_MORE) &&
 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		csummode = CHECKSUM_PARTIAL;

From e085b9d8fcbd7c8a3175268a16437cf359c0d052 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@microchip.com>
Date: Fri, 27 Jan 2017 15:43:42 +0100
Subject: [PATCH 125/322] MAINTAINERS: at91: change email address

Following the Microchip / Atmel merger and the unification of internal IT, it's
more convenient for me to swith to the microchip.com address. Change all my
entries to reflect this.

Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 MAINTAINERS | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cfff2c9e3d94..d0f5de961ff5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1091,7 +1091,7 @@ F:	arch/arm/boot/dts/aspeed-*
 F:	drivers/*/*aspeed*
 
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
 M:	Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -2178,7 +2178,7 @@ S:	Maintained
 F:	drivers/mmc/host/atmel-mci.c
 
 ATMEL AT91 SAMA5D2-Compatible Shutdown Controller
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 S:	Supported
 F:	drivers/power/reset/at91-sama5d2_shdwc.c
 
@@ -2189,7 +2189,7 @@ S:	Supported
 F:	drivers/iio/adc/at91-sama5d2_adc.c
 
 ATMEL Audio ALSA driver
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
 F:	sound/soc/atmel
@@ -2223,14 +2223,14 @@ F:	drivers/media/platform/soc_camera/atmel-isi.c
 F:	include/media/atmel-isi.h
 
 ATMEL LCDFB DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 L:	linux-fbdev@vger.kernel.org
 S:	Maintained
 F:	drivers/video/fbdev/atmel_lcdfb.c
 F:	include/video/atmel_lcdc.h
 
 ATMEL MACB ETHERNET DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 S:	Supported
 F:	drivers/net/ethernet/cadence/
 
@@ -2248,26 +2248,26 @@ S:	Supported
 F:	drivers/mmc/host/sdhci-of-at91.c
 
 ATMEL SPI DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 S:	Supported
 F:	drivers/spi/spi-atmel.*
 
 ATMEL SSC DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/misc/atmel-ssc.c
 F:	include/linux/atmel-ssc.h
 
 ATMEL Timer Counter (TC) AND CLOCKSOURCE DRIVERS
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/misc/atmel_tclib.c
 F:	drivers/clocksource/tcb_clksrc.c
 
 ATMEL USBA UDC DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
+M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	drivers/usb/gadget/udc/atmel_usba_udc.*

From 420a3879d694e5c3e734fb92151d19b2ec503e46 Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@atmel.com>
Date: Fri, 27 Jan 2017 14:33:44 +0100
Subject: [PATCH 126/322] MAINTAINERS: change email address from atmel to
 microchip

Use microchip email address instead of old atmel one.

Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
---
 MAINTAINERS | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d0f5de961ff5..eb9739ec4e92 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2173,7 +2173,7 @@ F:	include/linux/atm*
 F:	include/uapi/linux/atm*
 
 ATMEL AT91 / AT32 MCI DRIVER
-M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 S:	Maintained
 F:	drivers/mmc/host/atmel-mci.c
 
@@ -2183,7 +2183,7 @@ S:	Supported
 F:	drivers/power/reset/at91-sama5d2_shdwc.c
 
 ATMEL SAMA5D2 ADC DRIVER
-M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-iio@vger.kernel.org
 S:	Supported
 F:	drivers/iio/adc/at91-sama5d2_adc.c
@@ -2203,20 +2203,20 @@ F:	drivers/dma/at_hdmac_regs.h
 F:	include/linux/platform_data/dma-atmel.h
 
 ATMEL XDMA DRIVER
-M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-arm-kernel@lists.infradead.org
 L:	dmaengine@vger.kernel.org
 S:	Supported
 F:	drivers/dma/at_xdmac.c
 
 ATMEL I2C DRIVER
-M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-i2c@vger.kernel.org
 S:	Supported
 F:	drivers/i2c/busses/i2c-at91.c
 
 ATMEL ISI DRIVER
-M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-media@vger.kernel.org
 S:	Supported
 F:	drivers/media/platform/soc_camera/atmel-isi.c
@@ -2242,7 +2242,7 @@ S:	Supported
 F:	drivers/mtd/nand/atmel_nand*
 
 ATMEL SDMMC DRIVER
-M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-mmc@vger.kernel.org
 S:	Supported
 F:	drivers/mmc/host/sdhci-of-at91.c
@@ -9710,7 +9710,7 @@ S:	Maintained
 F:	drivers/pinctrl/pinctrl-at91.*
 
 PIN CONTROLLER - ATMEL AT91 PIO4
-M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-gpio@vger.kernel.org
 S:	Supported

From 040587af31228d82c52267f717c9fcdb65f36335 Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Mon, 30 Jan 2017 16:19:02 +0100
Subject: [PATCH 127/322] net/sched: cls_flower: Correct matching on ICMPv6
 code

When matching on the ICMPv6 code ICMPV6_CODE rather than
ICMPV4_CODE attributes should be used.

This corrects what appears to be a typo.

Sample usage:

tc qdisc add dev eth0 ingress
tc filter add dev eth0 protocol ipv6 parent ffff: flower \
	indev eth0 ip_proto icmpv6 type 128 code 0 action drop

Without this change the code parameter above is effectively ignored.

Fixes: 7b684884fbfa ("net/sched: cls_flower: Support matching on ICMP type and code")
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 970db7a41684..5752789acc13 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -568,9 +568,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 			       &mask->icmp.type,
 			       TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
 			       sizeof(key->icmp.type));
-		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE,
 			       &mask->icmp.code,
-			       TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+			       TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
 			       sizeof(key->icmp.code));
 	}
 

From 7a7dc961a28b965a0d0303c2e989df17b411708b Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
Date: Tue, 17 Jan 2017 10:59:02 -0500
Subject: [PATCH 128/322] sparc64: Zero pages on allocation for mondo and error
 queues.

Error queues use a non-zero first word to detect if the queues are full.
Using pages that have not been zeroed may result in false positive
overflow events.  These queues are set up once during boot so zeroing
all mondo and error queue pages is safe.

Note that the false positive overflow does not always occur because the
page allocation for these queues is so early in the boot cycle that
higher number CPUs get fresh pages.  It is only when traps are serviced
with lower number CPUs who were given already used pages that this issue
is exposed.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/irq_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 34a7930b76ef..baed4cdeda75 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -1021,7 +1021,7 @@ static void __init alloc_one_queue(unsigned long *pa_ptr, unsigned long qmask)
 	unsigned long order = get_order(size);
 	unsigned long p;
 
-	p = __get_free_pages(GFP_KERNEL, order);
+	p = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 	if (!p) {
 		prom_printf("SUN4V: Error, cannot allocate queue.\n");
 		prom_halt();

From 047487241ff59374fded8c477f21453681f5995c Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
Date: Tue, 17 Jan 2017 10:59:03 -0500
Subject: [PATCH 129/322] sparc64: Handle PIO & MEM non-resumable errors.

User processes trying to access an invalid memory address via PIO will
receive a SIGBUS signal instead of causing a panic.  Memory errors will
receive a SIGKILL since a SIGBUS may result in a coredump which may
attempt to repeat the faulting access.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/traps_64.c | 73 ++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 496fa926e1e0..d44fb806bbd7 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2051,6 +2051,73 @@ void sun4v_resum_overflow(struct pt_regs *regs)
 	atomic_inc(&sun4v_resum_oflow_cnt);
 }
 
+/* Given a set of registers, get the virtual addressi that was being accessed
+ * by the faulting instructions at tpc.
+ */
+static unsigned long sun4v_get_vaddr(struct pt_regs *regs)
+{
+	unsigned int insn;
+
+	if (!copy_from_user(&insn, (void __user *)regs->tpc, 4)) {
+		return compute_effective_address(regs, insn,
+						 (insn >> 25) & 0x1f);
+	}
+	return 0;
+}
+
+/* Attempt to handle non-resumable errors generated from userspace.
+ * Returns true if the signal was handled, false otherwise.
+ */
+bool sun4v_nonresum_error_user_handled(struct pt_regs *regs,
+				  struct sun4v_error_entry *ent) {
+
+	unsigned int attrs = ent->err_attrs;
+
+	if (attrs & SUN4V_ERR_ATTRS_MEMORY) {
+		unsigned long addr = ent->err_raddr;
+		siginfo_t info;
+
+		if (addr == ~(u64)0) {
+			/* This seems highly unlikely to ever occur */
+			pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory error detected in unknown location!\n");
+		} else {
+			unsigned long page_cnt = DIV_ROUND_UP(ent->err_size,
+							      PAGE_SIZE);
+
+			/* Break the unfortunate news. */
+			pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory failed at %016lX\n",
+				 addr);
+			pr_emerg("SUN4V NON-RECOVERABLE ERROR:   Claiming %lu ages.\n",
+				 page_cnt);
+
+			while (page_cnt-- > 0) {
+				if (pfn_valid(addr >> PAGE_SHIFT))
+					get_page(pfn_to_page(addr >> PAGE_SHIFT));
+				addr += PAGE_SIZE;
+			}
+		}
+		info.si_signo = SIGKILL;
+		info.si_errno = 0;
+		info.si_trapno = 0;
+		force_sig_info(info.si_signo, &info, current);
+
+		return true;
+	}
+	if (attrs & SUN4V_ERR_ATTRS_PIO) {
+		siginfo_t info;
+
+		info.si_signo = SIGBUS;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)sun4v_get_vaddr(regs);
+		force_sig_info(info.si_signo, &info, current);
+
+		return true;
+	}
+
+	/* Default to doing nothing */
+	return false;
+}
+
 /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
  * Log the event, clear the first word of the entry, and die.
  */
@@ -2075,6 +2142,12 @@ void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset)
 
 	put_cpu();
 
+	if (!(regs->tstate & TSTATE_PRIV) &&
+	    sun4v_nonresum_error_user_handled(regs, &local_copy)) {
+		/* DON'T PANIC: This userspace error was handled. */
+		return;
+	}
+
 #ifdef CONFIG_PCI
 	/* Check for the special PCI poke sequence. */
 	if (pci_poke_in_progress && pci_poke_cpu == cpu) {

From 52f5631a4c056ad01682393be56d2be237e81610 Mon Sep 17 00:00:00 2001
From: Jurij Smakov <jurij@wooyd.org>
Date: Mon, 30 Jan 2017 15:41:36 -0600
Subject: [PATCH 130/322] rtlwifi: rtl8192ce: Fix loading of incorrect firmware

In commit cf4747d7535a ("rtlwifi: Fix regression caused by commit
d86e64768859, an error in the edit results in the wrong firmware
being loaded for some models of the RTL8188/8192CE. In this condition,
the connection suffered from high ping latency, slow transfer rates,
 and required higher signal strengths to work at all

See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=853073,
https://bugzilla.opensuse.org/show_bug.cgi?id=1017471, and
https://github.com/lwfinger/rtlwifi_new/issues/203 for descriptions
of the problems. This patch fixes all of those problems.

Fixes: cf4747d7535a ("rtlwifi: Fix regression caused by commit d86e64768859")
Signed-off-by: Jurij Smakov <jurij@wooyd.org>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Stable <stable@vger.kernel.org> # 4.9+
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
index 691ddef1ae28..a33a06d58a9a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
@@ -92,7 +92,7 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw)
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
 	struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
-	char *fw_name = "rtlwifi/rtl8192cfwU.bin";
+	char *fw_name;
 
 	rtl8192ce_bt_reg_init(hw);
 
@@ -164,8 +164,13 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw)
 	}
 
 	/* request fw */
-	if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version))
+	if (IS_VENDOR_UMC_A_CUT(rtlhal->version) &&
+	    !IS_92C_SERIAL(rtlhal->version))
+		fw_name = "rtlwifi/rtl8192cfwU.bin";
+	else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version))
 		fw_name = "rtlwifi/rtl8192cfwU_B.bin";
+	else
+		fw_name = "rtlwifi/rtl8192cfw.bin";
 
 	rtlpriv->max_fw_size = 0x4000;
 	pr_info("Using firmware %s\n", fw_name);

From 05e0be7c900797e9164976a6014d534ce3035909 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 31 Jan 2017 00:47:30 -0800
Subject: [PATCH 131/322] Input: synaptics-rmi4 - fix reversed conditions in
 enable/disable_irq_wake

These tests are reversed.  A warning should be displayed if an error is
returned, not on success.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/rmi_driver.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 11447ab1055c..bf5c36e229ba 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -901,7 +901,7 @@ void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
 	data->enabled = true;
 	if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) {
 		retval = disable_irq_wake(irq);
-		if (!retval)
+		if (retval)
 			dev_warn(&rmi_dev->dev,
 				 "Failed to disable irq for wake: %d\n",
 				 retval);
@@ -936,7 +936,7 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake)
 	disable_irq(irq);
 	if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) {
 		retval = enable_irq_wake(irq);
-		if (!retval)
+		if (retval)
 			dev_warn(&rmi_dev->dev,
 				 "Failed to enable irq for wake: %d\n",
 				 retval);

From 433e19cf33d34bb6751c874a9c00980552fe508c Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Sat, 28 Jan 2017 11:46:02 -0700
Subject: [PATCH 132/322] Drivers: hv: vmbus: finally fix
 hv_need_to_signal_on_read()

Commit a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")
added the proper mb(), but removed the test "prev_write_sz < pending_sz"
when making the signal decision.

As a result, the guest can signal the host unnecessarily,
and then the host can throttle the guest because the host
thinks the guest is buggy or malicious; finally the user
running stress test can perceive intermittent freeze of
the guest.

This patch brings back the test, and properly handles the
in-place consumption APIs used by NetVSC (see get_next_pkt_raw(),
put_pkt_raw() and commit_rd_index()).

Fixes: a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in
hv_need_to_signal_on_read()")

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reported-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Tested-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/ring_buffer.c    |  1 +
 drivers/net/hyperv/netvsc.c |  6 ++++++
 include/linux/hyperv.h      | 32 ++++++++++++++++++++++++++++++--
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index cd49cb17eb7f..308dbda700eb 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -383,6 +383,7 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
 		return ret;
 	}
 
+	init_cached_read_index(channel);
 	next_read_location = hv_get_next_read_location(inring_info);
 	next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
 						    sizeof(desc),
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 5a1cc089acb7..86e5749226ef 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1295,6 +1295,9 @@ void netvsc_channel_cb(void *context)
 	ndev = hv_get_drvdata(device);
 	buffer = get_per_channel_state(channel);
 
+	/* commit_rd_index() -> hv_signal_on_read() needs this. */
+	init_cached_read_index(channel);
+
 	do {
 		desc = get_next_pkt_raw(channel);
 		if (desc != NULL) {
@@ -1347,6 +1350,9 @@ void netvsc_channel_cb(void *context)
 
 			bufferlen = bytes_recvd;
 		}
+
+		init_cached_read_index(channel);
+
 	} while (1);
 
 	if (bufferlen > NETVSC_PACKET_SIZE)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 42fe43fb0c80..183efde54269 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -128,6 +128,7 @@ struct hv_ring_buffer_info {
 	u32 ring_data_startoffset;
 	u32 priv_write_index;
 	u32 priv_read_index;
+	u32 cached_read_index;
 };
 
 /*
@@ -180,6 +181,19 @@ static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi)
 	return write;
 }
 
+static inline u32 hv_get_cached_bytes_to_write(
+	const struct hv_ring_buffer_info *rbi)
+{
+	u32 read_loc, write_loc, dsize, write;
+
+	dsize = rbi->ring_datasize;
+	read_loc = rbi->cached_read_index;
+	write_loc = rbi->ring_buffer->write_index;
+
+	write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
+		read_loc - write_loc;
+	return write;
+}
 /*
  * VMBUS version is 32 bit entity broken up into
  * two 16 bit quantities: major_number. minor_number.
@@ -1488,7 +1502,7 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
 
 static inline  void hv_signal_on_read(struct vmbus_channel *channel)
 {
-	u32 cur_write_sz;
+	u32 cur_write_sz, cached_write_sz;
 	u32 pending_sz;
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
 
@@ -1512,12 +1526,24 @@ static inline  void hv_signal_on_read(struct vmbus_channel *channel)
 
 	cur_write_sz = hv_get_bytes_to_write(rbi);
 
-	if (cur_write_sz >= pending_sz)
+	if (cur_write_sz < pending_sz)
+		return;
+
+	cached_write_sz = hv_get_cached_bytes_to_write(rbi);
+	if (cached_write_sz < pending_sz)
 		vmbus_setevent(channel);
 
 	return;
 }
 
+static inline void
+init_cached_read_index(struct vmbus_channel *channel)
+{
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+	rbi->cached_read_index = rbi->ring_buffer->read_index;
+}
+
 /*
  * An API to support in-place processing of incoming VMBUS packets.
  */
@@ -1569,6 +1595,8 @@ static inline void put_pkt_raw(struct vmbus_channel *channel,
  * This call commits the read index and potentially signals the host.
  * Here is the pattern for using the "in-place" consumption APIs:
  *
+ * init_cached_read_index();
+ *
  * while (get_next_pkt_raw() {
  *	process the packet "in-place";
  *	put_pkt_raw();

From 96692b097ba76d0c637ae8af47b29c73da33c9d0 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 14 Dec 2016 09:52:39 +1000
Subject: [PATCH 133/322] drm/nouveau/fence/g84-: protect against concurrent
 access to semaphore buffers

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_fence.h | 1 +
 drivers/gpu/drm/nouveau/nv84_fence.c    | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index ccdce1b4eec4..d5e58a38f160 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -99,6 +99,7 @@ struct nv84_fence_priv {
 	struct nouveau_bo *bo;
 	struct nouveau_bo *bo_gart;
 	u32 *suspend;
+	struct mutex mutex;
 };
 
 int  nv84_fence_context_new(struct nouveau_channel *);
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 52b87ae83e7b..f0b322bec7df 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -107,8 +107,10 @@ nv84_fence_context_del(struct nouveau_channel *chan)
 	struct nv84_fence_chan *fctx = chan->fence;
 
 	nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence);
+	mutex_lock(&priv->mutex);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma_gart);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma);
+	mutex_unlock(&priv->mutex);
 	nouveau_fence_context_del(&fctx->base);
 	chan->fence = NULL;
 	nouveau_fence_context_free(&fctx->base);
@@ -134,11 +136,13 @@ nv84_fence_context_new(struct nouveau_channel *chan)
 	fctx->base.sync32 = nv84_fence_sync32;
 	fctx->base.sequence = nv84_fence_read(chan);
 
+	mutex_lock(&priv->mutex);
 	ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma);
 	if (ret == 0) {
 		ret = nouveau_bo_vma_add(priv->bo_gart, cli->vm,
 					&fctx->vma_gart);
 	}
+	mutex_unlock(&priv->mutex);
 
 	if (ret)
 		nv84_fence_context_del(chan);
@@ -212,6 +216,8 @@ nv84_fence_create(struct nouveau_drm *drm)
 	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	priv->base.uevent = true;
 
+	mutex_init(&priv->mutex);
+
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
 	domain = drm->device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM :
 			 /*

From c966b6279f610a24ac1d42dcbe30e10fa61220b2 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 25 May 2016 17:11:40 +1000
Subject: [PATCH 134/322] drm/nouveau: prevent userspace from deleting client
 object

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_usif.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 08f9c6fa0f7f..1fba38622744 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -313,7 +313,8 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
 	if (!(ret = nvif_unpack(-ENOSYS, &data, &size, argv->v0, 0, 0, true))) {
 		/* block access to objects not created via this interface */
 		owner = argv->v0.owner;
-		if (argv->v0.object == 0ULL)
+		if (argv->v0.object == 0ULL &&
+		    argv->v0.type != NVIF_IOCTL_V0_DEL)
 			argv->v0.owner = NVDRM_OBJECT_ANY; /* except client */
 		else
 			argv->v0.owner = NVDRM_OBJECT_USIF;

From 7dfee6827780d4228148263545af936d0cae8930 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 9 Jan 2017 10:22:15 +1000
Subject: [PATCH 135/322] drm/nouveau/disp/mcp7x: disable dptmds workaround

The workaround appears to cause regressions on these boards, and from
inspection of RM traces, NVIDIA don't appear to do it on them either.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Tested-by: Roy Spliet <nouveau@spliet.org>
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 567466f93cd5..0db8efbf1c2e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -433,8 +433,6 @@ nv50_disp_dptmds_war(struct nvkm_device *device)
 	case 0x94:
 	case 0x96:
 	case 0x98:
-	case 0xaa:
-	case 0xac:
 		return true;
 	default:
 		break;

From d72498ca2cbcf15e5038b184a95f061bca21e820 Mon Sep 17 00:00:00 2001
From: Martin Peres <martin.peres@free.fr>
Date: Wed, 7 Dec 2016 06:30:15 +0200
Subject: [PATCH 136/322] drm/nouveau/nouveau/led: prevent compiling the
 led-code if nouveau=y and leds=m

The proper fix would have been to select LEDS_CLASS but this can lead
to a circular dependency, as found out by Arnd.

This patch implements Arnd's suggestion instead, at the cost of some
auto-magic for a fringe feature.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Intel's 0-DAY
Fixes: 8d021d71b324 ("drm/nouveau/drm/nouveau: add a LED driver for the NVIDIA logo")
Signed-off-by: Martin Peres <martin.peres@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_led.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_led.h b/drivers/gpu/drm/nouveau/nouveau_led.h
index 187ecdb82002..21a5775028cc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_led.h
+++ b/drivers/gpu/drm/nouveau/nouveau_led.h
@@ -42,7 +42,7 @@ nouveau_led(struct drm_device *dev)
 }
 
 /* nouveau_led.c */
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_REACHABLE(CONFIG_LEDS_CLASS)
 int  nouveau_led_init(struct drm_device *dev);
 void nouveau_led_suspend(struct drm_device *dev);
 void nouveau_led_resume(struct drm_device *dev);

From d347583a39e2df609a9e40c835f72d3614665b53 Mon Sep 17 00:00:00 2001
From: Alastair Bridgewater <alastair.bridgewater@gmail.com>
Date: Wed, 11 Jan 2017 15:47:18 -0500
Subject: [PATCH 137/322] drm/nouveau/disp/gt215: Fix HDA ELD handling (thus,
 HDMI audio) on gt215

Store the ELD correctly, not just enough copies of the first byte
to pad out the given ELD size.

Signed-off-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>
Fixes: 120b0c39c756 ("drm/nv50-/disp: audit and version SOR_HDA_ELD method")
Cc: stable@vger.kernel.org # v3.17+
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
index 6f0436df0219..f8f2f16c22a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
@@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1)
 			);
 		}
 		for (i = 0; i < size; i++)
-			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]);
+			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]);
 		for (; i < 0x60; i++)
 			nvkm_wr32(device, 0x61c440 + soff, (i << 8));
 		nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003);

From 24bf7ae359b8cca165bb30742d2b1c03a1eb23af Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 19 Jan 2017 22:56:30 -0500
Subject: [PATCH 138/322] drm/nouveau/nv1a,nv1f/disp: fix memory clock rate
 retrieval

Based on the xf86-video-nv code, NFORCE (NV1A) and NFORCE2 (NV1F) have a
different way of retrieving clocks. See the
nv_hw.c:nForceUpdateArbitrationSettings function in the original code
for how these clocks were accessed.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=54587
Cc: stable@vger.kernel.org
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/dispnv04/hw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c
index 74856a8b8f35..e64f52464ecf 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/hw.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c
@@ -222,6 +222,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t mpllP;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP);
+		mpllP = (mpllP >> 8) & 0xf;
 		if (!mpllP)
 			mpllP = 4;
 
@@ -232,7 +233,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t clock;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock);
-		return clock;
+		return clock / 1000;
 	}
 
 	ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals);

From 2b5078937355c0d662ecef54b7d4d04f48da4fa9 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 24 Jan 2017 09:32:26 +1000
Subject: [PATCH 139/322] drm/nouveau/kms/nv50: request vblank events for
 commits that send completion events

This somehow fixes an issue where sync-to-vblank longer works correctly
after resume from suspend.

From a HW perspective, we don't need the IRQs turned on to be able to
detect flip completion, so it's assumed that this is required for the
voodoo in the core DRM vblank core.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv50_display.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 2c2c64507661..32097fd615fd 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -4052,6 +4052,11 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
 		}
 	}
 
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (crtc->state->event)
+			drm_crtc_vblank_get(crtc);
+	}
+
 	/* Update plane(s). */
 	for_each_plane_in_state(state, plane, plane_state, i) {
 		struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane->state);
@@ -4101,6 +4106,7 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
 			drm_crtc_send_vblank_event(crtc, crtc->state->event);
 			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 			crtc->state->event = NULL;
+			drm_crtc_vblank_put(crtc);
 		}
 	}
 

From 161e6d44a5e2d3f85365cb717d60e363171b39e6 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Date: Mon, 16 Jan 2017 12:23:42 -0200
Subject: [PATCH 140/322] mmc: sdhci: Ignore unexpected CARD_INT interrupts

One of our kernelCI boxes hanged at boot because a faulty eSDHC device
was triggering spurious CARD_INT interrupts for SD cards, causing CMD52
reads, which are not allowed for SD devices.  This adds a sanity check
to the interruption path, preventing that illegal command from getting
sent if the CARD_INT interruption should be disabled.

This quirk allows that particular machine to resume boot despite the
faulty hardware, instead of getting hung dealing with thousands of
mishandled interrupts.

Suggested-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Cc: <stable@vger.kernel.org>
---
 drivers/mmc/host/sdhci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 23909804ffb8..0def99590d16 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2733,7 +2733,8 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 		if (intmask & SDHCI_INT_RETUNE)
 			mmc_retune_needed(host->mmc);
 
-		if (intmask & SDHCI_INT_CARD_INT) {
+		if ((intmask & SDHCI_INT_CARD_INT) &&
+		    (host->ier & SDHCI_INT_CARD_INT)) {
 			sdhci_enable_sdio_irq_nolock(host, false);
 			host->thread_isr |= SDHCI_INT_CARD_INT;
 			result = IRQ_WAKE_THREAD;

From 81917bad86a66f2bdcb12b4c10ab1bf333ed25ec Mon Sep 17 00:00:00 2001
From: Roger Shimizu <rogershimizu@gmail.com>
Date: Mon, 30 Jan 2017 20:07:29 +0900
Subject: [PATCH 141/322] ARM: dts: orion5x-lschl: Fix model name

Model name should be consistent with legacy device file, so that user
can migrate their system from legacy device support to device-tree
safely.

Legacy device file is currently removed, but it can be found on 4.8
or previous version of linux:
  arch/arm/mach-orion5x/ls-chl-setup.c

Fixes: f94f268979a2 ("ARM: dts: orion5x: convert ls-chl to FDT")
Cc: Ashley Hughes <ashley.hughes@blueyonder.co.uk>
Signed-off-by: Roger Shimizu <rogershimizu@gmail.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm/boot/dts/orion5x-lschl.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/orion5x-lschl.dts b/arch/arm/boot/dts/orion5x-lschl.dts
index 947409252845..ea6c881634b9 100644
--- a/arch/arm/boot/dts/orion5x-lschl.dts
+++ b/arch/arm/boot/dts/orion5x-lschl.dts
@@ -2,7 +2,7 @@
  * Device Tree file for Buffalo Linkstation LS-CHLv3
  *
  * Copyright (C) 2016 Ash Hughes <ashley.hughes@blueyonder.co.uk>
- * Copyright (C) 2015, 2016
+ * Copyright (C) 2015-2017
  * Roger Shimizu <rogershimizu@gmail.com>
  *
  * This file is dual-licensed: you can use it either under the terms
@@ -52,7 +52,7 @@
 #include <dt-bindings/gpio/gpio.h>
 
 / {
-	model = "Buffalo Linkstation Live v3 (LS-CHL)";
+	model = "Buffalo Linkstation LiveV3 (LS-CHL)";
 	compatible = "buffalo,lschl", "marvell,orion5x-88f5182", "marvell,orion5x";
 
 	memory { /* 128 MB */

From 6cfd3cd8d8365cf78db1d25cd276d3d900eb8541 Mon Sep 17 00:00:00 2001
From: Roger Shimizu <rogershimizu@gmail.com>
Date: Mon, 30 Jan 2017 20:07:30 +0900
Subject: [PATCH 142/322] ARM: dts: orion5x-lschl: More consistent naming on
 linkstation series

DTS files, which includes orion5x-linkstation.dtsi, are named:
  orion5x-linkstation-*.dts

So we rename the file below:
  arch/arm/boot/dts/orion5x-lschl.dts
to the new name:
  arch/arm/boot/dts/orion5x-linkstation-lschl.dts

Because DTS conversion of this device was just introduced in 4.9, Debian
is still using legacy device support, other distros are the same,
so here we won't expect any impact actually.

Fixes: f94f268979a2 ("ARM: dts: orion5x: convert ls-chl to FDT")
Cc: Ashley Hughes <ashley.hughes@blueyonder.co.uk>
Signed-off-by: Roger Shimizu <rogershimizu@gmail.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 .../boot/dts/{orion5x-lschl.dts => orion5x-linkstation-lschl.dts} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename arch/arm/boot/dts/{orion5x-lschl.dts => orion5x-linkstation-lschl.dts} (100%)

diff --git a/arch/arm/boot/dts/orion5x-lschl.dts b/arch/arm/boot/dts/orion5x-linkstation-lschl.dts
similarity index 100%
rename from arch/arm/boot/dts/orion5x-lschl.dts
rename to arch/arm/boot/dts/orion5x-linkstation-lschl.dts

From 7a7b5df84b6b4e5d599c7289526eed96541a0654 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 30 Jan 2017 11:26:38 +0100
Subject: [PATCH 143/322] HID: cp2112: fix sleep-while-atomic

A recent commit fixing DMA-buffers on stack added a shared transfer
buffer protected by a spinlock. This is broken as the USB HID request
callbacks can sleep. Fix this up by replacing the spinlock with a mutex.

Fixes: 1ffb3c40ffb5 ("HID: cp2112: make transfer buffers DMA capable")
Cc: stable <stable@vger.kernel.org>	# 4.9
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-cp2112.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index f31a778b0851..3e0b6bad29f2 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -168,7 +168,7 @@ struct cp2112_device {
 	atomic_t xfer_avail;
 	struct gpio_chip gc;
 	u8 *in_out_buffer;
-	spinlock_t lock;
+	struct mutex lock;
 
 	struct gpio_desc *desc[8];
 	bool gpio_poll;
@@ -186,10 +186,9 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 	struct cp2112_device *dev = gpiochip_get_data(chip);
 	struct hid_device *hdev = dev->hdev;
 	u8 *buf = dev->in_out_buffer;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&dev->lock, flags);
+	mutex_lock(&dev->lock);
 
 	ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
 				 CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
@@ -213,7 +212,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 	ret = 0;
 
 exit:
-	spin_unlock_irqrestore(&dev->lock, flags);
+	mutex_unlock(&dev->lock);
 	return ret <= 0 ? ret : -EIO;
 }
 
@@ -222,10 +221,9 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 	struct cp2112_device *dev = gpiochip_get_data(chip);
 	struct hid_device *hdev = dev->hdev;
 	u8 *buf = dev->in_out_buffer;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&dev->lock, flags);
+	mutex_lock(&dev->lock);
 
 	buf[0] = CP2112_GPIO_SET;
 	buf[1] = value ? 0xff : 0;
@@ -237,7 +235,7 @@ static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 	if (ret < 0)
 		hid_err(hdev, "error setting GPIO values: %d\n", ret);
 
-	spin_unlock_irqrestore(&dev->lock, flags);
+	mutex_unlock(&dev->lock);
 }
 
 static int cp2112_gpio_get_all(struct gpio_chip *chip)
@@ -245,10 +243,9 @@ static int cp2112_gpio_get_all(struct gpio_chip *chip)
 	struct cp2112_device *dev = gpiochip_get_data(chip);
 	struct hid_device *hdev = dev->hdev;
 	u8 *buf = dev->in_out_buffer;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&dev->lock, flags);
+	mutex_lock(&dev->lock);
 
 	ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf,
 				 CP2112_GPIO_GET_LENGTH, HID_FEATURE_REPORT,
@@ -262,7 +259,7 @@ static int cp2112_gpio_get_all(struct gpio_chip *chip)
 	ret = buf[1];
 
 exit:
-	spin_unlock_irqrestore(&dev->lock, flags);
+	mutex_unlock(&dev->lock);
 
 	return ret;
 }
@@ -284,10 +281,9 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip,
 	struct cp2112_device *dev = gpiochip_get_data(chip);
 	struct hid_device *hdev = dev->hdev;
 	u8 *buf = dev->in_out_buffer;
-	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&dev->lock, flags);
+	mutex_lock(&dev->lock);
 
 	ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
 				 CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
@@ -308,7 +304,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip,
 		goto fail;
 	}
 
-	spin_unlock_irqrestore(&dev->lock, flags);
+	mutex_unlock(&dev->lock);
 
 	/*
 	 * Set gpio value when output direction is already set,
@@ -319,7 +315,7 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip,
 	return 0;
 
 fail:
-	spin_unlock_irqrestore(&dev->lock, flags);
+	mutex_unlock(&dev->lock);
 	return ret < 0 ? ret : -EIO;
 }
 
@@ -1235,7 +1231,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	if (!dev->in_out_buffer)
 		return -ENOMEM;
 
-	spin_lock_init(&dev->lock);
+	mutex_init(&dev->lock);
 
 	ret = hid_parse(hdev);
 	if (ret) {

From 8e9faa15469ed7c7467423db4c62aeed3ff4cae3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 30 Jan 2017 11:26:39 +0100
Subject: [PATCH 144/322] HID: cp2112: fix gpio-callback error handling

In case of a zero-length report, the gpio direction_input callback would
currently return success instead of an errno.

Fixes: 1ffb3c40ffb5 ("HID: cp2112: make transfer buffers DMA capable")
Cc: stable <stable@vger.kernel.org>     # 4.9
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-cp2112.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index 3e0b6bad29f2..b22d0f83f8e3 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -213,7 +213,7 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 
 exit:
 	mutex_unlock(&dev->lock);
-	return ret <= 0 ? ret : -EIO;
+	return ret < 0 ? ret : -EIO;
 }
 
 static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value)

From 92c715fca907686f5298220ece53423e38ba3aed Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 31 Jan 2017 10:25:25 +0100
Subject: [PATCH 145/322] drm/atomic: Fix double free in
 drm_atomic_state_default_clear

drm_atomic_helper_page_flip and drm_atomic_ioctl set their own events
in crtc_state->event. But when it's set the event is freed in 2 places.

Solve this by only freeing the event in the atomic ioctl when it
allocated its own event.

This has been broken twice. The first time when the code was introduced,
but only in the corner case when an event is allocated, but more crtc's
were included by atomic check and then failing. This can mostly
happen when you do an atomic modeset in i915 and the display clock is
changed, which forces all crtc's to be included to the state.

This has been broken worse by adding in-fences support, which caused
the double free to be done unconditionally.

[IGT] kms_rotation_crc: starting subtest primary-rotation-180
=============================================================================
BUG kmalloc-128 (Tainted: G     U         ): Object already free
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in drm_atomic_helper_setup_commit+0x285/0x2f0 [drm_kms_helper] age=0 cpu=3 pid=1529
 ___slab_alloc+0x308/0x3b0
 __slab_alloc+0xd/0x20
 kmem_cache_alloc_trace+0x92/0x1c0
 drm_atomic_helper_setup_commit+0x285/0x2f0 [drm_kms_helper]
 intel_atomic_commit+0x35/0x4f0 [i915]
 drm_atomic_commit+0x46/0x50 [drm]
 drm_mode_atomic_ioctl+0x7d4/0xab0 [drm]
 drm_ioctl+0x2b3/0x490 [drm]
 do_vfs_ioctl+0x69c/0x700
 SyS_ioctl+0x4e/0x80
 entry_SYSCALL_64_fastpath+0x13/0x94
INFO: Freed in drm_event_cancel_free+0xa3/0xb0 [drm] age=0 cpu=3 pid=1529
 __slab_free+0x48/0x2e0
 kfree+0x159/0x1a0
 drm_event_cancel_free+0xa3/0xb0 [drm]
 drm_mode_atomic_ioctl+0x86d/0xab0 [drm]
 drm_ioctl+0x2b3/0x490 [drm]
 do_vfs_ioctl+0x69c/0x700
 SyS_ioctl+0x4e/0x80
 entry_SYSCALL_64_fastpath+0x13/0x94
INFO: Slab 0xffffde1f0997b080 objects=17 used=2 fp=0xffff92fb65ec2578 flags=0x200000000008101
INFO: Object 0xffff92fb65ec2578 @offset=1400 fp=0xffff92fb65ec2ae8

Redzone ffff92fb65ec2570: bb bb bb bb bb bb bb bb                          ........
Object ffff92fb65ec2578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff92fb65ec2588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff92fb65ec2598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff92fb65ec25a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff92fb65ec25b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff92fb65ec25c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff92fb65ec25d8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
Object ffff92fb65ec25e8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5  kkkkkkkkkkkkkkk.
Redzone ffff92fb65ec25f8: bb bb bb bb bb bb bb bb                          ........
Padding ffff92fb65ec2738: 5a 5a 5a 5a 5a 5a 5a 5a                          ZZZZZZZZ
CPU: 3 PID: 180 Comm: kworker/3:2 Tainted: G    BU          4.10.0-rc6-patser+ #5039
Hardware name:                  /NUC5PPYB, BIOS PYBSWCEL.86A.0031.2015.0601.1712 06/01/2015
Workqueue: events intel_atomic_helper_free_state [i915]
Call Trace:
 dump_stack+0x4d/0x6d
 print_trailer+0x20c/0x220
 free_debug_processing+0x1c6/0x330
 ? drm_atomic_state_default_clear+0xf7/0x1c0 [drm]
 __slab_free+0x48/0x2e0
 ? drm_atomic_state_default_clear+0xf7/0x1c0 [drm]
 kfree+0x159/0x1a0
 drm_atomic_state_default_clear+0xf7/0x1c0 [drm]
 ? drm_atomic_state_clear+0x30/0x30 [drm]
 intel_atomic_state_clear+0xd/0x20 [i915]
 drm_atomic_state_clear+0x1a/0x30 [drm]
 __drm_atomic_state_free+0x13/0x60 [drm]
 intel_atomic_helper_free_state+0x5d/0x70 [i915]
 process_one_work+0x260/0x4a0
 worker_thread+0x2d1/0x4f0
 kthread+0x127/0x130
 ? process_one_work+0x4a0/0x4a0
 ? kthread_stop+0x120/0x120
 ret_from_fork+0x29/0x40
FIX kmalloc-128: Object at 0xffff92fb65ec2578 not freed

Fixes: 3b24f7d67581 ("drm/atomic: Add struct drm_crtc_commit to track async updates")
Fixes: 9626014258a5 ("drm/fence: add in-fences support")
Cc: <stable@vger.kernel.org> # v4.8+
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1485854725-27640-1-git-send-email-maarten.lankhorst@linux.intel.com
---
 drivers/gpu/drm/drm_atomic.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 50f5cf7b69d1..fdfb1ec17e66 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -2032,13 +2032,16 @@ static void complete_crtc_signaling(struct drm_device *dev,
 	}
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		struct drm_pending_vblank_event *event = crtc_state->event;
 		/*
-		 * TEST_ONLY and PAGE_FLIP_EVENT are mutually
-		 * exclusive, if they weren't, this code should be
-		 * called on success for TEST_ONLY too.
+		 * Free the allocated event. drm_atomic_helper_setup_commit
+		 * can allocate an event too, so only free it if it's ours
+		 * to prevent a double free in drm_atomic_state_clear.
 		 */
-		if (crtc_state->event)
-			drm_event_cancel_free(dev, &crtc_state->event->base);
+		if (event && (event->base.fence || event->base.file_priv)) {
+			drm_event_cancel_free(dev, &event->base);
+			crtc_state->event = NULL;
+		}
 	}
 
 	if (!fence_state)

From 79c6f448c8b79c321e4a1f31f98194e4f6b6cae7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 30 Jan 2017 19:27:10 -0500
Subject: [PATCH 146/322] tracing: Fix hwlat kthread migration

The hwlat tracer creates a kernel thread at start of the tracer. It is
pinned to a single CPU and will move to the next CPU after each period of
running. If the user modifies the migration thread's affinity, it will not
change after that happens.

The original code created the thread at the first instance it was called,
but later was changed to destroy the thread after the tracer was finished,
and would not be created until the next instance of the tracer was
established. The code that initialized the affinity was only called on the
initial instantiation of the tracer. After that, it was not initialized, and
the previous affinity did not match the current newly created one, making
it appear that the user modified the thread's affinity when it did not, and
the thread failed to migrate again.

Cc: stable@vger.kernel.org
Fixes: 0330f7aa8ee6 ("tracing: Have hwlat trace migrate across tracing_cpumask CPUs")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_hwlat.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 775569ec50d0..af344a1bf0d0 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -266,7 +266,7 @@ static int get_sample(void)
 static struct cpumask save_cpumask;
 static bool disable_migrate;
 
-static void move_to_next_cpu(void)
+static void move_to_next_cpu(bool initmask)
 {
 	static struct cpumask *current_mask;
 	int next_cpu;
@@ -275,7 +275,7 @@ static void move_to_next_cpu(void)
 		return;
 
 	/* Just pick the first CPU on first iteration */
-	if (!current_mask) {
+	if (initmask) {
 		current_mask = &save_cpumask;
 		get_online_cpus();
 		cpumask_and(current_mask, cpu_online_mask, tracing_buffer_mask);
@@ -330,10 +330,12 @@ static void move_to_next_cpu(void)
 static int kthread_fn(void *data)
 {
 	u64 interval;
+	bool initmask = true;
 
 	while (!kthread_should_stop()) {
 
-		move_to_next_cpu();
+		move_to_next_cpu(initmask);
+		initmask = false;
 
 		local_irq_disable();
 		get_sample();

From d07830db1bdb254e4b50d366010b219286b8c937 Mon Sep 17 00:00:00 2001
From: "Marcel J.E. Mol" <marcel@mesa.nl>
Date: Mon, 30 Jan 2017 19:26:40 +0100
Subject: [PATCH 147/322] USB: serial: pl2303: add ATEN device ID

Seems that ATEN serial-to-usb devices using pl2303 exist with
different device ids. This patch adds a missing device ID so it
is recognised by the driver.

Signed-off-by: Marcel J.E. Mol <marcel@mesa.nl>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/pl2303.c | 1 +
 drivers/usb/serial/pl2303.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 46fca6b75846..1db4b61bdf7b 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -49,6 +49,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) },
 	{ USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) },
 	{ USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) },
+	{ USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) },
 	{ USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) },
 	{ USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) },
 	{ USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID_UCSGT) },
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index e3b7af8adfb7..09d9be88209e 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -27,6 +27,7 @@
 #define ATEN_VENDOR_ID		0x0557
 #define ATEN_VENDOR_ID2		0x0547
 #define ATEN_PRODUCT_ID		0x2008
+#define ATEN_PRODUCT_ID2	0x2118
 
 #define IODATA_VENDOR_ID	0x04bb
 #define IODATA_PRODUCT_ID	0x0a03

From d19fb70dd68c4e960e2ac09b0b9c79dfdeefa726 Mon Sep 17 00:00:00 2001
From: Kinglong Mee <kinglongmee@gmail.com>
Date: Wed, 18 Jan 2017 19:04:42 +0800
Subject: [PATCH 148/322] NFSD: Fix a null reference case in
 find_or_create_lock_stateid()

nfsd assigns the nfs4_free_lock_stateid to .sc_free in init_lock_stateid().

If nfsd doesn't go through init_lock_stateid() and put stateid at end,
there is a NULL reference to .sc_free when calling nfs4_put_stid(ns).

This patch let the nfs4_stid.sc_free assignment to nfs4_alloc_stid().

Cc: stable@vger.kernel.org
Fixes: 356a95ece7aa "nfsd: clean up races in lock stateid searching..."
Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4layouts.c |  5 +++--
 fs/nfsd/nfs4state.c   | 19 ++++++++-----------
 fs/nfsd/state.h       |  4 ++--
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 596205d939a1..1fc07a9c70e9 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -223,10 +223,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
 	struct nfs4_layout_stateid *ls;
 	struct nfs4_stid *stp;
 
-	stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
+	stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache,
+					nfsd4_free_layout_stateid);
 	if (!stp)
 		return NULL;
-	stp->sc_free = nfsd4_free_layout_stateid;
+
 	get_nfs4_file(fp);
 	stp->sc_file = fp;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4b4beaaa4eaa..a0dee8ae9f97 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -633,8 +633,8 @@ find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
 	return co;
 }
 
-struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
-					 struct kmem_cache *slab)
+struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
+				  void (*sc_free)(struct nfs4_stid *))
 {
 	struct nfs4_stid *stid;
 	int new_id;
@@ -650,6 +650,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
 	idr_preload_end();
 	if (new_id < 0)
 		goto out_free;
+
+	stid->sc_free = sc_free;
 	stid->sc_client = cl;
 	stid->sc_stateid.si_opaque.so_id = new_id;
 	stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
@@ -675,15 +677,12 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
 static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
 {
 	struct nfs4_stid *stid;
-	struct nfs4_ol_stateid *stp;
 
-	stid = nfs4_alloc_stid(clp, stateid_slab);
+	stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid);
 	if (!stid)
 		return NULL;
 
-	stp = openlockstateid(stid);
-	stp->st_stid.sc_free = nfs4_free_ol_stateid;
-	return stp;
+	return openlockstateid(stid);
 }
 
 static void nfs4_free_deleg(struct nfs4_stid *stid)
@@ -781,11 +780,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
 		goto out_dec;
 	if (delegation_blocked(&current_fh->fh_handle))
 		goto out_dec;
-	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
+	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
 	if (dp == NULL)
 		goto out_dec;
 
-	dp->dl_stid.sc_free = nfs4_free_deleg;
 	/*
 	 * delegation seqid's are never incremented.  The 4.1 special
 	 * meaning of seqid 0 isn't meaningful, really, but let's avoid
@@ -5580,7 +5578,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
 	stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
 	get_nfs4_file(fp);
 	stp->st_stid.sc_file = fp;
-	stp->st_stid.sc_free = nfs4_free_lock_stateid;
 	stp->st_access_bmap = 0;
 	stp->st_deny_bmap = open_stp->st_deny_bmap;
 	stp->st_openstp = open_stp;
@@ -5623,7 +5620,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
 	lst = find_lock_stateid(lo, fi);
 	if (lst == NULL) {
 		spin_unlock(&clp->cl_lock);
-		ns = nfs4_alloc_stid(clp, stateid_slab);
+		ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid);
 		if (ns == NULL)
 			return NULL;
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c9399366f9df..4516e8b7d776 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -603,8 +603,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
 		     stateid_t *stateid, unsigned char typemask,
 		     struct nfs4_stid **s, struct nfsd_net *nn);
-struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
-		struct kmem_cache *slab);
+struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
+				  void (*sc_free)(struct nfs4_stid *));
 void nfs4_unhash_stid(struct nfs4_stid *s);
 void nfs4_put_stid(struct nfs4_stid *s);
 void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);

From 41f53350a0f36a7b8e31bec0d0ca907e028ab4cd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 24 Jan 2017 09:22:41 +0100
Subject: [PATCH 149/322] nfsd: special case truncates some more

Both the NFS protocols and the Linux VFS use a setattr operation with a
bitmap of attributs to set to set various file attributes including the
file size and the uid/gid.

The Linux syscalls never mixes size updates with unrelated updates like
the uid/gid, and some file systems like XFS and GFS2 rely on the fact
that truncates might not update random other attributes, and many other
file systems handle the case but do not update the different attributes
in the same transaction.  NFSD on the other hand passes the attributes
it gets on the wire more or less directly through to the VFS, leading to
updates the file systems don't expect.  XFS at least has an assert on
the allowed attributes, which caught an unusual NFS client setting the
size and group at the same time.

To handle this issue properly this switches nfsd to call vfs_truncate
for size changes, and then handle all other attributes through
notify_change.  As a side effect this also means less boilerplace code
around the size change as we can now reuse the VFS code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/vfs.c | 97 ++++++++++++++++++++-------------------------------
 1 file changed, 37 insertions(+), 60 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 26c6fdb4bf67..ca13236dbb1f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -332,37 +332,6 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
 	}
 }
 
-static __be32
-nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
-		struct iattr *iap)
-{
-	struct inode *inode = d_inode(fhp->fh_dentry);
-	int host_err;
-
-	if (iap->ia_size < inode->i_size) {
-		__be32 err;
-
-		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
-				NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
-		if (err)
-			return err;
-	}
-
-	host_err = get_write_access(inode);
-	if (host_err)
-		goto out_nfserrno;
-
-	host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
-	if (host_err)
-		goto out_put_write_access;
-	return 0;
-
-out_put_write_access:
-	put_write_access(inode);
-out_nfserrno:
-	return nfserrno(host_err);
-}
-
 /*
  * Set various file attributes.  After this call fhp needs an fh_put.
  */
@@ -377,7 +346,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	__be32		err;
 	int		host_err;
 	bool		get_write_count;
-	int		size_change = 0;
 
 	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
 		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +358,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	/* Get inode */
 	err = fh_verify(rqstp, fhp, ftype, accmode);
 	if (err)
-		goto out;
+		return err;
 	if (get_write_count) {
 		host_err = fh_want_write(fhp);
 		if (host_err)
-			return nfserrno(host_err);
+			goto out_host_err;
 	}
 
 	dentry = fhp->fh_dentry;
@@ -405,50 +373,59 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 		iap->ia_valid &= ~ATTR_MODE;
 
 	if (!iap->ia_valid)
-		goto out;
+		return 0;
 
 	nfsd_sanitize_attrs(inode, iap);
 
+	if (check_guard && guardtime != inode->i_ctime.tv_sec)
+		return nfserr_notsync;
+
 	/*
 	 * The size case is special, it changes the file in addition to the
-	 * attributes.
+	 * attributes, and file systems don't expect it to be mixed with
+	 * "random" attribute changes.  We thus split out the size change
+	 * into a separate call for vfs_truncate, and do the rest as a
+	 * a separate setattr call.
 	 */
 	if (iap->ia_valid & ATTR_SIZE) {
-		err = nfsd_get_write_access(rqstp, fhp, iap);
-		if (err)
-			goto out;
-		size_change = 1;
+		struct path path = {
+			.mnt	= fhp->fh_export->ex_path.mnt,
+			.dentry	= dentry,
+		};
+		bool implicit_mtime = false;
 
 		/*
-		 * RFC5661, Section 18.30.4:
-		 *   Changing the size of a file with SETATTR indirectly
-		 *   changes the time_modify and change attributes.
-		 *
-		 * (and similar for the older RFCs)
+		 * vfs_truncate implicity updates the mtime IFF the file size
+		 * actually changes.  Avoid the additional seattr call below if
+		 * the only other attribute that the client sends is the mtime.
 		 */
-		if (iap->ia_size != i_size_read(inode))
-			iap->ia_valid |= ATTR_MTIME;
+		if (iap->ia_size != i_size_read(inode) &&
+		    ((iap->ia_valid & ~(ATTR_SIZE | ATTR_MTIME)) == 0))
+			implicit_mtime = true;
+
+		host_err = vfs_truncate(&path, iap->ia_size);
+		if (host_err)
+			goto out_host_err;
+
+		iap->ia_valid &= ~ATTR_SIZE;
+		if (implicit_mtime)
+			iap->ia_valid &= ~ATTR_MTIME;
+		if (!iap->ia_valid)
+			goto done;
 	}
 
 	iap->ia_valid |= ATTR_CTIME;
 
-	if (check_guard && guardtime != inode->i_ctime.tv_sec) {
-		err = nfserr_notsync;
-		goto out_put_write_access;
-	}
-
 	fh_lock(fhp);
 	host_err = notify_change(dentry, iap, NULL);
 	fh_unlock(fhp);
-	err = nfserrno(host_err);
+	if (host_err)
+		goto out_host_err;
 
-out_put_write_access:
-	if (size_change)
-		put_write_access(inode);
-	if (!err)
-		err = nfserrno(commit_metadata(fhp));
-out:
-	return err;
+done:
+	host_err = commit_metadata(fhp);
+out_host_err:
+	return nfserrno(host_err);
 }
 
 #if defined(CONFIG_NFSD_V4)

From 034dd34ff4916ec1f8f74e39ca3efb04eab2f791 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 31 Jan 2017 11:37:50 -0500
Subject: [PATCH 150/322] svcrpc: fix oops in absence of krb5 module

Olga Kornievskaia says: "I ran into this oops in the nfsd (below)
(4.10-rc3 kernel). To trigger this I had a client (unsuccessfully) try
to mount the server with krb5 where the server doesn't have the
rpcsec_gss_krb5 module built."

The problem is that rsci.cred is copied from a svc_cred structure that
gss_proxy didn't properly initialize.  Fix that.

[120408.542387] general protection fault: 0000 [#1] SMP
...
[120408.565724] CPU: 0 PID: 3601 Comm: nfsd Not tainted 4.10.0-rc3+ #16
[120408.567037] Hardware name: VMware, Inc. VMware Virtual =
Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[120408.569225] task: ffff8800776f95c0 task.stack: ffffc90003d58000
[120408.570483] RIP: 0010:gss_mech_put+0xb/0x20 [auth_rpcgss]
...
[120408.584946]  ? rsc_free+0x55/0x90 [auth_rpcgss]
[120408.585901]  gss_proxy_save_rsc+0xb2/0x2a0 [auth_rpcgss]
[120408.587017]  svcauth_gss_proxy_init+0x3cc/0x520 [auth_rpcgss]
[120408.588257]  ? __enqueue_entity+0x6c/0x70
[120408.589101]  svcauth_gss_accept+0x391/0xb90 [auth_rpcgss]
[120408.590212]  ? try_to_wake_up+0x4a/0x360
[120408.591036]  ? wake_up_process+0x15/0x20
[120408.592093]  ? svc_xprt_do_enqueue+0x12e/0x2d0 [sunrpc]
[120408.593177]  svc_authenticate+0xe1/0x100 [sunrpc]
[120408.594168]  svc_process_common+0x203/0x710 [sunrpc]
[120408.595220]  svc_process+0x105/0x1c0 [sunrpc]
[120408.596278]  nfsd+0xe9/0x160 [nfsd]
[120408.597060]  kthread+0x101/0x140
[120408.597734]  ? nfsd_destroy+0x60/0x60 [nfsd]
[120408.598626]  ? kthread_park+0x90/0x90
[120408.599448]  ret_from_fork+0x22/0x30

Fixes: 1d658336b05f "SUNRPC: Add RPC based upcall mechanism for RPCGSS auth"
Cc: stable@vger.kernel.org
Cc: Simo Sorce <simo@redhat.com>
Reported-by: Olga Kornievskaia <kolga@netapp.com>
Tested-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/gss_rpc_xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index dc6fb79a361f..25d9a9cf7b66 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -260,7 +260,7 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
 	if (!oa->data)
 		return -ENOMEM;
 
-	creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
+	creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
 	if (!creds) {
 		kfree(oa->data);
 		return -ENOMEM;

From c73e44269369e936165f0f9b61f1f09a11dae01c Mon Sep 17 00:00:00 2001
From: Vincent <vincent.stehle@laposte.net>
Date: Mon, 30 Jan 2017 15:06:43 +0100
Subject: [PATCH 151/322] net: thunderx: avoid dereferencing xcv when NULL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the following smatch and coccinelle warnings:

  drivers/net/ethernet/cavium/thunder/thunder_xcv.c:119 xcv_setup_link() error: we previously assumed 'xcv' could be null (see line 118) [smatch]
  drivers/net/ethernet/cavium/thunder/thunder_xcv.c:119:16-20: ERROR: xcv is NULL but dereferenced. [coccinelle]

Fixes: 6465859aba1e66a5 ("net: thunderx: Add RGMII interface type support")
Signed-off-by: Vincent Stehlé <vincent.stehle@laposte.net>
Cc: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_xcv.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
index 67befedef709..578c7f8f11bf 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
@@ -116,8 +116,7 @@ void xcv_setup_link(bool link_up, int link_speed)
 	int speed = 2;
 
 	if (!xcv) {
-		dev_err(&xcv->pdev->dev,
-			"XCV init not done, probe may have failed\n");
+		pr_err("XCV init not done, probe may have failed\n");
 		return;
 	}
 

From 90427ef5d2a4b9a24079889bf16afdcdaebc4240 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <dmichail@google.com>
Date: Mon, 30 Jan 2017 14:09:42 -0800
Subject: [PATCH 152/322] ipv6: fix flow labels when the traffic class is non-0

ip6_make_flowlabel() determines the flow label for IPv6 packets. It's
supposed to be passed a flow label, which it returns as is if non-0 and
in some other cases, otherwise it calculates a new value.

The problem is callers often pass a flowi6.flowlabel, which may also
contain traffic class bits. If the traffic class is non-0
ip6_make_flowlabel() mistakes the non-0 it gets as a flow label and
returns the whole thing. Thus it can return a 'flow label' longer than
20b and the low 20b of that is typically 0 resulting in packets with 0
label. Moreover, different packets of a flow may be labeled differently.
For a TCP flow with ECN non-payload and payload packets get different
labels as exemplified by this pair of consecutive packets:

(pure ACK)
Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
    .... .... .... 0001 1100 1110 0100 1001 = Flow Label: 0x1ce49
    Payload Length: 32
    Next Header: TCP (6)

(payload)
Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0010 .... .... .... .... .... = Traffic Class: 0x02 (DSCP: CS0, ECN: ECT(0))
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..10 .... .... .... .... .... = Explicit Congestion Notification: ECN-Capable Transport codepoint '10' (2)
    .... .... .... 0000 0000 0000 0000 0000 = Flow Label: 0x00000
    Payload Length: 688
    Next Header: TCP (6)

This patch allows ip6_make_flowlabel() to be passed more than just a
flow label and has it extract the part it really wants. This was simpler
than modifying the callers. With this patch packets like the above become

Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
    .... .... .... 1010 1111 1010 0101 1110 = Flow Label: 0xafa5e
    Payload Length: 32
    Next Header: TCP (6)

Internet Protocol Version 6, Src: 2002:af5:11a3::, Dst: 2002:af5:11a2::
    0110 .... = Version: 6
    .... 0000 0010 .... .... .... .... .... = Traffic Class: 0x02 (DSCP: CS0, ECN: ECT(0))
        .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
        .... .... ..10 .... .... .... .... .... = Explicit Congestion Notification: ECN-Capable Transport codepoint '10' (2)
    .... .... .... 1010 1111 1010 0101 1110 = Flow Label: 0xafa5e
    Payload Length: 688
    Next Header: TCP (6)

Signed-off-by: Dimitris Michailidis <dmichail@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7afe991e900e..dbf0abba33b8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -776,6 +776,11 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 {
 	u32 hash;
 
+	/* @flowlabel may include more than a flow label, eg, the traffic class.
+	 * Here we want only the flow label value.
+	 */
+	flowlabel &= IPV6_FLOWLABEL_MASK;
+
 	if (flowlabel ||
 	    net->ipv6.sysctl.auto_flowlabels == IP6_AUTO_FLOW_LABEL_OFF ||
 	    (!autolabel &&

From 62deb8187d116581c88c69a2dd9b5c16588545d4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 18 Jan 2017 14:29:17 +0000
Subject: [PATCH 153/322] FS-Cache: Initialise stores_lock in netfs cookie

Initialise the stores_lock in fscache netfs cookies.  Technically, it
shouldn't be necessary, since the netfs cookie is an index and stores no
data, but initialising it anyway adds insignificant overhead.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fscache/netfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index 9b28649df3a1..a8aa00be4444 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
 	cookie->flags		= 1 << FSCACHE_COOKIE_ENABLED;
 
 	spin_lock_init(&cookie->lock);
+	spin_lock_init(&cookie->stores_lock);
 	INIT_HLIST_HEAD(&cookie->backing_objects);
 
 	/* check the netfs type is not already present */

From 6bdded59c8933940ac7e5b416448276ac89d1144 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 18 Jan 2017 14:29:25 +0000
Subject: [PATCH 154/322] fscache: Clear outstanding writes when disabling a
 cookie

fscache_disable_cookie() needs to clear the outstanding writes on the
cookie it's disabling because they cannot be completed after.

Without this, fscache_nfs_open_file() gets stuck because it disables the
cookie when the file is opened for writing but can't uncache the pages till
afterwards - otherwise there's a race between the open routine and anyone
who already has it open R/O and is still reading from it.

Looking in /proc/pid/stack of the offending process shows:

[<ffffffffa0142883>] __fscache_wait_on_page_write+0x82/0x9b [fscache]
[<ffffffffa014336e>] __fscache_uncache_all_inode_pages+0x91/0xe1 [fscache]
[<ffffffffa01740fa>] nfs_fscache_open_file+0x59/0x9e [nfs]
[<ffffffffa01ccf41>] nfs4_file_open+0x17f/0x1b8 [nfsv4]
[<ffffffff8117350e>] do_dentry_open+0x16d/0x2b7
[<ffffffff811743ac>] vfs_open+0x5c/0x65
[<ffffffff81184185>] path_openat+0x785/0x8fb
[<ffffffff81184343>] do_filp_open+0x48/0x9e
[<ffffffff81174710>] do_sys_open+0x13b/0x1cb
[<ffffffff811747b9>] SyS_open+0x19/0x1b
[<ffffffff81001c44>] do_syscall_64+0x80/0x17a
[<ffffffff8165c2da>] return_from_SYSCALL_64+0x0/0x7a
[<ffffffffffffffff>] 0xffffffffffffffff

Reported-by: Jianhong Yin <jiyin@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fscache/cookie.c | 5 +++++
 fs/fscache/object.c | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 4304072161aa..40d61077bead 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
 		hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
 			if (invalidate)
 				set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
+			clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 			fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
 		}
 	} else {
@@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
 		wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
 				 TASK_UNINTERRUPTIBLE);
 
+	/* Make sure any pending writes are cancelled. */
+	if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
+		fscache_invalidate_writes(cookie);
+
 	/* Reset the cookie state if it wasn't relinquished */
 	if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) {
 		atomic_inc(&cookie->n_active);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 9e792e30f4db..be02a086ed9b 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -645,6 +645,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob
 	fscache_mark_object_dead(object);
 	object->oob_event_mask = 0;
 
+	if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) {
+		/* Reject any new read/write ops and abort any that are pending. */
+		clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+		fscache_cancel_all_ops(object);
+	}
+
 	if (list_empty(&object->dependents) &&
 	    object->n_ops == 0 &&
 	    object->n_children == 0)

From e26bfebdfc0d212d366de9990a096665d5c0209a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 31 Jan 2017 09:45:28 +0000
Subject: [PATCH 155/322] fscache: Fix dead object requeue

Under some circumstances, an fscache object can become queued such that it
fscache_object_work_func() can be called once the object is in the
OBJECT_DEAD state.  This results in the kernel oopsing when it tries to
invoke the handler for the state (which is hard coded to 0x2).

The way this comes about is something like the following:

 (1) The object dispatcher is processing a work state for an object.  This
     is done in workqueue context.

 (2) An out-of-band event comes in that isn't masked, causing the object to
     be queued, say EV_KILL.

 (3) The object dispatcher finishes processing the current work state on
     that object and then sees there's another event to process, so,
     without returning to the workqueue core, it processes that event too.
     It then follows the chain of events that initiates until we reach
     OBJECT_DEAD without going through a wait state (such as
     WAIT_FOR_CLEARANCE).

     At this point, object->events may be 0, object->event_mask will be 0
     and oob_event_mask will be 0.

 (4) The object dispatcher returns to the workqueue processor, and in due
     course, this sees that the object's work item is still queued and
     invokes it again.

 (5) The current state is a work state (OBJECT_DEAD), so the dispatcher
     jumps to it - resulting in an OOPS.

When I'm seeing this, the work state in (1) appears to have been either
LOOK_UP_OBJECT or CREATE_OBJECT (object->oob_table is
fscache_osm_lookup_oob).

The window for (2) is very small:

 (A) object->event_mask is cleared whilst the event dispatch process is
     underway - though there's no memory barrier to force this to the top
     of the function.

     The window, therefore is from the time the object was selected by the
     workqueue processor and made requeueable to the time the mask was
     cleared.

 (B) fscache_raise_event() will only queue the object if it manages to set
     the event bit and the corresponding event_mask bit was set.

     The enqueuement is then deferred slightly whilst we get a ref on the
     object and get the per-CPU variable for workqueue congestion.  This
     slight deferral slightly increases the probability by allowing extra
     time for the workqueue to make the item requeueable.

Handle this by giving the dead state a processor function and checking the
for the dead state address rather than seeing if the processor function is
address 0x2.  The dead state processor function can then set a flag to
indicate that it's occurred and give a warning if it occurs more than once
per object.

If this race occurs, an oops similar to the following is seen (note the RIP
value):

BUG: unable to handle kernel NULL pointer dereference at 0000000000000002
IP: [<0000000000000002>] 0x1
PGD 0
Oops: 0010 [#1] SMP
Modules linked in: ...
CPU: 17 PID: 16077 Comm: kworker/u48:9 Not tainted 3.10.0-327.18.2.el7.x86_64 #1
Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015
Workqueue: fscache_object fscache_object_work_func [fscache]
task: ffff880302b63980 ti: ffff880717544000 task.ti: ffff880717544000
RIP: 0010:[<0000000000000002>]  [<0000000000000002>] 0x1
RSP: 0018:ffff880717547df8  EFLAGS: 00010202
RAX: ffffffffa0368640 RBX: ffff880edf7a4480 RCX: dead000000200200
RDX: 0000000000000002 RSI: 00000000ffffffff RDI: ffff880edf7a4480
RBP: ffff880717547e18 R08: 0000000000000000 R09: dfc40a25cb3a4510
R10: dfc40a25cb3a4510 R11: 0000000000000400 R12: 0000000000000000
R13: ffff880edf7a4510 R14: ffff8817f6153400 R15: 0000000000000600
FS:  0000000000000000(0000) GS:ffff88181f420000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000002 CR3: 000000000194a000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
 ffffffffa0363695 ffff880edf7a4510 ffff88093f16f900 ffff8817faa4ec00
 ffff880717547e60 ffffffff8109d5db 00000000faa4ec18 0000000000000000
 ffff8817faa4ec18 ffff88093f16f930 ffff880302b63980 ffff88093f16f900
Call Trace:
 [<ffffffffa0363695>] ? fscache_object_work_func+0xa5/0x200 [fscache]
 [<ffffffff8109d5db>] process_one_work+0x17b/0x470
 [<ffffffff8109e4ac>] worker_thread+0x21c/0x400
 [<ffffffff8109e290>] ? rescuer_thread+0x400/0x400
 [<ffffffff810a5acf>] kthread+0xcf/0xe0
 [<ffffffff810a5a00>] ? kthread_create_on_node+0x140/0x140
 [<ffffffff816460d8>] ret_from_fork+0x58/0x90
 [<ffffffff810a5a00>] ? kthread_create_on_node+0x140/0x140

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeremy McNicoll <jeremymc@redhat.com>
Tested-by: Frank Sorenson <sorenson@redhat.com>
Tested-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fscache/object.c           | 26 ++++++++++++++++++++++++--
 include/linux/fscache-cache.h |  1 +
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index be02a086ed9b..7a182c87f378 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object
 static const struct fscache_state *fscache_object_available(struct fscache_object *, int);
 static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int);
 static const struct fscache_state *fscache_update_object(struct fscache_object *, int);
+static const struct fscache_state *fscache_object_dead(struct fscache_object *, int);
 
 #define __STATE_NAME(n) fscache_osm_##n
 #define STATE(n) (&__STATE_NAME(n))
@@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE,	"LCFL", fscache_lookup_failure);
 static WORK_STATE(KILL_OBJECT,		"KILL", fscache_kill_object);
 static WORK_STATE(KILL_DEPENDENTS,	"KDEP", fscache_kill_dependents);
 static WORK_STATE(DROP_OBJECT,		"DROP", fscache_drop_object);
-static WORK_STATE(OBJECT_DEAD,		"DEAD", (void*)2UL);
+static WORK_STATE(OBJECT_DEAD,		"DEAD", fscache_object_dead);
 
 static WAIT_STATE(WAIT_FOR_INIT,	"?INI",
 		  TRANSIT_TO(INIT_OBJECT,	1 << FSCACHE_OBJECT_EV_NEW_CHILD));
@@ -229,6 +230,10 @@ static void fscache_object_sm_dispatcher(struct fscache_object *object)
 	event = -1;
 	if (new_state == NO_TRANSIT) {
 		_debug("{OBJ%x} %s notrans", object->debug_id, state->name);
+		if (unlikely(state == STATE(OBJECT_DEAD))) {
+			_leave(" [dead]");
+			return;
+		}
 		fscache_enqueue_object(object);
 		event_mask = object->oob_event_mask;
 		goto unmask_events;
@@ -239,7 +244,7 @@ static void fscache_object_sm_dispatcher(struct fscache_object *object)
 	object->state = state = new_state;
 
 	if (state->work) {
-		if (unlikely(state->work == ((void *)2UL))) {
+		if (unlikely(state == STATE(OBJECT_DEAD))) {
 			_leave(" [dead]");
 			return;
 		}
@@ -1083,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object,
 	}
 }
 EXPORT_SYMBOL(fscache_object_mark_killed);
+
+/*
+ * The object is dead.  We can get here if an object gets queued by an event
+ * that would lead to its death (such as EV_KILL) when the dispatcher is
+ * already running (and so can be requeued) but hasn't yet cleared the event
+ * mask.
+ */
+static const struct fscache_state *fscache_object_dead(struct fscache_object *object,
+						       int event)
+{
+	if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD,
+			      &object->flags))
+		return NO_TRANSIT;
+
+	WARN(true, "FS-Cache object redispatched after death");
+	return NO_TRANSIT;
+}
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 13ba552e6c09..4c467ef50159 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -360,6 +360,7 @@ struct fscache_object {
 #define FSCACHE_OBJECT_IS_AVAILABLE	5	/* T if object has become active */
 #define FSCACHE_OBJECT_RETIRED		6	/* T if object was retired on relinquishment */
 #define FSCACHE_OBJECT_KILLED_BY_CACHE	7	/* T if object was killed by the cache */
+#define FSCACHE_OBJECT_RUN_AFTER_DEAD	8	/* T if object has been dispatched after death */
 
 	struct list_head	cache_link;	/* link in cache->object_list */
 	struct hlist_node	cookie_link;	/* link in cookie->backing_objects */

From aaaec6fc755447a1d056765b11b24d8ff2b81366 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 19:03:21 +0100
Subject: [PATCH 156/322] x86/irq: Make irq activate operations symmetric

The recent commit which prevents double activation of interrupts unearthed
interesting code in x86. The code (ab)uses irq_domain_activate_irq() to
reconfigure an already activated interrupt. That trips over the prevention
code now.

Fix it by deactivating the interrupt before activating the new configuration.

Fixes: 08d85f3ea99f1 "irqdomain: Avoid activating interrupts more than once"
Reported-and-tested-by: Mike Galbraith <efault@gmx.de>
Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701311901580.3457@nanos
---
 arch/x86/kernel/apic/io_apic.c | 2 ++
 arch/x86/kernel/hpet.c         | 1 +
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1e35dd06b090..52f352b063fd 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2117,6 +2117,7 @@ static inline void __init check_timer(void)
 			if (idx != -1 && irq_trigger(idx))
 				unmask_ioapic_irq(irq_get_chip_data(0));
 		}
+		irq_domain_deactivate_irq(irq_data);
 		irq_domain_activate_irq(irq_data);
 		if (timer_irq_works()) {
 			if (disable_timer_pin_1 > 0)
@@ -2138,6 +2139,7 @@ static inline void __init check_timer(void)
 		 * legacy devices should be connected to IO APIC #0
 		 */
 		replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
+		irq_domain_deactivate_irq(irq_data);
 		irq_domain_activate_irq(irq_data);
 		legacy_pic->unmask(0);
 		if (timer_irq_works()) {
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 85e87b46c318..dc6ba5bda9fc 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -352,6 +352,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
 	} else {
 		struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
 
+		irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
 		irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
 		disable_irq(hdev->irq);
 		irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));

From 0becc0ae5b42828785b589f686725ff5bc3b9b25 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 09:37:34 +0100
Subject: [PATCH 157/322] x86/mce: Make timer handling more robust

Erik reported that on a preproduction hardware a CMCI storm triggers the
BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is
started by the CMCI logic before the MCE CPU hotplug callback starts the
timer with add_timer_on(). So the timer is already queued which triggers
the BUG.

Using add_timer_on() is pretty pointless in this code because the timer is
strictlty per CPU, initialized as pinned and all operations which arm the
timer happen on the CPU to which the timer belongs.

Simplify the whole machinery by using mod_timer() instead of add_timer_on()
which avoids the problem because mod_timer() can handle already queued
timers. Use __start_timer() everywhere so the earliest armed expiry time is
preserved.

Reported-by: Erik Veijola <erik.veijola@intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 00ef43233e03..537c6647d84c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 
 static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static void __restart_timer(struct timer_list *t, unsigned long interval)
+static void __start_timer(struct timer_list *t, unsigned long interval)
 {
 	unsigned long when = jiffies + interval;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	if (timer_pending(t)) {
-		if (time_before(when, t->expires))
-			mod_timer(t, when);
-	} else {
-		t->expires = round_jiffies(when);
-		add_timer_on(t, smp_processor_id());
-	}
+	if (!timer_pending(t) || time_before(when, t->expires))
+		mod_timer(t, round_jiffies(when));
 
 	local_irq_restore(flags);
 }
@@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data)
 
 done:
 	__this_cpu_write(mce_next_interval, iv);
-	__restart_timer(t, iv);
+	__start_timer(t, iv);
 }
 
 /*
@@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval)
 	struct timer_list *t = this_cpu_ptr(&mce_timer);
 	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	__restart_timer(t, interval);
+	__start_timer(t, interval);
 
 	if (interval < iv)
 		__this_cpu_write(mce_next_interval, interval);
@@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+static void mce_start_timer(struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
 
 	if (mca_cfg.ignore_ce || !iv)
 		return;
 
-	per_cpu(mce_next_interval, cpu) = iv;
-
-	t->expires = round_jiffies(jiffies + iv);
-	add_timer_on(t, cpu);
+	this_cpu_write(mce_next_interval, iv);
+	__start_timer(t, iv);
 }
 
 static void __mcheck_cpu_setup_timer(void)
@@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void)
 	unsigned int cpu = smp_processor_id();
 
 	setup_pinned_timer(t, mce_timer_fn, cpu);
-	mce_start_timer(cpu, t);
+	mce_start_timer(t);
 }
 
 /* Handle unconfigured int18 (should never happen) */
@@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu)
 
 static int mce_cpu_online(unsigned int cpu)
 {
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
 	int ret;
 
 	mce_device_create(cpu);
@@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu)
 		return ret;
 	}
 	mce_reenable_cpu();
-	mce_start_timer(cpu, t);
+	mce_start_timer(t);
 	return 0;
 }
 
 static int mce_cpu_pre_down(unsigned int cpu)
 {
-	struct timer_list *t = &per_cpu(mce_timer, cpu);
+	struct timer_list *t = this_cpu_ptr(&mce_timer);
 
 	mce_disable_cpu();
 	del_timer_sync(t);

From 970d14e3989160ee9e97c7d75ecbc893fd29dab9 Mon Sep 17 00:00:00 2001
From: Bhumika Goyal <bhumirks@gmail.com>
Date: Wed, 25 Jan 2017 00:54:07 +0530
Subject: [PATCH 158/322] nvdimm: constify device_type structures

Declare device_type structure as const as it is only stored in the
type field of a device structure. This field is of type const, so add
const to declaration of device_type structure.

File size before:
  text	   data	    bss	    dec	    hex	filename
  19278	   3199	     16	  22493	   57dd	nvdimm/namespace_devs.o

File size after:
  text	   data	    bss	    dec	    hex	filename
  19929	   3160	     16	  23105	   5a41	nvdimm/namespace_devs.o

Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/namespace_devs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index a518cb1b59d4..eda027db0918 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -52,17 +52,17 @@ static void namespace_blk_release(struct device *dev)
 	kfree(nsblk);
 }
 
-static struct device_type namespace_io_device_type = {
+static const struct device_type namespace_io_device_type = {
 	.name = "nd_namespace_io",
 	.release = namespace_io_release,
 };
 
-static struct device_type namespace_pmem_device_type = {
+static const struct device_type namespace_pmem_device_type = {
 	.name = "nd_namespace_pmem",
 	.release = namespace_pmem_release,
 };
 
-static struct device_type namespace_blk_device_type = {
+static const struct device_type namespace_blk_device_type = {
 	.name = "nd_namespace_blk",
 	.release = namespace_blk_release,
 };

From 9d032f4201d39e5cf43a8709a047e481f5723fdc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 25 Jan 2017 00:54:07 +0530
Subject: [PATCH 159/322] libnvdimm, namespace: do not delete namespace-id 0

Given that the naming of pmem devices changes from the pmemX form to the
pmemX.Y form when namespace id is greater than 0, arrange for namespaces
with id-0 to be exempt from deletion. Otherwise a simple reconfiguration
of an existing namespace to a new mode results in a name change of the
resulting block device:

    # ndctl list --namespace=namespace1.0
    {
      "dev":"namespace1.0",
      "mode":"raw",
      "size":2147483648,
      "uuid":"3dadf3dc-89b9-4b24-b20e-abc8a4707ce3",
      "blockdev":"pmem1"
    }

    # ndctl create-namespace --reconfig=namespace1.0 --mode=memory --force
    {
      "dev":"namespace1.1",
      "mode":"memory",
      "size":2111832064,
      "uuid":"7b4a6341-7318-4219-a02c-fb57c0bbf613",
      "blockdev":"pmem1.1"
    }

This change does require tooling changes to explicitly look for
namespaceX.0 if the seed has already advanced to another namespace.

Cc: <stable@vger.kernel.org>
Fixes: 98a29c39dc68 ("libnvdimm, namespace: allow creation of multiple pmem-namespaces per region")
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/namespace_devs.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index eda027db0918..ce3e8dfa10ad 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -962,8 +962,8 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 	struct nvdimm_drvdata *ndd;
 	struct nd_label_id label_id;
 	u32 flags = 0, remainder;
+	int rc, i, id = -1;
 	u8 *uuid = NULL;
-	int rc, i;
 
 	if (dev->driver || ndns->claim)
 		return -EBUSY;
@@ -972,11 +972,13 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
 
 		uuid = nspm->uuid;
+		id = nspm->id;
 	} else if (is_namespace_blk(dev)) {
 		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
 
 		uuid = nsblk->uuid;
 		flags = NSLABEL_FLAG_LOCAL;
+		id = nsblk->id;
 	}
 
 	/*
@@ -1039,10 +1041,11 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 
 	/*
 	 * Try to delete the namespace if we deleted all of its
-	 * allocation, this is not the seed device for the region, and
-	 * it is not actively claimed by a btt instance.
+	 * allocation, this is not the seed or 0th device for the
+	 * region, and it is not actively claimed by a btt, pfn, or dax
+	 * instance.
 	 */
-	if (val == 0 && nd_region->ns_seed != dev && !ndns->claim)
+	if (val == 0 && id != 0 && nd_region->ns_seed != dev && !ndns->claim)
 		nd_device_unregister(dev, ND_ASYNC);
 
 	return rc;

From 4b3e6f2ef3722f1a6a97b6034ed492c1a21fd4ae Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Tue, 31 Jan 2017 18:35:37 -0800
Subject: [PATCH 160/322] xtensa: fix noMMU build on cores with MMU

Commit bf15f86b343ed8 ("xtensa: initialize MMU before jumping to reset
vector") calls MMU management functions even when CONFIG_MMU is not
selected. That breaks noMMU build on cores with MMU.

Don't manage MMU when CONFIG_MMU is not selected.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 88a044af7504..32cdc2c52e98 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -540,7 +540,7 @@ subsys_initcall(topology_init);
 
 void cpu_reset(void)
 {
-#if XCHAL_HAVE_PTP_MMU
+#if XCHAL_HAVE_PTP_MMU && IS_ENABLED(CONFIG_MMU)
 	local_irq_disable();
 	/*
 	 * We have full MMU: all autoload ways, ways 7, 8 and 9 of DTLB must

From 2780f3c8f0233de90b6b47a23fc422b7780c5436 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Date: Wed, 25 Jan 2017 22:07:06 -0200
Subject: [PATCH 161/322] scsi: qla2xxx: Avoid that issuing a LIP triggers a
 kernel crash

Avoid that issuing a LIP as follows:

  find /sys -name 'issue_lip'|while read f; do echo 1 > $f; done

triggers the following:

BUG: unable to handle kernel NULL pointer dereference at (null)
Call Trace:
 qla2x00_abort_all_cmds+0xed/0x140 [qla2xxx]
 qla2x00_abort_isp_cleanup+0x1e3/0x280 [qla2xxx]
 qla2x00_abort_isp+0xef/0x690 [qla2xxx]
 qla2x00_do_dpc+0x36c/0x880 [qla2xxx]
 kthread+0x10c/0x140

[mkp: consolidated Mauricio's and Bart's fixes]

Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Reported-by: Bart Van Assche <bart.vanassche@sandisk.com>
Fixes: 1535aa75a3d8 ("qla2xxx: fix invalid DMA access after command aborts in PCI device remove")
Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index ad4edc13ebcf..1cd3c48a6350 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1616,7 +1616,7 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
 				/* Don't abort commands in adapter during EEH
 				 * recovery as it's not accessible/responding.
 				 */
-				if (!ha->flags.eeh_busy) {
+				if (GET_CMD_SP(sp) && !ha->flags.eeh_busy) {
 					/* Get a reference to the sp and drop the lock.
 					 * The reference ensures this sp->done() call
 					 * - and not the call in qla2xxx_eh_abort() -

From f2e767bb5d6ee0d988cb7d4e54b0b21175802b6b Mon Sep 17 00:00:00 2001
From: Ram Pai <linuxram@us.ibm.com>
Date: Thu, 26 Jan 2017 16:37:01 -0200
Subject: [PATCH 162/322] scsi: mpt3sas: Force request partial completion
 alignment

The firmware or device, possibly under a heavy I/O load, can return on a
partial unaligned boundary. Scsi-ml expects these requests to be
completed on an alignment boundary. Scsi-ml blindly requeues the I/O
without checking the alignment boundary of the I/O request for the
remaining bytes. This leads to errors, since devices cannot perform
non-aligned read/write operations.

This patch fixes the issue in the driver. It aligns unaligned
completions of FS requests, by truncating them to the nearest alignment
boundary.

[mkp: simplified if statement]

Reported-by: Mauricio Faria De Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Acked-by: Sreekanth Reddy <Sreekanth.Reddy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 75f3fce1c867..268103182d34 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4657,6 +4657,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 	struct MPT3SAS_DEVICE *sas_device_priv_data;
 	u32 response_code = 0;
 	unsigned long flags;
+	unsigned int sector_sz;
 
 	mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
 	scmd = _scsih_scsi_lookup_get_clear(ioc, smid);
@@ -4715,6 +4716,20 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
 	}
 
 	xfer_cnt = le32_to_cpu(mpi_reply->TransferCount);
+
+	/* In case of bogus fw or device, we could end up having
+	 * unaligned partial completion. We can force alignment here,
+	 * then scsi-ml does not need to handle this misbehavior.
+	 */
+	sector_sz = scmd->device->sector_size;
+	if (unlikely(scmd->request->cmd_type == REQ_TYPE_FS && sector_sz &&
+		     xfer_cnt % sector_sz)) {
+		sdev_printk(KERN_INFO, scmd->device,
+		    "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n",
+			    xfer_cnt, sector_sz);
+		xfer_cnt = round_down(xfer_cnt, sector_sz);
+	}
+
 	scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt);
 	if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
 		log_info =  le32_to_cpu(mpi_reply->IOCLogInfo);

From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 23:58:38 +0100
Subject: [PATCH 163/322] perf/x86/intel/rapl: Make package handling more
 robust

The package management code in RAPL relies on package mapping being
available before a CPU is started. This changed with:

  9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")

because the ACPI/BIOS information turned out to be unreliable, but that
left RAPL in broken state. This was not noticed because on a regular boot
all CPUs are online before RAPL is initialized.

A possible fix would be to reintroduce the mess which allocates a package
data structure in CPU prepare and when it turns out to already exist in
starting throw it away later in the CPU online callback. But that's a
horrible hack and not required at all because RAPL becomes functional for
perf only in the CPU online callback. That's correct because user space is
not yet informed about the CPU being onlined, so nothing caan rely on RAPL
being available on that particular CPU.

Move the allocation to the CPU online callback and simplify the hotplug
handling. At this point the package mapping is established and correct.

This also adds a missing check for available package data in the
event_init() function.

Reported-by: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/rapl.c | 60 ++++++++++++++++--------------------
 include/linux/cpuhotplug.h   |  1 -
 2 files changed, 26 insertions(+), 35 deletions(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 17c3564d087a..22ef4f72cf32 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -161,7 +161,13 @@ static u64 rapl_timer_ms;
 
 static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
 {
-	return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+	unsigned int pkgid = topology_logical_package_id(cpu);
+
+	/*
+	 * The unsigned check also catches the '-1' return value for non
+	 * existent mappings in the topology map.
+	 */
+	return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
 }
 
 static inline u64 rapl_read_counter(struct perf_event *event)
@@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event)
 
 	/* must be done before validate_group */
 	pmu = cpu_to_rapl_pmu(event->cpu);
+	if (!pmu)
+		return -EINVAL;
 	event->cpu = pmu->cpu;
 	event->pmu_private = pmu;
 	event->hw.event_base = msr;
@@ -585,6 +593,20 @@ static int rapl_cpu_online(unsigned int cpu)
 	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
 	int target;
 
+	if (!pmu) {
+		pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+		if (!pmu)
+			return -ENOMEM;
+
+		raw_spin_lock_init(&pmu->lock);
+		INIT_LIST_HEAD(&pmu->active_list);
+		pmu->pmu = &rapl_pmus->pmu;
+		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+		rapl_hrtimer_init(pmu);
+
+		rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+	}
+
 	/*
 	 * Check if there is an online cpu in the package which collects rapl
 	 * events already.
@@ -598,27 +620,6 @@ static int rapl_cpu_online(unsigned int cpu)
 	return 0;
 }
 
-static int rapl_cpu_prepare(unsigned int cpu)
-{
-	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
-
-	if (pmu)
-		return 0;
-
-	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
-	if (!pmu)
-		return -ENOMEM;
-
-	raw_spin_lock_init(&pmu->lock);
-	INIT_LIST_HEAD(&pmu->active_list);
-	pmu->pmu = &rapl_pmus->pmu;
-	pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
-	pmu->cpu = -1;
-	rapl_hrtimer_init(pmu);
-	rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
-	return 0;
-}
-
 static int rapl_check_hw_unit(bool apply_quirk)
 {
 	u64 msr_rapl_power_unit_bits;
@@ -803,29 +804,21 @@ static int __init rapl_pmu_init(void)
 	/*
 	 * Install callbacks. Core will call them for each online cpu.
 	 */
-
-	ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare",
-				rapl_cpu_prepare, NULL);
-	if (ret)
-		goto out;
-
 	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
 				"perf/x86/rapl:online",
 				rapl_cpu_online, rapl_cpu_offline);
 	if (ret)
-		goto out1;
+		goto out;
 
 	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
 	if (ret)
-		goto out2;
+		goto out1;
 
 	rapl_advertise();
 	return 0;
 
-out2:
-	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
 out1:
-	cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
 out:
 	pr_warn("Initialization failed (%d), disabled\n", ret);
 	cleanup_rapl_pmus();
@@ -836,7 +829,6 @@ module_init(rapl_pmu_init);
 static void __exit intel_rapl_exit(void)
 {
 	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
-	cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
 	perf_pmu_unregister(&rapl_pmus->pmu);
 	cleanup_rapl_pmus();
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d936a0021839..8329f3dc592c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -10,7 +10,6 @@ enum cpuhp_state {
 	CPUHP_PERF_X86_PREPARE,
 	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
-	CPUHP_PERF_X86_RAPL_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
 	CPUHP_PERF_SUPERH,

From 1aa6cfd33df492939b0be15ebdbcff1f8ae5ddb6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 23:58:39 +0100
Subject: [PATCH 164/322] perf/x86/intel/uncore: Clean up hotplug conversion
 fallout

The recent conversion to the hotplug state machine kept two mechanisms from
the original code:

 1) The first_init logic which adds the number of online CPUs in a package
    to the refcount. That's wrong because the callbacks are executed for
    all online CPUs.

    Remove it so the refcounting is correct.

 2) The on_each_cpu() call to undo box->init() in the error handling
    path. That's bogus because when the prepare callback fails no box has
    been initialized yet.

    Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Fixes: 1a246b9f58c6 ("perf/x86/intel/uncore: Convert to hotplug state machine")
Link: http://lkml.kernel.org/r/20170131230141.298032324@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/uncore.c | 44 ++++------------------------------
 1 file changed, 4 insertions(+), 40 deletions(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 8c4ccdc3a3f3..56c5235dcc29 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -764,30 +764,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
 	pmu->registered = false;
 }
 
-static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
-{
-	struct intel_uncore_pmu *pmu = type->pmus;
-	struct intel_uncore_box *box;
-	int i, pkg;
-
-	if (pmu) {
-		pkg = topology_physical_package_id(cpu);
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
-			if (box)
-				uncore_box_exit(box);
-		}
-	}
-}
-
-static void uncore_exit_boxes(void *dummy)
-{
-	struct intel_uncore_type **types;
-
-	for (types = uncore_msr_uncores; *types; types++)
-		__uncore_exit_boxes(*types++, smp_processor_id());
-}
-
 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 {
 	int pkg;
@@ -1078,22 +1054,12 @@ static int uncore_cpu_dying(unsigned int cpu)
 	return 0;
 }
 
-static int first_init;
-
 static int uncore_cpu_starting(unsigned int cpu)
 {
 	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, pkg, ncpus = 1;
-
-	if (first_init) {
-		/*
-		 * On init we get the number of online cpus in the package
-		 * and set refcount for all of them.
-		 */
-		ncpus = cpumask_weight(topology_core_cpumask(cpu));
-	}
+	int i, pkg;
 
 	pkg = topology_logical_package_id(cpu);
 	for (; *types; types++) {
@@ -1104,7 +1070,7 @@ static int uncore_cpu_starting(unsigned int cpu)
 			if (!box)
 				continue;
 			/* The first cpu on a package activates the box */
-			if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
+			if (atomic_inc_return(&box->refcnt) == 1)
 				uncore_box_init(box);
 		}
 	}
@@ -1408,19 +1374,17 @@ static int __init intel_uncore_init(void)
 					  "perf/x86/intel/uncore:prepare",
 					  uncore_cpu_prepare, NULL);
 	}
-	first_init = 1;
+
 	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
 			  "perf/x86/uncore:starting",
 			  uncore_cpu_starting, uncore_cpu_dying);
-	first_init = 0;
+
 	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
 			  "perf/x86/uncore:online",
 			  uncore_event_cpu_online, uncore_event_cpu_offline);
 	return 0;
 
 err:
-	/* Undo box->init_box() */
-	on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
 	uncore_types_exit(uncore_msr_uncores);
 	uncore_pci_exit();
 	return ret;

From fff4b87e594ad3d2e4f51e8d3d86a6f9d3d8b654 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 31 Jan 2017 23:58:40 +0100
Subject: [PATCH 165/322] perf/x86/intel/uncore: Make package handling more
 robust

The package management code in uncore relies on package mapping being
available before a CPU is started. This changed with:

  9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")

because the ACPI/BIOS information turned out to be unreliable, but that
left uncore in broken state. This was not noticed because on a regular boot
all CPUs are online before uncore is initialized.

Move the allocation to the CPU online callback and simplify the hotplug
handling. At this point the package mapping is established and correct.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Yasuaki Ishimatsu <yasu.isimatu@gmail.com>
Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust")
Link: http://lkml.kernel.org/r/20170131230141.377156255@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/uncore.c | 196 +++++++++++++++------------------
 include/linux/cpuhotplug.h     |   2 -
 2 files changed, 91 insertions(+), 107 deletions(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 56c5235dcc29..1ab45976474d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -100,7 +100,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
 
 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	return pmu->boxes[topology_logical_package_id(cpu)];
+	unsigned int pkgid = topology_logical_package_id(cpu);
+
+	/*
+	 * The unsigned check also catches the '-1' return value for non
+	 * existent mappings in the topology map.
+	 */
+	return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
 }
 
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -1034,76 +1040,6 @@ static void uncore_pci_exit(void)
 	}
 }
 
-static int uncore_cpu_dying(unsigned int cpu)
-{
-	struct intel_uncore_type *type, **types = uncore_msr_uncores;
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box;
-	int i, pkg;
-
-	pkg = topology_logical_package_id(cpu);
-	for (; *types; types++) {
-		type = *types;
-		pmu = type->pmus;
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
-			if (box && atomic_dec_return(&box->refcnt) == 0)
-				uncore_box_exit(box);
-		}
-	}
-	return 0;
-}
-
-static int uncore_cpu_starting(unsigned int cpu)
-{
-	struct intel_uncore_type *type, **types = uncore_msr_uncores;
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box;
-	int i, pkg;
-
-	pkg = topology_logical_package_id(cpu);
-	for (; *types; types++) {
-		type = *types;
-		pmu = type->pmus;
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
-			if (!box)
-				continue;
-			/* The first cpu on a package activates the box */
-			if (atomic_inc_return(&box->refcnt) == 1)
-				uncore_box_init(box);
-		}
-	}
-
-	return 0;
-}
-
-static int uncore_cpu_prepare(unsigned int cpu)
-{
-	struct intel_uncore_type *type, **types = uncore_msr_uncores;
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box;
-	int i, pkg;
-
-	pkg = topology_logical_package_id(cpu);
-	for (; *types; types++) {
-		type = *types;
-		pmu = type->pmus;
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			if (pmu->boxes[pkg])
-				continue;
-			/* First cpu of a package allocates the box */
-			box = uncore_alloc_box(type, cpu_to_node(cpu));
-			if (!box)
-				return -ENOMEM;
-			box->pmu = pmu;
-			box->pkgid = pkg;
-			pmu->boxes[pkg] = box;
-		}
-	}
-	return 0;
-}
-
 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
 				   int new_cpu)
 {
@@ -1143,12 +1079,14 @@ static void uncore_change_context(struct intel_uncore_type **uncores,
 
 static int uncore_event_cpu_offline(unsigned int cpu)
 {
-	int target;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, pkg, target;
 
 	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-		return 0;
-
+		goto unref;
 	/* Find a new cpu to collect uncore events */
 	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 
@@ -1160,12 +1098,82 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 
 	uncore_change_context(uncore_msr_uncores, cpu, target);
 	uncore_change_context(uncore_pci_uncores, cpu, target);
+
+unref:
+	/* Clear the references */
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (box && atomic_dec_return(&box->refcnt) == 0)
+				uncore_box_exit(box);
+		}
+	}
 	return 0;
 }
 
+static int allocate_boxes(struct intel_uncore_type **types,
+			 unsigned int pkg, unsigned int cpu)
+{
+	struct intel_uncore_box *box, *tmp;
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	LIST_HEAD(allocated);
+	int i;
+
+	/* Try to allocate all required boxes */
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			if (pmu->boxes[pkg])
+				continue;
+			box = uncore_alloc_box(type, cpu_to_node(cpu));
+			if (!box)
+				goto cleanup;
+			box->pmu = pmu;
+			box->pkgid = pkg;
+			list_add(&box->active_list, &allocated);
+		}
+	}
+	/* Install them in the pmus */
+	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
+		list_del_init(&box->active_list);
+		box->pmu->boxes[pkg] = box;
+	}
+	return 0;
+
+cleanup:
+	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
+		list_del_init(&box->active_list);
+		kfree(box);
+	}
+	return -ENOMEM;
+}
+
 static int uncore_event_cpu_online(unsigned int cpu)
 {
-	int target;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, ret, pkg, target;
+
+	pkg = topology_logical_package_id(cpu);
+	ret = allocate_boxes(types, pkg, cpu);
+	if (ret)
+		return ret;
+
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (!box && atomic_inc_return(&box->refcnt) == 1)
+				uncore_box_init(box);
+		}
+	}
 
 	/*
 	 * Check if there is an online cpu in the package
@@ -1355,33 +1363,13 @@ static int __init intel_uncore_init(void)
 	if (cret && pret)
 		return -ENODEV;
 
-	/*
-	 * Install callbacks. Core will call them for each online cpu.
-	 *
-	 * The first online cpu of each package allocates and takes
-	 * the refcounts for all other online cpus in that package.
-	 * If msrs are not enabled no allocation is required and
-	 * uncore_cpu_prepare() is not called for each online cpu.
-	 */
-	if (!cret) {
-	       ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
-				       "perf/x86/intel/uncore:prepare",
-				       uncore_cpu_prepare, NULL);
-		if (ret)
-			goto err;
-	} else {
-		cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
-					  "perf/x86/intel/uncore:prepare",
-					  uncore_cpu_prepare, NULL);
-	}
-
-	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
-			  "perf/x86/uncore:starting",
-			  uncore_cpu_starting, uncore_cpu_dying);
-
-	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
-			  "perf/x86/uncore:online",
-			  uncore_event_cpu_online, uncore_event_cpu_offline);
+	/* Install hotplug callbacks to setup the targets for each package */
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+				"perf/x86/intel/uncore:online",
+				uncore_event_cpu_online,
+				uncore_event_cpu_offline);
+	if (ret)
+		goto err;
 	return 0;
 
 err:
@@ -1393,9 +1381,7 @@ module_init(intel_uncore_init);
 
 static void __exit intel_uncore_exit(void)
 {
-	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
-	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
-	cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
 	uncore_types_exit(uncore_msr_uncores);
 	uncore_pci_exit();
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8329f3dc592c..921acaaa1601 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -8,7 +8,6 @@ enum cpuhp_state {
 	CPUHP_CREATE_THREADS,
 	CPUHP_PERF_PREPARE,
 	CPUHP_PERF_X86_PREPARE,
-	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
@@ -85,7 +84,6 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
-	CPUHP_AP_PERF_X86_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_STARTING,
 	CPUHP_AP_PERF_X86_AMD_IBS_STARTING,

From eeee74a4f6625b77c3e8db0693c2d4546507ba0d Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 31 Jan 2017 10:21:30 +0100
Subject: [PATCH 166/322] drm/atomic: Unconditionally call prepare_fb.

Atomic drivers may set properties like rotation on the same fb, which
may require a call to prepare_fb even when framebuffer stays identical.

Instead of handling all the special cases in the core, let the driver
decide when prepare_fb and cleanup_fb are noops.

This is a revert of:

commit fcc60b413d14dd06ddbd79ec50e83c4fb2a097ba
Author: Keith Packard <keithp@keithp.com>
Date:   Sat Jun 4 01:16:22 2016 -0700

    drm: Don't prepare or cleanup unchanging frame buffers [v3]

The original commit mentions that this prevents waiting in i915 on all
previous rendering during cursor updates, but there are better ways to
fix this.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/6d82f9b6-9d16-91d1-d176-4a37b09afc44@linux.intel.com
(cherry picked from commit 0532be078a207d7dd6ad26ebd0834e258acc4ee7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1485854491-27389-2-git-send-email-maarten.lankhorst@linux.intel.com
---
 drivers/gpu/drm/drm_atomic_helper.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 34f757bcabae..4594477dee00 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1666,9 +1666,6 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev,
 
 		funcs = plane->helper_private;
 
-		if (!drm_atomic_helper_framebuffer_changed(dev, state, plane_state->crtc))
-			continue;
-
 		if (funcs->prepare_fb) {
 			ret = funcs->prepare_fb(plane, plane_state);
 			if (ret)
@@ -1685,9 +1682,6 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev,
 		if (j >= i)
 			continue;
 
-		if (!drm_atomic_helper_framebuffer_changed(dev, state, plane_state->crtc))
-			continue;
-
 		funcs = plane->helper_private;
 
 		if (funcs->cleanup_fb)
@@ -1954,9 +1948,6 @@ void drm_atomic_helper_cleanup_planes(struct drm_device *dev,
 	for_each_plane_in_state(old_state, plane, plane_state, i) {
 		const struct drm_plane_helper_funcs *funcs;
 
-		if (!drm_atomic_helper_framebuffer_changed(dev, old_state, plane_state->crtc))
-			continue;
-
 		funcs = plane->helper_private;
 
 		if (funcs->cleanup_fb)

From e8fe4f4b2b7b93048729538321c681c0cff33b39 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 31 Jan 2017 10:21:31 +0100
Subject: [PATCH 167/322] drm/i915: Track pinned vma in intel_plane_state

With atomic plane states we are able to track an allocation right from
preparation, during use and through to the final free after being
swapped out for a new plane. We can couple the VMA we pin for the
framebuffer (and its rotation) to this lifetime and avoid all the clumsy
lookups in between.

v2: Remove residual vma on plane cleanup (Chris)
v3: Add a description for the vma destruction in
    intel_plane_destroy_state (Maarten)

References: https://bugs.freedesktop.org/show_bug.cgi?id=98829
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170116152131.18089-1-chris@chris-wilson.co.uk
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit be1e341513ca23b0668b7b0f26fa6e2ffc46ba20)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1485854491-27389-3-git-send-email-maarten.lankhorst@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h           |  16 +--
 drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++++
 drivers/gpu/drm/i915/intel_display.c      | 125 ++++++++--------------
 drivers/gpu/drm/i915/intel_drv.h          |   9 +-
 drivers/gpu/drm/i915/intel_fbc.c          |  52 ++++-----
 drivers/gpu/drm/i915/intel_fbdev.c        |   4 +-
 drivers/gpu/drm/i915/intel_sprite.c       |   8 +-
 7 files changed, 99 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 69bc3b0c4390..8493e19b563a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1012,6 +1012,8 @@ struct intel_fbc {
 	struct work_struct underrun_work;
 
 	struct intel_fbc_state_cache {
+		struct i915_vma *vma;
+
 		struct {
 			unsigned int mode_flags;
 			uint32_t hsw_bdw_pixel_rate;
@@ -1025,15 +1027,14 @@ struct intel_fbc {
 		} plane;
 
 		struct {
-			u64 ilk_ggtt_offset;
 			uint32_t pixel_format;
 			unsigned int stride;
-			int fence_reg;
-			unsigned int tiling_mode;
 		} fb;
 	} state_cache;
 
 	struct intel_fbc_reg_params {
+		struct i915_vma *vma;
+
 		struct {
 			enum pipe pipe;
 			enum plane plane;
@@ -1041,10 +1042,8 @@ struct intel_fbc {
 		} crtc;
 
 		struct {
-			u64 ggtt_offset;
 			uint32_t pixel_format;
 			unsigned int stride;
-			int fence_reg;
 		} fb;
 
 		int cfb_size;
@@ -3168,13 +3167,6 @@ i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj,
 	return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view);
 }
 
-static inline unsigned long
-i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
-			    const struct i915_ggtt_view *view)
-{
-	return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view));
-}
-
 /* i915_gem_fence_reg.c */
 int __must_check i915_vma_get_fence(struct i915_vma *vma);
 int __must_check i915_vma_put_fence(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index dbe9fb41ae53..8d3e515f27ba 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -85,6 +85,8 @@ intel_plane_duplicate_state(struct drm_plane *plane)
 
 	__drm_atomic_helper_plane_duplicate_state(plane, state);
 
+	intel_state->vma = NULL;
+
 	return state;
 }
 
@@ -100,6 +102,24 @@ void
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
+	struct i915_vma *vma;
+
+	vma = fetch_and_zero(&to_intel_plane_state(state)->vma);
+
+	/*
+	 * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma.
+	 * We currently don't clear all planes during driver unload, so we have
+	 * to be able to unpin vma here for now.
+	 *
+	 * Normally this can only happen during unload when kmscon is disabled
+	 * and userspace doesn't attempt to set a framebuffer at all.
+	 */
+	if (vma) {
+		mutex_lock(&plane->dev->struct_mutex);
+		intel_unpin_fb_vma(vma);
+		mutex_unlock(&plane->dev->struct_mutex);
+	}
+
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f0b9aa7a0483..f1e4a21d4664 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2235,27 +2235,22 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 			i915_vma_pin_fence(vma);
 	}
 
+	i915_vma_get(vma);
 err:
 	intel_runtime_pm_put(dev_priv);
 	return vma;
 }
 
-void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
+void intel_unpin_fb_vma(struct i915_vma *vma)
 {
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-	struct i915_ggtt_view view;
-	struct i915_vma *vma;
-
-	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
-
-	intel_fill_fb_ggtt_view(&view, fb, rotation);
-	vma = i915_gem_object_to_ggtt(obj, &view);
+	lockdep_assert_held(&vma->vm->dev->struct_mutex);
 
 	if (WARN_ON_ONCE(!vma))
 		return;
 
 	i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
+	i915_vma_put(vma);
 }
 
 static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane,
@@ -2750,7 +2745,6 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_crtc *c;
-	struct intel_crtc *i;
 	struct drm_i915_gem_object *obj;
 	struct drm_plane *primary = intel_crtc->base.primary;
 	struct drm_plane_state *plane_state = primary->state;
@@ -2775,20 +2769,20 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	 * an fb with another CRTC instead
 	 */
 	for_each_crtc(dev, c) {
-		i = to_intel_crtc(c);
+		struct intel_plane_state *state;
 
 		if (c == &intel_crtc->base)
 			continue;
 
-		if (!i->active)
+		if (!to_intel_crtc(c)->active)
 			continue;
 
-		fb = c->primary->fb;
-		if (!fb)
+		state = to_intel_plane_state(c->primary->state);
+		if (!state->vma)
 			continue;
 
-		obj = intel_fb_obj(fb);
-		if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) {
+		if (intel_plane_ggtt_offset(state) == plane_config->base) {
+			fb = c->primary->fb;
 			drm_framebuffer_reference(fb);
 			goto valid_fb;
 		}
@@ -2809,6 +2803,19 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	return;
 
 valid_fb:
+	mutex_lock(&dev->struct_mutex);
+	intel_state->vma =
+		intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
+	mutex_unlock(&dev->struct_mutex);
+	if (IS_ERR(intel_state->vma)) {
+		DRM_ERROR("failed to pin boot fb on pipe %d: %li\n",
+			  intel_crtc->pipe, PTR_ERR(intel_state->vma));
+
+		intel_state->vma = NULL;
+		drm_framebuffer_unreference(fb);
+		return;
+	}
+
 	plane_state->src_x = 0;
 	plane_state->src_y = 0;
 	plane_state->src_w = fb->width << 16;
@@ -3104,13 +3111,13 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	if (INTEL_GEN(dev_priv) >= 4) {
 		I915_WRITE(DSPSURF(plane),
-			   intel_fb_gtt_offset(fb, rotation) +
+			   intel_plane_ggtt_offset(plane_state) +
 			   intel_crtc->dspaddr_offset);
 		I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
 		I915_WRITE(DSPLINOFF(plane), linear_offset);
 	} else {
 		I915_WRITE(DSPADDR(plane),
-			   intel_fb_gtt_offset(fb, rotation) +
+			   intel_plane_ggtt_offset(plane_state) +
 			   intel_crtc->dspaddr_offset);
 	}
 	POSTING_READ(reg);
@@ -3207,7 +3214,7 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
 
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	I915_WRITE(DSPSURF(plane),
-		   intel_fb_gtt_offset(fb, rotation) +
+		   intel_plane_ggtt_offset(plane_state) +
 		   intel_crtc->dspaddr_offset);
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
 		I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
@@ -3230,23 +3237,6 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
 	}
 }
 
-u32 intel_fb_gtt_offset(struct drm_framebuffer *fb,
-			unsigned int rotation)
-{
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-	struct i915_ggtt_view view;
-	struct i915_vma *vma;
-
-	intel_fill_fb_ggtt_view(&view, fb, rotation);
-
-	vma = i915_gem_object_to_ggtt(obj, &view);
-	if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
-		 view.type))
-		return -1;
-
-	return i915_ggtt_offset(vma);
-}
-
 static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
@@ -3441,7 +3431,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
 	}
 
 	I915_WRITE(PLANE_SURF(pipe, 0),
-		   intel_fb_gtt_offset(fb, rotation) + surf_addr);
+		   intel_plane_ggtt_offset(plane_state) + surf_addr);
 
 	POSTING_READ(PLANE_SURF(pipe, 0));
 }
@@ -11536,7 +11526,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		flush_work(&work->mmio_work);
 
 	mutex_lock(&dev->struct_mutex);
-	intel_unpin_fb_obj(work->old_fb, primary->state->rotation);
+	intel_unpin_fb_vma(work->old_vma);
 	i915_gem_object_put(work->pending_flip_obj);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -12246,8 +12236,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		goto cleanup_pending;
 	}
 
-	work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation);
-	work->gtt_offset += intel_crtc->dspaddr_offset;
+	work->old_vma = to_intel_plane_state(primary->state)->vma;
+	to_intel_plane_state(primary->state)->vma = vma;
+
+	work->gtt_offset = i915_ggtt_offset(vma) + intel_crtc->dspaddr_offset;
 	work->rotation = crtc->primary->state->rotation;
 
 	/*
@@ -12301,7 +12293,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 cleanup_request:
 	i915_add_request_no_flush(request);
 cleanup_unpin:
-	intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
+	to_intel_plane_state(primary->state)->vma = work->old_vma;
+	intel_unpin_fb_vma(vma);
 cleanup_pending:
 	atomic_dec(&intel_crtc->unpin_work_count);
 unlock:
@@ -14794,6 +14787,8 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 			DRM_DEBUG_KMS("failed to pin object\n");
 			return PTR_ERR(vma);
 		}
+
+		to_intel_plane_state(new_state)->vma = vma;
 	}
 
 	return 0;
@@ -14812,19 +14807,12 @@ void
 intel_cleanup_plane_fb(struct drm_plane *plane,
 		       struct drm_plane_state *old_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(plane->dev);
-	struct intel_plane_state *old_intel_state;
-	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
-	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
+	struct i915_vma *vma;
 
-	old_intel_state = to_intel_plane_state(old_state);
-
-	if (!obj && !old_obj)
-		return;
-
-	if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR ||
-	    !INTEL_INFO(dev_priv)->cursor_needs_physical))
-		intel_unpin_fb_obj(old_state->fb, old_state->rotation);
+	/* Should only be called after a successful intel_prepare_plane_fb()! */
+	vma = fetch_and_zero(&to_intel_plane_state(old_state)->vma);
+	if (vma)
+		intel_unpin_fb_vma(vma);
 }
 
 int
@@ -15166,7 +15154,7 @@ intel_update_cursor_plane(struct drm_plane *plane,
 	if (!obj)
 		addr = 0;
 	else if (!INTEL_INFO(dev_priv)->cursor_needs_physical)
-		addr = i915_gem_object_ggtt_offset(obj, NULL);
+		addr = intel_plane_ggtt_offset(state);
 	else
 		addr = obj->phys_handle->busaddr;
 
@@ -17066,41 +17054,12 @@ void intel_display_resume(struct drm_device *dev)
 void intel_modeset_gem_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_crtc *c;
-	struct drm_i915_gem_object *obj;
 
 	intel_init_gt_powersave(dev_priv);
 
 	intel_modeset_init_hw(dev);
 
 	intel_setup_overlay(dev_priv);
-
-	/*
-	 * Make sure any fbs we allocated at startup are properly
-	 * pinned & fenced.  When we do the allocation it's too early
-	 * for this.
-	 */
-	for_each_crtc(dev, c) {
-		struct i915_vma *vma;
-
-		obj = intel_fb_obj(c->primary->fb);
-		if (obj == NULL)
-			continue;
-
-		mutex_lock(&dev->struct_mutex);
-		vma = intel_pin_and_fence_fb_obj(c->primary->fb,
-						 c->primary->state->rotation);
-		mutex_unlock(&dev->struct_mutex);
-		if (IS_ERR(vma)) {
-			DRM_ERROR("failed to pin boot fb on pipe %d\n",
-				  to_intel_crtc(c)->pipe);
-			drm_framebuffer_unreference(c->primary->fb);
-			c->primary->fb = NULL;
-			c->primary->crtc = c->primary->state->crtc = NULL;
-			update_state_fb(c->primary);
-			c->state->plane_mask &= ~(1 << drm_plane_index(c->primary));
-		}
-	}
 }
 
 int intel_connector_register(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index cd72ae171eeb..03a2112004f9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -377,6 +377,7 @@ struct intel_atomic_state {
 struct intel_plane_state {
 	struct drm_plane_state base;
 	struct drm_rect clip;
+	struct i915_vma *vma;
 
 	struct {
 		u32 offset;
@@ -1046,6 +1047,7 @@ struct intel_flip_work {
 	struct work_struct mmio_work;
 
 	struct drm_crtc *crtc;
+	struct i915_vma *old_vma;
 	struct drm_framebuffer *old_fb;
 	struct drm_i915_gem_object *pending_flip_obj;
 	struct drm_pending_vblank_event *event;
@@ -1273,7 +1275,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct drm_modeset_acquire_ctx *ctx);
 struct i915_vma *
 intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
-void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation);
+void intel_unpin_fb_vma(struct i915_vma *vma);
 struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
 			   struct drm_mode_fb_cmd2 *mode_cmd,
@@ -1362,7 +1364,10 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
 int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
 
-u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation);
+static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state)
+{
+	return i915_ggtt_offset(state->vma);
+}
 
 u32 skl_plane_ctl_format(uint32_t pixel_format);
 u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 62f215b12eb5..f3a1d6a5cabe 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -173,7 +173,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv)
 	if (IS_I945GM(dev_priv))
 		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
 	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
-	fbc_ctl |= params->fb.fence_reg;
+	fbc_ctl |= params->vma->fence->id;
 	I915_WRITE(FBC_CONTROL, fbc_ctl);
 }
 
@@ -193,8 +193,8 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv)
 	else
 		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
 
-	if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
-		dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg;
+	if (params->vma->fence) {
+		dpfc_ctl |= DPFC_CTL_FENCE_EN | params->vma->fence->id;
 		I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
 	} else {
 		I915_WRITE(DPFC_FENCE_YOFF, 0);
@@ -251,13 +251,14 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
 		break;
 	}
 
-	if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
+	if (params->vma->fence) {
 		dpfc_ctl |= DPFC_CTL_FENCE_EN;
 		if (IS_GEN5(dev_priv))
-			dpfc_ctl |= params->fb.fence_reg;
+			dpfc_ctl |= params->vma->fence->id;
 		if (IS_GEN6(dev_priv)) {
 			I915_WRITE(SNB_DPFC_CTL_SA,
-				   SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
+				   SNB_CPU_FENCE_ENABLE |
+				   params->vma->fence->id);
 			I915_WRITE(DPFC_CPU_FENCE_OFFSET,
 				   params->crtc.fence_y_offset);
 		}
@@ -269,7 +270,8 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
 	}
 
 	I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
-	I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID);
+	I915_WRITE(ILK_FBC_RT_BASE,
+		   i915_ggtt_offset(params->vma) | ILK_FBC_RT_VALID);
 	/* enable it... */
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
@@ -319,10 +321,11 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 		break;
 	}
 
-	if (params->fb.fence_reg != I915_FENCE_REG_NONE) {
+	if (params->vma->fence) {
 		dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
 		I915_WRITE(SNB_DPFC_CTL_SA,
-			   SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
+			   SNB_CPU_FENCE_ENABLE |
+			   params->vma->fence->id);
 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
 	} else {
 		I915_WRITE(SNB_DPFC_CTL_SA,0);
@@ -727,14 +730,6 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
 	return effective_w <= max_w && effective_h <= max_h;
 }
 
-/* XXX replace me when we have VMA tracking for intel_plane_state */
-static int get_fence_id(struct drm_framebuffer *fb)
-{
-	struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
-
-	return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
-}
-
 static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 					 struct intel_crtc_state *crtc_state,
 					 struct intel_plane_state *plane_state)
@@ -743,7 +738,8 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct intel_fbc_state_cache *cache = &fbc->state_cache;
 	struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_i915_gem_object *obj;
+
+	cache->vma = NULL;
 
 	cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags;
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
@@ -758,16 +754,10 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 	if (!cache->plane.visible)
 		return;
 
-	obj = intel_fb_obj(fb);
-
-	/* FIXME: We lack the proper locking here, so only run this on the
-	 * platforms that need. */
-	if (IS_GEN(dev_priv, 5, 6))
-		cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
 	cache->fb.pixel_format = fb->pixel_format;
 	cache->fb.stride = fb->pitches[0];
-	cache->fb.fence_reg = get_fence_id(fb);
-	cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
+
+	cache->vma = plane_state->vma;
 }
 
 static bool intel_fbc_can_activate(struct intel_crtc *crtc)
@@ -784,7 +774,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 		return false;
 	}
 
-	if (!cache->plane.visible) {
+	if (!cache->vma) {
 		fbc->no_fbc_reason = "primary plane not visible";
 		return false;
 	}
@@ -807,8 +797,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 	 * so have no fence associated with it) due to aperture constaints
 	 * at the time of pinning.
 	 */
-	if (cache->fb.tiling_mode != I915_TILING_X ||
-	    cache->fb.fence_reg == I915_FENCE_REG_NONE) {
+	if (!cache->vma->fence) {
 		fbc->no_fbc_reason = "framebuffer not tiled or fenced";
 		return false;
 	}
@@ -888,17 +877,16 @@ static void intel_fbc_get_reg_params(struct intel_crtc *crtc,
 	 * zero. */
 	memset(params, 0, sizeof(*params));
 
+	params->vma = cache->vma;
+
 	params->crtc.pipe = crtc->pipe;
 	params->crtc.plane = crtc->plane;
 	params->crtc.fence_y_offset = get_crtc_fence_y_offset(crtc);
 
 	params->fb.pixel_format = cache->fb.pixel_format;
 	params->fb.stride = cache->fb.stride;
-	params->fb.fence_reg = cache->fb.fence_reg;
 
 	params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
-
-	params->fb.ggtt_offset = cache->fb.ilk_ggtt_offset;
 }
 
 static bool intel_fbc_reg_params_equal(struct intel_fbc_reg_params *params1,
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 8cf2d80f2254..f4a8c4fc57c4 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -284,7 +284,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 out_destroy_fbi:
 	drm_fb_helper_release_fbi(helper);
 out_unpin:
-	intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0);
+	intel_unpin_fb_vma(vma);
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
@@ -549,7 +549,7 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	if (ifbdev->fb) {
 		mutex_lock(&ifbdev->helper.dev->struct_mutex);
-		intel_unpin_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0);
+		intel_unpin_fb_vma(ifbdev->vma);
 		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
 
 		drm_framebuffer_remove(&ifbdev->fb->base);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 8f131a08d440..242a73e66d82 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -273,7 +273,7 @@ skl_update_plane(struct drm_plane *drm_plane,
 
 	I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl);
 	I915_WRITE(PLANE_SURF(pipe, plane),
-		   intel_fb_gtt_offset(fb, rotation) + surf_addr);
+		   intel_plane_ggtt_offset(plane_state) + surf_addr);
 	POSTING_READ(PLANE_SURF(pipe, plane));
 }
 
@@ -458,7 +458,7 @@ vlv_update_plane(struct drm_plane *dplane,
 	I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
 	I915_WRITE(SPCNTR(pipe, plane), sprctl);
 	I915_WRITE(SPSURF(pipe, plane),
-		   intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
+		   intel_plane_ggtt_offset(plane_state) + sprsurf_offset);
 	POSTING_READ(SPSURF(pipe, plane));
 }
 
@@ -594,7 +594,7 @@ ivb_update_plane(struct drm_plane *plane,
 		I915_WRITE(SPRSCALE(pipe), sprscale);
 	I915_WRITE(SPRCTL(pipe), sprctl);
 	I915_WRITE(SPRSURF(pipe),
-		   intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
+		   intel_plane_ggtt_offset(plane_state) + sprsurf_offset);
 	POSTING_READ(SPRSURF(pipe));
 }
 
@@ -721,7 +721,7 @@ ilk_update_plane(struct drm_plane *plane,
 	I915_WRITE(DVSSCALE(pipe), dvsscale);
 	I915_WRITE(DVSCNTR(pipe), dvscntr);
 	I915_WRITE(DVSSURF(pipe),
-		   intel_fb_gtt_offset(fb, rotation) + dvssurf_offset);
+		   intel_plane_ggtt_offset(plane_state) + dvssurf_offset);
 	POSTING_READ(DVSSURF(pipe));
 }
 

From 4993b39ab04b083ff6ee1147e7e7f120feb6bf7f Mon Sep 17 00:00:00 2001
From: Ivan Vecera <cera@cera.cz>
Date: Tue, 31 Jan 2017 20:01:31 +0100
Subject: [PATCH 168/322] be2net: fix initial MAC setting

Recent commit 34393529163a ("be2net: fix MAC addr setting on privileged
BE3 VFs") allows privileged BE3 VFs to set its MAC address during
initialization. Although the initial MAC for such VFs is already
programmed by parent PF the subsequent setting performed by VF is OK,
but in certain cases (after fresh boot) this command in VF can fail.

The MAC should be initialized only when:
1) no MAC is programmed (always except BE3 VFs during first init)
2) programmed MAC is different from requested (e.g. MAC is set when
   interface is down). In this case the initial MAC programmed by PF
   needs to be deleted.

The adapter->dev_mac contains MAC address currently programmed in HW so
it should be zeroed when the MAC is deleted from HW and should not be
filled when MAC is set when interface is down in be_mac_addr_set() as
no programming is performed in this case.

Example of failure without the fix (immediately after fresh boot):

# ip link set eth0 up  <- eth0 is BE3 PF
be2net 0000:01:00.0 eth0: Link is Up

# echo 1 > /sys/class/net/eth0/device/sriov_numvfs  <- Create 1 VF
...
be2net 0000:01:04.0: Emulex OneConnect(be3): VF  port 0

# ip link set eth8 up  <- eth8 is created privileged VF
be2net 0000:01:04.0: opcode 59-1 failed:status 1-76
RTNETLINK answers: Input/output error

# echo 0 > /sys/class/net/eth0/device/sriov_numvfs  <- Delete VF
iommu: Removing device 0000:01:04.0 from group 33
...

# echo 1 > /sys/class/net/eth0/device/sriov_numvfs  <- Create it again
iommu: Removing device 0000:01:04.0 from group 33
...

# ip link set eth8 up
be2net 0000:01:04.0 eth8: Link is Up

Initialization is now OK.

v2 - Corrected the comment and condition check suggested by Suresh & Harsha

Fixes: 34393529163a ("be2net: fix MAC addr setting on privileged BE3 VFs")
Cc: Sathya Perla <sathya.perla@broadcom.com>
Cc: Ajit Khaparde <ajit.khaparde@broadcom.com>
Cc: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Cc: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ivan Vecera <cera@cera.cz>
Acked-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 33 +++++++++++++++++----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 1a7f8ad7b9c6..cd49a54c538d 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -362,8 +362,10 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
 		status = -EPERM;
 		goto err;
 	}
-done:
+
+	/* Remember currently programmed MAC */
 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
+done:
 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 	return 0;
@@ -3618,8 +3620,10 @@ static void be_disable_if_filters(struct be_adapter *adapter)
 {
 	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
 	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
-	    check_privilege(adapter, BE_PRIV_FILTMGMT))
+	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
 		be_dev_mac_del(adapter, adapter->pmac_id[0]);
+		eth_zero_addr(adapter->dev_mac);
+	}
 
 	be_clear_uc_list(adapter);
 	be_clear_mc_list(adapter);
@@ -3773,12 +3777,27 @@ static int be_enable_if_filters(struct be_adapter *adapter)
 	if (status)
 		return status;
 
-	/* Don't add MAC on BE3 VFs without FILTMGMT privilege */
-	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
-	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
+	/* Normally this condition usually true as the ->dev_mac is zeroed.
+	 * But on BE3 VFs the initial MAC is pre-programmed by PF and
+	 * subsequent be_dev_mac_add() can fail (after fresh boot)
+	 */
+	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
+		int old_pmac_id = -1;
+
+		/* Remember old programmed MAC if any - can happen on BE3 VF */
+		if (!is_zero_ether_addr(adapter->dev_mac))
+			old_pmac_id = adapter->pmac_id[0];
+
 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
 		if (status)
 			return status;
+
+		/* Delete the old programmed MAC as we successfully programmed
+		 * a new MAC
+		 */
+		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
+			be_dev_mac_del(adapter, old_pmac_id);
+
 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
 	}
 
@@ -4552,6 +4571,10 @@ static int be_mac_setup(struct be_adapter *adapter)
 
 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
+
+		/* Initial MAC for BE3 VFs is already programmed by PF */
+		if (BEx_chip(adapter) && be_virtfn(adapter))
+			memcpy(adapter->dev_mac, mac, ETH_ALEN);
 	}
 
 	return 0;

From 2da64d20a0b20046d688e44f4033efd09157e29d Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 1 Feb 2017 14:26:16 +1100
Subject: [PATCH 169/322] vfio/spapr: Fix missing mutex unlock when creating a
 window

Commit d9c728949ddc ("vfio/spapr: Postpone default window creation")
added an additional exit to the VFIO_IOMMU_SPAPR_TCE_CREATE case and
made it possible to return from tce_iommu_ioctl() without unlocking
container->lock; this fixes the issue.

Fixes: d9c728949ddc ("vfio/spapr: Postpone default window creation")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 128d10282d16..7690e5bf3cf1 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1123,12 +1123,11 @@ static long tce_iommu_ioctl(void *iommu_data,
 		mutex_lock(&container->lock);
 
 		ret = tce_iommu_create_default_window(container);
-		if (ret)
-			return ret;
-
-		ret = tce_iommu_create_window(container, create.page_shift,
-				create.window_size, create.levels,
-				&create.start_addr);
+		if (!ret)
+			ret = tce_iommu_create_window(container,
+					create.page_shift,
+					create.window_size, create.levels,
+					&create.start_addr);
 
 		mutex_unlock(&container->lock);
 

From fd62d9f5c575f0792f150109f1fd24a0d4b3f854 Mon Sep 17 00:00:00 2001
From: Yotam Gigi <yotamg@mellanox.com>
Date: Tue, 31 Jan 2017 15:14:29 +0200
Subject: [PATCH 170/322] net/sched: matchall: Fix configuration race

In the current version, the matchall internal state is split into two
structs: cls_matchall_head and cls_matchall_filter. This makes little
sense, as matchall instance supports only one filter, and there is no
situation where one exists and the other does not. In addition, that led
to some races when filter was deleted while packet was processed.

Unify that two structs into one, thus simplifying the process of matchall
creation and deletion. As a result, the new, delete and get callbacks have
a dummy implementation where all the work is done in destroy and change
callbacks, as was done in cls_cgroup.

Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier")
Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_matchall.c | 127 ++++++++++++++-------------------------
 1 file changed, 45 insertions(+), 82 deletions(-)

diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index f935429bd5ef..b12bc2abea93 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -16,16 +16,11 @@
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
 
-struct cls_mall_filter {
+struct cls_mall_head {
 	struct tcf_exts exts;
 	struct tcf_result res;
 	u32 handle;
-	struct rcu_head	rcu;
 	u32 flags;
-};
-
-struct cls_mall_head {
-	struct cls_mall_filter *filter;
 	struct rcu_head	rcu;
 };
 
@@ -33,38 +28,29 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			 struct tcf_result *res)
 {
 	struct cls_mall_head *head = rcu_dereference_bh(tp->root);
-	struct cls_mall_filter *f = head->filter;
 
-	if (tc_skip_sw(f->flags))
+	if (tc_skip_sw(head->flags))
 		return -1;
 
-	return tcf_exts_exec(skb, &f->exts, res);
+	return tcf_exts_exec(skb, &head->exts, res);
 }
 
 static int mall_init(struct tcf_proto *tp)
 {
-	struct cls_mall_head *head;
-
-	head = kzalloc(sizeof(*head), GFP_KERNEL);
-	if (!head)
-		return -ENOBUFS;
-
-	rcu_assign_pointer(tp->root, head);
-
 	return 0;
 }
 
-static void mall_destroy_filter(struct rcu_head *head)
+static void mall_destroy_rcu(struct rcu_head *rcu)
 {
-	struct cls_mall_filter *f = container_of(head, struct cls_mall_filter, rcu);
+	struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
+						  rcu);
 
-	tcf_exts_destroy(&f->exts);
-
-	kfree(f);
+	tcf_exts_destroy(&head->exts);
+	kfree(head);
 }
 
 static int mall_replace_hw_filter(struct tcf_proto *tp,
-				  struct cls_mall_filter *f,
+				  struct cls_mall_head *head,
 				  unsigned long cookie)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
@@ -74,7 +60,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 	offload.type = TC_SETUP_MATCHALL;
 	offload.cls_mall = &mall_offload;
 	offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
-	offload.cls_mall->exts = &f->exts;
+	offload.cls_mall->exts = &head->exts;
 	offload.cls_mall->cookie = cookie;
 
 	return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
@@ -82,7 +68,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 }
 
 static void mall_destroy_hw_filter(struct tcf_proto *tp,
-				   struct cls_mall_filter *f,
+				   struct cls_mall_head *head,
 				   unsigned long cookie)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
@@ -103,29 +89,20 @@ static bool mall_destroy(struct tcf_proto *tp, bool force)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct cls_mall_filter *f = head->filter;
 
-	if (!force && f)
-		return false;
+	if (!head)
+		return true;
 
-	if (f) {
-		if (tc_should_offload(dev, tp, f->flags))
-			mall_destroy_hw_filter(tp, f, (unsigned long) f);
+	if (tc_should_offload(dev, tp, head->flags))
+		mall_destroy_hw_filter(tp, head, (unsigned long) head);
 
-		call_rcu(&f->rcu, mall_destroy_filter);
-	}
-	kfree_rcu(head, rcu);
+	call_rcu(&head->rcu, mall_destroy_rcu);
 	return true;
 }
 
 static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
 {
-	struct cls_mall_head *head = rtnl_dereference(tp->root);
-	struct cls_mall_filter *f = head->filter;
-
-	if (f && f->handle == handle)
-		return (unsigned long) f;
-	return 0;
+	return 0UL;
 }
 
 static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
@@ -134,7 +111,7 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
 };
 
 static int mall_set_parms(struct net *net, struct tcf_proto *tp,
-			  struct cls_mall_filter *f,
+			  struct cls_mall_head *head,
 			  unsigned long base, struct nlattr **tb,
 			  struct nlattr *est, bool ovr)
 {
@@ -147,11 +124,11 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 		return err;
 
 	if (tb[TCA_MATCHALL_CLASSID]) {
-		f->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
-		tcf_bind_filter(tp, &f->res, base);
+		head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+		tcf_bind_filter(tp, &head->res, base);
 	}
 
-	tcf_exts_change(tp, &f->exts, &e);
+	tcf_exts_change(tp, &head->exts, &e);
 
 	return 0;
 }
@@ -162,21 +139,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 		       unsigned long *arg, bool ovr)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
-	struct cls_mall_filter *fold = (struct cls_mall_filter *) *arg;
 	struct net_device *dev = tp->q->dev_queue->dev;
-	struct cls_mall_filter *f;
 	struct nlattr *tb[TCA_MATCHALL_MAX + 1];
+	struct cls_mall_head *new;
 	u32 flags = 0;
 	int err;
 
 	if (!tca[TCA_OPTIONS])
 		return -EINVAL;
 
-	if (head->filter)
-		return -EBUSY;
-
-	if (fold)
-		return -EINVAL;
+	if (head)
+		return -EEXIST;
 
 	err = nla_parse_nested(tb, TCA_MATCHALL_MAX,
 			       tca[TCA_OPTIONS], mall_policy);
@@ -189,23 +162,23 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 			return -EINVAL;
 	}
 
-	f = kzalloc(sizeof(*f), GFP_KERNEL);
-	if (!f)
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
 		return -ENOBUFS;
 
-	tcf_exts_init(&f->exts, TCA_MATCHALL_ACT, 0);
+	tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
 
 	if (!handle)
 		handle = 1;
-	f->handle = handle;
-	f->flags = flags;
+	new->handle = handle;
+	new->flags = flags;
 
-	err = mall_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
+	err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr);
 	if (err)
 		goto errout;
 
 	if (tc_should_offload(dev, tp, flags)) {
-		err = mall_replace_hw_filter(tp, f, (unsigned long) f);
+		err = mall_replace_hw_filter(tp, new, (unsigned long) new);
 		if (err) {
 			if (tc_skip_sw(flags))
 				goto errout;
@@ -214,39 +187,29 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 		}
 	}
 
-	*arg = (unsigned long) f;
-	rcu_assign_pointer(head->filter, f);
-
+	*arg = (unsigned long) head;
+	rcu_assign_pointer(tp->root, new);
+	if (head)
+		call_rcu(&head->rcu, mall_destroy_rcu);
 	return 0;
 
 errout:
-	kfree(f);
+	kfree(new);
 	return err;
 }
 
 static int mall_delete(struct tcf_proto *tp, unsigned long arg)
 {
-	struct cls_mall_head *head = rtnl_dereference(tp->root);
-	struct cls_mall_filter *f = (struct cls_mall_filter *) arg;
-	struct net_device *dev = tp->q->dev_queue->dev;
-
-	if (tc_should_offload(dev, tp, f->flags))
-		mall_destroy_hw_filter(tp, f, (unsigned long) f);
-
-	RCU_INIT_POINTER(head->filter, NULL);
-	tcf_unbind_filter(tp, &f->res);
-	call_rcu(&f->rcu, mall_destroy_filter);
-	return 0;
+	return -EOPNOTSUPP;
 }
 
 static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
-	struct cls_mall_filter *f = head->filter;
 
 	if (arg->count < arg->skip)
 		goto skip;
-	if (arg->fn(tp, (unsigned long) f, arg) < 0)
+	if (arg->fn(tp, (unsigned long) head, arg) < 0)
 		arg->stop = 1;
 skip:
 	arg->count++;
@@ -255,28 +218,28 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
-	struct cls_mall_filter *f = (struct cls_mall_filter *) fh;
+	struct cls_mall_head *head = (struct cls_mall_head *) fh;
 	struct nlattr *nest;
 
-	if (!f)
+	if (!head)
 		return skb->len;
 
-	t->tcm_handle = f->handle;
+	t->tcm_handle = head->handle;
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (!nest)
 		goto nla_put_failure;
 
-	if (f->res.classid &&
-	    nla_put_u32(skb, TCA_MATCHALL_CLASSID, f->res.classid))
+	if (head->res.classid &&
+	    nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid))
 		goto nla_put_failure;
 
-	if (tcf_exts_dump(skb, &f->exts))
+	if (tcf_exts_dump(skb, &head->exts))
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
-	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+	if (tcf_exts_dump_stats(skb, &head->exts) < 0)
 		goto nla_put_failure;
 
 	return skb->len;

From 1a2a14444d32b89b28116daea86f63ced1716668 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <dmichail@google.com>
Date: Tue, 31 Jan 2017 16:03:13 -0800
Subject: [PATCH 171/322] net: fix ndo_features_check/ndo_fix_features comment
 ordering

Commit cdba756f5803a2 ("net: move ndo_features_check() close to
ndo_start_xmit()") inadvertently moved the doc comment for
.ndo_fix_features instead of .ndo_features_check. Fix the comment
ordering.

Fixes: cdba756f5803a2 ("net: move ndo_features_check() close to ndo_start_xmit()")
Signed-off-by: Dimitris Michailidis <dmichail@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9bde9558b596..70ad0291d517 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -866,11 +866,15 @@ struct netdev_xdp {
  *	of useless work if you return NETDEV_TX_BUSY.
  *	Required; cannot be NULL.
  *
- * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
- *		netdev_features_t features);
- *	Adjusts the requested feature flags according to device-specific
- *	constraints, and returns the resulting flags. Must not modify
- *	the device state.
+ * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
+ *					   struct net_device *dev
+ *					   netdev_features_t features);
+ *	Called by core transmit path to determine if device is capable of
+ *	performing offload operations on a given packet. This is to give
+ *	the device an opportunity to implement any restrictions that cannot
+ *	be otherwise expressed by feature flags. The check is called with
+ *	the set of features that the stack has calculated and it returns
+ *	those the driver believes to be appropriate.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
@@ -1028,6 +1032,12 @@ struct netdev_xdp {
  *	Called to release previously enslaved netdev.
  *
  *      Feature/offload setting functions.
+ * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
+ *		netdev_features_t features);
+ *	Adjusts the requested feature flags according to device-specific
+ *	constraints, and returns the resulting flags. Must not modify
+ *	the device state.
+ *
  * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
  *	Called to update device configuration to new features. Passed
  *	feature set might be less than what was returned by ndo_fix_features()).
@@ -1100,15 +1110,6 @@ struct netdev_xdp {
  *	Callback to use for xmit over the accelerated station. This
  *	is used in place of ndo_start_xmit on accelerated net
  *	devices.
- * netdev_features_t (*ndo_features_check)(struct sk_buff *skb,
- *					   struct net_device *dev
- *					   netdev_features_t features);
- *	Called by core transmit path to determine if device is capable of
- *	performing offload operations on a given packet. This is to give
- *	the device an opportunity to implement any restrictions that cannot
- *	be otherwise expressed by feature flags. The check is called with
- *	the set of features that the stack has calculated and it returns
- *	those the driver believes to be appropriate.
  * int (*ndo_set_tx_maxrate)(struct net_device *dev,
  *			     int queue_index, u32 maxrate);
  *	Called when a user wants to set a max-rate limitation of specific

From 63117f09c768be05a0bf465911297dc76394f686 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 1 Feb 2017 11:46:32 +0300
Subject: [PATCH 172/322] ipv6: pointer math error in
 ip6_tnl_parse_tlv_enc_lim()

Casting is a high precedence operation but "off" and "i" are in terms of
bytes so we need to have some parenthesis here.

Fixes: fbfa743a9d2a ("ipv6: fix ip6_tnl_parse_tlv_enc_lim()")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index ff8ee06491c3..75fac933c209 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -441,7 +441,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 				if (i + sizeof(*tel) > optlen)
 					break;
 
-				tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i;
+				tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
 				/* return index of option if found and valid */
 				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
 				    tel->length == 1)

From 06425c308b92eaf60767bc71d359f4cbc7a561f8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 1 Feb 2017 08:33:53 -0800
Subject: [PATCH 173/322] tcp: fix 0 divide in __tcp_select_window()

syszkaller fuzzer was able to trigger a divide by zero, when
TCP window scaling is not enabled.

SO_RCVBUF can be used not only to increase sk_rcvbuf, also
to decrease it below current receive buffers utilization.

If mss is negative or 0, just return a zero TCP window.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov  <dvyukov@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1d5331a1b1dc..8ce50dc3ab8c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2518,9 +2518,11 @@ u32 __tcp_select_window(struct sock *sk)
 	int full_space = min_t(int, tp->window_clamp, allowed_space);
 	int window;
 
-	if (mss > full_space)
+	if (unlikely(mss > full_space)) {
 		mss = full_space;
-
+		if (mss <= 0)
+			return 0;
+	}
 	if (free_space < (full_space >> 1)) {
 		icsk->icsk_ack.quick = 0;
 

From 601bbbe0517303c9f8eb3d75e11d64efed1293c9 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 31 Jan 2017 14:56:43 -0800
Subject: [PATCH 174/322] Input: uinput - fix crash when mixing old and new
 init style

If user tries to initialize uinput device mixing old and new style
initialization (i.e. using old UI_SET_ABSBIT instead of UI_ABS_SETUP,
we forget to allocate input->absinfo and will crash when trying to send
absolute events:

        ioctl(ui, UI_DEV_SETUP, &us);
        ioctl(ui, UI_SET_PHYS, "Test");

        ioctl(ui, UI_SET_EVBIT, EV_ABS);
        ioctl(ui, UI_SET_ABSBIT, ABS_X);
        ioctl(ui, UI_SET_ABSBIT, ABS_Y);
        ioctl(ui, UI_DEV_CREATE, 0);

Reported-by: Rodrigo Rivas Costa <rodrigorivascosta@gmail.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=191811
Fixes: fbae10db0940 ("Input: uinput - rework ABS validation")
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/uinput.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 92595b98e7ed..022be0e22eba 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -263,13 +263,21 @@ static int uinput_create_device(struct uinput_device *udev)
 		return -EINVAL;
 	}
 
-	if (test_bit(ABS_MT_SLOT, dev->absbit)) {
-		nslot = input_abs_get_max(dev, ABS_MT_SLOT) + 1;
-		error = input_mt_init_slots(dev, nslot, 0);
-		if (error)
+	if (test_bit(EV_ABS, dev->evbit)) {
+		input_alloc_absinfo(dev);
+		if (!dev->absinfo) {
+			error = -EINVAL;
 			goto fail1;
-	} else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) {
-		input_set_events_per_packet(dev, 60);
+		}
+
+		if (test_bit(ABS_MT_SLOT, dev->absbit)) {
+			nslot = input_abs_get_max(dev, ABS_MT_SLOT) + 1;
+			error = input_mt_init_slots(dev, nslot, 0);
+			if (error)
+				goto fail1;
+		} else if (test_bit(ABS_MT_POSITION_X, dev->absbit)) {
+			input_set_events_per_packet(dev, 60);
+		}
 	}
 
 	if (test_bit(EV_FF, dev->evbit) && !udev->ff_effects_max) {

From c8f325a59cfc718d13a50fbc746ed9b415c25e92 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 1 Feb 2017 17:45:02 +0000
Subject: [PATCH 175/322] efi/fdt: Avoid FDT manipulation after
 ExitBootServices()

Some AArch64 UEFI implementations disable the MMU in ExitBootServices(),
after which unaligned accesses to RAM are no longer supported.

Commit:

  abfb7b686a3e ("efi/libstub/arm*: Pass latest memory map to the kernel")

fixed an issue in the memory map handling of the stub FDT code, but
inadvertently created an issue with such firmware, by moving some
of the FDT manipulation to after the invocation of ExitBootServices().

Given that the stub's libfdt implementation uses the ordinary, accelerated
string functions, which rely on hardware handling of unaligned accesses,
manipulating the FDT with the MMU off may result in alignment faults.

So fix the situation by moving the update_fdt_memmap() call into the
callback function invoked by efi_exit_boot_services() right before it
calls the ExitBootServices() UEFI service (which is arguably a better
place for it anyway)

Note that disabling the MMU in ExitBootServices() is not compliant with
the UEFI spec, and carries great risk due to the fact that switching from
cached to uncached memory accesses halfway through compiler generated code
(i.e., involving a stack) can never be done in a way that is architecturally
safe.

Fixes: abfb7b686a3e ("efi/libstub/arm*: Pass latest memory map to the kernel")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Riku Voipio <riku.voipio@linaro.org>
Cc: <stable@vger.kernel.org>
Cc: mark.rutland@arm.com
Cc: linux-efi@vger.kernel.org
Cc: matt@codeblueprint.co.uk
Cc: leif.lindholm@linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1485971102-23330-2-git-send-email-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/fdt.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 921dfa047202..260c4b4b492e 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -187,6 +187,7 @@ static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map)
 struct exit_boot_struct {
 	efi_memory_desc_t *runtime_map;
 	int *runtime_entry_count;
+	void *new_fdt_addr;
 };
 
 static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
@@ -202,7 +203,7 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
 	efi_get_virtmap(*map->map, *map->map_size, *map->desc_size,
 			p->runtime_map, p->runtime_entry_count);
 
-	return EFI_SUCCESS;
+	return update_fdt_memmap(p->new_fdt_addr, map);
 }
 
 /*
@@ -300,22 +301,13 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
 
 	priv.runtime_map = runtime_map;
 	priv.runtime_entry_count = &runtime_entry_count;
+	priv.new_fdt_addr = (void *)*new_fdt_addr;
 	status = efi_exit_boot_services(sys_table, handle, &map, &priv,
 					exit_boot_func);
 
 	if (status == EFI_SUCCESS) {
 		efi_set_virtual_address_map_t *svam;
 
-		status = update_fdt_memmap((void *)*new_fdt_addr, &map);
-		if (status != EFI_SUCCESS) {
-			/*
-			 * The kernel won't get far without the memory map, but
-			 * may still be able to print something meaningful so
-			 * return success here.
-			 */
-			return EFI_SUCCESS;
-		}
-
 		/* Install the new virtual address map */
 		svam = sys_table->runtime->set_virtual_address_map;
 		status = svam(runtime_entry_count * desc_size, desc_size,

From 3484ecbe0e9deb94afb0b9b6172d77e98eb72b94 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Thu, 22 Dec 2016 15:00:12 +0000
Subject: [PATCH 176/322] crypto: qat - fix bar discovery for c62x

Some accelerators of the c62x series have only two bars.
This patch skips BAR0 if the accelerator does not have it.

Cc: <stable@vger.kernel.org>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_c62x/adf_drv.c             | 2 +-
 drivers/crypto/qat/qat_common/adf_accel_devices.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c
index bc5cbc193aae..5b2d78a5b5aa 100644
--- a/drivers/crypto/qat/qat_c62x/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62x/adf_drv.c
@@ -233,7 +233,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			      &hw_data->accel_capabilities_mask);
 
 	/* Find and map all the device's BARS */
-	i = 0;
+	i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0;
 	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
 	for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask,
 			 ADF_PCI_MAX_BARS * 2) {
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index e8822536530b..33f0a6251e38 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -69,6 +69,7 @@
 #define ADF_ERRSOU5 (0x3A000 + 0xD8)
 #define ADF_DEVICE_FUSECTL_OFFSET 0x40
 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C
+#define ADF_DEVICE_FUSECTL_MASK 0x80000000
 #define ADF_PCI_MAX_BARS 3
 #define ADF_DEVICE_NAME_LENGTH 32
 #define ADF_ETR_MAX_RINGS_PER_BANK 16

From 685ce0626840e2673fe64ea8807684f7324fec5f Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Thu, 22 Dec 2016 15:00:24 +0000
Subject: [PATCH 177/322] crypto: qat - zero esram only for DH85x devices

Zero embedded ram in DH85x devices. This is not
needed for newer generations as it is done by HW.

Cc: <stable@vger.kernel.org>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_hal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index 1e480f140663..8c4fd255a601 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -456,7 +456,7 @@ static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
 	unsigned int csr_val;
 	int times = 30;
 
-	if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID)
+	if (handle->pci_dev->device != ADF_DH895XCC_PCI_DEVICE_ID)
 		return 0;
 
 	csr_val = ADF_CSR_RD(csr_addr, 0);
@@ -716,7 +716,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
 		(void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v +
 				 LOCAL_TO_XFER_REG_OFFSET);
 	handle->pci_dev = pci_info->pci_dev;
-	if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) {
+	if (handle->pci_dev->device == ADF_DH895XCC_PCI_DEVICE_ID) {
 		sram_bar =
 			&pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
 		handle->hal_sram_addr_v = sram_bar->virt_addr;

From 8381cdd0e32dd748bd34ca3ace476949948bd793 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 18 Jan 2017 14:14:56 +0900
Subject: [PATCH 178/322] perf diff: Fix segfault on 'perf diff -o N' option

The -o/--order option is to select column number to sort a diff result.

It does the job by adding a hpp field at the beginning of the sort list.
But it should not be added to the output field list as it has no
callbacks required by a output field.

During the setup_sorting(), the perf_hpp__setup_output_field() appends
the given sort keys to the output field if it's not there already.

Originally it was checked by fmt->list being non-empty.  But commit
3f931f2c4274 ("perf hists: Make hpp setup function generic") changed it
to check the ->equal callback.

Anyways, we don't need to add the pseudo hpp field to the output field
list since it won't be used for output.  So just skip fields if they
have no ->color or ->entry callbacks.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Fixes: 3f931f2c4274 ("perf hists: Make hpp setup function generic")
Link: http://lkml.kernel.org/r/20170118051457.30946-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/hist.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 37388397b5bc..4ec79b2f9416 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -560,6 +560,10 @@ void perf_hpp__setup_output_field(struct perf_hpp_list *list)
 	perf_hpp_list__for_each_sort_list(list, fmt) {
 		struct perf_hpp_fmt *pos;
 
+		/* skip sort-only fields ("sort_compute" in perf diff) */
+		if (!fmt->entry && !fmt->color)
+			continue;
+
 		perf_hpp_list__for_each_format(list, pos) {
 			if (fmt_equal(fmt, pos))
 				goto next;

From a1c9f97f0b64e6337d9cfcc08c134450934fdd90 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 18 Jan 2017 14:14:57 +0900
Subject: [PATCH 179/322] perf diff: Fix -o/--order option behavior (again)

Commit 21e6d8428664 ("perf diff: Use perf_hpp__register_sort_field
interface") changed list_add() to perf_hpp__register_sort_field().

This resulted in a behavior change since the field was added to the tail
instead of the head.  So the -o option is mostly ignored due to its
order in the list.

This patch fixes it by adding perf_hpp__prepend_sort_field().

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Fixes: 21e6d8428664 ("perf diff: Use perf_hpp__register_sort_field interface")
Link: http://lkml.kernel.org/r/20170118051457.30946-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-diff.c | 2 +-
 tools/perf/ui/hist.c      | 6 ++++++
 tools/perf/util/hist.h    | 7 +++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 9ff0db4e2d0c..933aeec46f4a 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1199,7 +1199,7 @@ static int ui_init(void)
 		BUG_ON(1);
 	}
 
-	perf_hpp__register_sort_field(fmt);
+	perf_hpp__prepend_sort_field(fmt);
 	return 0;
 }
 
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 4ec79b2f9416..18cfcdc90356 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -521,6 +521,12 @@ void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
 	list_add_tail(&format->sort_list, &list->sorts);
 }
 
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+				       struct perf_hpp_fmt *format)
+{
+	list_add(&format->sort_list, &list->sorts);
+}
+
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
 {
 	list_del(&format->list);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index d4b6514eeef5..28c216e3d5b7 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -283,6 +283,8 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list,
 				    struct perf_hpp_fmt *format);
 void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
 					struct perf_hpp_fmt *format);
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+				       struct perf_hpp_fmt *format);
 
 static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
 {
@@ -294,6 +296,11 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
 	perf_hpp_list__register_sort_field(&perf_hpp_list, format);
 }
 
+static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__prepend_sort_field(&perf_hpp_list, format);
+}
+
 #define perf_hpp_list__for_each_format(_list, format) \
 	list_for_each_entry(format, &(_list)->fields, list)
 

From aa33b9b9a2ebb00d33c83a5312d4fbf2d5aeba36 Mon Sep 17 00:00:00 2001
From: Krister Johansen <kjlx@templeofstupid.com>
Date: Thu, 5 Jan 2017 22:23:31 -0800
Subject: [PATCH 180/322] perf callchain: Reference count maps

If dso__load_kcore frees all of the existing maps, but one has already
been attached to a callchain cursor node, then we can get a SIGSEGV in
any function that happens to try to use this invalid cursor.  Use the
existing map refcount mechanism to forestall cleanup of a map until the
cursor iterates past the node.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: stable@kernel.org
Fixes: 84c2cafa2889 ("perf tools: Reference count struct map")
Link: http://lkml.kernel.org/r/20170106062331.GB2707@templeofstupid.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/callchain.c | 11 +++++++++--
 tools/perf/util/callchain.h |  6 ++++++
 tools/perf/util/hist.c      |  7 +++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 42922512c1c6..8b610dd9e2f6 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -437,7 +437,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 		}
 		call->ip = cursor_node->ip;
 		call->ms.sym = cursor_node->sym;
-		call->ms.map = cursor_node->map;
+		call->ms.map = map__get(cursor_node->map);
 
 		if (cursor_node->branch) {
 			call->branch_count = 1;
@@ -477,6 +477,7 @@ add_child(struct callchain_node *parent,
 
 		list_for_each_entry_safe(call, tmp, &new->val, list) {
 			list_del(&call->list);
+			map__zput(call->ms.map);
 			free(call);
 		}
 		free(new);
@@ -761,6 +762,7 @@ merge_chain_branch(struct callchain_cursor *cursor,
 					list->ms.map, list->ms.sym,
 					false, NULL, 0, 0);
 		list_del(&list->list);
+		map__zput(list->ms.map);
 		free(list);
 	}
 
@@ -811,7 +813,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	}
 
 	node->ip = ip;
-	node->map = map;
+	map__zput(node->map);
+	node->map = map__get(map);
 	node->sym = sym;
 	node->branch = branch;
 	node->nr_loop_iter = nr_loop_iter;
@@ -1142,11 +1145,13 @@ static void free_callchain_node(struct callchain_node *node)
 
 	list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
 		list_del(&list->list);
+		map__zput(list->ms.map);
 		free(list);
 	}
 
 	list_for_each_entry_safe(list, tmp, &node->val, list) {
 		list_del(&list->list);
+		map__zput(list->ms.map);
 		free(list);
 	}
 
@@ -1210,6 +1215,7 @@ int callchain_node__make_parent_list(struct callchain_node *node)
 				goto out;
 			*new = *chain;
 			new->has_children = false;
+			map__get(new->ms.map);
 			list_add_tail(&new->list, &head);
 		}
 		parent = parent->parent;
@@ -1230,6 +1236,7 @@ int callchain_node__make_parent_list(struct callchain_node *node)
 out:
 	list_for_each_entry_safe(chain, new, &head, list) {
 		list_del(&chain->list);
+		map__zput(chain->ms.map);
 		free(chain);
 	}
 	return -ENOMEM;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 35c8e379530f..4f4b60f1558a 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -5,6 +5,7 @@
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include "event.h"
+#include "map.h"
 #include "symbol.h"
 
 #define HELP_PAD "\t\t\t\t"
@@ -184,8 +185,13 @@ int callchain_merge(struct callchain_cursor *cursor,
  */
 static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 {
+	struct callchain_cursor_node *node;
+
 	cursor->nr = 0;
 	cursor->last = &cursor->first;
+
+	for (node = cursor->first; node != NULL; node = node->next)
+		map__zput(node->map);
 }
 
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6770a9645609..7d1b7d33e644 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,6 +1,7 @@
 #include "util.h"
 #include "build-id.h"
 #include "hist.h"
+#include "map.h"
 #include "session.h"
 #include "sort.h"
 #include "evlist.h"
@@ -1019,6 +1020,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 			 int max_stack_depth, void *arg)
 {
 	int err, err2;
+	struct map *alm = NULL;
+
+	if (al && al->map)
+		alm = map__get(al->map);
 
 	err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
 					iter->evsel, al, max_stack_depth);
@@ -1058,6 +1063,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 	if (!err)
 		err = err2;
 
+	map__put(alm);
+
 	return err;
 }
 

From 26a346f23c5291d1d9521e72763103daf2c6f0d1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 1 Feb 2017 17:57:56 +0100
Subject: [PATCH 181/322] tracing/kprobes: Fix __init annotation

clang complains about "__init" being attached to a struct name:

kernel/trace/trace_kprobe.c:1375:15: error: '__section__' attribute only applies to functions and global variables

The intention must have been to mark the function as __init instead of
the type, so move the attribute there.

Link: http://lkml.kernel.org/r/20170201165826.2625888-1-arnd@arndb.de

Fixes: f18f97ac43d7 ("tracing/kprobes: Add a helper method to return number of probe hits")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a133ecd741e4..7ad9e53ad174 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1372,7 +1372,7 @@ kprobe_trace_selftest_target(int a1, int a2, int a3, int a4, int a5, int a6)
 	return a1 + a2 + a3 + a4 + a5 + a6;
 }
 
-static struct __init trace_event_file *
+static __init struct trace_event_file *
 find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
 {
 	struct trace_event_file *file;

From 57bcd0a6364cd4eaa362d7ff1777e88ddf501602 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 27 Jan 2017 10:31:52 -0500
Subject: [PATCH 182/322] drm/amdgpu/si: fix crash on headless asics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Missing check for crtcs present.

Fixes:
https://bugzilla.kernel.org/show_bug.cgi?id=193341
https://bugs.freedesktop.org/show_bug.cgi?id=99387

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index e2b0b1646f99..0635829b18cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -254,6 +254,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 	}
 	WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
+	if (adev->mode_info.num_crtc)
+		amdgpu_display_set_vga_render_state(adev, false);
+
 	gmc_v6_0_mc_stop(adev, &save);
 
 	if (gmc_v6_0_wait_for_idle((void *)adev)) {
@@ -283,7 +286,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v6_0_mc_resume(adev, &save);
-	amdgpu_display_set_vga_render_state(adev, false);
 }
 
 static int gmc_v6_0_mc_init(struct amdgpu_device *adev)

From 51964e9e12d0a054002a1a0d1dec4f661c7aaf28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Mon, 30 Jan 2017 12:06:35 +0900
Subject: [PATCH 183/322] drm/radeon: Fix vram_size/visible values in
 DRM_RADEON_GEM_INFO ioctl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vram_size is supposed to be the total amount of VRAM that can be used by
userspace, which corresponds to the TTM VRAM manager size (which is
normally the full amount of VRAM, but can be just the visible VRAM when
DMA can't be used for BO migration for some reason).

The above was incorrectly used for vram_visible before, resulting in
generally too large values being reported.

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_drv.c | 3 ++-
 drivers/gpu/drm/radeon/radeon_gem.c | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index e0c143b865f3..30bd4a6a9d46 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -97,9 +97,10 @@
  *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  *   2.47.0 - Add UVD_NO_OP register support
  *   2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI
+ *   2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible values
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	48
+#define KMS_DRIVER_MINOR	49
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 0bcffd8a7bd3..96683f5b2b1b 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -220,8 +220,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 
 	man = &rdev->mman.bdev.man[TTM_PL_VRAM];
 
-	args->vram_size = rdev->mc.real_vram_size;
-	args->vram_visible = (u64)man->size << PAGE_SHIFT;
+	args->vram_size = (u64)man->size << PAGE_SHIFT;
+	args->vram_visible = rdev->mc.visible_vram_size;
 	args->vram_visible -= rdev->vram_pin_size;
 	args->gart_size = rdev->mc.gtt_size;
 	args->gart_size -= rdev->gart_pin_size;

From dfef358bd1beb4e7b5c94eca944be9cd23dfc752 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jan 2017 13:15:41 +0100
Subject: [PATCH 184/322] PCI/MSI: Don't apply affinity if there aren't enough
 vectors left
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bart reported a problem wіth an out of bounds access in the low-level IRQ
affinity code, which we root caused to the qla2xxx driver assigning all its
MSI-X vectors to the pre and post vectors, and not having any left for the
actually spread IRQs.

Fix this issue by not asking for affinity assignment when there are no
vectors to assign left.

Fixes: 402723ad5c62 ("PCI/MSI: Provide pci_alloc_irq_vectors_affinity()")
Link: https://lkml.kernel.org/r/1485359225.3093.3.camel@sandisk.com
Reported-by: Bart Van Assche <bart.vanassche@sandisk.com>
Tested-by: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/msi.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 50c5003295ca..7f73bacf13ed 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1206,6 +1206,16 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 	if (flags & PCI_IRQ_AFFINITY) {
 		if (!affd)
 			affd = &msi_default_affd;
+
+		if (affd->pre_vectors + affd->post_vectors > min_vecs)
+			return -EINVAL;
+
+		/*
+		 * If there aren't any vectors left after applying the pre/post
+		 * vectors don't bother with assigning affinity.
+		 */
+		if (affd->pre_vectors + affd->post_vectors == min_vecs)
+			affd = NULL;
 	} else {
 		if (WARN_ON(affd))
 			affd = NULL;

From 1a902f6b70c55171ca2419d946b85274e35c9757 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 2 Feb 2017 12:38:33 +0100
Subject: [PATCH 185/322] ARM: orion5x: fix Makefile for linkstation-lschl.dtb

The rename of orion5x-lschl.dts needs to be reflected in the Makefile:

make[3]: *** No rule to make target 'arch/arm/boot/dts/orion5x-lschl.dtb', needed by '__build'.

Fixes: 6cfd3cd8d836 ("ARM: dts: orion5x-lschl: More consistent naming on linkstation series")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm/boot/dts/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index cccdbcb557b6..20fe4a54ee5e 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -616,7 +616,7 @@ dtb-$(CONFIG_ARCH_ORION5X) += \
 	orion5x-lacie-ethernet-disk-mini-v2.dtb \
 	orion5x-linkstation-lsgl.dtb \
 	orion5x-linkstation-lswtgl.dtb \
-	orion5x-lschl.dtb \
+	orion5x-linkstation-lschl.dtb \
 	orion5x-lswsgl.dtb \
 	orion5x-maxtor-shared-storage-2.dtb \
 	orion5x-netgear-wnr854t.dtb \

From f32b20e89e82c9ff1825fc5c5d69753ff5558ccd Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 31 Jan 2017 22:35:32 -0800
Subject: [PATCH 186/322] mlx4: Fix memory leak after mlx4_en_update_priv()

In mlx4_en_update_priv(), dst->tx_ring[t] and dst->tx_cq[t]
are over-written by src->tx_ring[t] and src->tx_cq[t] without
first calling kfree.

One of the reproducible code paths is by doing 'ethtool -L'.

The fix is to do the kfree in mlx4_en_free_resources().

Here is the kmemleak report:
unreferenced object 0xffff880841211800 (size 2048):
  comm "ethtool", pid 3096, jiffies 4294716940 (age 528.353s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff81930718>] kmemleak_alloc+0x28/0x50
    [<ffffffff8120b213>] kmem_cache_alloc_trace+0x103/0x260
    [<ffffffff8170e0a8>] mlx4_en_try_alloc_resources+0x118/0x1a0
    [<ffffffff817065a9>] mlx4_en_set_ringparam+0x169/0x210
    [<ffffffff818040c5>] dev_ethtool+0xae5/0x2190
    [<ffffffff8181b898>] dev_ioctl+0x168/0x6f0
    [<ffffffff817d7a72>] sock_do_ioctl+0x42/0x50
    [<ffffffff817d819b>] sock_ioctl+0x21b/0x2d0
    [<ffffffff81247a73>] do_vfs_ioctl+0x93/0x6a0
    [<ffffffff812480f9>] SyS_ioctl+0x79/0x90
    [<ffffffff8193d7ea>] entry_SYSCALL_64_fastpath+0x18/0xad
    [<ffffffffffffffff>] 0xffffffffffffffff
unreferenced object 0xffff880841213000 (size 2048):
  comm "ethtool", pid 3096, jiffies 4294716940 (age 528.353s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff81930718>] kmemleak_alloc+0x28/0x50
    [<ffffffff8120b213>] kmem_cache_alloc_trace+0x103/0x260
    [<ffffffff8170e0cb>] mlx4_en_try_alloc_resources+0x13b/0x1a0
    [<ffffffff817065a9>] mlx4_en_set_ringparam+0x169/0x210
    [<ffffffff818040c5>] dev_ethtool+0xae5/0x2190
    [<ffffffff8181b898>] dev_ioctl+0x168/0x6f0
    [<ffffffff817d7a72>] sock_do_ioctl+0x42/0x50
    [<ffffffff817d819b>] sock_ioctl+0x21b/0x2d0
    [<ffffffff81247a73>] do_vfs_ioctl+0x93/0x6a0
    [<ffffffff812480f9>] SyS_ioctl+0x79/0x90
    [<ffffffff8193d7ea>] entry_SYSCALL_64_fastpath+0x18/0xad
    [<ffffffffffffffff>] 0xffffffffffffffff

(gdb) list *mlx4_en_try_alloc_resources+0x118
0xffffffff8170e0a8 is in mlx4_en_try_alloc_resources (drivers/net/ethernet/mellanox/mlx4/en_netdev.c:2145).
2140                    if (!dst->tx_ring_num[t])
2141                            continue;
2142
2143                    dst->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) *
2144                                              MAX_TX_RINGS, GFP_KERNEL);
2145                    if (!dst->tx_ring[t])
2146                            goto err_free_tx;
2147
2148                    dst->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
2149                                            MAX_TX_RINGS, GFP_KERNEL);
(gdb) list *mlx4_en_try_alloc_resources+0x13b
0xffffffff8170e0cb is in mlx4_en_try_alloc_resources (drivers/net/ethernet/mellanox/mlx4/en_netdev.c:2150).
2145                    if (!dst->tx_ring[t])
2146                            goto err_free_tx;
2147
2148                    dst->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
2149                                            MAX_TX_RINGS, GFP_KERNEL);
2150                    if (!dst->tx_cq[t]) {
2151                            kfree(dst->tx_ring[t]);
2152                            goto err_free_tx;
2153                    }
2154            }

Fixes: ec25bc04ed8e ("net/mlx4_en: Add resilience in low memory systems")
Cc: Eugenia Emantayev <eugenia@mellanox.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 761f8b12399c..3abcead208d2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2042,6 +2042,8 @@ static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 			if (priv->tx_cq[t] && priv->tx_cq[t][i])
 				mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]);
 		}
+		kfree(priv->tx_ring[t]);
+		kfree(priv->tx_cq[t]);
 	}
 
 	for (i = 0; i < priv->rx_ring_num; i++) {
@@ -2214,7 +2216,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	int t;
 
 	en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
 
@@ -2248,11 +2249,6 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	mlx4_en_free_resources(priv);
 	mutex_unlock(&mdev->state_lock);
 
-	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
-		kfree(priv->tx_ring[t]);
-		kfree(priv->tx_cq[t]);
-	}
-
 	free_netdev(dev);
 }
 

From 770f82253dbd7e6892a88018f2f6cd395f48d214 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 31 Jan 2017 22:35:33 -0800
Subject: [PATCH 187/322] mlx4: xdp_prog becomes inactive after ethtool '-L' or
 '-G'

After calling mlx4_en_try_alloc_resources (e.g. by changing the
number of rx-queues with ethtool -L), the existing xdp_prog becomes
inactive.

The bug is that the xdp_prog ptr has not been carried over from
the old rx-queues to the new rx-queues

Fixes: 47a38e155037 ("net/mlx4_en: add support for fast rx drop bpf program")
Cc: Brenden Blanco <bblanco@plumgrid.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx4/en_ethtool.c   |  4 +--
 .../net/ethernet/mellanox/mlx4/en_netdev.c    | 27 ++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |  3 ++-
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index d5a9372ed84d..9aa422691954 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1099,7 +1099,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
 	new_prof.tx_ring_size = tx_size;
 	new_prof.rx_ring_size = rx_size;
-	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
 	if (err)
 		goto out;
 
@@ -1774,7 +1774,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	new_prof.tx_ring_num[TX_XDP] = xdp_count;
 	new_prof.rx_ring_num = channel->rx_count;
 
-	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
 	if (err)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 3abcead208d2..3b4961a8e8e4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2186,9 +2186,11 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
 
 int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
 				struct mlx4_en_priv *tmp,
-				struct mlx4_en_port_profile *prof)
+				struct mlx4_en_port_profile *prof,
+				bool carry_xdp_prog)
 {
-	int t;
+	struct bpf_prog *xdp_prog;
+	int i, t;
 
 	mlx4_en_copy_priv(tmp, priv, prof);
 
@@ -2202,6 +2204,23 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
 		}
 		return -ENOMEM;
 	}
+
+	/* All rx_rings has the same xdp_prog.  Pick the first one. */
+	xdp_prog = rcu_dereference_protected(
+		priv->rx_ring[0]->xdp_prog,
+		lockdep_is_held(&priv->mdev->state_lock));
+
+	if (xdp_prog && carry_xdp_prog) {
+		xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num);
+		if (IS_ERR(xdp_prog)) {
+			mlx4_en_free_resources(tmp);
+			return PTR_ERR(xdp_prog);
+		}
+		for (i = 0; i < tmp->rx_ring_num; i++)
+			rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog,
+					   xdp_prog);
+	}
+
 	return 0;
 }
 
@@ -2751,7 +2770,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
 	}
 
-	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, false);
 	if (err) {
 		if (prog)
 			bpf_prog_sub(prog, priv->rx_ring_num - 1);
@@ -3495,7 +3514,7 @@ int mlx4_en_reset_config(struct net_device *dev,
 	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
 	memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config));
 
-	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
 	if (err)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index ba1c6cd0cc79..cec59bc264c9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -679,7 +679,8 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 
 int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
 				struct mlx4_en_priv *tmp,
-				struct mlx4_en_port_profile *prof);
+				struct mlx4_en_port_profile *prof,
+				bool carry_xdp_prog);
 void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv,
 				    struct mlx4_en_priv *tmp);
 

From cafe8df8b9bc9aa3dffa827c1a6757c6cd36f657 Mon Sep 17 00:00:00 2001
From: Mao Wenan <maowenan@huawei.com>
Date: Tue, 31 Jan 2017 18:46:43 -0800
Subject: [PATCH 188/322] net: phy: Fix lack of reference count on PHY driver

There is currently no reference count being held on the PHY driver,
which makes it possible to remove the PHY driver module while the PHY
state machine is running and polling the PHY. This could cause crashes
similar to this one to show up:

[   43.361162] BUG: unable to handle kernel NULL pointer dereference at 0000000000000140
[   43.361162] IP: phy_state_machine+0x32/0x490
[   43.361162] PGD 59dc067
[   43.361162] PUD 0
[   43.361162]
[   43.361162] Oops: 0000 [#1] SMP
[   43.361162] Modules linked in: dsa_loop [last unloaded: broadcom]
[   43.361162] CPU: 0 PID: 1299 Comm: kworker/0:3 Not tainted 4.10.0-rc5+ #415
[   43.361162] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014
[   43.361162] Workqueue: events_power_efficient phy_state_machine
[   43.361162] task: ffff880006782b80 task.stack: ffffc90000184000
[   43.361162] RIP: 0010:phy_state_machine+0x32/0x490
[   43.361162] RSP: 0018:ffffc90000187e18 EFLAGS: 00000246
[   43.361162] RAX: 0000000000000000 RBX: ffff8800059e53c0 RCX:
ffff880006a15c60
[   43.361162] RDX: ffff880006782b80 RSI: 0000000000000000 RDI:
ffff8800059e5428
[   43.361162] RBP: ffffc90000187e48 R08: ffff880006a15c40 R09:
0000000000000000
[   43.361162] R10: 0000000000000000 R11: 0000000000000000 R12:
ffff8800059e5428
[   43.361162] R13: ffff8800059e5000 R14: 0000000000000000 R15:
ffff880006a15c40
[   43.361162] FS:  0000000000000000(0000) GS:ffff880006a00000(0000)
knlGS:0000000000000000
[   43.361162] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   43.361162] CR2: 0000000000000140 CR3: 0000000005979000 CR4:
00000000000006f0
[   43.361162] Call Trace:
[   43.361162]  process_one_work+0x1b4/0x3e0
[   43.361162]  worker_thread+0x43/0x4d0
[   43.361162]  ? __schedule+0x17f/0x4e0
[   43.361162]  kthread+0xf7/0x130
[   43.361162]  ? process_one_work+0x3e0/0x3e0
[   43.361162]  ? kthread_create_on_node+0x40/0x40
[   43.361162]  ret_from_fork+0x29/0x40
[   43.361162] Code: 56 41 55 41 54 4c 8d 67 68 53 4c 8d af 40 fc ff ff
48 89 fb 4c 89 e7 48 83 ec 08 e8 c9 9d 27 00 48 8b 83 60 ff ff ff 44 8b
73 98 <48> 8b 90 40 01 00 00 44 89 f0 48 85 d2 74 08 4c 89 ef ff d2 8b

Keep references on the PHY driver module right before we are going to
utilize it in phy_attach_direct(), and conversely when we don't use it
anymore in phy_detach().

Signed-off-by: Mao Wenan <maowenan@huawei.com>
[florian: rebase, rework commit message]
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 92b08383cafa..0d8f4d3847f6 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -920,6 +920,11 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		return -EIO;
 	}
 
+	if (!try_module_get(d->driver->owner)) {
+		dev_err(&dev->dev, "failed to get the device driver module\n");
+		return -EIO;
+	}
+
 	get_device(d);
 
 	/* Assume that if there is no driver, that it doesn't
@@ -977,6 +982,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 error:
 	phy_detach(phydev);
 	put_device(d);
+	module_put(d->driver->owner);
 	if (ndev_owner != bus->owner)
 		module_put(bus->owner);
 	return err;
@@ -1059,6 +1065,7 @@ void phy_detach(struct phy_device *phydev)
 	bus = phydev->mdio.bus;
 
 	put_device(&phydev->mdio.dev);
+	module_put(phydev->mdio.dev.driver->owner);
 	if (ndev_owner != bus->owner)
 		module_put(bus->owner);
 }

From 94e1dab1c94715e18bb0bada503de3f3d7593076 Mon Sep 17 00:00:00 2001
From: Harsh Jain <harsh@chelsio.com>
Date: Tue, 24 Jan 2017 10:34:32 +0530
Subject: [PATCH 189/322] crypto: chcr - Fix panic on dma_unmap_sg

Save DMA mapped sg list addresses to request context buffer.

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chcr_algo.c   | 49 +++++++++++++++-------------
 drivers/crypto/chelsio/chcr_crypto.h |  3 ++
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 2ed1e24b44a8..d29c2b45d3ab 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -158,7 +158,7 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 	case CRYPTO_ALG_TYPE_AEAD:
 		ctx_req.req.aead_req = (struct aead_request *)req;
 		ctx_req.ctx.reqctx = aead_request_ctx(ctx_req.req.aead_req);
-		dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.aead_req->dst,
+		dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.ctx.reqctx->dst,
 			     ctx_req.ctx.reqctx->dst_nents, DMA_FROM_DEVICE);
 		if (ctx_req.ctx.reqctx->skb) {
 			kfree_skb(ctx_req.ctx.reqctx->skb);
@@ -1362,8 +1362,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
 	struct phys_sge_parm sg_param;
-	struct scatterlist *src, *dst;
-	struct scatterlist src_sg[2], dst_sg[2];
+	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0;
 	unsigned int   kctx_len = 0;
@@ -1383,19 +1382,21 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 
 	if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0)
 		goto err;
-	src = scatterwalk_ffwd(src_sg, req->src, req->assoclen);
-	dst = src;
+	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
+	reqctx->dst = src;
+
 	if (req->src != req->dst) {
 		err = chcr_copy_assoc(req, aeadctx);
 		if (err)
 			return ERR_PTR(err);
-		dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
+					       req->assoclen);
 	}
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) {
 		null = 1;
 		assoclen = 0;
 	}
-	reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen +
+	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents <= 0) {
 		pr_err("AUTHENC:Invalid Destination sg entries\n");
@@ -1460,7 +1461,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
 	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst,
+	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
 				  &sg_param))
 		goto dstmap_fail;
 
@@ -1711,8 +1712,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
 	struct phys_sge_parm sg_param;
-	struct scatterlist *src, *dst;
-	struct scatterlist src_sg[2], dst_sg[2];
+	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE;
 	unsigned int dst_size = 0, kctx_len;
 	unsigned int sub_type;
@@ -1728,17 +1728,19 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 	if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0)
 		goto err;
 	sub_type = get_aead_subtype(tfm);
-	src = scatterwalk_ffwd(src_sg, req->src, req->assoclen);
-	dst = src;
+	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
+	reqctx->dst = src;
+
 	if (req->src != req->dst) {
 		err = chcr_copy_assoc(req, aeadctx);
 		if (err) {
 			pr_err("AAD copy to destination buffer fails\n");
 			return ERR_PTR(err);
 		}
-		dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
+					       req->assoclen);
 	}
-	reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen +
+	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents <= 0) {
 		pr_err("CCM:Invalid Destination sg entries\n");
@@ -1777,7 +1779,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
 	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst,
+	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
 				  &sg_param))
 		goto dstmap_fail;
 
@@ -1809,8 +1811,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
 	struct phys_sge_parm sg_param;
-	struct scatterlist *src, *dst;
-	struct scatterlist src_sg[2], dst_sg[2];
+	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int ivsize = AES_BLOCK_SIZE;
 	unsigned int dst_size = 0, kctx_len;
@@ -1832,13 +1833,14 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0)
 		goto err;
 
-	src = scatterwalk_ffwd(src_sg, req->src, req->assoclen);
-	dst = src;
+	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
+	reqctx->dst = src;
 	if (req->src != req->dst) {
 		err = chcr_copy_assoc(req, aeadctx);
 		if (err)
 			return	ERR_PTR(err);
-		dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
+					       req->assoclen);
 	}
 
 	if (!req->cryptlen)
@@ -1848,7 +1850,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 		crypt_len = AES_BLOCK_SIZE;
 	else
 		crypt_len = req->cryptlen;
-	reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen +
+	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents <= 0) {
 		pr_err("GCM:Invalid Destination sg entries\n");
@@ -1923,7 +1925,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
 	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst,
+	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
 				  &sg_param))
 		goto dstmap_fail;
 
@@ -1937,7 +1939,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 		write_sg_to_skb(skb, &frags, src, req->cryptlen);
 	} else {
 		aes_gcm_empty_pld_pad(req->dst, authsize - 1);
-		write_sg_to_skb(skb, &frags, dst, crypt_len);
+		write_sg_to_skb(skb, &frags, reqctx->dst, crypt_len);
+
 	}
 
 	create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1,
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index d5af7d64a763..7ec0a8f12475 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -158,6 +158,9 @@ struct ablk_ctx {
 };
 struct chcr_aead_reqctx {
 	struct	sk_buff	*skb;
+	struct scatterlist *dst;
+	struct scatterlist srcffwd[2];
+	struct scatterlist dstffwd[2];
 	short int dst_nents;
 	u16 verify;
 	u8 iv[CHCR_MAX_CRYPTO_IV_LEN];

From f5f7bebc91ab378dea5aad5277c4d283e46472d9 Mon Sep 17 00:00:00 2001
From: Harsh Jain <harsh@chelsio.com>
Date: Tue, 24 Jan 2017 10:34:33 +0530
Subject: [PATCH 190/322] crypto: chcr - Check device is allocated before use

Ensure dev is allocated for crypto uld context before using the device
for crypto operations.

Cc: <stable@vger.kernel.org>
Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chcr_core.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index 918da8e6e2d8..1c65f07e1cc9 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -52,6 +52,7 @@ static struct cxgb4_uld_info chcr_uld_info = {
 int assign_chcr_device(struct chcr_dev **dev)
 {
 	struct uld_ctx *u_ctx;
+	int ret = -ENXIO;
 
 	/*
 	 * Which device to use if multiple devices are available TODO
@@ -59,15 +60,14 @@ int assign_chcr_device(struct chcr_dev **dev)
 	 * must go to the same device to maintain the ordering.
 	 */
 	mutex_lock(&dev_mutex); /* TODO ? */
-	u_ctx = list_first_entry(&uld_ctx_list, struct uld_ctx, entry);
-	if (!u_ctx) {
-		mutex_unlock(&dev_mutex);
-		return -ENXIO;
+	list_for_each_entry(u_ctx, &uld_ctx_list, entry)
+		if (u_ctx && u_ctx->dev) {
+			*dev = u_ctx->dev;
+			ret = 0;
+			break;
 	}
-
-	*dev = u_ctx->dev;
 	mutex_unlock(&dev_mutex);
-	return 0;
+	return ret;
 }
 
 static int chcr_dev_add(struct uld_ctx *u_ctx)
@@ -202,10 +202,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
 
 static int __init chcr_crypto_init(void)
 {
-	if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) {
+	if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info))
 		pr_err("ULD register fail: No chcr crypto support in cxgb4");
-		return -1;
-	}
 
 	return 0;
 }

From 500c0106e638e08c2c661c305ed57d6b67e10908 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Fri, 27 Jan 2017 15:28:45 -0600
Subject: [PATCH 191/322] crypto: ccp - Fix DMA operations when IOMMU is
 enabled

An I/O page fault occurs when the IOMMU is enabled on a
system that supports the v5 CCP.  DMA operations use a
Request ID value that does not match what is expected by
the IOMMU, resulting in the I/O page fault.  Setting the
Request ID value to 0 corrects this issue.

Cc: <stable@vger.kernel.org>
Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev-v5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index e2ce8190ecc9..612898b4aaad 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -959,7 +959,7 @@ static irqreturn_t ccp5_irq_handler(int irq, void *data)
 static void ccp5_config(struct ccp_device *ccp)
 {
 	/* Public side */
-	iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
+	iowrite32(0x0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
 }
 
 static void ccp5other_config(struct ccp_device *ccp)

From e5da5c5667381d2772374ee6a2967b3576c9483d Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Fri, 27 Jan 2017 17:09:04 -0600
Subject: [PATCH 192/322] crypto: ccp - Fix double add when creating new DMA
 command

Eliminate a double-add by creating a new list to manage
command descriptors when created; move the descriptor to
the pending list when the command is submitted.

Cc: <stable@vger.kernel.org>
Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev.h       | 1 +
 drivers/crypto/ccp/ccp-dmaengine.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 830f35e6005f..649e5610a5ce 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -238,6 +238,7 @@ struct ccp_dma_chan {
 	struct ccp_device *ccp;
 
 	spinlock_t lock;
+	struct list_head created;
 	struct list_head pending;
 	struct list_head active;
 	struct list_head complete;
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index 6553912804f7..e5d9278f4019 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -63,6 +63,7 @@ static void ccp_free_chan_resources(struct dma_chan *dma_chan)
 	ccp_free_desc_resources(chan->ccp, &chan->complete);
 	ccp_free_desc_resources(chan->ccp, &chan->active);
 	ccp_free_desc_resources(chan->ccp, &chan->pending);
+	ccp_free_desc_resources(chan->ccp, &chan->created);
 
 	spin_unlock_irqrestore(&chan->lock, flags);
 }
@@ -273,6 +274,7 @@ static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc)
 	spin_lock_irqsave(&chan->lock, flags);
 
 	cookie = dma_cookie_assign(tx_desc);
+	list_del(&desc->entry);
 	list_add_tail(&desc->entry, &chan->pending);
 
 	spin_unlock_irqrestore(&chan->lock, flags);
@@ -426,7 +428,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan,
 
 	spin_lock_irqsave(&chan->lock, sflags);
 
-	list_add_tail(&desc->entry, &chan->pending);
+	list_add_tail(&desc->entry, &chan->created);
 
 	spin_unlock_irqrestore(&chan->lock, sflags);
 
@@ -610,6 +612,7 @@ static int ccp_terminate_all(struct dma_chan *dma_chan)
 	/*TODO: Purge the complete list? */
 	ccp_free_desc_resources(chan->ccp, &chan->active);
 	ccp_free_desc_resources(chan->ccp, &chan->pending);
+	ccp_free_desc_resources(chan->ccp, &chan->created);
 
 	spin_unlock_irqrestore(&chan->lock, flags);
 
@@ -679,6 +682,7 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
 		chan->ccp = ccp;
 
 		spin_lock_init(&chan->lock);
+		INIT_LIST_HEAD(&chan->created);
 		INIT_LIST_HEAD(&chan->pending);
 		INIT_LIST_HEAD(&chan->active);
 		INIT_LIST_HEAD(&chan->complete);

From c26819900036f5b91608051a0fc7c76f6b4ffc7b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 1 Feb 2017 22:17:39 +0800
Subject: [PATCH 193/322] crypto: aesni - Fix failure when pcbc module is
 absent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When aesni is built as a module together with pcbc, the pcbc module
must be present for aesni to load.  However, the pcbc module may not
be present for reasons such as its absence on initramfs.  This patch
allows the aesni to function even if the pcbc module is enabled but
not present.

Reported-by: Arkadiusz Miśkiewicz <arekm@maven.pl>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 6ef688a1ef3e..7ff1b0c86a8e 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1085,9 +1085,9 @@ static void aesni_free_simds(void)
 		    aesni_simd_skciphers[i]; i++)
 		simd_skcipher_free(aesni_simd_skciphers[i]);
 
-	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) &&
-		    aesni_simd_skciphers2[i].simd; i++)
-		simd_skcipher_free(aesni_simd_skciphers2[i].simd);
+	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
+		if (aesni_simd_skciphers2[i].simd)
+			simd_skcipher_free(aesni_simd_skciphers2[i].simd);
 }
 
 static int __init aesni_init(void)
@@ -1168,7 +1168,7 @@ static int __init aesni_init(void)
 		simd = simd_skcipher_create_compat(algname, drvname, basename);
 		err = PTR_ERR(simd);
 		if (IS_ERR(simd))
-			goto unregister_simds;
+			continue;
 
 		aesni_simd_skciphers2[i].simd = simd;
 	}

From 0b529f143e8baad441a5aac9ad55ec2434d8fb46 Mon Sep 17 00:00:00 2001
From: Harsh Jain <harsh@chelsio.com>
Date: Wed, 1 Feb 2017 21:10:28 +0530
Subject: [PATCH 194/322] crypto: algif_aead - Fix kernel panic on list_del
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Kernel panics when userspace program try to access AEAD interface.
Remove node from Linked List before freeing its memory.

Cc: <stable@vger.kernel.org>
Signed-off-by: Harsh Jain <harsh@chelsio.com>
Reviewed-by: Stephan Müller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algif_aead.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index f849311e9fd4..533265f110e0 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -661,9 +661,9 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
 unlock:
 	list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) {
 		af_alg_free_sg(&rsgl->sgl);
+		list_del(&rsgl->list);
 		if (rsgl != &ctx->first_rsgl)
 			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-		list_del(&rsgl->list);
 	}
 	INIT_LIST_HEAD(&ctx->list);
 	aead_wmem_wakeup(sk);

From 7c2cf1c4615cc2f576d0604406cdf0065f00b83b Mon Sep 17 00:00:00 2001
From: Harsh Jain <harsh@chelsio.com>
Date: Fri, 27 Jan 2017 16:09:06 +0530
Subject: [PATCH 195/322] crypto: chcr - Fix key length for RFC4106

Check keylen before copying salt to avoid wrap around of Integer.

Signed-off-by: Harsh Jain <harsh@chelsio.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chcr_algo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index d29c2b45d3ab..b4b78b37f8a6 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -2192,8 +2192,8 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	unsigned int ck_size;
 	int ret = 0, key_ctx_size = 0;
 
-	if (get_aead_subtype(aead) ==
-	    CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) {
+	if (get_aead_subtype(aead) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106 &&
+	    keylen > 3) {
 		keylen -= 4;  /* nonce/salt is present in the last 4 bytes */
 		memcpy(aeadctx->salt, key + keylen, 4);
 	}

From d98e0929071e7ef63d35c1838b0ad0805ae366dd Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 3 Feb 2017 08:53:51 -0600
Subject: [PATCH 196/322] Revert "PCI: pciehp: Add runtime PM support for PCIe
 hotplug ports"

This reverts commit 68db9bc814362e7f24371c27d12a4f34477d9356.

Yinghai reported that the following manual hotplug sequence:

  # echo 0 > /sys/bus/pci/slots/8/power
  # echo 1 > /sys/bus/pci/slots/8/power

worked in v4.9, but fails in v4.10-rc1, and that reverting 68db9bc81436
("PCI: pciehp: Add runtime PM support for PCIe hotplug ports") makes it
work again.

Fixes: 68db9bc81436 ("PCI: pciehp: Add runtime PM support for PCIe hotplug ports")
Link: https://lkml.kernel.org/r/CAE9FiQVCMCa7iVyuwp9z6VrY0cE7V_xghuXip28Ft52=8QmTWw@mail.gmail.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=193951
Reported-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/hotplug/pciehp_ctrl.c |  6 ------
 drivers/pci/pci.c                 | 12 ++++++------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 10c9c0ba8ff2..ec0b4c11ccd9 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -31,7 +31,6 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/slab.h>
-#include <linux/pm_runtime.h>
 #include <linux/pci.h>
 #include "../pci.h"
 #include "pciehp.h"
@@ -99,7 +98,6 @@ static int board_added(struct slot *p_slot)
 	pciehp_green_led_blink(p_slot);
 
 	/* Check link training status */
-	pm_runtime_get_sync(&ctrl->pcie->port->dev);
 	retval = pciehp_check_link_status(ctrl);
 	if (retval) {
 		ctrl_err(ctrl, "Failed to check link status\n");
@@ -120,14 +118,12 @@ static int board_added(struct slot *p_slot)
 		if (retval != -EEXIST)
 			goto err_exit;
 	}
-	pm_runtime_put(&ctrl->pcie->port->dev);
 
 	pciehp_green_led_on(p_slot);
 	pciehp_set_attention_status(p_slot, 0);
 	return 0;
 
 err_exit:
-	pm_runtime_put(&ctrl->pcie->port->dev);
 	set_slot_off(ctrl, p_slot);
 	return retval;
 }
@@ -141,9 +137,7 @@ static int remove_board(struct slot *p_slot)
 	int retval;
 	struct controller *ctrl = p_slot->ctrl;
 
-	pm_runtime_get_sync(&ctrl->pcie->port->dev);
 	retval = pciehp_unconfigure_device(p_slot);
-	pm_runtime_put(&ctrl->pcie->port->dev);
 	if (retval)
 		return retval;
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index a881c0d3d2e8..7904d02ffdb9 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2241,10 +2241,13 @@ bool pci_bridge_d3_possible(struct pci_dev *bridge)
 			return false;
 
 		/*
-		 * Hotplug ports handled by firmware in System Management Mode
+		 * Hotplug interrupts cannot be delivered if the link is down,
+		 * so parents of a hotplug port must stay awake. In addition,
+		 * hotplug ports handled by firmware in System Management Mode
 		 * may not be put into D3 by the OS (Thunderbolt on non-Macs).
+		 * For simplicity, disallow in general for now.
 		 */
-		if (bridge->is_hotplug_bridge && !pciehp_is_native(bridge))
+		if (bridge->is_hotplug_bridge)
 			return false;
 
 		if (pci_bridge_d3_force)
@@ -2276,10 +2279,7 @@ static int pci_dev_check_d3cold(struct pci_dev *dev, void *data)
 	     !pci_pme_capable(dev, PCI_D3cold)) ||
 
 	    /* If it is a bridge it must be allowed to go to D3. */
-	    !pci_power_manageable(dev) ||
-
-	    /* Hotplug interrupts cannot be delivered if the link is down. */
-	    dev->is_hotplug_bridge)
+	    !pci_power_manageable(dev))
 
 		*d3cold_ok = false;
 

From d19a55ccad15a486ffe03030570744e5d5bd9f8e Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 6 Jan 2017 15:33:14 -0500
Subject: [PATCH 197/322] dm mpath: cleanup -Wbool-operation warning in
 choose_pgpath()

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 6400cffb986d..3570bcb7a4a4 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -427,7 +427,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
 	unsigned long flags;
 	struct priority_group *pg;
 	struct pgpath *pgpath;
-	bool bypassed = true;
+	unsigned bypassed = 1;
 
 	if (!atomic_read(&m->nr_valid_paths)) {
 		clear_bit(MPATHF_QUEUE_IO, &m->flags);
@@ -466,7 +466,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
 	 */
 	do {
 		list_for_each_entry(pg, &m->priority_groups, list) {
-			if (pg->bypassed == bypassed)
+			if (pg->bypassed == !!bypassed)
 				continue;
 			pgpath = choose_path_in_pg(m, pg, nr_bytes);
 			if (!IS_ERR_OR_NULL(pgpath)) {

From 4087a1fffe38106e10646606a27f10d40451862d Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Wed, 25 Jan 2017 16:24:52 +0100
Subject: [PATCH 198/322] dm rq: cope with DM device destruction while in
 dm_old_request_fn()

Fixes a crash in dm_table_find_target() due to a NULL struct dm_table
being passed from dm_old_request_fn() that races with DM device
destruction.

Reported-by: artem@flashgrid.io
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/md/dm-rq.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 9d7275fb541a..6e702fc69a83 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -779,6 +779,10 @@ static void dm_old_request_fn(struct request_queue *q)
 		int srcu_idx;
 		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
 
+		if (unlikely(!map)) {
+			dm_put_live_table(md, srcu_idx);
+			return;
+		}
 		ti = dm_table_find_target(map, pos);
 		dm_put_live_table(md, srcu_idx);
 	}

From f5b0cba8f23915e92932f11eb063e37d70556a89 Mon Sep 17 00:00:00 2001
From: Ondrej Kozina <okozina@redhat.com>
Date: Tue, 31 Jan 2017 15:47:11 +0100
Subject: [PATCH 199/322] dm crypt: replace RCU read-side section with rwsem

The lockdep splat below hints at a bug in RCU usage in dm-crypt that
was introduced with commit c538f6ec9f56 ("dm crypt: add ability to use
keys from the kernel key retention service").  The kernel keyring
function user_key_payload() is in fact a wrapper for
rcu_dereference_protected() which must not be called with only
rcu_read_lock() section mark.

Unfortunately the kernel keyring subsystem doesn't currently provide
an interface that allows the use of an RCU read-side section.  So for
now we must drop RCU in favour of rwsem until a proper function is
made available in the kernel keyring subsystem.

===============================
[ INFO: suspicious RCU usage. ]
4.10.0-rc5 #2 Not tainted
-------------------------------
./include/keys/user-type.h:53 suspicious rcu_dereference_protected() usage!
other info that might help us debug this:
rcu_scheduler_active = 2, debug_locks = 1
2 locks held by cryptsetup/6464:
 #0:  (&md->type_lock){+.+.+.}, at: [<ffffffffa02472a2>] dm_lock_md_type+0x12/0x20 [dm_mod]
 #1:  (rcu_read_lock){......}, at: [<ffffffffa02822f8>] crypt_set_key+0x1d8/0x4b0 [dm_crypt]
stack backtrace:
CPU: 1 PID: 6464 Comm: cryptsetup Not tainted 4.10.0-rc5 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014
Call Trace:
 dump_stack+0x67/0x92
 lockdep_rcu_suspicious+0xc5/0x100
 crypt_set_key+0x351/0x4b0 [dm_crypt]
 ? crypt_set_key+0x1d8/0x4b0 [dm_crypt]
 crypt_ctr+0x341/0xa53 [dm_crypt]
 dm_table_add_target+0x147/0x330 [dm_mod]
 table_load+0x111/0x350 [dm_mod]
 ? retrieve_status+0x1c0/0x1c0 [dm_mod]
 ctl_ioctl+0x1f5/0x510 [dm_mod]
 dm_ctl_ioctl+0xe/0x20 [dm_mod]
 do_vfs_ioctl+0x8e/0x690
 ? ____fput+0x9/0x10
 ? task_work_run+0x7e/0xa0
 ? trace_hardirqs_on_caller+0x122/0x1b0
 SyS_ioctl+0x3c/0x70
 entry_SYSCALL_64_fastpath+0x18/0xad
RIP: 0033:0x7f392c9a4ec7
RSP: 002b:00007ffef6383378 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 00007ffef63830a0 RCX: 00007f392c9a4ec7
RDX: 000000000124fcc0 RSI: 00000000c138fd09 RDI: 0000000000000005
RBP: 00007ffef6383090 R08: 00000000ffffffff R09: 00000000012482b0
R10: 2a28205d34383336 R11: 0000000000000246 R12: 00007f392d803a08
R13: 00007ffef63831e0 R14: 0000000000000000 R15: 00007f392d803a0b

Fixes: c538f6ec9f56 ("dm crypt: add ability to use keys from the kernel key retention service")
Reported-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Ondrej Kozina <okozina@redhat.com>
Reviewed-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-crypt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 7c6c57216bf2..8a9f742d8ed7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1534,18 +1534,18 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
 		return PTR_ERR(key);
 	}
 
-	rcu_read_lock();
+	down_read(&key->sem);
 
 	ukp = user_key_payload(key);
 	if (!ukp) {
-		rcu_read_unlock();
+		up_read(&key->sem);
 		key_put(key);
 		kzfree(new_key_string);
 		return -EKEYREVOKED;
 	}
 
 	if (cc->key_size != ukp->datalen) {
-		rcu_read_unlock();
+		up_read(&key->sem);
 		key_put(key);
 		kzfree(new_key_string);
 		return -EINVAL;
@@ -1553,7 +1553,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
 
 	memcpy(cc->key, ukp->data, cc->key_size);
 
-	rcu_read_unlock();
+	up_read(&key->sem);
 	key_put(key);
 
 	/* clear the flag since following operations may invalidate previously valid key */

From 013e8167899d389075160412a8c0c5e0581e1f13 Mon Sep 17 00:00:00 2001
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Thu, 2 Feb 2017 11:29:38 +0100
Subject: [PATCH 200/322] ipv6: sr: remove cleanup flag and fix HMAC
 computation

In the latest version of the IPv6 Segment Routing IETF draft [1] the
cleanup flag is removed and the flags field length is shrunk from 16 bits
to 8 bits. As a consequence, the input of the HMAC computation is modified
in a non-backward compatible way by covering the whole octet of flags
instead of only the cleanup bit. As such, if an implementation compatible
with the latest draft computes the HMAC of an SRH who has other flags set
to 1, then the HMAC result would differ from the current implementation.

This patch carries those modifications to prevent conflict with other
implementations of IPv6 SR.

[1] https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-05

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/seg6.h |  9 +++------
 net/ipv6/exthdrs.c        | 31 +++----------------------------
 net/ipv6/seg6_hmac.c      |  8 ++++----
 3 files changed, 10 insertions(+), 38 deletions(-)

diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h
index c396a8052f73..052799e4d751 100644
--- a/include/uapi/linux/seg6.h
+++ b/include/uapi/linux/seg6.h
@@ -23,14 +23,12 @@ struct ipv6_sr_hdr {
 	__u8	type;
 	__u8	segments_left;
 	__u8	first_segment;
-	__u8	flag_1;
-	__u8	flag_2;
-	__u8	reserved;
+	__u8	flags;
+	__u16	reserved;
 
 	struct in6_addr segments[0];
 };
 
-#define SR6_FLAG1_CLEANUP	(1 << 7)
 #define SR6_FLAG1_PROTECTED	(1 << 6)
 #define SR6_FLAG1_OAM		(1 << 5)
 #define SR6_FLAG1_ALERT		(1 << 4)
@@ -42,8 +40,7 @@ struct ipv6_sr_hdr {
 #define SR6_TLV_PADDING		4
 #define SR6_TLV_HMAC		5
 
-#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP)
-#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC)
+#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC)
 
 struct sr6_tlv {
 	__u8 type;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index e4198502fd98..275cac628a95 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -327,7 +327,6 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 	struct ipv6_sr_hdr *hdr;
 	struct inet6_dev *idev;
 	struct in6_addr *addr;
-	bool cleanup = false;
 	int accept_seg6;
 
 	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
@@ -351,11 +350,7 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 #endif
 
 looped_back:
-	if (hdr->segments_left > 0) {
-		if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
-		    sr_has_cleanup(hdr))
-			cleanup = true;
-	} else {
+	if (hdr->segments_left == 0) {
 		if (hdr->nexthdr == NEXTHDR_IPV6) {
 			int offset = (hdr->hdrlen + 1) << 3;
 
@@ -418,21 +413,6 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 
 	ipv6_hdr(skb)->daddr = *addr;
 
-	if (cleanup) {
-		int srhlen = (hdr->hdrlen + 1) << 3;
-		int nh = hdr->nexthdr;
-
-		skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen);
-		memmove(skb_network_header(skb) + srhlen,
-			skb_network_header(skb),
-			(unsigned char *)hdr - skb_network_header(skb));
-		skb->network_header += srhlen;
-		ipv6_hdr(skb)->nexthdr = nh;
-		ipv6_hdr(skb)->payload_len = htons(skb->len -
-						   sizeof(struct ipv6hdr));
-		skb_push_rcsum(skb, sizeof(struct ipv6hdr));
-	}
-
 	skb_dst_drop(skb);
 
 	ip6_route_input(skb);
@@ -453,13 +433,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 		}
 		ipv6_hdr(skb)->hop_limit--;
 
-		/* be sure that srh is still present before reinjecting */
-		if (!cleanup) {
-			skb_pull(skb, sizeof(struct ipv6hdr));
-			goto looped_back;
-		}
-		skb_set_transport_header(skb, sizeof(struct ipv6hdr));
-		IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+		skb_pull(skb, sizeof(struct ipv6hdr));
+		goto looped_back;
 	}
 
 	dst_input(skb);
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 03a064803626..6ef3dfb6e811 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -174,7 +174,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
 	 * hash function (RadioGatun) with up to 1216 bits
 	 */
 
-	/* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */
+	/* saddr(16) + first_seg(1) + flags(1) + keyid(4) + seglist(16n) */
 	plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16;
 
 	/* this limit allows for 14 segments */
@@ -186,7 +186,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
 	 *
 	 * 1. Source IPv6 address (128 bits)
 	 * 2. first_segment value (8 bits)
-	 * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0)
+	 * 3. Flags (8 bits)
 	 * 4. HMAC Key ID (32 bits)
 	 * 5. All segments in the segments list (n * 128 bits)
 	 */
@@ -202,8 +202,8 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
 	/* first_segment value */
 	*off++ = hdr->first_segment;
 
-	/* cleanup flag */
-	*off++ = !!(sr_has_cleanup(hdr)) << 7;
+	/* flags */
+	*off++ = hdr->flags;
 
 	/* HMAC Key ID */
 	memcpy(off, &hmackeyid, 4);

From 3808d34838184fd29088d6b3a364ba2f1c018fb6 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Thu, 2 Feb 2017 13:32:10 +0100
Subject: [PATCH 201/322] ethtool: do not vzalloc(0) on registers dump

If ->get_regs_len() callback return 0, we allocate 0 bytes of memory,
what print ugly warning in dmesg, which can be found further below.

This happen on mac80211 devices where ieee80211_get_regs_len() just
return 0 and driver only fills ethtool_regs structure and actually
do not provide any dump. However I assume this can happen on other
drivers i.e. when for some devices driver provide regs dump and for
others do not. Hence preventing to to print warning in ethtool code
seems to be reasonable.

ethtool: vmalloc: allocation failure: 0 bytes, mode:0x24080c2(GFP_KERNEL|__GFP_HIGHMEM|__GFP_ZERO)
<snip>
Call Trace:
[<ffffffff813bde47>] dump_stack+0x63/0x8c
[<ffffffff811b0a1f>] warn_alloc+0x13f/0x170
[<ffffffff811f0476>] __vmalloc_node_range+0x1e6/0x2c0
[<ffffffff811f0874>] vzalloc+0x54/0x60
[<ffffffff8169986c>] dev_ethtool+0xb4c/0x1b30
[<ffffffff816adbb1>] dev_ioctl+0x181/0x520
[<ffffffff816714d2>] sock_do_ioctl+0x42/0x50
<snip>
Mem-Info:
active_anon:435809 inactive_anon:173951 isolated_anon:0
 active_file:835822 inactive_file:196932 isolated_file:0
 unevictable:0 dirty:8 writeback:0 unstable:0
 slab_reclaimable:157732 slab_unreclaimable:10022
 mapped:83042 shmem:306356 pagetables:9507 bounce:0
 free:130041 free_pcp:1080 free_cma:0
Node 0 active_anon:1743236kB inactive_anon:695804kB active_file:3343288kB inactive_file:787728kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:332168kB dirty:32kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 1225424kB writeback_tmp:0kB unstable:0kB pages_scanned:0 all_unreclaimable? no
Node 0 DMA free:15900kB min:136kB low:168kB high:200kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:15984kB managed:15900kB mlocked:0kB slab_reclaimable:0kB slab_unreclaimable:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
lowmem_reserve[]: 0 3187 7643 7643
Node 0 DMA32 free:419732kB min:28124kB low:35152kB high:42180kB active_anon:541180kB inactive_anon:248988kB active_file:1466388kB inactive_file:389632kB unevictable:0kB writepending:0kB present:3370280kB managed:3290932kB mlocked:0kB slab_reclaimable:217184kB slab_unreclaimable:4180kB kernel_stack:160kB pagetables:984kB bounce:0kB free_pcp:2236kB local_pcp:660kB free_cma:0kB
lowmem_reserve[]: 0 0 4456 4456

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 236a21e3c878..d92de0a1f0a4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1405,9 +1405,12 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	if (regs.len > reglen)
 		regs.len = reglen;
 
-	regbuf = vzalloc(reglen);
-	if (reglen && !regbuf)
-		return -ENOMEM;
+	regbuf = NULL;
+	if (reglen) {
+		regbuf = vzalloc(reglen);
+		if (!regbuf)
+			return -ENOMEM;
+	}
 
 	ops->get_regs(dev, &regs, regbuf);
 

From 56067812d5b0e737ac2063e94a50f76b810d6ca3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 3 Feb 2017 09:54:05 +0000
Subject: [PATCH 202/322] kbuild: modversions: add infrastructure for emitting
 relative CRCs

This add the kbuild infrastructure that will allow architectures to emit
vmlinux symbol CRCs as 32-bit offsets to another location in the kernel
where the actual value is stored. This works around problems with CRCs
being mistaken for relocatable symbols on kernels that self relocate at
runtime (i.e., powerpc with CONFIG_RELOCATABLE=y)

For the kbuild side of things, this comes down to the following:

 - introducing a Kconfig symbol MODULE_REL_CRCS

 - adding a -R switch to genksyms to instruct it to emit the CRC symbols
   as references into the .rodata section

 - making modpost distinguish such references from absolute CRC symbols
   by the section index (SHN_ABS)

 - making kallsyms disregard non-absolute symbols with a __crc_ prefix

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/Kconfig                |  4 ++++
 scripts/Makefile.build      |  2 ++
 scripts/genksyms/genksyms.c | 19 ++++++++++++++-----
 scripts/kallsyms.c          | 12 ++++++++++++
 scripts/mod/modpost.c       | 10 ++++++++++
 5 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index e1a937348a3e..4dd8bd232a1d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1987,6 +1987,10 @@ config MODVERSIONS
 	  make them incompatible with the kernel you are running.  If
 	  unsure, say N.
 
+config MODULE_REL_CRCS
+	bool
+	depends on MODVERSIONS
+
 config MODULE_SRCVERSION_ALL
 	bool "Source checksum for all modules"
 	help
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index eadcd4d359d9..d883116ebaa4 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -164,6 +164,7 @@ cmd_gensymtypes_c =                                                         \
     $(CPP) -D__GENKSYMS__ $(c_flags) $< |                                   \
     $(GENKSYMS) $(if $(1), -T $(2))                                         \
      $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))             \
+     $(patsubst y,-R,$(CONFIG_MODULE_REL_CRCS))                             \
      $(if $(KBUILD_PRESERVE),-p)                                            \
      -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null))
 
@@ -337,6 +338,7 @@ cmd_gensymtypes_S =                                                         \
     $(CPP) -D__GENKSYMS__ $(c_flags) -xc - |                                \
     $(GENKSYMS) $(if $(1), -T $(2))                                         \
      $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))             \
+     $(patsubst y,-R,$(CONFIG_MODULE_REL_CRCS))                             \
      $(if $(KBUILD_PRESERVE),-p)                                            \
      -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null))
 
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 06121ce524a7..c9235d8340f1 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -44,7 +44,7 @@ char *cur_filename, *source_file;
 int in_source_file;
 
 static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types,
-	   flag_preserve, flag_warnings;
+	   flag_preserve, flag_warnings, flag_rel_crcs;
 static const char *mod_prefix = "";
 
 static int errors;
@@ -693,7 +693,10 @@ void export_symbol(const char *name)
 			fputs(">\n", debugfile);
 
 		/* Used as a linker script. */
-		printf("%s__crc_%s = 0x%08lx ;\n", mod_prefix, name, crc);
+		printf(!flag_rel_crcs ? "%s__crc_%s = 0x%08lx;\n" :
+		       "SECTIONS { .rodata : ALIGN(4) { "
+		       "%s__crc_%s = .; LONG(0x%08lx); } }\n",
+		       mod_prefix, name, crc);
 	}
 }
 
@@ -730,7 +733,7 @@ void error_with_pos(const char *fmt, ...)
 
 static void genksyms_usage(void)
 {
-	fputs("Usage:\n" "genksyms [-adDTwqhV] > /path/to/.tmp_obj.ver\n" "\n"
+	fputs("Usage:\n" "genksyms [-adDTwqhVR] > /path/to/.tmp_obj.ver\n" "\n"
 #ifdef __GNU_LIBRARY__
 	      "  -s, --symbol-prefix   Select symbol prefix\n"
 	      "  -d, --debug           Increment the debug level (repeatable)\n"
@@ -742,6 +745,7 @@ static void genksyms_usage(void)
 	      "  -q, --quiet           Disable warnings (default)\n"
 	      "  -h, --help            Print this message\n"
 	      "  -V, --version         Print the release version\n"
+	      "  -R, --relative-crc    Emit section relative symbol CRCs\n"
 #else				/* __GNU_LIBRARY__ */
 	      "  -s                    Select symbol prefix\n"
 	      "  -d                    Increment the debug level (repeatable)\n"
@@ -753,6 +757,7 @@ static void genksyms_usage(void)
 	      "  -q                    Disable warnings (default)\n"
 	      "  -h                    Print this message\n"
 	      "  -V                    Print the release version\n"
+	      "  -R                    Emit section relative symbol CRCs\n"
 #endif				/* __GNU_LIBRARY__ */
 	      , stderr);
 }
@@ -774,13 +779,14 @@ int main(int argc, char **argv)
 		{"preserve", 0, 0, 'p'},
 		{"version", 0, 0, 'V'},
 		{"help", 0, 0, 'h'},
+		{"relative-crc", 0, 0, 'R'},
 		{0, 0, 0, 0}
 	};
 
-	while ((o = getopt_long(argc, argv, "s:dwqVDr:T:ph",
+	while ((o = getopt_long(argc, argv, "s:dwqVDr:T:phR",
 				&long_opts[0], NULL)) != EOF)
 #else				/* __GNU_LIBRARY__ */
-	while ((o = getopt(argc, argv, "s:dwqVDr:T:ph")) != EOF)
+	while ((o = getopt(argc, argv, "s:dwqVDr:T:phR")) != EOF)
 #endif				/* __GNU_LIBRARY__ */
 		switch (o) {
 		case 's':
@@ -823,6 +829,9 @@ int main(int argc, char **argv)
 		case 'h':
 			genksyms_usage();
 			return 0;
+		case 'R':
+			flag_rel_crcs = 1;
+			break;
 		default:
 			genksyms_usage();
 			return 1;
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 299b92ca1ae0..5d554419170b 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -219,6 +219,10 @@ static int symbol_valid(struct sym_entry *s)
 		"_SDA2_BASE_",		/* ppc */
 		NULL };
 
+	static char *special_prefixes[] = {
+		"__crc_",		/* modversions */
+		NULL };
+
 	static char *special_suffixes[] = {
 		"_veneer",		/* arm */
 		"_from_arm",		/* arm */
@@ -259,6 +263,14 @@ static int symbol_valid(struct sym_entry *s)
 		if (strcmp(sym_name, special_symbols[i]) == 0)
 			return 0;
 
+	for (i = 0; special_prefixes[i]; i++) {
+		int l = strlen(special_prefixes[i]);
+
+		if (l <= strlen(sym_name) &&
+		    strncmp(sym_name, special_prefixes[i], l) == 0)
+			return 0;
+	}
+
 	for (i = 0; special_suffixes[i]; i++) {
 		int l = strlen(sym_name) - strlen(special_suffixes[i]);
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 29c89a6bad3d..4dedd0d3d3a7 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -621,6 +621,16 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
 	if (strncmp(symname, CRC_PFX, strlen(CRC_PFX)) == 0) {
 		is_crc = true;
 		crc = (unsigned int) sym->st_value;
+		if (sym->st_shndx != SHN_UNDEF && sym->st_shndx != SHN_ABS) {
+			unsigned int *crcp;
+
+			/* symbol points to the CRC in the ELF object */
+			crcp = (void *)info->hdr + sym->st_value +
+			       info->sechdrs[sym->st_shndx].sh_offset -
+			       (info->hdr->e_type != ET_REL ?
+				info->sechdrs[sym->st_shndx].sh_addr : 0);
+			crc = *crcp;
+		}
 		sym_update_crc(symname + strlen(CRC_PFX), mod, crc,
 				export);
 	}

From 71810db27c1c853b335675bee335d893bc3d324b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 3 Feb 2017 09:54:06 +0000
Subject: [PATCH 203/322] modversions: treat symbol CRCs as 32 bit quantities

The modversion symbol CRCs are emitted as ELF symbols, which allows us
to easily populate the kcrctab sections by relying on the linker to
associate each kcrctab slot with the correct value.

This has a couple of downsides:

 - Given that the CRCs are treated as memory addresses, we waste 4 bytes
   for each CRC on 64 bit architectures,

 - On architectures that support runtime relocation, a R_<arch>_RELATIVE
   relocation entry is emitted for each CRC value, which identifies it
   as a quantity that requires fixing up based on the actual runtime
   load offset of the kernel. This results in corrupted CRCs unless we
   explicitly undo the fixup (and this is currently being handled in the
   core module code)

 - Such runtime relocation entries take up 24 bytes of __init space
   each, resulting in a x8 overhead in [uncompressed] kernel size for
   CRCs.

Switching to explicit 32 bit values on 64 bit architectures fixes most
of these issues, given that 32 bit values are not treated as quantities
that require fixing up based on the actual runtime load offset.  Note
that on some ELF64 architectures [such as PPC64], these 32-bit values
are still emitted as [absolute] runtime relocatable quantities, even if
the value resolves to a build time constant.  Since relative relocations
are always resolved at build time, this patch enables MODULE_REL_CRCS on
powerpc when CONFIG_RELOCATABLE=y, which turns the absolute CRC
references into relative references into .rodata where the actual CRC
value is stored.

So redefine all CRC fields and variables as u32, and redefine the
__CRC_SYMBOL() macro for 64 bit builds to emit the CRC reference using
inline assembler (which is necessary since 64-bit C code cannot use
32-bit types to hold memory addresses, even if they are ultimately
resolved using values that do not exceed 0xffffffff).  To avoid
potential problems with legacy 32-bit architectures using legacy
toolchains, the equivalent C definition of the kcrctab entry is retained
for 32-bit architectures.

Note that this mostly reverts commit d4703aefdbc8 ("module: handle ppc64
relocating kcrctabs when CONFIG_RELOCATABLE=y")

Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/Kconfig              |  1 +
 arch/powerpc/include/asm/module.h |  4 ---
 arch/powerpc/kernel/module_64.c   |  8 -----
 include/asm-generic/export.h      | 11 ++++---
 include/linux/export.h            | 14 ++++++++
 include/linux/module.h            | 14 ++++----
 kernel/module.c                   | 53 +++++++++++++++----------------
 7 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a8ee573fe610..db8a1ef6bfaf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -484,6 +484,7 @@ config RELOCATABLE
 	bool "Build a relocatable kernel"
 	depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE))
 	select NONSTATIC_KERNEL
+	select MODULE_REL_CRCS if MODVERSIONS
 	help
 	  This builds a kernel image that is capable of running at the
 	  location the kernel is loaded at. For ppc32, there is no any
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index cc12c61ef315..53885512b8d3 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -90,9 +90,5 @@ static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sec
 }
 #endif
 
-#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64)
-#define ARCH_RELOCATES_KCRCTAB
-#define reloc_start PHYSICAL_START
-#endif
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_MODULE_H */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index bb1807184bad..0b0f89685b67 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -286,14 +286,6 @@ static void dedotify_versions(struct modversion_info *vers,
 	for (end = (void *)vers + size; vers < end; vers++)
 		if (vers->name[0] == '.') {
 			memmove(vers->name, vers->name+1, strlen(vers->name));
-#ifdef ARCH_RELOCATES_KCRCTAB
-			/* The TOC symbol has no CRC computed. To avoid CRC
-			 * check failing, we must force it to the expected
-			 * value (see CRC check in module.c).
-			 */
-			if (!strcmp(vers->name, "TOC."))
-				vers->crc = -(unsigned long)reloc_start;
-#endif
 		}
 }
 
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 63554e9f6e0c..719db1968d81 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -9,18 +9,15 @@
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 8
 #endif
-#ifndef KCRC_ALIGN
-#define KCRC_ALIGN 8
-#endif
 #else
 #define __put .long
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
+#endif
 #ifndef KCRC_ALIGN
 #define KCRC_ALIGN 4
 #endif
-#endif
 
 #ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
 #define KSYM(name) _##name
@@ -52,7 +49,11 @@ KSYM(__kstrtab_\name):
 	.section ___kcrctab\sec+\name,"a"
 	.balign KCRC_ALIGN
 KSYM(__kcrctab_\name):
-	__put KSYM(__crc_\name)
+#if defined(CONFIG_MODULE_REL_CRCS)
+	.long KSYM(__crc_\name) - .
+#else
+	.long KSYM(__crc_\name)
+#endif
 	.weak KSYM(__crc_\name)
 	.previous
 #endif
diff --git a/include/linux/export.h b/include/linux/export.h
index 2a0f61fbc731..7473fba6a60c 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -43,6 +43,13 @@ extern struct module __this_module;
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
+#if defined(CONFIG_MODULE_REL_CRCS)
+#define __CRC_SYMBOL(sym, sec)						\
+	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) " - .	\n"	\
+	    "	.previous					\n");
+#elif !defined(CONFIG_64BIT)
 #define __CRC_SYMBOL(sym, sec)						\
 	extern __visible void *__crc_##sym __attribute__((weak));	\
 	static const unsigned long __kcrctab_##sym			\
@@ -50,6 +57,13 @@ extern struct module __this_module;
 	__attribute__((section("___kcrctab" sec "+" #sym), used))	\
 	= (unsigned long) &__crc_##sym;
 #else
+#define __CRC_SYMBOL(sym, sec)						\
+	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.previous					\n");
+#endif
+#else
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
diff --git a/include/linux/module.h b/include/linux/module.h
index 7c84273d60b9..cc7cba219b20 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -346,7 +346,7 @@ struct module {
 
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
-	const unsigned long *crcs;
+	const s32 *crcs;
 	unsigned int num_syms;
 
 	/* Kernel parameters. */
@@ -359,18 +359,18 @@ struct module {
 	/* GPL-only exported symbols. */
 	unsigned int num_gpl_syms;
 	const struct kernel_symbol *gpl_syms;
-	const unsigned long *gpl_crcs;
+	const s32 *gpl_crcs;
 
 #ifdef CONFIG_UNUSED_SYMBOLS
 	/* unused exported symbols. */
 	const struct kernel_symbol *unused_syms;
-	const unsigned long *unused_crcs;
+	const s32 *unused_crcs;
 	unsigned int num_unused_syms;
 
 	/* GPL-only, unused exported symbols. */
 	unsigned int num_unused_gpl_syms;
 	const struct kernel_symbol *unused_gpl_syms;
-	const unsigned long *unused_gpl_crcs;
+	const s32 *unused_gpl_crcs;
 #endif
 
 #ifdef CONFIG_MODULE_SIG
@@ -382,7 +382,7 @@ struct module {
 
 	/* symbols that will be GPL-only in the near future. */
 	const struct kernel_symbol *gpl_future_syms;
-	const unsigned long *gpl_future_crcs;
+	const s32 *gpl_future_crcs;
 	unsigned int num_gpl_future_syms;
 
 	/* Exception table */
@@ -523,7 +523,7 @@ struct module *find_module(const char *name);
 
 struct symsearch {
 	const struct kernel_symbol *start, *stop;
-	const unsigned long *crcs;
+	const s32 *crcs;
 	enum {
 		NOT_GPL_ONLY,
 		GPL_ONLY,
@@ -539,7 +539,7 @@ struct symsearch {
  */
 const struct kernel_symbol *find_symbol(const char *name,
 					struct module **owner,
-					const unsigned long **crc,
+					const s32 **crc,
 					bool gplok,
 					bool warn);
 
diff --git a/kernel/module.c b/kernel/module.c
index 38d4270925d4..3d8f126208e3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -389,16 +389,16 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
 extern const struct kernel_symbol __stop___ksymtab_gpl[];
 extern const struct kernel_symbol __start___ksymtab_gpl_future[];
 extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
-extern const unsigned long __start___kcrctab[];
-extern const unsigned long __start___kcrctab_gpl[];
-extern const unsigned long __start___kcrctab_gpl_future[];
+extern const s32 __start___kcrctab[];
+extern const s32 __start___kcrctab_gpl[];
+extern const s32 __start___kcrctab_gpl_future[];
 #ifdef CONFIG_UNUSED_SYMBOLS
 extern const struct kernel_symbol __start___ksymtab_unused[];
 extern const struct kernel_symbol __stop___ksymtab_unused[];
 extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
 extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
-extern const unsigned long __start___kcrctab_unused[];
-extern const unsigned long __start___kcrctab_unused_gpl[];
+extern const s32 __start___kcrctab_unused[];
+extern const s32 __start___kcrctab_unused_gpl[];
 #endif
 
 #ifndef CONFIG_MODVERSIONS
@@ -497,7 +497,7 @@ struct find_symbol_arg {
 
 	/* Output */
 	struct module *owner;
-	const unsigned long *crc;
+	const s32 *crc;
 	const struct kernel_symbol *sym;
 };
 
@@ -563,7 +563,7 @@ static bool find_symbol_in_section(const struct symsearch *syms,
  * (optional) module which owns it.  Needs preempt disabled or module_mutex. */
 const struct kernel_symbol *find_symbol(const char *name,
 					struct module **owner,
-					const unsigned long **crc,
+					const s32 **crc,
 					bool gplok,
 					bool warn)
 {
@@ -1249,23 +1249,17 @@ static int try_to_force_load(struct module *mod, const char *reason)
 }
 
 #ifdef CONFIG_MODVERSIONS
-/* If the arch applies (non-zero) relocations to kernel kcrctab, unapply it. */
-static unsigned long maybe_relocated(unsigned long crc,
-				     const struct module *crc_owner)
+
+static u32 resolve_rel_crc(const s32 *crc)
 {
-#ifdef ARCH_RELOCATES_KCRCTAB
-	if (crc_owner == NULL)
-		return crc - (unsigned long)reloc_start;
-#endif
-	return crc;
+	return *(u32 *)((void *)crc + *crc);
 }
 
 static int check_version(Elf_Shdr *sechdrs,
 			 unsigned int versindex,
 			 const char *symname,
 			 struct module *mod,
-			 const unsigned long *crc,
-			 const struct module *crc_owner)
+			 const s32 *crc)
 {
 	unsigned int i, num_versions;
 	struct modversion_info *versions;
@@ -1283,13 +1277,19 @@ static int check_version(Elf_Shdr *sechdrs,
 		/ sizeof(struct modversion_info);
 
 	for (i = 0; i < num_versions; i++) {
+		u32 crcval;
+
 		if (strcmp(versions[i].name, symname) != 0)
 			continue;
 
-		if (versions[i].crc == maybe_relocated(*crc, crc_owner))
+		if (IS_ENABLED(CONFIG_MODULE_REL_CRCS))
+			crcval = resolve_rel_crc(crc);
+		else
+			crcval = *crc;
+		if (versions[i].crc == crcval)
 			return 1;
-		pr_debug("Found checksum %lX vs module %lX\n",
-		       maybe_relocated(*crc, crc_owner), versions[i].crc);
+		pr_debug("Found checksum %X vs module %lX\n",
+			 crcval, versions[i].crc);
 		goto bad_version;
 	}
 
@@ -1307,7 +1307,7 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 					  unsigned int versindex,
 					  struct module *mod)
 {
-	const unsigned long *crc;
+	const s32 *crc;
 
 	/*
 	 * Since this should be found in kernel (which can't be removed), no
@@ -1321,8 +1321,7 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 	}
 	preempt_enable();
 	return check_version(sechdrs, versindex,
-			     VMLINUX_SYMBOL_STR(module_layout), mod, crc,
-			     NULL);
+			     VMLINUX_SYMBOL_STR(module_layout), mod, crc);
 }
 
 /* First part is kernel version, which we ignore if module has crcs. */
@@ -1340,8 +1339,7 @@ static inline int check_version(Elf_Shdr *sechdrs,
 				unsigned int versindex,
 				const char *symname,
 				struct module *mod,
-				const unsigned long *crc,
-				const struct module *crc_owner)
+				const s32 *crc)
 {
 	return 1;
 }
@@ -1368,7 +1366,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
 {
 	struct module *owner;
 	const struct kernel_symbol *sym;
-	const unsigned long *crc;
+	const s32 *crc;
 	int err;
 
 	/*
@@ -1383,8 +1381,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
 	if (!sym)
 		goto unlock;
 
-	if (!check_version(info->sechdrs, info->index.vers, name, mod, crc,
-			   owner)) {
+	if (!check_version(info->sechdrs, info->index.vers, name, mod, crc)) {
 		sym = ERR_PTR(-EINVAL);
 		goto getname;
 	}

From 4b9eee96fcb361a5e16a8d2619825e8a048f81f7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 3 Feb 2017 09:54:07 +0000
Subject: [PATCH 204/322] module: unify absolute krctab definitions for 32-bit
 and 64-bit

The previous patch introduced a separate inline asm version of the
krcrctab declaration template for use with 64-bit architectures, which
cannot refer to ELF symbols using 32-bit quantities.

This declaration should be equivalent to the C one for 32-bit
architectures, but just in case - unify them in a separate patch, which
can simply be dropped if it turns out to break anything.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/export.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/include/linux/export.h b/include/linux/export.h
index 7473fba6a60c..1a1dfdb2a5c6 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -49,13 +49,6 @@ extern struct module __this_module;
 	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
 	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) " - .	\n"	\
 	    "	.previous					\n");
-#elif !defined(CONFIG_64BIT)
-#define __CRC_SYMBOL(sym, sec)						\
-	extern __visible void *__crc_##sym __attribute__((weak));	\
-	static const unsigned long __kcrctab_##sym			\
-	__used								\
-	__attribute__((section("___kcrctab" sec "+" #sym), used))	\
-	= (unsigned long) &__crc_##sym;
 #else
 #define __CRC_SYMBOL(sym, sec)						\
 	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\

From 00c87e9a70a17b355b81c36adedf05e84f54e10d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Wed, 1 Feb 2017 14:19:53 +0100
Subject: [PATCH 205/322] KVM: x86: do not save guest-unsupported XSAVE state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Saving unsupported state prevents migration when the new host does not
support a XSAVE feature of the original host, even if the feature is not
exposed to the guest.

We've masked host features with guest-visible features before, with
4344ee981e21 ("KVM: x86: only copy XSAVE state for the supported
features") and dropped it when implementing XSAVES.  Do it again.

Fixes: df1daba7d1cb ("KVM: x86: support XSAVES usage in the host")
Cc: stable@vger.kernel.org
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d153be8929a6..e52c9088660f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3182,6 +3182,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 	memcpy(dest, xsave, XSAVE_HDR_OFFSET);
 
 	/* Set XSTATE_BV */
+	xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
 	*(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
 
 	/*

From 29905b52fad0854351f57bab867647e4982285bf Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 2 Feb 2017 18:05:26 +0000
Subject: [PATCH 206/322] log2: make order_base_2() behave correctly on const
 input value zero

The function order_base_2() is defined (according to the comment block)
as returning zero on input zero, but subsequently passes the input into
roundup_pow_of_two(), which is explicitly undefined for input zero.

This has gone unnoticed until now, but optimization passes in GCC 7 may
produce constant folded function instances where a constant value of
zero is passed into order_base_2(), resulting in link errors against the
deliberately undefined '____ilog2_NaN'.

So update order_base_2() to adhere to its own documented interface.

[ See

     http://marc.info/?l=linux-kernel&m=147672952517795&w=2

  and follow-up discussion for more background. The gcc "optimization
  pass" is really just broken, but now the GCC trunk problem seems to
  have escaped out of just specially built daily images, so we need to
  work around it in mainline.    - Linus ]

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/log2.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/linux/log2.h b/include/linux/log2.h
index fd7ff3d91e6a..ef3d4f67118c 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -203,6 +203,17 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
  *  ... and so on.
  */
 
-#define order_base_2(n) ilog2(roundup_pow_of_two(n))
+static inline __attribute_const__
+int __order_base_2(unsigned long n)
+{
+	return n > 1 ? ilog2(n - 1) + 1 : 0;
+}
 
+#define order_base_2(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 0 || (n) == 1) ? 0 :	\
+		ilog2((n) - 1) + 1) :		\
+	__order_base_2(n)			\
+)
 #endif /* _LINUX_LOG2_H */

From e471486c13b82b1338d49c798f78bb62b1ed0a9e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 2 Feb 2017 10:31:00 -0800
Subject: [PATCH 207/322] acpi, nfit: fix acpi_nfit_flush_probe() crash

We queue an on-stack work item to 'nfit_wq' and wait for it to complete
as part of a 'flush_probe' request. However, if the user cancels the
wait we need to make sure the item is flushed from the queue otherwise
we are leaving an out-of-scope stack address on the work list.

 BUG: unable to handle kernel paging request at ffffbcb3c72f7cd0
 IP: [<ffffffffa9413a7b>] __list_add+0x1b/0xb0
 [..]
 RIP: 0010:[<ffffffffa9413a7b>]  [<ffffffffa9413a7b>] __list_add+0x1b/0xb0
 RSP: 0018:ffffbcb3c7ba7c00  EFLAGS: 00010046
 [..]
 Call Trace:
  [<ffffffffa90bb11a>] insert_work+0x3a/0xc0
  [<ffffffffa927fdda>] ? seq_open+0x5a/0xa0
  [<ffffffffa90bb30a>] __queue_work+0x16a/0x460
  [<ffffffffa90bbb08>] queue_work_on+0x38/0x40
  [<ffffffffc0cf2685>] acpi_nfit_flush_probe+0x95/0xc0 [nfit]
  [<ffffffffc0cf25d0>] ? nfit_visible+0x40/0x40 [nfit]
  [<ffffffffa9571495>] wait_probe_show+0x25/0x60
  [<ffffffffa9546b30>] dev_attr_show+0x20/0x50

Fixes: 7ae0fa439faf ("nfit, libnvdimm: async region scrub workqueue")
Cc: <stable@vger.kernel.org>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 2f82b8eba360..7361d00818e2 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2704,6 +2704,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 	struct device *dev = acpi_desc->dev;
 	struct acpi_nfit_flush_work flush;
+	int rc;
 
 	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
 	device_lock(dev);
@@ -2716,7 +2717,10 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 	INIT_WORK_ONSTACK(&flush.work, flush_probe);
 	COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
 	queue_work(nfit_wq, &flush.work);
-	return wait_for_completion_interruptible(&flush.cmp);
+
+	rc = wait_for_completion_interruptible(&flush.cmp);
+	cancel_work_sync(&flush.work);
+	return rc;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,

From 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Feb 2017 10:31:35 -0800
Subject: [PATCH 208/322] net: use a work queue to defer
 net_disable_timestamp() work

Dmitry reported a warning [1] showing that we were calling
net_disable_timestamp() -> static_key_slow_dec() from a non
process context.

Grabbing a mutex while holding a spinlock or rcu_read_lock()
is not allowed.

As Cong suggested, we now use a work queue.

It is possible netstamp_clear() exits while netstamp_needed_deferred
is not zero, but it is probably not worth trying to do better than that.

netstamp_needed_deferred atomic tracks the exact number of deferred
decrements.

[1]
[ INFO: suspicious RCU usage. ]
4.10.0-rc5+ #192 Not tainted
-------------------------------
./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side
critical section!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 0
2 locks held by syz-executor14/23111:
 #0:  (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] lock_sock
include/net/sock.h:1454 [inline]
 #0:  (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>]
rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919
 #1:  (rcu_read_lock){......}, at: [<ffffffff83ae2678>] nf_hook
include/linux/netfilter.h:201 [inline]
 #1:  (rcu_read_lock){......}, at: [<ffffffff83ae2678>]
__ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160

stack backtrace:
CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
 lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452
 rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline]
 ___might_sleep+0x560/0x650 kernel/sched/core.c:7748
 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739
 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752
 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060
 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149
 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174
 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728
 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403
 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441
 sk_destruct+0x47/0x80 net/core/sock.c:1460
 __sk_free+0x57/0x230 net/core/sock.c:1468
 sock_wfree+0xae/0x120 net/core/sock.c:1645
 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655
 skb_release_all+0x15/0x60 net/core/skbuff.c:668
 __kfree_skb+0x15/0x20 net/core/skbuff.c:684
 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705
 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304
 inet_frag_put include/net/inet_frag.h:133 [inline]
 nf_ct_frag6_gather+0x1106/0x3840
net/ipv6/netfilter/nf_conntrack_reasm.c:617
 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline]
 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310
 nf_hook include/linux/netfilter.h:212 [inline]
 __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160
 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170
 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722
 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742
 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline]
 rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 sock_write_iter+0x326/0x600 net/socket.c:848
 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695
 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872
 vfs_writev+0x87/0xc0 fs/read_write.c:911
 do_writev+0x110/0x2c0 fs/read_write.c:944
 SYSC_writev fs/read_write.c:1017 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1014
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x445559
RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559
RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005
RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000
R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400
BUG: sleeping function called from invalid context at
kernel/locking/mutex.c:752
in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14
INFO: lockdep is turned off.
CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
 ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780
 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739
 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752
 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060
 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149
 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174
 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728
 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403
 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441
 sk_destruct+0x47/0x80 net/core/sock.c:1460
 __sk_free+0x57/0x230 net/core/sock.c:1468
 sock_wfree+0xae/0x120 net/core/sock.c:1645
 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655
 skb_release_all+0x15/0x60 net/core/skbuff.c:668
 __kfree_skb+0x15/0x20 net/core/skbuff.c:684
 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705
 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304
 inet_frag_put include/net/inet_frag.h:133 [inline]
 nf_ct_frag6_gather+0x1106/0x3840
net/ipv6/netfilter/nf_conntrack_reasm.c:617
 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline]
 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310
 nf_hook include/linux/netfilter.h:212 [inline]
 __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160
 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170
 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722
 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742
 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline]
 rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 sock_write_iter+0x326/0x600 net/socket.c:848
 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695
 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872
 vfs_writev+0x87/0xc0 fs/read_write.c:911
 do_writev+0x110/0x2c0 fs/read_write.c:944
 SYSC_writev fs/read_write.c:1017 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1014
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x445559

Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 7f218e095361..29101c98399f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1695,24 +1695,19 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
 
 static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
-/* We are not allowed to call static_key_slow_dec() from irq context
- * If net_disable_timestamp() is called from irq context, defer the
- * static_key_slow_dec() calls.
- */
 static atomic_t netstamp_needed_deferred;
+static void netstamp_clear(struct work_struct *work)
+{
+	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+
+	while (deferred--)
+		static_key_slow_dec(&netstamp_needed);
+}
+static DECLARE_WORK(netstamp_work, netstamp_clear);
 #endif
 
 void net_enable_timestamp(void)
 {
-#ifdef HAVE_JUMP_LABEL
-	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
-
-	if (deferred) {
-		while (--deferred)
-			static_key_slow_dec(&netstamp_needed);
-		return;
-	}
-#endif
 	static_key_slow_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
@@ -1720,12 +1715,12 @@ EXPORT_SYMBOL(net_enable_timestamp);
 void net_disable_timestamp(void)
 {
 #ifdef HAVE_JUMP_LABEL
-	if (in_interrupt()) {
-		atomic_inc(&netstamp_needed_deferred);
-		return;
-	}
-#endif
+	/* net_disable_timestamp() can be called from non process context */
+	atomic_inc(&netstamp_needed_deferred);
+	schedule_work(&netstamp_work);
+#else
 	static_key_slow_dec(&netstamp_needed);
+#endif
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 

From 0d5415b489f68b58e1983a53793d25d53098ed4b Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 3 Feb 2017 05:43:52 +0200
Subject: [PATCH 209/322] Revert "vring: Force use of DMA API for ARM-based
 systems with legacy devices"

This reverts commit c7070619f3408d9a0dffbed9149e6f00479cf43b.

This has been shown to regress on some ARM systems:

by forcing on DMA API usage for ARM systems, we have inadvertently
kicked open a hornets' nest in terms of cache-coherency. Namely that
unless the virtio device is explicitly described as capable of coherent
DMA by firmware, the DMA APIs on ARM and other DT-based platforms will
assume it is non-coherent. This turns out to cause a big problem for the
likes of QEMU and kvmtool, which generate virtio-mmio devices in their
guest DTs but neglect to add the often-overlooked "dma-coherent"
property; as a result, we end up with the guest making non-cacheable
accesses to the vring, the host doing so cacheably, both talking past
each other and things going horribly wrong.

We are working on a safer work-around.

Fixes: c7070619f340 ("vring: Force use of DMA API for ARM-based systems with legacy devices")
Reported-by: Robin Murphy <robin.murphy@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/virtio/virtio_ring.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 7e38ed79c3fc..409aeaa49246 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -159,13 +159,6 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
 	if (xen_domain())
 		return true;
 
-	/*
-	 * On ARM-based machines, the DMA ops will do the right thing,
-	 * so always use them with legacy devices.
-	 */
-	if (IS_ENABLED(CONFIG_ARM) || IS_ENABLED(CONFIG_ARM64))
-		return !virtio_has_feature(vdev, VIRTIO_F_VERSION_1);
-
 	return false;
 }
 

From cda8bba0f99d25d2061c531113c14fa41effc3ae Mon Sep 17 00:00:00 2001
From: Halil Pasic <pasic@linux.vnet.ibm.com>
Date: Mon, 30 Jan 2017 11:09:36 +0100
Subject: [PATCH 210/322] vhost: fix initialization for vq->is_le

Currently, under certain circumstances vhost_init_is_le does just a part
of the initialization job, and depends on vhost_reset_is_le being called
too. For this reason vhost_vq_init_access used to call vhost_reset_is_le
when vq->private_data is NULL. This is not only counter intuitive, but
also real a problem because it breaks vhost_net. The bug was introduced to
vhost_net with commit 2751c9882b94 ("vhost: cross-endian support for
legacy devices"). The symptom is corruption of the vq's used.idx field
(virtio) after VHOST_NET_SET_BACKEND was issued as a part of the vhost
shutdown on a vq with pending descriptors.

Let us make sure the outcome of vhost_init_is_le never depend on the state
it is actually supposed to initialize, and fix virtio_net by removing the
reset from vhost_vq_init_access.

With the above, there is no reason for vhost_reset_is_le to do just half
of the job. Let us make vhost_reset_is_le reinitialize is_le.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Reported-by: Michael A. Tebolt <miket@us.ibm.com>
Reported-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Fixes: commit 2751c9882b94 ("vhost: cross-endian support for legacy devices")
Cc: <stable@vger.kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Michael A. Tebolt <miket@us.ibm.com>
---
 drivers/vhost/vhost.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index d6432603880c..8f99fe08de02 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -130,14 +130,14 @@ static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
 
 static void vhost_init_is_le(struct vhost_virtqueue *vq)
 {
-	if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
-		vq->is_le = true;
+	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1)
+		|| virtio_legacy_is_little_endian();
 }
 #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */
 
 static void vhost_reset_is_le(struct vhost_virtqueue *vq)
 {
-	vq->is_le = virtio_legacy_is_little_endian();
+	vhost_init_is_le(vq);
 }
 
 struct vhost_flush_struct {
@@ -1714,10 +1714,8 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
 	int r;
 	bool is_le = vq->is_le;
 
-	if (!vq->private_data) {
-		vhost_reset_is_le(vq);
+	if (!vq->private_data)
 		return 0;
-	}
 
 	vhost_init_is_le(vq);
 

From 79134d11d030b886106bf45a5638c1ccb1f0856c Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Fri, 3 Feb 2017 16:48:14 +0530
Subject: [PATCH 211/322] MAINTAINERS: update email address for Amit Shah

I'm leaving my job at Red Hat, this email address will stop working next week.
Update it to one that I will have access to later.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5f10c28b2e15..e0f979294e23 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13066,7 +13066,7 @@ F:	drivers/input/serio/userio.c
 F:	include/uapi/linux/userio.h
 
 VIRTIO CONSOLE DRIVER
-M:	Amit Shah <amit.shah@redhat.com>
+M:	Amit Shah <amit@kernel.org>
 L:	virtualization@lists.linux-foundation.org
 S:	Maintained
 F:	drivers/char/virtio_console.c

From d7b028f56a971a2e4d8d7887540a144eeefcd4ab Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Fri, 3 Feb 2017 13:13:09 -0800
Subject: [PATCH 212/322] zswap: disable changing params if init fails

Add zswap_init_failed bool that prevents changing any of the module
params, if init_zswap() fails, and set zswap_enabled to false.  Change
'enabled' param to a callback, and check zswap_init_failed before
allowing any change to 'enabled', 'zpool', or 'compressor' params.

Any driver that is built-in to the kernel will not be unloaded if its
init function returns error, and its module params remain accessible for
users to change via sysfs.  Since zswap uses param callbacks, which
assume that zswap has been initialized, changing the zswap params after
a failed initialization will result in WARNING due to the param
callbacks expecting a pool to already exist.  This prevents that by
immediately exiting any of the param callbacks if initialization failed.

This was reported here:
  https://marc.info/?l=linux-mm&m=147004228125528&w=4

And fixes this WARNING:
  [  429.723476] WARNING: CPU: 0 PID: 5140 at mm/zswap.c:503 __zswap_pool_current+0x56/0x60

The warning is just noise, and not serious.  However, when init fails,
zswap frees all its percpu dstmem pages and its kmem cache.  The kmem
cache might be serious, if kmem_cache_alloc(NULL, gfp) has problems; but
the percpu dstmem pages are definitely a problem, as they're used as
temporary buffer for compressed pages before copying into place in the
zpool.

If the user does get zswap enabled after an init failure, then zswap
will likely Oops on the first page it tries to compress (or worse, start
corrupting memory).

Fixes: 90b0fc26d5db ("zswap: change zpool/compressor at runtime")
Link: http://lkml.kernel.org/r/20170124200259.16191-2-ddstreet@ieee.org
Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
Reported-by: Marcin Miroslaw <marcin@mejor.pl>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/zswap.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 067a0d62f318..cabf09e0128b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -78,7 +78,13 @@ static u64 zswap_duplicate_entry;
 
 /* Enable/disable zswap (disabled by default) */
 static bool zswap_enabled;
-module_param_named(enabled, zswap_enabled, bool, 0644);
+static int zswap_enabled_param_set(const char *,
+				   const struct kernel_param *);
+static struct kernel_param_ops zswap_enabled_param_ops = {
+	.set =		zswap_enabled_param_set,
+	.get =		param_get_bool,
+};
+module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);
 
 /* Crypto compressor to use */
 #define ZSWAP_COMPRESSOR_DEFAULT "lzo"
@@ -176,6 +182,9 @@ static atomic_t zswap_pools_count = ATOMIC_INIT(0);
 /* used by param callback function */
 static bool zswap_init_started;
 
+/* fatal error during init */
+static bool zswap_init_failed;
+
 /*********************************
 * helpers and fwd declarations
 **********************************/
@@ -624,6 +633,11 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
 	char *s = strstrip((char *)val);
 	int ret;
 
+	if (zswap_init_failed) {
+		pr_err("can't set param, initialization failed\n");
+		return -ENODEV;
+	}
+
 	/* no change required */
 	if (!strcmp(s, *(char **)kp->arg))
 		return 0;
@@ -703,6 +717,17 @@ static int zswap_zpool_param_set(const char *val,
 	return __zswap_param_set(val, kp, NULL, zswap_compressor);
 }
 
+static int zswap_enabled_param_set(const char *val,
+				   const struct kernel_param *kp)
+{
+	if (zswap_init_failed) {
+		pr_err("can't enable, initialization failed\n");
+		return -ENODEV;
+	}
+
+	return param_set_bool(val, kp);
+}
+
 /*********************************
 * writeback code
 **********************************/
@@ -1201,6 +1226,9 @@ static int __init init_zswap(void)
 dstmem_fail:
 	zswap_entry_cache_destroy();
 cache_fail:
+	/* if built-in, we aren't unloaded on failure; don't allow use */
+	zswap_init_failed = true;
+	zswap_enabled = false;
 	return -ENOMEM;
 }
 /* must be late so crypto has time to come up */

From 4f40c6e5627ea73b4e7c615c59631f38cc880885 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 3 Feb 2017 13:13:12 -0800
Subject: [PATCH 213/322] kasan: respect /proc/sys/kernel/traceoff_on_warning

After much waiting I finally reproduced a KASAN issue, only to find my
trace-buffer empty of useful information because it got spooled out :/

Make kasan_report honour the /proc/sys/kernel/traceoff_on_warning
interface.

Link: http://lkml.kernel.org/r/20170125164106.3514-1-aryabinin@virtuozzo.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/report.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index b82b3e215157..f479365530b6 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/printk.h>
@@ -300,6 +301,8 @@ void kasan_report(unsigned long addr, size_t size,
 	if (likely(!kasan_report_enabled()))
 		return;
 
+	disable_trace_on_warning();
+
 	info.access_addr = (void *)addr;
 	info.access_size = size;
 	info.is_write = is_write;

From 253fd0f02040a19c6fe80e4171659fa3482a422d Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 3 Feb 2017 13:13:15 -0800
Subject: [PATCH 214/322] shmem: fix sleeping from atomic context

Syzkaller fuzzer managed to trigger this:

    BUG: sleeping function called from invalid context at mm/shmem.c:852
    in_atomic(): 1, irqs_disabled(): 0, pid: 529, name: khugepaged
    3 locks held by khugepaged/529:
     #0:  (shrinker_rwsem){++++..}, at: [<ffffffff818d7ef1>] shrink_slab.part.59+0x121/0xd30 mm/vmscan.c:451
     #1:  (&type->s_umount_key#29){++++..}, at: [<ffffffff81a63630>] trylock_super+0x20/0x100 fs/super.c:392
     #2:  (&(&sbinfo->shrinklist_lock)->rlock){+.+.-.}, at: [<ffffffff818fd83e>] spin_lock include/linux/spinlock.h:302 [inline]
     #2:  (&(&sbinfo->shrinklist_lock)->rlock){+.+.-.}, at: [<ffffffff818fd83e>] shmem_unused_huge_shrink+0x28e/0x1490 mm/shmem.c:427
    CPU: 2 PID: 529 Comm: khugepaged Not tainted 4.10.0-rc5+ #201
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    Call Trace:
       shmem_undo_range+0xb20/0x2710 mm/shmem.c:852
       shmem_truncate_range+0x27/0xa0 mm/shmem.c:939
       shmem_evict_inode+0x35f/0xca0 mm/shmem.c:1030
       evict+0x46e/0x980 fs/inode.c:553
       iput_final fs/inode.c:1515 [inline]
       iput+0x589/0xb20 fs/inode.c:1542
       shmem_unused_huge_shrink+0xbad/0x1490 mm/shmem.c:446
       shmem_unused_huge_scan+0x10c/0x170 mm/shmem.c:512
       super_cache_scan+0x376/0x450 fs/super.c:106
       do_shrink_slab mm/vmscan.c:378 [inline]
       shrink_slab.part.59+0x543/0xd30 mm/vmscan.c:481
       shrink_slab mm/vmscan.c:2592 [inline]
       shrink_node+0x2c7/0x870 mm/vmscan.c:2592
       shrink_zones mm/vmscan.c:2734 [inline]
       do_try_to_free_pages+0x369/0xc80 mm/vmscan.c:2776
       try_to_free_pages+0x3c6/0x900 mm/vmscan.c:2982
       __perform_reclaim mm/page_alloc.c:3301 [inline]
       __alloc_pages_direct_reclaim mm/page_alloc.c:3322 [inline]
       __alloc_pages_slowpath+0xa24/0x1c30 mm/page_alloc.c:3683
       __alloc_pages_nodemask+0x544/0xae0 mm/page_alloc.c:3848
       __alloc_pages include/linux/gfp.h:426 [inline]
       __alloc_pages_node include/linux/gfp.h:439 [inline]
       khugepaged_alloc_page+0xc2/0x1b0 mm/khugepaged.c:750
       collapse_huge_page+0x182/0x1fe0 mm/khugepaged.c:955
       khugepaged_scan_pmd+0xfdf/0x12a0 mm/khugepaged.c:1208
       khugepaged_scan_mm_slot mm/khugepaged.c:1727 [inline]
       khugepaged_do_scan mm/khugepaged.c:1808 [inline]
       khugepaged+0xe9b/0x1590 mm/khugepaged.c:1853
       kthread+0x326/0x3f0 kernel/kthread.c:227
       ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430

The iput() from atomic context was a bad idea: if after igrab() somebody
else calls iput() and we left with the last inode reference, our iput()
would lead to inode eviction and therefore sleeping.

This patch should fix the situation.

Link: http://lkml.kernel.org/r/20170131093141.GA15899@node.shutemov.name
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index bb53285a1d99..3a7587a0314d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -415,6 +415,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		struct shrink_control *sc, unsigned long nr_to_split)
 {
 	LIST_HEAD(list), *pos, *next;
+	LIST_HEAD(to_remove);
 	struct inode *inode;
 	struct shmem_inode_info *info;
 	struct page *page;
@@ -441,9 +442,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		/* Check if there's anything to gain */
 		if (round_up(inode->i_size, PAGE_SIZE) ==
 				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
-			list_del_init(&info->shrinklist);
+			list_move(&info->shrinklist, &to_remove);
 			removed++;
-			iput(inode);
 			goto next;
 		}
 
@@ -454,6 +454,13 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 	}
 	spin_unlock(&sbinfo->shrinklist_lock);
 
+	list_for_each_safe(pos, next, &to_remove) {
+		info = list_entry(pos, struct shmem_inode_info, shrinklist);
+		inode = &info->vfs_inode;
+		list_del_init(&info->shrinklist);
+		iput(inode);
+	}
+
 	list_for_each_safe(pos, next, &list) {
 		int ret;
 

From 35f860f9ba6aac56cc38e8b18916d833a83f1157 Mon Sep 17 00:00:00 2001
From: David Lin <dtwlin@google.com>
Date: Fri, 3 Feb 2017 13:13:18 -0800
Subject: [PATCH 215/322] jump label: pass kbuild_cflags when checking for asm
 goto support

Some versions of ARM GCC compiler such as Android toolchain throws in a
'-fpic' flag by default.  This causes the gcc-goto check script to fail
although some config would have '-fno-pic' flag in the KBUILD_CFLAGS.

This patch passes the KBUILD_CFLAGS to the check script so that the
script does not rely on the default config from different compilers.

Link: http://lkml.kernel.org/r/20170120234329.78868-1-dtwlin@google.com
Signed-off-by: David Lin <dtwlin@google.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Michal Marek <mmarek@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 96b27a888285..d7b758a1ce14 100644
--- a/Makefile
+++ b/Makefile
@@ -797,7 +797,7 @@ KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
 KBUILD_ARFLAGS := $(call ar-option,D)
 
 # check for 'asm goto'
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
 	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
 	KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif

From deb88a2a19e85842d79ba96b05031739ec327ff4 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Fri, 3 Feb 2017 13:13:20 -0800
Subject: [PATCH 216/322] mm/memory_hotplug.c: check start_pfn in
 test_pages_in_a_zone()

Patch series "fix a kernel oops when reading sysfs valid_zones", v2.

A sysfs memory file is created for each 2GiB memory block on x86-64 when
the system has 64GiB or more memory.  [1] When the start address of a
memory block is not backed by struct page, i.e.  a memory range is not
aligned by 2GiB, reading its 'valid_zones' attribute file leads to a
kernel oops.  This issue was observed on multiple x86-64 systems with
more than 64GiB of memory.  This patch-set fixes this issue.

Patch 1 first fixes an issue in test_pages_in_a_zone(), which does not
test the start section.

Patch 2 then fixes the kernel oops by extending test_pages_in_a_zone()
to return valid [start, end).

Note for stable kernels: The memory block size change was made by commit
bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64
systems"), which was accepted to 3.9.  However, this patch-set depends
on (and fixes) the change to test_pages_in_a_zone() made by commit
5f0f2887f4de ("mm/memory_hotplug.c: check for missing sections in
test_pages_in_a_zone()"), which was accepted to 4.4.

So, I recommend that we backport it up to 4.4.

[1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on
    large-memory x86-64 systems")'

This patch (of 2):

test_pages_in_a_zone() does not check 'start_pfn' when it is aligned by
section since 'sec_end_pfn' is set equal to 'pfn'.  Since this function
is called for testing the range of a sysfs memory file, 'start_pfn' is
always aligned by section.

Fix it by properly setting 'sec_end_pfn' to the next section pfn.

Also make sure that this function returns 1 only when the range belongs
to a zone.

Link: http://lkml.kernel.org/r/20170127222149.30893-2-toshi.kani@hpe.com
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Andrew Banman <abanman@sgi.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Greg KH <greg@kroah.com>
Cc: <stable@vger.kernel.org>	[4.4+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ca2723d47338..1218f73890b6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1483,7 +1483,7 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 }
 
 /*
- * Confirm all pages in a range [start, end) is belongs to the same zone.
+ * Confirm all pages in a range [start, end) belong to the same zone.
  */
 int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 {
@@ -1491,9 +1491,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 	struct zone *zone = NULL;
 	struct page *page;
 	int i;
-	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn);
+	for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
 	     pfn < end_pfn;
-	     pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) {
+	     pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
 		/* Make sure the memory section is present first */
 		if (!present_section_nr(pfn_to_section_nr(pfn)))
 			continue;
@@ -1512,7 +1512,11 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 			zone = page_zone(page);
 		}
 	}
-	return 1;
+
+	if (zone)
+		return 1;
+	else
+		return 0;
 }
 
 /*

From a96dfddbcc04336bbed50dc2b24823e45e09e80c Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Fri, 3 Feb 2017 13:13:23 -0800
Subject: [PATCH 217/322] base/memory, hotplug: fix a kernel oops in
 show_valid_zones()

Reading a sysfs "memoryN/valid_zones" file leads to the following oops
when the first page of a range is not backed by struct page.
show_valid_zones() assumes that 'start_pfn' is always valid for
page_zone().

 BUG: unable to handle kernel paging request at ffffea017a000000
 IP: show_valid_zones+0x6f/0x160

This issue may happen on x86-64 systems with 64GiB or more memory since
their memory block size is bumped up to 2GiB.  [1] An example of such
systems is desribed below.  0x3240000000 is only aligned by 1GiB and
this memory block starts from 0x3200000000, which is not backed by
struct page.

 BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable

Since test_pages_in_a_zone() already checks holes, fix this issue by
extending this function to return 'valid_start' and 'valid_end' for a
given range.  show_valid_zones() then proceeds with the valid range.

[1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on
    large-memory x86-64 systems")'

Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@hpe.com
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Zhang Zhen <zhenzhang.zhang@huawei.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>	[4.4+]

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c          | 12 ++++++------
 include/linux/memory_hotplug.h |  3 ++-
 mm/memory_hotplug.c            | 20 +++++++++++++++-----
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index dacb6a8418aa..fa26ffd25fa6 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -389,33 +389,33 @@ static ssize_t show_valid_zones(struct device *dev,
 {
 	struct memory_block *mem = to_memory_block(dev);
 	unsigned long start_pfn, end_pfn;
+	unsigned long valid_start, valid_end, valid_pages;
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	struct page *first_page;
 	struct zone *zone;
 	int zone_shift = 0;
 
 	start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	end_pfn = start_pfn + nr_pages;
-	first_page = pfn_to_page(start_pfn);
 
 	/* The block contains more than one zone can not be offlined. */
-	if (!test_pages_in_a_zone(start_pfn, end_pfn))
+	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
 		return sprintf(buf, "none\n");
 
-	zone = page_zone(first_page);
+	zone = page_zone(pfn_to_page(valid_start));
+	valid_pages = valid_end - valid_start;
 
 	/* MMOP_ONLINE_KEEP */
 	sprintf(buf, "%s", zone->name);
 
 	/* MMOP_ONLINE_KERNEL */
-	zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL, &zone_shift);
+	zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift);
 	if (zone_shift) {
 		strcat(buf, " ");
 		strcat(buf, (zone + zone_shift)->name);
 	}
 
 	/* MMOP_ONLINE_MOVABLE */
-	zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE, &zone_shift);
+	zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift);
 	if (zone_shift) {
 		strcat(buf, " ");
 		strcat(buf, (zone + zone_shift)->name);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index c1784c0b4f35..134a2f69c21a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
 extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long, int);
-extern int test_pages_in_a_zone(unsigned long, unsigned long);
+extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
+	unsigned long *valid_start, unsigned long *valid_end);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
 typedef void (*online_page_callback_t)(struct page *page);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1218f73890b6..b8c11e063ff0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1484,10 +1484,13 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 
 /*
  * Confirm all pages in a range [start, end) belong to the same zone.
+ * When true, return its valid [start, end).
  */
-int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
+int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
+			 unsigned long *valid_start, unsigned long *valid_end)
 {
 	unsigned long pfn, sec_end_pfn;
+	unsigned long start, end;
 	struct zone *zone = NULL;
 	struct page *page;
 	int i;
@@ -1509,14 +1512,20 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 			page = pfn_to_page(pfn + i);
 			if (zone && page_zone(page) != zone)
 				return 0;
+			if (!zone)
+				start = pfn + i;
 			zone = page_zone(page);
+			end = pfn + MAX_ORDER_NR_PAGES;
 		}
 	}
 
-	if (zone)
+	if (zone) {
+		*valid_start = start;
+		*valid_end = end;
 		return 1;
-	else
+	} else {
 		return 0;
+	}
 }
 
 /*
@@ -1843,6 +1852,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	long offlined_pages;
 	int ret, drain, retry_max, node;
 	unsigned long flags;
+	unsigned long valid_start, valid_end;
 	struct zone *zone;
 	struct memory_notify arg;
 
@@ -1853,10 +1863,10 @@ static int __ref __offline_pages(unsigned long start_pfn,
 		return -EINVAL;
 	/* This makes hotplug much easier...and readable.
 	   we assume this for now. .*/
-	if (!test_pages_in_a_zone(start_pfn, end_pfn))
+	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
 		return -EINVAL;
 
-	zone = page_zone(pfn_to_page(start_pfn));
+	zone = page_zone(pfn_to_page(valid_start));
 	node = zone_to_nid(zone);
 	nr_pages = end_pfn - start_pfn;
 

From d1908f52557b3230fbd63c0429f3b4b748bf2b6d Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 3 Feb 2017 13:13:26 -0800
Subject: [PATCH 218/322] fs: break out of iomap_file_buffered_write on fatal
 signals

Tetsuo has noticed that an OOM stress test which performs large write
requests can cause the full memory reserves depletion.  He has tracked
this down to the following path

	__alloc_pages_nodemask+0x436/0x4d0
	alloc_pages_current+0x97/0x1b0
	__page_cache_alloc+0x15d/0x1a0          mm/filemap.c:728
	pagecache_get_page+0x5a/0x2b0           mm/filemap.c:1331
	grab_cache_page_write_begin+0x23/0x40   mm/filemap.c:2773
	iomap_write_begin+0x50/0xd0             fs/iomap.c:118
	iomap_write_actor+0xb5/0x1a0            fs/iomap.c:190
	? iomap_write_end+0x80/0x80             fs/iomap.c:150
	iomap_apply+0xb3/0x130                  fs/iomap.c:79
	iomap_file_buffered_write+0x68/0xa0     fs/iomap.c:243
	? iomap_write_end+0x80/0x80
	xfs_file_buffered_aio_write+0x132/0x390 [xfs]
	? remove_wait_queue+0x59/0x60
	xfs_file_write_iter+0x90/0x130 [xfs]
	__vfs_write+0xe5/0x140
	vfs_write+0xc7/0x1f0
	? syscall_trace_enter+0x1d0/0x380
	SyS_write+0x58/0xc0
	do_syscall_64+0x6c/0x200
	entry_SYSCALL64_slow_path+0x25/0x25

the oom victim has access to all memory reserves to make a forward
progress to exit easier.  But iomap_file_buffered_write and other
callers of iomap_apply loop to complete the full request.  We need to
check for fatal signals and back off with a short write instead.

As the iomap_apply delegates all the work down to the actor we have to
hook into those.  All callers that work with the page cache are calling
iomap_write_begin so we will check for signals there.  dax_iomap_actor
has to handle the situation explicitly because it copies data to the
userspace directly.  Other callers like iomap_page_mkwrite work on a
single page or iomap_fiemap_actor do not allocate memory based on the
given len.

Fixes: 68a9f5e7007c ("xfs: implement iomap based buffered write path")
Link: http://lkml.kernel.org/r/20170201092706.9966-2-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c   | 5 +++++
 fs/iomap.c | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 3af2da5e64ce..c45598b912e1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1031,6 +1031,11 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		struct blk_dax_ctl dax = { 0 };
 		ssize_t map_len;
 
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
 		dax.sector = dax_iomap_sector(iomap, pos);
 		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
 		map_len = dax_map_atomic(iomap->bdev, &dax);
diff --git a/fs/iomap.c b/fs/iomap.c
index 354a123f170e..a51cb4c07d4d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -114,6 +114,9 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 
 	BUG_ON(pos + len > iomap->offset + iomap->length);
 
+	if (fatal_signal_pending(current))
+		return -EINTR;
+
 	page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
 	if (!page)
 		return -ENOMEM;

From 5abf186a30a89d5b9c18a6bf93a2c192c9fd52f6 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 3 Feb 2017 13:13:29 -0800
Subject: [PATCH 219/322] mm, fs: check for fatal signals in
 do_generic_file_read()

do_generic_file_read() can be told to perform a large request from
userspace.  If the system is under OOM and the reading task is the OOM
victim then it has an access to memory reserves and finishing the full
request can lead to the full memory depletion which is dangerous.  Make
sure we rather go with a short read and allow the killed task to
terminate.

Link: http://lkml.kernel.org/r/20170201092706.9966-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mm/filemap.c b/mm/filemap.c
index b772a33ef640..3f9afded581b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1791,6 +1791,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 
 		cond_resched();
 find_page:
+		if (fatal_signal_pending(current)) {
+			error = -EINTR;
+			goto out;
+		}
+
 		page = find_get_page(mapping, index);
 		if (!page) {
 			page_cache_sync_readahead(mapping,

From 6e978b22efa1db9f6e71b24440b5f1d93e968ee3 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 3 Feb 2017 14:18:39 -0800
Subject: [PATCH 220/322] cpufreq: intel_pstate: Disable energy efficiency
 optimization

Some Kabylake desktop processors may not reach max turbo when running in
HWP mode, even if running under sustained 100% utilization.

This occurs when the HWP.EPP (Energy Performance Preference) is set to
"balance_power" (0x80) -- the default on most systems.

It occurs because the platform BIOS may erroneously enable an
energy-efficiency setting -- MSR_IA32_POWER_CTL BIT-EE, which is not
recommended to be enabled on this SKU.

On the failing systems, this BIOS issue was not discovered when the
desktop motherboard was tested with Windows, because the BIOS also
neglects to provide the ACPI/CPPC table, that Windows requires to enable
HWP, and so Windows runs in legacy P-state mode, where this setting has
no effect.

Linux' intel_pstate driver does not require ACPI/CPPC to enable HWP, and
so it runs in HWP mode, exposing this incorrect BIOS configuration.

There are several ways to address this problem.

First, Linux can also run in legacy P-state mode on this system.
As intel_pstate is how Linux enables HWP, booting with
"intel_pstate=disable"
will run in acpi-cpufreq/ondemand legacy p-state mode.

Or second, the "performance" governor can be used with intel_pstate,
which will modify HWP.EPP to 0.

Or third, starting in 4.10, the
/sys/devices/system/cpu/cpufreq/policy*/energy_performance_preference
attribute in can be updated from "balance_power" to "performance".

Or fourth, apply this patch, which fixes the erroneous setting of
MSR_IA32_POWER_CTL BIT_EE on this model, allowing the default
configuration to function as designed.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: 4.6+ <stable@vger.kernel.org> # 4.6+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f91c25718d16..86e36544925f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1235,6 +1235,25 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
 }
 
+#define MSR_IA32_POWER_CTL_BIT_EE	19
+
+/* Disable energy efficiency optimization */
+static void intel_pstate_disable_ee(int cpu)
+{
+	u64 power_ctl;
+	int ret;
+
+	ret = rdmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, &power_ctl);
+	if (ret)
+		return;
+
+	if (!(power_ctl & BIT(MSR_IA32_POWER_CTL_BIT_EE))) {
+		pr_info("Disabling energy efficiency optimization\n");
+		power_ctl |= BIT(MSR_IA32_POWER_CTL_BIT_EE);
+		wrmsrl_on_cpu(cpu, MSR_IA32_POWER_CTL, power_ctl);
+	}
+}
+
 static int atom_get_min_pstate(void)
 {
 	u64 value;
@@ -1845,6 +1864,11 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
 	{}
 };
 
+static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
+	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, core_params),
+	{}
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1875,6 +1899,12 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 	cpu->cpu = cpunum;
 
 	if (hwp_active) {
+		const struct x86_cpu_id *id;
+
+		id = x86_match_cpu(intel_pstate_cpu_ee_disable_ids);
+		if (id)
+			intel_pstate_disable_ee(cpunum);
+
 		intel_pstate_hwp_enable(cpu);
 		pid_params.sample_rate_ms = 50;
 		pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;

From a9306a63631493afc75893a4ac405d4e1cbae6aa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 4 Feb 2017 00:44:36 +0100
Subject: [PATCH 221/322] PM / runtime: Avoid false-positive warnings from
 might_sleep_if()

The might_sleep_if() assertions in __pm_runtime_idle(),
__pm_runtime_suspend() and __pm_runtime_resume() may generate
false-positive warnings in some situations.  For example, that
happens if a nested pm_runtime_get_sync()/pm_runtime_put() pair
is executed with disabled interrupts within an outer
pm_runtime_get_sync()/pm_runtime_put() section for the same device.
[Generally, pm_runtime_get_sync() may sleep, so it should not be
called with disabled interrupts, but in this particular case the
previous pm_runtime_get_sync() guarantees that the device will not
be suspended, so the inner pm_runtime_get_sync() will return
immediately after incrementing the device's usage counter.]

That started to happen in the i915 driver in 4.10-rc, leading to
the following splat:

 BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1032
 in_atomic(): 1, irqs_disabled(): 0, pid: 1500, name: Xorg
 1 lock held by Xorg/1500:
  #0:  (&dev->struct_mutex){+.+.+.}, at:
  [<ffffffffa0680c13>] i915_mutex_lock_interruptible+0x43/0x140 [i915]
 CPU: 0 PID: 1500 Comm: Xorg Not tainted
 Call Trace:
  dump_stack+0x85/0xc2
  ___might_sleep+0x196/0x260
  __might_sleep+0x53/0xb0
  __pm_runtime_resume+0x7a/0x90
  intel_runtime_pm_get+0x25/0x90 [i915]
  aliasing_gtt_bind_vma+0xaa/0xf0 [i915]
  i915_vma_bind+0xaf/0x1e0 [i915]
  i915_gem_execbuffer_relocate_entry+0x513/0x6f0 [i915]
  i915_gem_execbuffer_relocate_vma.isra.34+0x188/0x250 [i915]
  ? trace_hardirqs_on+0xd/0x10
  ? i915_gem_execbuffer_reserve_vma.isra.31+0x152/0x1f0 [i915]
  ? i915_gem_execbuffer_reserve.isra.32+0x372/0x3a0 [i915]
  i915_gem_do_execbuffer.isra.38+0xa70/0x1a40 [i915]
  ? __might_fault+0x4e/0xb0
  i915_gem_execbuffer2+0xc5/0x260 [i915]
  ? __might_fault+0x4e/0xb0
  drm_ioctl+0x206/0x450 [drm]
  ? i915_gem_execbuffer+0x340/0x340 [i915]
  ? __fget+0x5/0x200
  do_vfs_ioctl+0x91/0x6f0
  ? __fget+0x111/0x200
  ? __fget+0x5/0x200
  SyS_ioctl+0x79/0x90
  entry_SYSCALL_64_fastpath+0x23/0xc6

even though the code triggering it is correct.

Unfortunately, the might_sleep_if() assertions in question are
too coarse-grained to cover such cases correctly, so make them
a bit less sensitive in order to avoid the false-positives.

Reported-and-tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 872eac4cb1df..a14fac6a01d3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -966,13 +966,13 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
 	unsigned long flags;
 	int retval;
 
-	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
-
 	if (rpmflags & RPM_GET_PUT) {
 		if (!atomic_dec_and_test(&dev->power.usage_count))
 			return 0;
 	}
 
+	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
+
 	spin_lock_irqsave(&dev->power.lock, flags);
 	retval = rpm_idle(dev, rpmflags);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
@@ -998,13 +998,13 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
 	unsigned long flags;
 	int retval;
 
-	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
-
 	if (rpmflags & RPM_GET_PUT) {
 		if (!atomic_dec_and_test(&dev->power.usage_count))
 			return 0;
 	}
 
+	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
+
 	spin_lock_irqsave(&dev->power.lock, flags);
 	retval = rpm_suspend(dev, rpmflags);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
@@ -1029,7 +1029,8 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
 	unsigned long flags;
 	int retval;
 
-	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
+	might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
+			dev->power.runtime_status != RPM_ACTIVE);
 
 	if (rpmflags & RPM_GET_PUT)
 		atomic_inc(&dev->power.usage_count);

From bfb34527a32a1a576d9bfb7026d3ab0369a6cd60 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 4 Feb 2017 14:47:31 -0800
Subject: [PATCH 222/322] libnvdimm, pfn: fix memmap reservation size versus 4K
 alignment

When vmemmap_populate() allocates space for the memmap it does so in 2MB
sized chunks. The libnvdimm-pfn driver incorrectly accounts for this
when the alignment of the device is set to 4K. When this happens we
trigger memory allocation failures in altmap_alloc_block_buf() and
trigger warnings of the form:

 WARNING: CPU: 0 PID: 3376 at arch/x86/mm/init_64.c:656 arch_add_memory+0xe4/0xf0
 [..]
 Call Trace:
  dump_stack+0x86/0xc3
  __warn+0xcb/0xf0
  warn_slowpath_null+0x1d/0x20
  arch_add_memory+0xe4/0xf0
  devm_memremap_pages+0x29b/0x4e0

Fixes: 315c562536c4 ("libnvdimm, pfn: add 'align' attribute, default to HPAGE_SIZE")
Cc: <stable@vger.kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/pfn_devs.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index a2ac9e641aa9..6c033c9a2f06 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -627,15 +627,12 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	size = resource_size(&nsio->res);
 	npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K;
 	if (nd_pfn->mode == PFN_MODE_PMEM) {
-		unsigned long memmap_size;
-
 		/*
 		 * vmemmap_populate_hugepages() allocates the memmap array in
 		 * HPAGE_SIZE chunks.
 		 */
-		memmap_size = ALIGN(64 * npfns, HPAGE_SIZE);
-		offset = ALIGN(start + SZ_8K + memmap_size + dax_label_reserve,
-				nd_pfn->align) - start;
+		offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve,
+				max(nd_pfn->align, HPAGE_SIZE)) - start;
 	} else if (nd_pfn->mode == PFN_MODE_RAM)
 		offset = ALIGN(start + SZ_8K + dax_label_reserve,
 				nd_pfn->align) - start;

From 34b2cef20f19c87999fff3da4071e66937db9644 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Feb 2017 11:16:52 -0800
Subject: [PATCH 223/322] ipv4: keep skb->dst around in presence of IP options

Andrey Konovalov got crashes in __ip_options_echo() when a NULL skb->dst
is accessed.

ipv4_pktinfo_prepare() should not drop the dst if (evil) IP options
are present.

We could refine the test to the presence of ts_needtime or srr,
but IP options are not often used, so let's be conservative.

Thanks to syzkaller team for finding this bug.

Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 53ae0c6315ad..900011709e3b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1238,7 +1238,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 		pktinfo->ipi_ifindex = 0;
 		pktinfo->ipi_spec_dst.s_addr = 0;
 	}
-	skb_dst_drop(skb);
+	/* We need to keep the dst for __ip_options_echo()
+	 * We could restrict the test to opt.ts_needtime || opt.srr,
+	 * but the following is good enough as IP options are not often used.
+	 */
+	if (unlikely(IPCB(skb)->opt.optlen))
+		skb_dst_force(skb);
+	else
+		skb_dst_drop(skb);
 }
 
 int ip_setsockopt(struct sock *sk, int level,

From d71b7896886345c53ef1d84bda2bc758554f5d61 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Feb 2017 00:03:26 -0800
Subject: [PATCH 224/322] netlabel: out of bound access in cipso_v4_validate()

syzkaller found another out of bound access in ip_options_compile(),
or more exactly in cipso_v4_validate()

Fixes: 20e2a8648596 ("cipso: handle CIPSO options correctly when NetLabel is disabled")
Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov  <dvyukov@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cipso_ipv4.h | 4 ++++
 net/ipv4/cipso_ipv4.c    | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 3ebb168b9afc..a34b141f125f 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -309,6 +309,10 @@ static inline int cipso_v4_validate(const struct sk_buff *skb,
 	}
 
 	for (opt_iter = 6; opt_iter < opt_len;) {
+		if (opt_iter + 1 == opt_len) {
+			err_offset = opt_iter;
+			goto out;
+		}
 		tag_len = opt[opt_iter + 1];
 		if ((tag_len == 0) || (tag_len > (opt_len - opt_iter))) {
 			err_offset = opt_iter + 1;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 72d6f056d863..ae206163c273 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1587,6 +1587,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
 				goto validate_return_locked;
 			}
 
+		if (opt_iter + 1 == opt_len) {
+			err_offset = opt_iter;
+			goto validate_return_locked;
+		}
 		tag_len = tag[1];
 		if (tag_len > (opt_len - opt_iter)) {
 			err_offset = opt_iter + 1;

From 79a8b9aa388b0620cc1d525d7c0f0d9a8a85e08e Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 5 Feb 2017 11:50:21 +0100
Subject: [PATCH 225/322] x86/CPU/AMD: Bring back Compute Unit ID

Commit:

  a33d331761bc ("x86/CPU/AMD: Fix Bulldozer topology")

restored the initial approach we had with the Fam15h topology of
enumerating CU (Compute Unit) threads as cores. And this is still
correct - they're beefier than HT threads but still have some
shared functionality.

Our current approach has a problem with the Mad Max Steam game, for
example. Yves Dionne reported a certain "choppiness" while playing on
v4.9.5.

That problem stems most likely from the fact that the CU threads share
resources within one CU and when we schedule to a thread of a different
compute unit, this incurs latency due to migrating the working set to a
different CU through the caches.

When the thread siblings mask mirrors that aspect of the CUs and
threads, the scheduler pays attention to it and tries to schedule within
one CU first. Which takes care of the latency, of course.

Reported-by: Yves Dionne <yves.dionne@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.9
Cc: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yazen Ghannam <yazen.ghannam@amd.com>
Link: http://lkml.kernel.org/r/20170205105022.8705-1-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/amd.c        |  9 ++++++++-
 arch/x86/kernel/cpu/common.c     |  1 +
 arch/x86/kernel/smpboot.c        | 12 +++++++++---
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1be64da0384e..e6cfe7ba2d65 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -104,6 +104,7 @@ struct cpuinfo_x86 {
 	__u8			x86_phys_bits;
 	/* CPUID returned core id bits: */
 	__u8			x86_coreid_bits;
+	__u8			cu_id;
 	/* Max extended CPUID function supported: */
 	__u32			extended_cpuid_level;
 	/* Maximum supported CPUID level, -1=no CPUID: */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1d3167269a67..20dc44d1e6be 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,8 +309,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		u32 eax, ebx, ecx, edx;
 
-		node_id = cpuid_ecx(0x8000001e) & 7;
+		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+
+		node_id  = ecx & 0xff;
+		smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
+
+		if (c->x86 == 0x15)
+			c->cu_id = ebx & 0xff;
 
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bab7a8a4293..ede03e849a8b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1015,6 +1015,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model_id[0] = '\0';  /* Unset */
 	c->x86_max_cores = 1;
 	c->x86_coreid_bits = 0;
+	c->cu_id = 0xff;
 #ifdef CONFIG_X86_64
 	c->x86_clflush_size = 64;
 	c->x86_phys_bits = 36;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc3b73c..99b920d0e516 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
 		if (c->phys_proc_id == o->phys_proc_id &&
-		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
-		    c->cpu_core_id == o->cpu_core_id)
-			return topology_sane(c, o, "smt");
+		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
+			if (c->cpu_core_id == o->cpu_core_id)
+				return topology_sane(c, o, "smt");
+
+			if ((c->cu_id != 0xff) &&
+			    (o->cu_id != 0xff) &&
+			    (c->cu_id == o->cu_id))
+				return topology_sane(c, o, "smt");
+		}
 
 	} else if (c->phys_proc_id == o->phys_proc_id &&
 		   c->cpu_core_id == o->cpu_core_id) {

From 08b259631b5a1d912af4832847b5642f377d9101 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <Yazen.Ghannam@amd.com>
Date: Sun, 5 Feb 2017 11:50:22 +0100
Subject: [PATCH 226/322] x86/CPU/AMD: Fix Zen SMT topology

After:

  a33d331761bc ("x86/CPU/AMD: Fix Bulldozer topology")

our  SMT scheduling topology for Fam17h systems is broken, because
the ThreadId is included in the ApicId when SMT is enabled.

So, without further decoding cpu_core_id is unique for each thread
rather than the same for threads on the same core. This didn't affect
systems with SMT disabled. Make cpu_core_id be what it is defined to be.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org> # 4.9
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170205105022.8705-2-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 20dc44d1e6be..2b4cf04239b6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -319,6 +319,13 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
 		if (c->x86 == 0x15)
 			c->cu_id = ebx & 0xff;
 
+		if (c->x86 >= 0x17) {
+			c->cpu_core_id = ebx & 0xff;
+
+			if (smp_num_siblings > 1)
+				c->x86_max_cores /= smp_num_siblings;
+		}
+
 		/*
 		 * We may have multiple LLCs if L3 caches exist, so check if we
 		 * have an L3 cache by looking at the L3 cache CPUID leaf.

From 7892032cfe67f4bde6fc2ee967e45a8fbaf33756 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Feb 2017 23:18:55 -0800
Subject: [PATCH 227/322] ip6_gre: fix ip6gre_err() invalid reads

Andrey Konovalov reported out of bound accesses in ip6gre_err()

If GRE flags contains GRE_KEY, the following expression
*(((__be32 *)p) + (grehlen / 4) - 1)

accesses data ~40 bytes after the expected point, since
grehlen includes the size of IPv6 headers.

Let's use a "struct gre_base_hdr *greh" pointer to make this
code more readable.

p[1] becomes greh->protocol.
grhlen is the GRE header length.

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 558631860d91..630b73be5999 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -367,35 +367,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
 
 
 static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		u8 type, u8 code, int offset, __be32 info)
+		       u8 type, u8 code, int offset, __be32 info)
 {
-	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
-	__be16 *p = (__be16 *)(skb->data + offset);
-	int grehlen = offset + 4;
+	const struct gre_base_hdr *greh;
+	const struct ipv6hdr *ipv6h;
+	int grehlen = sizeof(*greh);
 	struct ip6_tnl *t;
+	int key_off = 0;
 	__be16 flags;
+	__be32 key;
 
-	flags = p[0];
-	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
-		if (flags&(GRE_VERSION|GRE_ROUTING))
-			return;
-		if (flags&GRE_KEY) {
-			grehlen += 4;
-			if (flags&GRE_CSUM)
-				grehlen += 4;
-		}
+	if (!pskb_may_pull(skb, offset + grehlen))
+		return;
+	greh = (const struct gre_base_hdr *)(skb->data + offset);
+	flags = greh->flags;
+	if (flags & (GRE_VERSION | GRE_ROUTING))
+		return;
+	if (flags & GRE_CSUM)
+		grehlen += 4;
+	if (flags & GRE_KEY) {
+		key_off = grehlen + offset;
+		grehlen += 4;
 	}
 
-	/* If only 8 bytes returned, keyed message will be dropped here */
-	if (!pskb_may_pull(skb, grehlen))
+	if (!pskb_may_pull(skb, offset + grehlen))
 		return;
 	ipv6h = (const struct ipv6hdr *)skb->data;
-	p = (__be16 *)(skb->data + offset);
+	greh = (const struct gre_base_hdr *)(skb->data + offset);
+	key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
 
 	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
-				flags & GRE_KEY ?
-				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
-				p[1]);
+				 key, greh->protocol);
 	if (!t)
 		return;
 

From d5adbfcd5f7bcc6fa58a41c5c5ada0e5c826ce2c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 5 Feb 2017 15:10:58 -0800
Subject: [PATCH 228/322] Linux 4.10-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d7b758a1ce14..8e223e081c9d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*

From e479ab651f071dbd1518ce8fb121c7f42f2bb97d Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Sat, 4 Feb 2017 13:59:22 +0200
Subject: [PATCH 229/322] mac80211: Fix FILS AEAD protection in Association
 Request frame

Incorrect num_elem parameter value (1 vs. 5) was used in the
aes_siv_encrypt() call. This resulted in only the first one of the five
AAD vectors to SIV getting included in calculation. This does not
protect all the contents correctly and would not interoperate with a
standard compliant implementation.

Fix this by using the correct number. A matching fix is needed in the AP
side (hostapd) to get FILS authentication working properly.

Fixes: 39404feee691 ("mac80211: FILS AEAD protection for station mode association frames")
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/fils_aead.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
index ecfdd97758a3..e795aaa2aa1f 100644
--- a/net/mac80211/fils_aead.c
+++ b/net/mac80211/fils_aead.c
@@ -272,7 +272,7 @@ int fils_encrypt_assoc_req(struct sk_buff *skb,
 	crypt_len = skb->data + skb->len - encr;
 	skb_put(skb, AES_BLOCK_SIZE);
 	return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
-			       encr, crypt_len, 1, addr, len, encr);
+			       encr, crypt_len, 5, addr, len, encr);
 }
 
 int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,

From 01fba20b5976e445676febbdf6dc78d71c6d7b62 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Sat, 4 Feb 2017 18:08:42 +0200
Subject: [PATCH 230/322] mac80211: Allocate a sync skcipher explicitly for
 FILS AEAD

The skcipher could have been of the async variant which may return from
skcipher_encrypt() with -EINPROGRESS after having queued the request.
The FILS AEAD implementation here does not have code for dealing with
that possibility, so allocate a sync cipher explicitly to avoid
potential issues with hardware accelerators.

This is based on the patch sent out by Ard.

Fixes: 39404feee691 ("mac80211: FILS AEAD protection for station mode association frames")
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/fils_aead.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
index e795aaa2aa1f..5c3af5eb4052 100644
--- a/net/mac80211/fils_aead.c
+++ b/net/mac80211/fils_aead.c
@@ -124,7 +124,7 @@ static int aes_siv_encrypt(const u8 *key, size_t key_len,
 
 	/* CTR */
 
-	tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0);
+	tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm2)) {
 		kfree(tmp);
 		return PTR_ERR(tfm2);
@@ -183,7 +183,7 @@ static int aes_siv_decrypt(const u8 *key, size_t key_len,
 
 	/* CTR */
 
-	tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, 0);
+	tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm2))
 		return PTR_ERR(tfm2);
 	/* K2 for CTR */

From da7061c82e4a1bc6a5e134ef362c86261906c860 Mon Sep 17 00:00:00 2001
From: Thorsten Horstmann <thorsten@defutech.de>
Date: Fri, 3 Feb 2017 14:38:29 +0100
Subject: [PATCH 231/322] mac80211: Fix adding of mesh vendor IEs

The function ieee80211_ie_split_vendor doesn't return 0 on errors. Instead
it returns any offset < ielen when WLAN_EID_VENDOR_SPECIFIC is found. The
return value in mesh_add_vendor_ies must therefore be checked against
ifmsh->ie_len and not 0. Otherwise all ifmsh->ie starting with
WLAN_EID_VENDOR_SPECIFIC will be rejected.

Fixes: 082ebb0c258d ("mac80211: fix mesh beacon format")
Signed-off-by: Thorsten Horstmann <thorsten@defutech.de>
Signed-off-by: Mathias Kretschmer <mathias.kretschmer@fit.fraunhofer.de>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
[sven@narfation.org: Add commit message]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 42120d965263..50e1b7f78bd4 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -339,7 +339,7 @@ int mesh_add_vendor_ies(struct ieee80211_sub_if_data *sdata,
 	/* fast-forward to vendor IEs */
 	offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0);
 
-	if (offset) {
+	if (offset < ifmsh->ie_len) {
 		len = ifmsh->ie_len - offset;
 		data = ifmsh->ie + offset;
 		if (skb_tailroom(skb) < len)

From fd551bac4795854adaa87bad7e5136083719802b Mon Sep 17 00:00:00 2001
From: Masashi Honma <masashi.honma@gmail.com>
Date: Thu, 26 Jan 2017 08:56:13 +0900
Subject: [PATCH 232/322] nl80211: Fix mesh HT operation check

A previous change to fix checks for NL80211_MESHCONF_HT_OPMODE
missed setting the flag when replacing FILL_IN_MESH_PARAM_IF_SET
with checking codes. This results in dropping the received HT
operation value when called by nl80211_update_mesh_config(). Fix
this by setting the flag properly.

Fixes: 9757235f451c ("nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value")
Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
[rewrite commit message to use Fixes: line]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5c1b267e22be..aee396b9f190 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5916,6 +5916,7 @@ do {									    \
 			break;
 		}
 		cfg->ht_opmode = ht_opmode;
+		mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1));
 	}
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
 				  1, 65535, mask,

From 37a7ea4a9b81f6a864c10a7cb0b96458df5310a3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 6 Feb 2017 15:09:48 +0100
Subject: [PATCH 233/322] ALSA: seq: Don't handle loop timeout at
 snd_seq_pool_done()

snd_seq_pool_done() syncs with closing of all opened threads, but it
aborts the wait loop with a timeout, and proceeds to the release
resource even if not all threads have been closed.  The timeout was 5
seconds, and if you run a crazy stuff, it can exceed easily, and may
result in the access of the invalid memory address -- this is what
syzkaller detected in a bug report.

As a fix, let the code graduate from naiveness, simply remove the loop
timeout.

BugLink: http://lkml.kernel.org/r/CACT4Y+YdhDV2H5LLzDTJDVF-qiYHUHhtRaW4rbb4gUhTCQB81w@mail.gmail.com
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_memory.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c
index c850345c43b5..dfa5156f3585 100644
--- a/sound/core/seq/seq_memory.c
+++ b/sound/core/seq/seq_memory.c
@@ -419,7 +419,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool)
 {
 	unsigned long flags;
 	struct snd_seq_event_cell *ptr;
-	int max_count = 5 * HZ;
 
 	if (snd_BUG_ON(!pool))
 		return -EINVAL;
@@ -432,14 +431,8 @@ int snd_seq_pool_done(struct snd_seq_pool *pool)
 	if (waitqueue_active(&pool->output_sleep))
 		wake_up(&pool->output_sleep);
 
-	while (atomic_read(&pool->counter) > 0) {
-		if (max_count == 0) {
-			pr_warn("ALSA: snd_seq_pool_done timeout: %d cells remain\n", atomic_read(&pool->counter));
-			break;
-		}
+	while (atomic_read(&pool->counter) > 0)
 		schedule_timeout_uninterruptible(1);
-		max_count--;
-	}
 	
 	/* release all resources */
 	spin_lock_irqsave(&pool->lock, flags);

From ebf6c9cb23d7e56eec8575a88071dec97ad5c6e2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Feb 2017 20:23:22 -0800
Subject: [PATCH 234/322] ipv6: tcp: add a missing tcp_v6_restore_cb()

Dmitry reported use-after-free in ip6_datagram_recv_specific_ctl()

A similar bug was fixed in commit 8ce48623f0cf ("ipv6: tcp: restore
IP6CB for pktoptions skbs"), but I missed another spot.

tcp_v6_syn_recv_sock() can indeed set np->pktoptions from ireq->pktopts

Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cb8929681dc7..eaad72c3d746 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -991,6 +991,16 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	return 0; /* don't send reset */
 }
 
+static void tcp_v6_restore_cb(struct sk_buff *skb)
+{
+	/* We need to move header back to the beginning if xfrm6_policy_check()
+	 * and tcp_v6_fill_cb() are going to be called again.
+	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
+	 */
+	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
+		sizeof(struct inet6_skb_parm));
+}
+
 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 					 struct request_sock *req,
 					 struct dst_entry *dst,
@@ -1182,8 +1192,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 						      sk_gfp_mask(sk, GFP_ATOMIC));
 			consume_skb(ireq->pktopts);
 			ireq->pktopts = NULL;
-			if (newnp->pktoptions)
+			if (newnp->pktoptions) {
+				tcp_v6_restore_cb(newnp->pktoptions);
 				skb_set_owner_r(newnp->pktoptions, newsk);
+			}
 		}
 	}
 
@@ -1198,16 +1210,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 	return NULL;
 }
 
-static void tcp_v6_restore_cb(struct sk_buff *skb)
-{
-	/* We need to move header back to the beginning if xfrm6_policy_check()
-	 * and tcp_v6_fill_cb() are going to be called again.
-	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
-	 */
-	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
-		sizeof(struct inet6_skb_parm));
-}
-
 /* The socket must have it's spinlock held when we get
  * here, unless it is a TCP_LISTEN socket.
  *

From 08b3b33f3e4dc0016ad878c1a48f094a74956277 Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Fri, 3 Feb 2017 09:29:07 -0600
Subject: [PATCH 235/322] MAINTAINERS: socfpga: update email for Dinh Nguyen

My opensource.altera.com email will be going away soon.

Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index acd7c3d5410a..35c71bb41cfe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1773,7 +1773,7 @@ F:	drivers/soc/renesas/
 F:	include/linux/soc/renesas/
 
 ARM/SOCFPGA ARCHITECTURE
-M:	Dinh Nguyen <dinguyen@opensource.altera.com>
+M:	Dinh Nguyen <dinguyen@kernel.org>
 S:	Maintained
 F:	arch/arm/mach-socfpga/
 F:	arch/arm/boot/dts/socfpga*
@@ -1783,7 +1783,7 @@ W:	http://www.rocketboards.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
 
 ARM/SOCFPGA CLOCK FRAMEWORK SUPPORT
-M:	Dinh Nguyen <dinguyen@opensource.altera.com>
+M:	Dinh Nguyen <dinguyen@kernel.org>
 S:	Maintained
 F:	drivers/clk/socfpga/
 

From eeeefd41843218c55a8782a6920f044d9bf6207a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 5 Feb 2017 18:10:35 +0100
Subject: [PATCH 236/322] block: don't try Write Same from
 __blkdev_issue_zeroout

Write Same can return an error asynchronously if it turns out the
underlying SCSI device does not support Write Same, which makes a
proper fallback to other methods in __blkdev_issue_zeroout impossible.
Thus only issue a Write Same from blkdev_issue_zeroout an don't try it
at all from __blkdev_issue_zeroout as a non-invasive workaround.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Junichi Nomura <j-nomura@ce.jp.nec.com>
Fixes: e73c23ff ("block: add async variant of blkdev_issue_zeroout")
Tested-by: Junichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-lib.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index f8c82a9b4012..ed1e78e24db0 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -306,11 +306,6 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
 		goto out;
 
-	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
-			ZERO_PAGE(0), biop);
-	if (ret == 0 || (ret && ret != -EOPNOTSUPP))
-		goto out;
-
 	ret = 0;
 	while (nr_sects != 0) {
 		bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
@@ -369,6 +364,10 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 			return 0;
 	}
 
+	if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
+			ZERO_PAGE(0)))
+		return 0;
+
 	blk_start_plug(&plug);
 	ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
 			&bio, discard);

From 5aff1d245e8cc1ab5c4517d916edaed9e3f7f973 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 3 Feb 2017 09:58:24 +0100
Subject: [PATCH 237/322] ARM: defconfigs: make NF_CT_PROTO_SCTP and
 NF_CT_PROTO_UDPLITE built-in

The symbols can no longer be used as loadable modules, leading to a harmless Kconfig
warning:

arch/arm/configs/imote2_defconfig:60:warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE
arch/arm/configs/imote2_defconfig:59:warning: symbol value 'm' invalid for NF_CT_PROTO_SCTP
arch/arm/configs/ezx_defconfig:68:warning: symbol value 'm' invalid for NF_CT_PROTO_UDPLITE
arch/arm/configs/ezx_defconfig:67:warning: symbol value 'm' invalid for NF_CT_PROTO_SCTP

Let's make them built-in.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/configs/ezx_defconfig    | 4 ++--
 arch/arm/configs/imote2_defconfig | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index ea316c4b890e..d3f1768840e2 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -64,8 +64,8 @@ CONFIG_NETFILTER=y
 CONFIG_NETFILTER_NETLINK_QUEUE=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_SCTP=m
-CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_H323=m
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index 18e59feaa307..7f479cdb3479 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -56,8 +56,8 @@ CONFIG_NETFILTER=y
 CONFIG_NETFILTER_NETLINK_QUEUE=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_SCTP=m
-CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_CT_PROTO_UDPLITE=y
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_H323=m

From a088d1d73a4bcfd7bc482f8d08375b9b665dc3e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Fri, 3 Feb 2017 08:11:03 +0100
Subject: [PATCH 238/322] ipv6: Fix IPv6 packet loss in scenarios involving
 roaming + snooping switches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When for instance a mobile Linux device roams from one access point to
another with both APs sharing the same broadcast domain and a
multicast snooping switch in between:

1)    (c) <~~~> (AP1) <--[SSW]--> (AP2)

2)              (AP1) <--[SSW]--> (AP2) <~~~> (c)

Then currently IPv6 multicast packets will get lost for (c) until an
MLD Querier sends its next query message. The packet loss occurs
because upon roaming the Linux host so far stayed silent regarding
MLD and the snooping switch will therefore be unaware of the
multicast topology change for a while.

This patch fixes this by always resending MLD reports when an interface
change happens, for instance from NO-CARRIER to CARRIER state.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f60e88e56255..81f7b4ea4281 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3386,9 +3386,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			}
 
 			if (idev) {
-				if (idev->if_flags & IF_READY)
-					/* device is already configured. */
+				if (idev->if_flags & IF_READY) {
+					/* device is already configured -
+					 * but resend MLD reports, we might
+					 * have roamed and need to update
+					 * multicast snooping switches
+					 */
+					ipv6_mc_up(idev);
 					break;
+				}
 				idev->if_flags |= IF_READY;
 			}
 

From b3f2d07f4649adcf6905953a10d217b5683e4077 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 3 Feb 2017 17:35:46 +0100
Subject: [PATCH 239/322] hns: avoid stack overflow with CONFIG_KASAN

The use of ACCESS_ONCE() looks like a micro-optimization to force gcc to use
an indexed load for the register address, but it has an absolutely detrimental
effect on builds with gcc-5 and CONFIG_KASAN=y, leading to a very likely
kernel stack overflow aside from very complex object code:

hisilicon/hns/hns_dsaf_gmac.c: In function 'hns_gmac_update_stats':
hisilicon/hns/hns_dsaf_gmac.c:419:1: error: the frame size of 2912 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_ppe.c: In function 'hns_ppe_reset_common':
hisilicon/hns/hns_dsaf_ppe.c:390:1: error: the frame size of 1184 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_ppe.c: In function 'hns_ppe_get_regs':
hisilicon/hns/hns_dsaf_ppe.c:621:1: error: the frame size of 3632 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_rcb.c: In function 'hns_rcb_get_common_regs':
hisilicon/hns/hns_dsaf_rcb.c:970:1: error: the frame size of 2784 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_gmac.c: In function 'hns_gmac_get_regs':
hisilicon/hns/hns_dsaf_gmac.c:641:1: error: the frame size of 5728 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_rcb.c: In function 'hns_rcb_get_ring_regs':
hisilicon/hns/hns_dsaf_rcb.c:1021:1: error: the frame size of 2208 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_comm_init':
hisilicon/hns/hns_dsaf_main.c:1209:1: error: the frame size of 1904 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_xgmac.c: In function 'hns_xgmac_get_regs':
hisilicon/hns/hns_dsaf_xgmac.c:748:1: error: the frame size of 4704 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_update_stats':
hisilicon/hns/hns_dsaf_main.c:2420:1: error: the frame size of 1088 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_regs':
hisilicon/hns/hns_dsaf_main.c:2753:1: error: the frame size of 10768 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]

This does not seem to happen any more with gcc-7, but removing the ACCESS_ONCE
seems safe anyway and it avoids a serious issue for some people. I have verified
that with gcc-5.3.1, the object code we get is better in the new version
both with and without CONFIG_KASAN, as we no longer allocate a 1344 byte
stack frame for hns_dsaf_get_regs() but otherwise have practically identical
object code.

With gcc-7.0.0, removing ACCESS_ONCE has no effect, the object code is already
good either way.

This patch is probably not urgent to get into 4.11 as only KASAN=y builds
with certain compilers are affected, but I still think it makes sense to
backport into older kernels.

Cc: stable@vger.kernel.org
Fixes: 511e6bc ("net: add Hisilicon Network Subsystem DSAF support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 87226685f742..8fa18fc17cd2 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -1014,9 +1014,7 @@
 
 static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value)
 {
-	u8 __iomem *reg_addr = ACCESS_ONCE(base);
-
-	writel(value, reg_addr + reg);
+	writel(value, base + reg);
 }
 
 #define dsaf_write_dev(a, reg, value) \
@@ -1024,9 +1022,7 @@ static inline void dsaf_write_reg(void __iomem *base, u32 reg, u32 value)
 
 static inline u32 dsaf_read_reg(u8 __iomem *base, u32 reg)
 {
-	u8 __iomem *reg_addr = ACCESS_ONCE(base);
-
-	return readl(reg_addr + reg);
+	return readl(base + reg);
 }
 
 static inline void dsaf_write_syscon(struct regmap *base, u32 reg, u32 value)

From ccf7abb93af09ad0868ae9033d1ca8108bdaec82 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Feb 2017 14:59:38 -0800
Subject: [PATCH 240/322] tcp: avoid infinite loop in tcp_splice_read()

Splicing from TCP socket is vulnerable when a packet with URG flag is
received and stored into receive queue.

__tcp_splice_read() returns 0, and sk_wait_data() immediately
returns since there is the problematic skb in queue.

This is a nice way to burn cpu (aka infinite loop) and trigger
soft lockups.

Again, this gem was found by syzkaller tool.

Fixes: 9c55e01c0cc8 ("[TCP]: Splice receive support.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov  <dvyukov@google.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4a044964da66..0efb4c7f6704 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -770,6 +770,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 				ret = -EAGAIN;
 				break;
 			}
+			/* if __tcp_splice_read() got nothing while we have
+			 * an skb in receive queue, we do not want to loop.
+			 * This might happen with URG data.
+			 */
+			if (!skb_queue_empty(&sk->sk_receive_queue))
+				break;
 			sk_wait_data(sk, &timeo, NULL);
 			if (signal_pending(current)) {
 				ret = sock_intr_errno(timeo);

From e1edab87faf6ca30cd137e0795bc73aa9a9a22ec Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 3 Feb 2017 18:20:48 -0500
Subject: [PATCH 241/322] tun: read vnet_hdr_sz once

When IFF_VNET_HDR is enabled, a virtio_net header must precede data.
Data length is verified to be greater than or equal to expected header
length tun->vnet_hdr_sz before copying.

Read this value once and cache locally, as it can be updated between
the test and use (TOCTOU).

Signed-off-by: Willem de Bruijn <willemb@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
CC: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2cd10b26b650..bfabe180053e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1170,9 +1170,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	if (tun->flags & IFF_VNET_HDR) {
-		if (len < tun->vnet_hdr_sz)
+		int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
+
+		if (len < vnet_hdr_sz)
 			return -EINVAL;
-		len -= tun->vnet_hdr_sz;
+		len -= vnet_hdr_sz;
 
 		if (!copy_from_iter_full(&gso, sizeof(gso), from))
 			return -EFAULT;
@@ -1183,7 +1185,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
 		if (tun16_to_cpu(tun, gso.hdr_len) > len)
 			return -EINVAL;
-		iov_iter_advance(from, tun->vnet_hdr_sz - sizeof(gso));
+		iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
 	}
 
 	if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
@@ -1335,7 +1337,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 		vlan_hlen = VLAN_HLEN;
 
 	if (tun->flags & IFF_VNET_HDR)
-		vnet_hdr_sz = tun->vnet_hdr_sz;
+		vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
 
 	total = skb->len + vlan_hlen + vnet_hdr_sz;
 

From 837585a5375c38d40361cfe64e6fd11e1addb936 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 3 Feb 2017 18:20:49 -0500
Subject: [PATCH 242/322] macvtap: read vnet_hdr_size once

When IFF_VNET_HDR is enabled, a virtio_net header must precede data.
Data length is verified to be greater than or equal to expected header
length tun->vnet_hdr_sz before copying.

Macvtap functions read the value once, but unless READ_ONCE is used,
the compiler may ignore this and read multiple times. Enforce a single
read and locally cached value to avoid updates between test and use.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 402618565838..c27011bbe30c 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -681,7 +681,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	size_t linear;
 
 	if (q->flags & IFF_VNET_HDR) {
-		vnet_hdr_len = q->vnet_hdr_sz;
+		vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
 
 		err = -EINVAL;
 		if (len < vnet_hdr_len)
@@ -820,7 +820,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
 
 	if (q->flags & IFF_VNET_HDR) {
 		struct virtio_net_hdr vnet_hdr;
-		vnet_hdr_len = q->vnet_hdr_sz;
+		vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
 		if (iov_iter_count(iter) < vnet_hdr_len)
 			return -EINVAL;
 

From f3d83317a69e7d658e7c83e24f8b31ac533c39e3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 7 Feb 2017 09:32:30 +0100
Subject: [PATCH 243/322] Revert "ALSA: line6: Only determine control port
 properties if needed"

This reverts commit f6a0dd107ad0c8b59d1c9735eea4b8cb9f460949.

The commit caused a regression on LINE6 Transport that has no control
caps.  Although reverting the commit may result back in a spurious
error message for some device again, it's the simplest regression fix,
hence it's taken as is at first.  The further code fix will follow
later.

Fixes: f6a0dd107ad0 ("ALSA: line6: Only determine control port properties if needed")
Reported-by: Igor Zinovev <zinigor@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/line6/driver.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index 90009c0b3a92..ab3c280a23d1 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -754,8 +754,9 @@ int line6_probe(struct usb_interface *interface,
 		goto error;
 	}
 
+	line6_get_interval(line6);
+
 	if (properties->capabilities & LINE6_CAP_CONTROL) {
-		line6_get_interval(line6);
 		ret = line6_init_cap_control(line6);
 		if (ret < 0)
 			goto error;

From 5593523f968bc86d42a035c6df47d5e0979b5ace Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 4 Feb 2017 16:56:03 +0000
Subject: [PATCH 244/322] pegasus: Use heap buffers for all register access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allocating USB buffers on the stack is not portable, and no longer
works on x86_64 (with VMAP_STACK enabled as per default).

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
References: https://bugs.debian.org/852556
Reported-by: Lisandro Damián Nicanor Pérez Meyer <lisandro@debian.org>
Tested-by: Lisandro Damián Nicanor Pérez Meyer <lisandro@debian.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/pegasus.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 24e803fe9a53..36674484c6fb 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -126,40 +126,61 @@ static void async_ctrl_callback(struct urb *urb)
 
 static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data)
 {
+	u8 *buf;
 	int ret;
 
+	buf = kmalloc(size, GFP_NOIO);
+	if (!buf)
+		return -ENOMEM;
+
 	ret = usb_control_msg(pegasus->usb, usb_rcvctrlpipe(pegasus->usb, 0),
 			      PEGASUS_REQ_GET_REGS, PEGASUS_REQT_READ, 0,
-			      indx, data, size, 1000);
+			      indx, buf, size, 1000);
 	if (ret < 0)
 		netif_dbg(pegasus, drv, pegasus->net,
 			  "%s returned %d\n", __func__, ret);
+	else if (ret <= size)
+		memcpy(data, buf, ret);
+	kfree(buf);
 	return ret;
 }
 
-static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data)
+static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
+			 const void *data)
 {
+	u8 *buf;
 	int ret;
 
+	buf = kmemdup(data, size, GFP_NOIO);
+	if (!buf)
+		return -ENOMEM;
+
 	ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0),
 			      PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0,
-			      indx, data, size, 100);
+			      indx, buf, size, 100);
 	if (ret < 0)
 		netif_dbg(pegasus, drv, pegasus->net,
 			  "%s returned %d\n", __func__, ret);
+	kfree(buf);
 	return ret;
 }
 
 static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data)
 {
+	u8 *buf;
 	int ret;
 
+	buf = kmemdup(&data, 1, GFP_NOIO);
+	if (!buf)
+		return -ENOMEM;
+
 	ret = usb_control_msg(pegasus->usb, usb_sndctrlpipe(pegasus->usb, 0),
 			      PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data,
-			      indx, &data, 1, 1000);
+			      indx, buf, 1, 1000);
 	if (ret < 0)
 		netif_dbg(pegasus, drv, pegasus->net,
 			  "%s returned %d\n", __func__, ret);
+	kfree(buf);
 	return ret;
 }
 

From 7926aff5c57b577ab0f43364ff0c59d968f6a414 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 4 Feb 2017 16:56:32 +0000
Subject: [PATCH 245/322] rtl8150: Use heap buffers for all register access

Allocating USB buffers on the stack is not portable, and no longer
works on x86_64 (with VMAP_STACK enabled as per default).

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/rtl8150.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 95b7bd0d7abc..c81c79110cef 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -155,16 +155,36 @@ static const char driver_name [] = "rtl8150";
 */
 static int get_registers(rtl8150_t * dev, u16 indx, u16 size, void *data)
 {
-	return usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
-			       RTL8150_REQ_GET_REGS, RTL8150_REQT_READ,
-			       indx, 0, data, size, 500);
+	void *buf;
+	int ret;
+
+	buf = kmalloc(size, GFP_NOIO);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+			      RTL8150_REQ_GET_REGS, RTL8150_REQT_READ,
+			      indx, 0, buf, size, 500);
+	if (ret > 0 && ret <= size)
+		memcpy(data, buf, ret);
+	kfree(buf);
+	return ret;
 }
 
-static int set_registers(rtl8150_t * dev, u16 indx, u16 size, void *data)
+static int set_registers(rtl8150_t * dev, u16 indx, u16 size, const void *data)
 {
-	return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
-			       RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE,
-			       indx, 0, data, size, 500);
+	void *buf;
+	int ret;
+
+	buf = kmemdup(data, size, GFP_NOIO);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+			      RTL8150_REQ_SET_REGS, RTL8150_REQT_WRITE,
+			      indx, 0, buf, size, 500);
+	kfree(buf);
+	return ret;
 }
 
 static void async_set_reg_cb(struct urb *urb)

From d41149145f98fe26dcd0bfd1d6cc095e6e041418 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 4 Feb 2017 16:56:56 +0000
Subject: [PATCH 246/322] catc: Combine failure cleanup code in catc_probe()

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/catc.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 3daa41bdd4ea..985909eab72c 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -776,7 +776,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	struct net_device *netdev;
 	struct catc *catc;
 	u8 broadcast[ETH_ALEN];
-	int i, pktsz;
+	int i, pktsz, ret;
 
 	if (usb_set_interface(usbdev,
 			intf->altsetting->desc.bInterfaceNumber, 1)) {
@@ -811,12 +811,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	if ((!catc->ctrl_urb) || (!catc->tx_urb) || 
 	    (!catc->rx_urb) || (!catc->irq_urb)) {
 		dev_err(&intf->dev, "No free urbs available.\n");
-		usb_free_urb(catc->ctrl_urb);
-		usb_free_urb(catc->tx_urb);
-		usb_free_urb(catc->rx_urb);
-		usb_free_urb(catc->irq_urb);
-		free_netdev(netdev);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fail_free;
 	}
 
 	/* The F5U011 has the same vendor/product as the netmate but a device version of 0x130 */
@@ -913,16 +909,21 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	usb_set_intfdata(intf, catc);
 
 	SET_NETDEV_DEV(netdev, &intf->dev);
-	if (register_netdev(netdev) != 0) {
-		usb_set_intfdata(intf, NULL);
-		usb_free_urb(catc->ctrl_urb);
-		usb_free_urb(catc->tx_urb);
-		usb_free_urb(catc->rx_urb);
-		usb_free_urb(catc->irq_urb);
-		free_netdev(netdev);
-		return -EIO;
-	}
+	ret = register_netdev(netdev);
+	if (ret)
+		goto fail_clear_intfdata;
+
 	return 0;
+
+fail_clear_intfdata:
+	usb_set_intfdata(intf, NULL);
+fail_free:
+	usb_free_urb(catc->ctrl_urb);
+	usb_free_urb(catc->tx_urb);
+	usb_free_urb(catc->rx_urb);
+	usb_free_urb(catc->irq_urb);
+	free_netdev(netdev);
+	return ret;
 }
 
 static void catc_disconnect(struct usb_interface *intf)

From 2d6a0e9de03ee658a9adc3bfb2f0ca55dff1e478 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 4 Feb 2017 16:57:04 +0000
Subject: [PATCH 247/322] catc: Use heap buffer for memory size test

Allocating USB buffers on the stack is not portable, and no longer
works on x86_64 (with VMAP_STACK enabled as per default).

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/catc.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 985909eab72c..0acc9b640419 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -776,7 +776,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 	struct net_device *netdev;
 	struct catc *catc;
 	u8 broadcast[ETH_ALEN];
-	int i, pktsz, ret;
+	int pktsz, ret;
 
 	if (usb_set_interface(usbdev,
 			intf->altsetting->desc.bInterfaceNumber, 1)) {
@@ -840,15 +840,24 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
                 catc->irq_buf, 2, catc_irq_done, catc, 1);
 
 	if (!catc->is_f5u011) {
+		u32 *buf;
+		int i;
+
 		dev_dbg(dev, "Checking memory size\n");
 
-		i = 0x12345678;
-		catc_write_mem(catc, 0x7a80, &i, 4);
-		i = 0x87654321;	
-		catc_write_mem(catc, 0xfa80, &i, 4);
-		catc_read_mem(catc, 0x7a80, &i, 4);
+		buf = kmalloc(4, GFP_KERNEL);
+		if (!buf) {
+			ret = -ENOMEM;
+			goto fail_free;
+		}
+
+		*buf = 0x12345678;
+		catc_write_mem(catc, 0x7a80, buf, 4);
+		*buf = 0x87654321;
+		catc_write_mem(catc, 0xfa80, buf, 4);
+		catc_read_mem(catc, 0x7a80, buf, 4);
 	  
-		switch (i) {
+		switch (*buf) {
 		case 0x12345678:
 			catc_set_reg(catc, TxBufCount, 8);
 			catc_set_reg(catc, RxBufCount, 32);
@@ -863,6 +872,8 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 			dev_dbg(dev, "32k Memory\n");
 			break;
 		}
+
+		kfree(buf);
 	  
 		dev_dbg(dev, "Getting MAC from SEEROM.\n");
 	  

From 69629464e0b587f3711739b3aa2bcdaf2e075276 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Feb 2017 09:25:24 -0800
Subject: [PATCH 248/322] udp: properly cope with csum errors

Dmitry reported that UDP sockets being destroyed would trigger the
WARN_ON(atomic_read(&sk->sk_rmem_alloc)); in inet_sock_destruct()

It turns out we do not properly destroy skb(s) that have wrong UDP
checksum.

Thanks again to syzkaller team.

Fixes : 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h  | 4 +++-
 net/core/datagram.c | 8 ++++++--
 net/ipv4/udp.c      | 2 +-
 net/ipv6/udp.c      | 2 +-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index f0e867f58722..c4f5e6fca17c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2006,7 +2006,9 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
 void sk_stop_timer(struct sock *sk, struct timer_list *timer);
 
 int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
-			unsigned int flags);
+			unsigned int flags,
+			void (*destructor)(struct sock *sk,
+					   struct sk_buff *skb));
 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 662bea587165..ea633342ab0d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -332,7 +332,9 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
 EXPORT_SYMBOL(__skb_free_datagram_locked);
 
 int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
-			unsigned int flags)
+			unsigned int flags,
+			void (*destructor)(struct sock *sk,
+					   struct sk_buff *skb))
 {
 	int err = 0;
 
@@ -342,6 +344,8 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			atomic_dec(&skb->users);
+			if (destructor)
+				destructor(sk, skb);
 			err = 0;
 		}
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -375,7 +379,7 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
 
 int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
-	int err = __sk_queue_drop_skb(sk, skb, flags);
+	int err = __sk_queue_drop_skb(sk, skb, flags, NULL);
 
 	kfree_skb(skb);
 	sk_mem_reclaim_partial(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1307a7c2e544..8aab7d78d25b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1501,7 +1501,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, skb, flags)) {
+	if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4d5c4eee4b3f..8990856f5101 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -441,7 +441,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, skb, flags)) {
+	if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
 		if (is_udp4) {
 			UDP_INC_STATS(sock_net(sk),
 				      UDP_MIB_CSUMERRORS, is_udplite);

From bd4ce941c8d5b862b2f83364be5dbe8fc8ab48f8 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Mon, 6 Feb 2017 10:14:31 -0800
Subject: [PATCH 249/322] mlx4: Invoke softirqs after napi_reschedule

mlx4 may schedule napi from a workqueue. Afterwards, softirqs are not run
in a deterministic time frame and the following message may be logged:
NOHZ: local_softirq_pending 08

The problem is the same as what was described in commit ec13ee80145c
("virtio_net: invoke softirqs after __napi_schedule") and this patch
applies the same fix to mlx4.

Fixes: 07841f9d94c1 ("net/mlx4_en: Schedule napi when RX buffers allocation fails")
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index eac527e25ec9..cc003fdf0ed9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -514,8 +514,11 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv)
 		return;
 
 	for (ring = 0; ring < priv->rx_ring_num; ring++) {
-		if (mlx4_en_is_ring_empty(priv->rx_ring[ring]))
+		if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) {
+			local_bh_disable();
 			napi_reschedule(&priv->rx_cq[ring]->napi);
+			local_bh_enable();
+		}
 	}
 }
 

From 2dcab598484185dea7ec22219c76dcdd59e3cb90 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 6 Feb 2017 18:10:31 -0200
Subject: [PATCH 250/322] sctp: avoid BUG_ON on sctp_wait_for_sndbuf

Alexander Popov reported that an application may trigger a BUG_ON in
sctp_wait_for_sndbuf if the socket tx buffer is full, a thread is
waiting on it to queue more data and meanwhile another thread peels off
the association being used by the first thread.

This patch replaces the BUG_ON call with a proper error handling. It
will return -EPIPE to the original sendmsg call, similarly to what would
have been done if the association wasn't found in the first place.

Acked-by: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 37eeab7899fc..e214d2e7e9a3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7426,7 +7426,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		 */
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
-		BUG_ON(sk != asoc->base.sk);
+		if (sk != asoc->base.sk)
+			goto do_error;
 		lock_sock(sk);
 
 		*timeo_p = current_timeo;

From a524c218bc94c705886a0e0fedeee45d1931da32 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Date: Tue, 7 Feb 2017 09:44:58 -0800
Subject: [PATCH 251/322] ARC: [arcompact] brown paper bag bug in unaligned
 access delay slot fixup

Reported-by: Jo-Philipp Wich <jo@mein.io>
Fixes: 9aed02feae57bf7 ("ARC: [arcompact] handle unaligned access delay slot")
Cc: linux-kernel@vger.kernel.org
Cc: linux-snps-arc@lists.infradead.org
Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/kernel/unaligned.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index 91ebe382147f..5f69c3bd59bb 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -243,7 +243,7 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
 
 	/* clear any remanants of delay slot */
 	if (delay_mode(regs)) {
-		regs->ret = regs->bta ~1U;
+		regs->ret = regs->bta & ~1U;
 		regs->status32 &= ~STATUS_DE_MASK;
 	} else {
 		regs->ret += state.instr_len;

From 930a42ded3fede7ca3acafc9153f4f2d0f56a92c Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Tue, 7 Feb 2017 17:26:57 +1100
Subject: [PATCH 252/322] vfio/spapr_tce: Set window when adding additional
 groups to container

If a container already has a group attached, attaching a new group
should just program already created IOMMU tables to the hardware via
the iommu_table_group_ops::set_window() callback.

However commit 6f01cc692a16 ("vfio/spapr: Add a helper to create
default DMA window") did not just simplify the code but also removed
the set_window() calls in the case of attaching groups to a container
which already has tables so it broke VFIO PCI hotplug.

This reverts set_window() bits in tce_iommu_take_ownership_ddw().

Fixes: 6f01cc692a16 ("vfio/spapr: Add a helper to create default DMA window")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 7690e5bf3cf1..59b3f62a2d64 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1245,6 +1245,8 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
 static long tce_iommu_take_ownership_ddw(struct tce_container *container,
 		struct iommu_table_group *table_group)
 {
+	long i, ret = 0;
+
 	if (!table_group->ops->create_table || !table_group->ops->set_window ||
 			!table_group->ops->release_ownership) {
 		WARN_ON_ONCE(1);
@@ -1253,7 +1255,27 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
 
 	table_group->ops->take_ownership(table_group);
 
+	/* Set all windows to the new group */
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = container->tables[i];
+
+		if (!tbl)
+			continue;
+
+		ret = table_group->ops->set_window(table_group, i, tbl);
+		if (ret)
+			goto release_exit;
+	}
+
 	return 0;
+
+release_exit:
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+		table_group->ops->unset_window(table_group, i);
+
+	table_group->ops->release_ownership(table_group);
+
+	return ret;
 }
 
 static int tce_iommu_attach_group(void *iommu_data,

From 912964eacb111551db73429719eb5fadcab0ff8a Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 7 Feb 2017 20:56:08 +0800
Subject: [PATCH 253/322] sctp: check af before verify address in
 sctp_addr_id2transport

Commit 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the
addr before looking up assoc") invoked sctp_verify_addr to verify the
addr.

But it didn't check af variable beforehand, once users pass an address
with family = 0 through sockopt, sctp_get_af_specific will return NULL
and NULL pointer dereference will be caused by af->sockaddr_len.

This patch is to fix it by returning NULL if af variable is NULL.

Fixes: 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the addr before looking up assoc")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e214d2e7e9a3..1b5d669e3029 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -239,7 +239,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
 	union sctp_addr *laddr = (union sctp_addr *)addr;
 	struct sctp_transport *transport;
 
-	if (sctp_verify_addr(sk, laddr, af->sockaddr_len))
+	if (!af || sctp_verify_addr(sk, laddr, af->sockaddr_len))
 		return NULL;
 
 	addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep,

From b6789123bccba8b5feb9901ed2e8c3c39181979d Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 7 Feb 2017 11:11:16 -0800
Subject: [PATCH 254/322] mm: fix KPF_SWAPCACHE in /proc/kpageflags

Commit 6326fec1122c ("mm: Use owner_priv bit for PageSwapCache, valid
when PageSwapBacked") aliased PG_swapcache to PG_owner_priv_1 (and
depending on PageSwapBacked being true).

As a result, the KPF_SWAPCACHE bit in '/proc/kpageflags' should now be
synthesized, instead of being shown on unrelated pages which just happen
to have PG_owner_priv_1 set.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/page.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/page.c b/fs/proc/page.c
index a2066e6dee90..2726536489b1 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -173,7 +173,8 @@ u64 stable_page_flags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_ACTIVE,	PG_active);
 	u |= kpf_copy_bit(k, KPF_RECLAIM,	PG_reclaim);
 
-	u |= kpf_copy_bit(k, KPF_SWAPCACHE,	PG_swapcache);
+	if (PageSwapCache(page))
+		u |= 1 << KPF_SWAPCACHE;
 	u |= kpf_copy_bit(k, KPF_SWAPBACKED,	PG_swapbacked);
 
 	u |= kpf_copy_bit(k, KPF_UNEVICTABLE,	PG_unevictable);

From 413d37326700aaf708730b940b04192c36e13ef4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 7 Feb 2017 09:59:21 -0800
Subject: [PATCH 255/322] Input: synaptics-rmi4 - select 'SERIO' when needed

With CONFIG_SERIO=m, we get a build error for the rmi4-f03 driver,
added in linux-4.10:

warning: (HID_RMI) selects RMI4_F03 which has unmet direct dependencies (!UML && INPUT && RMI4_CORE && (SERIO=y || RMI4_CORE=SERIO))
drivers/input/built-in.o: In function `rmi_f03_attention':
rmi_f03.c:(.text+0xcfe0): undefined reference to `serio_interrupt'
rmi_f03.c:(.text+0xd055): undefined reference to `serio_interrupt'
drivers/input/built-in.o: In function `rmi_f03_remove':
rmi_f03.c:(.text+0xd115): undefined reference to `serio_unregister_port'
drivers/input/built-in.o: In function `rmi_f03_probe':
rmi_f03.c:(.text+0xd209): undefined reference to `__serio_register_port'

An earlier patch tried to fix this, but missed the HID_RMI driver that
does a 'select' on the F03 backend.

This adds a hidden Kconfig symbol that enforces 'serio' to be enabled
when RMI4-F03 is, which covers all cases.

Fixes: d7ddad0acc4a ("Input: synaptics-rmi4 - fix F03 build error when serio is module")
Fixes: c5e8848fc98e ("Input: synaptics-rmi4 - add support for F03")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/rmi4/Kconfig | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig
index 8993983e3fe4..bb7762bf2879 100644
--- a/drivers/input/rmi4/Kconfig
+++ b/drivers/input/rmi4/Kconfig
@@ -42,13 +42,19 @@ config RMI4_SMB
 config RMI4_F03
         bool "RMI4 Function 03 (PS2 Guest)"
 	depends on RMI4_CORE
-	depends on SERIO=y || RMI4_CORE=SERIO
         help
           Say Y here if you want to add support for RMI4 function 03.
 
           Function 03 provides PS2 guest support for RMI4 devices. This
           includes support for TrackPoints on TouchPads.
 
+config RMI4_F03_SERIO
+	tristate
+	depends on RMI4_CORE
+	depends on RMI4_F03
+	default RMI4_CORE
+	select SERIO
+
 config RMI4_2D_SENSOR
 	bool
 	depends on RMI4_CORE

From bfeda41d06d85ad9d52f2413cfc2b77be5022f75 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 7 Feb 2017 15:33:20 -0800
Subject: [PATCH 256/322] stacktrace, lockdep: Fix address, newline ugliness

Since KERN_CONT became meaningful again, lockdep stack traces have had
annoying extra newlines, like this:

[    5.561122] -> #1 (B){+.+...}:
[    5.561528]
[    5.561532] [<ffffffff810d8873>] lock_acquire+0xc3/0x210
[    5.562178]
[    5.562181] [<ffffffff816f6414>] mutex_lock_nested+0x74/0x6d0
[    5.562861]
[    5.562880] [<ffffffffa01aa3c3>] init_btrfs_fs+0x21/0x196 [btrfs]
[    5.563717]
[    5.563721] [<ffffffff81000472>] do_one_initcall+0x52/0x1b0
[    5.564554]
[    5.564559] [<ffffffff811a3af6>] do_init_module+0x5f/0x209
[    5.565357]
[    5.565361] [<ffffffff81122f4d>] load_module+0x218d/0x2b80
[    5.566020]
[    5.566021] [<ffffffff81123beb>] SyS_finit_module+0xeb/0x120
[    5.566694]
[    5.566696] [<ffffffff816fd241>] entry_SYSCALL_64_fastpath+0x1f/0xc2

That's happening because each printk() call now gets printed on its own
line, and we do a separate call to print the spaces before the symbol.
Fix it by doing the printk() directly instead of using the
print_ip_sym() helper.

Additionally, the symbol address isn't very helpful, so let's get rid of
that, too. The final result looks like this:

[    5.194518] -> #1 (B){+.+...}:
[    5.195002]        lock_acquire+0xc3/0x210
[    5.195439]        mutex_lock_nested+0x74/0x6d0
[    5.196491]        do_one_initcall+0x52/0x1b0
[    5.196939]        do_init_module+0x5f/0x209
[    5.197355]        load_module+0x218d/0x2b80
[    5.197792]        SyS_finit_module+0xeb/0x120
[    5.198251]        entry_SYSCALL_64_fastpath+0x1f/0xc2

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@fb.com
Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines")
Link: http://lkml.kernel.org/r/43b4e114724b2bdb0308fa86cb33aa07d3d67fad.1486510315.git.osandov@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/stacktrace.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index b6e4c16377c7..9c15a9124e83 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -18,10 +18,8 @@ void print_stack_trace(struct stack_trace *trace, int spaces)
 	if (WARN_ON(!trace->entries))
 		return;
 
-	for (i = 0; i < trace->nr_entries; i++) {
-		printk("%*c", 1 + spaces, ' ');
-		print_ip_sym(trace->entries[i]);
-	}
+	for (i = 0; i < trace->nr_entries; i++)
+		printk("%*c%pS\n", 1 + spaces, ' ', (void *)trace->entries[i]);
 }
 EXPORT_SYMBOL_GPL(print_stack_trace);
 
@@ -29,7 +27,6 @@ int snprint_stack_trace(char *buf, size_t size,
 			struct stack_trace *trace, int spaces)
 {
 	int i;
-	unsigned long ip;
 	int generated;
 	int total = 0;
 
@@ -37,9 +34,8 @@ int snprint_stack_trace(char *buf, size_t size,
 		return 0;
 
 	for (i = 0; i < trace->nr_entries; i++) {
-		ip = trace->entries[i];
-		generated = snprintf(buf, size, "%*c[<%p>] %pS\n",
-				1 + spaces, ' ', (void *) ip, (void *) ip);
+		generated = snprintf(buf, size, "%*c%pS\n", 1 + spaces, ' ',
+				     (void *)trace->entries[i]);
 
 		total += generated;
 

From 0c461cb727d146c9ef2d3e86214f498b78b7d125 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Tue, 31 Jan 2017 11:54:04 -0500
Subject: [PATCH 257/322] selinux: fix off-by-one in setprocattr

SELinux tries to support setting/clearing of /proc/pid/attr attributes
from the shell by ignoring terminating newlines and treating an
attribute value that begins with a NUL or newline as an attempt to
clear the attribute.  However, the test for clearing attributes has
always been wrong; it has an off-by-one error, and this could further
lead to reading past the end of the allocated buffer since commit
bb646cdb12e75d82258c2f2e7746d5952d3e321a ("proc_pid_attr_write():
switch to memdup_user()").  Fix the off-by-one error.

Even with this fix, setting and clearing /proc/pid/attr attributes
from the shell is not straightforward since the interface does not
support multiple write() calls (so shells that write the value and
newline separately will set and then immediately clear the attribute,
requiring use of echo -n to set the attribute), whereas trying to use
echo -n "" to clear the attribute causes the shell to skip the
write() call altogether since POSIX says that a zero-length write
causes no side effects. Thus, one must use echo -n to set and echo
without -n to clear, as in the following example:
$ echo -n unconfined_u:object_r:user_home_t:s0 > /proc/$$/attr/fscreate
$ cat /proc/$$/attr/fscreate
unconfined_u:object_r:user_home_t:s0
$ echo "" > /proc/$$/attr/fscreate
$ cat /proc/$$/attr/fscreate

Note the use of /proc/$$ rather than /proc/self, as otherwise
the cat command will read its own attribute value, not that of the shell.

There are no users of this facility to my knowledge; possibly we
should just get rid of it.

UPDATE: Upon further investigation it appears that a local process
with the process:setfscreate permission can cause a kernel panic as a
result of this bug.  This patch fixes CVE-2017-2618.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
[PM: added the update about CVE-2017-2618 to the commit description]
Cc: stable@vger.kernel.org # 3.5: d6ea83ec6864e
Signed-off-by: Paul Moore <paul@paul-moore.com>

Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 security/selinux/hooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c7c6619431d5..d98550abe16d 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5887,7 +5887,7 @@ static int selinux_setprocattr(struct task_struct *p,
 		return error;
 
 	/* Obtain a SID for the context, if one was specified. */
-	if (size && str[1] && str[1] != '\n') {
+	if (size && str[0] && str[0] != '\n') {
 		if (str[size-1] == '\n') {
 			str[size-1] = 0;
 			size--;

From 5351fbb1bf1413f6024892093528280769ca852f Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Thu, 26 Jan 2017 17:32:11 +0300
Subject: [PATCH 258/322] drm/i915: fix use-after-free in page_flip_completed()

page_flip_completed() dereferences 'work' variable after executing
queue_work(). This is not safe as the 'work' item might be already freed
by queued work:

    BUG: KASAN: use-after-free in page_flip_completed+0x3ff/0x490 at addr ffff8803dc010f90
    Call Trace:
     __asan_report_load8_noabort+0x59/0x80
     page_flip_completed+0x3ff/0x490
     intel_finish_page_flip_mmio+0xe3/0x130
     intel_pipe_handle_vblank+0x2d/0x40
     gen8_irq_handler+0x4a7/0xed0
     __handle_irq_event_percpu+0xf6/0x860
     handle_irq_event_percpu+0x6b/0x160
     handle_irq_event+0xc7/0x1b0
     handle_edge_irq+0x1f4/0xa50
     handle_irq+0x41/0x70
     do_IRQ+0x9a/0x200
     common_interrupt+0x89/0x89

    Freed:
     kfree+0x113/0x4d0
     intel_unpin_work_fn+0x29a/0x3b0
     process_one_work+0x79e/0x1b70
     worker_thread+0x611/0x1460
     kthread+0x241/0x3a0
     ret_from_fork+0x27/0x40

Move queue_work() after	trace_i915_flip_complete() to fix this.

Fixes: e5510fac98a7 ("drm/i915: add tracepoints for flip requests & completions")
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: <stable@vger.kernel.org> # v2.6.36+
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20170126143211.24013-1-aryabinin@virtuozzo.com
(cherry picked from commit 05c41f926fcc7ef838c80a6a99d84f67b4e0b824)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f1e4a21d4664..891c86aef99d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4262,10 +4262,10 @@ static void page_flip_completed(struct intel_crtc *intel_crtc)
 	drm_crtc_vblank_put(&intel_crtc->base);
 
 	wake_up_all(&dev_priv->pending_flip_queue);
-	queue_work(dev_priv->wq, &work->unpin_work);
-
 	trace_i915_flip_complete(intel_crtc->plane,
 				 work->pending_flip_obj);
+
+	queue_work(dev_priv->wq, &work->unpin_work);
 }
 
 static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)

From e3818697e1d9140d0b990fecf4429d40c41ca0b5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 9 Jan 2017 11:19:32 +0000
Subject: [PATCH 259/322] drm/i915: Flush untouched framebuffers before display
 on !llc

On a non-llc system, the objects are created with .cache_level =
CACHE_NONE and so the transition to uncached for scanout is a no-op.
However, if the object was never written to, it will still be in the CPU
domain (having been zeroed out by shmemfs). Those cachelines need to be
flushed prior to display.

Reported-and-tested-by: Vito Caputo
Fixes: a6a7cc4b7db6 ("drm/i915: Always flush the dirty CPU cache when pinning the scanout")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.10-rc1+
Link: http://patchwork.freedesktop.org/patch/msgid/20170109111932.6342-1-chris@chris-wilson.co.uk
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit 69aeafeae9b30d797c439a30d1a4ccc8dc5b0eb0)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b23a7814713..b3f1134fb9c8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3478,7 +3478,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty) {
+	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
 		i915_gem_clflush_object(obj, true);
 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 	}

From 7152187159193056f30ad5726741bb25028672bf Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 2 Feb 2017 10:47:11 +0100
Subject: [PATCH 260/322] drm/i915: fix i915 running as dom0 under Xen

Commit 920cf4194954ec ("drm/i915: Introduce an internal allocator for
disposable private objects") introduced a regression for the kernel
running as Xen dom0: when switching to graphics mode a GPU HANG
occurred.

Reason seems to be a missing adaption similar to that done in
commit 7453c549f5f648 ("swiotlb: Export swiotlb_max_segment to users")
to i915_gem_object_get_pages_internal().

So limit the maximum page order to be used according to the maximum
swiotlb segment size instead to the complete swiotlb size.

Fixes: 920cf4194954 ("drm/i915: Introduce an internal allocator for disposable private objects")
Signed-off-by: Juergen Gross <jgross@suse.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170202094711.939-1-jgross@suse.com
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.10-rc1+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 5584f1b1d73e9cc95092734c316e467c6c4468f9)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_internal.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 4b3ff3e5b911..d09c74973cb3 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -66,8 +66,16 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 
 	max_order = MAX_ORDER;
 #ifdef CONFIG_SWIOTLB
-	if (swiotlb_nr_tbl()) /* minimum max swiotlb size is IO_TLB_SEGSIZE */
-		max_order = min(max_order, ilog2(IO_TLB_SEGPAGES));
+	if (swiotlb_nr_tbl()) {
+		unsigned int max_segment;
+
+		max_segment = swiotlb_max_segment();
+		if (max_segment) {
+			max_segment = max_t(unsigned int, max_segment,
+					    PAGE_SIZE) >> PAGE_SHIFT;
+			max_order = min(max_order, ilog2(max_segment));
+		}
+	}
 #endif
 
 	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;

From 853277481178fdf14d1a4e9e6ac7174d6046176f Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 1 Feb 2017 15:46:09 +0200
Subject: [PATCH 261/322] drm/i915: don't warn about Skylake CPU - KabyPoint
 PCH combo

Apparently there are machines out there with Skylake CPU and KabyPoint
PCH. Judging from our driver code, there doesn't seem to be any code
paths that would do anything different between SunrisePoint and
KabyPoint PCHs, so it would seem okay to accept the combo without
warnings.

Fixes: 22dea0be50b2 ("drm/i915: Introduce Kabypoint PCH for Kabylake H/DT.")
References: https://lists.freedesktop.org/archives/intel-gfx/2017-February/118611.html
Reported-by: Rainer Koenig <Rainer.Koenig@ts.fujitsu.com>
Cc: Rainer Koenig <Rainer.Koenig@ts.fujitsu.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <stable@vger.kernel.org> # v4.8+
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1485956769-26015-1-git-send-email-jani.nikula@intel.com
(cherry picked from commit 3aac4acb89710fe782c9e78e7b1febf76e112c6c)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b2c4a0b8a627..8d6309865fa5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -213,7 +213,8 @@ static void intel_detect_pch(struct drm_device *dev)
 			} else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_KBP;
 				DRM_DEBUG_KMS("Found KabyPoint PCH\n");
-				WARN_ON(!IS_KABYLAKE(dev_priv));
+				WARN_ON(!IS_SKYLAKE(dev_priv) &&
+					!IS_KABYLAKE(dev_priv));
 			} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
 				   (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) ||
 				   ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&

From 5cad24d835772f9f709971a8d6fcf12afe53b2a7 Mon Sep 17 00:00:00 2001
From: Jean-Nicolas Graux <jean-nicolas.graux@st.com>
Date: Tue, 7 Feb 2017 12:12:41 +0100
Subject: [PATCH 262/322] mmc: mmci: avoid clearing ST Micro busy end interrupt
 mistakenly

This fixes a race condition that may occur whenever ST micro busy end
interrupt is raised just after being unmasked but before leaving mmci
interrupt context.

A dead-lock has been found if connecting mmci ST Micro variant whose amba
id is 0x10480180 to some new eMMC that supports internal caches.  Whenever
mmci driver enables cache control by programming eMMC's EXT_CSD register,
block driver may request to flush the eMMC internal caches causing mmci
driver to send a MMC_SWITCH command to the card with FLUSH_CACHE operation.
And because busy end interrupt may be mistakenly cleared while not yet
processed, this mmc request may never complete.  As a result, mmcqd task
may be stuck forever.

Here is an instance caught by lockup detector which shows that mmcqd task
was hung while waiting for mmc_flush_cache command to complete:

..
[  240.251595] INFO: task mmcqd/1:52 blocked for more than 120 seconds.
[  240.257973]       Not tainted 4.1.13-00510-g9d91424 #2
[  240.263109] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  240.270955] mmcqd/1         D c047504c     0    52      2 0x00000000
[  240.277359] [<c047504c>] (__schedule) from [<c04754a0>] (schedule+0x40/0x98)
[  240.284418] [<c04754a0>] (schedule) from [<c0477d40>] (schedule_timeout+0x148/0x188)
[  240.292191] [<c0477d40>] (schedule_timeout) from [<c0476040>] (wait_for_common+0xa4/0x170)
[  240.300491] [<c0476040>] (wait_for_common) from [<c02efc1c>] (mmc_wait_for_req_done+0x4c/0x13c)
[  240.309224] [<c02efc1c>] (mmc_wait_for_req_done) from [<c02efd90>] (mmc_wait_for_cmd+0x64/0x84)
[  240.317953] [<c02efd90>] (mmc_wait_for_cmd) from [<c02f5b14>] (__mmc_switch+0xa4/0x2a8)
[  240.325964] [<c02f5b14>] (__mmc_switch) from [<c02f5d40>] (mmc_switch+0x28/0x30)
[  240.333389] [<c02f5d40>] (mmc_switch) from [<c02f0984>] (mmc_flush_cache+0x54/0x80)
[  240.341073] [<c02f0984>] (mmc_flush_cache) from [<c02ff0c4>] (mmc_blk_issue_rq+0x114/0x4e8)
[  240.349459] [<c02ff0c4>] (mmc_blk_issue_rq) from [<c03008d4>] (mmc_queue_thread+0xc0/0x180)
[  240.357844] [<c03008d4>] (mmc_queue_thread) from [<c003cf90>] (kthread+0xdc/0xf4)
[  240.365339] [<c003cf90>] (kthread) from [<c0010068>] (ret_from_fork+0x14/0x2c)
..
..
[  240.664311] INFO: task partprobe:564 blocked for more than 120 seconds.
[  240.670943]       Not tainted 4.1.13-00510-g9d91424 #2
[  240.676078] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  240.683922] partprobe       D c047504c     0   564    486 0x00000000
[  240.690318] [<c047504c>] (__schedule) from [<c04754a0>] (schedule+0x40/0x98)
[  240.697396] [<c04754a0>] (schedule) from [<c0477d40>] (schedule_timeout+0x148/0x188)
[  240.705149] [<c0477d40>] (schedule_timeout) from [<c0476040>] (wait_for_common+0xa4/0x170)
[  240.713446] [<c0476040>] (wait_for_common) from [<c01f3300>] (submit_bio_wait+0x58/0x64)
[  240.721571] [<c01f3300>] (submit_bio_wait) from [<c01fbbd8>] (blkdev_issue_flush+0x60/0x88)
[  240.729957] [<c01fbbd8>] (blkdev_issue_flush) from [<c010ff84>] (blkdev_fsync+0x34/0x44)
[  240.738083] [<c010ff84>] (blkdev_fsync) from [<c0109594>] (do_fsync+0x3c/0x64)
[  240.745319] [<c0109594>] (do_fsync) from [<c000ffc0>] (ret_fast_syscall+0x0/0x3c)
..

Here is the detailed sequence showing when this issue may happen:

1) At probe time, mmci device is initialized and card busy detection based
on DAT[0] monitoring is enabled.

2) Later during run time, since card reported to support internal caches, a
MMCI_SWITCH command is sent to eMMC device with FLUSH_CACHE operation. On
receiving this command, eMMC may enter busy state (for a relatively short
time in the case of the dead-lock).

3) Then mmci interrupt is raised and mmci_irq() is called:

MMCISTATUS register is read and is equal to 0x01000440. So the following
status bits are set:
- MCI_CMDRESPEND (= 6)
- MCI_DATABLOCKEND (= 10)
- MCI_ST_CARDBUSY (= 24)

Since MMCIMASK0 register is 0x3FF, status variable is set to 0x00000040 and
BIT MCI_CMDRESPEND is cleared by writing MMCICLEAR register.

Then mmci_cmd_irq() is called. Considering the following conditions:
- host->busy_status is 0,
- this is a "busy response",
- reading again MMCISTATUS register gives 0x1000400,
MMCIMASK0 is updated to unmask MCI_ST_BUSYEND bit.

Thus, MMCIMASK0 is set to 0x010003FF and host->busy_status is set to wait
for busy end completion.

Back again in status loop of mmci_irq(), we quickly go through
mmci_data_irq() as there are no data in that case.  And we finally go
through following test at the end of while(status) loop:

/*
 * Don't poll for busy completion in irq context.
 */
if (host->variant->busy_detect && host->busy_status)
	status &= ~host->variant->busy_detect_flag;

Because status variable is not yet null (is equal to 0x40), we do not leave
interrupt context yet but we loop again into while(status) loop. So we run
across following steps:

a) MMCISTATUS register is read again and this time is equal to 0x01000400.
So that following bits are set:
- MCI_DATABLOCKEND (= 10)
- MCI_ST_CARDBUSY (= 24)

Since MMCIMASK0 register is equal to 0x010003FF:

b) status variable is set to 0x01000000.
c) MCI_ST_CARDBUSY bit is cleared by writing MMCICLEAR register.

Then, mmci_cmd_irq() is called one more time. Since host->busy_status is
set and that MCI_ST_CARDBUSY is set in status variable, we just return from
this function.

Back again in mmci_irq(), status variable is set to 0 and we finally leave
the while(status) loop. As a result we leave interrupt context, waiting for
busy end interrupt event.

Now, consider that busy end completion is raised IN BETWEEN steps 3.a) and
3.c). In such a case, we may mistakenly clear busy end interrupt at step
3.c) while it has not yet been processed. This will result in mmc command
to wait forever for a busy end completion that will never happen.

To fix the problem, this patch implements the following changes:

Considering that the mmci seems to be triggering the IRQ on both edges
while monitoring DAT0 for busy completion and that same status bit is used
to monitor start and end of busy detection, special care must be taken to
make sure that both start and end interrupts are always cleared one after
the other.

1) Clearing of card busy bit is moved in mmc_cmd_irq() function where
unmasking of busy end bit is effectively handled.
2) Just before unmasking busy end event, busy start event is cleared by
writing card busy bit in MMCICLEAR register.
3) Finally, once we are no more busy with a command, busy end event is
cleared writing again card busy bit in MMCICLEAR register.

This patch has been tested with the ST Accordo5 machine, not yet supported
upstream but relies on the mmci driver.

Signed-off-by: Sarang Mairal <sarang.mairal@garmin.com>
Signed-off-by: Jean-Nicolas Graux <jean-nicolas.graux@st.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mmci.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 01a804792f30..b5972440c1bf 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1023,7 +1023,12 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
 		if (!host->busy_status && busy_resp &&
 		    !(status & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT)) &&
 		    (readl(base + MMCISTATUS) & host->variant->busy_detect_flag)) {
-			/* Unmask the busy IRQ */
+
+			/* Clear the busy start IRQ */
+			writel(host->variant->busy_detect_mask,
+			       host->base + MMCICLEAR);
+
+			/* Unmask the busy end IRQ */
 			writel(readl(base + MMCIMASK0) |
 			       host->variant->busy_detect_mask,
 			       base + MMCIMASK0);
@@ -1038,10 +1043,14 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
 
 		/*
 		 * At this point we are not busy with a command, we have
-		 * not received a new busy request, mask the busy IRQ and
-		 * fall through to process the IRQ.
+		 * not received a new busy request, clear and mask the busy
+		 * end IRQ and fall through to process the IRQ.
 		 */
 		if (host->busy_status) {
+
+			writel(host->variant->busy_detect_mask,
+			       host->base + MMCICLEAR);
+
 			writel(readl(base + MMCIMASK0) &
 			       ~host->variant->busy_detect_mask,
 			       base + MMCIMASK0);
@@ -1283,12 +1292,21 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
 		}
 
 		/*
-		 * We intentionally clear the MCI_ST_CARDBUSY IRQ here (if it's
-		 * enabled) since the HW seems to be triggering the IRQ on both
-		 * edges while monitoring DAT0 for busy completion.
+		 * We intentionally clear the MCI_ST_CARDBUSY IRQ (if it's
+		 * enabled) in mmci_cmd_irq() function where ST Micro busy
+		 * detection variant is handled. Considering the HW seems to be
+		 * triggering the IRQ on both edges while monitoring DAT0 for
+		 * busy completion and that same status bit is used to monitor
+		 * start and end of busy detection, special care must be taken
+		 * to make sure that both start and end interrupts are always
+		 * cleared one after the other.
 		 */
 		status &= readl(host->base + MMCIMASK0);
-		writel(status, host->base + MMCICLEAR);
+		if (host->variant->busy_detect)
+			writel(status & ~host->variant->busy_detect_mask,
+			       host->base + MMCICLEAR);
+		else
+			writel(status, host->base + MMCICLEAR);
 
 		dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
 

From 789ea12500e5ce3911d0a6a822277c3133451927 Mon Sep 17 00:00:00 2001
From: "Lee, Shawn C" <shawn.c.lee@intel.com>
Date: Fri, 3 Feb 2017 12:32:09 +0800
Subject: [PATCH 263/322] drm/i915/bxt: Add MST support when do DPLL
 calculation

Add the missing INTEL_OUTPUT_DP_MST case in bxt_get_dpll()
to correctly initialize the crtc_state and port plls when
link training a DP MST monitor on BXT/APL devices.

Fixes: a277ca7dc01d ("drm/i915: Split bxt_ddi_pll_select()")
Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=99572
Reviewed-by: Cooper Chiou <cooper.chiou@intel.com>
Reviewed-by: Gary C Wang <gary.c.wang@intel.com>
Reviewed-by: Ciobanu, Nathan D <nathan.d.ciobanu@intel.com>
Reviewed-by: Herbert, Marc <marc.herbert@intel.com>
Reviewed-by: Bride, Jim <jim.bride@intel.com>
Reviewed-by: Navare, Manasi D <manasi.d.navare@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: <stable@vger.kernel.org> # v4.9+
Signed-off-by: Lee, Shawn C <shawn.c.lee@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1486096329-6255-1-git-send-email-shawn.c.lee@intel.com
(cherry picked from commit 0aab2c721d81590012a5021a516f00666646741f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 58a756f2f224..a2f0e070d38d 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -1730,7 +1730,8 @@ bxt_get_dpll(struct intel_crtc *crtc,
 		return NULL;
 
 	if ((encoder->type == INTEL_OUTPUT_DP ||
-	     encoder->type == INTEL_OUTPUT_EDP) &&
+	     encoder->type == INTEL_OUTPUT_EDP ||
+	     encoder->type == INTEL_OUTPUT_DP_MST) &&
 	    !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state))
 		return NULL;
 

From 83bf6d55c132d5c4f773e5a04149c05f4aa0c2ad Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 3 Feb 2017 12:57:17 +0000
Subject: [PATCH 264/322] drm/i915: Remove overzealous fence warn on runtime
 suspend

The goal of the WARN was to catch when we are still actively using the
fence as we go into the runtime suspend. However, the reg->pin_count is
too coarse as it does not distinguish between exclusive ownership of the
fence register from activity.

I've not improved on the WARN, nor have we captured this WARN in an
exact igt, but it is showing up regularly in the wild:

[ 1915.935332] WARNING: CPU: 1 PID: 10861 at drivers/gpu/drm/i915/i915_gem.c:2022 i915_gem_runtime_suspend+0x116/0x130 [i915]
[ 1915.935383] WARN_ON(reg->pin_count)[ 1915.935399] Modules linked in:
 snd_hda_intel i915 drm_kms_helper vgem netconsole scsi_transport_iscsi fuse vfat fat x86_pkg_temp_thermal coretemp intel_cstate intel_uncore snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mei_me mei serio_raw intel_rapl_perf intel_pch_thermal soundcore wmi acpi_pad i2c_algo_bit syscopyarea sysfillrect sysimgblt fb_sys_fops drm r8169 mii video [last unloaded: drm_kms_helper]
[ 1915.935785] CPU: 1 PID: 10861 Comm: kworker/1:0 Tainted: G     U  W       4.9.0-rc5+ #170
[ 1915.935799] Hardware name: LENOVO 80MX/Lenovo E31-80, BIOS DCCN34WW(V2.03) 12/01/2015
[ 1915.935822] Workqueue: pm pm_runtime_work
[ 1915.935845]  ffffc900044fbbf0 ffffffffac3220bc ffffc900044fbc40 0000000000000000
[ 1915.935890]  ffffc900044fbc30 ffffffffac059bcb 000007e6044fbc60 ffff8801626e3198
[ 1915.935937]  ffff8801626e0000 0000000000000002 ffffffffc05e5d4e 0000000000000000
[ 1915.935985] Call Trace:
[ 1915.936013]  [<ffffffffac3220bc>] dump_stack+0x4f/0x73
[ 1915.936038]  [<ffffffffac059bcb>] __warn+0xcb/0xf0
[ 1915.936060]  [<ffffffffac059c4f>] warn_slowpath_fmt+0x5f/0x80
[ 1915.936158]  [<ffffffffc052d916>] i915_gem_runtime_suspend+0x116/0x130 [i915]
[ 1915.936251]  [<ffffffffc04f1c74>] intel_runtime_suspend+0x64/0x280 [i915]
[ 1915.936277]  [<ffffffffac0926f1>] ? dequeue_entity+0x241/0xbc0
[ 1915.936298]  [<ffffffffac36bb85>] pci_pm_runtime_suspend+0x55/0x180
[ 1915.936317]  [<ffffffffac36bb30>] ? pci_pm_runtime_resume+0xa0/0xa0
[ 1915.936339]  [<ffffffffac4514e2>] __rpm_callback+0x32/0x70
[ 1915.936356]  [<ffffffffac451544>] rpm_callback+0x24/0x80
[ 1915.936375]  [<ffffffffac36bb30>] ? pci_pm_runtime_resume+0xa0/0xa0
[ 1915.936392]  [<ffffffffac45222d>] rpm_suspend+0x12d/0x680
[ 1915.936415]  [<ffffffffac69f6d7>] ? _raw_spin_unlock_irq+0x17/0x30
[ 1915.936435]  [<ffffffffac0810b8>] ? finish_task_switch+0x88/0x220
[ 1915.936455]  [<ffffffffac4534bf>] pm_runtime_work+0x6f/0xb0
[ 1915.936477]  [<ffffffffac074353>] process_one_work+0x1f3/0x4d0
[ 1915.936501]  [<ffffffffac074678>] worker_thread+0x48/0x4e0
[ 1915.936523]  [<ffffffffac074630>] ? process_one_work+0x4d0/0x4d0
[ 1915.936542]  [<ffffffffac074630>] ? process_one_work+0x4d0/0x4d0
[ 1915.936559]  [<ffffffffac07a2c9>] kthread+0xd9/0xf0
[ 1915.936580]  [<ffffffffac07a1f0>] ? kthread_park+0x60/0x60
[ 1915.936600]  [<ffffffffac69fe62>] ret_from_fork+0x22/0x30

In the case the register is pinned, it should be present and we will
need to invalidate them to be restored upon resume as we cannot expect
the owner of the pin to call get_fence prior to use after resume.

Fixes: 7c108fd8feac ("drm/i915: Move fence cancellation to runtime suspend")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98804
Reported-by: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Imre Deak <imre.deak@linux.intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.10-rc1+
Link: http://patchwork.freedesktop.org/patch/msgid/20170203125717.8431-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit e0ec3ec698851a6c97a12d696407b3ff77700c23)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8d6309865fa5..728ca3ea74d2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2428,6 +2428,7 @@ static int intel_runtime_resume(struct device *kdev)
 	 * we can do is to hope that things will still work (and disable RPM).
 	 */
 	i915_gem_init_swizzling(dev_priv);
+	i915_gem_restore_fences(dev_priv);
 
 	intel_runtime_pm_enable_interrupts(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b3f1134fb9c8..24b5b046754b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2010,8 +2010,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 
-		if (WARN_ON(reg->pin_count))
-			continue;
+		/* Ideally we want to assert that the fence register is not
+		 * live at this point (i.e. that no piece of code will be
+		 * trying to write through fence + GTT, as that both violates
+		 * our tracking of activity and associated locking/barriers,
+		 * but also is illegal given that the hw is powered down).
+		 *
+		 * Previously we used reg->pin_count as a "liveness" indicator.
+		 * That is not sufficient, and we need a more fine-grained
+		 * tool if we want to have a sanity check here.
+		 */
 
 		if (!reg->vma)
 			continue;

From 6e7eb1783be7f19eb071c96ddda0bbf22279ff46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Winiarski?= <michal.winiarski@intel.com>
Date: Tue, 7 Feb 2017 20:55:59 +0100
Subject: [PATCH 265/322] drm/i915: Always convert incoming exec offsets to
 non-canonical
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're using non-canonical addresses in drm_mm, and we're making sure that
userspace is using canonical addressing - both in case of softpin
(verifying incoming offset) and when relocating (converting to canonical
when updating offset returned to userspace).
Unfortunately when considering the need for relocations, we're comparing
offset from userspace (in canonical form) with drm_mm node (in
non-canonical form), and as a result, we end up always relocating if our
offsets are in the "problematic" range.
Let's always convert the offsets to avoid the performance impact of
relocations.

Fixes: a5f0edf63bdf ("drm/i915: Avoid writing relocs with addresses in non-canonical form")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michel Thierry <michel.thierry@intel.com>
Reported-by: Michał Pyrzowski <michal.pyrzowski@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170207195559.18798-1-michal.winiarski@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 038c95a313e4ca954ee5ab8a0c7559a646b0f462)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 097d9d8c2315..b8b877c91b0a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1181,14 +1181,14 @@ validate_exec_list(struct drm_device *dev,
 			if (exec[i].offset !=
 			    gen8_canonical_addr(exec[i].offset & PAGE_MASK))
 				return -EINVAL;
-
-			/* From drm_mm perspective address space is continuous,
-			 * so from this point we're always using non-canonical
-			 * form internally.
-			 */
-			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
 		}
 
+		/* From drm_mm perspective address space is continuous,
+		 * so from this point we're always using non-canonical
+		 * form internally.
+		 */
+		exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
+
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
 

From 4842e98f26dd80be3623c4714a244ba52ea096a8 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 8 Feb 2017 12:35:39 +0100
Subject: [PATCH 266/322] ALSA: seq: Fix race at creating a queue

When a sequencer queue is created in snd_seq_queue_alloc(),it adds the
new queue element to the public list before referencing it.  Thus the
queue might be deleted before the call of snd_seq_queue_use(), and it
results in the use-after-free error, as spotted by syzkaller.

The fix is to reference the queue object at the right time.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_queue.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
index 0bec02e89d51..450c5187eecb 100644
--- a/sound/core/seq/seq_queue.c
+++ b/sound/core/seq/seq_queue.c
@@ -181,6 +181,8 @@ void __exit snd_seq_queues_delete(void)
 	}
 }
 
+static void queue_use(struct snd_seq_queue *queue, int client, int use);
+
 /* allocate a new queue -
  * return queue index value or negative value for error
  */
@@ -192,11 +194,11 @@ int snd_seq_queue_alloc(int client, int locked, unsigned int info_flags)
 	if (q == NULL)
 		return -ENOMEM;
 	q->info_flags = info_flags;
+	queue_use(q, client, 1);
 	if (queue_list_add(q) < 0) {
 		queue_delete(q);
 		return -ENOMEM;
 	}
-	snd_seq_queue_use(q->queue, client, 1); /* use this queue */
 	return q->queue;
 }
 
@@ -502,19 +504,9 @@ int snd_seq_queue_timer_set_tempo(int queueid, int client,
 	return result;
 }
 
-
-/* use or unuse this queue -
- * if it is the first client, starts the timer.
- * if it is not longer used by any clients, stop the timer.
- */
-int snd_seq_queue_use(int queueid, int client, int use)
+/* use or unuse this queue */
+static void queue_use(struct snd_seq_queue *queue, int client, int use)
 {
-	struct snd_seq_queue *queue;
-
-	queue = queueptr(queueid);
-	if (queue == NULL)
-		return -EINVAL;
-	mutex_lock(&queue->timer_mutex);
 	if (use) {
 		if (!test_and_set_bit(client, queue->clients_bitmap))
 			queue->clients++;
@@ -529,6 +521,21 @@ int snd_seq_queue_use(int queueid, int client, int use)
 	} else {
 		snd_seq_timer_close(queue);
 	}
+}
+
+/* use or unuse this queue -
+ * if it is the first client, starts the timer.
+ * if it is not longer used by any clients, stop the timer.
+ */
+int snd_seq_queue_use(int queueid, int client, int use)
+{
+	struct snd_seq_queue *queue;
+
+	queue = queueptr(queueid);
+	if (queue == NULL)
+		return -EINVAL;
+	mutex_lock(&queue->timer_mutex);
+	queue_use(queue, client, use);
 	mutex_unlock(&queue->timer_mutex);
 	queuefree(queue);
 	return 0;

From d7df2443cd5f67fc6ee7c05a88e4996e8177f91b Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 3 Feb 2017 17:10:28 +1100
Subject: [PATCH 267/322] powerpc/mm: Fix spurrious segfaults on radix with
 autonuma

When autonuma (Automatic NUMA balancing) marks a PTE inaccessible it
clears all the protection bits but leave the PTE valid.

With the Radix MMU, an attempt at executing from such a PTE will
take a fault with bit 35 of SRR1 set "SRR1_ISI_N_OR_G".

It is thus incorrect to treat all such faults as errors. We should
pass them to handle_mm_fault() for autonuma to deal with. The case
of pages that are really not executable is handled by the existing
test for VM_EXEC further down.

That leaves us with catching the kernel attempts at executing user
pages. We can catch that earlier, even before we do find_vma.

It is never valid on powerpc for the kernel to take an exec fault
to begin with. So fold that test with the existing test for the
kernel faulting on kernel addresses to bail out early.

Fixes: 1d18ad026844 ("powerpc/mm: Detect instruction fetch denied and report")
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/fault.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6fd30ac7d14a..62a50d6d1053 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -253,8 +253,11 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (unlikely(debugger_fault_handler(regs)))
 		goto bail;
 
-	/* On a kernel SLB miss we can only check for a valid exception entry */
-	if (!user_mode(regs) && (address >= TASK_SIZE)) {
+	/*
+	 * The kernel should never take an execute fault nor should it
+	 * take a page fault to a kernel address.
+	 */
+	if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) {
 		rc = SIGSEGV;
 		goto bail;
 	}
@@ -390,20 +393,6 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 #endif /* CONFIG_8xx */
 
 	if (is_exec) {
-		/*
-		 * An execution fault + no execute ?
-		 *
-		 * On CPUs that don't have CPU_FTR_COHERENT_ICACHE we
-		 * deliberately create NX mappings, and use the fault to do the
-		 * cache flush. This is usually handled in hash_page_do_lazy_icache()
-		 * but we could end up here if that races with a concurrent PTE
-		 * update. In that case we need to fall through here to the VMA
-		 * check below.
-		 */
-		if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
-			(regs->msr & SRR1_ISI_N_OR_G))
-			goto bad_area;
-
 		/*
 		 * Allow execution from readable areas if the MMU does not
 		 * provide separate controls over reading and executing.

From 391e2a6de9781e4906dd7e0b1cc097050bf43e11 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 23 Oct 2016 14:28:15 -0700
Subject: [PATCH 268/322] target: Don't BUG_ON during NodeACL dynamic ->
 explicit conversion

After the v4.2+ RCU conversion to se_node_acl->lun_entry_hlist,
a BUG_ON() was added in core_enable_device_list_for_node() to
detect when the located orig->se_lun_acl contains an existing
se_lun_acl pointer reference.

However, this scenario can happen when a dynamically generated
NodeACL is being converted to an explicit NodeACL, when the
explicit NodeACL contains a different LUN mapping than the
default provided by the WWN endpoint.

So instead of triggering BUG_ON(), go ahead and fail instead
following the original pre RCU conversion logic.

Reported-by: Benjamin ESTRABAUD <ben.estrabaud@mpstor.com>
Cc: Benjamin ESTRABAUD <ben.estrabaud@mpstor.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org # 4.2+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_device.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 1ebd13ef7bd3..26929c44d703 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -352,7 +352,15 @@ int core_enable_device_list_for_node(
 			kfree(new);
 			return -EINVAL;
 		}
-		BUG_ON(orig->se_lun_acl != NULL);
+		if (orig->se_lun_acl != NULL) {
+			pr_warn_ratelimited("Detected existing explicit"
+				" se_lun_acl->se_lun_group reference for %s"
+				" mapped_lun: %llu, failing\n",
+				 nacl->initiatorname, mapped_lun);
+			mutex_unlock(&nacl->lun_entry_mutex);
+			kfree(new);
+			return -EINVAL;
+		}
 
 		rcu_assign_pointer(new->se_lun, lun);
 		rcu_assign_pointer(new->se_lun_acl, lun_acl);

From 0583c261e6325f392c1f7a1b9112e31298e1a4bd Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Mon, 31 Oct 2016 00:54:40 -0700
Subject: [PATCH 269/322] target: Use correct SCSI status during EXTENDED_COPY
 exception

This patch adds the missing target_complete_cmd() SCSI status
parameter change in target_xcopy_do_work(), that was originally
missing in commit 926317de33.

It correctly propigates up the correct SCSI status during
EXTENDED_COPY exception cases, instead of always using the
hardcoded SAM_STAT_CHECK_CONDITION from original code.

This is required for ESX host environments that expect to
hit SAM_STAT_RESERVATION_CONFLICT for certain scenarios,
and SAM_STAT_CHECK_CONDITION results in non-retriable
status for these cases.

Reported-by: Nixon Vincent <nixon.vincent@calsoftinc.com>
Tested-by: Nixon Vincent <nixon.vincent@calsoftinc.com>
Cc: Nixon Vincent <nixon.vincent@calsoftinc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org # 3.14+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_xcopy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index d828b3b5000b..cac5a20a4de0 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -864,7 +864,7 @@ static void target_xcopy_do_work(struct work_struct *work)
 			" CHECK_CONDITION -> sending response\n", rc);
 		ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
 	}
-	target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION);
+	target_complete_cmd(ec_cmd, ec_cmd->scsi_status);
 }
 
 sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)

From c54eeffbe9338fa982dc853d816fda9202a13b5a Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Tue, 6 Dec 2016 22:45:46 -0800
Subject: [PATCH 270/322] target: Fix early transport_generic_handle_tmr abort
 scenario

This patch fixes a bug where incoming task management requests
can be explicitly aborted during an active LUN_RESET, but who's
struct work_struct are canceled in-flight before execution.

This occurs when core_tmr_drain_tmr_list() invokes cancel_work_sync()
for the incoming se_tmr_req->task_cmd->work, resulting in cmd->work
for target_tmr_work() never getting invoked and the aborted TMR
waiting indefinately within transport_wait_for_tasks().

To address this case, perform a CMD_T_ABORTED check early in
transport_generic_handle_tmr(), and invoke the normal path via
transport_cmd_check_stop_to_fabric() to complete any TMR kthreads
blocked waiting for CMD_T_STOP in transport_wait_for_tasks().

Also, move the TRANSPORT_ISTATE_PROCESSING assignment earlier
into transport_generic_handle_tmr() so the existing check in
core_tmr_drain_tmr_list() avoids attempting abort the incoming
se_tmr_req->task_cmd->work if it has already been queued into
se_device->tmr_wq.

Reported-by: Rob Millner <rlm@daterainc.com>
Tested-by: Rob Millner <rlm@daterainc.com>
Cc: Rob Millner <rlm@daterainc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org # 3.14+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 1cadc9eefa21..8b698432aea5 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -3110,7 +3110,6 @@ static void target_tmr_work(struct work_struct *work)
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 		goto check_stop;
 	}
-	cmd->t_state = TRANSPORT_ISTATE_PROCESSING;
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
 	cmd->se_tfo->queue_tm_rsp(cmd);
@@ -3123,11 +3122,25 @@ int transport_generic_handle_tmr(
 	struct se_cmd *cmd)
 {
 	unsigned long flags;
+	bool aborted = false;
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	cmd->transport_state |= CMD_T_ACTIVE;
+	if (cmd->transport_state & CMD_T_ABORTED) {
+		aborted = true;
+	} else {
+		cmd->t_state = TRANSPORT_ISTATE_PROCESSING;
+		cmd->transport_state |= CMD_T_ACTIVE;
+	}
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
+	if (aborted) {
+		pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d"
+			"ref_tag: %llu tag: %llu\n", cmd->se_tmr_req->function,
+			cmd->se_tmr_req->ref_task_tag, cmd->tag);
+		transport_cmd_check_stop_to_fabric(cmd);
+		return 0;
+	}
+
 	INIT_WORK(&cmd->work, target_tmr_work);
 	queue_work(cmd->se_dev->tmr_wq, &cmd->work);
 	return 0;

From 01d4d673558985d9a118e1e05026633c3e2ade9b Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Wed, 7 Dec 2016 12:55:54 -0800
Subject: [PATCH 271/322] target: Fix multi-session dynamic se_node_acl double
 free OOPs

This patch addresses a long-standing bug with multi-session
(eg: iscsi-target + iser-target) se_node_acl dynamic free
withini transport_deregister_session().

This bug is caused when a storage endpoint is configured with
demo-mode (generate_node_acls = 1 + cache_dynamic_acls = 1)
initiators, and initiator login creates a new dynamic node acl
and attaches two sessions to it.

After that, demo-mode for the storage instance is disabled via
configfs (generate_node_acls = 0 + cache_dynamic_acls = 0) and
the existing dynamic acl is never converted to an explicit ACL.

The end result is dynamic acl resources are released twice when
the sessions are shutdown in transport_deregister_session().

If the storage instance is not changed to disable demo-mode,
or the dynamic acl is converted to an explict ACL, or there
is only a single session associated with the dynamic ACL,
the bug is not triggered.

To address this big, move the release of dynamic se_node_acl
memory into target_complete_nacl() so it's only freed once
when se_node_acl->acl_kref reaches zero.

(Drop unnecessary list_del_init usage - HCH)

Reported-by: Rob Millner <rlm@daterainc.com>
Tested-by: Rob Millner <rlm@daterainc.com>
Cc: Rob Millner <rlm@daterainc.com>
Cc: stable@vger.kernel.org # 4.1+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 69 ++++++++++++++++----------
 include/target/target_core_base.h      |  1 +
 2 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 8b698432aea5..437591bc7c08 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -457,8 +457,20 @@ static void target_complete_nacl(struct kref *kref)
 {
 	struct se_node_acl *nacl = container_of(kref,
 				struct se_node_acl, acl_kref);
+	struct se_portal_group *se_tpg = nacl->se_tpg;
 
-	complete(&nacl->acl_free_comp);
+	if (!nacl->dynamic_stop) {
+		complete(&nacl->acl_free_comp);
+		return;
+	}
+
+	mutex_lock(&se_tpg->acl_node_mutex);
+	list_del(&nacl->acl_list);
+	mutex_unlock(&se_tpg->acl_node_mutex);
+
+	core_tpg_wait_for_nacl_pr_ref(nacl);
+	core_free_device_list_for_node(nacl, se_tpg);
+	kfree(nacl);
 }
 
 void target_put_nacl(struct se_node_acl *nacl)
@@ -499,12 +511,39 @@ EXPORT_SYMBOL(transport_deregister_session_configfs);
 void transport_free_session(struct se_session *se_sess)
 {
 	struct se_node_acl *se_nacl = se_sess->se_node_acl;
+
 	/*
 	 * Drop the se_node_acl->nacl_kref obtained from within
 	 * core_tpg_get_initiator_node_acl().
 	 */
 	if (se_nacl) {
+		struct se_portal_group *se_tpg = se_nacl->se_tpg;
+		const struct target_core_fabric_ops *se_tfo = se_tpg->se_tpg_tfo;
+		unsigned long flags;
+
 		se_sess->se_node_acl = NULL;
+
+		/*
+		 * Also determine if we need to drop the extra ->cmd_kref if
+		 * it had been previously dynamically generated, and
+		 * the endpoint is not caching dynamic ACLs.
+		 */
+		mutex_lock(&se_tpg->acl_node_mutex);
+		if (se_nacl->dynamic_node_acl &&
+		    !se_tfo->tpg_check_demo_mode_cache(se_tpg)) {
+			spin_lock_irqsave(&se_nacl->nacl_sess_lock, flags);
+			if (list_empty(&se_nacl->acl_sess_list))
+				se_nacl->dynamic_stop = true;
+			spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags);
+
+			if (se_nacl->dynamic_stop)
+				list_del(&se_nacl->acl_list);
+		}
+		mutex_unlock(&se_tpg->acl_node_mutex);
+
+		if (se_nacl->dynamic_stop)
+			target_put_nacl(se_nacl);
+
 		target_put_nacl(se_nacl);
 	}
 	if (se_sess->sess_cmd_map) {
@@ -518,16 +557,12 @@ EXPORT_SYMBOL(transport_free_session);
 void transport_deregister_session(struct se_session *se_sess)
 {
 	struct se_portal_group *se_tpg = se_sess->se_tpg;
-	const struct target_core_fabric_ops *se_tfo;
-	struct se_node_acl *se_nacl;
 	unsigned long flags;
-	bool drop_nacl = false;
 
 	if (!se_tpg) {
 		transport_free_session(se_sess);
 		return;
 	}
-	se_tfo = se_tpg->se_tpg_tfo;
 
 	spin_lock_irqsave(&se_tpg->session_lock, flags);
 	list_del(&se_sess->sess_list);
@@ -535,33 +570,15 @@ void transport_deregister_session(struct se_session *se_sess)
 	se_sess->fabric_sess_ptr = NULL;
 	spin_unlock_irqrestore(&se_tpg->session_lock, flags);
 
-	/*
-	 * Determine if we need to do extra work for this initiator node's
-	 * struct se_node_acl if it had been previously dynamically generated.
-	 */
-	se_nacl = se_sess->se_node_acl;
-
-	mutex_lock(&se_tpg->acl_node_mutex);
-	if (se_nacl && se_nacl->dynamic_node_acl) {
-		if (!se_tfo->tpg_check_demo_mode_cache(se_tpg)) {
-			list_del(&se_nacl->acl_list);
-			drop_nacl = true;
-		}
-	}
-	mutex_unlock(&se_tpg->acl_node_mutex);
-
-	if (drop_nacl) {
-		core_tpg_wait_for_nacl_pr_ref(se_nacl);
-		core_free_device_list_for_node(se_nacl, se_tpg);
-		se_sess->se_node_acl = NULL;
-		kfree(se_nacl);
-	}
 	pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n",
 		se_tpg->se_tpg_tfo->get_fabric_name());
 	/*
 	 * If last kref is dropping now for an explicit NodeACL, awake sleeping
 	 * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group
 	 * removal context from within transport_free_session() code.
+	 *
+	 * For dynamic ACL, target_put_nacl() uses target_complete_nacl()
+	 * to release all remaining generate_node_acl=1 created ACL resources.
 	 */
 
 	transport_free_session(se_sess);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 43edf82e54ff..da854fb4530f 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -538,6 +538,7 @@ struct se_node_acl {
 	char			initiatorname[TRANSPORT_IQN_LEN];
 	/* Used to signal demo mode created ACL, disabled by default */
 	bool			dynamic_node_acl;
+	bool			dynamic_stop;
 	u32			queue_depth;
 	u32			acl_index;
 	enum target_prot_type	saved_prot_type;

From 9b2792c3da1e80f2d460167d319302a24c9ca2b7 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Mon, 6 Feb 2017 14:28:09 -0800
Subject: [PATCH 272/322] target: Fix COMPARE_AND_WRITE ref leak for non GOOD
 status

This patch addresses a long standing bug where the commit phase
of COMPARE_AND_WRITE would result in a se_cmd->cmd_kref reference
leak if se_cmd->scsi_status returned non SAM_STAT_GOOD.

This would manifest first as a lost SCSI response, and eventual
hung task during fabric driver logout or re-login, as existing
shutdown logic waited for the COMPARE_AND_WRITE se_cmd->cmd_kref
to reach zero.

To address this bug, compare_and_write_post() has been changed
to drop the incorrect !cmd->scsi_status conditional that was
preventing *post_ret = 1 for being set during non SAM_STAT_GOOD
status.

This patch has been tested with SAM_STAT_CHECK_CONDITION status
from normal target_complete_cmd() callback path, as well as the
incoming __target_execute_cmd() submission failure path when
se_cmd->execute_cmd() returns non zero status.

Reported-by: Donald White <dew@datera.io>
Cc: Donald White <dew@datera.io>
Tested-by: Gary Guo <ghg@datera.io>
Cc: Gary Guo <ghg@datera.io>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: <stable@vger.kernel.org> # v3.12+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4879e70e2eef..df7b6e95c019 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -451,6 +451,7 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success,
 					     int *post_ret)
 {
 	struct se_device *dev = cmd->se_dev;
+	sense_reason_t ret = TCM_NO_SENSE;
 
 	/*
 	 * Only set SCF_COMPARE_AND_WRITE_POST to force a response fall-through
@@ -458,9 +459,12 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success,
 	 * sent to the backend driver.
 	 */
 	spin_lock_irq(&cmd->t_state_lock);
-	if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) {
+	if (cmd->transport_state & CMD_T_SENT) {
 		cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST;
 		*post_ret = 1;
+
+		if (cmd->scsi_status == SAM_STAT_CHECK_CONDITION)
+			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 	}
 	spin_unlock_irq(&cmd->t_state_lock);
 
@@ -470,7 +474,7 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success,
 	 */
 	up(&dev->caw_sem);
 
-	return TCM_NO_SENSE;
+	return ret;
 }
 
 static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success,

From 2a362249187a8d0f6d942d6e1d763d150a296f47 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Mon, 6 Feb 2017 19:39:09 -0500
Subject: [PATCH 273/322] btrfs: fix btrfs_compat_ioctl failures on non-compat
 ioctls

Commit 4c63c2454ef incorrectly assumed that returning -ENOIOCTLCMD would
cause the native ioctl to be called.  The ->compat_ioctl callback is
expected to handle all ioctls, not just compat variants.  As a result,
when using 32-bit userspace on 64-bit kernels, everything except those
three ioctls would return -ENOTTY.

Fixes: 4c63c2454ef ("btrfs: bugfix: handle FS_IOC32_{GETFLAGS,SETFLAGS,GETVERSION} in btrfs_ioctl")
Cc: stable@vger.kernel.org
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0a6902555e65..0c77cad89e87 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5665,6 +5665,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 #ifdef CONFIG_COMPAT
 long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
+	/*
+	 * These all access 32-bit values anyway so no further
+	 * handling is necessary.
+	 */
 	switch (cmd) {
 	case FS_IOC32_GETFLAGS:
 		cmd = FS_IOC_GETFLAGS;
@@ -5675,8 +5679,6 @@ long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case FS_IOC32_GETVERSION:
 		cmd = FS_IOC_GETVERSION;
 		break;
-	default:
-		return -ENOIOCTLCMD;
 	}
 
 	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));

From a11a7f71cac209c7c9cca66eb506e1ebb033a3b3 Mon Sep 17 00:00:00 2001
From: Marcus Huewe <suse-tux@gmx.de>
Date: Mon, 6 Feb 2017 18:34:56 +0100
Subject: [PATCH 274/322] ipv6: addrconf: fix generation of new temporary
 addresses

Under some circumstances it is possible that no new temporary addresses
will be generated.

For instance, addrconf_prefix_rcv_add_addr() indirectly calls
ipv6_create_tempaddr(), which creates a tentative temporary address and
starts dad. Next, addrconf_prefix_rcv_add_addr() indirectly calls
addrconf_verify_rtnl(). Now, assume that the previously created temporary
address has the least preferred lifetime among all existing addresses and
is still tentative (that is, dad is still running). Hence, the next run of
addrconf_verify_rtnl() is performed when the preferred lifetime of the
temporary address ends. If dad succeeds before the next run, the temporary
address becomes deprecated during the next run, but no new temporary
address is generated.

In order to fix this, schedule the next addrconf_verify_rtnl() run slightly
before the temporary address becomes deprecated, if dad succeeded.

Signed-off-by: Marcus Huewe <suse-tux@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 81f7b4ea4281..a7bcc0ab5e99 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4015,6 +4015,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
 
 	if (bump_id)
 		rt_genid_bump_ipv6(dev_net(dev));
+
+	/* Make sure that a new temporary address will be created
+	 * before this temporary address becomes deprecated.
+	 */
+	if (ifp->flags & IFA_F_TEMPORARY)
+		addrconf_verify_rtnl();
 }
 
 static void addrconf_dad_run(struct inet6_dev *idev)

From 628f07d33c1f2e7bf31e0a4a988bb07914bd5e73 Mon Sep 17 00:00:00 2001
From: Eyal Itkin <eyal.itkin@gmail.com>
Date: Tue, 7 Feb 2017 16:43:05 +0300
Subject: [PATCH 275/322] IB/rxe: Fix resid update

Update the response's resid field when larger than MTU, instead of only
updating the local resid variable.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Eyal Itkin <eyal.itkin@gmail.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 3435efff8799..5bcf07328972 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -479,7 +479,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 				goto err2;
 			}
 
-			resid = mtu;
+			qp->resp.resid = mtu;
 		} else {
 			if (pktlen != resid) {
 				state = RESPST_ERR_LENGTH;

From 647bf3d8a8e5777319da92af672289b2a6c4dc66 Mon Sep 17 00:00:00 2001
From: Eyal Itkin <eyal.itkin@gmail.com>
Date: Tue, 7 Feb 2017 16:45:19 +0300
Subject: [PATCH 276/322] IB/rxe: Fix mem_check_range integer overflow

Update the range check to avoid integer-overflow in edge case.
Resolves CVE 2016-8636.

Signed-off-by: Eyal Itkin <eyal.itkin@gmail.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index d0faca294006..86a6585b847d 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -59,9 +59,11 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
 
 	case RXE_MEM_TYPE_MR:
 	case RXE_MEM_TYPE_FMR:
-		return ((iova < mem->iova) ||
-			((iova + length) > (mem->iova + mem->length))) ?
-			-EFAULT : 0;
+		if (iova < mem->iova ||
+		    length > mem->length ||
+		    iova > mem->iova + mem->length - length)
+			return -EFAULT;
+		return 0;
 
 	default:
 		return -EFAULT;

From 646ebd4166ca00bdf682a36bd2e1c9a74d848ac6 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon@kernel.org>
Date: Wed, 8 Feb 2017 17:04:09 +0200
Subject: [PATCH 277/322] RDMA: Don't reference kernel private header from UAPI
 header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove references to private kernel header and defines from exported
ib_user_verb.h file.

The code snippet below is used to reproduce the issue:

 #include <stdio.h>
 #include <rdma/ib_user_verb.h>

 int main(void)
 {
	printf("IB_USER_VERBS_ABI_VERSION = %d\n", IB_USER_VERBS_ABI_VERSION);
	return 0;
 }

It fails during compilation phase with an error:
➜  /tmp gcc main.c
main.c:2:31: fatal error: rdma/ib_user_verb.h: No such file or directory
 #include <rdma/ib_user_verb.h>
                               ^
compilation terminated.

Fixes: 189aba99e700 ("IB/uverbs: Extend modify_qp and support packet pacing")
CC: Bodong Wang <bodong@mellanox.com>
CC: Matan Barak <matanb@mellanox.com>
CC: Christoph Hellwig <hch@infradead.org>
Tested-by: Slava Shwartsman <slavash@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/uapi/rdma/ib_user_verbs.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index dfdfe4e92d31..f4f87cff6dc6 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -37,7 +37,6 @@
 #define IB_USER_VERBS_H
 
 #include <linux/types.h>
-#include <rdma/ib_verbs.h>
 
 /*
  * Increment this value if any changes that break userspace ABI
@@ -548,11 +547,17 @@ enum {
 };
 
 enum {
-	IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN
+	/*
+	 * This value is equal to IB_QP_DEST_QPN.
+	 */
+	IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20,
 };
 
 enum {
-	IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT
+	/*
+	 * This value is equal to IB_QP_RATE_LIMIT.
+	 */
+	IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25,
 };
 
 struct ib_uverbs_ex_create_qp {

From 2bd137de531367fb573d90150d1872cb2a2095f7 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Wed, 8 Feb 2017 09:29:00 -0800
Subject: [PATCH 278/322] lwtunnel: valid encap attr check should return 0 when
 lwtunnel is disabled

An error was reported upgrading to 4.9.8:
    root@Typhoon:~# ip route add default table 210 nexthop dev eth0 via 10.68.64.1
    weight 1 nexthop dev eth0 via 10.68.64.2 weight 1
    RTNETLINK answers: Operation not supported

The problem occurs when CONFIG_LWTUNNEL is not enabled and a multipath
route is submitted.

The point of lwtunnel_valid_encap_type_attr is catch modules that
need to be loaded before any references are taken with rntl held. With
CONFIG_LWTUNNEL disabled, there will be no modules to load so the
lwtunnel_valid_encap_type_attr stub should just return 0.

Fixes: 9ed59592e3e3 ("lwtunnel: fix autoload of lwt modules")
Reported-by: pupilla@libero.it
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 73dd87647460..0388b9c5f5e2 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -178,7 +178,10 @@ static inline int lwtunnel_valid_encap_type(u16 encap_type)
 }
 static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len)
 {
-	return -EOPNOTSUPP;
+	/* return 0 since we are not walking attr looking for
+	 * RTA_ENCAP_TYPE attribute on nexthops.
+	 */
+	return 0;
 }
 
 static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type,

From d7426c69a1942b2b9b709bf66b944ff09f561484 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 8 Feb 2017 10:02:13 -0800
Subject: [PATCH 279/322] sit: fix a double free on error path

Dmitry reported a double free in sit_init_net():

  kernel BUG at mm/percpu.c:689!
  invalid opcode: 0000 [#1] SMP KASAN
  Dumping ftrace buffer:
     (ftrace buffer empty)
  Modules linked in:
  CPU: 0 PID: 15692 Comm: syz-executor1 Not tainted 4.10.0-rc6-next-20170206 #1
  Hardware name: Google Google Compute Engine/Google Compute Engine,
  BIOS Google 01/01/2011
  task: ffff8801c9cc27c0 task.stack: ffff88017d1d8000
  RIP: 0010:pcpu_free_area+0x68b/0x810 mm/percpu.c:689
  RSP: 0018:ffff88017d1df488 EFLAGS: 00010046
  RAX: 0000000000010000 RBX: 00000000000007c0 RCX: ffffc90002829000
  RDX: 0000000000010000 RSI: ffffffff81940efb RDI: ffff8801db841d94
  RBP: ffff88017d1df590 R08: dffffc0000000000 R09: 1ffffffff0bb3bdd
  R10: dffffc0000000000 R11: 00000000000135dd R12: ffff8801db841d80
  R13: 0000000000038e40 R14: 00000000000007c0 R15: 00000000000007c0
  FS:  00007f6ea608f700(0000) GS:ffff8801dbe00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 000000002000aff8 CR3: 00000001c8d44000 CR4: 00000000001426f0
  DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600
  Call Trace:
   free_percpu+0x212/0x520 mm/percpu.c:1264
   ipip6_dev_free+0x43/0x60 net/ipv6/sit.c:1335
   sit_init_net+0x3cb/0xa10 net/ipv6/sit.c:1831
   ops_init+0x10a/0x530 net/core/net_namespace.c:115
   setup_net+0x2ed/0x690 net/core/net_namespace.c:291
   copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396
   create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106
   unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205
   SYSC_unshare kernel/fork.c:2281 [inline]
   SyS_unshare+0x64e/0xfc0 kernel/fork.c:2231
   entry_SYSCALL_64_fastpath+0x1f/0xc2

This is because when tunnel->dst_cache init fails, we free dev->tstats
once in ipip6_tunnel_init() and twice in sit_init_net(). This looks
redundant but its ndo_uinit() does not seem enough to clean up everything
here. So avoid this by setting dev->tstats to NULL after the first free,
at least for -net.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fad992ad4bc8..99853c6e33a8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1380,6 +1380,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
 	if (err) {
 		free_percpu(dev->tstats);
+		dev->tstats = NULL;
 		return err;
 	}
 

From b22bc27868e8c11fe3f00937a341b44f80b50364 Mon Sep 17 00:00:00 2001
From: "Bryant G. Ly" <bryantly@linux.vnet.ibm.com>
Date: Mon, 6 Feb 2017 10:04:28 -0600
Subject: [PATCH 280/322] ibmvscsis: Add SGL limit

This patch adds internal LIO sgl limit since the driver already
sets a max transfer limit on transport layer of 1MB to the client.

Cc: stable@vger.kernel.org
Tested-by: Steven Royer <seroyer@linux.vnet.ibm.com>
Signed-off-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 99b747cedbeb..0f807798c624 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3816,6 +3816,7 @@ static struct configfs_attribute *ibmvscsis_tpg_attrs[] = {
 static const struct target_core_fabric_ops ibmvscsis_ops = {
 	.module				= THIS_MODULE,
 	.name				= "ibmvscsis",
+	.max_data_sg_nents		= MAX_TXU / PAGE_SIZE,
 	.get_fabric_name		= ibmvscsis_get_fabric_name,
 	.tpg_get_wwn			= ibmvscsis_get_fabric_wwn,
 	.tpg_get_tag			= ibmvscsis_get_tag,

From 217e6fa24ce28ec87fca8da93c9016cb78028612 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 7 Feb 2017 15:57:20 -0500
Subject: [PATCH 281/322] net: introduce device min_header_len

The stack must not pass packets to device drivers that are shorter
than the minimum link layer header length.

Previously, packet sockets would drop packets smaller than or equal
to dev->hard_header_len, but this has false positives. Zero length
payload is used over Ethernet. Other link layer protocols support
variable length headers. Support for validation of these protocols
removed the min length check for all protocols.

Introduce an explicit dev->min_header_len parameter and drop all
packets below this value. Initially, set it to non-zero only for
Ethernet and loopback. Other protocols can follow in a patch to
net-next.

Fixes: 9ed988cd5915 ("packet: validate variable length ll headers")
Reported-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c    | 1 +
 include/linux/netdevice.h | 4 ++++
 net/ethernet/eth.c        | 1 +
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 1e05b7c2d157..0844f8496413 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -164,6 +164,7 @@ static void loopback_setup(struct net_device *dev)
 {
 	dev->mtu		= 64 * 1024;
 	dev->hard_header_len	= ETH_HLEN;	/* 14	*/
+	dev->min_header_len	= ETH_HLEN;	/* 14	*/
 	dev->addr_len		= ETH_ALEN;	/* 6	*/
 	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/
 	dev->flags		= IFF_LOOPBACK;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 70ad0291d517..27914672602d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1511,6 +1511,7 @@ enum netdev_priv_flags {
  *	@max_mtu:	Interface Maximum MTU value
  *	@type:		Interface hardware type
  *	@hard_header_len: Maximum hardware header length.
+ *	@min_header_len:  Minimum hardware header length
  *
  *	@needed_headroom: Extra headroom the hardware may need, but not in all
  *			  cases can this be guaranteed
@@ -1728,6 +1729,7 @@ struct net_device {
 	unsigned int		max_mtu;
 	unsigned short		type;
 	unsigned short		hard_header_len;
+	unsigned short		min_header_len;
 
 	unsigned short		needed_headroom;
 	unsigned short		needed_tailroom;
@@ -2694,6 +2696,8 @@ static inline bool dev_validate_header(const struct net_device *dev,
 {
 	if (likely(len >= dev->hard_header_len))
 		return true;
+	if (len < dev->min_header_len)
+		return false;
 
 	if (capable(CAP_SYS_RAWIO)) {
 		memset(ll_header + len, 0, dev->hard_header_len - len);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 8c5a479681ca..516c87e75de7 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -356,6 +356,7 @@ void ether_setup(struct net_device *dev)
 	dev->header_ops		= &eth_header_ops;
 	dev->type		= ARPHRD_ETHER;
 	dev->hard_header_len 	= ETH_HLEN;
+	dev->min_header_len	= ETH_HLEN;
 	dev->mtu		= ETH_DATA_LEN;
 	dev->min_mtu		= ETH_MIN_MTU;
 	dev->max_mtu		= ETH_DATA_LEN;

From 57031eb794906eea4e1c7b31dc1e2429c0af0c66 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 7 Feb 2017 15:57:21 -0500
Subject: [PATCH 282/322] packet: round up linear to header len

Link layer protocols may unconditionally pull headers, as Ethernet
does in eth_type_trans. Ensure that the entire link layer header
always lies in the skb linear segment. tpacket_snd has such a check.
Extend this to packet_snd.

Variable length link layer headers complicate the computation
somewhat. Here skb->len may be smaller than dev->hard_header_len.

Round up the linear length to be at least as long as the smallest of
the two.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3d555c79a7b5..d56ee46b11fc 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2755,7 +2755,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int offset = 0;
 	struct packet_sock *po = pkt_sk(sk);
-	int hlen, tlen;
+	int hlen, tlen, linear;
 	int extra_len = 0;
 
 	/*
@@ -2816,8 +2816,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	err = -ENOBUFS;
 	hlen = LL_RESERVED_SPACE(dev);
 	tlen = dev->needed_tailroom;
-	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len,
-			       __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len),
+	linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
+	linear = max(linear, min_t(int, len, dev->hard_header_len));
+	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
 			       msg->msg_flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto out_unlock;

From 73d2c6678e6c3af7e7a42b1e78cd0211782ade32 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 7 Feb 2017 12:59:46 -0800
Subject: [PATCH 283/322] ping: fix a null pointer dereference

Andrey reported a kernel crash:

  general protection fault: 0000 [#1] SMP KASAN
  Dumping ftrace buffer:
     (ftrace buffer empty)
  Modules linked in:
  CPU: 2 PID: 3880 Comm: syz-executor1 Not tainted 4.10.0-rc6+ #124
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  task: ffff880060048040 task.stack: ffff880069be8000
  RIP: 0010:ping_v4_push_pending_frames net/ipv4/ping.c:647 [inline]
  RIP: 0010:ping_v4_sendmsg+0x1acd/0x23f0 net/ipv4/ping.c:837
  RSP: 0018:ffff880069bef8b8 EFLAGS: 00010206
  RAX: dffffc0000000000 RBX: ffff880069befb90 RCX: 0000000000000000
  RDX: 0000000000000018 RSI: ffff880069befa30 RDI: 00000000000000c2
  RBP: ffff880069befbb8 R08: 0000000000000008 R09: 0000000000000000
  R10: 0000000000000002 R11: 0000000000000000 R12: ffff880069befab0
  R13: ffff88006c624a80 R14: ffff880069befa70 R15: 0000000000000000
  FS:  00007f6f7c716700(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00000000004a6f28 CR3: 000000003a134000 CR4: 00000000000006e0
  Call Trace:
   inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
   sock_sendmsg_nosec net/socket.c:635 [inline]
   sock_sendmsg+0xca/0x110 net/socket.c:645
   SYSC_sendto+0x660/0x810 net/socket.c:1687
   SyS_sendto+0x40/0x50 net/socket.c:1655
   entry_SYSCALL_64_fastpath+0x1f/0xc2

This is because we miss a check for NULL pointer for skb_peek() when
the queue is empty. Other places already have the same check.

Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 86cca610f4c2..68d77b1f1495 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -642,6 +642,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
 {
 	struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
 
+	if (!skb)
+		return 0;
 	pfh->wcheck = csum_partial((char *)&pfh->icmph,
 		sizeof(struct icmphdr), pfh->wcheck);
 	pfh->icmph.checksum = csum_fold(pfh->wcheck);

From 382e1eea2d983cd2343482c6a638f497bb44a636 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 7 Feb 2017 23:10:13 -0800
Subject: [PATCH 284/322] net: dsa: Do not destroy invalid network devices

dsa_slave_create() can fail, and dsa_user_port_unapply() will properly check
for the network device not being NULL before attempting to destroy it. We were
not setting the slave network device as NULL if dsa_slave_create() failed, so
we would later on be calling dsa_slave_destroy() on a now free'd and
unitialized network device, causing crashes in dsa_slave_destroy().

Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index da3862124545..0f99297b2fb3 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -273,6 +273,7 @@ static int dsa_user_port_apply(struct device_node *port, u32 index,
 	if (err) {
 		dev_warn(ds->dev, "Failed to create slave %d: %d\n",
 			 index, err);
+		ds->ports[index].netdev = NULL;
 		return err;
 	}
 

From 075ad765ef7541b2860de8408c165a92b78aefa3 Mon Sep 17 00:00:00 2001
From: Thanneeru Srinivasulu <tsrinivasulu@cavium.com>
Date: Wed, 8 Feb 2017 18:09:00 +0530
Subject: [PATCH 285/322] net: thunderx: Fix PHY autoneg for SGMII QLM mode

This patch fixes the case where there is no phydev attached
to a LMAC in DT due to non-existance of a PHY driver or due
to usage of non-stanadard PHY which doesn't support autoneg.
Changes dependeds on firmware to send correct info w.r.t
PHY and autoneg capability.

This patch also covers a case where a 10G/40G interface is used
as a 1G with convertors with Cortina PHY in between.

Signed-off-by: Thanneeru Srinivasulu <tsrinivasulu@cavium.com>
Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/thunder/thunder_bgx.c | 108 ++++++++++++++++--
 .../net/ethernet/cavium/thunder/thunder_bgx.h |   5 +
 2 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 2f85b64f01fa..1e4695270da6 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -31,6 +31,7 @@ struct lmac {
 	u8                      lmac_type;
 	u8                      lane_to_sds;
 	bool                    use_training;
+	bool                    autoneg;
 	bool			link_up;
 	int			lmacid; /* ID within BGX */
 	int			lmacid_bd; /* ID on board */
@@ -461,7 +462,17 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac)
 	/* power down, reset autoneg, autoneg enable */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MRX_CTL);
 	cfg &= ~PCS_MRX_CTL_PWR_DN;
-	cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN);
+	cfg |= PCS_MRX_CTL_RST_AN;
+	if (lmac->phydev) {
+		cfg |= PCS_MRX_CTL_AN_EN;
+	} else {
+		/* In scenarios where PHY driver is not present or it's a
+		 * non-standard PHY, FW sets AN_EN to inform Linux driver
+		 * to do auto-neg and link polling or not.
+		 */
+		if (cfg & PCS_MRX_CTL_AN_EN)
+			lmac->autoneg = true;
+	}
 	bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg);
 
 	if (lmac->lmac_type == BGX_MODE_QSGMII) {
@@ -472,7 +483,7 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac)
 		return 0;
 	}
 
-	if (lmac->lmac_type == BGX_MODE_SGMII) {
+	if ((lmac->lmac_type == BGX_MODE_SGMII) && lmac->phydev) {
 		if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
 				 PCS_MRX_STATUS_AN_CPT, false)) {
 			dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
@@ -678,12 +689,71 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 	return -1;
 }
 
+static void bgx_poll_for_sgmii_link(struct lmac *lmac)
+{
+	u64 pcs_link, an_result;
+	u8 speed;
+
+	pcs_link = bgx_reg_read(lmac->bgx, lmac->lmacid,
+				BGX_GMP_PCS_MRX_STATUS);
+
+	/*Link state bit is sticky, read it again*/
+	if (!(pcs_link & PCS_MRX_STATUS_LINK))
+		pcs_link = bgx_reg_read(lmac->bgx, lmac->lmacid,
+					BGX_GMP_PCS_MRX_STATUS);
+
+	if (bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_GMP_PCS_MRX_STATUS,
+			 PCS_MRX_STATUS_AN_CPT, false)) {
+		lmac->link_up = false;
+		lmac->last_speed = SPEED_UNKNOWN;
+		lmac->last_duplex = DUPLEX_UNKNOWN;
+		goto next_poll;
+	}
+
+	lmac->link_up = ((pcs_link & PCS_MRX_STATUS_LINK) != 0) ? true : false;
+	an_result = bgx_reg_read(lmac->bgx, lmac->lmacid,
+				 BGX_GMP_PCS_ANX_AN_RESULTS);
+
+	speed = (an_result >> 3) & 0x3;
+	lmac->last_duplex = (an_result >> 1) & 0x1;
+	switch (speed) {
+	case 0:
+		lmac->last_speed = 10;
+		break;
+	case 1:
+		lmac->last_speed = 100;
+		break;
+	case 2:
+		lmac->last_speed = 1000;
+		break;
+	default:
+		lmac->link_up = false;
+		lmac->last_speed = SPEED_UNKNOWN;
+		lmac->last_duplex = DUPLEX_UNKNOWN;
+		break;
+	}
+
+next_poll:
+
+	if (lmac->last_link != lmac->link_up) {
+		if (lmac->link_up)
+			bgx_sgmii_change_link_state(lmac);
+		lmac->last_link = lmac->link_up;
+	}
+
+	queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 3);
+}
+
 static void bgx_poll_for_link(struct work_struct *work)
 {
 	struct lmac *lmac;
 	u64 spu_link, smu_link;
 
 	lmac = container_of(work, struct lmac, dwork.work);
+	if (lmac->is_sgmii) {
+		bgx_poll_for_sgmii_link(lmac);
+		return;
+	}
 
 	/* Receive link is latching low. Force it high and verify it */
 	bgx_reg_modify(lmac->bgx, lmac->lmacid,
@@ -775,9 +845,21 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 	    (lmac->lmac_type != BGX_MODE_XLAUI) &&
 	    (lmac->lmac_type != BGX_MODE_40G_KR) &&
 	    (lmac->lmac_type != BGX_MODE_10G_KR)) {
-		if (!lmac->phydev)
-			return -ENODEV;
-
+		if (!lmac->phydev) {
+			if (lmac->autoneg) {
+				bgx_reg_write(bgx, lmacid,
+					      BGX_GMP_PCS_LINKX_TIMER,
+					      PCS_LINKX_TIMER_COUNT);
+				goto poll;
+			} else {
+				/* Default to below link speed and duplex */
+				lmac->link_up = true;
+				lmac->last_speed = 1000;
+				lmac->last_duplex = 1;
+				bgx_sgmii_change_link_state(lmac);
+				return 0;
+			}
+		}
 		lmac->phydev->dev_flags = 0;
 
 		if (phy_connect_direct(&lmac->netdev, lmac->phydev,
@@ -786,15 +868,17 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 			return -ENODEV;
 
 		phy_start_aneg(lmac->phydev);
-	} else {
-		lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND |
-						   WQ_MEM_RECLAIM, 1);
-		if (!lmac->check_link)
-			return -ENOMEM;
-		INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link);
-		queue_delayed_work(lmac->check_link, &lmac->dwork, 0);
+		return 0;
 	}
 
+poll:
+	lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND |
+					   WQ_MEM_RECLAIM, 1);
+	if (!lmac->check_link)
+		return -ENOMEM;
+	INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link);
+	queue_delayed_work(lmac->check_link, &lmac->dwork, 0);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index c18ebfeb2039..a60f189429bb 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -153,10 +153,15 @@
 #define	 PCS_MRX_CTL_LOOPBACK1			BIT_ULL(14)
 #define	 PCS_MRX_CTL_RESET			BIT_ULL(15)
 #define BGX_GMP_PCS_MRX_STATUS		0x30008
+#define	 PCS_MRX_STATUS_LINK			BIT_ULL(2)
 #define	 PCS_MRX_STATUS_AN_CPT			BIT_ULL(5)
+#define BGX_GMP_PCS_ANX_ADV		0x30010
 #define BGX_GMP_PCS_ANX_AN_RESULTS	0x30020
+#define BGX_GMP_PCS_LINKX_TIMER		0x30040
+#define PCS_LINKX_TIMER_COUNT			0x1E84
 #define BGX_GMP_PCS_SGM_AN_ADV		0x30068
 #define BGX_GMP_PCS_MISCX_CTL		0x30078
+#define  PCS_MISC_CTL_MODE			BIT_ULL(8)
 #define  PCS_MISC_CTL_DISP_EN			BIT_ULL(13)
 #define  PCS_MISC_CTL_GMX_ENO			BIT_ULL(11)
 #define  PCS_MISC_CTL_SAMP_PT_MASK	0x7Full

From 49d29a077af8d2ee3b291ccd8d053541bebe09d7 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Wed, 1 Feb 2017 10:35:08 +0100
Subject: [PATCH 286/322] drm: vc4: adapt to new behaviour of drm_crtc.c

When drm_crtc_init_with_planes() was orignally added
(in drm_crtc.c, e13161af80c185ecd8dc4641d0f5df58f9e3e0af
drm: Add drm_crtc_init_with_planes() (v2)), it only checked for "primary"
being non-null. If that was the case, it modified primary->possible_crtcs.

Then, when support for cursor planes was added
(fc1d3e44ef7c1db93384150fdbf8948dcf949f15 drm: Allow drivers to register
cursor planes with crtc), the same behaviour was implemented for cursor
planes.

vc4_plane_init() since its inception has passed 0xff as "possible_crtcs"
parameter to drm_universal_plane_init(). With a change in drm_crtc.c
(7abc7d47510c75dd984380ebf819616e574c9604 drm: don't override
possible_crtcs for primary/cursor planes) passing 0xff results in primary's
possible_crtcs set to 0xff (cursor was updated manually by vc4_crtc.c).
Consequently, it would be allowed to use the primary plane from CRTC 1 (for
example) on CRTC 0, which would result in the overlay and cursors being
buried.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: http://patchwork.freedesktop.org/patch/msgid/1485941708-27892-1-git-send-email-andrzej.p@samsung.com
Fixes: 7abc7d47510c ("drm: don't override possible_crtcs for primary/cursor planes")
---
 drivers/gpu/drm/vc4/vc4_plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 881bf489478b..686cdd3c86f2 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -858,7 +858,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 		}
 	}
 	plane = &vc4_plane->base;
-	ret = drm_universal_plane_init(dev, plane, 0xff,
+	ret = drm_universal_plane_init(dev, plane, 0,
 				       &vc4_plane_funcs,
 				       formats, num_formats,
 				       type, NULL);

From ed5bd7dc88edf4a4a9c67130742b1b59aa017a5f Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Wed, 8 Feb 2017 14:30:50 -0800
Subject: [PATCH 287/322] kernel/ucount.c: mark user_header with
 kmemleak_ignore()

The user_header gets caught by kmemleak with the following splat as
missing a free:

  unreferenced object 0xffff99667a733d80 (size 96):
  comm "swapper/0", pid 1, jiffies 4294892317 (age 62191.468s)
  hex dump (first 32 bytes):
    a0 b6 92 b4 ff ff ff ff 00 00 00 00 01 00 00 00  ................
    01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
     kmemleak_alloc+0x4a/0xa0
     __kmalloc+0x144/0x260
     __register_sysctl_table+0x54/0x5e0
     register_sysctl+0x1b/0x20
     user_namespace_sysctl_init+0x17/0x34
     do_one_initcall+0x52/0x1a0
     kernel_init_freeable+0x173/0x200
     kernel_init+0xe/0x100
     ret_from_fork+0x2c/0x40

The BUG_ON()s are intended to crash so no need to clean up after
ourselves on error there.  This is also a kernel/ subsys_init() we don't
need a respective exit call here as this is never modular, so just white
list it.

Link: http://lkml.kernel.org/r/20170203211404.31458-1-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nikolay Borisov <n.borisov.lkml@gmail.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/ucount.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/ucount.c b/kernel/ucount.c
index 4bbd38ec3788..95c6336fc2b3 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -227,11 +227,10 @@ static __init int user_namespace_sysctl_init(void)
 	 * properly.
 	 */
 	user_header = register_sysctl("user", empty);
+	kmemleak_ignore(user_header);
 	BUG_ON(!user_header);
 	BUG_ON(!setup_userns_sysctls(&init_user_ns));
 #endif
 	return 0;
 }
 subsys_initcall(user_namespace_sysctl_init);
-
-

From 0911d0041c22922228ca52a977d7b0b0159fee4b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 8 Feb 2017 14:30:53 -0800
Subject: [PATCH 288/322] mm: avoid returning VM_FAULT_RETRY from
 ->page_mkwrite handlers

Some ->page_mkwrite handlers may return VM_FAULT_RETRY as its return
code (GFS2 or Lustre can definitely do this).  However VM_FAULT_RETRY
from ->page_mkwrite is completely unhandled by the mm code and results
in locking and writeably mapping the page which definitely is not what
the caller wanted.

Fix Lustre and block_page_mkwrite_ret() used by other filesystems
(notably GFS2) to return VM_FAULT_NOPAGE instead which results in
bailing out from the fault code, the CPU then retries the access, and we
fault again effectively doing what the handler wanted.

Link: http://lkml.kernel.org/r/20170203150729.15863-1-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/staging/lustre/lustre/llite/llite_mmap.c | 4 +---
 include/linux/buffer_head.h                      | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index ee01f20d8b11..9afa6bec3e6f 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -390,15 +390,13 @@ static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		result = VM_FAULT_LOCKED;
 		break;
 	case -ENODATA:
+	case -EAGAIN:
 	case -EFAULT:
 		result = VM_FAULT_NOPAGE;
 		break;
 	case -ENOMEM:
 		result = VM_FAULT_OOM;
 		break;
-	case -EAGAIN:
-		result = VM_FAULT_RETRY;
-		break;
 	default:
 		result = VM_FAULT_SIGBUS;
 		break;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d67ab83823ad..79591c3660cc 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -243,12 +243,10 @@ static inline int block_page_mkwrite_return(int err)
 {
 	if (err == 0)
 		return VM_FAULT_LOCKED;
-	if (err == -EFAULT)
+	if (err == -EFAULT || err == -EAGAIN)
 		return VM_FAULT_NOPAGE;
 	if (err == -ENOMEM)
 		return VM_FAULT_OOM;
-	if (err == -EAGAIN)
-		return VM_FAULT_RETRY;
 	/* -ENOSPC, -EDQUOT, -EIO ... */
 	return VM_FAULT_SIGBUS;
 }

From 4d59b6ccf000862beed6fc0765d3209f98a8d8a2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 8 Feb 2017 14:30:56 -0800
Subject: [PATCH 289/322] cpumask: use nr_cpumask_bits for parsing functions

Commit 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and
parsing functions") converted both cpumask printing and parsing
functions to use nr_cpu_ids instead of nr_cpumask_bits.  While this was
okay for the printing functions as it just picked one of the two output
formats that we were alternating between depending on a kernel config,
doing the same for parsing wasn't okay.

nr_cpumask_bits can be either nr_cpu_ids or NR_CPUS.  We can always use
nr_cpu_ids but that is a variable while NR_CPUS is a constant, so it can
be more efficient to use NR_CPUS when we can get away with it.
Converting the printing functions to nr_cpu_ids makes sense because it
affects how the masks get presented to userspace and doesn't break
anything; however, using nr_cpu_ids for parsing functions can
incorrectly leave the higher bits uninitialized while reading in these
masks from userland.  As all testing and comparison functions use
nr_cpumask_bits which can be larger than nr_cpu_ids, the parsed cpumasks
can erroneously yield false negative results.

This made the taskstats interface incorrectly return -EINVAL even when
the inputs were correct.

Fix it by restoring the parse functions to use nr_cpumask_bits instead
of nr_cpu_ids.

Link: http://lkml.kernel.org/r/20170206182442.GB31078@htj.duckdns.org
Fixes: 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and parsing functions")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Martin Steigerwald <martin.steigerwald@teamix.de>
Debugged-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: <stable@vger.kernel.org>	[4.0+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpumask.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c717f5ea88cb..b3d2c1a89ac4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -560,7 +560,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
 static inline int cpumask_parse_user(const char __user *buf, int len,
 				     struct cpumask *dstp)
 {
-	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids);
+	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -575,7 +575,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
 				     struct cpumask *dstp)
 {
 	return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
-				     nr_cpu_ids);
+				     nr_cpumask_bits);
 }
 
 /**
@@ -590,7 +590,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
 	char *nl = strchr(buf, '\n');
 	unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf);
 
-	return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids);
+	return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -602,7 +602,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
  */
 static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
 {
-	return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids);
+	return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**

From a810007afe239d59c1115fcaa06eb5b480f876e9 Mon Sep 17 00:00:00 2001
From: Sean Rees <sean@erifax.org>
Date: Wed, 8 Feb 2017 14:30:59 -0800
Subject: [PATCH 290/322] mm/slub.c: fix random_seq offset destruction

Commit 210e7a43fa90 ("mm: SLUB freelist randomization") broke USB hub
initialisation as described in

  https://bugzilla.kernel.org/show_bug.cgi?id=177551.

Bail out early from init_cache_random_seq if s->random_seq is already
initialised.  This prevents destroying the previously computed
random_seq offsets later in the function.

If the offsets are destroyed, then shuffle_freelist will truncate
page->freelist to just the first object (orphaning the rest).

Fixes: 210e7a43fa90 ("mm: SLUB freelist randomization")
Link: http://lkml.kernel.org/r/20170207140707.20824-1-sean@erifax.org
Signed-off-by: Sean Rees <sean@erifax.org>
Reported-by: <userwithuid@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index 7aa6f433f4de..7ec0a965c6a3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1422,6 +1422,10 @@ static int init_cache_random_seq(struct kmem_cache *s)
 	int err;
 	unsigned long i, count = oo_objects(s->oo);
 
+	/* Bailout if already initialised */
+	if (s->random_seq)
+		return 0;
+
 	err = cache_random_seq_create(s, count, GFP_KERNEL);
 	if (err) {
 		pr_err("SLUB: Unable to initialize free list for %s\n",

From 3b802c9455f973fa786eafb4d5bd4634a7dd5130 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 7 Feb 2017 16:23:31 -0800
Subject: [PATCH 291/322] Revert "hwrng: core - zeroize buffers with random
 data"

This reverts commit 2cc751545854d7bd7eedf4d7e377bb52e176cd07.

With this commit in place I get on a Cavium ThunderX (arm64) system:

$ if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v > rng-bad.txt
1+0 records in
1+0 records out
256 bytes (256 B) copied, 9.1171e-05 s, 2.8 MB/s
$ dd if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v >> rng-bad.txt
1+0 records in
1+0 records out
256 bytes (256 B) copied, 9.6141e-05 s, 2.7 MB/s
$ cat rng-bad.txt
000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000020 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000050 00 00 00 00 37 20 46 ae d0 fc 1c 55 25 6e b0 b8
000060 7c 7e d7 d4 00 0f 6f b2 91 1e 30 a8 fa 3e 52 0e
000070 06 2d 53 30 be a1 20 0f aa 56 6e 0e 44 6e f4 35
000080 b7 6a fe d2 52 70 7e 58 56 02 41 ea d1 9c 6a 6a
000090 d1 bd d8 4c da 35 45 ef 89 55 fc 59 d5 cd 57 ba
0000a0 4e 3e 02 1c 12 76 43 37 23 e1 9f 7a 9f 9e 99 24
0000b0 47 b2 de e3 79 85 f6 55 7e ad 76 13 4f a0 b5 41
0000c0 c6 92 42 01 d9 12 de 8f b4 7b 6e ae d7 24 fc 65
0000d0 4d af 0a aa 36 d9 17 8d 0e 8b 7a 3b b6 5f 96 47
0000e0 46 f7 d8 ce 0b e8 3e c6 13 a6 2c b6 d6 cc 17 26
0000f0 e3 c3 17 8e 9e 45 56 1e 41 ef 29 1a a8 65 c8 3a
000100
000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000020 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
000050 00 00 00 00 f4 90 65 aa 8b f2 5e 31 01 53 b4 d4
000060 06 c0 23 a2 99 3d 01 e4 b0 c1 b1 55 0f 80 63 cf
000070 33 24 d8 3a 1d 5e cd 2c ba c0 d0 18 6f bc 97 46
000080 1e 19 51 b1 90 15 af 80 5e d1 08 0d eb b0 6c ab
000090 6a b4 fe 62 37 c5 e1 ee 93 c3 58 78 91 2a d5 23
0000a0 63 50 eb 1f 3b 84 35 18 cf b2 a4 b8 46 69 9e cf
0000b0 0c 95 af 03 51 45 a8 42 f1 64 c9 55 fc 69 76 63
0000c0 98 9d 82 fa 76 85 24 da 80 07 29 fe 4e 76 0c 61
0000d0 ff 23 94 4f c8 5c ce 0b 50 e8 31 bc 9d ce f4 ca
0000e0 be ca 28 da e6 fa cc 64 1c ec a8 41 db fe 42 bd
0000f0 a0 e2 4b 32 b4 52 ba 03 70 8e c1 8e d0 50 3a c6
000100

To my untrained mental entropy detector, the first several bytes of
each read from /dev/hwrng seem to not be very random (i.e. all zero).

When I revert the patch (apply this patch), I get back to what we have
in v4.9, which looks like (much more random appearing):

$ dd if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v > rng-good.txt
1+0 records in
1+0 records out
256 bytes (256 B) copied, 0.000252233 s, 1.0 MB/s
$ dd if=/dev/hwrng bs=256 count=1 | od -t x1 -A x -v >> rng-good.txt
1+0 records in
1+0 records out
256 bytes (256 B) copied, 0.000113571 s, 2.3 MB/s
$ cat rng-good.txt
000000 75 d1 2d 19 68 1f d2 26 a1 49 22 61 66 e8 09 e5
000010 e0 4e 10 d0 1a 2c 45 5d 59 04 79 8e e2 b7 2c 2e
000020 e8 ad da 34 d5 56 51 3d 58 29 c7 7a 8e ed 22 67
000030 f9 25 b9 fb c6 b7 9c 35 1f 84 21 35 c1 1d 48 34
000040 45 7c f6 f1 57 63 1a 88 38 e8 81 f0 a9 63 ad 0e
000050 be 5d 3e 74 2e 4e cb 36 c2 01 a8 14 e1 38 e1 bb
000060 23 79 09 56 77 19 ff 98 e8 44 f3 27 eb 6e 0a cb
000070 c9 36 e3 2a 96 13 07 a0 90 3f 3b bd 1d 04 1d 67
000080 be 33 14 f8 02 c2 a4 02 ab 8b 5b 74 86 17 f0 5e
000090 a1 d7 aa ef a6 21 7b 93 d1 85 86 eb 4e 8c d0 4c
0000a0 56 ac e4 45 27 44 84 9f 71 db 36 b9 f7 47 d7 b3
0000b0 f2 9c 62 41 a3 46 2b 5b e3 80 63 a4 35 b5 3c f4
0000c0 bc 1e 3a ad e4 59 4a 98 6c e8 8d ff 1b 16 f8 52
0000d0 05 5c 2f 52 2a 0f 45 5b 51 fb 93 97 a4 49 4f 06
0000e0 f3 a0 d1 1e ba 3d ed a7 60 8f bb 84 2c 21 94 2d
0000f0 b3 66 a6 61 1e 58 30 24 85 f8 c8 18 c3 77 00 22
000100
000000 73 ca cc a1 d9 bb 21 8d c3 5c f3 ab 43 6d a7 a4
000010 4a fd c5 f4 9c ba 4a 0f b1 2e 19 15 4e 84 26 e0
000020 67 c9 f2 52 4d 65 1f 81 b7 8b 6d 2b 56 7b 99 75
000030 2e cd d0 db 08 0c 4b df f3 83 c6 83 00 2e 2b b8
000040 0f af 61 1d f2 02 35 74 b5 a4 6f 28 f3 a1 09 12
000050 f2 53 b5 d2 da 45 01 e5 12 d6 46 f8 0b db ed 51
000060 7b f4 0d 54 e0 63 ea 22 e2 1d d0 d6 d0 e7 7e e0
000070 93 91 fb 87 95 43 41 28 de 3d 8b a3 a8 8f c4 9e
000080 30 95 12 7a b2 27 28 ff 37 04 2e 09 7c dd 7c 12
000090 e1 50 60 fb 6d 5f a8 65 14 40 89 e3 4c d2 87 8f
0000a0 34 76 7e 66 7a 8e 6b a3 fc cf 38 52 2e f9 26 f0
0000b0 98 63 15 06 34 99 b2 88 4f aa d8 14 88 71 f1 81
0000c0 be 51 11 2b f4 7e a0 1e 12 b2 44 2e f6 8d 84 ea
0000d0 63 82 2b 66 b3 9a fd 08 73 5a c2 cc ab 5a af b1
0000e0 88 e3 a6 80 4b fc db ed 71 e0 ae c0 0a a4 8c 35
0000f0 eb 89 f9 8a 4b 52 59 6f 09 7c 01 3f 56 e7 c7 bf
000100

Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/hw_random/core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 6ce5ce8be2f2..87fba424817e 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -92,7 +92,6 @@ static void add_early_randomness(struct hwrng *rng)
 	mutex_unlock(&reading_mutex);
 	if (bytes_read > 0)
 		add_device_randomness(rng_buffer, bytes_read);
-	memset(rng_buffer, 0, size);
 }
 
 static inline void cleanup_rng(struct kref *kref)
@@ -288,7 +287,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
 		}
 	}
 out:
-	memset(rng_buffer, 0, rng_buffer_size());
 	return ret ? : err;
 
 out_unlock_reading:
@@ -427,7 +425,6 @@ static int hwrng_fillfn(void *unused)
 		/* Outside lock, sure, but y'know: randomness. */
 		add_hwgenerator_randomness((void *)rng_fillbuf, rc,
 					   rc * current_quality * 8 >> 10);
-		memset(rng_fillbuf, 0, rng_buffer_size());
 	}
 	hwrng_fill = NULL;
 	return 0;

From d966564fcdc19e13eb6ba1fbe6b8101070339c3d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 8 Feb 2017 18:08:29 -0800
Subject: [PATCH 292/322] Revert "x86/ioapic: Restore IO-APIC irq_chip
 retrigger callback"

This reverts commit 020eb3daaba2857b32c4cf4c82f503d6a00a67de.

Gabriel C reports that it causes his machine to not boot, and we haven't
tracked down the reason for it yet.  Since the bug it fixes has been
around for a longish time, we're better off reverting the fix for now.

Gabriel says:
 "It hangs early and freezes with a lot RCU warnings.

  I bisected it down to :

  > Ruslan Ruslichenko (1):
  >       x86/ioapic: Restore IO-APIC irq_chip retrigger callback

  Reverting this one fixes the problem for me..

  The box is a PRIMERGY TX200 S5 , 2 socket , 2 x E5520 CPU(s) installed"

and Ruslan and Thomas are currently stumped.

Reported-and-bisected-by: Gabriel C <nix.or.die@gmail.com>
Cc: Ruslan Ruslichenko <rruslich@cisco.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org   # for the backport of the original commit
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/apic/io_apic.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 52f352b063fd..bd6b8c270c24 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1875,7 +1875,6 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_eoi		= ioapic_ack_level,
 	.irq_set_affinity	= ioapic_set_affinity,
-	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -1887,7 +1886,6 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_eoi		= ioapic_ir_ack_level,
 	.irq_set_affinity	= ioapic_set_affinity,
-	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 

From 90c1e3c2fafec57fcb55b5d69bcf293b1a5fc8b3 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 6 Feb 2017 13:05:16 +1100
Subject: [PATCH 293/322] powerpc/mm/radix: Update ERAT flushes when
 invalidating TLB

Three tiny changes to the ERAT flushing logic: First don't make
it depend on DD1. It hasn't been decided yet but we might run
DD2 in a mode that also requires explicit flushes for performance
reasons so make it unconditional. We also add a missing isync, and
finally remove the flush from _tlbiel_va as it is only necessary
for congruence-class invalidations (PID, LPID and full TLB), not
targetted invalidations.

Fixes: 96ed1fe511a8 ("powerpc/mm/radix: Invalidate ERAT on tlbiel for POWER9 DD1")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/tlb-radix.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 61b79119065f..952713d6cf04 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -50,9 +50,7 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
 	for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
 		__tlbiel_pid(pid, set, ric);
 	}
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-		asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
-	return;
+	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
 }
 
 static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
@@ -85,8 +83,6 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
 	asm volatile("ptesync": : :"memory");
-	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
-		asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
 }
 
 static inline void _tlbie_va(unsigned long va, unsigned long pid,

From 9b256714979fad61ae11d90b53cf67dd5e6484eb Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 7 Feb 2017 11:35:31 +1100
Subject: [PATCH 294/322] powerpc/powernv: Fix CPU hotplug to handle waking on
 HVI

The IPIs come in as HVI not EE, so we need to test the appropriate
SRR1 bits. The encoding is such that it won't have false positives
on P7 and P8 so we can just test it like that. We also need to handle
the icp-opal variant of the flush.

Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/reg.h       |  3 ++-
 arch/powerpc/include/asm/xics.h      |  1 +
 arch/powerpc/platforms/powernv/smp.c | 12 ++++++++++--
 arch/powerpc/sysdev/xics/icp-opal.c  | 29 ++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 0d4531aa2052..dff79798903d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -649,9 +649,10 @@
 #define   SRR1_ISI_N_OR_G	0x10000000 /* ISI: Access is no-exec or G */
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
-#define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 */
+#define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
+#define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
 #define   SRR1_WAKEMT		0x00280000 /* mtctrl */
 #define	  SRR1_WAKEHMI		0x00280000 /* Hypervisor maintenance */
 #define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index f0b238516e9b..e0b9e576905a 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -44,6 +44,7 @@ static inline int icp_hv_init(void) { return -ENODEV; }
 
 #ifdef CONFIG_PPC_POWERNV
 extern int icp_opal_init(void);
+extern void icp_opal_flush_interrupt(void);
 #else
 static inline int icp_opal_init(void) { return -ENODEV; }
 #endif
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index c789258ae1e1..eec0e8d0454d 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -155,8 +155,10 @@ static void pnv_smp_cpu_kill_self(void)
 		wmask = SRR1_WAKEMASK_P8;
 
 	idle_states = pnv_get_supported_cpuidle_states();
+
 	/* We don't want to take decrementer interrupts while we are offline,
-	 * so clear LPCR:PECE1. We keep PECE2 enabled.
+	 * so clear LPCR:PECE1. We keep PECE2 (and LPCR_PECE_HVEE on P9)
+	 * enabled as to let IPIs in.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 
@@ -206,8 +208,12 @@ static void pnv_smp_cpu_kill_self(void)
 		 * contains 0.
 		 */
 		if (((srr1 & wmask) == SRR1_WAKEEE) ||
+		    ((srr1 & wmask) == SRR1_WAKEHVI) ||
 		    (local_paca->irq_happened & PACA_IRQ_EE)) {
-			icp_native_flush_interrupt();
+			if (cpu_has_feature(CPU_FTR_ARCH_300))
+				icp_opal_flush_interrupt();
+			else
+				icp_native_flush_interrupt();
 		} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
 			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 			asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
@@ -221,6 +227,8 @@ static void pnv_smp_cpu_kill_self(void)
 		if (srr1 && !generic_check_cpu_restart(cpu))
 			DBG("CPU%d Unexpected exit while offline !\n", cpu);
 	}
+
+	/* Re-enable decrementer interrupts */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
 	DBG("CPU%d coming online...\n", cpu);
 }
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 60c57657c772..c96c0cb95d87 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -132,6 +132,35 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
 	return smp_ipi_demux();
 }
 
+/*
+ * Called when an interrupt is received on an off-line CPU to
+ * clear the interrupt, so that the CPU can go back to nap mode.
+ */
+void icp_opal_flush_interrupt(void)
+{
+	unsigned int xirr;
+	unsigned int vec;
+
+	do {
+		xirr = icp_opal_get_xirr();
+		vec = xirr & 0x00ffffff;
+		if (vec == XICS_IRQ_SPURIOUS)
+			break;
+		if (vec == XICS_IPI) {
+			/* Clear pending IPI */
+			int cpu = smp_processor_id();
+			kvmppc_set_host_ipi(cpu, 0);
+			opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
+		} else {
+			pr_err("XICS: hw interrupt 0x%x to offline cpu, "
+			       "disabling\n", vec);
+			xics_mask_unknown_vec(vec);
+		}
+
+		/* EOI the interrupt */
+	} while (opal_int_eoi(xirr) > 0);
+}
+
 #endif /* CONFIG_SMP */
 
 static const struct icp_ops icp_opal_ops = {

From f83e6862047e1e371bdc5d512dd6cabe8a3965b8 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 7 Feb 2017 11:35:36 +1100
Subject: [PATCH 295/322] powerpc/powernv: Properly set "host-ipi" on IPIs

Otherwise KVM will fail to pass them through to the host

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/xics/icp-opal.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index c96c0cb95d87..f9670eabfcfa 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -120,14 +120,16 @@ static void icp_opal_cause_ipi(int cpu, unsigned long data)
 {
 	int hw_cpu = get_hard_smp_processor_id(cpu);
 
+	kvmppc_set_host_ipi(cpu, 1);
 	opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
 }
 
 static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
 {
-	int hw_cpu = hard_smp_processor_id();
+	int cpu = smp_processor_id();
 
-	opal_int_set_mfrr(hw_cpu, 0xff);
+	kvmppc_set_host_ipi(cpu, 0);
+	opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
 
 	return smp_ipi_demux();
 }

From af677166cf63c179dc2485053166e02c4aea01eb Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Thu, 9 Feb 2017 09:20:54 +0800
Subject: [PATCH 296/322] ALSA: hda - adding a new NV HDMI/DP codec ID in the
 driver

Without this change, the HDMI/DP codec will be recognised as a
generic codec, and there is no sound when playing through this codec.

As suggested by NVidia side, after adding the new ID in the driver,
the sound playing works well.

Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_hdmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index cf9bc042fe96..3fc201c3b95a 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -3639,6 +3639,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP",	patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0080, "GPU 80 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0082, "GPU 82 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP",	patch_nvhdmi),
 HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI",	patch_nvhdmi_2ch),

From f43128c75202f29ee71aa83e6c320a911137c189 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Fri, 27 Jan 2017 15:59:30 +0100
Subject: [PATCH 297/322] i2c: piix4: Fix request_region size

Since '701dc207bf55 ("i2c: piix4: Avoid race conditions with IMC")' we
are using the SMBSLVCNT register at offset 0x8. We need to request it.

Fixes: 701dc207bf55 ("i2c: piix4: Avoid race conditions with IMC")
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-piix4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index e34d82e79b98..73cc6799cc59 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -58,7 +58,7 @@
 #define SMBSLVDAT	(0xC + piix4_smba)
 
 /* count for request_region */
-#define SMBIOSIZE	8
+#define SMBIOSIZE	9
 
 /* PCI Address Constants */
 #define SMBBA		0x090

From bbb27fc33d44e7b8d96369810654df4ee1837566 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Thu, 2 Feb 2017 20:15:16 +0100
Subject: [PATCH 298/322] i2c: piix4: Request the SMBUS semaphore inside the
 mutex

SMBSLVCNT must be protected with the piix4_mutex_sb800 in order to avoid
multiple buses accessing to the semaphore at the same time.

Fixes: 701dc207bf55 ("i2c: piix4: Avoid race conditions with IMC")
Reported-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-piix4.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 73cc6799cc59..c21ca7bf2efe 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -592,6 +592,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 	u8 port;
 	int retval;
 
+	mutex_lock(&piix4_mutex_sb800);
+
 	/* Request the SMBUS semaphore, avoid conflicts with the IMC */
 	smbslvcnt  = inb_p(SMBSLVCNT);
 	do {
@@ -605,10 +607,10 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 		usleep_range(1000, 2000);
 	} while (--retries);
 	/* SMBus is still owned by the IMC, we give up */
-	if (!retries)
+	if (!retries) {
+		mutex_unlock(&piix4_mutex_sb800);
 		return -EBUSY;
-
-	mutex_lock(&piix4_mutex_sb800);
+	}
 
 	outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX);
 	smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
@@ -623,11 +625,11 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
 	outb_p(smba_en_lo, SB800_PIIX4_SMB_IDX + 1);
 
-	mutex_unlock(&piix4_mutex_sb800);
-
 	/* Release the semaphore */
 	outb_p(smbslvcnt | 0x20, SMBSLVCNT);
 
+	mutex_unlock(&piix4_mutex_sb800);
+
 	return retval;
 }
 

From 8672aed7bd865774257efd40929702759a869329 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Wed, 8 Feb 2017 22:44:44 -0800
Subject: [PATCH 299/322] pstore: don't OOPS when there are no ftrace zones

We'll OOPS in ramoops_get_next_prz() if the platform didn't ask for any
ftrace zones (i.e., cxt->fprzs will be NULL). Let's just skip this
entire FTRACE section if there's no 'fprzs'.

Regression seen on a coreboot/depthcharge-based Chromebook.

Fixes: 2fbea82bbb89 ("pstore: Merge per-CPU ftrace records into one")
Cc: Joel Fernandes <joelaf@google.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/ram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 27c059e1760a..1d887efaaf71 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -280,7 +280,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 					   1, id, type, PSTORE_TYPE_PMSG, 0);
 
 	/* ftrace is last since it may want to dynamically allocate memory. */
-	if (!prz_ok(prz)) {
+	if (!prz_ok(prz) && cxt->fprzs) {
 		if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) {
 			prz = ramoops_get_next_prz(cxt->fprzs,
 					&cxt->ftrace_read_cnt, 1, id, type,

From 6d9f66ac7fec2a6ccd649e5909806dfe36f1fc25 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 8 Feb 2017 19:05:26 -0800
Subject: [PATCH 300/322] net: phy: Fix PHY module checks and NULL deref in
 phy_attach_direct()

The Generic PHY drivers gets assigned after we checked that the current
PHY driver is NULL, so we need to check a few things before we can
safely dereference d->driver. This would be causing a NULL deference to
occur when a system binds to the Generic PHY driver. Update
phy_attach_direct() to do the following:

- grab the driver module reference after we have assigned the Generic
  PHY drivers accordingly, and remember we came from the generic PHY
  path

- update the error path to clean up the module reference in case the
  Generic PHY probe function fails

- split the error path involving phy_detacht() to avoid double free/put
  since phy_detach() does all the clean up

- finally, have phy_detach() drop the module reference count before we
  call device_release_driver() for the Generic PHY driver case

Fixes: cafe8df8b9bc ("net: phy: Fix lack of reference count on PHY driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0d8f4d3847f6..8c8e15b8739d 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -908,6 +908,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	struct module *ndev_owner = dev->dev.parent->driver->owner;
 	struct mii_bus *bus = phydev->mdio.bus;
 	struct device *d = &phydev->mdio.dev;
+	bool using_genphy = false;
 	int err;
 
 	/* For Ethernet device drivers that register their own MDIO bus, we
@@ -920,11 +921,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		return -EIO;
 	}
 
-	if (!try_module_get(d->driver->owner)) {
-		dev_err(&dev->dev, "failed to get the device driver module\n");
-		return -EIO;
-	}
-
 	get_device(d);
 
 	/* Assume that if there is no driver, that it doesn't
@@ -938,12 +934,22 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 			d->driver =
 				&genphy_driver[GENPHY_DRV_1G].mdiodrv.driver;
 
+		using_genphy = true;
+	}
+
+	if (!try_module_get(d->driver->owner)) {
+		dev_err(&dev->dev, "failed to get the device driver module\n");
+		err = -EIO;
+		goto error_put_device;
+	}
+
+	if (using_genphy) {
 		err = d->driver->probe(d);
 		if (err >= 0)
 			err = device_bind_driver(d);
 
 		if (err)
-			goto error;
+			goto error_module_put;
 	}
 
 	if (phydev->attached_dev) {
@@ -980,9 +986,14 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	return err;
 
 error:
+	/* phy_detach() does all of the cleanup below */
 	phy_detach(phydev);
-	put_device(d);
+	return err;
+
+error_module_put:
 	module_put(d->driver->owner);
+error_put_device:
+	put_device(d);
 	if (ndev_owner != bus->owner)
 		module_put(bus->owner);
 	return err;
@@ -1045,6 +1056,8 @@ void phy_detach(struct phy_device *phydev)
 
 	phy_led_triggers_unregister(phydev);
 
+	module_put(phydev->mdio.dev.driver->owner);
+
 	/* If the device had no specific driver before (i.e. - it
 	 * was using the generic driver), we unbind the device
 	 * from the generic driver so that there's a chance a
@@ -1065,7 +1078,6 @@ void phy_detach(struct phy_device *phydev)
 	bus = phydev->mdio.bus;
 
 	put_device(&phydev->mdio.dev);
-	module_put(phydev->mdio.dev.driver->owner);
 	if (ndev_owner != bus->owner)
 		module_put(bus->owner);
 }

From 538d92912d3190a1dd809233a0d57277459f37b2 Mon Sep 17 00:00:00 2001
From: Vineeth Remanan Pillai <vineethp@amazon.com>
Date: Tue, 7 Feb 2017 18:59:01 +0000
Subject: [PATCH 301/322] xen-netfront: Rework the fix for Rx stall during OOM
 and network stress

The commit 90c311b0eeea ("xen-netfront: Fix Rx stall during network
stress and OOM") caused the refill timer to be triggerred almost on
all invocations of xennet_alloc_rx_buffers for certain workloads.
This reworks the fix by reverting to the old behaviour and taking into
consideration the skb allocation failure. Refill timer is now triggered
on insufficient requests or skb allocation failure.

Signed-off-by: Vineeth Remanan Pillai <vineethp@amazon.com>
Fixes: 90c311b0eeea (xen-netfront: Fix Rx stall during network stress and OOM)
Reported-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 8315fe73ecd0..9dba69731f30 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -281,6 +281,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 {
 	RING_IDX req_prod = queue->rx.req_prod_pvt;
 	int notify;
+	int err = 0;
 
 	if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 		return;
@@ -295,8 +296,10 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 		struct xen_netif_rx_request *req;
 
 		skb = xennet_alloc_one_rx_buffer(queue);
-		if (!skb)
+		if (!skb) {
+			err = -ENOMEM;
 			break;
+		}
 
 		id = xennet_rxidx(req_prod);
 
@@ -320,8 +323,13 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 
 	queue->rx.req_prod_pvt = req_prod;
 
-	/* Not enough requests? Try again later. */
-	if (req_prod - queue->rx.sring->req_prod < NET_RX_SLOTS_MIN) {
+	/* Try again later if there are not enough requests or skb allocation
+	 * failed.
+	 * Enough requests is quantified as the sum of newly created slots and
+	 * the unconsumed slots at the backend.
+	 */
+	if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN ||
+	    unlikely(err)) {
 		mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
 		return;
 	}

From 98e3862ca2b1ae595a13805dcab4c3a6d7718f4d Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 7 Feb 2017 12:59:47 -0800
Subject: [PATCH 302/322] kcm: fix 0-length case for kcm_sendmsg()

Dmitry reported a kernel warning:

 WARNING: CPU: 3 PID: 2936 at net/kcm/kcmsock.c:627
 kcm_write_msgs+0x12e3/0x1b90 net/kcm/kcmsock.c:627
 CPU: 3 PID: 2936 Comm: a.out Not tainted 4.10.0-rc6+ #209
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
 Call Trace:
  __dump_stack lib/dump_stack.c:15 [inline]
  dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
  panic+0x1fb/0x412 kernel/panic.c:179
  __warn+0x1c4/0x1e0 kernel/panic.c:539
  warn_slowpath_null+0x2c/0x40 kernel/panic.c:582
  kcm_write_msgs+0x12e3/0x1b90 net/kcm/kcmsock.c:627
  kcm_sendmsg+0x163a/0x2200 net/kcm/kcmsock.c:1029
  sock_sendmsg_nosec net/socket.c:635 [inline]
  sock_sendmsg+0xca/0x110 net/socket.c:645
  sock_write_iter+0x326/0x600 net/socket.c:848
  new_sync_write fs/read_write.c:499 [inline]
  __vfs_write+0x483/0x740 fs/read_write.c:512
  vfs_write+0x187/0x530 fs/read_write.c:560
  SYSC_write fs/read_write.c:607 [inline]
  SyS_write+0xfb/0x230 fs/read_write.c:599
  entry_SYSCALL_64_fastpath+0x1f/0xc2

when calling syscall(__NR_write, sock2, 0x208aaf27ul, 0x0ul) on a KCM
seqpacket socket. It appears that kcm_sendmsg() does not handle len==0
case correctly, which causes an empty skb is allocated and queued.
Fix this by skipping the skb allocation for len==0 case.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 7e08a4d3d77d..64f0e8531af0 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -929,24 +929,26 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 			goto out_error;
 	}
 
-	/* New message, alloc head skb */
-	head = alloc_skb(0, sk->sk_allocation);
-	while (!head) {
-		kcm_push(kcm);
-		err = sk_stream_wait_memory(sk, &timeo);
-		if (err)
-			goto out_error;
-
+	if (msg_data_left(msg)) {
+		/* New message, alloc head skb */
 		head = alloc_skb(0, sk->sk_allocation);
+		while (!head) {
+			kcm_push(kcm);
+			err = sk_stream_wait_memory(sk, &timeo);
+			if (err)
+				goto out_error;
+
+			head = alloc_skb(0, sk->sk_allocation);
+		}
+
+		skb = head;
+
+		/* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
+		 * csum_and_copy_from_iter from skb_do_copy_data_nocache.
+		 */
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	skb = head;
-
-	/* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
-	 * csum_and_copy_from_iter from skb_do_copy_data_nocache.
-	 */
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
 start:
 	while (msg_data_left(msg)) {
 		bool merge = true;
@@ -1018,10 +1020,12 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	if (eor) {
 		bool not_busy = skb_queue_empty(&sk->sk_write_queue);
 
-		/* Message complete, queue it on send buffer */
-		__skb_queue_tail(&sk->sk_write_queue, head);
-		kcm->seq_skb = NULL;
-		KCM_STATS_INCR(kcm->stats.tx_msgs);
+		if (head) {
+			/* Message complete, queue it on send buffer */
+			__skb_queue_tail(&sk->sk_write_queue, head);
+			kcm->seq_skb = NULL;
+			KCM_STATS_INCR(kcm->stats.tx_msgs);
+		}
 
 		if (msg->msg_flags & MSG_BATCH) {
 			kcm->tx_wait_more = true;

From 5a70348e1187c5bf1cbd0ec51843f36befed1c2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Br=C3=BCns?= <stefan.bruens@rwth-aachen.de>
Date: Wed, 8 Feb 2017 02:46:32 +0100
Subject: [PATCH 303/322] sierra_net: Add support for IPv6 and Dual-Stack Link
 Sense Indications
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a context is configured as dualstack ("IPv4v6"), the modem indicates
the context activation with a slightly different indication message.
The dual-stack indication omits the link_type (IPv4/v6) and adds
additional address fields.
IPv6 LSIs are identical to IPv4 LSIs, but have a different link type.

Signed-off-by: Stefan Brüns <stefan.bruens@rwth-aachen.de>
Reviewed-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/sierra_net.c | 101 +++++++++++++++++++++++------------
 1 file changed, 66 insertions(+), 35 deletions(-)

diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 12071f1582df..631b1ed86610 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -73,8 +73,6 @@ static	atomic_t iface_counter = ATOMIC_INIT(0);
 /* Private data structure */
 struct sierra_net_data {
 
-	u8 ethr_hdr_tmpl[ETH_HLEN]; /* ethernet header template for rx'd pkts */
-
 	u16 link_up;		/* air link up or down */
 	u8 tx_hdr_template[4];	/* part of HIP hdr for tx'd packets */
 
@@ -122,6 +120,7 @@ struct param {
 
 /* LSI Protocol types */
 #define SIERRA_NET_PROTOCOL_UMTS      0x01
+#define SIERRA_NET_PROTOCOL_UMTS_DS   0x04
 /* LSI Coverage */
 #define SIERRA_NET_COVERAGE_NONE      0x00
 #define SIERRA_NET_COVERAGE_NOPACKET  0x01
@@ -129,7 +128,8 @@ struct param {
 /* LSI Session */
 #define SIERRA_NET_SESSION_IDLE       0x00
 /* LSI Link types */
-#define SIERRA_NET_AS_LINK_TYPE_IPv4  0x00
+#define SIERRA_NET_AS_LINK_TYPE_IPV4  0x00
+#define SIERRA_NET_AS_LINK_TYPE_IPV6  0x02
 
 struct lsi_umts {
 	u8 protocol;
@@ -137,9 +137,14 @@ struct lsi_umts {
 	__be16 length;
 	/* eventually use a union for the rest - assume umts for now */
 	u8 coverage;
-	u8 unused2[41];
+	u8 network_len; /* network name len */
+	u8 network[40]; /* network name (UCS2, bigendian) */
 	u8 session_state;
 	u8 unused3[33];
+} __packed;
+
+struct lsi_umts_single {
+	struct lsi_umts lsi;
 	u8 link_type;
 	u8 pdp_addr_len; /* NW-supplied PDP address len */
 	u8 pdp_addr[16]; /* NW-supplied PDP address (bigendian)) */
@@ -158,10 +163,31 @@ struct lsi_umts {
 	u8 reserved[8];
 } __packed;
 
+struct lsi_umts_dual {
+	struct lsi_umts lsi;
+	u8 pdp_addr4_len; /* NW-supplied PDP IPv4 address len */
+	u8 pdp_addr4[4];  /* NW-supplied PDP IPv4 address (bigendian)) */
+	u8 pdp_addr6_len; /* NW-supplied PDP IPv6 address len */
+	u8 pdp_addr6[16]; /* NW-supplied PDP IPv6 address (bigendian)) */
+	u8 unused4[23];
+	u8 dns1_addr4_len; /* NW-supplied 1st DNS v4 address len (bigendian) */
+	u8 dns1_addr4[4];  /* NW-supplied 1st DNS v4 address */
+	u8 dns1_addr6_len; /* NW-supplied 1st DNS v6 address len */
+	u8 dns1_addr6[16]; /* NW-supplied 1st DNS v6 address (bigendian)*/
+	u8 dns2_addr4_len; /* NW-supplied 2nd DNS v4 address len (bigendian) */
+	u8 dns2_addr4[4];  /* NW-supplied 2nd DNS v4 address */
+	u8 dns2_addr6_len; /* NW-supplied 2nd DNS v6 address len */
+	u8 dns2_addr6[16]; /* NW-supplied 2nd DNS v6 address (bigendian)*/
+	u8 unused5[68];
+} __packed;
+
 #define SIERRA_NET_LSI_COMMON_LEN      4
-#define SIERRA_NET_LSI_UMTS_LEN        (sizeof(struct lsi_umts))
+#define SIERRA_NET_LSI_UMTS_LEN        (sizeof(struct lsi_umts_single))
 #define SIERRA_NET_LSI_UMTS_STATUS_LEN \
 	(SIERRA_NET_LSI_UMTS_LEN - SIERRA_NET_LSI_COMMON_LEN)
+#define SIERRA_NET_LSI_UMTS_DS_LEN     (sizeof(struct lsi_umts_dual))
+#define SIERRA_NET_LSI_UMTS_DS_STATUS_LEN \
+	(SIERRA_NET_LSI_UMTS_DS_LEN - SIERRA_NET_LSI_COMMON_LEN)
 
 /* Forward definitions */
 static void sierra_sync_timer(unsigned long syncdata);
@@ -190,10 +216,11 @@ static inline void sierra_net_set_private(struct usbnet *dev,
 	dev->data[0] = (unsigned long)priv;
 }
 
-/* is packet IPv4 */
+/* is packet IPv4/IPv6 */
 static inline int is_ip(struct sk_buff *skb)
 {
-	return skb->protocol == cpu_to_be16(ETH_P_IP);
+	return skb->protocol == cpu_to_be16(ETH_P_IP) ||
+	       skb->protocol == cpu_to_be16(ETH_P_IPV6);
 }
 
 /*
@@ -349,38 +376,43 @@ static inline int sierra_net_is_valid_addrlen(u8 len)
 static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen)
 {
 	struct lsi_umts *lsi = (struct lsi_umts *)data;
+	u32 expected_length;
 
-	if (datalen < sizeof(struct lsi_umts)) {
-		netdev_err(dev->net, "%s: Data length %d, exp %Zu\n",
-				__func__, datalen,
-				sizeof(struct lsi_umts));
-		return -1;
-	}
-
-	if (lsi->length != cpu_to_be16(SIERRA_NET_LSI_UMTS_STATUS_LEN)) {
-		netdev_err(dev->net, "%s: LSI_UMTS_STATUS_LEN %d, exp %u\n",
-				__func__, be16_to_cpu(lsi->length),
-				(u32)SIERRA_NET_LSI_UMTS_STATUS_LEN);
+	if (datalen < sizeof(struct lsi_umts_single)) {
+		netdev_err(dev->net, "%s: Data length %d, exp >= %Zu\n",
+			   __func__, datalen, sizeof(struct lsi_umts_single));
 		return -1;
 	}
 
 	/* Validate the protocol  - only support UMTS for now */
-	if (lsi->protocol != SIERRA_NET_PROTOCOL_UMTS) {
+	if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS) {
+		struct lsi_umts_single *single = (struct lsi_umts_single *)lsi;
+
+		/* Validate the link type */
+		if (single->link_type != SIERRA_NET_AS_LINK_TYPE_IPV4 &&
+		    single->link_type != SIERRA_NET_AS_LINK_TYPE_IPV6) {
+			netdev_err(dev->net, "Link type unsupported: 0x%02x\n",
+				   single->link_type);
+			return -1;
+		}
+		expected_length = SIERRA_NET_LSI_UMTS_STATUS_LEN;
+	} else if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS_DS) {
+		expected_length = SIERRA_NET_LSI_UMTS_DS_STATUS_LEN;
+	} else {
 		netdev_err(dev->net, "Protocol unsupported, 0x%02x\n",
-			lsi->protocol);
+			   lsi->protocol);
 		return -1;
 	}
 
-	/* Validate the link type */
-	if (lsi->link_type != SIERRA_NET_AS_LINK_TYPE_IPv4) {
-		netdev_err(dev->net, "Link type unsupported: 0x%02x\n",
-			lsi->link_type);
+	if (be16_to_cpu(lsi->length) != expected_length) {
+		netdev_err(dev->net, "%s: LSI_UMTS_STATUS_LEN %d, exp %u\n",
+			   __func__, be16_to_cpu(lsi->length), expected_length);
 		return -1;
 	}
 
 	/* Validate the coverage */
-	if (lsi->coverage == SIERRA_NET_COVERAGE_NONE
-	   || lsi->coverage == SIERRA_NET_COVERAGE_NOPACKET) {
+	if (lsi->coverage == SIERRA_NET_COVERAGE_NONE ||
+	    lsi->coverage == SIERRA_NET_COVERAGE_NOPACKET) {
 		netdev_err(dev->net, "No coverage, 0x%02x\n", lsi->coverage);
 		return 0;
 	}
@@ -652,7 +684,6 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf)
 	u8	numendpoints;
 	u16	fwattr = 0;
 	int	status;
-	struct ethhdr *eth;
 	struct sierra_net_data *priv;
 	static const u8 sync_tmplate[sizeof(priv->sync_msg)] = {
 		0x00, 0x00, SIERRA_NET_HIP_MSYNC_ID, 0x00};
@@ -690,11 +721,6 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->net->dev_addr[ETH_ALEN-2] = atomic_inc_return(&iface_counter);
 	dev->net->dev_addr[ETH_ALEN-1] = ifacenum;
 
-	/* we will have to manufacture ethernet headers, prepare template */
-	eth = (struct ethhdr *)priv->ethr_hdr_tmpl;
-	memcpy(&eth->h_dest, dev->net->dev_addr, ETH_ALEN);
-	eth->h_proto = cpu_to_be16(ETH_P_IP);
-
 	/* prepare shutdown message template */
 	memcpy(priv->shdwn_msg, shdwn_tmplate, sizeof(priv->shdwn_msg));
 	/* set context index initially to 0 - prepares tx hdr template */
@@ -824,9 +850,14 @@ static int sierra_net_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
 		skb_pull(skb, hh.hdrlen);
 
-		/* We are going to accept this packet, prepare it */
-		memcpy(skb->data, sierra_net_get_private(dev)->ethr_hdr_tmpl,
-			ETH_HLEN);
+		/* We are going to accept this packet, prepare it.
+		 * In case protocol is IPv6, keep it, otherwise force IPv4.
+		 */
+		skb_reset_mac_header(skb);
+		if (eth_hdr(skb)->h_proto != cpu_to_be16(ETH_P_IPV6))
+			eth_hdr(skb)->h_proto = cpu_to_be16(ETH_P_IP);
+		eth_zero_addr(eth_hdr(skb)->h_source);
+		memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN);
 
 		/* Last packet in batch handled by usbnet */
 		if (hh.payload_len.word == skb->len)

From 764895d3039e903dac3a70f219949efe43d036a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Br=C3=BCns?= <stefan.bruens@rwth-aachen.de>
Date: Wed, 8 Feb 2017 02:46:33 +0100
Subject: [PATCH 304/322] sierra_net: Skip validating irrelevant fields for
 IDLE LSIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the context is deactivated, the link_type is set to 0xff, which
triggers a warning message, and results in a wrong link status, as
the LSI is ignored.

Signed-off-by: Stefan Brüns <stefan.bruens@rwth-aachen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/sierra_net.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 631b1ed86610..d9440bc022f2 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -384,6 +384,13 @@ static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen)
 		return -1;
 	}
 
+	/* Validate the session state */
+	if (lsi->session_state == SIERRA_NET_SESSION_IDLE) {
+		netdev_err(dev->net, "Session idle, 0x%02x\n",
+			   lsi->session_state);
+		return 0;
+	}
+
 	/* Validate the protocol  - only support UMTS for now */
 	if (lsi->protocol == SIERRA_NET_PROTOCOL_UMTS) {
 		struct lsi_umts_single *single = (struct lsi_umts_single *)lsi;
@@ -417,13 +424,6 @@ static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen)
 		return 0;
 	}
 
-	/* Validate the session state */
-	if (lsi->session_state == SIERRA_NET_SESSION_IDLE) {
-		netdev_err(dev->net, "Session idle, 0x%02x\n",
-			lsi->session_state);
-		return 0;
-	}
-
 	/* Set link_sense true */
 	return 1;
 }

From e2e004acc7cbe3c531e752a270a74e95cde3ea48 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Wed, 8 Feb 2017 10:57:37 +0000
Subject: [PATCH 305/322] xen-netfront: Improve error handling during
 initialization

This fixes a crash when running out of grant refs when creating many
queues across many netdevs.

* If creating queues fails (i.e. there are no grant refs available),
call xenbus_dev_fatal() to ensure that the xenbus device is set to the
closed state.
* If no queues are created, don't call xennet_disconnect_backend as
netdev->real_num_tx_queues will not have been set correctly.
* If setup_netfront() fails, ensure that all the queues created are
cleaned up, not just those that have been set up.
* If any queues were set up and an error occurs, call
xennet_destroy_queues() to clean up the napi context.
* If any fatal error occurs, unregister and destroy the netdev to avoid
leaving around a half setup network device.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 9dba69731f30..d3812581c6c0 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1830,27 +1830,19 @@ static int talk_to_netback(struct xenbus_device *dev,
 		xennet_destroy_queues(info);
 
 	err = xennet_create_queues(info, &num_queues);
-	if (err < 0)
-		goto destroy_ring;
+	if (err < 0) {
+		xenbus_dev_fatal(dev, err, "creating queues");
+		kfree(info->queues);
+		info->queues = NULL;
+		goto out;
+	}
 
 	/* Create shared ring, alloc event channel -- for each queue */
 	for (i = 0; i < num_queues; ++i) {
 		queue = &info->queues[i];
 		err = setup_netfront(dev, queue, feature_split_evtchn);
-		if (err) {
-			/* setup_netfront() will tidy up the current
-			 * queue on error, but we need to clean up
-			 * those already allocated.
-			 */
-			if (i > 0) {
-				rtnl_lock();
-				netif_set_real_num_tx_queues(info->netdev, i);
-				rtnl_unlock();
-				goto destroy_ring;
-			} else {
-				goto out;
-			}
-		}
+		if (err)
+			goto destroy_ring;
 	}
 
 again:
@@ -1940,9 +1932,10 @@ static int talk_to_netback(struct xenbus_device *dev,
 	xenbus_transaction_end(xbt, 1);
  destroy_ring:
 	xennet_disconnect_backend(info);
-	kfree(info->queues);
-	info->queues = NULL;
+	xennet_destroy_queues(info);
  out:
+	unregister_netdev(info->netdev);
+	xennet_free_netdev(info->netdev);
 	return err;
 }
 

From 9c8bb163ae784be4f79ae504e78c862806087c54 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 8 Feb 2017 21:16:45 +0800
Subject: [PATCH 306/322] igmp, mld: Fix memory leak in igmpv3/mld_del_delrec()

In function igmpv3/mld_add_delrec() we allocate pmc and put it in
idev->mc_tomb, so we should free it when we don't need it in del_delrec().
But I removed kfree(pmc) incorrectly in latest two patches. Now fix it.

Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info when ...")
Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when ...")
Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c  | 1 +
 net/ipv6/mcast.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5b15459955f8..44fd86de2823 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1172,6 +1172,7 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 				psf->sf_crcount = im->crcount;
 		}
 		in_dev_put(pmc->interface);
+		kfree(pmc);
 	}
 	spin_unlock_bh(&im->lock);
 }
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7139fffd61b6..1bdc703cb966 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -779,6 +779,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 				psf->sf_crcount = im->mca_crcount;
 		}
 		in6_dev_put(pmc->idev);
+		kfree(pmc);
 	}
 	spin_unlock_bh(&im->mca_lock);
 }

From bb1a619735b4660f21bce3e728b937640024b4ad Mon Sep 17 00:00:00 2001
From: Yendapally Reddy Dhananjaya Reddy <yendapally.reddy@broadcom.com>
Date: Wed, 8 Feb 2017 17:14:26 -0500
Subject: [PATCH 307/322] net: phy: Initialize mdio clock at probe function

USB PHYs need the MDIO clock divisor enabled earlier to work.
Initialize mdio clock divisor in probe function. The ext bus
bit available in the same register will be used by mdio mux
to enable external mdio.

Signed-off-by: Yendapally Reddy Dhananjaya Reddy <yendapally.reddy@broadcom.com>
Fixes: ddc24ae1 ("net: phy: Broadcom iProc MDIO bus driver")
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-bcm-iproc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c
index c0b4e65267af..46fe1ae919a3 100644
--- a/drivers/net/phy/mdio-bcm-iproc.c
+++ b/drivers/net/phy/mdio-bcm-iproc.c
@@ -81,8 +81,6 @@ static int iproc_mdio_read(struct mii_bus *bus, int phy_id, int reg)
 	if (rc)
 		return rc;
 
-	iproc_mdio_config_clk(priv->base);
-
 	/* Prepare the read operation */
 	cmd = (MII_DATA_TA_VAL << MII_DATA_TA_SHIFT) |
 		(reg << MII_DATA_RA_SHIFT) |
@@ -112,8 +110,6 @@ static int iproc_mdio_write(struct mii_bus *bus, int phy_id,
 	if (rc)
 		return rc;
 
-	iproc_mdio_config_clk(priv->base);
-
 	/* Prepare the write operation */
 	cmd = (MII_DATA_TA_VAL << MII_DATA_TA_SHIFT) |
 		(reg << MII_DATA_RA_SHIFT) |
@@ -163,6 +159,8 @@ static int iproc_mdio_probe(struct platform_device *pdev)
 	bus->read = iproc_mdio_read;
 	bus->write = iproc_mdio_write;
 
+	iproc_mdio_config_clk(priv->base);
+
 	rc = of_mdiobus_register(bus, pdev->dev.of_node);
 	if (rc) {
 		dev_err(&pdev->dev, "MDIO bus registration failed\n");

From ffdadd68af5a397b8a52289ab39d62e1acb39e63 Mon Sep 17 00:00:00 2001
From: ojab <ojab@ojab.ru>
Date: Wed, 28 Dec 2016 11:05:24 +0000
Subject: [PATCH 308/322] scsi: mpt3sas: disable ASPM for MPI2 controllers

MPI2 controllers sometimes got lost (i.e. disappear from
/sys/bus/pci/devices) if ASMP is enabled.

Signed-off-by: Slava Kardakov <ojab@ojab.ru>
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=60644
Cc: <stable@vger.kernel.org>
Acked-by: Sreekanth Reddy <Sreekanth.Reddy@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 268103182d34..0b5b423b1db0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -51,6 +51,7 @@
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/pci-aspm.h>
 #include <linux/interrupt.h>
 #include <linux/aer.h>
 #include <linux/raid_class.h>
@@ -8761,6 +8762,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	switch (hba_mpi_version) {
 	case MPI2_VERSION:
+		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
+			PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM);
 		/* Use mpt2sas driver host template for SAS 2.0 HBA's */
 		shost = scsi_host_alloc(&mpt2sas_driver_template,
 		  sizeof(struct MPT3SAS_ADAPTER));

From 8af8e1c22f9994bb1849c01d66c24fe23f9bc9a0 Mon Sep 17 00:00:00 2001
From: Dave Carroll <david.carroll@microsemi.com>
Date: Thu, 9 Feb 2017 11:04:47 -0700
Subject: [PATCH 309/322] scsi: aacraid: Fix INTx/MSI-x issue with older
 controllers

commit 78cbccd3bd68 ("aacraid: Fix for KDUMP driver hang")

caused a problem on older controllers which do not support MSI-x (namely
ASR3405,ASR3805). This patch conditionalizes the previous patch to
controllers which support MSI-x

Cc: <stable@vger.kernel.org> # v4.7+
Fixes: 78cbccd3bd68 ("aacraid: Fix for KDUMP driver hang")
Reported-by: Arkadiusz Miskiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Dave Carroll <david.carroll@microsemi.com>
Reviewed-by: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/comminit.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 4f56b1003cc7..5b48bedd7c38 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -50,9 +50,13 @@ struct aac_common aac_config = {
 
 static inline int aac_is_msix_mode(struct aac_dev *dev)
 {
-	u32 status;
+	u32 status = 0;
 
-	status = src_readl(dev, MUnit.OMR);
+	if (dev->pdev->device == PMC_DEVICE_S6 ||
+		dev->pdev->device == PMC_DEVICE_S7 ||
+		dev->pdev->device == PMC_DEVICE_S8) {
+		status = src_readl(dev, MUnit.OMR);
+	}
 	return (status & AAC_INT_MODE_MSIX);
 }
 

From 2dfa6688aafdc3f74efeb1cf05fb871465d67f79 Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier@linux.vnet.ibm.com>
Date: Wed, 8 Feb 2017 15:34:22 +0100
Subject: [PATCH 310/322] scsi: zfcp: fix use-after-free by not tracing WKA
 port open/close on failed send

Dan Carpenter kindly reported:
<quote>
The patch d27a7cb91960: "zfcp: trace on request for open and close of
WKA port" from Aug 10, 2016, leads to the following static checker
warning:

	drivers/s390/scsi/zfcp_fsf.c:1615 zfcp_fsf_open_wka_port()
	warn: 'req' was already freed.

drivers/s390/scsi/zfcp_fsf.c
  1609          zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
  1610          retval = zfcp_fsf_req_send(req);
  1611          if (retval)
  1612                  zfcp_fsf_req_free(req);
                                          ^^^
Freed.

  1613  out:
  1614          spin_unlock_irq(&qdio->req_q_lock);
  1615          if (req && !IS_ERR(req))
  1616                  zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id);
                                                                  ^^^^^^^^^^^
Use after free.

  1617          return retval;
  1618  }

Same thing for zfcp_fsf_close_wka_port() as well.
</quote>

Rather than relying on req being NULL (or ERR_PTR) for all cases where
we don't want to trace or should not trace,
simply check retval which is unconditionally initialized with -EIO != 0
and it can only become 0 on successful retval = zfcp_fsf_req_send(req).
With that we can also remove the then again unnecessary unconditional
initialization of req which was introduced with that earlier commit.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Benjamin Block <bblock@linux.vnet.ibm.com>
Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
Fixes: d27a7cb91960 ("zfcp: trace on request for open and close of WKA port")
Cc: <stable@vger.kernel.org> #2.6.38+
Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/s390/scsi/zfcp_fsf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 75f820ca17b7..27ff38f839fc 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -1583,7 +1583,7 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req)
 int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port)
 {
 	struct zfcp_qdio *qdio = wka_port->adapter->qdio;
-	struct zfcp_fsf_req *req = NULL;
+	struct zfcp_fsf_req *req;
 	int retval = -EIO;
 
 	spin_lock_irq(&qdio->req_q_lock);
@@ -1612,7 +1612,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port)
 		zfcp_fsf_req_free(req);
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
-	if (req && !IS_ERR(req))
+	if (!retval)
 		zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id);
 	return retval;
 }
@@ -1638,7 +1638,7 @@ static void zfcp_fsf_close_wka_port_handler(struct zfcp_fsf_req *req)
 int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port)
 {
 	struct zfcp_qdio *qdio = wka_port->adapter->qdio;
-	struct zfcp_fsf_req *req = NULL;
+	struct zfcp_fsf_req *req;
 	int retval = -EIO;
 
 	spin_lock_irq(&qdio->req_q_lock);
@@ -1667,7 +1667,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port)
 		zfcp_fsf_req_free(req);
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
-	if (req && !IS_ERR(req))
+	if (!retval)
 		zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id);
 	return retval;
 }

From 0839ffb83e44e5ff1843e932592525fc2bff23ff Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 9 Feb 2017 14:20:42 -0500
Subject: [PATCH 311/322] nfsd: Revert "nfsd: special case truncates some more"

This patch incorrectly attempted nested mnt_want_write, and incorrectly
disabled nfsd's owner override for truncate.  We'll fix those problems
and make another attempt soon, for the moment I think the safest is to
revert.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/vfs.c | 97 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 60 insertions(+), 37 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ca13236dbb1f..26c6fdb4bf67 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -332,6 +332,37 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
 	}
 }
 
+static __be32
+nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		struct iattr *iap)
+{
+	struct inode *inode = d_inode(fhp->fh_dentry);
+	int host_err;
+
+	if (iap->ia_size < inode->i_size) {
+		__be32 err;
+
+		err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+				NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+		if (err)
+			return err;
+	}
+
+	host_err = get_write_access(inode);
+	if (host_err)
+		goto out_nfserrno;
+
+	host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+	if (host_err)
+		goto out_put_write_access;
+	return 0;
+
+out_put_write_access:
+	put_write_access(inode);
+out_nfserrno:
+	return nfserrno(host_err);
+}
+
 /*
  * Set various file attributes.  After this call fhp needs an fh_put.
  */
@@ -346,6 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	__be32		err;
 	int		host_err;
 	bool		get_write_count;
+	int		size_change = 0;
 
 	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
 		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -358,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 	/* Get inode */
 	err = fh_verify(rqstp, fhp, ftype, accmode);
 	if (err)
-		return err;
+		goto out;
 	if (get_write_count) {
 		host_err = fh_want_write(fhp);
 		if (host_err)
-			goto out_host_err;
+			return nfserrno(host_err);
 	}
 
 	dentry = fhp->fh_dentry;
@@ -373,59 +405,50 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 		iap->ia_valid &= ~ATTR_MODE;
 
 	if (!iap->ia_valid)
-		return 0;
+		goto out;
 
 	nfsd_sanitize_attrs(inode, iap);
 
-	if (check_guard && guardtime != inode->i_ctime.tv_sec)
-		return nfserr_notsync;
-
 	/*
 	 * The size case is special, it changes the file in addition to the
-	 * attributes, and file systems don't expect it to be mixed with
-	 * "random" attribute changes.  We thus split out the size change
-	 * into a separate call for vfs_truncate, and do the rest as a
-	 * a separate setattr call.
+	 * attributes.
 	 */
 	if (iap->ia_valid & ATTR_SIZE) {
-		struct path path = {
-			.mnt	= fhp->fh_export->ex_path.mnt,
-			.dentry	= dentry,
-		};
-		bool implicit_mtime = false;
+		err = nfsd_get_write_access(rqstp, fhp, iap);
+		if (err)
+			goto out;
+		size_change = 1;
 
 		/*
-		 * vfs_truncate implicity updates the mtime IFF the file size
-		 * actually changes.  Avoid the additional seattr call below if
-		 * the only other attribute that the client sends is the mtime.
+		 * RFC5661, Section 18.30.4:
+		 *   Changing the size of a file with SETATTR indirectly
+		 *   changes the time_modify and change attributes.
+		 *
+		 * (and similar for the older RFCs)
 		 */
-		if (iap->ia_size != i_size_read(inode) &&
-		    ((iap->ia_valid & ~(ATTR_SIZE | ATTR_MTIME)) == 0))
-			implicit_mtime = true;
-
-		host_err = vfs_truncate(&path, iap->ia_size);
-		if (host_err)
-			goto out_host_err;
-
-		iap->ia_valid &= ~ATTR_SIZE;
-		if (implicit_mtime)
-			iap->ia_valid &= ~ATTR_MTIME;
-		if (!iap->ia_valid)
-			goto done;
+		if (iap->ia_size != i_size_read(inode))
+			iap->ia_valid |= ATTR_MTIME;
 	}
 
 	iap->ia_valid |= ATTR_CTIME;
 
+	if (check_guard && guardtime != inode->i_ctime.tv_sec) {
+		err = nfserr_notsync;
+		goto out_put_write_access;
+	}
+
 	fh_lock(fhp);
 	host_err = notify_change(dentry, iap, NULL);
 	fh_unlock(fhp);
-	if (host_err)
-		goto out_host_err;
+	err = nfserrno(host_err);
 
-done:
-	host_err = commit_metadata(fhp);
-out_host_err:
-	return nfserrno(host_err);
+out_put_write_access:
+	if (size_change)
+		put_write_access(inode);
+	if (!err)
+		err = nfserrno(commit_metadata(fhp));
+out:
+	return err;
 }
 
 #if defined(CONFIG_NFSD_V4)

From 451d24d1e5f40bad000fa9abe36ddb16fc9928cb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 31 Jan 2017 11:27:10 +0100
Subject: [PATCH 312/322] perf/core: Fix crash in perf_event_read()

Alexei had his box explode because doing read() on a package
(rapl/uncore) event that isn't currently scheduled in ends up doing an
out-of-bounds load.

Rework the code to more explicitly deal with event->oncpu being -1.

Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Tested-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: David Carrillo-Cisneros <davidcc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: eranian@google.com
Fixes: d6a2f9035bfc ("perf/core: Introduce PMU_EV_CAP_READ_ACTIVE_PKG")
Link: http://lkml.kernel.org/r/20170131102710.GL6515@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index e5aaa806702d..e235bb991bdd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3487,14 +3487,15 @@ struct perf_read_data {
 	int ret;
 };
 
-static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
 {
-	int event_cpu = event->oncpu;
 	u16 local_pkg, event_pkg;
 
 	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
-		event_pkg =  topology_physical_package_id(event_cpu);
-		local_pkg =  topology_physical_package_id(local_cpu);
+		int local_cpu = smp_processor_id();
+
+		event_pkg = topology_physical_package_id(event_cpu);
+		local_pkg = topology_physical_package_id(local_cpu);
 
 		if (event_pkg == local_pkg)
 			return local_cpu;
@@ -3624,7 +3625,7 @@ u64 perf_event_read_local(struct perf_event *event)
 
 static int perf_event_read(struct perf_event *event, bool group)
 {
-	int ret = 0, cpu_to_read, local_cpu;
+	int event_cpu, ret = 0;
 
 	/*
 	 * If event is enabled and currently active on a CPU, update the
@@ -3637,21 +3638,25 @@ static int perf_event_read(struct perf_event *event, bool group)
 			.ret = 0,
 		};
 
-		local_cpu = get_cpu();
-		cpu_to_read = find_cpu_to_read(event, local_cpu);
-		put_cpu();
+		event_cpu = READ_ONCE(event->oncpu);
+		if ((unsigned)event_cpu >= nr_cpu_ids)
+			return 0;
+
+		preempt_disable();
+		event_cpu = __perf_event_read_cpu(event, event_cpu);
 
 		/*
 		 * Purposely ignore the smp_call_function_single() return
 		 * value.
 		 *
-		 * If event->oncpu isn't a valid CPU it means the event got
+		 * If event_cpu isn't a valid CPU it means the event got
 		 * scheduled out and that will have updated the event count.
 		 *
 		 * Therefore, either way, we'll have an up-to-date event count
 		 * after this.
 		 */
-		(void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
+		(void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
+		preempt_enable();
 		ret = data.ret;
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;

From 7bdb59f1ad474bd7161adc8f923cdef10f2638d1 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 7 Feb 2017 17:44:54 +0100
Subject: [PATCH 313/322] tick/nohz: Fix possible missing clock reprog after
 tick soft restart

ts->next_tick keeps track of the next tick deadline in order to optimize
clock programmation on irq exit and avoid redundant clock device writes.

Now if ts->next_tick missed an update, we may spuriously miss a clock
reprog later as the nohz code is fooled by an obsolete next_tick value.

This is what happens here on a specific path: when we observe an
expired timer from the nohz update code on irq exit, we perform a soft
tick restart which simply fires the closest possible tick without
actually exiting the nohz mode and restoring a periodic state. But we
forget to update ts->next_tick accordingly.

As a result, after the next tick resulting from such soft tick restart,
the nohz code sees a stale value on ts->next_tick which doesn't match
the clock deadline that just expired. If that obsolete ts->next_tick
value happens to collide with the actual next tick deadline to be
scheduled, we may spuriously bypass the clock reprogramming. In the
worst case, the tick may never fire again.

Fix this with a ts->next_tick reset on soft tick restart.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed: Wanpeng Li <wanpeng.li@hotmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1486485894-29173-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/time/tick-sched.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 74e0388cc88d..fc6f740d0277 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -725,6 +725,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		 */
 		if (delta == 0) {
 			tick_nohz_restart(ts, now);
+			/*
+			 * Make sure next tick stop doesn't get fooled by past
+			 * clock deadline
+			 */
+			ts->next_tick = 0;
 			goto out;
 		}
 	}

From f2e04214ef7f7e49d1e06109ad1b2718155dab25 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 9 Feb 2017 16:08:41 +0100
Subject: [PATCH 314/322] x86/tsc: Avoid the large time jump when sanitizing
 TSC ADJUST

Olof reported that on a machine which has a BIOS wreckaged TSC the
timestamps in dmesg are making a large jump because the TSC value is
jumping forward after resetting the TSC ADJUST register to a sane value.

This can be avoided by calling the TSC ADJUST saniziting function before
initializing the per cpu sched clock machinery. That takes the offset into
account and avoid the time jump.

What cannot be avoided is that the 'Firmware Bug' warnings on the secondary
CPUs are printed with the large time offsets because it would be too much
effort and ugly hackery to print those warnings into a buffer and emit them
after the adjustemt on the starting CPUs. It's a firmware bug and should be
fixed in firmware. The weird timestamps are collateral damage and just
illustrate the sillyness of the BIOS folks:

[    0.397445] smp: Bringing up secondary CPUs ...
[    0.402100] x86: Booting SMP configuration:
[    0.406343] .... node  #0, CPUs:      #1
[1265776479.930667] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU1: -2978888639183101
[1265776479.944664] TSC ADJUST synchronize: Reference CPU0: 0 CPU1: -2978888639183101
[    0.508119]  #2
[1265776480.032346] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU2: -2978888639183677
[1265776480.044192] TSC ADJUST synchronize: Reference CPU0: 0 CPU2: -2978888639183677
[    0.607643]  #3
[1265776480.131874] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU3: -2978888639184530
[1265776480.143720] TSC ADJUST synchronize: Reference CPU0: 0 CPU3: -2978888639184530
[    0.707108] smp: Brought up 1 node, 4 CPUs
[    0.711271] smpboot: Total of 4 processors activated (21698.88 BogoMIPS)

Reported-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170209151231.411460506@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e41af597aed8..37e7cf544e51 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1356,6 +1356,9 @@ void __init tsc_init(void)
 		(unsigned long)cpu_khz / 1000,
 		(unsigned long)cpu_khz % 1000);
 
+	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
+	tsc_store_and_check_tsc_adjust(true);
+
 	/*
 	 * Secondary CPUs do not run through tsc_init(), so set up
 	 * all the scale factors for all CPUs, assuming the same
@@ -1386,8 +1389,6 @@ void __init tsc_init(void)
 
 	if (unsynchronized_tsc())
 		mark_tsc_unstable("TSCs unsynchronized");
-	else
-		tsc_store_and_check_tsc_adjust(true);
 
 	check_system_tsc_reliable();
 

From 5f2e71e71410ecb858cfec184ba092adaca61626 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 9 Feb 2017 16:08:42 +0100
Subject: [PATCH 315/322] x86/tsc: Make the TSC ADJUST sanitizing work for
 tsc_reliable

When the TSC is marked reliable then the synchronization check is skipped,
but that also skips the TSC ADJUST sanitizing code. So on a machine with a
wreckaged BIOS the TSC deviation between CPUs might go unnoticed.

Let the TSC adjust sanitizing code run unconditionally and just skip the
expensive synchronization checks when TSC is marked reliable.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Olof Johansson <olof@lixom.net>
Link: http://lkml.kernel.org/r/20170209151231.491189912@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/tsc_sync.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index d0db011051a5..728f75378475 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -286,13 +286,6 @@ void check_tsc_sync_source(int cpu)
 	if (unsynchronized_tsc())
 		return;
 
-	if (tsc_clocksource_reliable) {
-		if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
-			pr_info(
-			"Skipped synchronization checks as TSC is reliable.\n");
-		return;
-	}
-
 	/*
 	 * Set the maximum number of test runs to
 	 *  1 if the CPU does not provide the TSC_ADJUST MSR
@@ -380,14 +373,19 @@ void check_tsc_sync_target(void)
 	int cpus = 2;
 
 	/* Also aborts if there is no TSC. */
-	if (unsynchronized_tsc() || tsc_clocksource_reliable)
+	if (unsynchronized_tsc())
 		return;
 
 	/*
 	 * Store, verify and sanitize the TSC adjust register. If
 	 * successful skip the test.
+	 *
+	 * The test is also skipped when the TSC is marked reliable. This
+	 * is true for SoCs which have no fallback clocksource. On these
+	 * SoCs the TSC is frequency synchronized, but still the TSC ADJUST
+	 * register might have been wreckaged by the BIOS..
 	 */
-	if (tsc_store_and_check_tsc_adjust(false)) {
+	if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
 		atomic_inc(&skip_test);
 		return;
 	}

From 146fbb766934dc003fcbf755b519acef683576bf Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 10 Feb 2017 12:54:05 +0300
Subject: [PATCH 316/322] x86/mm/ptdump: Fix soft lockup in page table walker

CONFIG_KASAN=y needs a lot of virtual memory mapped for its shadow.
In that case ptdump_walk_pgd_level_core() takes a lot of time to
walk across all page tables and doing this without
a rescheduling causes soft lockups:

 NMI watchdog: BUG: soft lockup - CPU#3 stuck for 23s! [swapper/0:1]
 ...
 Call Trace:
  ptdump_walk_pgd_level_core+0x40c/0x550
  ptdump_walk_pgd_level_checkwx+0x17/0x20
  mark_rodata_ro+0x13b/0x150
  kernel_init+0x2f/0x120
  ret_from_fork+0x2c/0x40

I guess that this issue might arise even without KASAN on huge machines
with several terabytes of RAM.

Stick cond_resched() in pgd loop to fix this.

Reported-by: Tobias Regnery <tobias.regnery@gmail.com>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: kasan-dev@googlegroups.com
Cc: Alexander Potapenko <glider@google.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170210095405.31802-1-aryabinin@virtuozzo.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/dump_pagetables.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49adaa1f..8aa6bea1cd6c 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -15,6 +15,7 @@
 #include <linux/debugfs.h>
 #include <linux/mm.h>
 #include <linux/init.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
 
 #include <asm/pgtable.h>
@@ -406,6 +407,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 		} else
 			note_page(m, &st, __pgprot(0), 1);
 
+		cond_resched();
 		start++;
 	}
 

From b85ea006b6bebb692628f11882af41c3e12e1e09 Mon Sep 17 00:00:00 2001
From: Kejian Yan <yankejian@huawei.com>
Date: Thu, 9 Feb 2017 11:46:15 +0000
Subject: [PATCH 317/322] net: hns: Fix the device being used for dma mapping
 during TX

This patch fixes the device being used to DMA map skb->data.
Erroneous device assignment causes the crash when SMMU is enabled.
This happens during TX since buffer gets DMA mapped with device
correspondign to net_device and gets unmapped using the device
related to DSAF.

Signed-off-by: Kejian Yan <yankejian@huawei.com>
Reviewed-by: Yisen Zhuang <yisen.zhuang@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 672b64606321..8aed72860e7c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -305,8 +305,8 @@ int hns_nic_net_xmit_hw(struct net_device *ndev,
 			struct hns_nic_ring_data *ring_data)
 {
 	struct hns_nic_priv *priv = netdev_priv(ndev);
-	struct device *dev = priv->dev;
 	struct hnae_ring *ring = ring_data->ring;
+	struct device *dev = ring_to_dev(ring);
 	struct netdev_queue *dev_queue;
 	struct skb_frag_struct *frag;
 	int buf_num;

From 7ba1b689038726d34e3244c1ac9e2e18c2ea4787 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 9 Feb 2017 14:12:11 +0100
Subject: [PATCH 318/322] NET: mkiss: Fix panic

If a USB-to-serial adapter is unplugged, the driver re-initializes, with
dev->hard_header_len and dev->addr_len set to zero, instead of the correct
values.  If then a packet is sent through the half-dead interface, the
kernel will panic due to running out of headroom in the skb when pushing
for the AX.25 headers resulting in this panic:

[<c0595468>] (skb_panic) from [<c0401f70>] (skb_push+0x4c/0x50)
[<c0401f70>] (skb_push) from [<bf0bdad4>] (ax25_hard_header+0x34/0xf4 [ax25])
[<bf0bdad4>] (ax25_hard_header [ax25]) from [<bf0d05d4>] (ax_header+0x38/0x40 [mkiss])
[<bf0d05d4>] (ax_header [mkiss]) from [<c041b584>] (neigh_compat_output+0x8c/0xd8)
[<c041b584>] (neigh_compat_output) from [<c043e7a8>] (ip_finish_output+0x2a0/0x914)
[<c043e7a8>] (ip_finish_output) from [<c043f948>] (ip_output+0xd8/0xf0)
[<c043f948>] (ip_output) from [<c043f04c>] (ip_local_out_sk+0x44/0x48)

This patch makes mkiss behave like the 6pack driver. 6pack does not
panic.  In 6pack.c sp_setup() (same function name here) the values for
dev->hard_header_len and dev->addr_len are set to the same values as in
my mkiss patch.

[ralf@linux-mips.org: Massages original submission to conform to the usual
standards for patch submissions.]

Signed-off-by: Thomas Osterried <thomas@osterried.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/mkiss.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index ece59c54a653..4a40a3d825b4 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -648,8 +648,8 @@ static void ax_setup(struct net_device *dev)
 {
 	/* Finish setting up the DEVICE info. */
 	dev->mtu             = AX_MTU;
-	dev->hard_header_len = 0;
-	dev->addr_len        = 0;
+	dev->hard_header_len = AX25_MAX_HEADER_LEN;
+	dev->addr_len        = AX25_ADDR_LEN;
 	dev->type            = ARPHRD_AX25;
 	dev->tx_queue_len    = 10;
 	dev->header_ops      = &ax25_header_ops;

From 74470954857c264168d2b5a113904cf0cfd27d18 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Mon, 30 Jan 2017 12:45:46 -0500
Subject: [PATCH 319/322] xen-netfront: Delete rx_refill_timer in
 xennet_disconnect_backend()

rx_refill_timer should be deleted as soon as we disconnect from the
backend since otherwise it is possible for the timer to go off before
we get to xennet_destroy_queues(). If this happens we may dereference
queue->rx.sring which is set to NULL in xennet_disconnect_backend().

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
CC: stable@vger.kernel.org
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index d3812581c6c0..1e4125a98291 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1387,6 +1387,8 @@ static void xennet_disconnect_backend(struct netfront_info *info)
 	for (i = 0; i < num_queues && info->queues; ++i) {
 		struct netfront_queue *queue = &info->queues[i];
 
+		del_timer_sync(&queue->rx_refill_timer);
+
 		if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
 			unbind_from_irqhandler(queue->tx_irq, queue);
 		if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
@@ -1741,7 +1743,6 @@ static void xennet_destroy_queues(struct netfront_info *info)
 
 		if (netif_running(info->netdev))
 			napi_disable(&queue->napi);
-		del_timer_sync(&queue->rx_refill_timer);
 		netif_napi_del(&queue->napi);
 	}
 

From 72fb96e7bdbbdd4421b0726992496531060f3636 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Feb 2017 16:15:52 -0800
Subject: [PATCH 320/322] l2tp: do not use udp_ioctl()

udp_ioctl(), as its name suggests, is used by UDP protocols,
but is also used by L2TP :(

L2TP should use its own handler, because it really does not
look the same.

SIOCINQ for instance should not assume UDP checksum or headers.

Thanks to Andrey and syzkaller team for providing the report
and a nice reproducer.

While crashes only happen on recent kernels (after commit
7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")), this
probably needs to be backported to older kernels.

Fixes: 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")
Fixes: 85584672012e ("udp: Fix udp_poll() and ioctl()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.h |  1 +
 net/l2tp/l2tp_ip.c   | 27 ++++++++++++++++++++++++++-
 net/l2tp/l2tp_ip6.c  |  2 +-
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 8f560f7140a0..aebf281d09ee 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -263,6 +263,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb,
 int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
 			 const struct l2tp_nl_cmd_ops *ops);
 void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
+int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 
 /* Session reference counts. Incremented when code obtains a reference
  * to a session.
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 3d73278b86ca..28c21546d5b6 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -11,6 +11,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <asm/ioctls.h>
 #include <linux/icmp.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -553,6 +554,30 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
 	return err ? err : copied;
 }
 
+int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	struct sk_buff *skb;
+	int amount;
+
+	switch (cmd) {
+	case SIOCOUTQ:
+		amount = sk_wmem_alloc_get(sk);
+		break;
+	case SIOCINQ:
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		skb = skb_peek(&sk->sk_receive_queue);
+		amount = skb ? skb->len : 0;
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		break;
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return put_user(amount, (int __user *)arg);
+}
+EXPORT_SYMBOL(l2tp_ioctl);
+
 static struct proto l2tp_ip_prot = {
 	.name		   = "L2TP/IP",
 	.owner		   = THIS_MODULE,
@@ -561,7 +586,7 @@ static struct proto l2tp_ip_prot = {
 	.bind		   = l2tp_ip_bind,
 	.connect	   = l2tp_ip_connect,
 	.disconnect	   = l2tp_ip_disconnect,
-	.ioctl		   = udp_ioctl,
+	.ioctl		   = l2tp_ioctl,
 	.destroy	   = l2tp_ip_destroy_sock,
 	.setsockopt	   = ip_setsockopt,
 	.getsockopt	   = ip_getsockopt,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 331ccf5a7bad..f47c45250f86 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -722,7 +722,7 @@ static struct proto l2tp_ip6_prot = {
 	.bind		   = l2tp_ip6_bind,
 	.connect	   = l2tp_ip6_connect,
 	.disconnect	   = l2tp_ip6_disconnect,
-	.ioctl		   = udp_ioctl,
+	.ioctl		   = l2tp_ioctl,
 	.destroy	   = l2tp_ip6_destroy_sock,
 	.setsockopt	   = ipv6_setsockopt,
 	.getsockopt	   = ipv6_getsockopt,

From 6e78b3f7a193546b1c00a6d084596e774f147169 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 10 Feb 2017 15:03:35 -0800
Subject: [PATCH 321/322] Btrfs: fix btrfs_decompress_buf2page()

If btrfs_decompress_buf2page() is handed a bio with its page in the
middle of the working buffer, then we adjust the offset into the working
buffer. After we copy into the bio, we advance the iterator by the
number of bytes we copied. Then, we have some logic to handle the case
of discontiguous pages and adjust the offset into the working buffer
again. However, if we didn't advance the bio to a new page, we may enter
this case in error, essentially repeating the adjustment that we already
made when we entered the function. The end result is bogus data in the
bio.

Previously, we only checked for this case when we advanced to a new
page, but the conversion to bio iterators changed that. This restores
the old, correct behavior.

A case I saw when testing with zlib was:

    buf_start = 42769
    total_out = 46865
    working_bytes = total_out - buf_start = 4096
    start_byte = 45056

The condition (total_out > start_byte && buf_start < start_byte) is
true, so we adjust the offset:

    buf_offset = start_byte - buf_start = 2287
    working_bytes -= buf_offset = 1809
    current_buf_start = buf_start = 42769

Then, we copy

    bytes = min(bvec.bv_len, PAGE_SIZE - buf_offset, working_bytes) = 1809
    buf_offset += bytes = 4096
    working_bytes -= bytes = 0
    current_buf_start += bytes = 44578

After bio_advance(), we are still in the same page, so start_byte is the
same. Then, we check (total_out > start_byte && current_buf_start < start_byte),
which is true! So, we adjust the values again:

    buf_offset = start_byte - buf_start = 2287
    working_bytes = total_out - start_byte = 1809
    current_buf_start = buf_start + buf_offset = 45056

But note that working_bytes was already zero before this, so we should
have stopped copying.

Fixes: 974b1adc3b10 ("btrfs: use bio iterators for the decompression handlers")
Reported-by: Pat Erley <pat-lkml@erley.org>
Reviewed-by: Chris Mason <clm@fb.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Tested-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/compression.c | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7f390849343b..c4444d6f439f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1024,6 +1024,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 	unsigned long buf_offset;
 	unsigned long current_buf_start;
 	unsigned long start_byte;
+	unsigned long prev_start_byte;
 	unsigned long working_bytes = total_out - buf_start;
 	unsigned long bytes;
 	char *kaddr;
@@ -1071,26 +1072,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 		if (!bio->bi_iter.bi_size)
 			return 0;
 		bvec = bio_iter_iovec(bio, bio->bi_iter);
-
+		prev_start_byte = start_byte;
 		start_byte = page_offset(bvec.bv_page) - disk_start;
 
 		/*
-		 * make sure our new page is covered by this
-		 * working buffer
+		 * We need to make sure we're only adjusting
+		 * our offset into compression working buffer when
+		 * we're switching pages.  Otherwise we can incorrectly
+		 * keep copying when we were actually done.
 		 */
-		if (total_out <= start_byte)
-			return 1;
+		if (start_byte != prev_start_byte) {
+			/*
+			 * make sure our new page is covered by this
+			 * working buffer
+			 */
+			if (total_out <= start_byte)
+				return 1;
 
-		/*
-		 * the next page in the biovec might not be adjacent
-		 * to the last page, but it might still be found
-		 * inside this working buffer. bump our offset pointer
-		 */
-		if (total_out > start_byte &&
-		    current_buf_start < start_byte) {
-			buf_offset = start_byte - buf_start;
-			working_bytes = total_out - start_byte;
-			current_buf_start = buf_start + buf_offset;
+			/*
+			 * the next page in the biovec might not be adjacent
+			 * to the last page, but it might still be found
+			 * inside this working buffer. bump our offset pointer
+			 */
+			if (total_out > start_byte &&
+			    current_buf_start < start_byte) {
+				buf_offset = start_byte - buf_start;
+				working_bytes = total_out - start_byte;
+				current_buf_start = buf_start + buf_offset;
+			}
 		}
 	}
 

From 7089db84e356562f8ba737c29e472cc42d530dbc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 12 Feb 2017 13:03:20 -0800
Subject: [PATCH 322/322] Linux 4.10-rc8

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8e223e081c9d..503dae1de8ef 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*