From 88f46b3fe2ac41c381770ebad9f2ee49346b57a2 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 20 Jul 2020 22:33:15 +0800 Subject: [PATCH 001/648] ieee802154: fix one possible memleak in ca8210_dev_com_init We should call destroy_workqueue to destroy mlme_workqueue in error branch. Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20200720143315.40523-1-liujian56@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index e04c3b60cae7..4eb64709d44c 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2925,6 +2925,7 @@ static int ca8210_dev_com_init(struct ca8210_priv *priv) ); if (!priv->irq_workqueue) { dev_crit(&priv->spi->dev, "alloc of irq_workqueue failed!\n"); + destroy_workqueue(priv->mlme_workqueue); return -ENOMEM; } From e3914ed6cf44bfe1f169e26241f8314556fd1ac1 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 2 Aug 2020 07:23:39 -0700 Subject: [PATCH 002/648] ieee802154/adf7242: check status of adf7242_read_reg Clang static analysis reports this error adf7242.c:887:6: warning: Assigned value is garbage or undefined len = len_u8; ^ ~~~~~~ len_u8 is set in adf7242_read_reg(lp, 0, &len_u8); When this call fails, len_u8 is not set. So check the return code. Fixes: 7302b9d90117 ("ieee802154/adf7242: Driver for ADF7242 MAC IEEE802154") Signed-off-by: Tom Rix Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20200802142339.21091-1-trix@redhat.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index c11f32f644db..7db9cbd0f5de 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -882,7 +882,9 @@ static int adf7242_rx(struct adf7242_local *lp) int ret; u8 lqi, len_u8, *data; - adf7242_read_reg(lp, 0, &len_u8); + ret = adf7242_read_reg(lp, 0, &len_u8); + if (ret) + return ret; len = len_u8; From ce4109005770ff6f160e1424928b9cc696e8c8f2 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 2 Aug 2020 11:25:09 +0800 Subject: [PATCH 003/648] regulator: cros-ec-regulator: Add NULL test for devm_kmemdup call Fix possible NULL pointer dereference. Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20200802032509.305425-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/cros-ec-regulator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/cros-ec-regulator.c b/drivers/regulator/cros-ec-regulator.c index 3117bbd2826b..eb3fc1db4edc 100644 --- a/drivers/regulator/cros-ec-regulator.c +++ b/drivers/regulator/cros-ec-regulator.c @@ -170,6 +170,9 @@ static int cros_ec_regulator_init_info(struct device *dev, data->voltages_mV = devm_kmemdup(dev, resp.voltages_mv, sizeof(u16) * data->num_voltages, GFP_KERNEL); + if (!data->voltages_mV) + return -ENOMEM; + data->desc.n_voltages = data->num_voltages; /* Make sure the returned name is always a valid string */ From 46d4a403a04c0ba46641452367b6a04bcf918a06 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 14:56:22 +0200 Subject: [PATCH 004/648] auxdisplay: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/arm-charlcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/auxdisplay/arm-charlcd.c b/drivers/auxdisplay/arm-charlcd.c index dea031484cc4..0b1c99cca733 100644 --- a/drivers/auxdisplay/arm-charlcd.c +++ b/drivers/auxdisplay/arm-charlcd.c @@ -2,7 +2,7 @@ /* * Driver for the on-board character LCD found on some ARM reference boards * This is basically an Hitachi HD44780 LCD with a custom IP block to drive it - * http://en.wikipedia.org/wiki/HD44780_Character_LCD + * https://en.wikipedia.org/wiki/HD44780_Character_LCD * Currently it will just display the text "ARM Linux" and the linux version * * Author: Linus Walleij From 09dad81e0f1701ea26babe2442a1478d6ad447d3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Aug 2020 10:39:31 +0100 Subject: [PATCH 005/648] regulator: fix spelling mistake "Cant" -> "Can't" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200810093931.50624-1-colin.king@canonical.com Signed-off-by: Mark Brown --- drivers/regulator/fixed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index d54830e48b8d..142a70a89153 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -182,7 +182,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) drvdata->enable_clock = devm_clk_get(dev, NULL); if (IS_ERR(drvdata->enable_clock)) { - dev_err(dev, "Cant get enable-clock from devicetree\n"); + dev_err(dev, "Can't get enable-clock from devicetree\n"); return -ENOENT; } } else { From d86f9088938e637de5908ed463b023eee4d1b9e1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 25 Jul 2020 03:16:24 +0000 Subject: [PATCH 006/648] phy: qualcomm: fix platform_no_drv_owner.cocci warnings Remove .owner field if calls are used which set it automatically Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20200725031624.31432-1-yuehaibing@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c b/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c index 71f257b4a7f5..6707efd38680 100644 --- a/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c @@ -557,7 +557,6 @@ static struct platform_driver qcom_ipq806x_usb_phy_driver = { .probe = qcom_ipq806x_usb_phy_probe, .driver = { .name = "qcom-ipq806x-usb-phy", - .owner = THIS_MODULE, .of_match_table = qcom_ipq806x_usb_phy_table, }, }; From 04db2304a9495b98b8976c9a8cc5f9845be35ba8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 23 Jul 2020 11:36:22 +0000 Subject: [PATCH 007/648] phy: qualcomm: fix return value check in qcom_ipq806x_usb_phy_probe() In case of error, the function devm_ioremap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Fixes: ef19b117b834 ("phy: qualcomm: add qcom ipq806x dwc usb phy driver") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20200723113622.136752-1-weiyongjun1@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c b/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c index 6707efd38680..9061ece7ff6a 100644 --- a/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c @@ -505,9 +505,9 @@ static int qcom_ipq806x_usb_phy_probe(struct platform_device *pdev) size = resource_size(res); phy_dwc3->base = devm_ioremap(phy_dwc3->dev, res->start, size); - if (IS_ERR(phy_dwc3->base)) { + if (!phy_dwc3->base) { dev_err(phy_dwc3->dev, "failed to map reg\n"); - return PTR_ERR(phy_dwc3->base); + return -ENOMEM; } phy_dwc3->ref_clk = devm_clk_get(phy_dwc3->dev, "ref"); From e5ed6069b7d8a41e4de09e8edf783f6b1a83b48f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 21 Jul 2020 13:59:58 -0700 Subject: [PATCH 008/648] ARC: pgalloc.h: delete a duplicated word + other fixes Drop the repeated word "to". Change "Thay" to "That". Add a closing right parenthesis. Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index b747f2ec2928..6147db925248 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -18,10 +18,10 @@ * vineetg: April 2010 * -Switched pgtable_t from being struct page * to unsigned long * =Needed so that Page Table allocator (pte_alloc_one) is not forced to - * to deal with struct page. Thay way in future we can make it allocate + * deal with struct page. That way in future we can make it allocate * multiple PG Tbls in one Page Frame * =sweet side effect is avoiding calls to ugly page_address( ) from the - * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate + * pg-tlb allocator sub-sys (pte_alloc_one, ptr_free, pmd_populate) * * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 */ From feb92d7d3813456c11dce215b3421801a78a8986 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sun, 26 Jul 2020 21:51:59 -0700 Subject: [PATCH 009/648] ARC: perf: don't bail setup if pct irq missing in device-tree Current code inadventely bails if hardware supports sampling/overflow interrupts, but the irq is missing from device tree. | | # perf stat -e cycles,instructions,major-faults,minor-faults ../hackbench | Running with 10 groups 400 process | Time: 0.921 | | Performance counter stats for '../hackbench': | | cycles | instructions | 0 major-faults | 8679 minor-faults This need not be as we can still do simple counting based perf stat. This unborks perf on HSDK-4xD Cc: Signed-off-by: Vineet Gupta --- arch/arc/kernel/perf_event.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 661fd842ea97..79849f37e782 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) { struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; - int i, has_interrupts; + int i, has_interrupts, irq; int counter_size; /* in bits */ union cc_name { @@ -637,13 +637,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) .attr_groups = arc_pmu->attr_groups, }; - if (has_interrupts) { - int irq = platform_get_irq(pdev, 0); - - if (irq < 0) { - pr_err("Cannot get IRQ number for the platform\n"); - return -ENODEV; - } + if (has_interrupts && (irq = platform_get_irq(pdev, 0) >= 0)) { arc_pmu->irq = irq; @@ -652,9 +646,9 @@ static int arc_pmu_device_probe(struct platform_device *pdev) this_cpu_ptr(&arc_pmu_cpu)); on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); - - } else + } else { arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } /* * perf parser doesn't really like '-' symbol in events name, so let's From fe81d927b78c4f0557836661d32e41ebc957b024 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 9 Jul 2020 19:52:32 -0700 Subject: [PATCH 010/648] ARC: HSDK: wireup perf irq Newer version of HSDK aka HSDK-4xD (with dual issue HS48x4 CPU) wired up the perf interrupt, so enable that in DT. This is OK for old HSDK where this irq is ignored because pct irq is not wired up in hardware. Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 9acbeba832c0..5d64a5a940ee 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -88,6 +88,8 @@ idu_intc: idu-interrupt-controller { arcpct: pct { compatible = "snps,archs-pct"; + interrupt-parent = <&cpu_intc>; + interrupts = <20>; }; /* TIMER0 with interrupt for clockevent */ From b4b6fb8de8dcb756c78ef7203cb9d00a42185e26 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 29 Jun 2020 15:00:54 +0300 Subject: [PATCH 011/648] arm64: dts: zynqmp: Add GTR transceivers Add a DT node for the PS-GTR transceivers. Signed-off-by: Laurent Pinchart Acked-by: Michal Simek Link: https://lore.kernel.org/r/20200629120054.29338-4-laurent.pinchart@ideasonboard.com --- arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 9174ddc76bdc..a0aad47b0ffc 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -13,6 +13,7 @@ */ #include +#include / { compatible = "xlnx,zynqmp"; @@ -558,6 +559,15 @@ pcie_intc: legacy-interrupt-controller { }; }; + psgtr: phy@fd400000 { + compatible = "xlnx,zynqmp-psgtr-v1.1"; + status = "disabled"; + reg = <0x0 0xfd400000 0x0 0x40000>, + <0x0 0xfd3d0000 0x0 0x1000>; + reg-names = "serdes", "siou"; + #phy-cells = <4>; + }; + rtc: rtc@ffa60000 { compatible = "xlnx,zynqmp-rtc"; status = "disabled"; From 8d53ecfbf23135381d2ebbd1b261a7c7f18a7294 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 29 Jun 2020 10:17:44 +0200 Subject: [PATCH 012/648] arm64: dts: xilinx: Align IOMMU nodename with dtschema Fix dtschema validator warnings like: smmu@fd800000: $nodename:0: 'smmu@fd800000' does not match '^iommu@[0-9a-f]*' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200629081744.13916-1-krzk@kernel.org --- arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index a0aad47b0ffc..3ec99f13c259 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -611,7 +611,7 @@ sdhci1: mmc@ff170000 { power-domains = <&zynqmp_firmware PD_SD_1>; }; - smmu: smmu@fd800000 { + smmu: iommu@fd800000 { compatible = "arm,mmu-500"; reg = <0x0 0xfd800000 0x0 0x20000>; status = "disabled"; From a58cfdba2039ff2d5758840e97a23a2dedecf1e8 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Fri, 24 Jul 2020 11:54:30 +0800 Subject: [PATCH 013/648] ARM: OMAP2+: Fix an IS_ERR() vs NULL check in _get_pwrdm() The of_clk_get() function returns error pointers, it never returns NULL. Fixes: 4ea3711aece4 ("ARM: OMAP2+: omap-iommu.c conversion to ti-sysc") Signed-off-by: Jing Xiangfeng Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index 54aff33e55e6..bfa5e1b8dba7 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -74,7 +74,7 @@ static struct powerdomain *_get_pwrdm(struct device *dev) return pwrdm; clk = of_clk_get(dev->of_node->parent, 0); - if (!clk) { + if (IS_ERR(clk)) { dev_err(dev, "no fck found\n"); return NULL; } From d7dfee67688ac7f2dfd4c3bc70c053ee990c40b5 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 8 Aug 2020 21:56:10 -0500 Subject: [PATCH 014/648] ARM: dts: logicpd-torpedo-baseboard: Fix broken audio Older versions of U-Boot would pinmux the whole board, but as the bootloader got updated, it started to only pinmux the pins it needed, and expected Linux to configure what it needed. Unfortunately this caused an issue with the audio, because the mcbsp2 pins were configured in the device tree, they were never referenced by the driver. When U-Boot stopped muxing the audio pins, the audio died. This patch adds the references to the associate the pin controller with the mcbsp2 driver which makes audio operate again. Fixes: 739f85bba5ab ("ARM: dts: Move most of logicpd-torpedo-37xx-devkit to logicpd-torpedo-baseboard") Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi index 381f0e82bb70..b0f6613e6d54 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi @@ -81,6 +81,8 @@ &vaux4 { }; &mcbsp2 { + pinctrl-names = "default"; + pinctrl-0 = <&mcbsp2_pins>; status = "okay"; }; From 4d26e9a028e3d88223e06fa133c3d55af7ddbceb Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 14 Aug 2020 07:53:38 -0500 Subject: [PATCH 015/648] ARM: dts: logicpd-som-lv-baseboard: Fix broken audio Older versions of U-Boot would pinmux the whole board, but as the bootloader got updated, it started to only pinmux the pins it needed, and expected Linux to configure what it needed. Unfortunately this caused an issue with the audio, because the mcbsp2 pins were configured in the device tree but never referenced by the driver. When U-Boot stopped muxing the audio pins, the audio died. This patch adds the references to the associate the pin controller with the mcbsp2 driver which makes audio operate again. Fixes: 5cb8b0fa55a9 ("ARM: dts: Move most of logicpd-som-lv-37xx-devkit.dts to logicpd-som-lv-baseboard.dtsi") Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi index 100396f6c2fe..c310c33ca6f3 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi @@ -51,6 +51,8 @@ &vaux4 { &mcbsp2 { status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&mcbsp2_pins>; }; &charger { From d1db7b80a6c8c5f81db0e80664d29b374750e2c6 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 14 Aug 2020 07:24:41 -0500 Subject: [PATCH 016/648] ARM: dts: logicpd-som-lv-baseboard: Fix missing video A previous commit removed the panel-dpi driver, which made the SOM-LV video stop working because it relied on the DPI driver for setting video timings. Now that the simple-panel driver is available in omap2plus, this patch migrates the SOM-LV dev kits to use a similar panel and remove the manual timing requirements. A similar patch was already done and applied to the Torpedo family. Fixes: 8bf4b1621178 ("drm/omap: Remove panel-dpi driver") Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- .../boot/dts/logicpd-som-lv-baseboard.dtsi | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi index c310c33ca6f3..395e05f10d36 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi @@ -104,35 +104,18 @@ video_reg: video_reg { regulator-max-microvolt = <3300000>; }; - lcd0: display@0 { - compatible = "panel-dpi"; - label = "28"; - status = "okay"; - /* default-on; */ + lcd0: display { + /* This isn't the exact LCD, but the timings meet spec */ + compatible = "logicpd,type28"; pinctrl-names = "default"; pinctrl-0 = <&lcd_enable_pin>; - enable-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; /* gpio155, lcd INI */ + backlight = <&bl>; + enable-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; }; }; - - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <3>; - hback-porch = <2>; - hsync-len = <42>; - vback-porch = <3>; - vfront-porch = <2>; - vsync-len = <11>; - hsync-active = <1>; - vsync-active = <1>; - de-active = <1>; - pixelclk-active = <0>; - }; }; bl: backlight { From 73a32129f8ccb556704a26b422f54e048bf14bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 12 Aug 2020 03:31:34 +0200 Subject: [PATCH 017/648] regulator: push allocation in regulator_init_coupling() outside of lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocating memory with regulator_list_mutex held makes lockdep unhappy when memory pressure makes the system do fs_reclaim on eg. eMMC using a regulator. Push the lock inside regulator_init_coupling() after the allocation. ====================================================== WARNING: possible circular locking dependency detected 5.7.13+ #533 Not tainted ------------------------------------------------------ kswapd0/383 is trying to acquire lock: cca78ca4 (&sbi->write_io[i][j].io_rwsem){++++}-{3:3}, at: __submit_merged_write_cond+0x104/0x154 but task is already holding lock: c0e38518 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x50 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (fs_reclaim){+.+.}-{0:0}: fs_reclaim_acquire.part.11+0x40/0x50 fs_reclaim_acquire+0x24/0x28 __kmalloc+0x54/0x218 regulator_register+0x860/0x1584 dummy_regulator_probe+0x60/0xa8 [...] other info that might help us debug this: Chain exists of: &sbi->write_io[i][j].io_rwsem --> regulator_list_mutex --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(regulator_list_mutex); lock(fs_reclaim); lock(&sbi->write_io[i][j].io_rwsem); *** DEADLOCK *** 1 lock held by kswapd0/383: #0: c0e38518 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x50 [...] Fixes: d8ca7d184b33 ("regulator: core: Introduce API for regulators coupling customization") Signed-off-by: Michał Mirosław Reviewed-by: Dmitry Osipenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1a889cf7f61c6429c9e6b34ddcdde99be77a26b6.1597195321.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 75ff7c563c5d..513f95c6f837 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5040,7 +5040,10 @@ static int regulator_init_coupling(struct regulator_dev *rdev) if (!of_check_coupling_data(rdev)) return -EPERM; + mutex_lock(®ulator_list_mutex); rdev->coupling_desc.coupler = regulator_find_coupler(rdev); + mutex_unlock(®ulator_list_mutex); + if (IS_ERR(rdev->coupling_desc.coupler)) { err = PTR_ERR(rdev->coupling_desc.coupler); rdev_err(rdev, "failed to get coupler: %d\n", err); @@ -5248,9 +5251,7 @@ regulator_register(const struct regulator_desc *regulator_desc, if (ret < 0) goto wash; - mutex_lock(®ulator_list_mutex); ret = regulator_init_coupling(rdev); - mutex_unlock(®ulator_list_mutex); if (ret < 0) goto wash; From 467bf30142c64f2eb64e2ac67fa4595126230efd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 12 Aug 2020 03:31:34 +0200 Subject: [PATCH 018/648] regulator: push allocation in regulator_ena_gpio_request() out of lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move another allocation out of regulator_list_mutex-protected region, as reclaim might want to take the same lock. WARNING: possible circular locking dependency detected 5.7.13+ #534 Not tainted ------------------------------------------------------ kswapd0/383 is trying to acquire lock: c0e5d920 (regulator_list_mutex){+.+.}-{3:3}, at: regulator_lock_dependent+0x54/0x2c0 but task is already holding lock: c0e38518 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x50 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}-{0:0}: fs_reclaim_acquire.part.11+0x40/0x50 fs_reclaim_acquire+0x24/0x28 kmem_cache_alloc_trace+0x40/0x1e8 regulator_register+0x384/0x1630 devm_regulator_register+0x50/0x84 reg_fixed_voltage_probe+0x248/0x35c [...] other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(regulator_list_mutex); lock(fs_reclaim); lock(regulator_list_mutex); *** DEADLOCK *** [...] 2 locks held by kswapd0/383: #0: c0e38518 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x0/0x50 #1: cb70e5e0 (hctx->srcu){....}-{0:0}, at: hctx_lock+0x60/0xb8 [...] Fixes: 541d052d7215 ("regulator: core: Only support passing enable GPIO descriptors") [this commit only changes context] Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking") [this is when the regulator_list_mutex was introduced in reclaim locking path] Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/41fe6a9670335721b48e8f5195038c3d67a3bf92.1597195321.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 513f95c6f837..62fcf1ebbd04 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2230,10 +2230,13 @@ EXPORT_SYMBOL_GPL(regulator_bulk_unregister_supply_alias); static int regulator_ena_gpio_request(struct regulator_dev *rdev, const struct regulator_config *config) { - struct regulator_enable_gpio *pin; + struct regulator_enable_gpio *pin, *new_pin; struct gpio_desc *gpiod; gpiod = config->ena_gpiod; + new_pin = kzalloc(sizeof(*new_pin), GFP_KERNEL); + + mutex_lock(®ulator_list_mutex); list_for_each_entry(pin, ®ulator_ena_gpio_list, list) { if (pin->gpiod == gpiod) { @@ -2242,9 +2245,13 @@ static int regulator_ena_gpio_request(struct regulator_dev *rdev, } } - pin = kzalloc(sizeof(struct regulator_enable_gpio), GFP_KERNEL); - if (pin == NULL) + if (new_pin == NULL) { + mutex_unlock(®ulator_list_mutex); return -ENOMEM; + } + + pin = new_pin; + new_pin = NULL; pin->gpiod = gpiod; list_add(&pin->list, ®ulator_ena_gpio_list); @@ -2252,6 +2259,10 @@ static int regulator_ena_gpio_request(struct regulator_dev *rdev, update_ena_gpio_to_rdev: pin->request_count++; rdev->ena_pin = pin; + + mutex_unlock(®ulator_list_mutex); + kfree(new_pin); + return 0; } @@ -5209,9 +5220,7 @@ regulator_register(const struct regulator_desc *regulator_desc, } if (config->ena_gpiod) { - mutex_lock(®ulator_list_mutex); ret = regulator_ena_gpio_request(rdev, config); - mutex_unlock(®ulator_list_mutex); if (ret != 0) { rdev_err(rdev, "Failed to request enable GPIO: %d\n", ret); From 87fe29b61f9522a3d7b60a4580851f548558186f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 12 Aug 2020 03:31:35 +0200 Subject: [PATCH 019/648] regulator: push allocations in create_regulator() outside of lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move all allocations outside of the regulator_lock()ed section. ====================================================== WARNING: possible circular locking dependency detected 5.7.13+ #535 Not tainted ------------------------------------------------------ f2fs_discard-179:7/702 is trying to acquire lock: c0e5d920 (regulator_list_mutex){+.+.}-{3:3}, at: regulator_lock_dependent+0x54/0x2c0 but task is already holding lock: cb95b080 (&dcc->cmd_lock){+.+.}-{3:3}, at: __issue_discard_cmd+0xec/0x5f8 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: [...] -> #3 (fs_reclaim){+.+.}-{0:0}: fs_reclaim_acquire.part.11+0x40/0x50 fs_reclaim_acquire+0x24/0x28 __kmalloc_track_caller+0x54/0x218 kstrdup+0x40/0x5c create_regulator+0xf4/0x368 regulator_resolve_supply+0x1a0/0x200 regulator_register+0x9c8/0x163c [...] other info that might help us debug this: Chain exists of: regulator_list_mutex --> &sit_i->sentry_lock --> &dcc->cmd_lock [...] Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking") Signed-off-by: Michał Mirosław Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/6eebc99b2474f4ffaa0405b15178ece0e7e4f608.1597195321.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 53 +++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 62fcf1ebbd04..64a8c9444dcd 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1580,44 +1580,53 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, const char *supply_name) { struct regulator *regulator; - char buf[REG_STR_SIZE]; - int err, size; + int err; + + if (dev) { + char buf[REG_STR_SIZE]; + int size; + + size = snprintf(buf, REG_STR_SIZE, "%s-%s", + dev->kobj.name, supply_name); + if (size >= REG_STR_SIZE) + return NULL; + + supply_name = kstrdup(buf, GFP_KERNEL); + if (supply_name == NULL) + return NULL; + } else { + supply_name = kstrdup_const(supply_name, GFP_KERNEL); + if (supply_name == NULL) + return NULL; + } regulator = kzalloc(sizeof(*regulator), GFP_KERNEL); - if (regulator == NULL) + if (regulator == NULL) { + kfree(supply_name); return NULL; + } + + regulator->rdev = rdev; + regulator->supply_name = supply_name; regulator_lock(rdev); - regulator->rdev = rdev; list_add(®ulator->list, &rdev->consumer_list); + regulator_unlock(rdev); if (dev) { regulator->dev = dev; /* Add a link to the device sysfs entry */ - size = snprintf(buf, REG_STR_SIZE, "%s-%s", - dev->kobj.name, supply_name); - if (size >= REG_STR_SIZE) - goto overflow_err; - - regulator->supply_name = kstrdup(buf, GFP_KERNEL); - if (regulator->supply_name == NULL) - goto overflow_err; - err = sysfs_create_link_nowarn(&rdev->dev.kobj, &dev->kobj, - buf); + supply_name); if (err) { rdev_dbg(rdev, "could not add device link %s err %d\n", dev->kobj.name, err); /* non-fatal */ } - } else { - regulator->supply_name = kstrdup_const(supply_name, GFP_KERNEL); - if (regulator->supply_name == NULL) - goto overflow_err; } - regulator->debugfs = debugfs_create_dir(regulator->supply_name, + regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs); if (!regulator->debugfs) { rdev_dbg(rdev, "Failed to create debugfs directory\n"); @@ -1642,13 +1651,7 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, _regulator_is_enabled(rdev)) regulator->always_on = true; - regulator_unlock(rdev); return regulator; -overflow_err: - list_del(®ulator->list); - kfree(regulator); - regulator_unlock(rdev); - return NULL; } static int _regulator_get_enable_time(struct regulator_dev *rdev) From 5c06540165d443c6455123eb48e7f1a9b618ab34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 12 Aug 2020 03:31:36 +0200 Subject: [PATCH 020/648] regulator: push allocation in set_consumer_device_supply() out of lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull regulator_list_mutex into set_consumer_device_supply() and keep allocations outside of it. Fourth of the fs_reclaim deadlock case. Fixes: 45389c47526d ("regulator: core: Add early supply resolution for regulators") Signed-off-by: Michał Mirosław Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/f0380bdb3d60aeefa9693c4e234d2dcda7e56747.1597195321.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 46 +++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 64a8c9444dcd..9f4944dad3a1 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1461,7 +1461,7 @@ static int set_consumer_device_supply(struct regulator_dev *rdev, const char *consumer_dev_name, const char *supply) { - struct regulator_map *node; + struct regulator_map *node, *new_node; int has_dev; if (supply == NULL) @@ -1472,6 +1472,22 @@ static int set_consumer_device_supply(struct regulator_dev *rdev, else has_dev = 0; + new_node = kzalloc(sizeof(struct regulator_map), GFP_KERNEL); + if (new_node == NULL) + return -ENOMEM; + + new_node->regulator = rdev; + new_node->supply = supply; + + if (has_dev) { + new_node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL); + if (new_node->dev_name == NULL) { + kfree(new_node); + return -ENOMEM; + } + } + + mutex_lock(®ulator_list_mutex); list_for_each_entry(node, ®ulator_map_list, list) { if (node->dev_name && consumer_dev_name) { if (strcmp(node->dev_name, consumer_dev_name) != 0) @@ -1489,26 +1505,19 @@ static int set_consumer_device_supply(struct regulator_dev *rdev, node->regulator->desc->name, supply, dev_name(&rdev->dev), rdev_get_name(rdev)); - return -EBUSY; + goto fail; } - node = kzalloc(sizeof(struct regulator_map), GFP_KERNEL); - if (node == NULL) - return -ENOMEM; + list_add(&new_node->list, ®ulator_map_list); + mutex_unlock(®ulator_list_mutex); - node->regulator = rdev; - node->supply = supply; - - if (has_dev) { - node->dev_name = kstrdup(consumer_dev_name, GFP_KERNEL); - if (node->dev_name == NULL) { - kfree(node); - return -ENOMEM; - } - } - - list_add(&node->list, ®ulator_map_list); return 0; + +fail: + mutex_unlock(®ulator_list_mutex); + kfree(new_node->dev_name); + kfree(new_node); + return -EBUSY; } static void unset_regulator_supplies(struct regulator_dev *rdev) @@ -5269,19 +5278,16 @@ regulator_register(const struct regulator_desc *regulator_desc, /* add consumers devices */ if (init_data) { - mutex_lock(®ulator_list_mutex); for (i = 0; i < init_data->num_consumer_supplies; i++) { ret = set_consumer_device_supply(rdev, init_data->consumer_supplies[i].dev_name, init_data->consumer_supplies[i].supply); if (ret < 0) { - mutex_unlock(®ulator_list_mutex); dev_err(dev, "Failed to set supply %s\n", init_data->consumer_supplies[i].supply); goto unset_supplies; } } - mutex_unlock(®ulator_list_mutex); } if (!rdev->desc->ops->get_voltage && From d3c731564e09b6c2ebefcd1344743a91a237d6dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 12 Aug 2020 03:31:36 +0200 Subject: [PATCH 021/648] regulator: plug of_node leak in regulator_register()'s error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By calling device_initialize() earlier and noting that kfree(NULL) is ok, we can save a bit of code in error handling and plug of_node leak. Fixed commit already did part of the work. Fixes: 9177514ce349 ("regulator: fix memory leak on error path of regulator_register()") Signed-off-by: Michał Mirosław Reviewed-by: Vladimir Zapolskiy Acked-by: Vladimir Zapolskiy Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/f5035b1b4d40745e66bacd571bbbb5e4644d21a1.1597195321.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 9f4944dad3a1..1561f7555fc6 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5167,6 +5167,7 @@ regulator_register(const struct regulator_desc *regulator_desc, ret = -ENOMEM; goto rinse; } + device_initialize(&rdev->dev); /* * Duplicate the config so the driver could override it after @@ -5174,9 +5175,8 @@ regulator_register(const struct regulator_desc *regulator_desc, */ config = kmemdup(cfg, sizeof(*cfg), GFP_KERNEL); if (config == NULL) { - kfree(rdev); ret = -ENOMEM; - goto rinse; + goto clean; } init_data = regulator_of_get_init_data(dev, regulator_desc, config, @@ -5188,10 +5188,8 @@ regulator_register(const struct regulator_desc *regulator_desc, * from a gpio extender or something else. */ if (PTR_ERR(init_data) == -EPROBE_DEFER) { - kfree(config); - kfree(rdev); ret = -EPROBE_DEFER; - goto rinse; + goto clean; } /* @@ -5244,7 +5242,6 @@ regulator_register(const struct regulator_desc *regulator_desc, } /* register with sysfs */ - device_initialize(&rdev->dev); rdev->dev.class = ®ulator_class; rdev->dev.parent = dev; dev_set_name(&rdev->dev, "regulator.%lu", @@ -5322,13 +5319,11 @@ regulator_register(const struct regulator_desc *regulator_desc, mutex_lock(®ulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(®ulator_list_mutex); - put_device(&rdev->dev); - rdev = NULL; clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); - kfree(rdev); kfree(config); + put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); From 2dbf085594370abc1453356518719300d8a7acc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 12 Aug 2020 03:31:37 +0200 Subject: [PATCH 022/648] regulator: cleanup regulator_ena_gpio_free() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since only regulator_ena_gpio_request() allocates rdev->ena_pin, and it guarantees that same gpiod gets same pin structure, it is enough to compare just the pointers. Also we know there can be only one matching entry on the list. Rework the code take advantage of the facts. Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/3ff002c7aa3bd774491af4291a9df23541fcf892.1597195321.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 1561f7555fc6..6e2e74745d88 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2287,19 +2287,19 @@ static void regulator_ena_gpio_free(struct regulator_dev *rdev) /* Free the GPIO only in case of no use */ list_for_each_entry_safe(pin, n, ®ulator_ena_gpio_list, list) { - if (pin->gpiod == rdev->ena_pin->gpiod) { - if (pin->request_count <= 1) { - pin->request_count = 0; - gpiod_put(pin->gpiod); - list_del(&pin->list); - kfree(pin); - rdev->ena_pin = NULL; - return; - } else { - pin->request_count--; - } - } + if (pin != rdev->ena_pin) + continue; + + if (--pin->request_count) + break; + + gpiod_put(pin->gpiod); + list_del(&pin->list); + kfree(pin); + break; } + + rdev->ena_pin = NULL; } /** From a577f3456c0a2fac3dee037c483753e6e68f3e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 12 Aug 2020 03:31:38 +0200 Subject: [PATCH 023/648] regulator: remove superfluous lock in regulator_resolve_coupling() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code modifies rdev, but locks c_rdev instead. Remove the lock as this is held together by regulator_list_mutex taken in the caller. Fixes: f9503385b187 ("regulator: core: Mutually resolve regulators coupling") Signed-off-by: Michał Mirosław Reviewed-by: Dmitry Osipenko Link: https://lore.kernel.org/r/25eb81cefb37a646f3e44eaaf1d8ae8881cfde52.1597195321.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown --- drivers/regulator/core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 6e2e74745d88..6789d1efdf5d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4972,13 +4972,9 @@ static void regulator_resolve_coupling(struct regulator_dev *rdev) return; } - regulator_lock(c_rdev); - c_desc->coupled_rdevs[i] = c_rdev; c_desc->n_resolved++; - regulator_unlock(c_rdev); - regulator_resolve_coupling(c_rdev); } } From 0ff5a4812be4ebd4782bbb555d369636eea164f7 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Fri, 31 Jul 2020 10:26:40 -0500 Subject: [PATCH 024/648] ARM: dts: socfpga: fix register entry for timer3 on Arria10 Fixes the register address for the timer3 entry on Arria10. Fixes: 475dc86d08de4 ("arm: dts: socfpga: Add a base DTSI for Altera's Arria10 SOC") Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index fc4abef143a0..0013ec3463c4 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -821,7 +821,7 @@ timer2: timer2@ffd00000 { timer3: timer3@ffd00100 { compatible = "snps,dw-apb-timer"; interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>; - reg = <0xffd01000 0x100>; + reg = <0xffd00100 0x100>; clocks = <&l4_sys_free_clk>; clock-names = "timer"; resets = <&rst L4SYSTIMER1_RESET>; From b15b4da88be52e7455acc59318f6c97b8740b329 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 4 Aug 2020 13:16:01 +0300 Subject: [PATCH 025/648] staging: greybus: audio: Uninitialized variable in gbaudio_remove_controls() The "err" variable is not meaningful so there is no need to print it. It's uninitialized on the first iteration through the loop. Fixes: 510e340efe0c ("staging: greybus: audio: Add helper APIs for dynamic audio modules") Signed-off-by: Dan Carpenter Reviewed-by: Alex Elder Reviewed-by: Vaibhav Agarwal Link: https://lore.kernel.org/r/20200804101601.GA392148@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/audio_helper.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/greybus/audio_helper.c b/drivers/staging/greybus/audio_helper.c index 8b100a71f02e..237531ba60f3 100644 --- a/drivers/staging/greybus/audio_helper.c +++ b/drivers/staging/greybus/audio_helper.c @@ -173,8 +173,7 @@ static int gbaudio_remove_controls(struct snd_card *card, struct device *dev, id.index = control->index; kctl = snd_ctl_find_id(card, &id); if (!kctl) { - dev_err(dev, "%d: Failed to find %s\n", err, - control->name); + dev_err(dev, "Failed to find %s\n", control->name); continue; } err = snd_ctl_remove(card, kctl); From fea22e159d51c766ba70473f473a0ec914cc7e92 Mon Sep 17 00:00:00 2001 From: Rustam Kovhaev Date: Tue, 4 Aug 2020 07:56:14 -0700 Subject: [PATCH 026/648] staging: wlan-ng: fix out of bounds read in prism2sta_probe_usb() let's use usb_find_common_endpoints() to discover endpoints, it does all necessary checks for type and xfer direction remove memset() in hfa384x_create(), because we now assign endpoints in prism2sta_probe_usb() and because create_wlan() uses kzalloc() to allocate hfa384x struct before calling hfa384x_create() Fixes: faaff9765664 ("staging: wlan-ng: properly check endpoint types") Reported-and-tested-by: syzbot+22794221ab96b0bab53a@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=22794221ab96b0bab53a Signed-off-by: Rustam Kovhaev Cc: stable Link: https://lore.kernel.org/r/20200804145614.104320-1-rkovhaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wlan-ng/hfa384x_usb.c | 5 ----- drivers/staging/wlan-ng/prism2usb.c | 19 ++++++------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c index fa1bf8b069fd..2720f7319a3d 100644 --- a/drivers/staging/wlan-ng/hfa384x_usb.c +++ b/drivers/staging/wlan-ng/hfa384x_usb.c @@ -524,13 +524,8 @@ static void hfa384x_usb_defer(struct work_struct *data) */ void hfa384x_create(struct hfa384x *hw, struct usb_device *usb) { - memset(hw, 0, sizeof(*hw)); hw->usb = usb; - /* set up the endpoints */ - hw->endp_in = usb_rcvbulkpipe(usb, 1); - hw->endp_out = usb_sndbulkpipe(usb, 2); - /* Set up the waitq */ init_waitqueue_head(&hw->cmdq); diff --git a/drivers/staging/wlan-ng/prism2usb.c b/drivers/staging/wlan-ng/prism2usb.c index 456603fd26c0..4b08dc1da4f9 100644 --- a/drivers/staging/wlan-ng/prism2usb.c +++ b/drivers/staging/wlan-ng/prism2usb.c @@ -61,23 +61,14 @@ static int prism2sta_probe_usb(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *dev; - const struct usb_endpoint_descriptor *epd; - const struct usb_host_interface *iface_desc = interface->cur_altsetting; + struct usb_endpoint_descriptor *bulk_in, *bulk_out; + struct usb_host_interface *iface_desc = interface->cur_altsetting; struct wlandevice *wlandev = NULL; struct hfa384x *hw = NULL; int result = 0; - if (iface_desc->desc.bNumEndpoints != 2) { - result = -ENODEV; - goto failed; - } - - result = -EINVAL; - epd = &iface_desc->endpoint[1].desc; - if (!usb_endpoint_is_bulk_in(epd)) - goto failed; - epd = &iface_desc->endpoint[2].desc; - if (!usb_endpoint_is_bulk_out(epd)) + result = usb_find_common_endpoints(iface_desc, &bulk_in, &bulk_out, NULL, NULL); + if (result) goto failed; dev = interface_to_usbdev(interface); @@ -96,6 +87,8 @@ static int prism2sta_probe_usb(struct usb_interface *interface, } /* Initialize the hw data */ + hw->endp_in = usb_rcvbulkpipe(dev, bulk_in->bEndpointAddress); + hw->endp_out = usb_sndbulkpipe(dev, bulk_out->bEndpointAddress); hfa384x_create(hw, dev); hw->wlandev = wlandev; From 1dffeb8b8b4c261c45416d53c75ea51e6ece1770 Mon Sep 17 00:00:00 2001 From: Vaibhav Agarwal Date: Fri, 14 Aug 2020 18:03:15 +0530 Subject: [PATCH 027/648] staging: greybus: audio: fix uninitialized value issue The current implementation for gbcodec_mixer_dapm_ctl_put() uses uninitialized gbvalue for comparison with updated value. This was found using static analysis with coverity. Uninitialized scalar variable (UNINIT) 11. uninit_use: Using uninitialized value gbvalue.value.integer_value[0]. 460 if (gbvalue.value.integer_value[0] != val) { This patch fixes the issue with fetching the gbvalue before using it for comparision. Fixes: 6339d2322c47 ("greybus: audio: Add topology parser for GB codec") Reported-by: Colin Ian King Signed-off-by: Vaibhav Agarwal Cc: stable Link: https://lore.kernel.org/r/bc4f29eb502ccf93cd2ffd98db0e319fa7d0f247.1597408126.git.vaibhav.sr@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/audio_topology.c | 29 ++++++++++++------------ 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/staging/greybus/audio_topology.c b/drivers/staging/greybus/audio_topology.c index 2f9fdbdcd547..83b38ae8908c 100644 --- a/drivers/staging/greybus/audio_topology.c +++ b/drivers/staging/greybus/audio_topology.c @@ -456,6 +456,15 @@ static int gbcodec_mixer_dapm_ctl_put(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0] & mask; connect = !!val; + ret = gb_pm_runtime_get_sync(bundle); + if (ret) + return ret; + + ret = gb_audio_gb_get_control(module->mgmt_connection, data->ctl_id, + GB_AUDIO_INVALID_INDEX, &gbvalue); + if (ret) + goto exit; + /* update ucontrol */ if (gbvalue.value.integer_value[0] != val) { for (wi = 0; wi < wlist->num_widgets; wi++) { @@ -466,25 +475,17 @@ static int gbcodec_mixer_dapm_ctl_put(struct snd_kcontrol *kcontrol, gbvalue.value.integer_value[0] = cpu_to_le32(ucontrol->value.integer.value[0]); - ret = gb_pm_runtime_get_sync(bundle); - if (ret) - return ret; - ret = gb_audio_gb_set_control(module->mgmt_connection, data->ctl_id, GB_AUDIO_INVALID_INDEX, &gbvalue); - - gb_pm_runtime_put_autosuspend(bundle); - - if (ret) { - dev_err_ratelimited(codec_dev, - "%d:Error in %s for %s\n", ret, - __func__, kcontrol->id.name); - return ret; - } } - return 0; +exit: + gb_pm_runtime_put_autosuspend(bundle); + if (ret) + dev_err_ratelimited(codec_dev, "%d:Error in %s for %s\n", ret, + __func__, kcontrol->id.name); + return ret; } #define SOC_DAPM_MIXER_GB(xname, kcount, data) \ From 9a2a0862d973c9eab6a1f3efa0d4d18dcedbaf1a Mon Sep 17 00:00:00 2001 From: Wright Feng Date: Thu, 13 Aug 2020 02:00:16 -0500 Subject: [PATCH 028/648] brcmfmac: reserve tx credit only when txctl is ready to send The 4329 throughput drops from 40.2 Mbits/sec to 190 Kbits/sec in 2G 11n mode because the commit b41c232d33666 ("brcmfmac: reserve 2 credits for host tx control path"). To fix the issue, host driver only reserves tx control credit when there is a txctl frame is pending to send. And we also check available credit by using "not equal to 0" instead of "greater than 0" because tx_max and tx_seq are circled positive numbers. Reported-by: Dmitry Osipenko Fixes: b41c232d33666 ("brcmfmac: reserve 2 credits for host tx control path") Signed-off-by: Wright Feng Tested-by: Dmitry Osipenko Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200813070017.89023-1-wright.feng@cypress.com --- .../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index e8712ad3ac45..3c07d1bbe1c6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -664,9 +664,15 @@ static void pkt_align(struct sk_buff *p, int len, int align) /* To check if there's window offered */ static bool data_ok(struct brcmf_sdio *bus) { - /* Reserve TXCTL_CREDITS credits for txctl */ - return (bus->tx_max - bus->tx_seq) > TXCTL_CREDITS && - ((bus->tx_max - bus->tx_seq) & 0x80) == 0; + u8 tx_rsv = 0; + + /* Reserve TXCTL_CREDITS credits for txctl when it is ready to send */ + if (bus->ctrl_frame_stat) + tx_rsv = TXCTL_CREDITS; + + return (bus->tx_max - bus->tx_seq - tx_rsv) != 0 && + ((bus->tx_max - bus->tx_seq - tx_rsv) & 0x80) == 0; + } /* To check if there's window offered */ From 3dabfa2bda48dab717986609762ce2a49335eb99 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 Aug 2020 16:49:59 +0200 Subject: [PATCH 029/648] clk: davinci: Use the correct size when allocating memory 'sizeof(*pllen)' should be used in place of 'sizeof(*pllout)' to avoid a small over-allocation. Fixes: 2d1726915159 ("clk: davinci: New driver for davinci PLL clocks") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20200809144959.747986-1-christophe.jaillet@wanadoo.fr Reviewed-by: David Lechner Signed-off-by: Stephen Boyd --- drivers/clk/davinci/pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/davinci/pll.c b/drivers/clk/davinci/pll.c index 6c35e4bb7940..0d750433eb42 100644 --- a/drivers/clk/davinci/pll.c +++ b/drivers/clk/davinci/pll.c @@ -491,7 +491,7 @@ struct clk *davinci_pll_clk_register(struct device *dev, parent_name = postdiv_name; } - pllen = kzalloc(sizeof(*pllout), GFP_KERNEL); + pllen = kzalloc(sizeof(*pllen), GFP_KERNEL); if (!pllen) { ret = -ENOMEM; goto err_unregister_postdiv; From e9c006bc782c488f485ffe50de20b44e1e3daa18 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sun, 9 Aug 2020 21:40:20 -0700 Subject: [PATCH 030/648] clk: rockchip: Fix initialization of mux_pll_src_4plls_p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new warning in Clang points out that the initialization of mux_pll_src_4plls_p appears incorrect: ../drivers/clk/rockchip/clk-rk3228.c:140:58: warning: suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma? [-Wstring-concatenation] PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "hdmiphy" "usb480m" }; ^ , ../drivers/clk/rockchip/clk-rk3228.c:140:48: note: place parentheses around the string literal to silence warning PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "hdmiphy" "usb480m" }; ^ 1 warning generated. Given the name of the variable and the same variable name in rv1108, it seems that this should have been four distinct elements. Fix it up by adding the comma as suggested. Fixes: 307a2e9ac524 ("clk: rockchip: add clock controller for rk3228") Link: https://github.com/ClangBuiltLinux/linux/issues/1123 Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20200810044020.2063350-1-natechancellor@gmail.com Reviewed-by: Heiko Stübner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3228.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3228.c b/drivers/clk/rockchip/clk-rk3228.c index d7243c09cc84..47d6482dda9d 100644 --- a/drivers/clk/rockchip/clk-rk3228.c +++ b/drivers/clk/rockchip/clk-rk3228.c @@ -137,7 +137,7 @@ PNAME(mux_usb480m_p) = { "usb480m_phy", "xin24m" }; PNAME(mux_hdmiphy_p) = { "hdmiphy_phy", "xin24m" }; PNAME(mux_aclk_cpu_src_p) = { "cpll_aclk_cpu", "gpll_aclk_cpu", "hdmiphy_aclk_cpu" }; -PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "hdmiphy" "usb480m" }; +PNAME(mux_pll_src_4plls_p) = { "cpll", "gpll", "hdmiphy", "usb480m" }; PNAME(mux_pll_src_3plls_p) = { "cpll", "gpll", "hdmiphy" }; PNAME(mux_pll_src_2plls_p) = { "cpll", "gpll" }; PNAME(mux_sclk_hdmi_cec_p) = { "cpll", "gpll", "xin24m" }; From 6542e2b613c2b1952e83973dc434831332ce8e27 Mon Sep 17 00:00:00 2001 From: David Shah Date: Tue, 18 Aug 2020 10:51:00 +0100 Subject: [PATCH 031/648] ARM: dts: omap5: Fix DSI base address and clocks DSI was not probing due to base address off by 0x1000, and sys_clk missing. With this patch, the Pyra display works if HDMI is disabled in the device tree. Fixes: 5a507162f096 ("ARM: dts: Configure interconnect target module for omap5 dsi1") Signed-off-by: David Shah Tested-by: H. Nikolaus Schaller [tony@atomide.com: standardized subject line, added fixes tag] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5.dtsi | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 5da9cff7a53c..a82c96258a93 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -488,11 +488,11 @@ rfbi: encoder@0 { }; }; - target-module@5000 { + target-module@4000 { compatible = "ti,sysc-omap2", "ti,sysc"; - reg = <0x5000 0x4>, - <0x5010 0x4>, - <0x5014 0x4>; + reg = <0x4000 0x4>, + <0x4010 0x4>, + <0x4014 0x4>; reg-names = "rev", "sysc", "syss"; ti,sysc-sidle = , , @@ -504,7 +504,7 @@ SYSC_OMAP2_SOFTRESET | ti,syss-mask = <1>; #address-cells = <1>; #size-cells = <1>; - ranges = <0 0x5000 0x1000>; + ranges = <0 0x4000 0x1000>; dsi1: encoder@0 { compatible = "ti,omap5-dsi"; @@ -514,8 +514,9 @@ dsi1: encoder@0 { reg-names = "proto", "phy", "pll"; interrupts = ; status = "disabled"; - clocks = <&dss_clkctrl OMAP5_DSS_CORE_CLKCTRL 8>; - clock-names = "fck"; + clocks = <&dss_clkctrl OMAP5_DSS_CORE_CLKCTRL 8>, + <&dss_clkctrl OMAP5_DSS_CORE_CLKCTRL 10>; + clock-names = "fck", "sys_clk"; }; }; @@ -545,8 +546,9 @@ dsi2: encoder@0 { reg-names = "proto", "phy", "pll"; interrupts = ; status = "disabled"; - clocks = <&dss_clkctrl OMAP5_DSS_CORE_CLKCTRL 8>; - clock-names = "fck"; + clocks = <&dss_clkctrl OMAP5_DSS_CORE_CLKCTRL 8>, + <&dss_clkctrl OMAP5_DSS_CORE_CLKCTRL 10>; + clock-names = "fck", "sys_clk"; }; }; From f8d0168e035f6359275efcfd1ad014c80e5adaa9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 18 Aug 2020 06:09:33 +0800 Subject: [PATCH 032/648] soundwire: bus: fix typo in comment on INTSTAT registers s/Instat/Intstat/ Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20200817220933.17492-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index e6e0fb9a81b4..da0201693c24 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -1372,7 +1372,7 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave) return ret; } - /* Read Instat 1, Instat 2 and Instat 3 registers */ + /* Read Intstat 1, Intstat 2 and Intstat 3 registers */ ret = sdw_read(slave, SDW_SCP_INT1); if (ret < 0) { dev_err(slave->bus->dev, From 5610921a4435ef45c33702073e64f835f3dca7f1 Mon Sep 17 00:00:00 2001 From: Mateusz Gorski Date: Wed, 22 Jul 2020 19:35:24 +0200 Subject: [PATCH 033/648] ASoC: Intel: skl_hda_dsp_generic: Fix NULLptr dereference in autosuspend delay Different modules for HDMI codec are used depending on the "hda_codec_use_common_hdmi" option being enabled or not. Driver private context for both of them is different. This leads to null-pointer dereference error when driver tries to set autosuspend delay for HDMI codec while the option is off (hdac_hdmi module is used for HDMI). Change the string in conditional statement to "ehdaudio0D0" to ensure that only the HDAudio codec is handled by this function. Fixes: 5bf73b1b1dec ("ASoC: intel/skl/hda - fix oops on systems without i915 audio codec") Signed-off-by: Mateusz Gorski Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20200722173524.30161-1-mateusz.gorski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/skl_hda_dsp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/skl_hda_dsp_generic.c b/sound/soc/intel/boards/skl_hda_dsp_generic.c index ca4900036ead..bc50eda297ab 100644 --- a/sound/soc/intel/boards/skl_hda_dsp_generic.c +++ b/sound/soc/intel/boards/skl_hda_dsp_generic.c @@ -181,7 +181,7 @@ static void skl_set_hda_codec_autosuspend_delay(struct snd_soc_card *card) struct snd_soc_dai *dai; for_each_card_rtds(card, rtd) { - if (!strstr(rtd->dai_link->codecs->name, "ehdaudio")) + if (!strstr(rtd->dai_link->codecs->name, "ehdaudio0D0")) continue; dai = asoc_rtd_to_codec(rtd, 0); hda_pvt = snd_soc_component_get_drvdata(dai->component); From bc62d68e2a0a69fcdcf28aca8edb01abf306b698 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Mon, 6 Apr 2020 16:29:50 +0530 Subject: [PATCH 034/648] device_cgroup: Fix RCU list debugging warning exceptions may be traversed using list_for_each_entry_rcu() outside of an RCU read side critical section BUT under the protection of decgroup_mutex. Hence add the corresponding lockdep expression to fix the following false-positive warning: [ 2.304417] ============================= [ 2.304418] WARNING: suspicious RCU usage [ 2.304420] 5.5.4-stable #17 Tainted: G E [ 2.304422] ----------------------------- [ 2.304424] security/device_cgroup.c:355 RCU-list traversed in non-reader section!! Signed-off-by: Amol Grover Signed-off-by: James Morris --- security/device_cgroup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 43ab0ad45c1b..04375df52fc9 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -354,7 +354,8 @@ static bool match_exception_partial(struct list_head *exceptions, short type, { struct dev_exception_item *ex; - list_for_each_entry_rcu(ex, exceptions, list) { + list_for_each_entry_rcu(ex, exceptions, list, + lockdep_is_held(&devcgroup_mutex)) { if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK)) continue; if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR)) From 5c1e4f7e9e49b6925b1fb5c507d2c614f3edb292 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Wed, 18 Mar 2020 10:42:15 +0800 Subject: [PATCH 035/648] selftests/timers: Turn off timeout setting The following 4 tests in timers can take longer than the default 45 seconds that added in commit 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") to run: * nsleep-lat - 2m7.350s * set-timer-lat - 2m0.66s * inconsistency-check - 1m45.074s * raw_skew - 2m0.013s Thus they will be marked as failed with the current 45s setting: not ok 3 selftests: timers: nsleep-lat # TIMEOUT not ok 4 selftests: timers: set-timer-lat # TIMEOUT not ok 6 selftests: timers: inconsistency-check # TIMEOUT not ok 7 selftests: timers: raw_skew # TIMEOUT Disable the timeout setting for timers can make these tests finish properly: ok 3 selftests: timers: nsleep-lat ok 4 selftests: timers: set-timer-lat ok 6 selftests: timers: inconsistency-check ok 7 selftests: timers: raw_skew https://bugs.launchpad.net/bugs/1864626 Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") Signed-off-by: Po-Hsu Lin Acked-by: John Stultz Signed-off-by: Shuah Khan --- tools/testing/selftests/timers/Makefile | 1 + tools/testing/selftests/timers/settings | 1 + 2 files changed, 2 insertions(+) create mode 100644 tools/testing/selftests/timers/settings diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 7656c7ce79d9..0e73a16874c4 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) +TEST_FILES := settings include ../lib.mk diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/timers/settings @@ -0,0 +1 @@ +timeout=0 From bf53f463099016dddd12acfed6a025bb780cd504 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Aug 2020 10:57:52 +0200 Subject: [PATCH 036/648] ARM: configs: Update Integrator defconfig Update the Integrator defconfig to the changes from the recent kernel cycles and especially the v5.9-rc1 changes. - Move options around as a result of Kconfig changes. - Drop the ZBOOT_ROM* stuff that doesn't exist anymore. - Select GPIO input and evdev as used by the pushbuttons on the machine. - Select CONFIG_DRM_DISPLAY_CONNECTOR=y, the CONFIG_DRM_SIMPLE_BRIDGE was renamed when renaming the driver, but the simple bridge requires another bridge, the display connector bridge, in order to work, so this is required to have graphics working properly in kernel v5.9-rc1+ Link: https://lore.kernel.org/r/20200820085752.42994-1-linus.walleij@linaro.org Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/configs/integrator_defconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig index a9755c501bec..b06e537d5149 100644 --- a/arch/arm/configs/integrator_defconfig +++ b/arch/arm/configs/integrator_defconfig @@ -1,13 +1,11 @@ CONFIG_SYSVIPC=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_PARTITION_ADVANCED=y CONFIG_ARCH_MULTI_V4T=y CONFIG_ARCH_MULTI_V5=y # CONFIG_ARCH_MULTI_V7 is not set @@ -15,19 +13,17 @@ CONFIG_ARCH_INTEGRATOR=y CONFIG_ARCH_INTEGRATOR_AP=y CONFIG_INTEGRATOR_IMPD1=y CONFIG_ARCH_INTEGRATOR_CP=y -CONFIG_PCI=y -CONFIG_PREEMPT=y CONFIG_AEABI=y # CONFIG_ATAGS is not set -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="console=ttyAM0,38400n8 root=/dev/nfs ip=bootp" CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPUFREQ_DT=y -CONFIG_CMA=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -37,6 +33,7 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y # CONFIG_IPV6 is not set +CONFIG_PCI=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_AFS_PARTS=y @@ -52,9 +49,12 @@ CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_NETDEVICES=y CONFIG_E100=y CONFIG_SMC91X=y +CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GPIO=y # CONFIG_SERIO_SERPORT is not set CONFIG_DRM=y +CONFIG_DRM_DISPLAY_CONNECTOR=y CONFIG_DRM_SIMPLE_BRIDGE=y CONFIG_DRM_PL111=y CONFIG_FB_MODE_HELPERS=y From 3b574ea2f89321d0a428286be0057340f0313323 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Aug 2020 08:56:53 +0200 Subject: [PATCH 037/648] arm64: defconfig: Enable USB gadget with configfs USB OTG connections are pretty common for embedded and development boards, for example to have networking or serial access to the device. Build as a module the USB gadget configfs options so the defconfig can be used in such development configurations. Link: https://lore.kernel.org/r/20200817065654.4419-1-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Olof Johansson --- arch/arm64/configs/defconfig | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index e0f33826819f..0b0b30efc0f5 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -724,6 +724,17 @@ CONFIG_USB_GADGET=y CONFIG_USB_RENESAS_USBHS_UDC=m CONFIG_USB_RENESAS_USB3=m CONFIG_USB_TEGRA_XUDC=m +CONFIG_USB_CONFIGFS=m +CONFIG_USB_CONFIGFS_SERIAL=y +CONFIG_USB_CONFIGFS_ACM=y +CONFIG_USB_CONFIGFS_OBEX=y +CONFIG_USB_CONFIGFS_NCM=y +CONFIG_USB_CONFIGFS_ECM=y +CONFIG_USB_CONFIGFS_ECM_SUBSET=y +CONFIG_USB_CONFIGFS_RNDIS=y +CONFIG_USB_CONFIGFS_EEM=y +CONFIG_USB_CONFIGFS_MASS_STORAGE=y +CONFIG_USB_CONFIGFS_F_FS=y CONFIG_TYPEC=m CONFIG_TYPEC_TCPM=m CONFIG_TYPEC_FUSB302=m From 1ebda87ad906b732af5660c3ee2e896a13292545 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Aug 2020 08:56:54 +0200 Subject: [PATCH 038/648] arm64: defconfig: Enable ptn5150 extcon driver Enable the extcon driver for NXP PTN5150A chips (CC logic for USB Type-C applications) present on some of i.MX8 evaluation boards (e.g. Variscite Symphony board). Link: https://lore.kernel.org/r/20200817065654.4419-2-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Olof Johansson --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 0b0b30efc0f5..6d04b9577b0b 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -925,6 +925,7 @@ CONFIG_ARCH_TEGRA_194_SOC=y CONFIG_ARCH_K3_AM6_SOC=y CONFIG_ARCH_K3_J721E_SOC=y CONFIG_TI_SCI_PM_DOMAINS=y +CONFIG_EXTCON_PTN5150=m CONFIG_EXTCON_USB_GPIO=y CONFIG_EXTCON_USBC_CROS_EC=y CONFIG_IIO=y From 38480df564cc68f081bb38998927d164b9010995 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:05:59 +0200 Subject: [PATCH 039/648] KVM: arm64: pvtime: steal-time is only supported when configured Don't confuse the guest by saying steal-time is supported when it hasn't been configured by userspace and won't work. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200804170604.42662-2-drjones@redhat.com --- arch/arm64/kvm/pvtime.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index f7b52ce1557e..c3ef4ebd6846 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -43,7 +43,8 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) switch (feature) { case ARM_SMCCC_HV_PV_TIME_FEATURES: case ARM_SMCCC_HV_PV_TIME_ST: - val = SMCCC_RET_SUCCESS; + if (vcpu->arch.steal.base != GPA_INVALID) + val = SMCCC_RET_SUCCESS; break; } From 2dbd780e34ac53e79c6c359ce12b89ed665ef562 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:00 +0200 Subject: [PATCH 040/648] KVM: arm64: pvtime: Fix potential loss of stolen time We should only check current->sched_info.run_delay once when updating stolen time. Otherwise there's a chance there could be a change between checks that we miss (preemption disabling comes after vcpu request checks). Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200804170604.42662-3-drjones@redhat.com --- arch/arm64/kvm/pvtime.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index c3ef4ebd6846..95f9580275b1 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -13,6 +13,7 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; + u64 last_steal = vcpu->arch.steal.last_steal; u64 steal; __le64 steal_le; u64 offset; @@ -24,8 +25,8 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu) /* Let's do the local bookkeeping */ steal = vcpu->arch.steal.steal; - steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal; - vcpu->arch.steal.last_steal = current->sched_info.run_delay; + vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); + steal += vcpu->arch.steal.last_steal - last_steal; vcpu->arch.steal.steal = steal; steal_le = cpu_to_le64(steal); From 4d2d4ce001f283ed8127173543b4cfb65641e357 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:01 +0200 Subject: [PATCH 041/648] KVM: arm64: Drop type input from kvm_put_guest We can use typeof() to avoid the need for the type input. Suggested-by: Marc Zyngier Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200804170604.42662-4-drjones@redhat.com --- arch/arm64/kvm/pvtime.c | 2 +- include/linux/kvm_host.h | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 95f9580275b1..241ded7ee0ad 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -32,7 +32,7 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu) steal_le = cpu_to_le64(steal); idx = srcu_read_lock(&kvm->srcu); offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); - kvm_put_guest(kvm, base + offset, steal_le, u64); + kvm_put_guest(kvm, base + offset, steal_le); srcu_read_unlock(&kvm->srcu, idx); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a23076765b4c..84371fb06209 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -749,25 +749,26 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); -#define __kvm_put_guest(kvm, gfn, offset, value, type) \ +#define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ - type __user *__uaddr = (type __user *)(__addr + offset); \ + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ int __ret = -EFAULT; \ \ if (!kvm_is_error_hva(__addr)) \ - __ret = put_user(value, __uaddr); \ + __ret = put_user(v, __uaddr); \ if (!__ret) \ mark_page_dirty(kvm, gfn); \ __ret; \ }) -#define kvm_put_guest(kvm, gpa, value, type) \ +#define kvm_put_guest(kvm, gpa, v) \ ({ \ gpa_t __gpa = gpa; \ struct kvm *__kvm = kvm; \ + \ __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ - offset_in_page(__gpa), (value), type); \ + offset_in_page(__gpa), v); \ }) int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); From 53f985584e3c2ebe5f2455530fbf87a001528db8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:02 +0200 Subject: [PATCH 042/648] KVM: arm64: pvtime: Fix stolen time accounting across migration When updating the stolen time we should always read the current stolen time from the user provided memory, not from a kernel cache. If we use a cache then we'll end up resetting stolen time to zero on the first update after migration. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200804170604.42662-5-drjones@redhat.com --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/pvtime.c | 25 ++++++++++--------------- include/linux/kvm_host.h | 20 ++++++++++++++++++++ 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 65568b23868a..dd9c3b25aa1e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -368,7 +368,6 @@ struct kvm_vcpu_arch { /* Guest PV state */ struct { - u64 steal; u64 last_steal; gpa_t base; } steal; diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 241ded7ee0ad..75234321d896 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -13,26 +13,22 @@ void kvm_update_stolen_time(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - u64 last_steal = vcpu->arch.steal.last_steal; - u64 steal; - __le64 steal_le; - u64 offset; - int idx; u64 base = vcpu->arch.steal.base; + u64 last_steal = vcpu->arch.steal.last_steal; + u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); + u64 steal = 0; + int idx; if (base == GPA_INVALID) return; - /* Let's do the local bookkeeping */ - steal = vcpu->arch.steal.steal; - vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); - steal += vcpu->arch.steal.last_steal - last_steal; - vcpu->arch.steal.steal = steal; - - steal_le = cpu_to_le64(steal); idx = srcu_read_lock(&kvm->srcu); - offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); - kvm_put_guest(kvm, base + offset, steal_le); + if (!kvm_get_guest(kvm, base + offset, steal)) { + steal = le64_to_cpu(steal); + vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); + steal += vcpu->arch.steal.last_steal - last_steal; + kvm_put_guest(kvm, base + offset, cpu_to_le64(steal)); + } srcu_read_unlock(&kvm->srcu, idx); } @@ -66,7 +62,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) * Start counting stolen time from the time the guest requests * the feature enabled. */ - vcpu->arch.steal.steal = 0; vcpu->arch.steal.last_steal = current->sched_info.run_delay; idx = srcu_read_lock(&kvm->srcu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 84371fb06209..05e3c2fb3ef7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -749,6 +749,26 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); +#define __kvm_get_guest(kvm, gfn, offset, v) \ +({ \ + unsigned long __addr = gfn_to_hva(kvm, gfn); \ + typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ + int __ret = -EFAULT; \ + \ + if (!kvm_is_error_hva(__addr)) \ + __ret = get_user(v, __uaddr); \ + __ret; \ +}) + +#define kvm_get_guest(kvm, gpa, v) \ +({ \ + gpa_t __gpa = gpa; \ + struct kvm *__kvm = kvm; \ + \ + __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT, \ + offset_in_page(__gpa), v); \ +}) + #define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ From 739c7af7daeede8e2ec78392f2617c965ce0342a Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:03 +0200 Subject: [PATCH 043/648] KVM: Documentation: Minor fixups In preparation for documenting a new capability let's fix up the formatting of the current ones. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20200804170604.42662-6-drjones@redhat.com --- Documentation/virt/kvm/api.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eb3a1316f03e..49af23d2b462 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6130,7 +6130,7 @@ HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx. 8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH ----------------------------------- -:Architecture: x86 +:Architectures: x86 This capability indicates that KVM running on top of Hyper-V hypervisor enables Direct TLB flush for its guests meaning that TLB flush @@ -6143,16 +6143,17 @@ in CPUID and only exposes Hyper-V identification. In this case, guest thinks it's running on Hyper-V and only use Hyper-V hypercalls. 8.22 KVM_CAP_S390_VCPU_RESETS +----------------------------- -Architectures: s390 +:Architectures: s390 This capability indicates that the KVM_S390_NORMAL_RESET and KVM_S390_CLEAR_RESET ioctls are available. 8.23 KVM_CAP_S390_PROTECTED +--------------------------- -Architecture: s390 - +:Architectures: s390 This capability indicates that the Ultravisor has been initialized and KVM can therefore start protected VMs. From 004a01241c5a0d375266ebf1c72f208de99294e9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 4 Aug 2020 19:06:04 +0200 Subject: [PATCH 044/648] arm64/x86: KVM: Introduce steal-time cap arm64 requires a vcpu fd (KVM_HAS_DEVICE_ATTR vcpu ioctl) to probe support for steal-time. However this is unnecessary, as only a KVM fd is required, and it complicates userspace (userspace may prefer delaying vcpu creation until after feature probing). Introduce a cap that can be checked instead. While x86 can already probe steal-time support with a kvm fd (KVM_GET_SUPPORTED_CPUID), we add the cap there too for consistency. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20200804170604.42662-7-drjones@redhat.com --- Documentation/virt/kvm/api.rst | 13 +++++++++++++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 3 +++ arch/arm64/kvm/pvtime.c | 2 +- arch/x86/kvm/x86.c | 3 +++ include/uapi/linux/kvm.h | 1 + 6 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 49af23d2b462..d2b733dc7892 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6160,3 +6160,16 @@ KVM can therefore start protected VMs. This capability governs the KVM_S390_PV_COMMAND ioctl and the KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected guests when the state change is invalid. + +8.24 KVM_CAP_STEAL_TIME +----------------------- + +:Architectures: arm64, x86 + +This capability indicates that KVM supports steal time accounting. +When steal time accounting is supported it may be enabled with +architecture-specific interfaces. This capability and the architecture- +specific interfaces must be consistent, i.e. if one says the feature +is supported, than the other should as well and vice versa. For arm64 +see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL". +For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME". diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index dd9c3b25aa1e..af4989a25bb7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -543,6 +543,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); void kvm_update_stolen_time(struct kvm_vcpu *vcpu); +bool kvm_arm_pvtime_supported(void); int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 691d21e4c717..57876b0b870b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -206,6 +206,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) */ r = 1; break; + case KVM_CAP_STEAL_TIME: + r = kvm_arm_pvtime_supported(); + break; default: r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 75234321d896..920ac43077ad 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -71,7 +71,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) return base; } -static bool kvm_arm_pvtime_supported(void) +bool kvm_arm_pvtime_supported(void) { return !!sched_info_on(); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 599d73206299..c44d3a73b8eb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3581,6 +3581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SMALLER_MAXPHYADDR: r = (int) allow_smaller_maxphyaddr; break; + case KVM_CAP_STEAL_TIME: + r = sched_info_on(); + break; default: break; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f6d86033c4fa..3d8023474f2a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1035,6 +1035,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_LAST_CPU 184 #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 +#define KVM_CAP_STEAL_TIME 187 #ifdef KVM_CAP_IRQ_ROUTING From 5e7820e369248f880767c4c4079b414529bc2125 Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Fri, 31 Jul 2020 20:26:04 +0800 Subject: [PATCH 045/648] ASoC: intel: atom: Add period size constraint Use constraint to make sure the period size could always be multiple of 1ms to align with the fundamental design/limitation of firmware. Signed-off-by: Brent Lu Link: https://lore.kernel.org/r/1596198365-10105-2-git-send-email-brent.lu@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index b1cac7abdc0a..fba2c795ce0d 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -333,6 +333,17 @@ static int sst_media_open(struct snd_pcm_substream *substream, if (ret_val < 0) goto out_power_up; + /* + * Make sure the period to be multiple of 1ms to align the + * design of firmware. Apply same rule to buffer size to make + * sure alsa could always find a value for period size + * regardless the buffer size given by user space. + */ + snd_pcm_hw_constraint_step(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, 48); + snd_pcm_hw_constraint_step(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, 48); + /* Make sure, that the period size is always even */ snd_pcm_hw_constraint_step(substream->runtime, 0, SNDRV_PCM_HW_PARAM_PERIODS, 2); From 3c27ea23ffb43262da6c64964163895951aaed4e Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 20 Aug 2020 17:45:11 +0200 Subject: [PATCH 046/648] ASoC: qcom: Set card->owner to avoid warnings On Linux 5.9-rc1 I get the following warning with apq8016-sbc: WARNING: CPU: 2 PID: 69 at sound/core/init.c:207 snd_card_new+0x36c/0x3b0 [snd] CPU: 2 PID: 69 Comm: kworker/2:1 Not tainted 5.9.0-rc1 #1 Workqueue: events deferred_probe_work_func pc : snd_card_new+0x36c/0x3b0 [snd] lr : snd_card_new+0xf4/0x3b0 [snd] Call trace: snd_card_new+0x36c/0x3b0 [snd] snd_soc_bind_card+0x340/0x9a0 [snd_soc_core] snd_soc_register_card+0xf4/0x110 [snd_soc_core] devm_snd_soc_register_card+0x44/0xa0 [snd_soc_core] apq8016_sbc_platform_probe+0x11c/0x140 [snd_soc_apq8016_sbc] This warning was introduced in commit 81033c6b584b ("ALSA: core: Warn on empty module"). It looks like we are supposed to set card->owner to THIS_MODULE. Fix this for all the qcom ASoC drivers. Cc: Srinivas Kandagatla Fixes: 79119c798649 ("ASoC: qcom: Add Storm machine driver") Fixes: bdb052e81f62 ("ASoC: qcom: add apq8016 sound card support") Fixes: a6f933f63f2f ("ASoC: qcom: apq8096: Add db820c machine driver") Fixes: 6b1687bf76ef ("ASoC: qcom: add sdm845 sound card support") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20200820154511.203072-1-stephan@gerhold.net Signed-off-by: Mark Brown --- sound/soc/qcom/apq8016_sbc.c | 1 + sound/soc/qcom/apq8096.c | 1 + sound/soc/qcom/sdm845.c | 1 + sound/soc/qcom/storm.c | 1 + 4 files changed, 4 insertions(+) diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c index 083413abc2f6..575e2aefefe3 100644 --- a/sound/soc/qcom/apq8016_sbc.c +++ b/sound/soc/qcom/apq8016_sbc.c @@ -143,6 +143,7 @@ static int apq8016_sbc_platform_probe(struct platform_device *pdev) card = &data->card; card->dev = dev; + card->owner = THIS_MODULE; card->dapm_widgets = apq8016_sbc_dapm_widgets; card->num_dapm_widgets = ARRAY_SIZE(apq8016_sbc_dapm_widgets); diff --git a/sound/soc/qcom/apq8096.c b/sound/soc/qcom/apq8096.c index 253549600c5a..1a69baefc5ce 100644 --- a/sound/soc/qcom/apq8096.c +++ b/sound/soc/qcom/apq8096.c @@ -114,6 +114,7 @@ static int apq8096_platform_probe(struct platform_device *pdev) return -ENOMEM; card->dev = dev; + card->owner = THIS_MODULE; dev_set_drvdata(dev, card); ret = qcom_snd_parse_of(card); if (ret) diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c index 0d10fba53945..ab1bf23c21a6 100644 --- a/sound/soc/qcom/sdm845.c +++ b/sound/soc/qcom/sdm845.c @@ -555,6 +555,7 @@ static int sdm845_snd_platform_probe(struct platform_device *pdev) card->dapm_widgets = sdm845_snd_widgets; card->num_dapm_widgets = ARRAY_SIZE(sdm845_snd_widgets); card->dev = dev; + card->owner = THIS_MODULE; dev_set_drvdata(dev, card); ret = qcom_snd_parse_of(card); if (ret) diff --git a/sound/soc/qcom/storm.c b/sound/soc/qcom/storm.c index c0c388d4db82..80c9cf2f254a 100644 --- a/sound/soc/qcom/storm.c +++ b/sound/soc/qcom/storm.c @@ -96,6 +96,7 @@ static int storm_platform_probe(struct platform_device *pdev) return -ENOMEM; card->dev = &pdev->dev; + card->owner = THIS_MODULE; ret = snd_soc_of_parse_card_name(card, "qcom,model"); if (ret) { From 81dbbb417da4d1ac407dca5b434d39d5b6b91ef3 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 28 Jul 2020 12:50:06 +0200 Subject: [PATCH 047/648] ARM: dts: ls1021a: fix QuadSPI-memory reg range According to the Reference Manual, the correct size is 512 MiB. Without this fix, probing the QSPI fails: fsl-quadspi 1550000.spi: ioremap failed for resource [mem 0x40000000-0x7fffffff] fsl-quadspi 1550000.spi: Freescale QuadSPI probe failed fsl-quadspi: probe of 1550000.spi failed with error -12 Fixes: 85f8ee78ab72 ("ARM: dts: ls1021a: Add support for QSPI with ls1021a SoC") Signed-off-by: Matthias Schiffer Signed-off-by: Shawn Guo --- arch/arm/boot/dts/ls1021a.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 069af9a19bb6..827373ef1a54 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -182,7 +182,7 @@ qspi: spi@1550000 { #address-cells = <1>; #size-cells = <0>; reg = <0x0 0x1550000 0x0 0x10000>, - <0x0 0x40000000 0x0 0x40000000>; + <0x0 0x40000000 0x0 0x20000000>; reg-names = "QuadSPI", "QuadSPI-memory"; interrupts = ; clock-names = "qspi_en", "qspi"; From 336306790b2bbf7ce837625fa3b24ba724d05838 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 28 Jul 2020 13:48:25 -0700 Subject: [PATCH 048/648] iio: cros_ec: Set Gyroscope default frequency to 25Hz BMI160 Minimium gyroscope frequency in normal mode is 25Hz. When older EC firmware do not report their sensors frequencies, use 25Hz as the minimum for gyroscope to be sure it works on BMI160. Fixes: ae7b02ad2f32d ("iio: common: cros_ec_sensors: Expose cros_ec_sensors frequency range via iio sysfs") Signed-off-by: Gwendal Grignou Reviewed-by: Enric Balletbo i Serra Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index ea480c1d4349..1bc6efa47316 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -72,10 +72,13 @@ static void get_default_min_max_freq(enum motionsensor_type type, switch (type) { case MOTIONSENSE_TYPE_ACCEL: - case MOTIONSENSE_TYPE_GYRO: *min_freq = 12500; *max_freq = 100000; break; + case MOTIONSENSE_TYPE_GYRO: + *min_freq = 25000; + *max_freq = 100000; + break; case MOTIONSENSE_TYPE_MAG: *min_freq = 5000; *max_freq = 25000; From ab3300deff6a81cc10887437383fc8a63884c3f9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Jul 2020 14:23:16 +0300 Subject: [PATCH 049/648] counter: microchip-tcb-capture: check the correct variable This should be testing "regmap" instead of "priv->regmap". The "priv->regmap" variable is always zero so it's not an error pointer. Fixes: 106b104137fd ("counter: Add microchip TCB capture counter") Signed-off-by: Dan Carpenter Signed-off-by: Jonathan Cameron --- drivers/counter/microchip-tcb-capture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c index f7b7743ddb94..b7b252c5addf 100644 --- a/drivers/counter/microchip-tcb-capture.c +++ b/drivers/counter/microchip-tcb-capture.c @@ -320,8 +320,8 @@ static int mchp_tc_probe(struct platform_device *pdev) } regmap = syscon_node_to_regmap(np->parent); - if (IS_ERR(priv->regmap)) - return PTR_ERR(priv->regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); /* max. channels number is 2 when in QDEC mode */ priv->num_channels = of_property_count_u32_elems(np, "reg"); From e71e6dbe96ac80ac2aebe71a6a942e7bd60e7596 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Mon, 3 Aug 2020 08:04:05 +0300 Subject: [PATCH 050/648] iio: adc: ti-ads1015: fix conversion when CONFIG_PM is not set To stop conversion ads1015_set_power_state() function call unimplemented function __pm_runtime_suspend() from pm_runtime_put_autosuspend() if CONFIG_PM is not set. In case of CONFIG_PM is not set: __pm_runtime_suspend() returns -ENOSYS, so ads1015_read_raw() failed because ads1015_set_power_state() returns an error. If CONFIG_PM is disabled, there is no need to start/stop conversion. Fix it by adding return 0 function variant if CONFIG_PM is not set. Signed-off-by: Maxim Kochetkov Fixes: ecc24e72f437 ("iio: adc: Add TI ADS1015 ADC driver support") Tested-by: Maxim Kiselev Reviewed-by: Andy Shevchenko Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads1015.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index f42ab112986e..9fef39bcf997 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -316,6 +316,7 @@ static const struct iio_chan_spec ads1115_channels[] = { IIO_CHAN_SOFT_TIMESTAMP(ADS1015_TIMESTAMP), }; +#ifdef CONFIG_PM static int ads1015_set_power_state(struct ads1015_data *data, bool on) { int ret; @@ -333,6 +334,15 @@ static int ads1015_set_power_state(struct ads1015_data *data, bool on) return ret < 0 ? ret : 0; } +#else /* !CONFIG_PM */ + +static int ads1015_set_power_state(struct ads1015_data *data, bool on) +{ + return 0; +} + +#endif /* !CONFIG_PM */ + static int ads1015_get_adc_result(struct ads1015_data *data, int chan, int *val) { From e3ea4192d82480e1215280267746f51ce21db283 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 3 Aug 2020 10:30:01 +0200 Subject: [PATCH 051/648] iio: adc: rockchip_saradc: select IIO_TRIGGERED_BUFFER The kernel fails to compile due to undefined reference to devm_iio_triggered_buffer_setup() if IIO_TRIGGERED_BUFFER is not enabled. The original patch [1] had this dependency. But somehow it didn't make it into the kernel tree. Re-add it. [1] https://lore.kernel.org/lkml/20200623233011.2319035-3-heiko@sntech.de/ Fixes: 4e130dc7b413 ("iio: adc: rockchip_saradc: Add support iio buffers") Signed-off-by: Michael Walle Reviewed-by: Heiko Stuebner Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 66d9cc073157..d94dc800b842 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -865,6 +865,8 @@ config ROCKCHIP_SARADC tristate "Rockchip SARADC driver" depends on ARCH_ROCKCHIP || (ARM && COMPILE_TEST) depends on RESET_CONTROLLER + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for the SARADC found in SoCs from Rockchip. From 95ad67577de4ea08eb8e441394e698aa4addcc0b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:37 +0100 Subject: [PATCH 052/648] iio: accel: kxsd9: Fix alignment of local buffer. iio_push_to_buffers_with_timestamp assumes 8 byte alignment which is not guaranteed by an array of smaller elements. Note that whilst in this particular case the alignment forcing of the ts element is not strictly necessary it acts as good documentation. Doing this where not necessary should cut down on the number of cut and paste introduced errors elsewhere. Fixes: 0427a106a98a ("iio: accel: kxsd9: Add triggered buffer handling") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/accel/kxsd9.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 66b2e4cf24cf..0e18b92e2099 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -209,14 +209,20 @@ static irqreturn_t kxsd9_trigger_handler(int irq, void *p) const struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct kxsd9_state *st = iio_priv(indio_dev); + /* + * Ensure correct positioning and alignment of timestamp. + * No need to zero initialize as all elements written. + */ + struct { + __be16 chan[4]; + s64 ts __aligned(8); + } hw_values; int ret; - /* 4 * 16bit values AND timestamp */ - __be16 hw_values[8]; ret = regmap_bulk_read(st->map, KXSD9_REG_X, - &hw_values, - 8); + hw_values.chan, + sizeof(hw_values.chan)); if (ret) { dev_err(st->dev, "error reading data\n"); @@ -224,7 +230,7 @@ static irqreturn_t kxsd9_trigger_handler(int irq, void *p) } iio_push_to_buffers_with_timestamp(indio_dev, - hw_values, + &hw_values, iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); From 89226a296d816727405d3fea684ef69e7d388bd8 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:38 +0100 Subject: [PATCH 053/648] iio:accel:mma8452: Fix timestamp alignment and prevent data leak. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 16 byte u8 array on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data with alignment ensured by use of an explicit c structure. This data is allocated with kzalloc so no data can leak appart from previous readings. The additional forcing of the 8 byte alignment of the timestamp is not strictly necessary but makes the code less fragile by making this explicit. Fixes: c7eeea93ac60 ("iio: Add Freescale MMA8452Q 3-axis accelerometer driver") Reported-by: Lars-Peter Clausen Cc: Peter Meerwald Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/accel/mma8452.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index ba27f8673131..1cf2b5db26ca 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -110,6 +110,12 @@ struct mma8452_data { int sleep_val; struct regulator *vdd_reg; struct regulator *vddio_reg; + + /* Ensure correct alignment of time stamp when present */ + struct { + __be16 channels[3]; + s64 ts __aligned(8); + } buffer; }; /** @@ -1091,14 +1097,13 @@ static irqreturn_t mma8452_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct mma8452_data *data = iio_priv(indio_dev); - u8 buffer[16]; /* 3 16-bit channels + padding + ts */ int ret; - ret = mma8452_read(data, (__be16 *)buffer); + ret = mma8452_read(data, data->buffer.channels); if (ret < 0) goto done; - iio_push_to_buffers_with_timestamp(indio_dev, buffer, + iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer, iio_get_time_ns(indio_dev)); done: From a6f86f724394de3629da63fe5e1b7a4ab3396efe Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:39 +0100 Subject: [PATCH 054/648] iio:accel:bmc150-accel: Fix timestamp alignment and prevent data leak. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 16 byte array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data with alignment ensured by use of an explicit c structure. This data is allocated with kzalloc so no data can leak appart from previous readings. Fixes tag is beyond some major refactoring so likely manual backporting would be needed to get that far back. Whilst the force alignment of the ts is not strictly necessary, it does make the code less fragile. Fixes: 3bbec9773389 ("iio: bmc150_accel: add support for hardware fifo") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Acked-by: Srinivas Pandruvada Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/accel/bmc150-accel-core.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 24864d9dfab5..48435865fdaf 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -189,6 +189,14 @@ struct bmc150_accel_data { struct mutex mutex; u8 fifo_mode, watermark; s16 buffer[8]; + /* + * Ensure there is sufficient space and correct alignment for + * the timestamp if enabled + */ + struct { + __le16 channels[3]; + s64 ts __aligned(8); + } scan; u8 bw_bits; u32 slope_dur; u32 slope_thres; @@ -922,15 +930,16 @@ static int __bmc150_accel_fifo_flush(struct iio_dev *indio_dev, * now. */ for (i = 0; i < count; i++) { - u16 sample[8]; int j, bit; j = 0; for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) - memcpy(&sample[j++], &buffer[i * 3 + bit], 2); + memcpy(&data->scan.channels[j++], &buffer[i * 3 + bit], + sizeof(data->scan.channels[0])); - iio_push_to_buffers_with_timestamp(indio_dev, sample, tstamp); + iio_push_to_buffers_with_timestamp(indio_dev, &data->scan, + tstamp); tstamp += sample_period; } From 7e5ac1f2206eda414f90c698fe1820dee873394d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:40 +0100 Subject: [PATCH 055/648] iio:accel:mma7455: Fix timestamp alignment and prevent data leak. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 16 byte u8 array on the stack As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data with alignment ensured by use of an explicit c structure. This data is allocated with kzalloc so no data can leak appart from previous readings. The force alignment of ts is not strictly necessary in this particularly case but does make the code less fragile. Fixes: a84ef0d181d9 ("iio: accel: add Freescale MMA7455L/MMA7456L 3-axis accelerometer driver") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Cc: Reviewed-by: Andy Shevchenko --- drivers/iio/accel/mma7455_core.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/iio/accel/mma7455_core.c b/drivers/iio/accel/mma7455_core.c index 7e99bcb3398d..922bd38ff6ea 100644 --- a/drivers/iio/accel/mma7455_core.c +++ b/drivers/iio/accel/mma7455_core.c @@ -52,6 +52,14 @@ struct mma7455_data { struct regmap *regmap; + /* + * Used to reorganize data. Will ensure correct alignment of + * the timestamp if present + */ + struct { + __le16 channels[3]; + s64 ts __aligned(8); + } scan; }; static int mma7455_drdy(struct mma7455_data *mma7455) @@ -82,19 +90,19 @@ static irqreturn_t mma7455_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct mma7455_data *mma7455 = iio_priv(indio_dev); - u8 buf[16]; /* 3 x 16-bit channels + padding + ts */ int ret; ret = mma7455_drdy(mma7455); if (ret) goto done; - ret = regmap_bulk_read(mma7455->regmap, MMA7455_REG_XOUTL, buf, - sizeof(__le16) * 3); + ret = regmap_bulk_read(mma7455->regmap, MMA7455_REG_XOUTL, + mma7455->scan.channels, + sizeof(mma7455->scan.channels)); if (ret) goto done; - iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_push_to_buffers_with_timestamp(indio_dev, &mma7455->scan, iio_get_time_ns(indio_dev)); done: From f60e8bb84282b8e633956cfe74b4f0d64ca73cec Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:42 +0100 Subject: [PATCH 056/648] iio:proximity:mb1232: Fix timestamp alignment and prevent data leak. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 16 byte s16 array on the stack As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data with alignment ensured by use of an explicit c structure. This data is allocated with kzalloc so no data can leak appart from previous readings. In this case the forced alignment of the ts is necessary to ensure correct padding on x86_32 where the s64 would only be 4 byte aligned. Fixes: 16b05261537e ("mb1232.c: add distance iio sensor with i2c") Reported-by: Lars-Peter Clausen Cc: Andreas Klinger Signed-off-by: Jonathan Cameron Cc: Reviewed-by: Andy Shevchenko --- drivers/iio/proximity/mb1232.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/iio/proximity/mb1232.c b/drivers/iio/proximity/mb1232.c index 654564c45248..ad4b1fb2607a 100644 --- a/drivers/iio/proximity/mb1232.c +++ b/drivers/iio/proximity/mb1232.c @@ -40,6 +40,11 @@ struct mb1232_data { */ struct completion ranging; int irqnr; + /* Ensure correct alignment of data to push to IIO buffer */ + struct { + s16 distance; + s64 ts __aligned(8); + } scan; }; static irqreturn_t mb1232_handle_irq(int irq, void *dev_id) @@ -113,17 +118,13 @@ static irqreturn_t mb1232_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct mb1232_data *data = iio_priv(indio_dev); - /* - * triggered buffer - * 16-bit channel + 48-bit padding + 64-bit timestamp - */ - s16 buffer[8] = { 0 }; - buffer[0] = mb1232_read_distance(data); - if (buffer[0] < 0) + data->scan.distance = mb1232_read_distance(data); + if (data->scan.distance < 0) goto err; - iio_push_to_buffers_with_timestamp(indio_dev, buffer, pf->timestamp); + iio_push_to_buffers_with_timestamp(indio_dev, &data->scan, + pf->timestamp); err: iio_trigger_notify_done(indio_dev->trig); From eb1a148ef41d8ae8d9201efc3f1b145976290331 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:43 +0100 Subject: [PATCH 057/648] iio:chemical:ccs811: Fix timestamp alignment and prevent data leak. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak appart from previous readings. The explicit alignment of ts is necessary to ensure consistent padding for x86_32 in which the ts would otherwise be 4 byte aligned. Fixes: 283d26917ad6 ("iio: chemical: ccs811: Add triggered buffer support") Reported-by: Lars-Peter Clausen Cc: Narcisa Ana Maria Vasile Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/chemical/ccs811.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c index 2b007e7568b2..60dd87e96f5f 100644 --- a/drivers/iio/chemical/ccs811.c +++ b/drivers/iio/chemical/ccs811.c @@ -78,6 +78,11 @@ struct ccs811_data { struct iio_trigger *drdy_trig; struct gpio_desc *wakeup_gpio; bool drdy_trig_on; + /* Ensures correct alignment of timestamp if present */ + struct { + s16 channels[2]; + s64 ts __aligned(8); + } scan; }; static const struct iio_chan_spec ccs811_channels[] = { @@ -327,17 +332,17 @@ static irqreturn_t ccs811_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct ccs811_data *data = iio_priv(indio_dev); struct i2c_client *client = data->client; - s16 buf[8]; /* s16 eCO2 + s16 TVOC + padding + 8 byte timestamp */ int ret; - ret = i2c_smbus_read_i2c_block_data(client, CCS811_ALG_RESULT_DATA, 4, - (u8 *)&buf); + ret = i2c_smbus_read_i2c_block_data(client, CCS811_ALG_RESULT_DATA, + sizeof(data->scan.channels), + (u8 *)data->scan.channels); if (ret != 4) { dev_err(&client->dev, "cannot read sensor data\n"); goto err; } - iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_push_to_buffers_with_timestamp(indio_dev, &data->scan, iio_get_time_ns(indio_dev)); err: From 523628852a5f5f34a15252b2634d0498d3cfb347 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:45 +0100 Subject: [PATCH 058/648] iio:light:max44000 Fix timestamp alignment and prevent data leak. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 16 byte array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv(). This data is allocated with kzalloc so no data can leak appart from previous readings. It is necessary to force the alignment of ts to avoid the padding on x86_32 being different from 64 bit platorms (it alows for 4 bytes aligned 8 byte types. Fixes: 06ad7ea10e2b ("max44000: Initial triggered buffer support") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/light/max44000.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c index aa8ed1e3e89a..b8e721bced5b 100644 --- a/drivers/iio/light/max44000.c +++ b/drivers/iio/light/max44000.c @@ -75,6 +75,11 @@ struct max44000_data { struct mutex lock; struct regmap *regmap; + /* Ensure naturally aligned timestamp */ + struct { + u16 channels[2]; + s64 ts __aligned(8); + } scan; }; /* Default scale is set to the minimum of 0.03125 or 1 / (1 << 5) lux */ @@ -488,7 +493,6 @@ static irqreturn_t max44000_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct max44000_data *data = iio_priv(indio_dev); - u16 buf[8]; /* 2x u16 + padding + 8 bytes timestamp */ int index = 0; unsigned int regval; int ret; @@ -498,17 +502,17 @@ static irqreturn_t max44000_trigger_handler(int irq, void *p) ret = max44000_read_alsval(data); if (ret < 0) goto out_unlock; - buf[index++] = ret; + data->scan.channels[index++] = ret; } if (test_bit(MAX44000_SCAN_INDEX_PRX, indio_dev->active_scan_mask)) { ret = regmap_read(data->regmap, MAX44000_REG_PRX_DATA, ®val); if (ret < 0) goto out_unlock; - buf[index] = regval; + data->scan.channels[index] = regval; } mutex_unlock(&data->lock); - iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_push_to_buffers_with_timestamp(indio_dev, &data->scan, iio_get_time_ns(indio_dev)); iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; From 2684d5003490df5398aeafe2592ba9d4a4653998 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:48 +0100 Subject: [PATCH 059/648] iio:light:ltr501 Fix timestamp alignment issue. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. Here we use a structure on the stack. The driver already did an explicit memset so no data leak was possible. Forced alignment of ts is not strictly necessary but probably makes the code slightly less fragile. Note there has been some rework in this driver of the years, so no way this will apply cleanly all the way back. Fixes: 2690be905123 ("iio: Add Lite-On ltr501 ambient light / proximity sensor driver") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/light/ltr501.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c index 4bac0646398d..b4323d2db0b1 100644 --- a/drivers/iio/light/ltr501.c +++ b/drivers/iio/light/ltr501.c @@ -1243,13 +1243,16 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct ltr501_data *data = iio_priv(indio_dev); - u16 buf[8]; + struct { + u16 channels[3]; + s64 ts __aligned(8); + } scan; __le16 als_buf[2]; u8 mask = 0; int j = 0; int ret, psdata; - memset(buf, 0, sizeof(buf)); + memset(&scan, 0, sizeof(scan)); /* figure out which data needs to be ready */ if (test_bit(0, indio_dev->active_scan_mask) || @@ -1268,9 +1271,9 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p) if (ret < 0) return ret; if (test_bit(0, indio_dev->active_scan_mask)) - buf[j++] = le16_to_cpu(als_buf[1]); + scan.channels[j++] = le16_to_cpu(als_buf[1]); if (test_bit(1, indio_dev->active_scan_mask)) - buf[j++] = le16_to_cpu(als_buf[0]); + scan.channels[j++] = le16_to_cpu(als_buf[0]); } if (mask & LTR501_STATUS_PS_RDY) { @@ -1278,10 +1281,10 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p) &psdata, 2); if (ret < 0) goto done; - buf[j++] = psdata & LTR501_PS_DATA_MASK; + scan.channels[j++] = psdata & LTR501_PS_DATA_MASK; } - iio_push_to_buffers_with_timestamp(indio_dev, buf, + iio_push_to_buffers_with_timestamp(indio_dev, &scan, iio_get_time_ns(indio_dev)); done: From 02ad21cefbac4d89ac443866f25b90449527737b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:49 +0100 Subject: [PATCH 060/648] iio:magnetometer:ak8975 Fix alignment and data leak issues. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data. This data is allocated with kzalloc so no data can leak apart from previous readings. The explicit alignment of ts is not necessary in this case as by coincidence the padding will end up the same, however I consider it to make the code less fragile and have included it. Fixes: bc11ca4a0b84 ("iio:magnetometer:ak8975: triggered buffer support") Reported-by: Lars-Peter Clausen Cc: Gregor Boirie Cc: Linus Walleij Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/magnetometer/ak8975.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 03d71f796177..623766ff800b 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -366,6 +366,12 @@ struct ak8975_data { struct iio_mount_matrix orientation; struct regulator *vdd; struct regulator *vid; + + /* Ensure natural alignment of timestamp */ + struct { + s16 channels[3]; + s64 ts __aligned(8); + } scan; }; /* Enable attached power regulator if any. */ @@ -793,7 +799,6 @@ static void ak8975_fill_buffer(struct iio_dev *indio_dev) const struct i2c_client *client = data->client; const struct ak_def *def = data->def; int ret; - s16 buff[8]; /* 3 x 16 bits axis values + 1 aligned 64 bits timestamp */ __le16 fval[3]; mutex_lock(&data->lock); @@ -816,12 +821,13 @@ static void ak8975_fill_buffer(struct iio_dev *indio_dev) mutex_unlock(&data->lock); /* Clamp to valid range. */ - buff[0] = clamp_t(s16, le16_to_cpu(fval[0]), -def->range, def->range); - buff[1] = clamp_t(s16, le16_to_cpu(fval[1]), -def->range, def->range); - buff[2] = clamp_t(s16, le16_to_cpu(fval[2]), -def->range, def->range); + data->scan.channels[0] = clamp_t(s16, le16_to_cpu(fval[0]), -def->range, def->range); + data->scan.channels[1] = clamp_t(s16, le16_to_cpu(fval[1]), -def->range, def->range); + data->scan.channels[2] = clamp_t(s16, le16_to_cpu(fval[2]), -def->range, def->range); - iio_push_to_buffers_with_timestamp(indio_dev, buff, + iio_push_to_buffers_with_timestamp(indio_dev, &data->scan, iio_get_time_ns(indio_dev)); + return; unlock: From deb6323b739c54e1a1e83cd3a2bae4901e3eebf6 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 3 Aug 2020 16:01:24 +0800 Subject: [PATCH 061/648] ARM: dts: imx7ulp: Correct gpio ranges Correct gpio ranges according to i.MX7ULP pinctrl driver: gpio_ptc: ONLY pin 0~19 are available; gpio_ptd: ONLY pin 0~11 are available; gpio_pte: ONLY pin 0~15 are available; gpio_ptf: ONLY pin 0~19 are available; Fixes: 20434dc92c05 ("ARM: dts: imx: add common imx7ulp dtsi support") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7ulp.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index 367439639da9..b7ea37ad4e55 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -394,7 +394,7 @@ gpio_ptc: gpio@40ae0000 { clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLC>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 0 32>; + gpio-ranges = <&iomuxc1 0 0 20>; }; gpio_ptd: gpio@40af0000 { @@ -408,7 +408,7 @@ gpio_ptd: gpio@40af0000 { clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLD>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 32 32>; + gpio-ranges = <&iomuxc1 0 32 12>; }; gpio_pte: gpio@40b00000 { @@ -422,7 +422,7 @@ gpio_pte: gpio@40b00000 { clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 64 32>; + gpio-ranges = <&iomuxc1 0 64 16>; }; gpio_ptf: gpio@40b10000 { @@ -436,7 +436,7 @@ gpio_ptf: gpio@40b10000 { clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLF>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 96 32>; + gpio-ranges = <&iomuxc1 0 96 20>; }; }; From 140a1dc5aea1648258bd9a5e6186331199212079 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 17 Aug 2020 15:01:42 -0300 Subject: [PATCH 062/648] ARM: dts: imx6qdl-gw51xx: Remove unneeded #address-cells/#size-cells The following dtc warning is seen when building with W=1: arch/arm/boot/dts/imx6qdl-gw51xx.dtsi:23.12-68.4: Warning (avoid_unnecessary_addr_size): /gpio-keys: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Remove the unneeded #address-cells/#size-cells properties to fix the issue. Fixes: 64bf0a0af18d ("ARM: dts: imx6qdl-gw: add Gateworks System Controller support") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-gw51xx.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi index 7705285d9e3c..4d01c3300b97 100644 --- a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi @@ -22,8 +22,6 @@ chosen { gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; user-pb { label = "user_pb"; From ecc1aebe3436a3a97b42f7542f7b30a7a3cd1dc5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 17 Aug 2020 15:01:43 -0300 Subject: [PATCH 063/648] ARM: dts: imx6q-prtwd2: Remove unneeded i2c unit name The following dtc warning is seen when building with W=1: arch/arm/boot/dts/imx6q-prtwd2.dts:33.8-43.4: Warning (unit_address_vs_reg): /i2c@4: node has a unit name, but no reg or ranges property Remove the unneeded i2c unit name to fix the issue. Fixes: 88010b8174ab ("ARM: dts: add Protonic WD2 board") Signed-off-by: Fabio Estevam Reviewed-by: Oleksij Rempel Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-prtwd2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6q-prtwd2.dts b/arch/arm/boot/dts/imx6q-prtwd2.dts index dffafbcaa7af..349959d38020 100644 --- a/arch/arm/boot/dts/imx6q-prtwd2.dts +++ b/arch/arm/boot/dts/imx6q-prtwd2.dts @@ -30,7 +30,7 @@ usdhc2_wifi_pwrseq: usdhc2_wifi_pwrseq { }; /* PRTWD2 rev 1 bitbang I2C for Ethernet Switch */ - i2c@4 { + i2c { compatible = "i2c-gpio"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c4>; From 56e79dfd036b538940227fb31371c1cd67b2467f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Aug 2020 14:25:31 -0600 Subject: [PATCH 064/648] arm64: dts: imx: Add missing imx8mm-beacon-kit.dtb to build The imx8mm-beacon-kit.dtb was never added to dtbs-y and wasn't getting built. Fix it. Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Signed-off-by: Rob Herring Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile index a39f0a1723e0..903c0eb61290 100644 --- a/arch/arm64/boot/dts/freescale/Makefile +++ b/arch/arm64/boot/dts/freescale/Makefile @@ -28,6 +28,7 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-honeycomb.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-qds.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-rdb.dtb +dtb-$(CONFIG_ARCH_MXC) += imx8mm-beacon-kit.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mm-evk.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mn-evk.dtb dtb-$(CONFIG_ARCH_MXC) += imx8mn-ddr4-evk.dtb From c5e46066b23dbbb4228fbdc3de66a27ddc8b8a36 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 19 Aug 2020 14:59:44 -0500 Subject: [PATCH 065/648] ARM: dts: imx6q-logicpd: Fix broken PWM The DTC doesn't like the default PWM settings, because it's expecting three cells. This patch reduces adds the extra entry of 0 to the PWM reference. Fixes: fa28d8212ede ("ARM: dts: imx: default to #pwm-cells = <3> in the SoC dtsi files") Reviewed-by: Fabio Estevam Signed-off-by: Adam Ford Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-logicpd.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6q-logicpd.dts b/arch/arm/boot/dts/imx6q-logicpd.dts index 7a3d1d3e54a9..8f94364ba484 100644 --- a/arch/arm/boot/dts/imx6q-logicpd.dts +++ b/arch/arm/boot/dts/imx6q-logicpd.dts @@ -13,7 +13,7 @@ / { backlight: backlight-lvds { compatible = "pwm-backlight"; - pwms = <&pwm3 0 20000>; + pwms = <&pwm3 0 20000 0>; brightness-levels = <0 4 8 16 32 64 128 255>; default-brightness-level = <6>; power-supply = <®_lcd>; From 2a6838d54128952ace6f0ca166dd8706abe46649 Mon Sep 17 00:00:00 2001 From: Chris Healy Date: Fri, 21 Aug 2020 14:21:02 -0700 Subject: [PATCH 066/648] ARM: dts: vfxxx: Add syscon compatible with OCOTP Add syscon compatibility with Vybrid OCOTP node. This is required to access the UID. Fixes: fa8d20c8dbb77 ("ARM: dts: vfxxx: Add node corresponding to OCOTP") Cc: stable@vger.kernel.org Reviewed-by: Fabio Estevam Reviewed-by: Stefan Agner Signed-off-by: Chris Healy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vfxxx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi index 0fe03aa0367f..2259d11af721 100644 --- a/arch/arm/boot/dts/vfxxx.dtsi +++ b/arch/arm/boot/dts/vfxxx.dtsi @@ -495,7 +495,7 @@ edma1: dma-controller@40098000 { }; ocotp: ocotp@400a5000 { - compatible = "fsl,vf610-ocotp"; + compatible = "fsl,vf610-ocotp", "syscon"; reg = <0x400a5000 0x1000>; clocks = <&clks VF610_CLK_OCOTP>; }; From afd55e6d1bd35b4b36847869011447a83a81c8e0 Mon Sep 17 00:00:00 2001 From: Sivaprakash Murugesan Date: Wed, 29 Jul 2020 21:00:03 +0530 Subject: [PATCH 067/648] phy: qcom-qmp: Use correct values for ipq8074 PCIe Gen2 PHY init There were some problem in ipq8074 Gen2 PCIe phy init sequence. 1. Few register values were wrongly updated in the phy init sequence. 2. The register QSERDES_RX_SIGDET_CNTRL is a RX tuning parameter register which is added in serdes table causing the wrong register was getting updated. 3. Clocks and resets were not added in the phy init. Fix these to make Gen2 PCIe port on ipq8074 devices to work. Fixes: eef243d04b2b6 ("phy: qcom-qmp: Add support for IPQ8074") Cc: stable@vger.kernel.org Co-developed-by: Selvam Sathappan Periakaruppan Signed-off-by: Selvam Sathappan Periakaruppan Signed-off-by: Sivaprakash Murugesan Link: https://lore.kernel.org/r/1596036607-11877-4-git-send-email-sivaprak@codeaurora.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp.c | 16 +++++++++------- drivers/phy/qualcomm/phy-qcom-qmp.h | 2 ++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 562053ce9455..6e6f992a9524 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -604,8 +604,8 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_COM_BG_TRIM, 0xf), QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_EN, 0x1), QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_MAP, 0x0), - QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER1, 0x1f), - QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER2, 0x3f), + QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER1, 0xff), + QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_TIMER2, 0x1f), QMP_PHY_INIT_CFG(QSERDES_COM_CMN_CONFIG, 0x6), QMP_PHY_INIT_CFG(QSERDES_COM_PLL_IVCO, 0xf), QMP_PHY_INIT_CFG(QSERDES_COM_HSCLK_SEL, 0x0), @@ -631,7 +631,6 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN1_MODE0, 0x0), QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN0_MODE0, 0x80), QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CTRL_BY_PSM, 0x1), - QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_CTRL, 0xa), QMP_PHY_INIT_CFG(QSERDES_COM_SSC_EN_CENTER, 0x1), QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER1, 0x31), QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER2, 0x1), @@ -640,7 +639,6 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_serdes_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE1, 0x2f), QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE2, 0x19), QMP_PHY_INIT_CFG(QSERDES_COM_CLK_EP_DIV, 0x19), - QMP_PHY_INIT_CFG(QSERDES_RX_SIGDET_CNTRL, 0x7), }; static const struct qmp_phy_init_tbl ipq8074_pcie_tx_tbl[] = { @@ -648,6 +646,8 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_tx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_TX_LANE_MODE, 0x6), QMP_PHY_INIT_CFG(QSERDES_TX_RES_CODE_LANE_OFFSET, 0x2), QMP_PHY_INIT_CFG(QSERDES_TX_RCV_DETECT_LVL_2, 0x12), + QMP_PHY_INIT_CFG(QSERDES_TX_EMP_POST1_LVL, 0x36), + QMP_PHY_INIT_CFG(QSERDES_TX_SLEW_CNTL, 0x0a), }; static const struct qmp_phy_init_tbl ipq8074_pcie_rx_tbl[] = { @@ -658,7 +658,6 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_rx_tbl[] = { QMP_PHY_INIT_CFG(QSERDES_RX_RX_EQU_ADAPTOR_CNTRL4, 0xdb), QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x4b), QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SO_GAIN, 0x4), - QMP_PHY_INIT_CFG(QSERDES_RX_UCDR_SO_GAIN_HALF, 0x4), }; static const struct qmp_phy_init_tbl ipq8074_pcie_pcs_tbl[] = { @@ -2046,6 +2045,9 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .pwrdn_ctrl = SW_PWRDN, }; +static const char * const ipq8074_pciephy_clk_l[] = { + "aux", "cfg_ahb", +}; /* list of resets */ static const char * const ipq8074_pciephy_reset_l[] = { "phy", "common", @@ -2063,8 +2065,8 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = { .rx_tbl_num = ARRAY_SIZE(ipq8074_pcie_rx_tbl), .pcs_tbl = ipq8074_pcie_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(ipq8074_pcie_pcs_tbl), - .clk_list = NULL, - .num_clks = 0, + .clk_list = ipq8074_pciephy_clk_l, + .num_clks = ARRAY_SIZE(ipq8074_pciephy_clk_l), .reset_list = ipq8074_pciephy_reset_l, .num_resets = ARRAY_SIZE(ipq8074_pciephy_reset_l), .vreg_list = NULL, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index 4277f592684b..904b80ab9009 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -77,6 +77,8 @@ #define QSERDES_COM_CORECLK_DIV_MODE1 0x1bc /* Only for QMP V2 PHY - TX registers */ +#define QSERDES_TX_EMP_POST1_LVL 0x018 +#define QSERDES_TX_SLEW_CNTL 0x040 #define QSERDES_TX_RES_CODE_LANE_OFFSET 0x054 #define QSERDES_TX_DEBUG_BUS_SEL 0x064 #define QSERDES_TX_HIGHZ_TRANSCEIVEREN_BIAS_DRVR_EN 0x068 From 6ccc48e0eb2f3a5f3bd39954a21317e5f8874726 Mon Sep 17 00:00:00 2001 From: Patrick Riphagen Date: Thu, 6 Aug 2020 13:55:47 +0200 Subject: [PATCH 068/648] USB: serial: ftdi_sio: add IDs for Xsens Mti USB converter The device added has an FTDI chip inside. The device is used to connect Xsens USB Motion Trackers. Cc: stable@vger.kernel.org Signed-off-by: Patrick Riphagen Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 871cdccf3a5f..9823bb424abd 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -713,6 +713,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) }, { USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTDEVBOARD_PID) }, + { USB_DEVICE(XSENS_VID, XSENS_MTIUSBCONVERTER_PID) }, { USB_DEVICE(XSENS_VID, XSENS_MTW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OMNI1509) }, { USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index e8373528264c..b5ca17a5967a 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -160,6 +160,7 @@ #define XSENS_AWINDA_DONGLE_PID 0x0102 #define XSENS_MTW_PID 0x0200 /* Xsens MTw */ #define XSENS_MTDEVBOARD_PID 0x0300 /* Motion Tracker Development Board */ +#define XSENS_MTIUSBCONVERTER_PID 0x0301 /* MTi USB converter */ #define XSENS_CONVERTER_PID 0xD00D /* Xsens USB-serial converter */ /* Xsens devices using FTDI VID */ From 3cb5fcf1f3a5dfb7bc0305bb15971db04a0e51d4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 22 Aug 2020 00:37:53 +0300 Subject: [PATCH 069/648] MAINTAINERS: add myself as maintainer for spi-fsl-dspi driver Since I've introduced a fairly large diff to this driver since tag v5.4, I would like to avoid breakage for my use cases by getting a chance to be copied on newly submitted patches. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200821213753.3143632-1-olteanv@gmail.com Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 68f21d46614c..5a5f3f7d88ee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6806,6 +6806,14 @@ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/dma/fsldma.* +FREESCALE DSPI DRIVER +M: Vladimir Oltean +L: linux-spi@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt +F: drivers/spi/spi-fsl-dspi.c +F: include/linux/spi/spi-fsl-dspi.h + FREESCALE ENETC ETHERNET DRIVERS M: Claudiu Manoil L: netdev@vger.kernel.org From 5cbb80d5236b47b149da292b86d5fc99a680894b Mon Sep 17 00:00:00 2001 From: Chris Healy Date: Sat, 22 Aug 2020 19:25:05 -0700 Subject: [PATCH 070/648] ARM: dts: imx7d-zii-rmu2: fix rgmii phy-mode for ksz9031 phy Since commit bcf3440c6dd7 ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") the networking is broken on the imx7d-zii-rmu2 board. The end result is that network receive behaviour is marginal with lots of RX CRC errors experienced and NFS frequently failing. Quoting the explanation from Andrew Lunn in commit 0672d22a19244 ("ARM: dts: imx: Fix the AR803X phy-mode"): "The problem here is, all the DTs were broken since day 0. However, because the PHY driver was also broken, nobody noticed and it worked. Now that the PHY driver has been fixed, all the bugs in the DTs now become an issue" Fix it by switching to phy-mode = "rgmii-id". Fixes: bcf3440c6dd7 ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") Signed-off-by: Chris Healy Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-zii-rmu2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx7d-zii-rmu2.dts b/arch/arm/boot/dts/imx7d-zii-rmu2.dts index e5e20b07f184..7cb6153fc650 100644 --- a/arch/arm/boot/dts/imx7d-zii-rmu2.dts +++ b/arch/arm/boot/dts/imx7d-zii-rmu2.dts @@ -58,7 +58,7 @@ &fec1 { <&clks IMX7D_ENET1_TIME_ROOT_CLK>; assigned-clock-parents = <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>; assigned-clock-rates = <0>, <100000000>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&fec1_phy>; status = "okay"; From baa5b28b7a474f66a511ebf71a2ade510652a2f6 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Tue, 11 Aug 2020 14:57:22 +0530 Subject: [PATCH 071/648] RDMA/rtrs-srv: Replace device_register with device_initialize and device_add There are error cases when we will call free_srv before device kobject is initialized; in such cases calling put_device generates the following warning: kobject: '(null)' (000000009f5445ed): is not initialized, yet kobject_put() is being called. So call device_initialize() only once when the server is allocated. If we end up calling put_srv() and subsequently free_srv(), our call to put_device() would result in deletion of the obj. Call device_add() later when we actually have a connection. Correspondingly, call device_del() instead of device_unregister() when srv->dev_ref falls to 0. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Link: https://lore.kernel.org/r/20200811092722.2450-1-haris.iqbal@cloud.ionos.com Suggested-by: Jason Gunthorpe Signed-off-by: Md Haris Iqbal Reviewed-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 8 ++++---- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 3d7877534bcc..2f981ae97076 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -182,16 +182,16 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) * sysfs files are created */ dev_set_uevent_suppress(&srv->dev, true); - err = device_register(&srv->dev); + err = device_add(&srv->dev); if (err) { - pr_err("device_register(): %d\n", err); + pr_err("device_add(): %d\n", err); goto put; } srv->kobj_paths = kobject_create_and_add("paths", &srv->dev.kobj); if (!srv->kobj_paths) { err = -ENOMEM; pr_err("kobject_create_and_add(): %d\n", err); - device_unregister(&srv->dev); + device_del(&srv->dev); goto unlock; } dev_set_uevent_suppress(&srv->dev, false); @@ -216,7 +216,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) kobject_del(srv->kobj_paths); kobject_put(srv->kobj_paths); mutex_unlock(&srv->paths_mutex); - device_unregister(&srv->dev); + device_del(&srv->dev); } else { mutex_unlock(&srv->paths_mutex); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index a219bd1bdbc2..b61a18e57aeb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1336,6 +1336,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, uuid_copy(&srv->paths_uuid, paths_uuid); srv->queue_depth = sess_queue_depth; srv->ctx = ctx; + device_initialize(&srv->dev); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); From 60b1af64eb35074a4f2d41cc1e503a7671e68963 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 20 Aug 2020 23:36:46 +0800 Subject: [PATCH 072/648] RDMA/rxe: Fix the parent sysfs read when the interface has 15 chars 'parent' sysfs reads will yield '\0' bytes when the interface name has 15 chars, and there will no "\n" output. To reproduce, create one interface with 15 chars: [root@test ~]# ip a s enp0s29u1u7u3c2 2: enp0s29u1u7u3c2: mtu 1500 qdisc fq_codel state UNKNOWN group default qlen 1000 link/ether 02:21:28:57:47:17 brd ff:ff:ff:ff:ff:ff inet6 fe80::ac41:338f:5bcd:c222/64 scope link noprefixroute valid_lft forever preferred_lft forever [root@test ~]# modprobe rdma_rxe [root@test ~]# echo enp0s29u1u7u3c2 > /sys/module/rdma_rxe/parameters/add [root@test ~]# cat /sys/class/infiniband/rxe0/parent enp0s29u1u7u3c2[root@test ~]# [root@test ~]# f="/sys/class/infiniband/rxe0/parent" [root@test ~]# echo "$(<"$f")" -bash: warning: command substitution: ignored null byte in input enp0s29u1u7u3c2 Use scnprintf and PAGE_SIZE to fill the sysfs output buffer. Cc: stable@vger.kernel.org Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200820153646.31316-1-yi.zhang@redhat.com Suggested-by: Jason Gunthorpe Signed-off-by: Yi Zhang Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index bb61e534e468..756980f79951 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1056,7 +1056,7 @@ static ssize_t parent_show(struct device *device, struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); - return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); + return scnprintf(buf, PAGE_SIZE, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); From 0235bc04627d02a08f7ad9d226a8fe78e6c4a1c3 Mon Sep 17 00:00:00 2001 From: Rander Wang Date: Fri, 21 Aug 2020 14:55:53 -0500 Subject: [PATCH 073/648] ASoC: Intel: tgl_max98373: fix a runtime pm issue in multi-thread case When the playback & capture streams are stopped simultaneously, the SOF PCI device will remain pm_runtime active. The root-cause is a race condition with two threads reaching the trigger function at the same time. They see another stream is active so the dapm pin is not disabled, so the codec remains active as well as the parent PCI device. For max98373, the capture stream provides feedback when playback is working and it is unused when playback is stopped. So the dapm pin should be set only when playback is active. Fixes: 94d2d08974746 ('ASoC: Intel: Boards: tgl_max98373: add dai_trigger function') Reviewed-by: Ranjani Sridharan Signed-off-by: Rander Wang Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200821195603.215535-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_maxim_common.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/boards/sof_maxim_common.c b/sound/soc/intel/boards/sof_maxim_common.c index 1a6961592029..b6e63ea13d64 100644 --- a/sound/soc/intel/boards/sof_maxim_common.c +++ b/sound/soc/intel/boards/sof_maxim_common.c @@ -66,6 +66,10 @@ int max98373_trigger(struct snd_pcm_substream *substream, int cmd) int j; int ret = 0; + /* set spk pin by playback only */ + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + return 0; + for_each_rtd_codec_dais(rtd, j, codec_dai) { struct snd_soc_component *component = codec_dai->component; struct snd_soc_dapm_context *dapm = @@ -86,9 +90,6 @@ int max98373_trigger(struct snd_pcm_substream *substream, int cmd) case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - /* Make sure no streams are active before disable pin */ - if (snd_soc_dai_active(codec_dai) != 1) - break; ret = snd_soc_dapm_disable_pin(dapm, pin_name); if (!ret) snd_soc_dapm_sync(dapm); From 8824d19b45867be75d375385414c4f06719a11a4 Mon Sep 17 00:00:00 2001 From: "Nikunj A. Dadhania" Date: Tue, 21 Jul 2020 17:05:23 +0530 Subject: [PATCH 074/648] thunderbolt: Disable ports that are not implemented Commit 4caf2511ec49 ("thunderbolt: Add trivial .shutdown") exposes a bug in the Thunderbolt driver, that frees an unallocated id, resulting in the following spinlock bad magic bug. [ 20.633803] BUG: spinlock bad magic on CPU#4, halt/3313 [ 20.640030] lock: 0xffff92e6ad5c97e0, .magic: 00000000, .owner: /-1, .owner_cpu: 0 [ 20.672139] Call Trace: [ 20.675032] dump_stack+0x97/0xdb [ 20.678950] ? spin_bug+0xa5/0xb0 [ 20.682865] do_raw_spin_lock+0x68/0x98 [ 20.687397] _raw_spin_lock_irqsave+0x3f/0x5d [ 20.692535] ida_destroy+0x4f/0x124 [ 20.696657] tb_switch_release+0x6d/0xfd [ 20.701295] device_release+0x2c/0x7d [ 20.705622] kobject_put+0x8e/0xac [ 20.709637] tb_stop+0x55/0x66 [ 20.713243] tb_domain_remove+0x36/0x62 [ 20.717774] nhi_remove+0x4d/0x58 Fix the issue by disabling ports that are enabled as per the EEPROM, but not implemented. While at it, update the kernel doc for the disabled field, to reflect this. Cc: stable@vger.kernel.org Fixes: 4caf2511ec49 ("thunderbolt: Add trivial .shutdown") Reported-by: Srikanth Nandamuri Signed-off-by: Nikunj A. Dadhania Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 1 + drivers/thunderbolt/tb.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 712395f518b8..698c52775eec 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -684,6 +684,7 @@ static int tb_init_port(struct tb_port *port) if (res == -ENODEV) { tb_dbg(port->sw->tb, " Port %d: not implemented\n", port->port); + port->disabled = true; return 0; } return res; diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index a413d55b5f8b..3c620a9203c5 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -186,7 +186,7 @@ struct tb_switch { * @cap_adap: Offset of the adapter specific capability (%0 if not present) * @cap_usb4: Offset to the USB4 port capability (%0 if not present) * @port: Port number on switch - * @disabled: Disabled by eeprom + * @disabled: Disabled by eeprom or enabled but not implemented * @bonded: true if the port is bonded (two lanes combined as one) * @dual_link_port: If the switch is connected using two ports, points * to the other port. From 813050e0a9b871ac575abfd3d321f74916df609d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 24 Jun 2020 21:45:03 +0300 Subject: [PATCH 075/648] thunderbolt: Use maximum USB3 link rate when reclaiming if link is not up If the USB3 link is not up the actual link rate is 0 so when reclaiming bandwidth we should look at maximum supported link rate instead. Cc: stable@vger.kernel.org Fixes: 0bd680cd900c ("thunderbolt: Add USB3 bandwidth management") Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tunnel.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 2aae2c76d880..d50e5b93632a 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -951,10 +951,18 @@ static void tb_usb3_reclaim_available_bandwidth(struct tb_tunnel *tunnel, int ret, max_rate, allocate_up, allocate_down; ret = usb4_usb3_port_actual_link_rate(tunnel->src_port); - if (ret <= 0) { - tb_tunnel_warn(tunnel, "tunnel is not up\n"); + if (ret < 0) { + tb_tunnel_warn(tunnel, "failed to read actual link rate\n"); return; + } else if (!ret) { + /* Use maximum link rate if the link valid is not set */ + ret = usb4_usb3_port_max_link_rate(tunnel->src_port); + if (ret < 0) { + tb_tunnel_warn(tunnel, "failed to read maximum link rate\n"); + return; + } } + /* * 90% of the max rate can be allocated for isochronous * transfers. From 9d682ea6bcc76b8b2691c79add59f7d99c881635 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 25 Aug 2020 13:12:57 +0200 Subject: [PATCH 076/648] vboxsf: Fix the check for the old binary mount-arguments struct Fix the check for the mainline vboxsf code being used with the old mount.vboxsf mount binary from the out-of-tree vboxsf version doing a comparison between signed and unsigned data types. This fixes the following smatch warnings: fs/vboxsf/super.c:390 vboxsf_parse_monolithic() warn: impossible condition '(options[1] == (255)) => ((-128)-127 == 255)' fs/vboxsf/super.c:391 vboxsf_parse_monolithic() warn: impossible condition '(options[2] == (254)) => ((-128)-127 == 254)' fs/vboxsf/super.c:392 vboxsf_parse_monolithic() warn: impossible condition '(options[3] == (253)) => ((-128)-127 == 253)' Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Hans de Goede Signed-off-by: Al Viro --- fs/vboxsf/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 8fe03b4a0d2b..25aade344192 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -384,7 +384,7 @@ static int vboxsf_setup(void) static int vboxsf_parse_monolithic(struct fs_context *fc, void *data) { - char *options = data; + unsigned char *options = data; if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && From 7ad26d6671db758c959d7e1d100b138a38483612 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 25 Aug 2020 08:39:24 +0900 Subject: [PATCH 077/648] ASoC: pcm3168a: ignore 0 Hz settings Some sound card try to set 0 Hz as reset, but it is impossible. This patch ignores it to avoid error return. Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87a6yjy5sy.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/pcm3168a.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/pcm3168a.c b/sound/soc/codecs/pcm3168a.c index 5e445fee4ef5..821e7395f90f 100644 --- a/sound/soc/codecs/pcm3168a.c +++ b/sound/soc/codecs/pcm3168a.c @@ -306,6 +306,13 @@ static int pcm3168a_set_dai_sysclk(struct snd_soc_dai *dai, struct pcm3168a_priv *pcm3168a = snd_soc_component_get_drvdata(dai->component); int ret; + /* + * Some sound card sets 0 Hz as reset, + * but it is impossible to set. Ignore it here + */ + if (freq == 0) + return 0; + if (freq > PCM3168A_MAX_SYSCLK) return -EINVAL; From d062085d61b1c2015845d1d9c475266381cce785 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 25 Aug 2020 08:38:54 +0900 Subject: [PATCH 078/648] ASoC: ti: fixup ams_delta_mute() function name commit 059374fe9ea5d ("ASoC: ti: merge .digital_mute() into .mute_stream()") merged .digital_mute() into .mute_stream(). But it didn't rename ams_delta_digital_mute() to ams_delta_mute(). This patch fixup it. Signed-off-by: Kuninori Morimoto Reported-by: kernel test robot Link: https://lore.kernel.org/r/87blizy5ts.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/ti/ams-delta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/ti/ams-delta.c b/sound/soc/ti/ams-delta.c index 5c47de96c529..57feb473a579 100644 --- a/sound/soc/ti/ams-delta.c +++ b/sound/soc/ti/ams-delta.c @@ -446,12 +446,12 @@ static const struct snd_soc_dai_ops ams_delta_dai_ops = { /* Will be used if the codec ever has its own digital_mute function */ static int ams_delta_startup(struct snd_pcm_substream *substream) { - return ams_delta_digital_mute(NULL, 0, substream->stream); + return ams_delta_mute(NULL, 0, substream->stream); } static void ams_delta_shutdown(struct snd_pcm_substream *substream) { - ams_delta_digital_mute(NULL, 1, substream->stream); + ams_delta_mute(NULL, 1, substream->stream); } From 2ea370a9173f4a3c80b24221049359a5d1518bc0 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Tue, 25 Aug 2020 22:55:06 +0530 Subject: [PATCH 079/648] spi: spi-cadence-quadspi: Populate get_name() interface Implement get_name() interface of spi_controller_mem_ops so as to avoid changing of mtd->name due to driver being moved over to spi-mem framework from SPI NOR. This avoids breaking of MTD cmdline args being passed by bootloaders which maybe using old driver name. Fixes: 31fb632b5d43c ("spi: Move cadence-quadspi driver to drivers/spi/") Reported-by: Jan Kiszka Suggested-by: Boris Brezillon Signed-off-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20200825172506.14375-1-vigneshr@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 1c1a9d17eec0..508b219eabf8 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1128,8 +1128,17 @@ static int cqspi_request_mmap_dma(struct cqspi_st *cqspi) return 0; } +static const char *cqspi_get_name(struct spi_mem *mem) +{ + struct cqspi_st *cqspi = spi_master_get_devdata(mem->spi->master); + struct device *dev = &cqspi->pdev->dev; + + return devm_kasprintf(dev, GFP_KERNEL, "%s.%d", dev_name(dev), mem->spi->chip_select); +} + static const struct spi_controller_mem_ops cqspi_mem_ops = { .exec_op = cqspi_exec_mem_op, + .get_name = cqspi_get_name, }; static int cqspi_setup_flash(struct cqspi_st *cqspi) From 460f26d3ee70ec25b3bf8d08e0c79936212d36b6 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Mon, 15 Jun 2020 16:05:00 -0700 Subject: [PATCH 080/648] drm/virtio: Revert "drm/virtio: Call the right shmem helpers" This reverts commit d323bb44e4d23802eb25d13de1f93f2335bd60d0. Fixes a double-free regression: [ 4.357928] drm_gem_shmem_free_object+0xb4/0x100 [ 4.358983] virtio_gpu_dequeue_ctrl_func+0xd9/0x290 [ 4.360343] process_one_work+0x1d2/0x3a0 [ 4.361581] worker_thread+0x45/0x3c0 [ 4.362645] kthread+0xf6/0x130 [ 4.363543] ? process_one_work+0x3a0/0x3a0 [ 4.364770] ? kthread_park+0x80/0x80 [ 4.365799] ret_from_fork+0x35/0x40 [ 4.367103] Modules linked in: [ 4.367958] CR2: 0000000000000018 [ 4.368857] ---[ end trace db84f7a2974d5c79 ]--- [ 4.370118] RIP: 0010:dma_direct_unmap_sg+0x1f/0x60 In addition, virtio has it's own set of dma-ops so there's not an obviously clean way to transition to shmem helpers. Fixes: d323bb44e4d2 ("drm/virtio: Call the right shmem helpers") Signed-off-by: Gurchetan Singh Link: http://patchwork.freedesktop.org/patch/msgid/20200615230500.551-1-gurchetansingh@chromium.org Signed-off-by: Gerd Hoffmann (cherry picked from commit 51c3b0cc32d2e17581fce5b487ee95bbe9e8270a) --- drivers/gpu/drm/virtio/virtgpu_object.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index e83651b7747d..842f8b61aa89 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -151,7 +151,13 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, if (ret < 0) return -EINVAL; - shmem->pages = drm_gem_shmem_get_pages_sgt(&bo->base.base); + /* + * virtio_gpu uses drm_gem_shmem_get_sg_table instead of + * drm_gem_shmem_get_pages_sgt because virtio has it's own set of + * dma-ops. This is discouraged for other drivers, but should be fine + * since virtio_gpu doesn't support dma-buf import from other devices. + */ + shmem->pages = drm_gem_shmem_get_sg_table(&bo->base.base); if (!shmem->pages) { drm_gem_shmem_unpin(&bo->base.base); return -EINVAL; From 6e0c9b5f90978287b5a3d38ee83203d295f375f1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 26 Aug 2020 22:03:36 +0530 Subject: [PATCH 081/648] ASoC: max98373: Fix return check for devm_regmap_init_sdw() devm_regmap_init_sdw() returns a valid pointer on success or ERR_PTR on failure which should be checked with IS_ERR. Also use PTR_ERR for returning error codes. Reported-by: Takashi Iwai Fixes: 56a5b7910e96 ("ASoC: codecs: max98373: add SoundWire support") Signed-off-by: Vinod Koul Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200826163340.3249608-2-vkoul@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/max98373-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c index 5fe724728e84..e4675cfff7b2 100644 --- a/sound/soc/codecs/max98373-sdw.c +++ b/sound/soc/codecs/max98373-sdw.c @@ -838,8 +838,8 @@ static int max98373_sdw_probe(struct sdw_slave *slave, /* Regmap Initialization */ regmap = devm_regmap_init_sdw(slave, &max98373_sdw_regmap); - if (!regmap) - return -EINVAL; + if (IS_ERR(regmap)) + return PTR_ERR(regmap); return max98373_init(slave, regmap); } From 344850d93c098e1b17e6f89d5e436893e9762ef3 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 26 Aug 2020 22:03:37 +0530 Subject: [PATCH 082/648] ASoC: rt1308-sdw: Fix return check for devm_regmap_init_sdw() devm_regmap_init_sdw() returns a valid pointer on success or ERR_PTR on failure which should be checked with IS_ERR. Also use PTR_ERR for returning error codes. Reported-by: Takashi Iwai Fixes: a87a6653a28c ("ASoC: rt1308-sdw: add rt1308 SdW amplifier driver") Signed-off-by: Vinod Koul Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200826163340.3249608-3-vkoul@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/rt1308-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c index b0ba0d2acbdd..56e952a904a3 100644 --- a/sound/soc/codecs/rt1308-sdw.c +++ b/sound/soc/codecs/rt1308-sdw.c @@ -684,8 +684,8 @@ static int rt1308_sdw_probe(struct sdw_slave *slave, /* Regmap Initialization */ regmap = devm_regmap_init_sdw(slave, &rt1308_sdw_regmap); - if (!regmap) - return -EINVAL; + if (IS_ERR(regmap)) + return PTR_ERR(regmap); rt1308_sdw_init(&slave->dev, regmap, slave); From be1a4b2c56db860a220c6f74d461188e5733264a Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 26 Aug 2020 22:03:38 +0530 Subject: [PATCH 083/648] ASoC: rt711: Fix return check for devm_regmap_init_sdw() devm_regmap_init_sdw() returns a valid pointer on success or ERR_PTR on failure which should be checked with IS_ERR. Also use PTR_ERR for returning error codes. Reported-by: Takashi Iwai Fixes: 320b8b0d13b8 ("ASoC: rt711: add rt711 codec driver") Signed-off-by: Vinod Koul Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200826163340.3249608-4-vkoul@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/rt711-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c index 45b928954b58..7efff130a638 100644 --- a/sound/soc/codecs/rt711-sdw.c +++ b/sound/soc/codecs/rt711-sdw.c @@ -452,8 +452,8 @@ static int rt711_sdw_probe(struct sdw_slave *slave, /* Regmap Initialization */ sdw_regmap = devm_regmap_init_sdw(slave, &rt711_sdw_regmap); - if (!sdw_regmap) - return -EINVAL; + if (IS_ERR(sdw_regmap)) + return PTR_ERR(sdw_regmap); regmap = devm_regmap_init(&slave->dev, NULL, &slave->dev, &rt711_regmap); From 282eb0b52e3f0399ee48a4cad0d9ffec840b0164 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 26 Aug 2020 22:03:39 +0530 Subject: [PATCH 084/648] ASoC: rt715: Fix return check for devm_regmap_init_sdw() devm_regmap_init_sdw() returns a valid pointer on success or ERR_PTR on failure which should be checked with IS_ERR. Also use PTR_ERR for returning error codes. Reported-by: Takashi Iwai Fixes: d1ede0641b05 ("ASoC: rt715: add RT715 codec driver") Signed-off-by: Vinod Koul Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200826163340.3249608-5-vkoul@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/rt715-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index d11b23d6b240..68a36739f1b0 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -527,8 +527,8 @@ static int rt715_sdw_probe(struct sdw_slave *slave, /* Regmap Initialization */ sdw_regmap = devm_regmap_init_sdw(slave, &rt715_sdw_regmap); - if (!sdw_regmap) - return -EINVAL; + if (IS_ERR(sdw_regmap)) + return PTR_ERR(sdw_regmap); regmap = devm_regmap_init(&slave->dev, NULL, &slave->dev, &rt715_regmap); From db1a4250aef53775ec0094b81454213319cc8c74 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 26 Aug 2020 22:03:40 +0530 Subject: [PATCH 085/648] ASoC: rt700: Fix return check for devm_regmap_init_sdw() devm_regmap_init_sdw() returns a valid pointer on success or ERR_PTR on failure which should be checked with IS_ERR. Also use PTR_ERR for returning error codes. Reported-by: Takashi Iwai Fixes: 7d2a5f9ae41e ("ASoC: rt700: add rt700 codec driver") Signed-off-by: Vinod Koul Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200826163340.3249608-6-vkoul@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/rt700-sdw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt700-sdw.c b/sound/soc/codecs/rt700-sdw.c index 4d14048d1197..1d24bf040718 100644 --- a/sound/soc/codecs/rt700-sdw.c +++ b/sound/soc/codecs/rt700-sdw.c @@ -452,8 +452,8 @@ static int rt700_sdw_probe(struct sdw_slave *slave, /* Regmap Initialization */ sdw_regmap = devm_regmap_init_sdw(slave, &rt700_sdw_regmap); - if (!sdw_regmap) - return -EINVAL; + if (IS_ERR(sdw_regmap)) + return PTR_ERR(sdw_regmap); regmap = devm_regmap_init(&slave->dev, NULL, &slave->dev, &rt700_regmap); From c1e6414cdc371f9ed82cefebba7538499a3059f9 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 20 Aug 2020 12:28:27 +0800 Subject: [PATCH 086/648] ASoC: qcom: common: Fix refcount imbalance on error for_each_child_of_node returns a node pointer np with refcount incremented. So when devm_kzalloc fails, a pairing refcount decrement is needed to keep np's refcount balanced. Fixes: 16395ceee11f8 ("ASoC: qcom: common: Fix NULL pointer in of parser") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20200820042828.10308-1-dinghao.liu@zju.edu.cn Signed-off-by: Mark Brown --- sound/soc/qcom/common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index 5194d90ddb96..fd69cf8b1f23 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -52,8 +52,10 @@ int qcom_snd_parse_of(struct snd_soc_card *card) for_each_child_of_node(dev->of_node, np) { dlc = devm_kzalloc(dev, 2 * sizeof(*dlc), GFP_KERNEL); - if (!dlc) - return -ENOMEM; + if (!dlc) { + ret = -ENOMEM; + goto err; + } link->cpus = &dlc[0]; link->platforms = &dlc[1]; From 5de55ce951a1466e31ff68a7bc6b0a7ce3cb5947 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Aug 2020 11:19:26 -0400 Subject: [PATCH 087/648] xprtrdma: Release in-flight MRs on disconnect Dan Aloni reports that when a server disconnects abruptly, a few memory regions are left DMA mapped. Over time this leak could pin enough I/O resources to slow or even deadlock an NFS/RDMA client. I found that if a transport disconnects before pending Send and FastReg WRs can be posted, the to-be-registered MRs are stranded on the req's rl_registered list and never released -- since they weren't posted, there's no Send completion to DMA unmap them. Reported-by: Dan Aloni Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 75c646743df3..ca89f24a1590 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -933,6 +933,8 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req) rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); + + frwr_reset(req); } /* ASSUMPTION: the rb_allreqs list is stable for the duration, From 35bf948f1edbf507f6e57e0879fa6ea36d2d2930 Mon Sep 17 00:00:00 2001 From: Jitao Shi Date: Mon, 17 Aug 2020 21:06:40 +0800 Subject: [PATCH 088/648] drm/mediatek: dsi: Fix scrolling of panel with small hfp or hbp horizontal_backporch_byte should be hbp * bpp - hbp extra bytes. So remove the wrong subtraction 10. Fixes: 7a5bc4e22ecf ("drm/mediatek: change the dsi phytiming calculate method") Signed-off-by: Jitao Shi Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dsi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 16fd99dcdacf..80b7a082e874 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -466,14 +466,13 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) horizontal_sync_active_byte = (vm->hsync_len * dsi_tmp_buf_bpp - 10); if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) - horizontal_backporch_byte = - (vm->hback_porch * dsi_tmp_buf_bpp - 10); + horizontal_backporch_byte = vm->hback_porch * dsi_tmp_buf_bpp; else - horizontal_backporch_byte = ((vm->hback_porch + vm->hsync_len) * - dsi_tmp_buf_bpp - 10); + horizontal_backporch_byte = (vm->hback_porch + vm->hsync_len) * + dsi_tmp_buf_bpp; data_phy_cycles = timing->lpx + timing->da_hs_prepare + - timing->da_hs_zero + timing->da_hs_exit + 3; + timing->da_hs_zero + timing->da_hs_exit; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) { if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp > From 3d7a9520f0c3e6a68b6de8c5812fc8b6d7a52626 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 20 Aug 2020 18:52:43 -0400 Subject: [PATCH 089/648] NFSv4.1 handle ERR_DELAY error reclaiming locking state on delegation recall A client should be able to handle getting an ERR_DELAY error while doing a LOCK call to reclaim state due to delegation being recalled. This is a transient error that can happen due to server moving its volumes and invalidating its file location cache and upon reference to it during the LOCK call needing to do an expensive lookup (leading to an ERR_DELAY error on a PUTFH). Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dbd01548335b..4a6cfb497103 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7298,7 +7298,12 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, err = nfs4_set_lock_state(state, fl); if (err != 0) return err; - err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + do { + err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + if (err != -NFS4ERR_DELAY) + break; + ssleep(1); + } while (err == -NFS4ERR_DELAY); return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err); } From aac544c3553d98ebe150dda19a25aa253f7ad3fe Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 25 Aug 2020 01:25:11 +0200 Subject: [PATCH 090/648] Compiler Attributes: remove comment about sparse not supporting __has_attribute Sparse supports __has_attribute() since 2018-08-31, so the comment is not true anymore but more importantly is rather confusing. So remove it. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 6122efdad6ad..af7a58c19e20 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -24,12 +24,6 @@ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute * by hand. - * - * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__ - * depending on the compiler used to build it; however, these attributes have - * no semantic effects for sparse, so it does not matter. Also note that, - * in order to avoid sparse's warnings, even the unsupported ones must be - * defined to 0. */ #ifndef __has_attribute # define __has_attribute(x) __GCC4_has_attribute_##x From 5861af92ff2a2e002449191413c35f3ec5f721fe Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 25 Aug 2020 01:25:26 +0200 Subject: [PATCH 091/648] Compiler Attributes: fix comment concerning GCC 4.6 GCC 4.6 is not supported anymore, so remove a reference to it, leaving just the part about version prior GCC 5. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index af7a58c19e20..ea7b756b1c8f 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -22,7 +22,7 @@ /* * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. - * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute + * In the meantime, to support gcc < 5, we implement __has_attribute * by hand. */ #ifndef __has_attribute From e3ddd6067ee62f6e76ebcf61ff08b2c729ae412b Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 19 Aug 2020 15:56:32 +0800 Subject: [PATCH 092/648] RDMA/rxe: Fix memleak in rxe_mem_init_user When page_address() fails, umem should be freed just like when rxe_mem_alloc() fails. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200819075632.22285-1-dinghao.liu@zju.edu.cn Signed-off-by: Dinghao Liu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index cdd811a45120..ce24144de16a 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -205,6 +205,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { pr_warn("null vaddr\n"); + ib_umem_release(umem); err = -ENOMEM; goto err1; } From d862060a4b43479887ae8e2c0b74a58c4e27e5f3 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 25 Aug 2020 18:17:25 +0300 Subject: [PATCH 093/648] RDMA/rxe: Fix panic when calling kmem_cache_create() To avoid the following kernel panic when calling kmem_cache_create() with a NULL pointer from pool_cache(), Block the rxe_param_set_add() from running if the rdma_rxe module is not initialized. BUG: unable to handle kernel NULL pointer dereference at 000000000000000b PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 4 PID: 8512 Comm: modprobe Kdump: loaded Not tainted 4.18.0-231.el8.x86_64 #1 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 10/02/2018 RIP: 0010:kmem_cache_alloc+0xd1/0x1b0 Code: 8b 57 18 45 8b 77 1c 48 8b 5c 24 30 0f 1f 44 00 00 5b 48 89 e8 5d 41 5c 41 5d 41 5e 41 5f c3 81 e3 00 00 10 00 75 0e 4d 89 fe <41> f6 47 0b 04 0f 84 6c ff ff ff 4c 89 ff e8 cc da 01 00 49 89 c6 RSP: 0018:ffffa2b8c773f9d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000005 RDX: 0000000000000004 RSI: 00000000006080c0 RDI: 0000000000000000 RBP: ffff8ea0a8634fd0 R08: ffffa2b8c773f988 R09: 00000000006000c0 R10: 0000000000000000 R11: 0000000000000230 R12: 00000000006080c0 R13: ffffffffc0a97fc8 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f9138ed9740(0000) GS:ffff8ea4ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000000b CR3: 000000046d59a000 CR4: 00000000003406e0 Call Trace: rxe_alloc+0xc8/0x160 [rdma_rxe] rxe_get_dma_mr+0x25/0xb0 [rdma_rxe] __ib_alloc_pd+0xcb/0x160 [ib_core] ib_mad_init_device+0x296/0x8b0 [ib_core] add_client_context+0x11a/0x160 [ib_core] enable_device_and_get+0xdc/0x1d0 [ib_core] ib_register_device+0x572/0x6b0 [ib_core] ? crypto_create_tfm+0x32/0xe0 ? crypto_create_tfm+0x7a/0xe0 ? crypto_alloc_tfm+0x58/0xf0 rxe_register_device+0x19d/0x1c0 [rdma_rxe] rxe_net_add+0x3d/0x70 [rdma_rxe] ? dev_get_by_name_rcu+0x73/0x90 rxe_param_set_add+0xaf/0xc0 [rdma_rxe] parse_args+0x179/0x370 ? ref_module+0x1b0/0x1b0 load_module+0x135e/0x17e0 ? ref_module+0x1b0/0x1b0 ? __do_sys_init_module+0x13b/0x180 __do_sys_init_module+0x13b/0x180 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f9137ed296e This can be triggered if a user tries to use the 'module option' which is not actually a real module option but some idiotic (and thankfully no obsolete) sysfs interface. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20200825151725.254046-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe.c | 4 ++++ drivers/infiniband/sw/rxe/rxe.h | 2 ++ drivers/infiniband/sw/rxe/rxe_sysfs.c | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 907203afbd99..77f2c7cd1216 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -40,6 +40,8 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); +bool rxe_initialized; + /* free resources for a rxe device all objects created for this device must * have been destroyed */ @@ -315,6 +317,7 @@ static int __init rxe_module_init(void) return err; rdma_link_register(&rxe_link_ops); + rxe_initialized = true; pr_info("loaded\n"); return 0; } @@ -326,6 +329,7 @@ static void __exit rxe_module_exit(void) rxe_net_exit(); rxe_cache_exit(); + rxe_initialized = false; pr_info("unloaded\n"); } diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index fb07eed9e402..cae1b0a24c85 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -67,6 +67,8 @@ #define RXE_ROCE_V2_SPORT (0xc000) +extern bool rxe_initialized; + static inline u32 rxe_crc32(struct rxe_dev *rxe, u32 crc, void *next, size_t len) { diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index ccda5f5a3bc0..2af31d421bfc 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -61,6 +61,11 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) struct net_device *ndev; struct rxe_dev *exists; + if (!rxe_initialized) { + pr_err("Module parameters are not supported, use rdma link add or rxe_cfg\n"); + return -EAGAIN; + } + len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { pr_err("add: invalid interface name\n"); From ec78b3bd66bc9a015505df0ef0eb153d9e64b03b Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 24 Aug 2020 14:02:29 +0300 Subject: [PATCH 094/648] RDMA/mlx4: Read pkey table length instead of hardcoded value If the pkey_table is not available (which is the case when RoCE is not supported), the cited commit caused a regression where mlx4_devices without RoCE are not created. Fix this by returning a pkey table length of zero in procedure eth_link_query_port() if the pkey-table length reported by the device is zero. Link: https://lore.kernel.org/r/20200824110229.1094376-1-leon@kernel.org Cc: Fixes: 1901b91f9982 ("IB/core: Fix potential NULL pointer dereference in pkey cache") Fixes: fa417f7b520e ("IB/mlx4: Add support for IBoE") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5e7910a517da..bd4f975e7f9a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -784,7 +784,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->ip_gids = true; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; - props->pkey_tbl_len = 1; + if (mdev->dev->caps.pkey_table_len[port]) + props->pkey_tbl_len = 1; props->max_mtu = IB_MTU_4096; props->max_vl_num = 2; props->state = IB_PORT_DOWN; From 2d0e60ee322d512fa6bc62d23a6760b39a380847 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 24 Aug 2020 11:14:32 -0700 Subject: [PATCH 095/648] RDMA/bnxt_re: Do not report transparent vlan from QP1 QP1 Rx CQE reports transparent VLAN ID in the completion and this is used while reporting the completion for received MAD packet. Check if the vlan id is configured before reporting it in the work completion. Fixes: 84511455ac5b ("RDMA/bnxt_re: report vlan_id and sl in qp1 recv completion") Link: https://lore.kernel.org/r/1598292876-26529-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 3f18efc0c297..62eb62c8603a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3264,6 +3264,19 @@ static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc, wc->wc_flags |= IB_WC_GRH; } +static bool bnxt_re_check_if_vlan_valid(struct bnxt_re_dev *rdev, + u16 vlan_id) +{ + /* + * Check if the vlan is configured in the host. If not configured, it + * can be a transparent VLAN. So dont report the vlan id. + */ + if (!__vlan_find_dev_deep_rcu(rdev->netdev, + htons(ETH_P_8021Q), vlan_id)) + return false; + return true; +} + static bool bnxt_re_is_vlan_pkt(struct bnxt_qplib_cqe *orig_cqe, u16 *vid, u8 *sl) { @@ -3332,9 +3345,11 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp, wc->src_qp = orig_cqe->src_qp; memcpy(wc->smac, orig_cqe->smac, ETH_ALEN); if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) { - wc->vlan_id = vlan_id; - wc->sl = sl; - wc->wc_flags |= IB_WC_WITH_VLAN; + if (bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { + wc->vlan_id = vlan_id; + wc->sl = sl; + wc->wc_flags |= IB_WC_WITH_VLAN; + } } wc->port_num = 1; wc->vendor_err = orig_cqe->status; From 84cf229f4001c1216afc3e4c7f05e1620a0dd4bc Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 24 Aug 2020 11:14:33 -0700 Subject: [PATCH 096/648] RDMA/bnxt_re: Fix the qp table indexing qp->id can be a value outside the max number of qp. Indexing the qp table with the id can cause out of bounds crash. So changing the qp table indexing by (qp->id % max_qp -1). Allocating one extra entry for QP1. Some adapters create one more than the max_qp requested to accommodate QP1. If the qp->id is 1, store the inforamtion in the last entry of the qp table. Fixes: f218d67ef004 ("RDMA/bnxt_re: Allow posting when QPs are in error") Link: https://lore.kernel.org/r/1598292876-26529-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 22 ++++++++++++++-------- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 10 ++++++---- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 5 +++++ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 117b42349a28..3535130a9e2d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -818,6 +818,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u16 cmd_flags = 0; u32 qp_flags = 0; u8 pg_sz_lvl; + u32 tbl_indx; int rc; RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags); @@ -907,8 +908,9 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rq->dbinfo.db = qp->dpi->dbr; rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } - rcfw->qp_tbl[qp->id].qp_id = qp->id; - rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; + tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); + rcfw->qp_tbl[tbl_indx].qp_id = qp->id; + rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp; return 0; @@ -959,6 +961,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u16 cmd_flags = 0; u32 qp_flags = 0; u8 pg_sz_lvl; + u32 tbl_indx; u16 nsge; RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); @@ -1111,8 +1114,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rq->dbinfo.db = qp->dpi->dbr; rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); } - rcfw->qp_tbl[qp->id].qp_id = qp->id; - rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; + tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); + rcfw->qp_tbl[tbl_indx].qp_id = qp->id; + rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp; return 0; fail: @@ -1457,10 +1461,12 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct cmdq_destroy_qp req; struct creq_destroy_qp_resp resp; u16 cmd_flags = 0; + u32 tbl_indx; int rc; - rcfw->qp_tbl[qp->id].qp_id = BNXT_QPLIB_QP_ID_INVALID; - rcfw->qp_tbl[qp->id].qp_handle = NULL; + tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); + rcfw->qp_tbl[tbl_indx].qp_id = BNXT_QPLIB_QP_ID_INVALID; + rcfw->qp_tbl[tbl_indx].qp_handle = NULL; RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags); @@ -1468,8 +1474,8 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, 0); if (rc) { - rcfw->qp_tbl[qp->id].qp_id = qp->id; - rcfw->qp_tbl[qp->id].qp_handle = qp; + rcfw->qp_tbl[tbl_indx].qp_id = qp->id; + rcfw->qp_tbl[tbl_indx].qp_handle = qp; return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 4e211162acee..f7736e34ac64 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -307,14 +307,15 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, __le16 mcookie; u16 cookie; int rc = 0; - u32 qp_id; + u32 qp_id, tbl_indx; pdev = rcfw->pdev; switch (qp_event->event) { case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: err_event = (struct creq_qp_error_notification *)qp_event; qp_id = le32_to_cpu(err_event->xid); - qp = rcfw->qp_tbl[qp_id].qp_handle; + tbl_indx = map_qp_id_to_tbl_indx(qp_id, rcfw); + qp = rcfw->qp_tbl[tbl_indx].qp_handle; dev_dbg(&pdev->dev, "Received QP error notification\n"); dev_dbg(&pdev->dev, "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", @@ -615,8 +616,9 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, cmdq->bmap_size = bmap_size; - rcfw->qp_tbl_size = qp_tbl_sz; - rcfw->qp_tbl = kcalloc(qp_tbl_sz, sizeof(struct bnxt_qplib_qp_node), + /* Allocate one extra to hold the QP1 entries */ + rcfw->qp_tbl_size = qp_tbl_sz + 1; + rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), GFP_KERNEL); if (!rcfw->qp_tbl) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 157387636d00..5f2f0a5a3560 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -216,4 +216,9 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn); void bnxt_qplib_mark_qp_error(void *qp_handle); +static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw) +{ + /* Last index of the qp_tbl is for QP1 ie. qp_tbl_size - 1*/ + return (qid == 1) ? rcfw->qp_tbl_size - 1 : qid % rcfw->qp_tbl_size - 2; +} #endif /* __BNXT_QPLIB_RCFW_H__ */ From f86b31c6a28f06eed3f6d9dc958079853b0792f1 Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Mon, 24 Aug 2020 11:14:34 -0700 Subject: [PATCH 097/648] RDMA/bnxt_re: Static NQ depth allocation At first, driver allocates memory for NQ based on qplib_ctx->cq_count and qplib_ctx->srqc_count. Later when creating ring, it uses a static value of 128K -1. Fixing this with a static value for now. Fixes: b08fe048a69d ("RDMA/bnxt_re: Refactor net ring allocation function") Link: https://lore.kernel.org/r/1598292876-26529-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 17ac8b7c5710..13bbeb42794f 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1037,8 +1037,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) struct bnxt_qplib_nq *nq; nq = &rdev->nq[i]; - nq->hwq.max_elements = (qplib_ctx->cq_count + - qplib_ctx->srqc_count + 2); + nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]); if (rc) { ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x", From 847b97887ed4569968d5b9a740f2334abca9f99a Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Mon, 24 Aug 2020 11:14:35 -0700 Subject: [PATCH 098/648] RDMA/bnxt_re: Restrict the max_gids to 256 Some adapters report more than 256 gid entries. Restrict it to 256 for now. Fixes: 1ac5a4047975("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1598292876-26529-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 2 +- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 4cd475ea97a2..64d44f51db4b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -149,7 +149,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_inline_data = le32_to_cpu(sb->max_inline_data); attr->l2_db_size = (sb->l2_db_space_size + 1) * (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); - attr->max_sgid = le32_to_cpu(sb->max_gid); + attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; bnxt_qplib_query_version(rcfw, attr->fw_ver); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 6404f0da1051..967890cd81f2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -47,6 +47,7 @@ struct bnxt_qplib_dev_attr { #define FW_VER_ARR_LEN 4 u8 fw_ver[FW_VER_ARR_LEN]; +#define BNXT_QPLIB_NUM_GIDS_SUPPORTED 256 u16 max_sgid; u16 max_mrw; u32 max_qp; From 934d0ac9a64d21523e3ad03ea4098da7826bc788 Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Mon, 24 Aug 2020 11:14:36 -0700 Subject: [PATCH 099/648] RDMA/bnxt_re: Fix driver crash on unaligned PSN entry address When computing the first psn entry, driver checks for page alignment. If this address is not page aligned,it attempts to compute the offset in that page for later use by using ALIGN macro. ALIGN macro does not return offset bytes but the requested aligned address and hence cannot be used directly to store as offset. Since driver was using the address itself instead of offset, it resulted in invalid address when filling the psn buffer. Fixed driver to use PAGE_MASK macro to calculate the offset. Fixes: fddcbbb02af4 ("RDMA/bnxt_re: Simplify obtaining queue entry from hw ring") Link: https://lore.kernel.org/r/1598292876-26529-7-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 3535130a9e2d..9f90cfec4bbb 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -937,10 +937,10 @@ static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size) sq = &qp->sq; hwq = &sq->hwq; + /* First psn entry */ fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg); if (!IS_ALIGNED(fpsne, PAGE_SIZE)) - indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; - + indx_pad = (fpsne & ~PAGE_MASK) / size; hwq->pad_pgofft = indx_pad; hwq->pad_pg = (u64 *)psn_pg; hwq->pad_stride = size; From 097a9d23b7250355b182c5fd47dd4c55b22b1c33 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 24 Aug 2020 11:14:31 -0700 Subject: [PATCH 100/648] RDMA/bnxt_re: Remove the qp from list only if the qp destroy succeeds Driver crashes when destroy_qp is re-tried because of an error returned. This is because the qp entry was removed from the qp list during the first call. Remove qp from the list only if destroy_qp returns success. The driver will still trigger a WARN_ON due to the memory leaking, but at least it isn't corrupting memory too. Fixes: 8dae419f9ec7 ("RDMA/bnxt_re: Refactor queue pair creation code") Link: https://lore.kernel.org/r/1598292876-26529-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 62eb62c8603a..0525b7461276 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -752,12 +752,6 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) gsi_sqp = rdev->gsi_ctx.gsi_sqp; gsi_sah = rdev->gsi_ctx.gsi_sah; - /* remove from active qp list */ - mutex_lock(&rdev->qp_lock); - list_del(&gsi_sqp->list); - mutex_unlock(&rdev->qp_lock); - atomic_dec(&rdev->qp_count); - ibdev_dbg(&rdev->ibdev, "Destroy the shadow AH\n"); bnxt_qplib_destroy_ah(&rdev->qplib_res, &gsi_sah->qplib_ah, @@ -772,6 +766,12 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) } bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp); + /* remove from active qp list */ + mutex_lock(&rdev->qp_lock); + list_del(&gsi_sqp->list); + mutex_unlock(&rdev->qp_lock); + atomic_dec(&rdev->qp_count); + kfree(rdev->gsi_ctx.sqp_tbl); kfree(gsi_sah); kfree(gsi_sqp); @@ -792,11 +792,6 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) unsigned int flags; int rc; - mutex_lock(&rdev->qp_lock); - list_del(&qp->list); - mutex_unlock(&rdev->qp_lock); - atomic_dec(&rdev->qp_count); - bnxt_qplib_flush_cqn_wq(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); @@ -819,6 +814,11 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) goto sh_fail; } + mutex_lock(&rdev->qp_lock); + list_del(&qp->list); + mutex_unlock(&rdev->qp_lock); + atomic_dec(&rdev->qp_count); + ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); From d1c9da9e4c938e8bbf8b0ef9e5772b97db5639e9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 12 Aug 2020 12:23:32 +0200 Subject: [PATCH 101/648] mt76: mt7615: use v1 MCU API on MT7615 to fix issues with adding/removing stations The implementation of embedding WTBL update inside the STA_REC update is buggy on the MT7615 v2 firmware. This leads to connection issues after a station has connected and disconnected again. Switch to the v1 MCU API ops, since they have received much more testing and should be more stable. On MT7622 and later, the v2 API is more actively used, so we should keep using it as well. Fixes: 6849e29ed92e ("mt76: mt7615: add starec operating flow for firmware v2") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200812102332.11812-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c index d0cbb283982f..bd316dbd9041 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c @@ -2128,7 +2128,8 @@ static int mt7615_load_n9(struct mt7615_dev *dev, const char *name) sizeof(dev->mt76.hw->wiphy->fw_version), "%.10s-%.15s", hdr->fw_ver, hdr->build_date); - if (!strncmp(hdr->fw_ver, "2.0", sizeof(hdr->fw_ver))) { + if (!is_mt7615(&dev->mt76) && + !strncmp(hdr->fw_ver, "2.0", sizeof(hdr->fw_ver))) { dev->fw_ver = MT7615_FIRMWARE_V2; dev->mcu_ops = &sta_update_ops; } else { From b4be5a53ebf478ffcfb4c98c0ccc4a8d922b9a02 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 12 Aug 2020 16:49:43 +0200 Subject: [PATCH 102/648] mt76: mt7915: use ieee80211_free_txskb to free tx skbs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using dev_kfree_skb for tx skbs breaks AQL. This worked until now only by accident, because a mac80211 issue breaks AQL on drivers with firmware rate control that report the rate via ieee80211_tx_status_ext as struct rate_info. Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200812144943.91974-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 8 ++++++-- drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index e90d0087e377..8d6ceb3b67b4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -699,8 +699,12 @@ void mt7915_unregister_device(struct mt7915_dev *dev) spin_lock_bh(&dev->token_lock); idr_for_each_entry(&dev->token, txwi, id) { mt7915_txp_skb_unmap(&dev->mt76, txwi); - if (txwi->skb) - dev_kfree_skb_any(txwi->skb); + if (txwi->skb) { + struct ieee80211_hw *hw; + + hw = mt76_tx_status_get_hw(&dev->mt76, txwi->skb); + ieee80211_free_txskb(hw, txwi->skb); + } mt76_put_txwi(&dev->mt76, txwi); } spin_unlock_bh(&dev->token_lock); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 6825afca1efb..036207f828f3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -841,7 +841,7 @@ mt7915_tx_complete_status(struct mt76_dev *mdev, struct sk_buff *skb, if (sta || !(info->flags & IEEE80211_TX_CTL_NO_ACK)) mt7915_tx_status(sta, hw, info, NULL); - dev_kfree_skb(skb); + ieee80211_free_txskb(hw, skb); } void mt7915_txp_skb_unmap(struct mt76_dev *dev, From 4afc850e2e9e781976fb2c7852ce7bac374af938 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Tue, 25 Aug 2020 17:38:29 +0200 Subject: [PATCH 103/648] mwifiex: Increase AES key storage size to 256 bits Following commit e18696786548 ("mwifiex: Prevent memory corruption handling keys") the mwifiex driver fails to authenticate with certain networks, specifically networks with 256 bit keys, and repeatedly asks for the password. The kernel log repeats the following lines (id and bssid redacted): mwifiex_pcie 0000:01:00.0: info: trying to associate to '' bssid mwifiex_pcie 0000:01:00.0: info: associated to bssid successfully mwifiex_pcie 0000:01:00.0: crypto keys added mwifiex_pcie 0000:01:00.0: info: successfully disconnected from : reason code 3 Tracking down this problem lead to the overflow check introduced by the aforementioned commit into mwifiex_ret_802_11_key_material_v2(). This check fails on networks with 256 bit keys due to the current storage size for AES keys in struct mwifiex_aes_param being only 128 bit. To fix this issue, increase the storage size for AES keys to 256 bit. Fixes: e18696786548 ("mwifiex: Prevent memory corruption handling keys") Signed-off-by: Maximilian Luz Reported-by: Kaloyan Nikolov Tested-by: Kaloyan Nikolov Reviewed-by: Dan Carpenter Reviewed-by: Brian Norris Tested-by: Brian Norris Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200825153829.38043-1-luzmaximilian@gmail.com --- drivers/net/wireless/marvell/mwifiex/fw.h | 2 +- drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 8047e307892e..d9f8bdbc817b 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -954,7 +954,7 @@ struct mwifiex_tkip_param { struct mwifiex_aes_param { u8 pn[WPA_PN_SIZE]; __le16 key_len; - u8 key[WLAN_KEY_LEN_CCMP]; + u8 key[WLAN_KEY_LEN_CCMP_256]; } __packed; struct mwifiex_wapi_param { diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 962d8bfe6f10..119ccacd1fcc 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -619,7 +619,7 @@ static int mwifiex_ret_802_11_key_material_v2(struct mwifiex_private *priv, key_v2 = &resp->params.key_material_v2; len = le16_to_cpu(key_v2->key_param_set.key_params.aes.key_len); - if (len > WLAN_KEY_LEN_CCMP) + if (len > sizeof(key_v2->key_param_set.key_params.aes.key)) return -EINVAL; if (le16_to_cpu(key_v2->action) == HostCmd_ACT_GEN_SET) { @@ -635,7 +635,7 @@ static int mwifiex_ret_802_11_key_material_v2(struct mwifiex_private *priv, return 0; memset(priv->aes_key_v2.key_param_set.key_params.aes.key, 0, - WLAN_KEY_LEN_CCMP); + sizeof(key_v2->key_param_set.key_params.aes.key)); priv->aes_key_v2.key_param_set.key_params.aes.key_len = cpu_to_le16(len); memcpy(priv->aes_key_v2.key_param_set.key_params.aes.key, From 097930e85f90f252c44dc0d084598265dd44ca48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 27 Aug 2020 17:34:48 +0200 Subject: [PATCH 104/648] batman-adv: bla: fix type misuse for backbone_gw hash indexing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that due to a copy & paste error the void pointer in batadv_choose_backbone_gw() is cast to the wrong type. Fixing this by using "struct batadv_bla_backbone_gw" instead of "struct batadv_bla_claim" which better matches the caller's side. For now it seems that we were lucky because the two structs both have their orig/vid and addr/vid in the beginning. However I stumbled over this issue when I was trying to add some debug variables in front of "orig" in batadv_backbone_gw, which caused hash lookups to fail. Fixes: 07568d0369f9 ("batman-adv: don't rely on positions in struct for hashing") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/bridge_loop_avoidance.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8500f56cbd10..d8c5d3170676 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -83,11 +83,12 @@ static inline u32 batadv_choose_claim(const void *data, u32 size) */ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) { - const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; + const struct batadv_bla_backbone_gw *gw; u32 hash = 0; - hash = jhash(&claim->addr, sizeof(claim->addr), hash); - hash = jhash(&claim->vid, sizeof(claim->vid), hash); + gw = (struct batadv_bla_backbone_gw *)data; + hash = jhash(&gw->orig, sizeof(gw->orig), hash); + hash = jhash(&gw->vid, sizeof(gw->vid), hash); return hash % size; } From 07b5b12d97dc9f47ff3dff46c4f944a15bd762e5 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 26 Aug 2020 09:08:26 +0800 Subject: [PATCH 105/648] drm/sun4i: add missing put_device() call in sun8i_r40_tcon_tv_set_mux() If sun8i_r40_tcon_tv_set_mux() succeed, sun8i_r40_tcon_tv_set_mux() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling for this function implementation. Fixes: 0305189afb32 ("drm/sun4i: tcon: Add support for R40 TCON") Signed-off-by: Yu Kuai Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200826010826.1785487-1-yukuai3@huawei.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 359b56e43b83..24d95f058918 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -1433,14 +1433,18 @@ static int sun8i_r40_tcon_tv_set_mux(struct sun4i_tcon *tcon, if (IS_ENABLED(CONFIG_DRM_SUN8I_TCON_TOP) && encoder->encoder_type == DRM_MODE_ENCODER_TMDS) { ret = sun8i_tcon_top_set_hdmi_src(&pdev->dev, id); - if (ret) + if (ret) { + put_device(&pdev->dev); return ret; + } } if (IS_ENABLED(CONFIG_DRM_SUN8I_TCON_TOP)) { ret = sun8i_tcon_top_de_config(&pdev->dev, tcon->id, id); - if (ret) + if (ret) { + put_device(&pdev->dev); return ret; + } } return 0; From e5c388b4b967037a0e00b60194b0dbcf94881a9b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 7 Aug 2020 21:29:28 -0700 Subject: [PATCH 106/648] ARC: show_regs: fix r12 printing and simplify when working on ARC64, spotted an issue in ARCv2 reg file printing. print_reg_file() assumes contiguous reg-file whereas in ARCv2 they are not: r12 comes before r0-r11 due to hardware auto-save. Apparently this issue has been present since v2 port submission. To avoid bolting hacks for this discontinuity while looping through pt_regs, just ditching the loop and print pt_regs directly. Signed-off-by: Vineet Gupta --- arch/arc/kernel/troubleshoot.c | 77 +++++++++++++--------------------- 1 file changed, 30 insertions(+), 47 deletions(-) diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 28e8bf04b253..a331bb5d8319 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -18,44 +18,37 @@ #define ARC_PATH_MAX 256 -/* - * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25) - * -Prints 3 regs per line and a CR. - * -To continue, callee regs right after scratch, special handling of CR - */ -static noinline void print_reg_file(long *reg_rev, int start_num) +static noinline void print_regs_scratch(struct pt_regs *regs) { - unsigned int i; - char buf[512]; - int n = 0, len = sizeof(buf); + pr_cont("BTA: 0x%08lx\n SP: 0x%08lx FP: 0x%08lx BLK: %pS\n", + regs->bta, regs->sp, regs->fp, (void *)regs->blink); + pr_cont("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", + regs->lp_start, regs->lp_end, regs->lp_count); - for (i = start_num; i < start_num + 13; i++) { - n += scnprintf(buf + n, len - n, "r%02u: 0x%08lx\t", - i, (unsigned long)*reg_rev); - - if (((i + 1) % 3) == 0) - n += scnprintf(buf + n, len - n, "\n"); - - /* because pt_regs has regs reversed: r12..r0, r25..r13 */ - if (is_isa_arcv2() && start_num == 0) - reg_rev++; - else - reg_rev--; - } - - if (start_num != 0) - n += scnprintf(buf + n, len - n, "\n\n"); - - /* To continue printing callee regs on same line as scratch regs */ - if (start_num == 0) - pr_info("%s", buf); - else - pr_cont("%s\n", buf); + pr_info("r00: 0x%08lx\tr01: 0x%08lx\tr02: 0x%08lx\n" \ + "r03: 0x%08lx\tr04: 0x%08lx\tr05: 0x%08lx\n" \ + "r06: 0x%08lx\tr07: 0x%08lx\tr08: 0x%08lx\n" \ + "r09: 0x%08lx\tr10: 0x%08lx\tr11: 0x%08lx\n" \ + "r12: 0x%08lx\t", + regs->r0, regs->r1, regs->r2, + regs->r3, regs->r4, regs->r5, + regs->r6, regs->r7, regs->r8, + regs->r9, regs->r10, regs->r11, + regs->r12); } -static void show_callee_regs(struct callee_regs *cregs) +static void print_regs_callee(struct callee_regs *regs) { - print_reg_file(&(cregs->r13), 13); + pr_cont("r13: 0x%08lx\tr14: 0x%08lx\n" \ + "r15: 0x%08lx\tr16: 0x%08lx\tr17: 0x%08lx\n" \ + "r18: 0x%08lx\tr19: 0x%08lx\tr20: 0x%08lx\n" \ + "r21: 0x%08lx\tr22: 0x%08lx\tr23: 0x%08lx\n" \ + "r24: 0x%08lx\tr25: 0x%08lx\n", + regs->r13, regs->r14, + regs->r15, regs->r16, regs->r17, + regs->r18, regs->r19, regs->r20, + regs->r21, regs->r22, regs->r23, + regs->r24, regs->r25); } static void print_task_path_n_nm(struct task_struct *tsk) @@ -175,7 +168,7 @@ static void show_ecr_verbose(struct pt_regs *regs) void show_regs(struct pt_regs *regs) { struct task_struct *tsk = current; - struct callee_regs *cregs; + struct callee_regs *cregs = (struct callee_regs *)tsk->thread.callee_reg; /* * generic code calls us with preemption disabled, but some calls @@ -204,25 +197,15 @@ void show_regs(struct pt_regs *regs) STS_BIT(regs, A2), STS_BIT(regs, A1), STS_BIT(regs, E2), STS_BIT(regs, E1)); #else - pr_cont(" [%2s%2s%2s%2s]", + pr_cont(" [%2s%2s%2s%2s] ", STS_BIT(regs, IE), (regs->status32 & STATUS_U_MASK) ? "U " : "K ", STS_BIT(regs, DE), STS_BIT(regs, AE)); #endif - pr_cont(" BTA: 0x%08lx\n SP: 0x%08lx FP: 0x%08lx BLK: %pS\n", - regs->bta, regs->sp, regs->fp, (void *)regs->blink); - pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", - regs->lp_start, regs->lp_end, regs->lp_count); - /* print regs->r0 thru regs->r12 - * Sequential printing was generating horrible code - */ - print_reg_file(&(regs->r0), 0); - - /* If Callee regs were saved, display them too */ - cregs = (struct callee_regs *)current->thread.callee_reg; + print_regs_scratch(regs); if (cregs) - show_callee_regs(cregs); + print_regs_callee(cregs); preempt_disable(); } From 89d29997f103d08264b0685796b420d911658b96 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 24 Aug 2020 12:10:33 -0700 Subject: [PATCH 107/648] irqchip/eznps: Fix build error for !ARC700 builds eznps driver is supposed to be platform independent however it ends up including stuff from inside arch/arc headers leading to rand config build errors. The quick hack to fix this (proper fix is too much chrun for non active user-base) is to add following to nps platform agnostic header. - copy AUX_IENABLE from arch/arc header - move CTOP_AUX_IACK from arch/arc/plat-eznps/*/** Reported-by: kernel test robot Reported-by: Sebastian Andrzej Siewior Link: https://lkml.kernel.org/r/20200824095831.5lpkmkafelnvlpi2@linutronix.de Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 1 - include/soc/nps/common.h | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index a4a61531c7fb..77712c5ffe84 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -33,7 +33,6 @@ #define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C) #define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030) #define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080) -#define CTOP_AUX_IACK (CTOP_AUX_BASE + 0x088) #define CTOP_AUX_GPA1 (CTOP_AUX_BASE + 0x08C) #define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300) diff --git a/include/soc/nps/common.h b/include/soc/nps/common.h index 9b1d43d671a3..8c18dc6d3fde 100644 --- a/include/soc/nps/common.h +++ b/include/soc/nps/common.h @@ -45,6 +45,12 @@ #define CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST 0x5B60 #define CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM 0x00010422 +#ifndef AUX_IENABLE +#define AUX_IENABLE 0x40c +#endif + +#define CTOP_AUX_IACK (0xFFFFF800 + 0x088) + #ifndef __ASSEMBLY__ /* In order to increase compilation test coverage */ From 4965b8cd1bc1ffb017e5c58e622da82b55e49414 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Fri, 21 Aug 2020 02:27:50 +0530 Subject: [PATCH 108/648] firmware_loader: fix memory leak for paged buffer vfree() is being called on paged buffer allocated using alloc_page() and mapped using vmap(). Freeing of pages in vfree() relies on nr_pages of struct vm_struct. vmap() does not update nr_pages. It can lead to memory leaks. Fixes: ddaf29fd9bb6 ("firmware: Free temporary page table after vmapping") Signed-off-by: Prateek Sood Reviewed-by: Takashi Iwai Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1597957070-27185-1-git-send-email-prsood@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/firmware.h | 2 ++ drivers/base/firmware_loader/main.c | 17 +++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h index 933e2192fbe8..d08efc77cf16 100644 --- a/drivers/base/firmware_loader/firmware.h +++ b/drivers/base/firmware_loader/firmware.h @@ -142,10 +142,12 @@ int assign_fw(struct firmware *fw, struct device *device, u32 opt_flags); void fw_free_paged_buf(struct fw_priv *fw_priv); int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed); int fw_map_paged_buf(struct fw_priv *fw_priv); +bool fw_is_paged_buf(struct fw_priv *fw_priv); #else static inline void fw_free_paged_buf(struct fw_priv *fw_priv) {} static inline int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; } static inline int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; } +static inline bool fw_is_paged_buf(struct fw_priv *fw_priv) { return false; } #endif #endif /* __FIRMWARE_LOADER_H */ diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 9da0c9d5f538..63b9714a0154 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -252,9 +252,11 @@ static void __free_fw_priv(struct kref *ref) list_del(&fw_priv->list); spin_unlock(&fwc->lock); - fw_free_paged_buf(fw_priv); /* free leftover pages */ - if (!fw_priv->allocated_size) + if (fw_is_paged_buf(fw_priv)) + fw_free_paged_buf(fw_priv); + else if (!fw_priv->allocated_size) vfree(fw_priv->data); + kfree_const(fw_priv->fw_name); kfree(fw_priv); } @@ -268,6 +270,11 @@ static void free_fw_priv(struct fw_priv *fw_priv) } #ifdef CONFIG_FW_LOADER_PAGED_BUF +bool fw_is_paged_buf(struct fw_priv *fw_priv) +{ + return fw_priv->is_paged_buf; +} + void fw_free_paged_buf(struct fw_priv *fw_priv) { int i; @@ -275,6 +282,8 @@ void fw_free_paged_buf(struct fw_priv *fw_priv) if (!fw_priv->pages) return; + vunmap(fw_priv->data); + for (i = 0; i < fw_priv->nr_pages; i++) __free_page(fw_priv->pages[i]); kvfree(fw_priv->pages); @@ -328,10 +337,6 @@ int fw_map_paged_buf(struct fw_priv *fw_priv) if (!fw_priv->data) return -ENOMEM; - /* page table is no longer needed after mapping, let's free */ - kvfree(fw_priv->pages); - fw_priv->pages = NULL; - return 0; } #endif From 103a8542cb35b5130f732d00b0419a594ba1b517 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 28 Aug 2020 15:38:52 +0530 Subject: [PATCH 109/648] powerpc/book3s64/radix: Fix boot failure with large amount of guest memory If the hypervisor doesn't support hugepages, the kernel ends up allocating a large number of page table pages. The early page table allocation was wrongly setting the max memblock limit to ppc64_rma_size with radix translation which resulted in boot failure as shown below. Kernel panic - not syncing: early_alloc_pgtable: Failed to allocate 16777216 bytes align=0x1000000 nid=-1 from=0x0000000000000000 max_addr=0xffffffffffffffff CPU: 0 PID: 0 Comm: swapper Not tainted 5.8.0-24.9-default+ #2 Call Trace: [c0000000016f3d00] [c0000000007c6470] dump_stack+0xc4/0x114 (unreliable) [c0000000016f3d40] [c00000000014c78c] panic+0x164/0x418 [c0000000016f3dd0] [c000000000098890] early_alloc_pgtable+0xe0/0xec [c0000000016f3e60] [c0000000010a5440] radix__early_init_mmu+0x360/0x4b4 [c0000000016f3ef0] [c000000001099bac] early_init_mmu+0x1c/0x3c [c0000000016f3f10] [c00000000109a320] early_setup+0x134/0x170 This was because the kernel was checking for the radix feature before we enable the feature via mmu_features. This resulted in the kernel using hash restrictions on radix. Rework the early init code such that the kernel boot with memblock restrictions as imposed by hash. At that point, the kernel still hasn't finalized the translation the kernel will end up using. We have three different ways of detecting radix. 1. dt_cpu_ftrs_scan -> used only in case of PowerNV 2. ibm,pa-features -> Used when we don't use cpu_dt_ftr_scan 3. CAS -> Where we negotiate with hypervisor about the supported translation. We look at 1 or 2 early in the boot and after that, we look at the CAS vector to finalize the translation the kernel will use. We also support a kernel command line option (disable_radix) to switch to hash. Update the memblock limit after mmu_early_init_devtree() if the kernel is going to use radix translation. This forces some of the memblock allocations we do before mmu_early_init_devtree() to be within the RMA limit. Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines") Reported-by: Shirisha Ganta Signed-off-by: Aneesh Kumar K.V Reviewed-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200828100852.426575-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 10 +++++----- arch/powerpc/mm/book3s64/radix_pgtable.c | 15 --------------- arch/powerpc/mm/init_64.c | 11 +++++++++-- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 55442d45c597..b392384a3b15 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -239,14 +239,14 @@ static inline void early_init_mmu_secondary(void) extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); -extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size); static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - if (early_radix_enabled()) - return radix__setup_initial_memory_limit(first_memblock_base, - first_memblock_size); + /* + * Hash has more strict restrictions. At this point we don't + * know which translations we will pick. Hence go with hash + * restrictions. + */ return hash__setup_initial_memory_limit(first_memblock_base, first_memblock_size); } diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 28c784976bed..d5f0c10d752a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -734,21 +734,6 @@ void radix__mmu_cleanup_all(void) } } -void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* - * We don't currently support the first MEMBLOCK not mapping 0 - * physical on those processors - */ - BUG_ON(first_memblock_base != 0); - - /* - * Radix mode is not limited by RMA / VRMA addressing. - */ - ppc64_rma_size = ULONG_MAX; -} - #ifdef CONFIG_MEMORY_HOTPLUG static void free_pte_table(pte_t *pte_start, pmd_t *pmd) { diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 02e127fa5777..8459056cce67 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -433,9 +433,16 @@ void __init mmu_early_init_devtree(void) if (!(mfmsr() & MSR_HV)) early_check_vec5(); - if (early_radix_enabled()) + if (early_radix_enabled()) { radix__early_init_devtree(); - else + /* + * We have finalized the translation we are going to use by now. + * Radix mode is not limited by RMA / VRMA addressing. + * Hence don't limit memblock allocations. + */ + ppc64_rma_size = ULONG_MAX; + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + } else hash__early_init_devtree(); } #endif /* CONFIG_PPC_BOOK3S_64 */ From 40b8b826a6998639dd1c26f0e127f18371e1058d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Aug 2020 11:27:06 +0300 Subject: [PATCH 110/648] kobject: Restore old behaviour of kobject_del(NULL) The commit 079ad2fb4bf9 ("kobject: Avoid premature parent object freeing in kobject_cleanup()") inadvertently dropped a possibility to call kobject_del() with NULL pointer. Restore the old behaviour. Fixes: 079ad2fb4bf9 ("kobject: Avoid premature parent object freeing in kobject_cleanup()") Cc: stable Reported-by: Qu Wenruo Cc: Heikki Krogerus Signed-off-by: Andy Shevchenko Reviewed-by: Qu Wenruo Link: https://lore.kernel.org/r/20200803082706.65347-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/kobject.c b/lib/kobject.c index 3afb939f2a1c..9dce68c378e6 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -637,8 +637,12 @@ static void __kobject_del(struct kobject *kobj) */ void kobject_del(struct kobject *kobj) { - struct kobject *parent = kobj->parent; + struct kobject *parent; + if (!kobj) + return; + + parent = kobj->parent; __kobject_del(kobj); kobject_put(parent); } From 20d9fdee72dfaa1fa7588c7a846283bd740e7157 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 27 Aug 2020 08:55:39 +0900 Subject: [PATCH 111/648] ASoC: soc-core: add snd_soc_find_dai_with_mutex() commit 25612477d20b52 ("ASoC: soc-dai: set dai_link dpcm_ flags with a helper") added snd_soc_dai_link_set_capabilities(). But it is using snd_soc_find_dai() (A) which is required client_mutex (B). And client_mutex is soc-core.c local. struct snd_soc_dai *snd_soc_find_dai(xxx) { ... (B) lockdep_assert_held(&client_mutex); ... } void snd_soc_dai_link_set_capabilities(xxx) { ... for_each_pcm_streams(direction) { ... for_each_link_cpus(dai_link, i, cpu) { (A) dai = snd_soc_find_dai(cpu); ... } ... for_each_link_codecs(dai_link, i, codec) { (A) dai = snd_soc_find_dai(codec); ... } } ... } Because of these background, we will get WARNING if .config has CONFIG_LOCKDEP. WARNING: CPU: 2 PID: 53 at sound/soc/soc-core.c:814 snd_soc_find_dai+0xf8/0x100 CPU: 2 PID: 53 Comm: kworker/2:1 Not tainted 5.7.0-rc1+ #328 Hardware name: Renesas H3ULCB Kingfisher board based on r8a77951 (DT) Workqueue: events deferred_probe_work_func pstate: 60000005 (nZCv daif -PAN -UAO) pc : snd_soc_find_dai+0xf8/0x100 lr : snd_soc_find_dai+0xf4/0x100 ... Call trace: snd_soc_find_dai+0xf8/0x100 snd_soc_dai_link_set_capabilities+0xa0/0x16c graph_dai_link_of_dpcm+0x390/0x3c0 graph_for_each_link+0x134/0x200 graph_probe+0x144/0x230 platform_drv_probe+0x5c/0xb0 really_probe+0xe4/0x430 driver_probe_device+0x60/0xf4 snd_soc_find_dai() will be used from (X) CPU/Codec/Platform driver with mutex lock, and (Y) Card driver without mutex lock. This snd_soc_dai_link_set_capabilities() is for Card driver, this means called without mutex. This patch adds snd_soc_find_dai_with_mutex() to solve it. Fixes: 25612477d20b52 ("ASoC: soc-dai: set dai_link dpcm_ flags with a helper") Signed-off-by: Kuninori Morimoto Link: https://lore.kernel.org/r/87blixvuab.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 13 +++++++++++++ sound/soc/soc-dai.c | 4 ++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 5e3919ffb00c..4176071f61bf 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1361,6 +1361,8 @@ void snd_soc_unregister_dai(struct snd_soc_dai *dai); struct snd_soc_dai *snd_soc_find_dai( const struct snd_soc_dai_link_component *dlc); +struct snd_soc_dai *snd_soc_find_dai_with_mutex( + const struct snd_soc_dai_link_component *dlc); #include diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 2fe1b2ec7c8f..fe11856d7a63 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -834,6 +834,19 @@ struct snd_soc_dai *snd_soc_find_dai( } EXPORT_SYMBOL_GPL(snd_soc_find_dai); +struct snd_soc_dai *snd_soc_find_dai_with_mutex( + const struct snd_soc_dai_link_component *dlc) +{ + struct snd_soc_dai *dai; + + mutex_lock(&client_mutex); + dai = snd_soc_find_dai(dlc); + mutex_unlock(&client_mutex); + + return dai; +} +EXPORT_SYMBOL_GPL(snd_soc_find_dai_with_mutex); + static int soc_dai_link_sanity_check(struct snd_soc_card *card, struct snd_soc_dai_link *link) { diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 91a2551e4cef..0dbd312aad08 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -412,14 +412,14 @@ void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link) supported_codec = false; for_each_link_cpus(dai_link, i, cpu) { - dai = snd_soc_find_dai(cpu); + dai = snd_soc_find_dai_with_mutex(cpu); if (dai && snd_soc_dai_stream_valid(dai, direction)) { supported_cpu = true; break; } } for_each_link_codecs(dai_link, i, codec) { - dai = snd_soc_find_dai(codec); + dai = snd_soc_find_dai_with_mutex(codec); if (dai && snd_soc_dai_stream_valid(dai, direction)) { supported_codec = true; break; From 9c4b205a20f483d8a5d1208cfec33e339347d4bd Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 28 Aug 2020 17:14:38 +0200 Subject: [PATCH 112/648] ASoC: meson: axg-toddr: fix channel order on g12 platforms On g12 and following platforms, The first channel of record with more than 2 channels ends being placed randomly on an even channel of the output. On these SoCs, a bit was added to force the first channel to be placed at the beginning of the output. Apparently the behavior if the bit is not set is not easily predictable. According to the documentation, this bit is not present on the axg series. Set the bit on g12 and fix the problem. Fixes: a3c23a8ad4dc ("ASoC: meson: axg-toddr: add g12a support") Reported-by: Nicolas Belin Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20200828151438.350974-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-toddr.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/sound/soc/meson/axg-toddr.c b/sound/soc/meson/axg-toddr.c index e711abcf8c12..d6adf7edea41 100644 --- a/sound/soc/meson/axg-toddr.c +++ b/sound/soc/meson/axg-toddr.c @@ -18,6 +18,7 @@ #define CTRL0_TODDR_SEL_RESAMPLE BIT(30) #define CTRL0_TODDR_EXT_SIGNED BIT(29) #define CTRL0_TODDR_PP_MODE BIT(28) +#define CTRL0_TODDR_SYNC_CH BIT(27) #define CTRL0_TODDR_TYPE_MASK GENMASK(15, 13) #define CTRL0_TODDR_TYPE(x) ((x) << 13) #define CTRL0_TODDR_MSB_POS_MASK GENMASK(12, 8) @@ -189,10 +190,31 @@ static const struct axg_fifo_match_data axg_toddr_match_data = { .dai_drv = &axg_toddr_dai_drv }; +static int g12a_toddr_dai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai); + int ret; + + ret = axg_toddr_dai_startup(substream, dai); + if (ret) + return ret; + + /* + * Make sure the first channel ends up in the at beginning of the output + * As weird as it looks, without this the first channel may be misplaced + * in memory, with a random shift of 2 channels. + */ + regmap_update_bits(fifo->map, FIFO_CTRL0, CTRL0_TODDR_SYNC_CH, + CTRL0_TODDR_SYNC_CH); + + return 0; +} + static const struct snd_soc_dai_ops g12a_toddr_ops = { .prepare = g12a_toddr_dai_prepare, .hw_params = axg_toddr_dai_hw_params, - .startup = axg_toddr_dai_startup, + .startup = g12a_toddr_dai_startup, .shutdown = axg_toddr_dai_shutdown, }; From e5fc436f06eef54ef512ea55a9db8eb9f2e76959 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Fri, 28 Aug 2020 10:53:01 +0200 Subject: [PATCH 113/648] sparse: use static inline for __chk_{user,io}_ptr() __chk_user_ptr() & __chk_io_ptr() are dummy extern functions which only exist to enforce the typechecking of __user or __iomem pointers in macros when using sparse. This typechecking is done by inserting a call to these functions. But the presence of these calls can inhibit some simplifications and so influence the result of sparse's analysis of context/locking. Fix this by changing these calls into static inline calls with an empty body. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4b33cb385f96..6e390d58a9f8 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -11,8 +11,8 @@ # define __iomem __attribute__((noderef, address_space(__iomem))) # define __percpu __attribute__((noderef, address_space(__percpu))) # define __rcu __attribute__((noderef, address_space(__rcu))) -extern void __chk_user_ptr(const volatile void __user *); -extern void __chk_io_ptr(const volatile void __iomem *); +static inline void __chk_user_ptr(const volatile void __user *ptr) { } +static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* context/locking */ # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) From 1a21e5b930e8454438ebb707f558b256e4b06ec7 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 27 Aug 2020 13:44:03 +0200 Subject: [PATCH 114/648] drm/ingenic: Fix leak of device_node pointer of_graph_get_remote_node() requires of_node_put() to be called on the device_node pointer when it's no more in use. Fixes: fc1acf317b01 ("drm/ingenic: Add support for the IPU") Signed-off-by: Paul Cercueil Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200827114404.36748-1-paul@crapouillou.net --- drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index ada990a7f911..c1bcb93aed2d 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -978,6 +978,7 @@ static int ingenic_drm_probe(struct platform_device *pdev) } drm_of_component_match_add(dev, &match, compare_of, np); + of_node_put(np); return component_master_add_with_match(dev, &ingenic_master_ops, match); } From 3b5b005ef7d9e2ff1693c1d14a0f4459dcedb4e0 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 27 Aug 2020 13:44:04 +0200 Subject: [PATCH 115/648] drm/ingenic: Fix driver not probing when IPU port is missing Even if support for the IPU was compiled in, we may run on a device (e.g. the Qi LB60) where the IPU is not available, or simply with an old devicetree without the IPU node. In that case the ingenic-drm refused to probe. Fix the driver so that it will probe even if the IPU node is not present in devicetree (but then IPU support is disabled of course). v2: Take a different approach Fixes: fc1acf317b01 ("drm/ingenic: Add support for the IPU") Signed-off-by: Paul Cercueil Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200827114404.36748-2-paul@crapouillou.net --- drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index c1bcb93aed2d..b7074161ccf0 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -673,7 +673,7 @@ static void ingenic_drm_unbind_all(void *d) component_unbind_all(priv->dev, &priv->drm); } -static int ingenic_drm_bind(struct device *dev) +static int ingenic_drm_bind(struct device *dev, bool has_components) { struct platform_device *pdev = to_platform_device(dev); const struct jz_soc_info *soc_info; @@ -808,7 +808,7 @@ static int ingenic_drm_bind(struct device *dev) return ret; } - if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU)) { + if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && has_components) { ret = component_bind_all(dev, drm); if (ret) { if (ret != -EPROBE_DEFER) @@ -939,6 +939,11 @@ static int ingenic_drm_bind(struct device *dev) return ret; } +static int ingenic_drm_bind_with_components(struct device *dev) +{ + return ingenic_drm_bind(dev, true); +} + static int compare_of(struct device *dev, void *data) { return dev->of_node == data; @@ -957,7 +962,7 @@ static void ingenic_drm_unbind(struct device *dev) } static const struct component_master_ops ingenic_master_ops = { - .bind = ingenic_drm_bind, + .bind = ingenic_drm_bind_with_components, .unbind = ingenic_drm_unbind, }; @@ -968,14 +973,12 @@ static int ingenic_drm_probe(struct platform_device *pdev) struct device_node *np; if (!IS_ENABLED(CONFIG_DRM_INGENIC_IPU)) - return ingenic_drm_bind(dev); + return ingenic_drm_bind(dev, false); /* IPU is at port address 8 */ np = of_graph_get_remote_node(dev->of_node, 8, 0); - if (!np) { - dev_err(dev, "Unable to get IPU node\n"); - return -EINVAL; - } + if (!np) + return ingenic_drm_bind(dev, false); drm_of_component_match_add(dev, &match, compare_of, np); of_node_put(np); From 1f2f98f2703e8134678fe20982886085631eda23 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 29 Aug 2020 13:12:48 +0200 Subject: [PATCH 116/648] arm64: dts: imx8mq: Fix TMU interrupt property "interrupt" is not a valid property. Using proper name fixes dtbs_check warning: arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-zest.dt.yaml: tmu@30260000: 'interrupts' is a required property Fixes: e464fd2ba4d4 ("arm64: dts: imx8mq: enable the multi sensor TMU") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index f70435cf9ad5..561fa792fe5a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -423,7 +423,7 @@ gpio5: gpio@30240000 { tmu: tmu@30260000 { compatible = "fsl,imx8mq-tmu"; reg = <0x30260000 0x10000>; - interrupt = ; + interrupts = ; clocks = <&clk IMX8MQ_CLK_TMU_ROOT>; little-endian; fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x70061>; From 1ac698790819b83f39fd7ea4f6cdabee9bdd7b38 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Sat, 29 Aug 2020 11:05:39 +0200 Subject: [PATCH 117/648] USB: serial: option: add support for SIM7070/SIM7080/SIM7090 modules These modules have 2 different USB layouts: The default layout with PID 0x9205 (AT+CUSBSELNV=1) exposes 4 TTYs and an ECM interface: T: Bus=02 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1e0e ProdID=9205 Rev=00.00 S: Manufacturer=SimTech, Incorporated S: Product=SimTech SIM7080 S: SerialNumber=1234567890ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x4 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether I: If#=0x5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether The purpose of each TTY is as follows: * ttyUSB0: DIAG/QCDM port. * ttyUSB1: GNSS data. * ttyUSB2: AT-capable port (control). * ttyUSB3: AT-capable port (data). In the secondary layout with PID=0x9206 (AT+CUSBSELNV=86) the module exposes 6 TTY ports: T: Bus=02 Lev=01 Prnt=01 Port=02 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=02(commc) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1e0e ProdID=9206 Rev=00.00 S: Manufacturer=SimTech, Incorporated S: Product=SimTech SIM7080 S: SerialNumber=1234567890ABCDEF C: #Ifs= 6 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#=0x5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option The purpose of each TTY is as follows: * ttyUSB0: DIAG/QCDM port. * ttyUSB1: GNSS data. * ttyUSB2: AT-capable port (control). * ttyUSB3: QFLOG interface. * ttyUSB4: DAM interface. * ttyUSB5: AT-capable port (data). Signed-off-by: Aleksander Morgado Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 89b3192af326..01c5b452c6ea 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1819,6 +1819,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9003, 0xff) }, /* Simcom SIM7500/SIM7600 MBIM mode */ { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9011, 0xff), /* Simcom SIM7500/SIM7600 RNDIS mode */ .driver_info = RSVD(7) }, + { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9205, 0xff) }, /* Simcom SIM7070/SIM7080/SIM7090 AT+ECM mode */ + { USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9206, 0xff) }, /* Simcom SIM7070/SIM7080/SIM7090 AT-only mode */ { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D), From 2bb70f0a4b238323e4e2f392fc3ddeb5b7208c9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 29 Aug 2020 15:42:50 +0200 Subject: [PATCH 118/648] USB: serial: option: support dynamic Quectel USB compositions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The USB composition, defining the set of exported functions, is dynamic in newer Quectel modems. Default functions can be disabled and alternative functions can be enabled instead. The alternatives includes class functions using interface pairs, which should be handled by the respective class drivers. Active interfaces are numbered consecutively, so static blacklisting based on interface numbers will fail when the composition changes. An example of such an error, where the option driver has bound to the CDC ECM data interface, preventing cdc_ether from handling this function: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0125 Rev= 3.18 S: Manufacturer=Quectel S: Product=EC25-AF C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 4 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I:* If#= 5 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=option I: If#= 5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Another device with the same id gets correct drivers, since the interface of the network function happens to be blacklisted by option: T: Bus=01 Lev=02 Prnt=02 Port=01 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0125 Rev= 3.18 S: Manufacturer=Android S: Product=Android C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Change rules for EC21, EC25, BG96 and EG95 to match vendor specific serial functions only, to prevent binding to class functions. Require 2 endpoints on ff/ff/ff functions, avoiding the 3 endpoint QMI/RMNET network functions. Cc: AceLan Kao Cc: Sebastian Sjoholm Cc: Dan Williams Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 01c5b452c6ea..0c6f160a214a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1094,14 +1094,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, /* Quectel products using Quectel vendor ID */ - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), - .driver_info = RSVD(4) }, - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25), - .driver_info = RSVD(4) }, - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95), - .driver_info = RSVD(4) }, - { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), - .driver_info = RSVD(4) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, From ad7a7acaedcf45071c822b6c983f9c1e084041c9 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 24 Aug 2020 10:51:26 +0300 Subject: [PATCH 119/648] phy: omap-usb2-phy: disable PHY charger detect AM654x PG1.0 has a silicon bug that D+ is pulled high after POR, which could cause enumeration failure with some USB hubs. Disabling the USB2_PHY Charger Detect function will put D+ into the normal state. This addresses Silicon Errata: i2075 - "USB2PHY: USB2PHY Charger Detect is Enabled by Default Without VBUS Presence" Signed-off-by: Roger Quadros Tested-by: Jan Kiszka Link: https://lore.kernel.org/r/20200824075127.14902-2-rogerq@ti.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-omap-usb2.c | 47 +++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index cb2dd3230fa7..507f79d14adb 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -22,10 +22,15 @@ #include #include #include +#include #define USB2PHY_ANA_CONFIG1 0x4c #define USB2PHY_DISCON_BYP_LATCH BIT(31) +#define USB2PHY_CHRG_DET 0x14 +#define USB2PHY_CHRG_DET_USE_CHG_DET_REG BIT(29) +#define USB2PHY_CHRG_DET_DIS_CHG_DET BIT(28) + /* SoC Specific USB2_OTG register definitions */ #define AM654_USB2_OTG_PD BIT(8) #define AM654_USB2_VBUS_DET_EN BIT(5) @@ -43,6 +48,7 @@ #define OMAP_USB2_HAS_START_SRP BIT(0) #define OMAP_USB2_HAS_SET_VBUS BIT(1) #define OMAP_USB2_CALIBRATE_FALSE_DISCONNECT BIT(2) +#define OMAP_USB2_DISABLE_CHRG_DET BIT(3) struct omap_usb { struct usb_phy phy; @@ -236,6 +242,13 @@ static int omap_usb_init(struct phy *x) omap_usb_writel(phy->phy_base, USB2PHY_ANA_CONFIG1, val); } + if (phy->flags & OMAP_USB2_DISABLE_CHRG_DET) { + val = omap_usb_readl(phy->phy_base, USB2PHY_CHRG_DET); + val |= USB2PHY_CHRG_DET_USE_CHG_DET_REG | + USB2PHY_CHRG_DET_DIS_CHG_DET; + omap_usb_writel(phy->phy_base, USB2PHY_CHRG_DET, val); + } + return 0; } @@ -329,6 +342,26 @@ static const struct of_device_id omap_usb2_id_table[] = { }; MODULE_DEVICE_TABLE(of, omap_usb2_id_table); +static void omap_usb2_init_errata(struct omap_usb *phy) +{ + static const struct soc_device_attribute am65x_sr10_soc_devices[] = { + { .family = "AM65X", .revision = "SR1.0" }, + { /* sentinel */ } + }; + + /* + * Errata i2075: USB2PHY: USB2PHY Charger Detect is Enabled by + * Default Without VBUS Presence. + * + * AM654x SR1.0 has a silicon bug due to which D+ is pulled high after + * POR, which could cause enumeration failure with some USB hubs. + * Disabling the USB2_PHY Charger Detect function will put D+ + * into the normal state. + */ + if (soc_device_match(am65x_sr10_soc_devices)) + phy->flags |= OMAP_USB2_DISABLE_CHRG_DET; +} + static int omap_usb2_probe(struct platform_device *pdev) { struct omap_usb *phy; @@ -366,14 +399,14 @@ static int omap_usb2_probe(struct platform_device *pdev) phy->mask = phy_data->mask; phy->power_on = phy_data->power_on; phy->power_off = phy_data->power_off; + phy->flags = phy_data->flags; - if (phy_data->flags & OMAP_USB2_CALIBRATE_FALSE_DISCONNECT) { - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - phy->phy_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(phy->phy_base)) - return PTR_ERR(phy->phy_base); - phy->flags |= OMAP_USB2_CALIBRATE_FALSE_DISCONNECT; - } + omap_usb2_init_errata(phy); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + phy->phy_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(phy->phy_base)) + return PTR_ERR(phy->phy_base); phy->syscon_phy_power = syscon_regmap_lookup_by_phandle(node, "syscon-phy-power"); From 54f82df2ba86e2a8e9cbf4036d192366e3905c89 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:56 +0100 Subject: [PATCH 120/648] iio:adc:ti-adc081c Fix alignment and data leak issues One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv(). This data is allocated with kzalloc so no data can leak apart from previous readings. The eplicit alignment of ts is necessary to ensure correct padding on x86_32 where s64 is only aligned to 4 bytes. Fixes: 08e05d1fce5c ("ti-adc081c: Initial triggered buffer support") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/adc/ti-adc081c.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c index 9426f70a8005..cf63983a54d9 100644 --- a/drivers/iio/adc/ti-adc081c.c +++ b/drivers/iio/adc/ti-adc081c.c @@ -33,6 +33,12 @@ struct adc081c { /* 8, 10 or 12 */ int bits; + + /* Ensure natural alignment of buffer elements */ + struct { + u16 channel; + s64 ts __aligned(8); + } scan; }; #define REG_CONV_RES 0x00 @@ -128,14 +134,13 @@ static irqreturn_t adc081c_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adc081c *data = iio_priv(indio_dev); - u16 buf[8]; /* 2 bytes data + 6 bytes padding + 8 bytes timestamp */ int ret; ret = i2c_smbus_read_word_swapped(data->i2c, REG_CONV_RES); if (ret < 0) goto out; - buf[0] = ret; - iio_push_to_buffers_with_timestamp(indio_dev, buf, + data->scan.channel = ret; + iio_push_to_buffers_with_timestamp(indio_dev, &data->scan, iio_get_time_ns(indio_dev)); out: iio_trigger_notify_done(indio_dev->trig); From a661b571e3682705cb402a5cd1e970586a3ec00f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:57 +0100 Subject: [PATCH 121/648] iio:adc:ti-adc084s021 Fix alignment and data leak issues. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv(). This data is allocated with kzalloc so no data can leak apart from previous readings. The force alignment of ts is not strictly necessary in this case but reduces the fragility of the code. Fixes: 3691e5a69449 ("iio: adc: add driver for the ti-adc084s021 chip") Reported-by: Lars-Peter Clausen Cc: Mårten Lindahl Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/adc/ti-adc084s021.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-adc084s021.c b/drivers/iio/adc/ti-adc084s021.c index 9017e1e24273..dfba34834a57 100644 --- a/drivers/iio/adc/ti-adc084s021.c +++ b/drivers/iio/adc/ti-adc084s021.c @@ -26,6 +26,11 @@ struct adc084s021 { struct spi_transfer spi_trans; struct regulator *reg; struct mutex lock; + /* Buffer used to align data */ + struct { + __be16 channels[4]; + s64 ts __aligned(8); + } scan; /* * DMA (thus cache coherency maintenance) requires the * transfer buffers to live in their own cache line. @@ -141,14 +146,13 @@ static irqreturn_t adc084s021_buffer_trigger_handler(int irq, void *pollfunc) struct iio_poll_func *pf = pollfunc; struct iio_dev *indio_dev = pf->indio_dev; struct adc084s021 *adc = iio_priv(indio_dev); - __be16 data[8] = {0}; /* 4 * 16-bit words of data + 8 bytes timestamp */ mutex_lock(&adc->lock); - if (adc084s021_adc_conversion(adc, &data) < 0) + if (adc084s021_adc_conversion(adc, adc->scan.channels) < 0) dev_err(&adc->spi->dev, "Failed to read data\n"); - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, &adc->scan, iio_get_time_ns(indio_dev)); mutex_unlock(&adc->lock); iio_trigger_notify_done(indio_dev->trig); From f8cd222feb82ecd82dcf610fcc15186f55f9c2b5 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:51:02 +0100 Subject: [PATCH 122/648] iio:adc:ina2xx Fix timestamp alignment issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 32 byte array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak apart from previous readings. The explicit alignment isn't technically needed here, but it reduced fragility and avoids cut and paste into drivers where it will be needed. If we want this in older stables will need manual backport due to driver reworks. Fixes: c43a102e67db ("iio: ina2xx: add support for TI INA2xx Power Monitors") Reported-by: Lars-Peter Clausen Cc: Stefan Brüns Cc: Marc Titinger Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/adc/ina2xx-adc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 5ed63e874292..b573ec60a8b8 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -146,6 +146,11 @@ struct ina2xx_chip_info { int range_vbus; /* Bus voltage maximum in V */ int pga_gain_vshunt; /* Shunt voltage PGA gain */ bool allow_async_readout; + /* data buffer needs space for channel data and timestamp */ + struct { + u16 chan[4]; + u64 ts __aligned(8); + } scan; }; static const struct ina2xx_config ina2xx_config[] = { @@ -738,8 +743,6 @@ static int ina2xx_conversion_ready(struct iio_dev *indio_dev) static int ina2xx_work_buffer(struct iio_dev *indio_dev) { struct ina2xx_chip_info *chip = iio_priv(indio_dev); - /* data buffer needs space for channel data and timestap */ - unsigned short data[4 + sizeof(s64)/sizeof(short)]; int bit, ret, i = 0; s64 time; @@ -758,10 +761,10 @@ static int ina2xx_work_buffer(struct iio_dev *indio_dev) if (ret < 0) return ret; - data[i++] = val; + chip->scan.chan[i++] = val; } - iio_push_to_buffers_with_timestamp(indio_dev, data, time); + iio_push_to_buffers_with_timestamp(indio_dev, &chip->scan, time); return 0; }; From db8f06d97ec284dc018e2e4890d2e5035fde8630 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:51:03 +0100 Subject: [PATCH 123/648] iio:adc:max1118 Fix alignment of timestamp and data leak issues One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable structure in the iio_priv() data. This data is allocated with kzalloc so no data can leak apart from previous readings. The explicit alignment of ts is necessary to ensure correct padding on architectures where s64 is only 4 bytes aligned such as x86_32. Fixes: a9e9c7153e96 ("iio: adc: add max1117/max1118/max1119 ADC driver") Reported-by: Lars-Peter Clausen Cc: Akinobu Mita Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: --- drivers/iio/adc/max1118.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/max1118.c b/drivers/iio/adc/max1118.c index 01b20e420ac4..6efb0b43d938 100644 --- a/drivers/iio/adc/max1118.c +++ b/drivers/iio/adc/max1118.c @@ -36,6 +36,11 @@ struct max1118 { struct spi_device *spi; struct mutex lock; struct regulator *reg; + /* Ensure natural alignment of buffer elements */ + struct { + u8 channels[2]; + s64 ts __aligned(8); + } scan; u8 data ____cacheline_aligned; }; @@ -166,7 +171,6 @@ static irqreturn_t max1118_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct max1118 *adc = iio_priv(indio_dev); - u8 data[16] = { }; /* 2x 8-bit ADC data + padding + 8 bytes timestamp */ int scan_index; int i = 0; @@ -184,10 +188,10 @@ static irqreturn_t max1118_trigger_handler(int irq, void *p) goto out; } - data[i] = ret; + adc->scan.channels[i] = ret; i++; } - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, &adc->scan, iio_get_time_ns(indio_dev)); out: mutex_unlock(&adc->lock); From d53bfdf2f0b2ec2d4bcebc4f1a1257d05f78dc51 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 15 Aug 2020 20:13:55 +0200 Subject: [PATCH 124/648] iio: adc: meson-saradc: Use the parent device to look up the calib data On the older-gen 32-bit SoCs the meson-saradc driver is used to read the SoC temperature. This requires reading calibration data from the eFuse. Looking up the calibration data nvmem-cell requires the OF device_node pointer to be available in the struct device which is passed to devm_nvmem_cell_get(). This however got lost with commit 8cb631ccbb1952 ("iio: Remove superfluous of_node assignments") from indio_dev->dev. As devm_nvmem_cell_get() is called in the initialization phase the device_node is not yet available because the NVMEM cell is looked up before iio_device_register() is called (which would then set the device_node automatically). Use the parent device to look up the NVMEM cell instead to fix this issue. Fixes: 8cb631ccbb1952 ("iio: Remove superfluous of_node assignments") Signed-off-by: Martin Blumenstingl Signed-off-by: Jonathan Cameron --- drivers/iio/adc/meson_saradc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 93c2252c0b89..1a9189ba69ae 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -707,7 +707,7 @@ static int meson_sar_adc_temp_sensor_init(struct iio_dev *indio_dev) size_t read_len; int ret; - temperature_calib = devm_nvmem_cell_get(&indio_dev->dev, + temperature_calib = devm_nvmem_cell_get(indio_dev->dev.parent, "temperature_calib"); if (IS_ERR(temperature_calib)) { ret = PTR_ERR(temperature_calib); From 3f1093d83d7164e4705e4232ccf76da54adfda85 Mon Sep 17 00:00:00 2001 From: Angelo Compagnucci Date: Wed, 19 Aug 2020 09:55:25 +0200 Subject: [PATCH 125/648] iio: adc: mcp3422: fix locking scope Locking should be held for the entire reading sequence involving setting the channel, waiting for the channel switch and reading from the channel. If not, reading from a channel can result mixing with the reading from another channel. Fixes: 07914c84ba30 ("iio: adc: Add driver for Microchip MCP3422/3/4 high resolution ADC") Signed-off-by: Angelo Compagnucci Link: https://lore.kernel.org/r/20200819075525.1395248-1-angelo.compagnucci@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp3422.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c index 5f1706d1c3c0..219adac2384b 100644 --- a/drivers/iio/adc/mcp3422.c +++ b/drivers/iio/adc/mcp3422.c @@ -96,16 +96,12 @@ static int mcp3422_update_config(struct mcp3422 *adc, u8 newconfig) { int ret; - mutex_lock(&adc->lock); - ret = i2c_master_send(adc->i2c, &newconfig, 1); if (ret > 0) { adc->config = newconfig; ret = 0; } - mutex_unlock(&adc->lock); - return ret; } @@ -138,6 +134,8 @@ static int mcp3422_read_channel(struct mcp3422 *adc, u8 config; u8 req_channel = channel->channel; + mutex_lock(&adc->lock); + if (req_channel != MCP3422_CHANNEL(adc->config)) { config = adc->config; config &= ~MCP3422_CHANNEL_MASK; @@ -150,7 +148,11 @@ static int mcp3422_read_channel(struct mcp3422 *adc, msleep(mcp3422_read_times[MCP3422_SAMPLE_RATE(adc->config)]); } - return mcp3422_read(adc, value, &config); + ret = mcp3422_read(adc, value, &config); + + mutex_unlock(&adc->lock); + + return ret; } static int mcp3422_read_raw(struct iio_dev *iio, From 6396feabf7a4104a4ddfecc00b8aac535c631a66 Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Wed, 19 Aug 2020 17:40:33 +0300 Subject: [PATCH 126/648] habanalabs: prevent user buff overflow This commit fixes a potential debugfs issue that may occur when reading the clock gating mask into the user buffer since the user buffer size was not taken into consideration. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 37701e4f9d5a..aa77771635d3 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -982,7 +982,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, return 0; sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, + rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, strlen(tmp_buf) + 1); return rc; From 69c6e18d0ce9980c8c6708f1fdb4ba843f8df172 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 26 Aug 2020 17:34:26 +0300 Subject: [PATCH 127/648] habanalabs: fix report of RAZWI initiator coordinates All initiator coordinates received upon an 'MMU page fault RAZWI event' should be the routers coordinates, the only exception is the DMA initiators for which the reported coordinates correspond to their actual location. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/include/gaudi/gaudi_masks.h | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h index 13ef6b2887fd..3510c42d24e3 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h @@ -378,15 +378,15 @@ enum axi_id { ((((y) & RAZWI_INITIATOR_Y_MASK) << RAZWI_INITIATOR_Y_SHIFT) | \ (((x) & RAZWI_INITIATOR_X_MASK) << RAZWI_INITIATOR_X_SHIFT)) -#define RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0 RAZWI_INITIATOR_ID_X_Y(1, 0) -#define RAZWI_INITIATOR_ID_X_Y_TPC1 RAZWI_INITIATOR_ID_X_Y(2, 0) -#define RAZWI_INITIATOR_ID_X_Y_MME0_0 RAZWI_INITIATOR_ID_X_Y(3, 0) -#define RAZWI_INITIATOR_ID_X_Y_MME0_1 RAZWI_INITIATOR_ID_X_Y(4, 0) -#define RAZWI_INITIATOR_ID_X_Y_MME1_0 RAZWI_INITIATOR_ID_X_Y(5, 0) -#define RAZWI_INITIATOR_ID_X_Y_MME1_1 RAZWI_INITIATOR_ID_X_Y(6, 0) -#define RAZWI_INITIATOR_ID_X_Y_TPC2 RAZWI_INITIATOR_ID_X_Y(7, 0) +#define RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0 RAZWI_INITIATOR_ID_X_Y(1, 1) +#define RAZWI_INITIATOR_ID_X_Y_TPC1 RAZWI_INITIATOR_ID_X_Y(2, 1) +#define RAZWI_INITIATOR_ID_X_Y_MME0_0 RAZWI_INITIATOR_ID_X_Y(3, 1) +#define RAZWI_INITIATOR_ID_X_Y_MME0_1 RAZWI_INITIATOR_ID_X_Y(4, 1) +#define RAZWI_INITIATOR_ID_X_Y_MME1_0 RAZWI_INITIATOR_ID_X_Y(5, 1) +#define RAZWI_INITIATOR_ID_X_Y_MME1_1 RAZWI_INITIATOR_ID_X_Y(6, 1) +#define RAZWI_INITIATOR_ID_X_Y_TPC2 RAZWI_INITIATOR_ID_X_Y(7, 1) #define RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC \ - RAZWI_INITIATOR_ID_X_Y(8, 0) + RAZWI_INITIATOR_ID_X_Y(8, 1) #define RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0 RAZWI_INITIATOR_ID_X_Y(0, 1) #define RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0 RAZWI_INITIATOR_ID_X_Y(9, 1) #define RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1 RAZWI_INITIATOR_ID_X_Y(0, 2) @@ -395,14 +395,14 @@ enum axi_id { #define RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0 RAZWI_INITIATOR_ID_X_Y(9, 3) #define RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1 RAZWI_INITIATOR_ID_X_Y(0, 4) #define RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1 RAZWI_INITIATOR_ID_X_Y(9, 4) -#define RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2 RAZWI_INITIATOR_ID_X_Y(1, 5) -#define RAZWI_INITIATOR_ID_X_Y_TPC5 RAZWI_INITIATOR_ID_X_Y(2, 5) -#define RAZWI_INITIATOR_ID_X_Y_MME2_0 RAZWI_INITIATOR_ID_X_Y(3, 5) -#define RAZWI_INITIATOR_ID_X_Y_MME2_1 RAZWI_INITIATOR_ID_X_Y(4, 5) -#define RAZWI_INITIATOR_ID_X_Y_MME3_0 RAZWI_INITIATOR_ID_X_Y(5, 5) -#define RAZWI_INITIATOR_ID_X_Y_MME3_1 RAZWI_INITIATOR_ID_X_Y(6, 5) -#define RAZWI_INITIATOR_ID_X_Y_TPC6 RAZWI_INITIATOR_ID_X_Y(7, 5) -#define RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5 RAZWI_INITIATOR_ID_X_Y(8, 5) +#define RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2 RAZWI_INITIATOR_ID_X_Y(1, 6) +#define RAZWI_INITIATOR_ID_X_Y_TPC5 RAZWI_INITIATOR_ID_X_Y(2, 6) +#define RAZWI_INITIATOR_ID_X_Y_MME2_0 RAZWI_INITIATOR_ID_X_Y(3, 6) +#define RAZWI_INITIATOR_ID_X_Y_MME2_1 RAZWI_INITIATOR_ID_X_Y(4, 6) +#define RAZWI_INITIATOR_ID_X_Y_MME3_0 RAZWI_INITIATOR_ID_X_Y(5, 6) +#define RAZWI_INITIATOR_ID_X_Y_MME3_1 RAZWI_INITIATOR_ID_X_Y(6, 6) +#define RAZWI_INITIATOR_ID_X_Y_TPC6 RAZWI_INITIATOR_ID_X_Y(7, 6) +#define RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5 RAZWI_INITIATOR_ID_X_Y(8, 6) #define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1 From fd90e3808fd2c207560270c39b86b71af2231aa1 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Fri, 28 Aug 2020 14:50:32 +0200 Subject: [PATCH 128/648] drm/sun4i: Fix dsi dcs long write function It's writing too much data. regmap_bulk_write expects number of register sized chunks to write, not a byte sized length of the bounce buffer. Bounce buffer needs to be padded too, so that regmap_bulk_write will not read past the end of the buffer. Fixes: 133add5b5ad4 ("drm/sun4i: Add Allwinner A31 MIPI-DSI controller support") Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard Reviewed-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/20200828125032.937148-1-megous@megous.com --- drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c index aa67cb037e9d..32d4c3f7fc4e 100644 --- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c @@ -889,7 +889,7 @@ static int sun6i_dsi_dcs_write_long(struct sun6i_dsi *dsi, regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0), sun6i_dsi_dcs_build_pkt_hdr(dsi, msg)); - bounce = kzalloc(msg->tx_len + sizeof(crc), GFP_KERNEL); + bounce = kzalloc(ALIGN(msg->tx_len + sizeof(crc), 4), GFP_KERNEL); if (!bounce) return -ENOMEM; @@ -900,7 +900,7 @@ static int sun6i_dsi_dcs_write_long(struct sun6i_dsi *dsi, memcpy((u8 *)bounce + msg->tx_len, &crc, sizeof(crc)); len += sizeof(crc); - regmap_bulk_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(1), bounce, len); + regmap_bulk_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(1), bounce, DIV_ROUND_UP(len, 4)); regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, len + 4 - 1); kfree(bounce); From 3168c158ad3535af1cd7423c9f8cd5ac549f2f9c Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sun, 30 Aug 2020 17:03:04 -0700 Subject: [PATCH 129/648] libbpf: Fix build failure from uninitialized variable warning While compiling libbpf, some GCC versions (at least 8.4.0) have difficulty determining control flow and a emit warning for potentially uninitialized usage of 'map', which results in a build error if using "-Werror": In file included from libbpf.c:56: libbpf.c: In function '__bpf_object__open': libbpf_internal.h:59:2: warning: 'map' may be used uninitialized in this function [-Wmaybe-uninitialized] libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ ^~~~~~~~~~~~ libbpf.c:5032:18: note: 'map' was declared here struct bpf_map *map, *targ_map; ^~~ The warning/error is false based on code inspection, so silence it with a NULL initialization. Fixes: 646f02ffdd49 ("libbpf: Add BTF-defined map-in-map support") Reference: 063e68813391 ("libbpf: Fix false uninitialized variable warning") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200831000304.1696435-1-Tony.Ambardar@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0ad0b0491e1f..7253b833576c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5203,8 +5203,8 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, int i, j, nrels, new_sz; const struct btf_var_secinfo *vi = NULL; const struct btf_type *sec, *var, *def; + struct bpf_map *map = NULL, *targ_map; const struct btf_member *member; - struct bpf_map *map, *targ_map; const char *name, *mname; Elf_Data *symbols; unsigned int moff; From ea403fde7552bd61bad6ea45e3feb99db77cb31e Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 23 Aug 2020 17:14:53 +0800 Subject: [PATCH 130/648] scsi: pm8001: Fix memleak in pm8001_exec_internal_task_abort When pm8001_tag_alloc() fails, task should be freed just like it is done in the subsequent error paths. Link: https://lore.kernel.org/r/20200823091453.4782-1-dinghao.liu@zju.edu.cn Acked-by: Jack Wang Signed-off-by: Dinghao Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 337e79d6837f..9889bab7d31c 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -818,7 +818,7 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, res = pm8001_tag_alloc(pm8001_ha, &ccb_tag); if (res) - return res; + goto ex_err; ccb = &pm8001_ha->ccb_info[ccb_tag]; ccb->device = pm8001_dev; ccb->ccb_tag = ccb_tag; From 5a5b80f98534416b3b253859897e2ba1dc241e70 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Tue, 25 Aug 2020 02:39:40 -0700 Subject: [PATCH 131/648] scsi: libfc: Fix for double free() Fix for '&fp->skb' double free. Link: https://lore.kernel.org/r/20200825093940.19612-1-jhasan@marvell.com Reported-by: Dan Carpenter Signed-off-by: Javed Hasan Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_disc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index d8cbc9c0e766..e67abb184a8a 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -634,8 +634,6 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_free(fp); out: kref_put(&rdata->kref, fc_rport_destroy); - if (!IS_ERR(fp)) - fc_frame_free(fp); } /** From 2a87d485c4cb4d1b34be6c278a1c6ce3e15c8b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Rebe?= Date: Thu, 27 Aug 2020 22:27:29 +0200 Subject: [PATCH 132/648] scsi: qla2xxx: Fix regression on sparc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 98aee70d19a7 ("qla2xxx: Add endianizer to max_payload_size modifier.") in 2014 broke qla2xxx on sparc64, e.g. as in the Sun Blade 1000 / 2000. Unbreak by partial revert to fix endianness in nvram firmware default initialization. Also mark the second frame_payload_size in nvram_t __le16 to avoid new sparse warnings. Link: https://lore.kernel.org/r/20200827.222729.1875148247374704975.rene@exactcode.com Fixes: 98aee70d19a7 ("qla2xxx: Add endianizer to max_payload_size modifier.") Reviewed-by: Himanshu Madhani Reviewed-by: Bart Van Assche Acked-by: Arun Easi Signed-off-by: René Rebe Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 2 +- drivers/scsi/qla2xxx/qla_init.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1bc090d8a71b..a165120d2976 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1626,7 +1626,7 @@ typedef struct { */ uint8_t firmware_options[2]; - uint16_t frame_payload_size; + __le16 frame_payload_size; __le16 max_iocb_allocation; __le16 execution_throttle; uint8_t retry_count; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 57a2d76aa691..4ff75d9fc125 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4603,18 +4603,18 @@ qla2x00_nvram_config(scsi_qla_host_t *vha) nv->firmware_options[1] = BIT_7 | BIT_5; nv->add_firmware_options[0] = BIT_5; nv->add_firmware_options[1] = BIT_5 | BIT_4; - nv->frame_payload_size = 2048; + nv->frame_payload_size = cpu_to_le16(2048); nv->special_options[1] = BIT_7; } else if (IS_QLA2200(ha)) { nv->firmware_options[0] = BIT_2 | BIT_1; nv->firmware_options[1] = BIT_7 | BIT_5; nv->add_firmware_options[0] = BIT_5; nv->add_firmware_options[1] = BIT_5 | BIT_4; - nv->frame_payload_size = 1024; + nv->frame_payload_size = cpu_to_le16(1024); } else if (IS_QLA2100(ha)) { nv->firmware_options[0] = BIT_3 | BIT_1; nv->firmware_options[1] = BIT_5; - nv->frame_payload_size = 1024; + nv->frame_payload_size = cpu_to_le16(1024); } nv->max_iocb_allocation = cpu_to_le16(256); From 7ac836ebcb1509845fe7d66314f469f8e709da93 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 28 Aug 2020 10:53:29 -0700 Subject: [PATCH 133/648] scsi: lpfc: Fix setting IRQ affinity with an empty CPU mask Some systems are reporting the following log message during driver unload or system shutdown: ics_rtas_set_affinity: No online cpus in the mask A prior commit introduced the writing of an empty affinity mask in calls to irq_set_affinity_hint() when disabling interrupts or when there are no remaining online CPUs to service an eq interrupt. At least some ppc64 systems are checking whether affinity masks are empty or not. Do not call irq_set_affinity_hint() with an empty CPU mask. Fixes: dcaa21367938 ("scsi: lpfc: Change default IRQ model on AMD architectures") Link: https://lore.kernel.org/r/20200828175332.130300-2-james.smart@broadcom.com Cc: # v5.5+ Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index c69725999315..ca25e54bb782 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -11376,7 +11376,6 @@ lpfc_irq_clear_aff(struct lpfc_hba_eq_hdl *eqhdl) { cpumask_clear(&eqhdl->aff_mask); irq_clear_status_flags(eqhdl->irq, IRQ_NO_BALANCING); - irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); } /** From 7b08e89f98cee9907895fabb64cf437bc505ce9a Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 28 Aug 2020 10:53:30 -0700 Subject: [PATCH 134/648] scsi: lpfc: Fix FLOGI/PLOGI receive race condition in pt2pt discovery The driver is unable to successfully login with remote device. During pt2pt login, the driver completes its FLOGI request with the remote device having WWN precedence. The remote device issues its own (delayed) FLOGI after accepting the driver's and, upon transmitting the FLOGI, immediately recognizes it has already processed the driver's FLOGI thus it transitions to sending a PLOGI before waiting for an ACC to its FLOGI. In the driver, the FLOGI is received and an ACC sent, followed by the PLOGI being received and an ACC sent. The issue is that the PLOGI reception occurs before the response from the adapter from the FLOGI ACC is received. Processing of the PLOGI sets state flags to perform the REG_RPI mailbox command and proceed with the rest of discovery on the port. The same completion routine used by both FLOGI and PLOGI is generic in nature. One of the things it does is clear flags, and those flags happen to drive the rest of discovery. So what happened was the PLOGI processing set the flags, the FLOGI ACC completion cleared them, thus when the PLOGI ACC completes it doesn't see the flags and stops. Fix by modifying the generic completion routine to not clear the rest of discovery flag (NLP_ACC_REGLOGIN) unless the completion is also associated with performing a mailbox command as part of its handling. For things such as FLOGI ACC, there isn't a subsequent action to perform with the adapter, thus there is no mailbox cmd ptr. PLOGI ACC though will perform REG_RPI upon completion, thus there is a mailbox cmd ptr. Link: https://lore.kernel.org/r/20200828175332.130300-3-james.smart@broadcom.com Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 48dc63f22cca..abc6200d8881 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4656,7 +4656,9 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, out: if (ndlp && NLP_CHK_NODE_ACT(ndlp) && shost) { spin_lock_irq(shost->host_lock); - ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI); + if (mbox) + ndlp->nlp_flag &= ~NLP_ACC_REGLOGIN; + ndlp->nlp_flag &= ~NLP_RM_DFLT_RPI; spin_unlock_irq(shost->host_lock); /* If the node is not being used by another discovery thread, From 441f6b5b097d74a8aa72ec0d8992ef820e2b3773 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 28 Aug 2020 10:53:31 -0700 Subject: [PATCH 135/648] scsi: lpfc: Extend the RDF FPIN Registration descriptor for additional events Currently the driver registers for Link Integrity events only. This patch adds registration for the following FPIN types: - Delivery Notifications - Congestion Notification - Peer Congestion Notification Link: https://lore.kernel.org/r/20200828175332.130300-4-james.smart@broadcom.com Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 3 +++ drivers/scsi/lpfc/lpfc_hw4.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index abc6200d8881..f4e274eb6c9c 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3517,6 +3517,9 @@ lpfc_issue_els_rdf(struct lpfc_vport *vport, uint8_t retry) FC_TLV_DESC_LENGTH_FROM_SZ(prdf->reg_d1)); prdf->reg_d1.reg_desc.count = cpu_to_be32(ELS_RDF_REG_TAG_CNT); prdf->reg_d1.desc_tags[0] = cpu_to_be32(ELS_DTAG_LNK_INTEGRITY); + prdf->reg_d1.desc_tags[1] = cpu_to_be32(ELS_DTAG_DELIVERY); + prdf->reg_d1.desc_tags[2] = cpu_to_be32(ELS_DTAG_PEER_CONGEST); + prdf->reg_d1.desc_tags[3] = cpu_to_be32(ELS_DTAG_CONGESTION); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Issue RDF: did:x%x", diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index c4ba8273a63f..12e4e76233e6 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -4800,7 +4800,7 @@ struct send_frame_wqe { uint32_t fc_hdr_wd5; /* word 15 */ }; -#define ELS_RDF_REG_TAG_CNT 1 +#define ELS_RDF_REG_TAG_CNT 4 struct lpfc_els_rdf_reg_desc { struct fc_df_desc_fpin_reg reg_desc; /* descriptor header */ __be32 desc_tags[ELS_RDF_REG_TAG_CNT]; From bc534069374966e6df0862c2f039e17cb4c2133a Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 28 Aug 2020 10:53:32 -0700 Subject: [PATCH 136/648] scsi: lpfc: Update lpfc version to 12.8.0.4 Update lpfc version to 12.8.0.4 Link: https://lore.kernel.org/r/20200828175332.130300-5-james.smart@broadcom.com Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 20adec4387f0..c657abf22b75 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "12.8.0.3" +#define LPFC_DRIVER_VERSION "12.8.0.4" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 45df80d7605c25055a85fbc5a8446c81c6c0ca24 Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Mon, 31 Aug 2020 04:55:39 +0300 Subject: [PATCH 137/648] misc: eeprom: at24: register nvmem only after eeprom is ready to use During nvmem_register() the nvmem core sends notifications when: - cell added - nvmem added and during these notifications some callback func may access the nvmem device, which will fail in case of at24 eeprom because regulator and pm are enabled after nvmem_register(). Fixes: cd5676db0574 ("misc: eeprom: at24: support pm_runtime control") Fixes: b20eb4c1f026 ("eeprom: at24: drop unnecessary label") Cc: stable@vger.kernel.org Signed-off-by: Vadym Kochan Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 2591c21b2b5d..26a23abc053d 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -692,10 +692,6 @@ static int at24_probe(struct i2c_client *client) nvmem_config.word_size = 1; nvmem_config.size = byte_len; - at24->nvmem = devm_nvmem_register(dev, &nvmem_config); - if (IS_ERR(at24->nvmem)) - return PTR_ERR(at24->nvmem); - i2c_set_clientdata(client, at24); err = regulator_enable(at24->vcc_reg); @@ -708,6 +704,13 @@ static int at24_probe(struct i2c_client *client) pm_runtime_set_active(dev); pm_runtime_enable(dev); + at24->nvmem = devm_nvmem_register(dev, &nvmem_config); + if (IS_ERR(at24->nvmem)) { + pm_runtime_disable(dev); + regulator_disable(at24->vcc_reg); + return PTR_ERR(at24->nvmem); + } + /* * Perform a one-byte test read to verify that the * chip is functional. From 6546d28f0ef2ea81f6668fa5477cb99cd5e97b59 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 23 Aug 2020 16:41:59 -0700 Subject: [PATCH 138/648] Documentation: fix dma-buf.rst underline length warning /home/rdunlap/lnx/lnx-59-rc2/Documentation/driver-api/dma-buf.rst:182: WARNING: Title underline too short. Indefinite DMA Fences ~~~~~~~~~~~~~~~~~~~~ Fixes: 72b6ede73623 ("dma-buf.rst: Document why indefinite fences are a bad idea") Signed-off-by: Randy Dunlap Cc: Daniel Vetter Cc: Daniel Vetter Cc: Dave Airlie Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1b22d4c3-4ea5-c633-9e35-71ce65d8dbcc@infradead.org --- Documentation/driver-api/dma-buf.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 100bfd227265..13ea0cc0a3fa 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -179,7 +179,7 @@ DMA Fence uABI/Sync File :internal: Indefinite DMA Fences -~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ At various times &dma_fence with an indefinite time until dma_fence_wait() finishes have been proposed. Examples include: From 4e4bb894467cd2a08fc3116407b13e6bc23a7119 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 1 Sep 2020 12:53:42 +0200 Subject: [PATCH 139/648] clang-format: Update with the latest for_each macro list Re-run the shell fragment that generated the original list. Signed-off-by: Miguel Ojeda --- .clang-format | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.clang-format b/.clang-format index a0a96088c74f..badfc1ba440a 100644 --- a/.clang-format +++ b/.clang-format @@ -111,6 +111,7 @@ ForEachMacros: - 'css_for_each_descendant_pre' - 'device_for_each_child_node' - 'dma_fence_chain_for_each' + - 'do_for_each_ftrace_op' - 'drm_atomic_crtc_for_each_plane' - 'drm_atomic_crtc_state_for_each_plane' - 'drm_atomic_crtc_state_for_each_plane_state' @@ -136,6 +137,7 @@ ForEachMacros: - 'for_each_active_dev_scope' - 'for_each_active_drhd_unit' - 'for_each_active_iommu' + - 'for_each_aggr_pgid' - 'for_each_available_child_of_node' - 'for_each_bio' - 'for_each_board_func_rsrc' @@ -234,6 +236,7 @@ ForEachMacros: - 'for_each_node_state' - 'for_each_node_with_cpus' - 'for_each_node_with_property' + - 'for_each_nonreserved_multicast_dest_pgid' - 'for_each_of_allnodes' - 'for_each_of_allnodes_from' - 'for_each_of_cpu_node' @@ -256,6 +259,7 @@ ForEachMacros: - 'for_each_pci_dev' - 'for_each_pci_msi_entry' - 'for_each_pcm_streams' + - 'for_each_physmem_range' - 'for_each_populated_zone' - 'for_each_possible_cpu' - 'for_each_present_cpu' @@ -265,6 +269,8 @@ ForEachMacros: - 'for_each_process_thread' - 'for_each_property_of_node' - 'for_each_registered_fb' + - 'for_each_requested_gpio' + - 'for_each_requested_gpio_in_range' - 'for_each_reserved_mem_region' - 'for_each_rtd_codec_dais' - 'for_each_rtd_codec_dais_rollback' @@ -278,12 +284,17 @@ ForEachMacros: - 'for_each_sg' - 'for_each_sg_dma_page' - 'for_each_sg_page' + - 'for_each_sgtable_dma_page' + - 'for_each_sgtable_dma_sg' + - 'for_each_sgtable_page' + - 'for_each_sgtable_sg' - 'for_each_sibling_event' - 'for_each_subelement' - 'for_each_subelement_extid' - 'for_each_subelement_id' - '__for_each_thread' - 'for_each_thread' + - 'for_each_unicast_dest_pgid' - 'for_each_wakeup_source' - 'for_each_zone' - 'for_each_zone_zonelist' @@ -464,6 +475,7 @@ ForEachMacros: - 'v4l2_m2m_for_each_src_buf' - 'v4l2_m2m_for_each_src_buf_safe' - 'virtio_device_for_each_vq' + - 'while_for_each_ftrace_op' - 'xa_for_each' - 'xa_for_each_marked' - 'xa_for_each_range' From 811c5494436789e7149487c06e0602b507ce274b Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 27 Aug 2020 19:33:56 +0200 Subject: [PATCH 140/648] ASoC: wm8994: Skip setting of the WM8994_MICBIAS register for WM1811 The WM8994_MICBIAS register is not available in the WM1811 CODEC so skip initialization of that register for that device. This suppresses an error during boot: "wm8994-codec: ASoC: error at snd_soc_component_update_bits on wm8994-codec" Signed-off-by: Sylwester Nawrocki Acked-by: Krzysztof Kozlowski Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20200827173357.31891-1-s.nawrocki@samsung.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8994.c | 2 ++ sound/soc/codecs/wm_hubs.c | 3 +++ sound/soc/codecs/wm_hubs.h | 1 + 3 files changed, 6 insertions(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 038be667c1a6..b3ba0536736e 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -4193,11 +4193,13 @@ static int wm8994_component_probe(struct snd_soc_component *component) wm8994->hubs.dcs_readback_mode = 2; break; } + wm8994->hubs.micd_scthr = true; break; case WM8958: wm8994->hubs.dcs_readback_mode = 1; wm8994->hubs.hp_startup_mode = 1; + wm8994->hubs.micd_scthr = true; switch (control->revision) { case 0: diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index 891effe220fe..0c881846f485 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -1223,6 +1223,9 @@ int wm_hubs_handle_analogue_pdata(struct snd_soc_component *component, snd_soc_component_update_bits(component, WM8993_ADDITIONAL_CONTROL, WM8993_LINEOUT2_FB, WM8993_LINEOUT2_FB); + if (!hubs->micd_scthr) + return 0; + snd_soc_component_update_bits(component, WM8993_MICBIAS, WM8993_JD_SCTHR_MASK | WM8993_JD_THR_MASK | WM8993_MICB1_LVL | WM8993_MICB2_LVL, diff --git a/sound/soc/codecs/wm_hubs.h b/sound/soc/codecs/wm_hubs.h index 4b8e5f0d6e32..988b29e63060 100644 --- a/sound/soc/codecs/wm_hubs.h +++ b/sound/soc/codecs/wm_hubs.h @@ -27,6 +27,7 @@ struct wm_hubs_data { int hp_startup_mode; int series_startup; int no_series_update; + bool micd_scthr; bool no_cache_dac_hp_direct; struct list_head dcs_cache; From f5a2cda4f1db89776b64c4f0f2c2ac609527ac70 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Thu, 27 Aug 2020 19:33:57 +0200 Subject: [PATCH 141/648] ASoC: wm8994: Ensure the device is resumed in wm89xx_mic_detect functions When the wm8958_mic_detect, wm8994_mic_detect functions get called from the machine driver, e.g. from the card's late_probe() callback, the CODEC device may be PM runtime suspended and any regmap writes have no effect. Add PM runtime calls to these functions to ensure the device registers are updated as expected. This suppresses an error during boot "wm8994-codec: ASoC: error at snd_soc_component_update_bits on wm8994-codec" caused by the regmap access error due to the cache_only flag being set. Signed-off-by: Sylwester Nawrocki Acked-by: Krzysztof Kozlowski Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20200827173357.31891-2-s.nawrocki@samsung.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8994.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index b3ba0536736e..fc9ea198ac79 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3514,6 +3514,8 @@ int wm8994_mic_detect(struct snd_soc_component *component, struct snd_soc_jack * return -EINVAL; } + pm_runtime_get_sync(component->dev); + switch (micbias) { case 1: micdet = &wm8994->micdet[0]; @@ -3561,6 +3563,8 @@ int wm8994_mic_detect(struct snd_soc_component *component, struct snd_soc_jack * snd_soc_dapm_sync(dapm); + pm_runtime_put(component->dev); + return 0; } EXPORT_SYMBOL_GPL(wm8994_mic_detect); @@ -3932,6 +3936,8 @@ int wm8958_mic_detect(struct snd_soc_component *component, struct snd_soc_jack * return -EINVAL; } + pm_runtime_get_sync(component->dev); + if (jack) { snd_soc_dapm_force_enable_pin(dapm, "CLK_SYS"); snd_soc_dapm_sync(dapm); @@ -4000,6 +4006,8 @@ int wm8958_mic_detect(struct snd_soc_component *component, struct snd_soc_jack * snd_soc_dapm_sync(dapm); } + pm_runtime_put(component->dev); + return 0; } EXPORT_SYMBOL_GPL(wm8958_mic_detect); From 0a7416f94707c60b9f66b01c0a505b7e41375f3a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 31 Aug 2020 23:43:35 +0300 Subject: [PATCH 142/648] regulator: core: Fix slab-out-of-bounds in regulator_unlock_recursive() The recent commit 7d8196641ee1 ("regulator: Remove pointer table overallocation") changed the size of coupled_rdevs and now KASAN is able to detect slab-out-of-bounds problem in regulator_unlock_recursive(), which is a legit problem caused by a typo in the code. The recursive unlock function uses n_coupled value of a parent regulator for unlocking supply regulator, while supply's n_coupled should be used. In practice problem may only affect platforms that use coupled regulators. Cc: stable@vger.kernel.org # 5.0+ Fixes: f8702f9e4aa7 ("regulator: core: Use ww_mutex for regulators locking") Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20200831204335.19489-1-digetx@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/core.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 6789d1efdf5d..0e764596b0c0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -236,8 +236,8 @@ static bool regulator_supply_is_couple(struct regulator_dev *rdev) static void regulator_unlock_recursive(struct regulator_dev *rdev, unsigned int n_coupled) { - struct regulator_dev *c_rdev; - int i; + struct regulator_dev *c_rdev, *supply_rdev; + int i, supply_n_coupled; for (i = n_coupled; i > 0; i--) { c_rdev = rdev->coupling_desc.coupled_rdevs[i - 1]; @@ -245,10 +245,13 @@ static void regulator_unlock_recursive(struct regulator_dev *rdev, if (!c_rdev) continue; - if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) - regulator_unlock_recursive( - c_rdev->supply->rdev, - c_rdev->coupling_desc.n_coupled); + if (c_rdev->supply && !regulator_supply_is_couple(c_rdev)) { + supply_rdev = c_rdev->supply->rdev; + supply_n_coupled = supply_rdev->coupling_desc.n_coupled; + + regulator_unlock_recursive(supply_rdev, + supply_n_coupled); + } regulator_unlock(c_rdev); } From fc1f178cdb31783ff37296ecae817a1045a1a513 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 1 Sep 2020 22:45:17 +1000 Subject: [PATCH 143/648] selftests/powerpc: Skip PROT_SAO test in guests/LPARS In commit 9b725a90a8f1 ("powerpc/64s: Disallow PROT_SAO in LPARs by default") PROT_SAO was disabled in guests/LPARs by default. So skip the test if we are running in a guest to avoid a spurious failure. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200901124653.523182-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/mm/prot_sao.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c index e0cf8ebbf8cd..30b71b1d78d5 100644 --- a/tools/testing/selftests/powerpc/mm/prot_sao.c +++ b/tools/testing/selftests/powerpc/mm/prot_sao.c @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -18,9 +19,13 @@ int test_prot_sao(void) { char *p; - /* SAO was introduced in 2.06 and removed in 3.1 */ + /* + * SAO was introduced in 2.06 and removed in 3.1. It's disabled in + * guests/LPARs by default, so also skip if we are running in a guest. + */ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) || - have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + have_hwcap2(PPC_FEATURE2_ARCH_3_1) || + access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0); /* * Ensure we can ask for PROT_SAO. From a139ffa40f0c24b753838b8ef3dcf6ad10eb7854 Mon Sep 17 00:00:00 2001 From: Angelo Compagnucci Date: Tue, 1 Sep 2020 11:32:18 +0200 Subject: [PATCH 144/648] iio: adc: mcp3422: fix locking on error path Reading from the chip should be unlocked on error path else the lock could never being released. Fixes: 07914c84ba30 ("iio: adc: Add driver for Microchip MCP3422/3/4 high resolution ADC") Fixes: 3f1093d83d71 ("iio: adc: mcp3422: fix locking scope") Acked-by: Jonathan Cameron Signed-off-by: Angelo Compagnucci Link: https://lore.kernel.org/r/20200901093218.1500845-1-angelo.compagnucci@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/mcp3422.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c index 219adac2384b..da353dcb1e9d 100644 --- a/drivers/iio/adc/mcp3422.c +++ b/drivers/iio/adc/mcp3422.c @@ -143,8 +143,10 @@ static int mcp3422_read_channel(struct mcp3422 *adc, config &= ~MCP3422_PGA_MASK; config |= MCP3422_PGA_VALUE(adc->pga[req_channel]); ret = mcp3422_update_config(adc, config); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&adc->lock); return ret; + } msleep(mcp3422_read_times[MCP3422_SAMPLE_RATE(adc->config)]); } From 2569231d71dff82cfd6e82ab3871776f72ec53b6 Mon Sep 17 00:00:00 2001 From: Camel Guo Date: Tue, 1 Sep 2020 15:57:35 +0200 Subject: [PATCH 145/648] ASoC: tlv320adcx140: Fix accessing uninitialized adcx140->dev In adcx140_i2c_probe, adcx140->dev is accessed before its initialization. This commit fixes this bug. Fixes: 689c7655b50c ("ASoC: tlv320adcx140: Add the tlv320adcx140 codec driver family") Acked-by: Dan Murphy Signed-off-by: Camel Guo Link: https://lore.kernel.org/r/20200901135736.32036-1-camel.guo@axis.com Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320adcx140.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320adcx140.c b/sound/soc/codecs/tlv320adcx140.c index 5cd50d841177..7ae6ec374be3 100644 --- a/sound/soc/codecs/tlv320adcx140.c +++ b/sound/soc/codecs/tlv320adcx140.c @@ -980,6 +980,8 @@ static int adcx140_i2c_probe(struct i2c_client *i2c, if (!adcx140) return -ENOMEM; + adcx140->dev = &i2c->dev; + adcx140->gpio_reset = devm_gpiod_get_optional(adcx140->dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(adcx140->gpio_reset)) @@ -1007,7 +1009,7 @@ static int adcx140_i2c_probe(struct i2c_client *i2c, ret); return ret; } - adcx140->dev = &i2c->dev; + i2c_set_clientdata(i2c, adcx140); return devm_snd_soc_register_component(&i2c->dev, From 154549558a622b31702fcaa01ccd85e6e34073de Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Tue, 1 Sep 2020 17:30:41 +0200 Subject: [PATCH 146/648] ASoC: Intel: haswell: Fix power transition refactor While addressing existing power-cycle limitations for sound/soc/intel/haswell solution, change brings regression for standard audio userspace flows e.g.: when using PulseAudio. Occasional sound-card initialization fail is still better than permanent audio distortions, so revert the change. Fixes: 8ec7d6043263 ("ASoC: Intel: haswell: Power transition refactor") Reported-by: Christian Bundy Signed-off-by: Cezary Rojewski Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200901153041.14771-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/haswell/sst-haswell-dsp.c | 187 ++++++++++------------ 1 file changed, 82 insertions(+), 105 deletions(-) diff --git a/sound/soc/intel/haswell/sst-haswell-dsp.c b/sound/soc/intel/haswell/sst-haswell-dsp.c index de80e19454c1..88c3f63bded9 100644 --- a/sound/soc/intel/haswell/sst-haswell-dsp.c +++ b/sound/soc/intel/haswell/sst-haswell-dsp.c @@ -243,92 +243,45 @@ static irqreturn_t hsw_irq(int irq, void *context) return ret; } -#define CSR_DEFAULT_VALUE 0x8480040E -#define ISC_DEFAULT_VALUE 0x0 -#define ISD_DEFAULT_VALUE 0x0 -#define IMC_DEFAULT_VALUE 0x7FFF0003 -#define IMD_DEFAULT_VALUE 0x7FFF0003 -#define IPCC_DEFAULT_VALUE 0x0 -#define IPCD_DEFAULT_VALUE 0x0 -#define CLKCTL_DEFAULT_VALUE 0x7FF -#define CSR2_DEFAULT_VALUE 0x0 -#define LTR_CTRL_DEFAULT_VALUE 0x0 -#define HMD_CTRL_DEFAULT_VALUE 0x0 - -static void hsw_set_shim_defaults(struct sst_dsp *sst) -{ - sst_dsp_shim_write_unlocked(sst, SST_CSR, CSR_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_ISRX, ISC_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_ISRD, ISD_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_IMRX, IMC_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_IMRD, IMD_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_IPCX, IPCC_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_IPCD, IPCD_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_CLKCTL, CLKCTL_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_CSR2, CSR2_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_LTRC, LTR_CTRL_DEFAULT_VALUE); - sst_dsp_shim_write_unlocked(sst, SST_HMDC, HMD_CTRL_DEFAULT_VALUE); -} - -/* all clock-gating minus DCLCGE and DTCGE */ -#define SST_VDRTCL2_CG_OTHER 0xB7D - static void hsw_set_dsp_D3(struct sst_dsp *sst) { + u32 val; u32 reg; - /* disable clock core gating */ + /* Disable core clock gating (VDRTCTL2.DCLCGE = 0) */ reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg &= ~(SST_VDRTCL2_DCLCGE); + reg &= ~(SST_VDRTCL2_DCLCGE | SST_VDRTCL2_DTCGE); writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - /* stall, reset and set 24MHz XOSC */ - sst_dsp_shim_update_bits_unlocked(sst, SST_CSR, - SST_CSR_24MHZ_LPCS | SST_CSR_STALL | SST_CSR_RST, - SST_CSR_24MHZ_LPCS | SST_CSR_STALL | SST_CSR_RST); + /* enable power gating and switch off DRAM & IRAM blocks */ + val = readl(sst->addr.pci_cfg + SST_VDRTCTL0); + val |= SST_VDRTCL0_DSRAMPGE_MASK | + SST_VDRTCL0_ISRAMPGE_MASK; + val &= ~(SST_VDRTCL0_D3PGD | SST_VDRTCL0_D3SRAMPGD); + writel(val, sst->addr.pci_cfg + SST_VDRTCTL0); - /* DRAM power gating all */ - reg = readl(sst->addr.pci_cfg + SST_VDRTCTL0); - reg |= SST_VDRTCL0_ISRAMPGE_MASK | - SST_VDRTCL0_DSRAMPGE_MASK; - reg &= ~(SST_VDRTCL0_D3SRAMPGD); - reg |= SST_VDRTCL0_D3PGD; - writel(reg, sst->addr.pci_cfg + SST_VDRTCTL0); - udelay(50); + /* switch off audio PLL */ + val = readl(sst->addr.pci_cfg + SST_VDRTCTL2); + val |= SST_VDRTCL2_APLLSE_MASK; + writel(val, sst->addr.pci_cfg + SST_VDRTCTL2); - /* PLL shutdown enable */ - reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg |= SST_VDRTCL2_APLLSE_MASK; - writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - - /* disable MCLK */ + /* disable MCLK(clkctl.smos = 0) */ sst_dsp_shim_update_bits_unlocked(sst, SST_CLKCTL, - SST_CLKCTL_MASK, 0); + SST_CLKCTL_MASK, 0); - /* switch clock gating */ - reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg |= SST_VDRTCL2_CG_OTHER; - reg &= ~(SST_VDRTCL2_DTCGE); - writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - /* enable DTCGE separatelly */ - reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg |= SST_VDRTCL2_DTCGE; - writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - - /* set shim defaults */ - hsw_set_shim_defaults(sst); - - /* set D3 */ - reg = readl(sst->addr.pci_cfg + SST_PMCS); - reg |= SST_PMCS_PS_MASK; - writel(reg, sst->addr.pci_cfg + SST_PMCS); + /* Set D3 state, delay 50 us */ + val = readl(sst->addr.pci_cfg + SST_PMCS); + val |= SST_PMCS_PS_MASK; + writel(val, sst->addr.pci_cfg + SST_PMCS); udelay(50); - /* enable clock core gating */ + /* Enable core clock gating (VDRTCTL2.DCLCGE = 1), delay 50 us */ reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg |= SST_VDRTCL2_DCLCGE; + reg |= SST_VDRTCL2_DCLCGE | SST_VDRTCL2_DTCGE; writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); + udelay(50); + } static void hsw_reset(struct sst_dsp *sst) @@ -346,62 +299,75 @@ static void hsw_reset(struct sst_dsp *sst) SST_CSR_RST | SST_CSR_STALL, SST_CSR_STALL); } -/* recommended CSR state for power-up */ -#define SST_CSR_D0_MASK (0x18A09C0C | SST_CSR_DCS_MASK) - static int hsw_set_dsp_D0(struct sst_dsp *sst) { - u32 reg; + int tries = 10; + u32 reg, fw_dump_bit; - /* disable clock core gating */ + /* Disable core clock gating (VDRTCTL2.DCLCGE = 0) */ reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg &= ~(SST_VDRTCL2_DCLCGE); + reg &= ~(SST_VDRTCL2_DCLCGE | SST_VDRTCL2_DTCGE); writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - /* switch clock gating */ - reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg |= SST_VDRTCL2_CG_OTHER; - reg &= ~(SST_VDRTCL2_DTCGE); - writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - - /* set D0 */ - reg = readl(sst->addr.pci_cfg + SST_PMCS); - reg &= ~(SST_PMCS_PS_MASK); - writel(reg, sst->addr.pci_cfg + SST_PMCS); - - /* DRAM power gating none */ + /* Disable D3PG (VDRTCTL0.D3PGD = 1) */ reg = readl(sst->addr.pci_cfg + SST_VDRTCTL0); - reg &= ~(SST_VDRTCL0_ISRAMPGE_MASK | - SST_VDRTCL0_DSRAMPGE_MASK); - reg |= SST_VDRTCL0_D3SRAMPGD; reg |= SST_VDRTCL0_D3PGD; writel(reg, sst->addr.pci_cfg + SST_VDRTCTL0); - mdelay(10); - /* set shim defaults */ - hsw_set_shim_defaults(sst); + /* Set D0 state */ + reg = readl(sst->addr.pci_cfg + SST_PMCS); + reg &= ~SST_PMCS_PS_MASK; + writel(reg, sst->addr.pci_cfg + SST_PMCS); - /* restore MCLK */ + /* check that ADSP shim is enabled */ + while (tries--) { + reg = readl(sst->addr.pci_cfg + SST_PMCS) & SST_PMCS_PS_MASK; + if (reg == 0) + goto finish; + + msleep(1); + } + + return -ENODEV; + +finish: + /* select SSP1 19.2MHz base clock, SSP clock 0, turn off Low Power Clock */ + sst_dsp_shim_update_bits_unlocked(sst, SST_CSR, + SST_CSR_S1IOCS | SST_CSR_SBCS1 | SST_CSR_LPCS, 0x0); + + /* stall DSP core, set clk to 192/96Mhz */ + sst_dsp_shim_update_bits_unlocked(sst, + SST_CSR, SST_CSR_STALL | SST_CSR_DCS_MASK, + SST_CSR_STALL | SST_CSR_DCS(4)); + + /* Set 24MHz MCLK, prevent local clock gating, enable SSP0 clock */ sst_dsp_shim_update_bits_unlocked(sst, SST_CLKCTL, - SST_CLKCTL_MASK, SST_CLKCTL_MASK); + SST_CLKCTL_MASK | SST_CLKCTL_DCPLCG | SST_CLKCTL_SCOE0, + SST_CLKCTL_MASK | SST_CLKCTL_DCPLCG | SST_CLKCTL_SCOE0); - /* PLL shutdown disable */ + /* Stall and reset core, set CSR */ + hsw_reset(sst); + + /* Enable core clock gating (VDRTCTL2.DCLCGE = 1), delay 50 us */ reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg &= ~(SST_VDRTCL2_APLLSE_MASK); + reg |= SST_VDRTCL2_DCLCGE | SST_VDRTCL2_DTCGE; writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - sst_dsp_shim_update_bits_unlocked(sst, SST_CSR, - SST_CSR_D0_MASK, SST_CSR_SBCS0 | SST_CSR_SBCS1 | - SST_CSR_STALL | SST_CSR_DCS(4)); udelay(50); - /* enable clock core gating */ + /* switch on audio PLL */ reg = readl(sst->addr.pci_cfg + SST_VDRTCTL2); - reg |= SST_VDRTCL2_DCLCGE; + reg &= ~SST_VDRTCL2_APLLSE_MASK; writel(reg, sst->addr.pci_cfg + SST_VDRTCTL2); - /* clear reset */ - sst_dsp_shim_update_bits_unlocked(sst, SST_CSR, SST_CSR_RST, 0); + /* set default power gating control, enable power gating control for all blocks. that is, + can't be accessed, please enable each block before accessing. */ + reg = readl(sst->addr.pci_cfg + SST_VDRTCTL0); + reg |= SST_VDRTCL0_DSRAMPGE_MASK | SST_VDRTCL0_ISRAMPGE_MASK; + /* for D0, always enable the block(DSRAM[0]) used for FW dump */ + fw_dump_bit = 1 << SST_VDRTCL0_DSRAMPGE_SHIFT; + writel(reg & ~fw_dump_bit, sst->addr.pci_cfg + SST_VDRTCTL0); + /* disable DMA finish function for SSP0 & SSP1 */ sst_dsp_shim_update_bits_unlocked(sst, SST_CSR2, SST_CSR2_SDFD_SSP1, @@ -418,6 +384,12 @@ static int hsw_set_dsp_D0(struct sst_dsp *sst) sst_dsp_shim_update_bits(sst, SST_IMRD, (SST_IMRD_DONE | SST_IMRD_BUSY | SST_IMRD_SSP0 | SST_IMRD_DMAC), 0x0); + /* clear IPC registers */ + sst_dsp_shim_write(sst, SST_IPCX, 0x0); + sst_dsp_shim_write(sst, SST_IPCD, 0x0); + sst_dsp_shim_write(sst, 0x80, 0x6); + sst_dsp_shim_write(sst, 0xe0, 0x300a); + return 0; } @@ -443,6 +415,11 @@ static void hsw_sleep(struct sst_dsp *sst) { dev_dbg(sst->dev, "HSW_PM dsp runtime suspend\n"); + /* put DSP into reset and stall */ + sst_dsp_shim_update_bits(sst, SST_CSR, + SST_CSR_24MHZ_LPCS | SST_CSR_RST | SST_CSR_STALL, + SST_CSR_RST | SST_CSR_STALL | SST_CSR_24MHZ_LPCS); + hsw_set_dsp_D3(sst); dev_dbg(sst->dev, "HSW_PM dsp runtime suspend exit\n"); } From 1eb832ac2dee8a350f3fcd5544886f07a3ef8869 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 1 Sep 2020 16:43:43 +0200 Subject: [PATCH 147/648] tools/bpf: build: Make sure resolve_btfids cleans up after itself MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new resolve_btfids tool did not clean up the feature detection folder on 'make clean', and also was not called properly from the clean rule in tools/make/ folder on its 'make clean'. This lead to stale objects being left around, which could cause feature detection to fail on subsequent builds. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20200901144343.179552-1-toke@redhat.com --- tools/bpf/Makefile | 4 ++-- tools/bpf/resolve_btfids/Makefile | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 0a6d09a3e91f..39bb322707b4 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args FEATURE_DISPLAY = libbfd disassembler-four-args check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean +NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) check_feat := 0 @@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c -clean: bpftool_clean runqslower_clean +clean: bpftool_clean runqslower_clean resolve_btfids_clean $(call QUIET_CLEAN, bpf-progs) $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index a88cd4426398..fe8eb537688b 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -80,6 +80,7 @@ libbpf-clean: clean: libsubcmd-clean libbpf-clean fixdep-clean $(call msg,CLEAN,$(BINARY)) $(Q)$(RM) -f $(BINARY); \ + $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) tags: From 4af22ded0ecf23adea1b26ea264c53f9f1cfc310 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 28 Aug 2020 19:39:02 +0300 Subject: [PATCH 148/648] arc: fix memory initialization for systems with two memory banks Rework of memory map initialization broke initialization of ARC systems with two memory banks. Before these changes, memblock was not aware of nodes configuration and the memory map was always allocated from the "lowmem" bank. After the addition of node information to memblock, the core mm attempts to allocate the memory map for the "highmem" bank from its node. The access to this memory using __va() fails because it can be only accessed using kmap. Anther problem that was uncovered is that {min,max}_high_pfn are calculated from u64 high_mem_start variable which prevents truncation to 32-bit physical address and the PFN values are above the node and zone boundaries. Use phys_addr_t type for high_mem_start and high_mem_size to ensure correspondence between PFNs and highmem zone boundaries and reserve the entire highmem bank until mem_init() to avoid accesses to it before highmem is enabled. To test this: 1. Enable HIGHMEM in ARC config 2. Enable 2 memory banks in haps_hs.dts (uncomment the 2nd bank) Fixes: 51930df5801e ("mm: free_area_init: allow defining max_zone_pfn in descending order") Cc: stable@vger.kernel.org [5.8] Signed-off-by: Mike Rapoport Signed-off-by: Vineet Gupta [vgupta: added instructions to test highmem] --- arch/arc/mm/init.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index f886ac69d8ad..3a35b82a718e 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -26,8 +26,8 @@ static unsigned long low_mem_sz; #ifdef CONFIG_HIGHMEM static unsigned long min_high_pfn, max_high_pfn; -static u64 high_mem_start; -static u64 high_mem_sz; +static phys_addr_t high_mem_start; +static phys_addr_t high_mem_sz; #endif #ifdef CONFIG_DISCONTIGMEM @@ -69,6 +69,7 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) high_mem_sz = size; in_use = 1; memblock_add_node(base, size, 1); + memblock_reserve(base, size); #endif } @@ -157,7 +158,7 @@ void __init setup_arch_memory(void) min_high_pfn = PFN_DOWN(high_mem_start); max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); - max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; + max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn; high_memory = (void *)(min_high_pfn << PAGE_SHIFT); kmap_init(); @@ -166,6 +167,17 @@ void __init setup_arch_memory(void) free_area_init(max_zone_pfn); } +static void __init highmem_init(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + memblock_free(high_mem_start, high_mem_sz); + for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); +#endif +} + /* * mem_init - initializes memory * @@ -174,14 +186,7 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { -#ifdef CONFIG_HIGHMEM - unsigned long tmp; - - reset_all_zones_managed_pages(); - for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) - free_highmem_page(pfn_to_page(tmp)); -#endif - memblock_free_all(); + highmem_init(); mem_init_print_info(NULL); } From 26907eb605fbc3ba9dbf888f21d9d8d04471271d Mon Sep 17 00:00:00 2001 From: Evgeniy Didin Date: Tue, 7 Jul 2020 18:38:58 +0300 Subject: [PATCH 149/648] ARC: [plat-hsdk]: Switch ethernet phy-mode to rgmii-id HSDK board has Micrel KSZ9031, recent commit bcf3440c6dd ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") caused a breakdown of Ethernet. Using 'phy-mode = "rgmii"' is not correct because accodring RGMII specification it is necessary to have delay on RX (PHY to MAX) which is not generated in case of "rgmii". Using "rgmii-id" adds necessary delay and solves the issue. Also adding name of PHY placed on HSDK board. Signed-off-by: Evgeniy Didin Cc: Eugeniy Paltsev Cc: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 5d64a5a940ee..dcaa44e408ac 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -210,7 +210,7 @@ gmac: ethernet@8000 { reg = <0x8000 0x2000>; interrupts = <10>; interrupt-names = "macirq"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; snps,pbl = <32>; snps,multicast-filter-bins = <256>; clocks = <&gmacclk>; @@ -228,7 +228,7 @@ mdio { #address-cells = <1>; #size-cells = <0>; compatible = "snps,dwmac-mdio"; - phy0: ethernet-phy@0 { + phy0: ethernet-phy@0 { /* Micrel KSZ9031 */ reg = <0>; }; }; From 5528d03183fe5243416c706f64b1faa518b05130 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 25 Aug 2020 18:05:10 +0530 Subject: [PATCH 150/648] scsi: target: iscsi: Fix data digest calculation Current code does not consider 'page_off' in data digest calculation. To fix this, add a local variable 'first_sg' and set first_sg.offset to sg->offset + page_off. Link: https://lore.kernel.org/r/1598358910-3052-1-git-send-email-varun@chelsio.com Fixes: e48354ce078c ("iscsi-target: Add iSCSI fabric support for target v4.1") Cc: Reviewed-by: Mike Christie Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index c9689610e186..2ec778e97b1b 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1389,14 +1389,27 @@ static u32 iscsit_do_crypto_hash_sg( sg = cmd->first_data_sg; page_off = cmd->first_data_sg_off; + if (data_length && page_off) { + struct scatterlist first_sg; + u32 len = min_t(u32, data_length, sg->length - page_off); + + sg_init_table(&first_sg, 1); + sg_set_page(&first_sg, sg_page(sg), len, sg->offset + page_off); + + ahash_request_set_crypt(hash, &first_sg, NULL, len); + crypto_ahash_update(hash); + + data_length -= len; + sg = sg_next(sg); + } + while (data_length) { - u32 cur_len = min_t(u32, data_length, (sg->length - page_off)); + u32 cur_len = min_t(u32, data_length, sg->length); ahash_request_set_crypt(hash, sg, NULL, cur_len); crypto_ahash_update(hash); data_length -= cur_len; - page_off = 0; /* iscsit_map_iovec has already checked for invalid sg pointers */ sg = sg_next(sg); } From 53de092f47ff40e8d4d78d590d95819d391bf2e0 Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Wed, 26 Aug 2020 15:24:26 +0800 Subject: [PATCH 151/648] scsi: libsas: Set data_dir as DMA_NONE if libata marks qc as NODATA It was discovered that sdparm will fail when attempting to disable write cache on a SATA disk connected via libsas. In the ATA command set the write cache state is controlled through the SET FEATURES operation. This is roughly corresponds to MODE SELECT in SCSI and the latter command is what is used in the SCSI-ATA translation layer. A subtle difference is that a MODE SELECT carries data whereas SET FEATURES is defined as a non-data command in ATA. Set the DMA data direction to DMA_NONE if the requested ATA command is identified as non-data. [mkp: commit desc] Fixes: fa1c1e8f1ece ("[SCSI] Add SATA support to libsas") Link: https://lore.kernel.org/r/1598426666-54544-1-git-send-email-luojiaxing@huawei.com Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Luo Jiaxing Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 1b93332daa6b..8b424c8ab834 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -209,7 +209,10 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->num_scatter = si; } - task->data_dir = qc->dma_dir; + if (qc->tf.protocol == ATA_PROT_NODATA) + task->data_dir = DMA_NONE; + else + task->data_dir = qc->dma_dir; task->scatter = qc->sg; task->ata_task.retry_count = 1; task->task_state_flags = SAS_TASK_STATE_PENDING; From c6016c6e39c3ee8fd671532520be3cc13e439db2 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 18 Aug 2020 09:25:10 +0200 Subject: [PATCH 152/648] drm/virtio: fix unblank When going through a disable/enable cycle without changing the framebuffer the optimization added by commit 3954ff10e06e ("drm/virtio: skip set_scanout if framebuffer didn't change") causes the screen stay blank. Add a bool to force an update to fix that. v2: use drm_atomic_crtc_needs_modeset() (Daniel). Cc: 1882851@bugs.launchpad.net Fixes: 3954ff10e06e ("drm/virtio: skip set_scanout if framebuffer didn't change") Signed-off-by: Gerd Hoffmann Tested-by: Jiri Slaby Tested-by: Diego Viola Acked-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20200818072511.6745-2-kraxel@redhat.com (cherry picked from commit 1bc371cd0ec907bab870cacb6e898105f9c41dc8) --- drivers/gpu/drm/virtio/virtgpu_display.c | 11 +++++++++++ drivers/gpu/drm/virtio/virtgpu_drv.h | 1 + drivers/gpu/drm/virtio/virtgpu_plane.c | 4 +++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index af55b334be2f..97170998cd0e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -123,6 +123,17 @@ static int virtio_gpu_crtc_atomic_check(struct drm_crtc *crtc, static void virtio_gpu_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc); + + /* + * virtio-gpu can't do modeset and plane update operations + * independent from each other. So the actual modeset happens + * in the plane update callback, and here we just check + * whenever we must force the modeset. + */ + if (drm_atomic_crtc_needs_modeset(crtc->state)) { + output->needs_modeset = true; + } } static const struct drm_crtc_helper_funcs virtio_gpu_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 9ff9f4ac0522..4ab1b0ba2925 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -138,6 +138,7 @@ struct virtio_gpu_output { int cur_x; int cur_y; bool enabled; + bool needs_modeset; }; #define drm_crtc_to_virtio_gpu_output(x) \ container_of(x, struct virtio_gpu_output, crtc) diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 52d24179bcec..65757409d9ed 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -163,7 +163,9 @@ static void virtio_gpu_primary_plane_update(struct drm_plane *plane, plane->state->src_w != old_state->src_w || plane->state->src_h != old_state->src_h || plane->state->src_x != old_state->src_x || - plane->state->src_y != old_state->src_y) { + plane->state->src_y != old_state->src_y || + output->needs_modeset) { + output->needs_modeset = false; DRM_DEBUG("handle 0x%x, crtc %dx%d+%d+%d, src %dx%d+%d+%d\n", bo->hw_res_handle, plane->state->crtc_w, plane->state->crtc_h, From 675bceb097e6fb9769309093ec6f3d33a2fed9cc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 2 Sep 2020 09:31:22 +0530 Subject: [PATCH 153/648] powerpc/mm: Remove DEBUG_VM_PGTABLE support on powerpc The test is broken w.r.t page table update rules and results in kernel crash as below. Disable the support until we get the tests updated. [ 21.083519] kernel BUG at arch/powerpc/mm/pgtable.c:304! cpu 0x0: Vector: 700 (Program Check) at [c000000c6d1e76c0] pc: c00000000009a5ec: assert_pte_locked+0x14c/0x380 lr: c0000000005eeeec: pte_update+0x11c/0x190 sp: c000000c6d1e7950 msr: 8000000002029033 current = 0xc000000c6d172c80 paca = 0xc000000003ba0000 irqmask: 0x03 irq_happened: 0x01 pid = 1, comm = swapper/0 kernel BUG at arch/powerpc/mm/pgtable.c:304! [link register ] c0000000005eeeec pte_update+0x11c/0x190 [c000000c6d1e7950] 0000000000000001 (unreliable) [c000000c6d1e79b0] c0000000005eee14 pte_update+0x44/0x190 [c000000c6d1e7a10] c000000001a2ca9c pte_advanced_tests+0x160/0x3d8 [c000000c6d1e7ab0] c000000001a2d4fc debug_vm_pgtable+0x7e8/0x1338 [c000000c6d1e7ba0] c0000000000116ec do_one_initcall+0xac/0x5f0 [c000000c6d1e7c80] c0000000019e4fac kernel_init_freeable+0x4dc/0x5a4 [c000000c6d1e7db0] c000000000012474 kernel_init+0x24/0x160 [c000000c6d1e7e20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c With DEBUG_VM disabled [ 20.530152] BUG: Kernel NULL pointer dereference on read at 0x00000000 [ 20.530183] Faulting instruction address: 0xc0000000000df330 cpu 0x33: Vector: 380 (Data SLB Access) at [c000000c6d19f700] pc: c0000000000df330: memset+0x68/0x104 lr: c00000000009f6d8: hash__pmdp_huge_get_and_clear+0xe8/0x1b0 sp: c000000c6d19f990 msr: 8000000002009033 dar: 0 current = 0xc000000c6d177480 paca = 0xc00000001ec4f400 irqmask: 0x03 irq_happened: 0x01 pid = 1, comm = swapper/0 [link register ] c00000000009f6d8 hash__pmdp_huge_get_and_clear+0xe8/0x1b0 [c000000c6d19f990] c00000000009f748 hash__pmdp_huge_get_and_clear+0x158/0x1b0 (unreliable) [c000000c6d19fa10] c0000000019ebf30 pmd_advanced_tests+0x1f0/0x378 [c000000c6d19fab0] c0000000019ed088 debug_vm_pgtable+0x79c/0x1244 [c000000c6d19fba0] c0000000000116ec do_one_initcall+0xac/0x5f0 [c000000c6d19fc80] c0000000019a4fac kernel_init_freeable+0x4dc/0x5a4 [c000000c6d19fdb0] c000000000012474 kernel_init+0x24/0x160 [c000000c6d19fe20] c00000000000cbd0 ret_from_kernel_thread+0x5c/0x6c 33:mon> Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902040122.136414-1-aneesh.kumar@linux.ibm.com --- Documentation/features/debug/debug-vm-pgtable/arch-support.txt | 2 +- arch/powerpc/Kconfig | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt index 53da483c8326..1c49723e7534 100644 --- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -22,7 +22,7 @@ | nios2: | TODO | | openrisc: | TODO | | parisc: | TODO | - | powerpc: | ok | + | powerpc: | TODO | | riscv: | ok | | s390: | ok | | sh: | TODO | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 65bed1fdeaad..787e829b6f25 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -116,7 +116,6 @@ config PPC # select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL - select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE From 6a0137101f47301fff2da6ba4b9048383d569909 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 1 Sep 2020 10:06:23 +0200 Subject: [PATCH 154/648] ASoC: Intel: bytcr_rt5640: Add quirk for MPMAN Converter9 2-in-1 The MPMAN Converter9 2-in-1 almost fully works with out default settings. The only problem is that it has only 1 speaker so any sounds only playing on the right channel get lost. Add a quirk for this model using the default settings + MONO_SPEAKER. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200901080623.4987-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 479992f4e97a..fc202747ba83 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -591,6 +591,16 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* MPMAN Converter 9, similar hw as the I.T.Works TW891 2-in-1 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MPMAN"), + DMI_MATCH(DMI_PRODUCT_NAME, "Converter9"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* MPMAN MPWIN895CL */ .matches = { From 59ae97a7a9e1499c2070e29841d1c4be4ae2994a Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Wed, 2 Sep 2020 15:09:52 +0200 Subject: [PATCH 155/648] regulator: pwm: Fix machine constraints application If the zero duty cycle doesn't correspond to any voltage in the voltage table, the PWM regulator returns an -EINVAL from get_voltage_sel() which results in the core erroring out with a "failed to get the current voltage" and ending up not applying the machine constraints. Instead, return -ENOTRECOVERABLE which makes the core set the voltage since it's at an unknown value. For example, with this device tree: fooregulator { compatible = "pwm-regulator"; pwms = <&foopwm 0 100000>; regulator-min-microvolt = <2250000>; regulator-max-microvolt = <2250000>; regulator-name = "fooregulator"; regulator-always-on; regulator-boot-on; voltage-table = <2250000 30>; }; Before this patch: fooregulator: failed to get the current voltage(-22) After this patch: fooregulator: Setting 2250000-2250000uV fooregulator: 2250 mV Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20200902130952.24880-1-vincent.whitchurch@axis.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 3234b118b53e..990bd50771d8 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -279,7 +279,7 @@ static int pwm_regulator_init_table(struct platform_device *pdev, return ret; } - drvdata->state = -EINVAL; + drvdata->state = -ENOTRECOVERABLE; drvdata->duty_cycle_table = duty_cycle_table; drvdata->desc.ops = &pwm_regulator_voltage_table_ops; drvdata->desc.n_voltages = length / sizeof(*duty_cycle_table); From 837ba18dfcd4db21ad58107c65bfe89753aa56d7 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Wed, 2 Sep 2020 15:23:41 +0200 Subject: [PATCH 156/648] spi: spi-loopback-test: Fix out-of-bounds read The "tx/rx-transfer - crossing PAGE_SIZE" test always fails when len=131071 and rx_offset >= 5: spi-loopback-test spi0.0: Running test tx/rx-transfer - crossing PAGE_SIZE ... with iteration values: len = 131071, tx_off = 0, rx_off = 3 with iteration values: len = 131071, tx_off = 0, rx_off = 4 with iteration values: len = 131071, tx_off = 0, rx_off = 5 loopback strangeness - rx changed outside of allowed range at: ...a4321000 spi_msg@ffffffd5a4157690 frame_length: 131071 actual_length: 131071 spi_transfer@ffffffd5a41576f8 len: 131071 tx_buf: ffffffd5a4340ffc Note that rx_offset > 3 can only occur if the SPI controller driver sets ->dma_alignment to a higher value than 4, so most SPI controller drivers are not affect. The allocated Rx buffer is of size SPI_TEST_MAX_SIZE_PLUS, which is 132 KiB (assuming 4 KiB pages). This test uses an initial offset into the rx_buf of PAGE_SIZE - 4, and a len of 131071, so the range expected to be written in this transfer ends at (4096 - 4) + 5 + 131071 == 132 KiB, which is also the end of the allocated buffer. But the code which verifies the content of the buffer reads a byte beyond the allocated buffer and spuriously fails because this out-of-bounds read doesn't return the expected value. Fix this by using ITERATE_LEN instead of ITERATE_MAX_LEN to avoid testing sizes which cause out-of-bounds reads. Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20200902132341.7079-1-vincent.whitchurch@axis.com Signed-off-by: Mark Brown --- drivers/spi/spi-loopback-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c index 9522d1b5786d..df981e55c24c 100644 --- a/drivers/spi/spi-loopback-test.c +++ b/drivers/spi/spi-loopback-test.c @@ -90,7 +90,7 @@ static struct spi_test spi_tests[] = { { .description = "tx/rx-transfer - crossing PAGE_SIZE", .fill_option = FILL_COUNT_8, - .iterate_len = { ITERATE_MAX_LEN }, + .iterate_len = { ITERATE_LEN }, .iterate_tx_align = ITERATE_ALIGN, .iterate_rx_align = ITERATE_ALIGN, .transfer_count = 1, From 8aa64be019567c4f90d45c5082a4b6f22e182d00 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Tue, 1 Sep 2020 20:38:55 +0800 Subject: [PATCH 157/648] RDMA/core: Fix unsafe linked list traversal after failing to allocate CQ It's not safe to access the next CQ in list_for_each_entry() after invoking ib_free_cq(), because the CQ has already been freed in current iteration. It should be replaced by list_for_each_entry_safe(). Fixes: c7ff819aefea ("RDMA/core: Introduce shared CQ pool API") Link: https://lore.kernel.org/r/1598963935-32335-1-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 513825e424bf..a92fc3f90bb5 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -379,7 +379,7 @@ static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes, { LIST_HEAD(tmp_list); unsigned int nr_cqs, i; - struct ib_cq *cq; + struct ib_cq *cq, *n; int ret; if (poll_ctx > IB_POLL_LAST_POOL_TYPE) { @@ -412,7 +412,7 @@ static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes, return 0; out_free_cqs: - list_for_each_entry(cq, &tmp_list, pool_entry) { + list_for_each_entry_safe(cq, n, &tmp_list, pool_entry) { cq->shared = false; ib_free_cq(cq); } From 28b0865714b315e318ac45c4fc9156f3d4649646 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 2 Sep 2020 15:43:04 +0300 Subject: [PATCH 158/648] RDMA/core: Fix reported speed and width When the returned speed from __ethtool_get_link_ksettings() is SPEED_UNKNOWN this will lead to reporting a wrong speed and width for providers that uses ib_get_eth_speed(), fix that by defaulting the netdev_speed to SPEED_1000 in case the returned value from __ethtool_get_link_ksettings() is SPEED_UNKNOWN. Fixes: d41861942fc5 ("IB/core: Add generic function to extract IB speed from netdev") Link: https://lore.kernel.org/r/20200902124304.170912-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3096e73797b7..307886737646 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1801,7 +1801,7 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) dev_put(netdev); - if (!rc) { + if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) { netdev_speed = lksettings.base.speed; } else { netdev_speed = SPEED_1000; From ed43ffea78dcc97db3f561da834f1a49c8961e33 Mon Sep 17 00:00:00 2001 From: Hou Pu Date: Wed, 29 Jul 2020 09:03:43 -0400 Subject: [PATCH 159/648] scsi: target: iscsi: Fix hang in iscsit_access_np() when getting tpg->np_login_sem The iSCSI target login thread might get stuck with the following stack: cat /proc/`pidof iscsi_np`/stack [<0>] down_interruptible+0x42/0x50 [<0>] iscsit_access_np+0xe3/0x167 [<0>] iscsi_target_locate_portal+0x695/0x8ac [<0>] __iscsi_target_login_thread+0x855/0xb82 [<0>] iscsi_target_login_thread+0x2f/0x5a [<0>] kthread+0xfa/0x130 [<0>] ret_from_fork+0x1f/0x30 This can be reproduced via the following steps: 1. Initiator A tries to log in to iqn1-tpg1 on port 3260. After finishing PDU exchange in the login thread and before the negotiation is finished the the network link goes down. At this point A has not finished login and tpg->np_login_sem is held. 2. Initiator B tries to log in to iqn2-tpg1 on port 3260. After finishing PDU exchange in the login thread the target expects to process remaining login PDUs in workqueue context. 3. Initiator A' tries to log in to iqn1-tpg1 on port 3260 from a new socket. A' will wait for tpg->np_login_sem with np->np_login_timer loaded to wait for at most 15 seconds. The lock is held by A so A' eventually times out. 4. Before A' got timeout initiator B gets negotiation failed and calls iscsi_target_login_drop()->iscsi_target_login_sess_out(). The np->np_login_timer is canceled and initiator A' will hang forever. Because A' is now in the login thread, no new login requests can be serviced. Fix this by moving iscsi_stop_login_thread_timer() out of iscsi_target_login_sess_out(). Also remove iscsi_np parameter from iscsi_target_login_sess_out(). Link: https://lore.kernel.org/r/20200729130343.24976-1-houpu@bytedance.com Cc: stable@vger.kernel.org Reviewed-by: Mike Christie Signed-off-by: Hou Pu Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_login.c | 6 +++--- drivers/target/iscsi/iscsi_target_login.h | 3 +-- drivers/target/iscsi/iscsi_target_nego.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 85748e338858..893d1b406c29 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1149,7 +1149,7 @@ void iscsit_free_conn(struct iscsi_conn *conn) } void iscsi_target_login_sess_out(struct iscsi_conn *conn, - struct iscsi_np *np, bool zero_tsih, bool new_sess) + bool zero_tsih, bool new_sess) { if (!new_sess) goto old_sess_out; @@ -1167,7 +1167,6 @@ void iscsi_target_login_sess_out(struct iscsi_conn *conn, conn->sess = NULL; old_sess_out: - iscsi_stop_login_thread_timer(np); /* * If login negotiation fails check if the Time2Retain timer * needs to be restarted. @@ -1407,8 +1406,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) new_sess_out: new_sess = true; old_sess_out: + iscsi_stop_login_thread_timer(np); tpg_np = conn->tpg_np; - iscsi_target_login_sess_out(conn, np, zero_tsih, new_sess); + iscsi_target_login_sess_out(conn, zero_tsih, new_sess); new_sess = false; if (tpg) { diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index 3b8e3639ff5d..fc95e6150253 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -22,8 +22,7 @@ extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32); extern void iscsit_free_conn(struct iscsi_conn *); extern int iscsit_start_kthreads(struct iscsi_conn *); extern void iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8); -extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *, - bool, bool); +extern void iscsi_target_login_sess_out(struct iscsi_conn *, bool, bool); extern int iscsi_target_login_thread(void *); extern void iscsi_handle_login_thread_timeout(struct timer_list *t); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index f88a52fec889..8b40f10976ff 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -535,12 +535,11 @@ static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned in static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) { - struct iscsi_np *np = login->np; bool zero_tsih = login->zero_tsih; iscsi_remove_failed_auth_entry(conn); iscsi_target_nego_release(conn); - iscsi_target_login_sess_out(conn, np, zero_tsih, true); + iscsi_target_login_sess_out(conn, zero_tsih, true); } struct conn_timeout { From d2af39141eea34ef651961e885f49d96781a1016 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Thu, 27 Aug 2020 18:53:32 +0200 Subject: [PATCH 160/648] scsi: megaraid_sas: Don't call disable_irq from process IRQ poll disable_irq() might sleep. Replace it with disable_irq_nosync() which is sufficient as irq_poll_scheduled protects against concurrently running complete_cmd_fusion() from megasas_irqpoll() and megasas_isr_fusion(). Link: https://lore.kernel.org/r/20200827165332.8432-1-thenzl@redhat.com Fixes: a6ffd5bf681 scsi: megaraid_sas: Call disable_irq from process IRQ poll Signed-off-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 0824410f78f8..0e143020a9fb 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -3689,7 +3689,7 @@ int megasas_irqpoll(struct irq_poll *irqpoll, int budget) instance = irq_ctx->instance; if (irq_ctx->irq_line_enable) { - disable_irq(irq_ctx->os_irq); + disable_irq_nosync(irq_ctx->os_irq); irq_ctx->irq_line_enable = false; } From b614d55b970d08bcac5b0bc15a5526181b3e4459 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 1 Sep 2020 16:50:26 +0200 Subject: [PATCH 161/648] scsi: mpt3sas: Don't call disable_irq from IRQ poll handler disable_irq() might sleep, replace it with disable_irq_nosync(). For synchronisation 'irq_poll_scheduled' is sufficient Fixes: 320e77acb3 scsi: mpt3sas: Irq poll to avoid CPU hard lockups Link: https://lore.kernel.org/r/20200901145026.12174-1-thenzl@redhat.com Signed-off-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 1d64524cd863..5850569a8396 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1733,7 +1733,7 @@ _base_irqpoll(struct irq_poll *irqpoll, int budget) reply_q = container_of(irqpoll, struct adapter_reply_queue, irqpoll); if (reply_q->irq_line_enable) { - disable_irq(reply_q->os_irq); + disable_irq_nosync(reply_q->os_irq); reply_q->irq_line_enable = false; } num_entries = _base_process_reply_queue(reply_q); From 4c62285439f80f8996c38e0bda79b1125a192365 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Sep 2020 22:14:39 +1000 Subject: [PATCH 162/648] Revert "powerpc/build: vdso linker warning for orphan sections" This reverts commit f2af201002a8bc22500c04cc474ea480bf361351. It added a usage of cc-ldoption, but cc-ldoption was removed in commit 055efab3120b ("kbuild: drop support for cc-ldoption"). Reported-by: Nick Desaulniers Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso32/Makefile | 2 +- arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 - arch/powerpc/kernel/vdso64/Makefile | 2 +- arch/powerpc/kernel/vdso64/vdso64.lds.S | 3 +-- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 87ab1152d5ce..e147bbdc12cd 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -50,7 +50,7 @@ $(obj-vdso32): %.o: %.S FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 4c985467a668..5206c2eb2a1d 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -111,7 +111,6 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.glink .iplt .plt .rela*) } } diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 38c317f25141..32ebb3522ea1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -34,7 +34,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 4e3a8d4ee614..256fb9720298 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -30,7 +30,7 @@ SECTIONS . = ALIGN(16); .text : { *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) - *(.sfpr) + *(.sfpr .glink) } :text PROVIDE(__etext = .); PROVIDE(_etext = .); @@ -111,7 +111,6 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.glink .iplt .plt .rela*) } } From b1910c6b9983817160e04d4e87b2dc1413c5361a Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 29 Jul 2020 10:50:12 -0700 Subject: [PATCH 163/648] interconnect: Show bandwidth for disabled paths as zero in debugfs For disabled paths the 'interconnect_summary' in debugfs currently shows the orginally requested bandwidths. This is confusing, since the bandwidth requests aren't active. Instead show the bandwidths for disabled paths/requests as zero. Signed-off-by: Matthias Kaehlcke Reviewed-by: Evan Green Link: https://lore.kernel.org/r/20200729104933.1.If8e80e4c0c7ddf99056f6e726e59505ed4e127f3@changeid Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index befd111049c0..cf07491b7415 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -55,12 +55,18 @@ static int icc_summary_show(struct seq_file *s, void *data) icc_summary_show_one(s, n); hlist_for_each_entry(r, &n->req_list, req_node) { + u32 avg_bw = 0, peak_bw = 0; + if (!r->dev) continue; + if (r->enabled) { + avg_bw = r->avg_bw; + peak_bw = r->peak_bw; + } + seq_printf(s, " %-27s %12u %12u %12u\n", - dev_name(r->dev), r->tag, r->avg_bw, - r->peak_bw); + dev_name(r->dev), r->tag, avg_bw, peak_bw); } } } From 3fbbf2148a406b3e350fe91e6fdd78eb42ecad24 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 2 Sep 2020 13:26:50 -0700 Subject: [PATCH 164/648] soundwire: fix double free of dangling pointer clang static analysis flags this problem stream.c:844:9: warning: Use of memory after it is freed kfree(bus->defer_msg.msg->buf); ^~~~~~~~~~~~~~~~~~~~~~~ This happens in an error handler cleaning up memory allocated for elements in a list. list_for_each_entry(m_rt, &stream->master_list, stream_node) { bus = m_rt->bus; kfree(bus->defer_msg.msg->buf); kfree(bus->defer_msg.msg); } And is triggered when the call to sdw_bank_switch() fails. There are a two problems. First, when sdw_bank_switch() fails, though it frees memory it does not clear bus's reference 'defer_msg.msg' to that memory. The second problem is the freeing msg->buf. In some cases msg will be NULL so this will dereference a null pointer. Need to check before freeing. Fixes: 99b8a5d608a6 ("soundwire: Add bank switch routine") Signed-off-by: Tom Rix Reviewed-by: Nick Desaulniers Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200902202650.14189-1-trix@redhat.com Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 37290a799023..6e36deb505b1 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -717,6 +717,7 @@ static int sdw_bank_switch(struct sdw_bus *bus, int m_rt_count) kfree(wbuf); error_1: kfree(wr_msg); + bus->defer_msg.msg = NULL; return ret; } @@ -840,9 +841,10 @@ static int do_bank_switch(struct sdw_stream_runtime *stream) error: list_for_each_entry(m_rt, &stream->master_list, stream_node) { bus = m_rt->bus; - - kfree(bus->defer_msg.msg->buf); - kfree(bus->defer_msg.msg); + if (bus->defer_msg.msg) { + kfree(bus->defer_msg.msg->buf); + kfree(bus->defer_msg.msg); + } } msg_unlock: From aef0148f3606117352053c015cb33734e9ee7397 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Wed, 2 Sep 2020 22:30:56 -0400 Subject: [PATCH 165/648] x86/cmdline: Disable jump tables for cmdline.c When CONFIG_RETPOLINE is disabled, Clang uses a jump table for the switch statement in cmdline_find_option (jump tables are disabled when CONFIG_RETPOLINE is enabled). This function is called very early in boot from sme_enable() if CONFIG_AMD_MEM_ENCRYPT is enabled. At this time, the kernel is still executing out of the identity mapping, but the jump table will contain virtual addresses. Fix this by disabling jump tables for cmdline.c when AMD_MEM_ENCRYPT is enabled. Signed-off-by: Arvind Sankar Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200903023056.3914690-1-nivedita@alum.mit.edu --- arch/x86/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d46fff11f06f..aa067859a70b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -24,7 +24,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_cmdline.o = -pg endif -CFLAGS_cmdline.o := -fno-stack-protector +CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables endif inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk From 23870f1227680d2aacff6f79c3ab2222bd04e86e Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Wed, 2 Sep 2020 18:03:23 +0200 Subject: [PATCH 166/648] locking/lockdep: Fix "USED" <- "IN-NMI" inversions During the LPC RCU BoF Paul asked how come the "USED" <- "IN-NMI" detector doesn't trip over rcu_read_lock()'s lockdep annotation. Looking into this I found a very embarrasing typo in verify_lock_unused(): - if (!(class->usage_mask & LOCK_USED)) + if (!(class->usage_mask & LOCKF_USED)) fixing that will indeed cause rcu_read_lock() to insta-splat :/ The above typo means that instead of testing for: 0x100 (1 << LOCK_USED), we test for 8 (LOCK_USED), which corresponds to (1 << LOCK_ENABLED_HARDIRQ). So instead of testing for _any_ used lock, it will only match any lock used with interrupts enabled. The rcu_read_lock() annotation uses .check=0, which means it will not set any of the interrupt bits and will thus never match. In order to properly fix the situation and allow rcu_read_lock() to correctly work, split LOCK_USED into LOCK_USED and LOCK_USED_READ and by having .read users set USED_READ and test USED, pure read-recursive locks are permitted. Fixes: f6f48e180404 ("lockdep: Teach lockdep about "USED" <- "IN-NMI" inversions") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Masami Hiramatsu Acked-by: Paul E. McKenney Link: https://lore.kernel.org/r/20200902160323.GK1362448@hirez.programming.kicks-ass.net --- kernel/locking/lockdep.c | 35 +++++++++++++++++++++++++----- kernel/locking/lockdep_internals.h | 2 ++ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 54b74fabf40c..2facbbd146ec 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3969,13 +3969,18 @@ static int separate_irq_context(struct task_struct *curr, static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit) { - unsigned int new_mask = 1 << new_bit, ret = 1; + unsigned int old_mask, new_mask, ret = 1; if (new_bit >= LOCK_USAGE_STATES) { DEBUG_LOCKS_WARN_ON(1); return 0; } + if (new_bit == LOCK_USED && this->read) + new_bit = LOCK_USED_READ; + + new_mask = 1 << new_bit; + /* * If already set then do not dirty the cacheline, * nor do any checks: @@ -3988,13 +3993,22 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, /* * Make sure we didn't race: */ - if (unlikely(hlock_class(this)->usage_mask & new_mask)) { - graph_unlock(); - return 1; - } + if (unlikely(hlock_class(this)->usage_mask & new_mask)) + goto unlock; + old_mask = hlock_class(this)->usage_mask; hlock_class(this)->usage_mask |= new_mask; + /* + * Save one usage_traces[] entry and map both LOCK_USED and + * LOCK_USED_READ onto the same entry. + */ + if (new_bit == LOCK_USED || new_bit == LOCK_USED_READ) { + if (old_mask & (LOCKF_USED | LOCKF_USED_READ)) + goto unlock; + new_bit = LOCK_USED; + } + if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) return 0; @@ -4008,6 +4022,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, return 0; } +unlock: graph_unlock(); /* @@ -4942,12 +4957,20 @@ static void verify_lock_unused(struct lockdep_map *lock, struct held_lock *hlock { #ifdef CONFIG_PROVE_LOCKING struct lock_class *class = look_up_lock_class(lock, subclass); + unsigned long mask = LOCKF_USED; /* if it doesn't have a class (yet), it certainly hasn't been used yet */ if (!class) return; - if (!(class->usage_mask & LOCK_USED)) + /* + * READ locks only conflict with USED, such that if we only ever use + * READ locks, there is no deadlock possible -- RCU. + */ + if (!hlock->read) + mask |= LOCKF_USED_READ; + + if (!(class->usage_mask & mask)) return; hlock->class_idx = class - lock_classes; diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index baca699b94e9..b0be1560ed17 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -19,6 +19,7 @@ enum lock_usage_bit { #include "lockdep_states.h" #undef LOCKDEP_STATE LOCK_USED, + LOCK_USED_READ, LOCK_USAGE_STATES }; @@ -40,6 +41,7 @@ enum { #include "lockdep_states.h" #undef LOCKDEP_STATE __LOCKF(USED) + __LOCKF(USED_READ) }; #define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE | From 4819e15f740ec884a50bdc431d7f1e7638b6f7d9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Sep 2020 17:59:04 +0200 Subject: [PATCH 167/648] x86/mm/32: Bring back vmalloc faulting on x86_32 One can not simply remove vmalloc faulting on x86-32. Upstream commit: 7f0a002b5a21 ("x86/mm: remove vmalloc faulting") removed it on x86 alltogether because previously the arch_sync_kernel_mappings() interface was introduced. This interface added synchronization of vmalloc/ioremap page-table updates to all page-tables in the system at creation time and was thought to make vmalloc faulting obsolete. But that assumption was incredibly naive. It turned out that there is a race window between the time the vmalloc or ioremap code establishes a mapping and the time it synchronizes this change to other page-tables in the system. During this race window another CPU or thread can establish a vmalloc mapping which uses the same intermediate page-table entries (e.g. PMD or PUD) and does no synchronization in the end, because it found all necessary mappings already present in the kernel reference page-table. But when these intermediate page-table entries are not yet synchronized, the other CPU or thread will continue with a vmalloc address that is not yet mapped in the page-table it currently uses, causing an unhandled page fault and oops like below: BUG: unable to handle page fault for address: fe80c000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page *pde = 33183067 *pte = a8648163 Oops: 0002 [#1] SMP CPU: 1 PID: 13514 Comm: cve-2017-17053 Tainted: G ... Call Trace: ldt_dup_context+0x66/0x80 dup_mm+0x2b3/0x480 copy_process+0x133b/0x15c0 _do_fork+0x94/0x3e0 __ia32_sys_clone+0x67/0x80 __do_fast_syscall_32+0x3f/0x70 do_fast_syscall_32+0x29/0x60 do_SYSENTER_32+0x15/0x20 entry_SYSENTER_32+0x9f/0xf2 EIP: 0xb7eef549 So the arch_sync_kernel_mappings() interface is racy, but removing it would mean to re-introduce the vmalloc_sync_all() interface, which is even more awful. Keep arch_sync_kernel_mappings() in place and catch the race condition in the page-fault handler instead. Do a partial revert of above commit to get vmalloc faulting on x86-32 back in place. Fixes: 7f0a002b5a21 ("x86/mm: remove vmalloc faulting") Reported-by: Naresh Kamboju Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200902155904.17544-1-joro@8bytes.org --- arch/x86/mm/fault.c | 78 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 35f1498e9832..6e3e8a124903 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -190,6 +190,53 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } +/* + * Handle a fault on the vmalloc or module mapping area + * + * This is needed because there is a race condition between the time + * when the vmalloc mapping code updates the PMD to the point in time + * where it synchronizes this update with the other page-tables in the + * system. + * + * In this race window another thread/CPU can map an area on the same + * PMD, finds it already present and does not synchronize it with the + * rest of the system yet. As a result v[mz]alloc might return areas + * which are not mapped in every page-table in the system, causing an + * unhandled page-fault when they are accessed. + */ +static noinline int vmalloc_fault(unsigned long address) +{ + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc area: */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_paddr = read_cr3_pa(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + + if (pmd_large(*pmd_k)) + return 0; + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + + return 0; +} +NOKPROBE_SYMBOL(vmalloc_fault); + void arch_sync_kernel_mappings(unsigned long start, unsigned long end) { unsigned long addr; @@ -1110,6 +1157,37 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, */ WARN_ON_ONCE(hw_error_code & X86_PF_PK); +#ifdef CONFIG_X86_32 + /* + * We can fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * Before doing this on-demand faulting, ensure that the + * fault is not any of the following: + * 1. A fault on a PTE with a reserved bit set. + * 2. A fault caused by a user-mode access. (Do not demand- + * fault kernel memory due to user-mode accesses). + * 3. A fault caused by a page-level protection violation. + * (A demand fault would be on a non-present page which + * would have X86_PF_PROT==0). + * + * This is only needed to close a race condition on x86-32 in + * the vmalloc mapping/unmapping code. See the comment above + * vmalloc_fault() for details. On x86-64 the race does not + * exist as the vmalloc mappings don't need to be synchronized + * there. + */ + if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { + if (vmalloc_fault(address) >= 0) + return; + } +#endif + /* Was the fault spurious, caused by lazy TLB invalidation? */ if (spurious_kernel_fault(hw_error_code, address)) return; From 318af7b80b6a6751520cf2b71edb8c45abb9d9d8 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 17 Jul 2020 13:29:48 -0500 Subject: [PATCH 168/648] Revert "kbuild: use -flive-patching when CONFIG_LIVEPATCH is enabled" Use of the new -flive-patching flag was introduced with the following commit: 43bd3a95c98e ("kbuild: use -flive-patching when CONFIG_LIVEPATCH is enabled") This flag has several drawbacks: - It disables some optimizations, so it can have a negative effect on performance. - According to the GCC documentation it's not compatible with LTO, which will become a compatibility issue as LTO support gets upstreamed in the kernel. - It was intended to be used for source-based patch generation tooling, as opposed to binary-based patch generation tooling (e.g., kpatch-build). It probably should have at least been behind a separate config option so as not to negatively affect other livepatch users. - Clang doesn't have the flag, so as far as I can tell, this method of generating patches is incompatible with Clang, which like LTO is becoming more mainstream. - It breaks GCC's implicit noreturn detection for local functions. This is the cause of several "unreachable instruction" objtool warnings. - The broken noreturn detection is an obvious GCC regression, but we haven't yet gotten GCC developers to acknowledge that, which doesn't inspire confidence in their willingness to keep the feature working as optimizations are added or changed going forward. - While there *is* a distro which relies on this flag for their distro livepatch module builds, there's not a publicly documented way to create safe livepatch modules with it. Its use seems to be based on tribal knowledge. It serves no benefit to those who don't know how to use it. (In fact, I believe the current livepatch documentation and samples are misleading and dangerous, and should be corrected. Or at least amended with a disclaimer. But I don't feel qualified to make such changes.) Also, we have an idea for using objtool to detect function changes, which could potentially obsolete the need for this flag anyway. At this point the flag has no benefits for upstream which would counteract the above drawbacks. Revert it until it becomes more ready. This reverts commit 43bd3a95c98e1a86b8b55d97f745c224ecff02b9. Fixes: 43bd3a95c98e ("kbuild: use -flive-patching when CONFIG_LIVEPATCH is enabled") Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Acked-by: Miroslav Benes Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/696262e997359666afa053fe7d1a9fb2bb373964.1595010490.git.jpoimboe@redhat.com --- Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Makefile b/Makefile index 5cfc3481207f..f1763e4a312b 100644 --- a/Makefile +++ b/Makefile @@ -887,10 +887,6 @@ KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections LDFLAGS_vmlinux += --gc-sections endif -ifdef CONFIG_LIVEPATCH -KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone) -endif - ifdef CONFIG_SHADOW_CALL_STACK CC_FLAGS_SCS := -fsanitize=shadow-call-stack KBUILD_CFLAGS += $(CC_FLAGS_SCS) From 6d3ba803ce32ecee36eb462427dc82237b3a70c8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 19 Aug 2020 19:51:33 +0200 Subject: [PATCH 169/648] dma-buf: Fix kerneldoc of dma_buf_set_name() Fix W=1 compile warnings (invalid kerneldoc): drivers/dma-buf/dma-buf.c:328: warning: Function parameter or member 'dmabuf' not described in 'dma_buf_set_name' Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20200819175134.19261-1-krzk@kernel.org --- drivers/dma-buf/dma-buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 1699a8e309ef..58564d82a3a2 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -316,9 +316,9 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) * name of the dma-buf if the same piece of memory is used for multiple * purpose between different devices. * - * @dmabuf [in] dmabuf buffer that will be renamed. - * @buf: [in] A piece of userspace memory that contains the name of - * the dma-buf. + * @dmabuf: [in] dmabuf buffer that will be renamed. + * @buf: [in] A piece of userspace memory that contains the name of + * the dma-buf. * * Returns 0 on success. If the dma-buf buffer is already attached to * devices, return -EBUSY. From 8d0441cf9b6c7bb2ddfa10739d1f5cb5dad4cd55 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 19 Aug 2020 19:51:34 +0200 Subject: [PATCH 170/648] dma-buf: fence-chain: Document missing dma_fence_chain_init() parameter in kerneldoc Fix W=1 compile warnings (invalid kerneldoc): drivers/dma-buf/dma-fence-chain.c:233: warning: Function parameter or member 'seqno' not described in 'dma_fence_chain_init' Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20200819175134.19261-2-krzk@kernel.org --- drivers/dma-buf/dma-fence-chain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index 3d123502ff12..7d129e68ac70 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -222,6 +222,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops); * @chain: the chain node to initialize * @prev: the previous fence * @fence: the current fence + * @seqno: the sequence number to use for the fence chain * * Initialize a new chain node and either start a new chain or add the node to * the existing chain of the previous fence. From 21e9ba5373fc2cec608fd68301a1dbfd14df3172 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 2 Sep 2020 14:12:46 +0530 Subject: [PATCH 171/648] libbpf: Remove arch-specific include path in Makefile Ubuntu mainline builds for ppc64le are failing with the below error (*): CALL /home/kernel/COD/linux/scripts/atomic/check-atomics.sh DESCEND bpf/resolve_btfids Auto-detecting system features: ... libelf: [ [32mon[m ] ... zlib: [ [32mon[m ] ... bpf: [ [31mOFF[m ] BPF API too old make[6]: *** [Makefile:295: bpfdep] Error 1 make[5]: *** [Makefile:54: /home/kernel/COD/linux/debian/build/build-generic/tools/bpf/resolve_btfids//libbpf.a] Error 2 make[4]: *** [Makefile:71: bpf/resolve_btfids] Error 2 make[3]: *** [/home/kernel/COD/linux/Makefile:1890: tools/bpf/resolve_btfids] Error 2 make[2]: *** [/home/kernel/COD/linux/Makefile:335: __build_one_by_one] Error 2 make[2]: Leaving directory '/home/kernel/COD/linux/debian/build/build-generic' make[1]: *** [Makefile:185: __sub-make] Error 2 make[1]: Leaving directory '/home/kernel/COD/linux' resolve_btfids needs to be build as a host binary and it needs libbpf. However, libbpf Makefile hardcodes an include path utilizing $(ARCH). This results in mixing of cross-architecture headers resulting in a build failure. The specific header include path doesn't seem necessary for a libbpf build. Hence, remove the same. (*) https://kernel.ubuntu.com/~kernel-ppa/mainline/v5.9-rc3/ppc64el/log Reported-by: Vaidyanathan Srinivasan Signed-off-by: Naveen N. Rao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200902084246.1513055-1-naveen.n.rao@linux.vnet.ibm.com --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index bf8ed134cb8a..b78484e7a608 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -59,7 +59,7 @@ FEATURE_USER = .libbpf FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray FEATURE_DISPLAY = libelf zlib bpf -INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) check_feat := 1 From d0c20d38af135b2b4b90aa59df7878ef0c8fbef4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 2 Sep 2020 10:47:02 -0700 Subject: [PATCH 172/648] xfs: fix xfs_bmap_validate_extent_raw when checking attr fork of rt files The realtime flag only applies to the data fork, so don't use the realtime block number checks on the attr fork of a realtime file. Fixes: 30b0984d9117 ("xfs: refactor bmap record validation") Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 9c40d5971035..1b0a01b06a05 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -6226,7 +6226,7 @@ xfs_bmap_validate_extent( isrt = XFS_IS_REALTIME_INODE(ip); endfsb = irec->br_startblock + irec->br_blockcount - 1; - if (isrt) { + if (isrt && whichfork == XFS_DATA_FORK) { if (!xfs_verify_rtbno(mp, irec->br_startblock)) return __this_address; if (!xfs_verify_rtbno(mp, endfsb)) From 6180bb446ab624b9ab8bf201ed251ca87f07b413 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 4 Sep 2020 00:16:25 +0800 Subject: [PATCH 173/648] dax: fix detection of dax support for non-persistent memory block devices When calling __generic_fsdax_supported(), a dax-unsupported device may not have dax_dev as NULL, e.g. the dax related code block is not enabled by Kconfig. Therefore in __generic_fsdax_supported(), to check whether a device supports DAX or not, the following order of operations should be performed: - If dax_dev pointer is NULL, it means the device driver explicitly announce it doesn't support DAX. Then it is OK to directly return false from __generic_fsdax_supported(). - If dax_dev pointer is NOT NULL, it might be because the driver doesn't support DAX and not explicitly initialize related data structure. Then bdev_dax_supported() should be called for further check. If device driver desn't explicitly set its dax_dev pointer to NULL, this is not a bug. Calling bdev_dax_supported() makes sure they can be recognized as dax-unsupported eventually. Fixes: c2affe920b0e ("dax: do not print error message for non-persistent memory block device") Cc: Jan Kara Cc: Vishal Verma Reviewed-and-tested-by: Adrian Huang Reviewed-by: Ira Weiny Reviewed-by: Mike Snitzer Reviewed-by: Pankaj Gupta Signed-off-by: Coly Li Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20200903161625.19524-1-colyli@suse.de --- drivers/dax/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 32642634c1bb..e5767c83ea23 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -100,7 +100,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, return false; } - if (!dax_dev && !bdev_dax_supported(bdev, blocksize)) { + if (!dax_dev || !bdev_dax_supported(bdev, blocksize)) { pr_debug("%s: error: dax unsupported by block device\n", bdevname(bdev, buf)); return false; From f71800228dc74711c3df43854ce7089562a3bc2d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Aug 2020 22:31:44 +0200 Subject: [PATCH 174/648] drm/tve200: Stabilize enable/disable The TVE200 will occasionally print a bunch of lost interrupts and similar dmesg messages, sometimes during boot and sometimes after disabling and coming back to enablement. This is probably because the hardware is left in an unknown state by the boot loader that displays a logo. This can be fixed by bringing the controller into a known state by resetting the controller while enabling it. We retry reset 5 times like the vendor driver does. We also put the controller into reset before de-clocking it and clear all interrupts before enabling the vblank IRQ. This makes the video enable/disable/enable cycle rock solid on the D-Link DIR-685. Tested extensively. Signed-off-by: Linus Walleij Acked-by: Daniel Vetter Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200820203144.271081-1-linus.walleij@linaro.org --- drivers/gpu/drm/tve200/tve200_display.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tve200/tve200_display.c b/drivers/gpu/drm/tve200/tve200_display.c index d733bbc4ac0e..17ff24d999d1 100644 --- a/drivers/gpu/drm/tve200/tve200_display.c +++ b/drivers/gpu/drm/tve200/tve200_display.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -130,9 +131,25 @@ static void tve200_display_enable(struct drm_simple_display_pipe *pipe, struct drm_connector *connector = priv->connector; u32 format = fb->format->format; u32 ctrl1 = 0; + int retries; clk_prepare_enable(priv->clk); + /* Reset the TVE200 and wait for it to come back online */ + writel(TVE200_CTRL_4_RESET, priv->regs + TVE200_CTRL_4); + for (retries = 0; retries < 5; retries++) { + usleep_range(30000, 50000); + if (readl(priv->regs + TVE200_CTRL_4) & TVE200_CTRL_4_RESET) + continue; + else + break; + } + if (retries == 5 && + readl(priv->regs + TVE200_CTRL_4) & TVE200_CTRL_4_RESET) { + dev_err(drm->dev, "can't get hardware out of reset\n"); + return; + } + /* Function 1 */ ctrl1 |= TVE200_CTRL_CSMODE; /* Interlace mode for CCIR656: parameterize? */ @@ -230,8 +247,9 @@ static void tve200_display_disable(struct drm_simple_display_pipe *pipe) drm_crtc_vblank_off(crtc); - /* Disable and Power Down */ + /* Disable put into reset and Power Down */ writel(0, priv->regs + TVE200_CTRL); + writel(TVE200_CTRL_4_RESET, priv->regs + TVE200_CTRL_4); clk_disable_unprepare(priv->clk); } @@ -279,6 +297,8 @@ static int tve200_display_enable_vblank(struct drm_simple_display_pipe *pipe) struct drm_device *drm = crtc->dev; struct tve200_drm_dev_private *priv = drm->dev_private; + /* Clear any IRQs and enable */ + writel(0xFF, priv->regs + TVE200_INT_CLR); writel(TVE200_INT_V_STATUS, priv->regs + TVE200_INT_EN); return 0; } From 91e045b93db79a2ef66e045ad0d1f8f9d348e1f4 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Thu, 3 Sep 2020 12:21:44 -0700 Subject: [PATCH 175/648] interconnect: qcom: Fix small BW votes being truncated to zero Small BW votes that translate to less than a single BCM unit are currently truncated to zero. Ensure that non-zero BW requests always result in at least a vote of 1 to BCM. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20200903192149.30385-2-mdtipton@codeaurora.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/bcm-voter.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c index a3d2ef1d9903..609db9c95fd7 100644 --- a/drivers/interconnect/qcom/bcm-voter.c +++ b/drivers/interconnect/qcom/bcm-voter.c @@ -52,8 +52,20 @@ static int cmp_vcd(void *priv, struct list_head *a, struct list_head *b) return 1; } +static u64 bcm_div(u64 num, u32 base) +{ + /* Ensure that small votes aren't lost. */ + if (num && num < base) + return 1; + + do_div(num, base); + + return num; +} + static void bcm_aggregate(struct qcom_icc_bcm *bcm) { + struct qcom_icc_node *node; size_t i, bucket; u64 agg_avg[QCOM_ICC_NUM_BUCKETS] = {0}; u64 agg_peak[QCOM_ICC_NUM_BUCKETS] = {0}; @@ -61,22 +73,21 @@ static void bcm_aggregate(struct qcom_icc_bcm *bcm) for (bucket = 0; bucket < QCOM_ICC_NUM_BUCKETS; bucket++) { for (i = 0; i < bcm->num_nodes; i++) { - temp = bcm->nodes[i]->sum_avg[bucket] * bcm->aux_data.width; - do_div(temp, bcm->nodes[i]->buswidth * bcm->nodes[i]->channels); + node = bcm->nodes[i]; + temp = bcm_div(node->sum_avg[bucket] * bcm->aux_data.width, + node->buswidth * node->channels); agg_avg[bucket] = max(agg_avg[bucket], temp); - temp = bcm->nodes[i]->max_peak[bucket] * bcm->aux_data.width; - do_div(temp, bcm->nodes[i]->buswidth); + temp = bcm_div(node->max_peak[bucket] * bcm->aux_data.width, + node->buswidth); agg_peak[bucket] = max(agg_peak[bucket], temp); } temp = agg_avg[bucket] * 1000ULL; - do_div(temp, bcm->aux_data.unit); - bcm->vote_x[bucket] = temp; + bcm->vote_x[bucket] = bcm_div(temp, bcm->aux_data.unit); temp = agg_peak[bucket] * 1000ULL; - do_div(temp, bcm->aux_data.unit); - bcm->vote_y[bucket] = temp; + bcm->vote_y[bucket] = bcm_div(temp, bcm->aux_data.unit); } if (bcm->keepalive && bcm->vote_x[QCOM_ICC_BUCKET_AMC] == 0 && From dc0988bbe1bd41e2fa555e4a6f890b819a34b49b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 2 Sep 2020 16:53:40 -0700 Subject: [PATCH 176/648] bpf: Do not use bucket_lock for hashmap iterator Currently, for hashmap, the bpf iterator will grab a bucket lock, a spinlock, before traversing the elements in the bucket. This can ensure all bpf visted elements are valid. But this mechanism may cause deadlock if update/deletion happens to the same bucket of the visited map in the program. For example, if we added bpf_map_update_elem() call to the same visited element in selftests bpf_iter_bpf_hash_map.c, we will have the following deadlock: ============================================ WARNING: possible recursive locking detected 5.9.0-rc1+ #841 Not tainted -------------------------------------------- test_progs/1750 is trying to acquire lock: ffff9a5bb73c5e70 (&htab->buckets[i].raw_lock){....}-{2:2}, at: htab_map_update_elem+0x1cf/0x410 but task is already holding lock: ffff9a5bb73c5e20 (&htab->buckets[i].raw_lock){....}-{2:2}, at: bpf_hash_map_seq_find_next+0x94/0x120 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&htab->buckets[i].raw_lock); lock(&htab->buckets[i].raw_lock); *** DEADLOCK *** ... Call Trace: dump_stack+0x78/0xa0 __lock_acquire.cold.74+0x209/0x2e3 lock_acquire+0xba/0x380 ? htab_map_update_elem+0x1cf/0x410 ? __lock_acquire+0x639/0x20c0 _raw_spin_lock_irqsave+0x3b/0x80 ? htab_map_update_elem+0x1cf/0x410 htab_map_update_elem+0x1cf/0x410 ? lock_acquire+0xba/0x380 bpf_prog_ad6dab10433b135d_dump_bpf_hash_map+0x88/0xa9c ? find_held_lock+0x34/0xa0 bpf_iter_run_prog+0x81/0x16e __bpf_hash_map_seq_show+0x145/0x180 bpf_seq_read+0xff/0x3d0 vfs_read+0xad/0x1c0 ksys_read+0x5f/0xe0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... The bucket_lock first grabbed in seq_ops->next() called by bpf_seq_read(), and then grabbed again in htab_map_update_elem() in the bpf program, causing deadlocks. Actually, we do not need bucket_lock here, we can just use rcu_read_lock() similar to netlink iterator where the rcu_read_{lock,unlock} likes below: seq_ops->start(): rcu_read_lock(); seq_ops->next(): rcu_read_unlock(); /* next element */ rcu_read_lock(); seq_ops->stop(); rcu_read_unlock(); Compared to old bucket_lock mechanism, if concurrent updata/delete happens, we may visit stale elements, miss some elements, or repeat some elements. I think this is a reasonable compromise. For users wanting to avoid stale, missing/repeated accesses, bpf_map batch access syscall interface can be used. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200902235340.2001375-1-yhs@fb.com --- kernel/bpf/hashtab.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 78dfff6a501b..7df28a45c66b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1622,7 +1622,6 @@ struct bpf_iter_seq_hash_map_info { struct bpf_map *map; struct bpf_htab *htab; void *percpu_value_buf; // non-zero means percpu hash - unsigned long flags; u32 bucket_id; u32 skip_elems; }; @@ -1632,7 +1631,6 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, struct htab_elem *prev_elem) { const struct bpf_htab *htab = info->htab; - unsigned long flags = info->flags; u32 skip_elems = info->skip_elems; u32 bucket_id = info->bucket_id; struct hlist_nulls_head *head; @@ -1656,19 +1654,18 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, /* not found, unlock and go to the next bucket */ b = &htab->buckets[bucket_id++]; - htab_unlock_bucket(htab, b, flags); + rcu_read_unlock(); skip_elems = 0; } for (i = bucket_id; i < htab->n_buckets; i++) { b = &htab->buckets[i]; - flags = htab_lock_bucket(htab, b); + rcu_read_lock(); count = 0; head = &b->head; hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { if (count >= skip_elems) { - info->flags = flags; info->bucket_id = i; info->skip_elems = count; return elem; @@ -1676,7 +1673,7 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, count++; } - htab_unlock_bucket(htab, b, flags); + rcu_read_unlock(); skip_elems = 0; } @@ -1754,14 +1751,10 @@ static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) { - struct bpf_iter_seq_hash_map_info *info = seq->private; - if (!v) (void)__bpf_hash_map_seq_show(seq, NULL); else - htab_unlock_bucket(info->htab, - &info->htab->buckets[info->bucket_id], - info->flags); + rcu_read_unlock(); } static int bpf_iter_init_hash_map(void *priv_data, From 4daab7132731ac5ec9384c8a070cdb9607dc38c8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 2 Sep 2020 16:53:41 -0700 Subject: [PATCH 177/648] selftests/bpf: Add bpf_{update, delete}_map_elem in hashmap iter program Added bpf_{updata,delete}_map_elem to the very map element the iter program is visiting. Due to rcu protection, the visited map elements, although stale, should still contain correct values. $ ./test_progs -n 4/18 #4/18 bpf_hash_map:OK #4 bpf_iter:OK Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200902235341.2001534-1-yhs@fb.com --- .../selftests/bpf/progs/bpf_iter_bpf_hash_map.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index 07ddbfdbcab7..6dfce3fd68bc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -47,7 +47,10 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) __u32 seq_num = ctx->meta->seq_num; struct bpf_map *map = ctx->map; struct key_t *key = ctx->key; + struct key_t tmp_key; __u64 *val = ctx->value; + __u64 tmp_val = 0; + int ret; if (in_test_mode) { /* test mode is used by selftests to @@ -61,6 +64,18 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) if (key == (void *)0 || val == (void *)0) return 0; + /* update the value and then delete the pair. + * it should not impact the existing 'val' which is still + * accessible under rcu. + */ + __builtin_memcpy(&tmp_key, key, sizeof(struct key_t)); + ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0); + if (ret) + return 0; + ret = bpf_map_delete_elem(&hashmap1, &tmp_key); + if (ret) + return 0; + key_sum_a += key->a; key_sum_b += key->b; key_sum_c += key->c; From ccae0f36d500aef727f98acd8d0601e6b262a513 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 4 Sep 2020 14:10:47 +0800 Subject: [PATCH 178/648] x86, fakenuma: Fix invalid starting node ID Commit: cc9aec03e58f ("x86/numa_emulation: Introduce uniform split capability") uses "-1" as the starting node ID, which causes the strange kernel log as follows, when "numa=fake=32G" is added to the kernel command line: Faking node -1 at [mem 0x0000000000000000-0x0000000893ffffff] (35136MB) Faking node 0 at [mem 0x0000001840000000-0x000000203fffffff] (32768MB) Faking node 1 at [mem 0x0000000894000000-0x000000183fffffff] (64192MB) Faking node 2 at [mem 0x0000002040000000-0x000000283fffffff] (32768MB) Faking node 3 at [mem 0x0000002840000000-0x000000303fffffff] (32768MB) And finally the kernel crashes: BUG: Bad page state in process swapper pfn:00011 page:(____ptrval____) refcount:0 mapcount:1 mapping:(____ptrval____) index:0x55cd7e44b270 pfn:0x11 failed to read mapping contents, not a valid kernel address? flags: 0x5(locked|uptodate) raw: 0000000000000005 000055cd7e44af30 000055cd7e44af50 0000000100000006 raw: 000055cd7e44b270 000055cd7e44b290 0000000000000000 000055cd7e44b510 page dumped because: page still charged to cgroup page->mem_cgroup:000055cd7e44b510 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.9.0-rc2 #1 Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 Call Trace: dump_stack+0x57/0x80 bad_page.cold+0x63/0x94 __free_pages_ok+0x33f/0x360 memblock_free_all+0x127/0x195 mem_init+0x23/0x1f5 start_kernel+0x219/0x4f5 secondary_startup_64+0xb6/0xc0 Fix this bug via using 0 as the starting node ID. This restores the original behavior before cc9aec03e58f. [ mingo: Massaged the changelog. ] Fixes: cc9aec03e58f ("x86/numa_emulation: Introduce uniform split capability") Signed-off-by: "Huang, Ying" Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200904061047.612950-1-ying.huang@intel.com --- arch/x86/mm/numa_emulation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index c5174b4e318b..683cd12f4793 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -321,7 +321,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei, u64 addr, u64 max_addr, u64 size) { return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size, - 0, NULL, NUMA_NO_NODE); + 0, NULL, 0); } static int __init setup_emu2phys_nid(int *dfl_phys_nid) From 1b0df11fde0f14a269a181b3b7f5122415bc5ed7 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 2 Sep 2020 13:07:56 -0400 Subject: [PATCH 179/648] padata: fix possible padata_works_lock deadlock syzbot reports, WARNING: inconsistent lock state 5.9.0-rc2-syzkaller #0 Not tainted -------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. syz-executor.0/26715 takes: (padata_works_lock){+.?.}-{2:2}, at: padata_do_parallel kernel/padata.c:220 {IN-SOFTIRQ-W} state was registered at: spin_lock include/linux/spinlock.h:354 [inline] padata_do_parallel kernel/padata.c:220 ... __do_softirq kernel/softirq.c:298 ... sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1091 asm_sysvec_apic_timer_interrupt arch/x86/include/asm/idtentry.h:581 Possible unsafe locking scenario: CPU0 ---- lock(padata_works_lock); lock(padata_works_lock); padata_do_parallel() takes padata_works_lock with softirqs enabled, so a deadlock is possible if, on the same CPU, the lock is acquired in process context and then softirq handling done in an interrupt leads to the same path. Fix by leaving softirqs disabled while do_parallel holds padata_works_lock. Reported-by: syzbot+f4b9f49e38e25eb4ef52@syzkaller.appspotmail.com Fixes: 4611ce2246889 ("padata: allocate work structures for parallel jobs from a pool") Signed-off-by: Daniel Jordan Cc: Herbert Xu Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- kernel/padata.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 16cb894dc272..d4d3ba6e1728 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -215,12 +215,13 @@ int padata_do_parallel(struct padata_shell *ps, padata->pd = pd; padata->cb_cpu = *cb_cpu; - rcu_read_unlock_bh(); - spin_lock(&padata_works_lock); padata->seq_nr = ++pd->seq_nr; pw = padata_work_alloc(); spin_unlock(&padata_works_lock); + + rcu_read_unlock_bh(); + if (pw) { padata_work_init(pw, padata_parallel_worker, padata, 0); queue_work(pinst->parallel_wq, &pw->pw_work); From aecb2016c90a1b620e21c9e143afbdc9666cce52 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:24 +0200 Subject: [PATCH 180/648] xen/balloon: add header guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to protect against the header being included multiple times on the same compilation unit. Signed-off-by: Roger Pau Monné Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200901083326.21264-2-roger.pau@citrix.com Signed-off-by: Juergen Gross --- include/xen/balloon.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 6fb95aa19405..6dbdb0b3fd03 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -2,6 +2,8 @@ /****************************************************************************** * Xen balloon functionality */ +#ifndef _XEN_BALLOON_H +#define _XEN_BALLOON_H #define RETRY_UNLIMITED 0 @@ -34,3 +36,5 @@ static inline void xen_balloon_init(void) { } #endif + +#endif /* _XEN_BALLOON_H */ From 4533d3aed857c558d6aabd00d0cb04100c5a2258 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:25 +0200 Subject: [PATCH 181/648] memremap: rename MEMORY_DEVICE_DEVDAX to MEMORY_DEVICE_GENERIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in preparation for the logic behind MEMORY_DEVICE_DEVDAX also being used by non DAX devices. No functional change intended. Signed-off-by: Roger Pau Monné Reviewed-by: Ira Weiny Acked-by: Andrew Morton Reviewed-by: Pankaj Gupta Link: https://lore.kernel.org/r/20200901083326.21264-3-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/dax/device.c | 2 +- include/linux/memremap.h | 9 ++++----- mm/memremap.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 4c0af2eb7e19..1e89513f3c59 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -429,7 +429,7 @@ int dev_dax_probe(struct device *dev) return -EBUSY; } - dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX; + dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC; addr = devm_memremap_pages(dev, &dev_dax->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 5f5b2df06e61..e5862746751b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -46,11 +46,10 @@ struct vmem_altmap { * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * - * MEMORY_DEVICE_DEVDAX: + * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA - * coherent and supports page pinning. In contrast to - * MEMORY_DEVICE_FS_DAX, this memory is access via a device-dax - * character device. + * coherent and supports page pinning. This is for example used by DAX devices + * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer @@ -60,7 +59,7 @@ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_FS_DAX, - MEMORY_DEVICE_DEVDAX, + MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; diff --git a/mm/memremap.c b/mm/memremap.c index 03e38b7a38f1..006dace60b1a 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -216,7 +216,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) return ERR_PTR(-EINVAL); } break; - case MEMORY_DEVICE_DEVDAX: + case MEMORY_DEVICE_GENERIC: need_devmap_managed = false; break; case MEMORY_DEVICE_PCI_P2PDMA: From 9e2369c06c8a181478039258a4598c1ddd2cadfa Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:26 +0200 Subject: [PATCH 182/648] xen: add helpers to allocate unpopulated memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be used in order to create foreign mappings. This is based on the ZONE_DEVICE facility which is used by persistent memory devices in order to create struct pages and kernel virtual mappings for the IOMEM areas of such devices. Note that on kernels without support for ZONE_DEVICE Xen will fallback to use ballooned pages in order to create foreign mappings. The newly added helpers use the same parameters as the existing {alloc/free}_xenballooned_pages functions, which allows for in-place replacement of the callers. Once a memory region has been added to be used as scratch mapping space it will no longer be released, and pages returned are kept in a linked list. This allows to have a buffer of pages and prevents resorting to frequent additions and removals of regions. If enabled (because ZONE_DEVICE is supported) the usage of the new functionality untangles Xen balloon and RAM hotplug from the usage of unpopulated physical memory ranges to map foreign pages, which is the correct thing to do in order to avoid mappings of foreign pages depend on memory hotplug. Note the driver is currently not enabled on Arm platforms because it would interfere with the identity mapping required on some platforms. Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20200901083326.21264-4-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/gpu/drm/xen/xen_drm_front_gem.c | 9 +- drivers/xen/Kconfig | 10 ++ drivers/xen/Makefile | 1 + drivers/xen/balloon.c | 4 +- drivers/xen/grant-table.c | 4 +- drivers/xen/privcmd.c | 4 +- drivers/xen/unpopulated-alloc.c | 183 ++++++++++++++++++++++++ drivers/xen/xenbus/xenbus_client.c | 6 +- drivers/xen/xlate_mmu.c | 4 +- include/xen/xen.h | 9 ++ 10 files changed, 219 insertions(+), 15 deletions(-) create mode 100644 drivers/xen/unpopulated-alloc.c diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c index 39ff95b75357..534daf37c97e 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_gem.c +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -18,6 +18,7 @@ #include #include +#include #include "xen_drm_front.h" #include "xen_drm_front_gem.h" @@ -99,8 +100,8 @@ static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) * allocate ballooned pages which will be used to map * grant references provided by the backend */ - ret = alloc_xenballooned_pages(xen_obj->num_pages, - xen_obj->pages); + ret = xen_alloc_unpopulated_pages(xen_obj->num_pages, + xen_obj->pages); if (ret < 0) { DRM_ERROR("Cannot allocate %zu ballooned pages: %d\n", xen_obj->num_pages, ret); @@ -152,8 +153,8 @@ void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj) } else { if (xen_obj->pages) { if (xen_obj->be_alloc) { - free_xenballooned_pages(xen_obj->num_pages, - xen_obj->pages); + xen_free_unpopulated_pages(xen_obj->num_pages, + xen_obj->pages); gem_free_pages_array(xen_obj); } else { drm_gem_put_pages(&xen_obj->base, diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 46e7fd099904..0ab54df82520 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -324,4 +324,14 @@ config XEN_HAVE_VPMU config XEN_FRONT_PGDIR_SHBUF tristate +config XEN_UNPOPULATED_ALLOC + bool "Use unpopulated memory ranges for guest mappings" + depends on X86 && ZONE_DEVICE + default XEN_BACKEND || XEN_GNTDEV || XEN_DOM0 + help + Use unpopulated memory ranges in order to create mappings for guest + memory regions, including grant maps and foreign pages. This avoids + having to balloon out RAM regions in order to obtain physical memory + space to create such mappings. + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 0d322f3d90cd..3cca2be28824 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -42,3 +42,4 @@ xen-gntdev-$(CONFIG_XEN_GNTDEV_DMABUF) += gntdev-dmabuf.o xen-gntalloc-y := gntalloc.o xen-privcmd-y := privcmd.o privcmd-buf.o obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o +obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index b1d8b028bf80..4bfbe71705e4 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -654,7 +654,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) } EXPORT_SYMBOL(free_xenballooned_pages); -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) static void __init balloon_add_region(unsigned long start_pfn, unsigned long pages) { @@ -708,7 +708,7 @@ static int __init balloon_init(void) register_sysctl_table(xen_root); #endif -#ifdef CONFIG_XEN_PV +#if defined(CONFIG_XEN_PV) && !defined(CONFIG_XEN_UNPOPULATED_ALLOC) { int i; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 8d06bf1cc347..523dcdf39cc9 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -801,7 +801,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages) { int ret; - ret = alloc_xenballooned_pages(nr_pages, pages); + ret = xen_alloc_unpopulated_pages(nr_pages, pages); if (ret < 0) return ret; @@ -836,7 +836,7 @@ EXPORT_SYMBOL_GPL(gnttab_pages_clear_private); void gnttab_free_pages(int nr_pages, struct page **pages) { gnttab_pages_clear_private(nr_pages, pages); - free_xenballooned_pages(nr_pages, pages); + xen_free_unpopulated_pages(nr_pages, pages); } EXPORT_SYMBOL_GPL(gnttab_free_pages); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 095d683ad574..8bcb0ce223a5 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -425,7 +425,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) if (pages == NULL) return -ENOMEM; - rc = alloc_xenballooned_pages(numpgs, pages); + rc = xen_alloc_unpopulated_pages(numpgs, pages); if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); @@ -896,7 +896,7 @@ static void privcmd_close(struct vm_area_struct *vma) rc = xen_unmap_domain_gfn_range(vma, numgfns, pages); if (rc == 0) - free_xenballooned_pages(numpgs, pages); + xen_free_unpopulated_pages(numpgs, pages); else pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", numpgs, rc); diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c new file mode 100644 index 000000000000..3b98dc921426 --- /dev/null +++ b/drivers/xen/unpopulated-alloc.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +static DEFINE_MUTEX(list_lock); +static LIST_HEAD(page_list); +static unsigned int list_count; + +static int fill_list(unsigned int nr_pages) +{ + struct dev_pagemap *pgmap; + void *vaddr; + unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION); + int ret; + + pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); + if (!pgmap) + return -ENOMEM; + + pgmap->type = MEMORY_DEVICE_GENERIC; + pgmap->res.name = "Xen scratch"; + pgmap->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = allocate_resource(&iomem_resource, &pgmap->res, + alloc_pages * PAGE_SIZE, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new IOMEM resource\n"); + kfree(pgmap); + return ret; + } + +#ifdef CONFIG_XEN_HAVE_PVMMU + /* + * memremap will build page tables for the new memory so + * the p2m must contain invalid entries so the correct + * non-present PTEs will be written. + * + * If a failure occurs, the original (identity) p2m entries + * are not restored since this region is now known not to + * conflict with any devices. + */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + xen_pfn_t pfn = PFN_DOWN(pgmap->res.start); + + for (i = 0; i < alloc_pages; i++) { + if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { + pr_warn("set_phys_to_machine() failed, no memory added\n"); + release_resource(&pgmap->res); + kfree(pgmap); + return -ENOMEM; + } + } + } +#endif + + vaddr = memremap_pages(pgmap, NUMA_NO_NODE); + if (IS_ERR(vaddr)) { + pr_err("Cannot remap memory range\n"); + release_resource(&pgmap->res); + kfree(pgmap); + return PTR_ERR(vaddr); + } + + for (i = 0; i < alloc_pages; i++) { + struct page *pg = virt_to_page(vaddr + PAGE_SIZE * i); + + BUG_ON(!virt_addr_valid(vaddr + PAGE_SIZE * i)); + list_add(&pg->lru, &page_list); + list_count++; + } + + return 0; +} + +/** + * xen_alloc_unpopulated_pages - alloc unpopulated pages + * @nr_pages: Number of pages + * @pages: pages returned + * @return 0 on success, error otherwise + */ +int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages) +{ + unsigned int i; + int ret = 0; + + mutex_lock(&list_lock); + if (list_count < nr_pages) { + ret = fill_list(nr_pages - list_count); + if (ret) + goto out; + } + + for (i = 0; i < nr_pages; i++) { + struct page *pg = list_first_entry_or_null(&page_list, + struct page, + lru); + + BUG_ON(!pg); + list_del(&pg->lru); + list_count--; + pages[i] = pg; + +#ifdef CONFIG_XEN_HAVE_PVMMU + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + ret = xen_alloc_p2m_entry(page_to_pfn(pg)); + if (ret < 0) { + unsigned int j; + + for (j = 0; j <= i; j++) { + list_add(&pages[j]->lru, &page_list); + list_count++; + } + goto out; + } + } +#endif + } + +out: + mutex_unlock(&list_lock); + return ret; +} +EXPORT_SYMBOL(xen_alloc_unpopulated_pages); + +/** + * xen_free_unpopulated_pages - return unpopulated pages + * @nr_pages: Number of pages + * @pages: pages to return + */ +void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages) +{ + unsigned int i; + + mutex_lock(&list_lock); + for (i = 0; i < nr_pages; i++) { + list_add(&pages[i]->lru, &page_list); + list_count++; + } + mutex_unlock(&list_lock); +} +EXPORT_SYMBOL(xen_free_unpopulated_pages); + +#ifdef CONFIG_XEN_PV +static int __init init(void) +{ + unsigned int i; + + if (!xen_domain()) + return -ENODEV; + + if (!xen_pv_domain()) + return 0; + + /* + * Initialize with pages from the extra memory regions (see + * arch/x86/xen/setup.c). + */ + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + unsigned int j; + + for (j = 0; j < xen_extra_mem[i].n_pfns; j++) { + struct page *pg = + pfn_to_page(xen_extra_mem[i].start_pfn + j); + + list_add(&pg->lru, &page_list); + list_count++; + } + } + + return 0; +} +subsys_initcall(init); +#endif diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 907bcbb93afb..2690318ad50f 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -621,7 +621,7 @@ static int xenbus_map_ring_hvm(struct xenbus_device *dev, bool leaked = false; unsigned int nr_pages = XENBUS_PAGES(nr_grefs); - err = alloc_xenballooned_pages(nr_pages, node->hvm.pages); + err = xen_alloc_unpopulated_pages(nr_pages, node->hvm.pages); if (err) goto out_err; @@ -662,7 +662,7 @@ static int xenbus_map_ring_hvm(struct xenbus_device *dev, addr, nr_pages); out_free_ballooned_pages: if (!leaked) - free_xenballooned_pages(nr_pages, node->hvm.pages); + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); out_err: return err; } @@ -858,7 +858,7 @@ static int xenbus_unmap_ring_hvm(struct xenbus_device *dev, void *vaddr) info.addrs); if (!rv) { vunmap(vaddr); - free_xenballooned_pages(nr_pages, node->hvm.pages); + xen_free_unpopulated_pages(nr_pages, node->hvm.pages); } else WARN(1, "Leaking %p, size %u page(s)\n", vaddr, nr_pages); diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 7b1077f0abcb..34742c6e189e 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -232,7 +232,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, kfree(pages); return -ENOMEM; } - rc = alloc_xenballooned_pages(nr_pages, pages); + rc = xen_alloc_unpopulated_pages(nr_pages, pages); if (rc) { pr_warn("%s Couldn't balloon alloc %ld pages rc:%d\n", __func__, nr_pages, rc); @@ -249,7 +249,7 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, if (!vaddr) { pr_warn("%s Couldn't map %ld pages rc:%d\n", __func__, nr_pages, rc); - free_xenballooned_pages(nr_pages, pages); + xen_free_unpopulated_pages(nr_pages, pages); kfree(pages); kfree(pfns); return -ENOMEM; diff --git a/include/xen/xen.h b/include/xen/xen.h index 19a72f591e2b..43efba045acc 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -52,4 +52,13 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, extern u64 xen_saved_max_mem_size; #endif +#ifdef CONFIG_XEN_UNPOPULATED_ALLOC +int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); +void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); +#else +#define xen_alloc_unpopulated_pages alloc_xenballooned_pages +#define xen_free_unpopulated_pages free_xenballooned_pages +#include +#endif + #endif /* _XEN_XEN_H */ From 0ee9f600e69d901d31469359287b90bbe8e54553 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 2 Sep 2020 00:03:05 +0200 Subject: [PATCH 183/648] drm/sun4i: Fix DE2 YVU handling Function sun8i_vi_layer_get_csc_mode() is supposed to return CSC mode but due to inproper return type (bool instead of u32) it returns just 0 or 1. Colors are wrong for YVU formats because of that. Fixes: daab3d0e8e2b ("drm/sun4i: de2: csc_mode in de2 format struct is mostly redundant") Reported-by: Roman Stratiienko Signed-off-by: Jernej Skrabec Tested-by: Roman Stratiienko Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200901220305.6809-1-jernej.skrabec@siol.net --- drivers/gpu/drm/sun4i/sun8i_vi_layer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c index 22c8c5375d0d..c0147af6a840 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c @@ -211,7 +211,7 @@ static int sun8i_vi_layer_update_coord(struct sun8i_mixer *mixer, int channel, return 0; } -static bool sun8i_vi_layer_get_csc_mode(const struct drm_format_info *format) +static u32 sun8i_vi_layer_get_csc_mode(const struct drm_format_info *format) { if (!format->is_yuv) return SUN8I_CSC_MODE_OFF; From e359c70462d2a82aae80274d027351d38792dde6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 28 Jul 2020 15:48:09 +0200 Subject: [PATCH 184/648] drm/sun4i: backend: Support alpha property on lowest plane Unlike what we previously thought, only the per-pixel alpha is broken on the lowest plane and the per-plane alpha isn't. Remove the check on the alpha property being set on the lowest plane to reject a mode. Fixes: dcf496a6a608 ("drm/sun4i: sun4i: Introduce a quirk for lowest plane alpha support") Signed-off-by: Maxime Ripard Reviewed-by: Chen-Yu Tsai Cc: Paul Kocialkowski Link: https://patchwork.freedesktop.org/patch/msgid/20200728134810.883457-1-maxime@cerno.tech --- drivers/gpu/drm/sun4i/sun4i_backend.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 072ea113e6be..30672c4c634d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -589,8 +589,7 @@ static int sun4i_backend_atomic_check(struct sunxi_engine *engine, /* We can't have an alpha plane at the lowest position */ if (!backend->quirks->supports_lowest_plane_alpha && - (plane_states[0]->fb->format->has_alpha || - (plane_states[0]->alpha != DRM_BLEND_ALPHA_OPAQUE))) + (plane_states[0]->alpha != DRM_BLEND_ALPHA_OPAQUE)) return -EINVAL; for (i = 1; i < num_planes; i++) { From 5e2e2600a3744491a8b49b92597c13b693692082 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 28 Jul 2020 15:48:10 +0200 Subject: [PATCH 185/648] drm/sun4i: backend: Disable alpha on the lowest plane on the A20 Unlike we previously thought, the per-pixel alpha is just as broken on the A20 as it is on the A10. Remove the quirk that says we can use it. Fixes: dcf496a6a608 ("drm/sun4i: sun4i: Introduce a quirk for lowest plane alpha support") Signed-off-by: Maxime Ripard Reviewed-by: Chen-Yu Tsai Cc: Paul Kocialkowski Link: https://patchwork.freedesktop.org/patch/msgid/20200728134810.883457-2-maxime@cerno.tech --- drivers/gpu/drm/sun4i/sun4i_backend.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 30672c4c634d..ed5d86617802 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -994,7 +994,6 @@ static const struct sun4i_backend_quirks sun6i_backend_quirks = { static const struct sun4i_backend_quirks sun7i_backend_quirks = { .needs_output_muxing = true, - .supports_lowest_plane_alpha = true, }; static const struct sun4i_backend_quirks sun8i_a33_backend_quirks = { From 365d2a23663711c32e778c9c18b07163f9193925 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 25 Aug 2020 07:38:28 +0200 Subject: [PATCH 186/648] MAINTAINERS: Update QUALCOMM IOMMU after Arm SMMU drivers move Commit e86d1aa8b60f ("iommu/arm-smmu: Move Arm SMMU drivers into their own subdirectory") moved drivers/iommu/qcom_iommu.c to drivers/iommu/arm/arm-smmu/qcom_iommu.c amongst other moves, adjusted some sections in MAINTAINERS, but missed adjusting the QUALCOMM IOMMU section. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: drivers/iommu/qcom_iommu.c Update the file entry in MAINTAINERS to the new location. Signed-off-by: Lukas Bulwahn Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200825053828.4166-1-lukas.bulwahn@gmail.com Signed-off-by: Joerg Roedel --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e4647c84c987..0079d067b74c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14366,7 +14366,7 @@ M: Rob Clark L: iommu@lists.linux-foundation.org L: linux-arm-msm@vger.kernel.org S: Maintained -F: drivers/iommu/qcom_iommu.c +F: drivers/iommu/arm/arm-smmu/qcom_iommu.c QUALCOMM IPCC MAILBOX DRIVER M: Manivannan Sadhasivam From 6e4e9ec65078093165463c13d4eb92b3e8d7b2e8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 28 Aug 2020 08:06:15 +0800 Subject: [PATCH 187/648] iommu/vt-d: Serialize IOMMU GCMD register modifications The VT-d spec requires (10.4.4 Global Command Register, GCMD_REG General Description) that: If multiple control fields in this register need to be modified, software must serialize the modifications through multiple writes to this register. However, in irq_remapping.c, modifications of IRE and CFI are done in one write. We need to do two separate writes with STS checking after each. It also checks the status register before writing command register to avoid unnecessary register write. Fixes: af8d102f999a4 ("x86/intel/irq_remapping: Clean up x2apic opt-out security warning mess") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Cc: Andy Lutomirski Cc: Jacob Pan Cc: Kevin Tian Cc: Ashok Raj Link: https://lore.kernel.org/r/20200828000615.8281-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/irq_remapping.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 23583b0e66a5..8f4ce72570ce 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -508,12 +508,18 @@ static void iommu_enable_irq_remapping(struct intel_iommu *iommu) /* Enable interrupt-remapping */ iommu->gcmd |= DMA_GCMD_IRE; - iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); + /* Block compatibility-format MSIs */ + if (sts & DMA_GSTS_CFIS) { + iommu->gcmd &= ~DMA_GCMD_CFI; + writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, !(sts & DMA_GSTS_CFIS), sts); + } + /* * With CFI clear in the Global Command register, we should be * protected from dangerous (i.e. compatibility) interrupts From 2d33b7d631d9dc81c78bb71368645cf7f0e68cb1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 3 Sep 2020 14:51:32 +0800 Subject: [PATCH 188/648] iommu/vt-d: Fix NULL pointer dereference in dev_iommu_priv_set() The dev_iommu_priv_set() must be called after probe_device(). This fixes a NULL pointer deference bug when booting a system with kernel cmdline "intel_iommu=on,igfx_off", where the dev_iommu_priv_set() is abused. The following stacktrace was produced: Command line: BOOT_IMAGE=/isolinux/bzImage console=tty1 intel_iommu=on,igfx_off ... DMAR: Host address width 39 DMAR: DRHD base: 0x000000fed90000 flags: 0x0 DMAR: dmar0: reg_base_addr fed90000 ver 1:0 cap 1c0000c40660462 ecap 19e2ff0505e DMAR: DRHD base: 0x000000fed91000 flags: 0x1 DMAR: dmar1: reg_base_addr fed91000 ver 1:0 cap d2008c40660462 ecap f050da DMAR: RMRR base: 0x0000009aa9f000 end: 0x0000009aabefff DMAR: RMRR base: 0x0000009d000000 end: 0x0000009f7fffff DMAR: No ATSR found BUG: kernel NULL pointer dereference, address: 0000000000000038 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.9.0-devel+ #2 Hardware name: LENOVO 20HGS0TW00/20HGS0TW00, BIOS N1WET46S (1.25s ) 03/30/2018 RIP: 0010:intel_iommu_init+0xed0/0x1136 Code: fe e9 61 02 00 00 bb f4 ff ff ff e9 57 02 00 00 48 63 d1 48 c1 e2 04 48 03 50 20 48 8b 12 48 85 d2 74 0b 48 8b 92 d0 02 00 00 48 89 7a 38 ff c1 e9 15 f5 ff ff 48 c7 c7 60 99 ac a7 49 c7 c7 a0 RSP: 0000:ffff96d180073dd0 EFLAGS: 00010282 RAX: ffff8c91037a7d20 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffffffffff RBP: ffff96d180073e90 R08: 0000000000000001 R09: ffff8c91039fe3c0 R10: 0000000000000226 R11: 0000000000000226 R12: 000000000000000b R13: ffff8c910367c650 R14: ffffffffa8426d60 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8c9107480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000038 CR3: 00000004b100a001 CR4: 00000000003706e0 Call Trace: ? _raw_spin_unlock_irqrestore+0x1f/0x30 ? call_rcu+0x10e/0x320 ? trace_hardirqs_on+0x2c/0xd0 ? rdinit_setup+0x2c/0x2c ? e820__memblock_setup+0x8b/0x8b pci_iommu_init+0x16/0x3f do_one_initcall+0x46/0x1e4 kernel_init_freeable+0x169/0x1b2 ? rest_init+0x9f/0x9f kernel_init+0xa/0x101 ret_from_fork+0x22/0x30 Modules linked in: CR2: 0000000000000038 ---[ end trace 3653722a6f936f18 ]--- Fixes: 01b9d4e21148c ("iommu/vt-d: Use dev_iommu_priv_get/set()") Reported-by: Torsten Hilbrich Reported-by: Wendy Wang Signed-off-by: Lu Baolu Tested-by: Torsten Hilbrich Link: https://lore.kernel.org/linux-iommu/96717683-70be-7388-3d2f-61131070a96a@secunet.com/ Link: https://lore.kernel.org/r/20200903065132.16879-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 100 ++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f8177c59d229..acd537294c92 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -364,7 +364,6 @@ static int iommu_skip_te_disable; int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); -#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) #define DEFER_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-2)) struct device_domain_info *get_domain_info(struct device *dev) { @@ -374,8 +373,7 @@ struct device_domain_info *get_domain_info(struct device *dev) return NULL; info = dev_iommu_priv_get(dev); - if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO || - info == DEFER_DEVICE_DOMAIN_INFO)) + if (unlikely(info == DEFER_DEVICE_DOMAIN_INFO)) return NULL; return info; @@ -742,11 +740,6 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, return &context[devfn]; } -static int iommu_dummy(struct device *dev) -{ - return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO; -} - static bool attach_deferred(struct device *dev) { return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO; @@ -779,6 +772,53 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) return false; } +static bool quirk_ioat_snb_local_iommu(struct pci_dev *pdev) +{ + struct dmar_drhd_unit *drhd; + u32 vtbar; + int rc; + + /* We know that this device on this chipset has its own IOMMU. + * If we find it under a different IOMMU, then the BIOS is lying + * to us. Hope that the IOMMU for this device is actually + * disabled, and it needs no translation... + */ + rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); + if (rc) { + /* "can't" happen */ + dev_info(&pdev->dev, "failed to run vt-d quirk\n"); + return false; + } + vtbar &= 0xffff0000; + + /* we know that the this iommu should be at offset 0xa000 from vtbar */ + drhd = dmar_find_matched_drhd_unit(pdev); + if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { + pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + return true; + } + + return false; +} + +static bool iommu_is_dummy(struct intel_iommu *iommu, struct device *dev) +{ + if (!iommu || iommu->drhd->ignored) + return true; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SNB && + quirk_ioat_snb_local_iommu(pdev)) + return true; + } + + return false; +} + struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; @@ -788,7 +828,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) u16 segment = 0; int i; - if (!dev || iommu_dummy(dev)) + if (!dev) return NULL; if (dev_is_pci(dev)) { @@ -805,7 +845,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) dev = &ACPI_COMPANION(dev)->dev; rcu_read_lock(); - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { if (pdev && segment != drhd->segment) continue; @@ -841,6 +881,9 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) } iommu = NULL; out: + if (iommu_is_dummy(iommu, dev)) + iommu = NULL; + rcu_read_unlock(); return iommu; @@ -2447,7 +2490,7 @@ struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; - if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) + if (unlikely(attach_deferred(dev))) return NULL; /* No lock here, assumes no domain exit in normal case */ @@ -3989,35 +4032,6 @@ static void __init iommu_exit_mempool(void) iova_cache_put(); } -static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) -{ - struct dmar_drhd_unit *drhd; - u32 vtbar; - int rc; - - /* We know that this device on this chipset has its own IOMMU. - * If we find it under a different IOMMU, then the BIOS is lying - * to us. Hope that the IOMMU for this device is actually - * disabled, and it needs no translation... - */ - rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); - if (rc) { - /* "can't" happen */ - dev_info(&pdev->dev, "failed to run vt-d quirk\n"); - return; - } - vtbar &= 0xffff0000; - - /* we know that the this iommu should be at offset 0xa000 from vtbar */ - drhd = dmar_find_matched_drhd_unit(pdev); - if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { - pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - dev_iommu_priv_set(&pdev->dev, DUMMY_DEVICE_DOMAIN_INFO); - } -} -DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); - static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; @@ -4049,12 +4063,8 @@ static void __init init_no_remapping_devices(void) /* This IOMMU has *only* gfx devices. Either bypass it or set the gfx_mapped flag, as appropriate */ drhd->gfx_dedicated = 1; - if (!dmar_map_gfx) { + if (!dmar_map_gfx) drhd->ignored = 1; - for_each_active_dev_scope(drhd->devices, - drhd->devices_cnt, i, dev) - dev_iommu_priv_set(dev, DUMMY_DEVICE_DOMAIN_INFO); - } } } From 3fb884ffe921c99483a84b0175f3c03f048e9069 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Sep 2020 11:18:29 +0100 Subject: [PATCH 189/648] KVM: arm64: Do not try to map PUDs when they are folded into PMD For the obscure cases where PMD and PUD are the same size (64kB pages with 42bit VA, for example, which results in only two levels of page tables), we can't map anything as a PUD, because there is... erm... no PUD to speak of. Everything is either a PMD or a PTE. So let's only try and map a PUD when its size is different from that of a PMD. Cc: stable@vger.kernel.org Fixes: b8e0ba7c8bea ("KVM: arm64: Add support for creating PUD hugepages at stage 2") Reported-by: Gavin Shan Reported-by: Eric Auger Reviewed-by: Alexandru Elisei Reviewed-by: Gavin Shan Tested-by: Gavin Shan Tested-by: Eric Auger Tested-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/mmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0121ef2c7c8d..16b8660ddbcc 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1964,7 +1964,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, (fault_status == FSC_PERM && stage2_is_exec(mmu, fault_ipa, vma_pagesize)); - if (vma_pagesize == PUD_SIZE) { + /* + * If PUD_SIZE == PMD_SIZE, there is no real PUD level, and + * all we have is a 2-level page table. Trying to map a PUD in + * this case would be fatally wrong. + */ + if (PUD_SIZE != PMD_SIZE && vma_pagesize == PUD_SIZE) { pud_t new_pud = kvm_pfn_pud(pfn, mem_type); new_pud = kvm_pud_mkhuge(new_pud); From 26e495f341075c09023ba16dee9a7f37a021e745 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 3 Sep 2020 09:38:21 +0000 Subject: [PATCH 190/648] iommu/amd: Restore IRTE.RemapEn bit after programming IRTE Currently, the RemapEn (valid) bit is accidentally cleared when programming IRTE w/ guestMode=0. It should be restored to the prior state. Fixes: b9fc6b56f478 ("iommu/amd: Implements irq_set_vcpu_affinity() hook to setup vapic mode for pass-through devices") Signed-off-by: Suravee Suthikulpanit Reviewed-by: Joao Martins Link: https://lore.kernel.org/r/20200903093822.52012-2-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index ba9f3dbc5b94..967f4e96d1eb 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3850,6 +3850,7 @@ int amd_iommu_deactivate_guest_mode(void *data) struct amd_ir_data *ir_data = (struct amd_ir_data *)data; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; struct irq_cfg *cfg = ir_data->cfg; + u64 valid = entry->lo.fields_remap.valid; if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry || !entry->lo.fields_vapic.guest_mode) @@ -3858,6 +3859,7 @@ int amd_iommu_deactivate_guest_mode(void *data) entry->lo.val = 0; entry->hi.val = 0; + entry->lo.fields_remap.valid = valid; entry->lo.fields_remap.dm = apic->irq_dest_mode; entry->lo.fields_remap.int_type = apic->irq_delivery_mode; entry->hi.fields.vector = cfg->vector; From e52d58d54a321d4fe9d0ecdabe4f8774449f0d6e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 3 Sep 2020 09:38:22 +0000 Subject: [PATCH 191/648] iommu/amd: Use cmpxchg_double() when updating 128-bit IRTE When using 128-bit interrupt-remapping table entry (IRTE) (a.k.a GA mode), current driver disables interrupt remapping when it updates the IRTE so that the upper and lower 64-bit values can be updated safely. However, this creates a small window, where the interrupt could arrive and result in IO_PAGE_FAULT (for interrupt) as shown below. IOMMU Driver Device IRQ ============ =========== irte.RemapEn=0 ... change IRTE IRQ from device ==> IO_PAGE_FAULT !! ... irte.RemapEn=1 This scenario has been observed when changing irq affinity on a system running I/O-intensive workload, in which the destination APIC ID in the IRTE is updated. Instead, use cmpxchg_double() to update the 128-bit IRTE at once without disabling the interrupt remapping. However, this means several features, which require GA (128-bit IRTE) support will also be affected if cmpxchg16b is not supported (which is unprecedented for AMD processors w/ IOMMU). Fixes: 880ac60e2538 ("iommu/amd: Introduce interrupt remapping ops structure") Reported-by: Sean Osborne Signed-off-by: Suravee Suthikulpanit Tested-by: Erik Rockstrom Reviewed-by: Joao Martins Link: https://lore.kernel.org/r/20200903093822.52012-3-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/Kconfig | 2 +- drivers/iommu/amd/init.c | 21 +++++++++++++++++++-- drivers/iommu/amd/iommu.c | 17 +++++++++++++---- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 1f061d91e0b8..626b97d0dd21 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,7 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA - depends on X86_64 && PCI && ACPI + depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c652f16eb702..445a08d23fed 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1511,7 +1511,14 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + + /* + * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. + * GAM also requires GA mode. Therefore, we need to + * check cmpxchg16b support before enabling it. + */ + if (!boot_cpu_has(X86_FEATURE_CX16) || + ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: @@ -1520,8 +1527,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) + + /* + * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports. + * XT, GAM also requires GA mode. Therefore, we need to + * check cmpxchg16b support before enabling them. + */ + if (!boot_cpu_has(X86_FEATURE_CX16) || + ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + break; + } + /* * Note: Since iommu_update_intcapxt() leverages * the IOMMU MMIO access to MSI capability block registers diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 967f4e96d1eb..a382d7a73eaa 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3292,6 +3292,7 @@ static int alloc_irq_index(u16 devid, int count, bool align, static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, struct amd_ir_data *data) { + bool ret; struct irq_remap_table *table; struct amd_iommu *iommu; unsigned long flags; @@ -3309,10 +3310,18 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, entry = (struct irte_ga *)table->table; entry = &entry[index]; - entry->lo.fields_remap.valid = 0; - entry->hi.val = irte->hi.val; - entry->lo.val = irte->lo.val; - entry->lo.fields_remap.valid = 1; + + ret = cmpxchg_double(&entry->lo.val, &entry->hi.val, + entry->lo.val, entry->hi.val, + irte->lo.val, irte->hi.val); + /* + * We use cmpxchg16 to atomically update the 128-bit IRTE, + * and it cannot be updated by the hardware or other processors + * behind us, so the return value of cmpxchg16 should be the + * same as the old value. + */ + WARN_ON(!ret); + if (data) data->ref = entry; From 376426b1a953762b00df887e28d29e44ab4ff723 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Sep 2020 11:53:03 +0100 Subject: [PATCH 192/648] KVM: arm64: Fix address truncation in traces Owing to their ARMv7 origins, the trace events are truncating most address values to 32bits. That's not really helpful. Expand the printing of such values to their full glory. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/trace_arm.h | 16 ++++++++-------- arch/arm64/kvm/trace_handle_exit.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h index 4691053c5ee4..ff0444352bba 100644 --- a/arch/arm64/kvm/trace_arm.h +++ b/arch/arm64/kvm/trace_arm.h @@ -23,7 +23,7 @@ TRACE_EVENT(kvm_entry, __entry->vcpu_pc = vcpu_pc; ), - TP_printk("PC: 0x%08lx", __entry->vcpu_pc) + TP_printk("PC: 0x%016lx", __entry->vcpu_pc) ); TRACE_EVENT(kvm_exit, @@ -42,7 +42,7 @@ TRACE_EVENT(kvm_exit, __entry->vcpu_pc = vcpu_pc; ), - TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx", + TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%016lx", __print_symbolic(__entry->ret, kvm_arm_exception_type), __entry->esr_ec, __print_symbolic(__entry->esr_ec, kvm_arm_exception_class), @@ -69,7 +69,7 @@ TRACE_EVENT(kvm_guest_fault, __entry->ipa = ipa; ), - TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#016lx", __entry->ipa, __entry->hsr, __entry->hxfar, __entry->vcpu_pc) ); @@ -131,7 +131,7 @@ TRACE_EVENT(kvm_mmio_emulate, __entry->cpsr = cpsr; ), - TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)", + TP_printk("Emulate MMIO at: 0x%016lx (instr: %08lx, cpsr: %08lx)", __entry->vcpu_pc, __entry->instr, __entry->cpsr) ); @@ -149,7 +149,7 @@ TRACE_EVENT(kvm_unmap_hva_range, __entry->end = end; ), - TP_printk("mmu notifier unmap range: %#08lx -- %#08lx", + TP_printk("mmu notifier unmap range: %#016lx -- %#016lx", __entry->start, __entry->end) ); @@ -165,7 +165,7 @@ TRACE_EVENT(kvm_set_spte_hva, __entry->hva = hva; ), - TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) + TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva) ); TRACE_EVENT(kvm_age_hva, @@ -182,7 +182,7 @@ TRACE_EVENT(kvm_age_hva, __entry->end = end; ), - TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + TP_printk("mmu notifier age hva: %#016lx -- %#016lx", __entry->start, __entry->end) ); @@ -198,7 +198,7 @@ TRACE_EVENT(kvm_test_age_hva, __entry->hva = hva; ), - TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) + TP_printk("mmu notifier test age hva: %#016lx", __entry->hva) ); TRACE_EVENT(kvm_set_way_flush, diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h index 2c56d1e0f5bd..8d78acc4fba7 100644 --- a/arch/arm64/kvm/trace_handle_exit.h +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -22,7 +22,7 @@ TRACE_EVENT(kvm_wfx_arm64, __entry->is_wfe = is_wfe; ), - TP_printk("guest executed wf%c at: 0x%08lx", + TP_printk("guest executed wf%c at: 0x%016lx", __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) ); @@ -42,7 +42,7 @@ TRACE_EVENT(kvm_hvc_arm64, __entry->imm = imm; ), - TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", + TP_printk("HVC at 0x%016lx (r0: 0x%016lx, imm: 0x%lx)", __entry->vcpu_pc, __entry->r0, __entry->imm) ); @@ -135,7 +135,7 @@ TRACE_EVENT(trap_reg, __entry->write_value = write_value; ), - TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) + TP_printk("%s %s reg %d (0x%016llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) ); TRACE_EVENT(kvm_handle_sys_reg, From 7b75cd5128421c673153efb1236705696a1a9812 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Sep 2020 14:33:56 +0100 Subject: [PATCH 193/648] KVM: arm64: Update page shift if stage 2 block mapping not supported Commit 196f878a7ac2e (" KVM: arm/arm64: Signal SIGBUS when stage2 discovers hwpoison memory") modifies user_mem_abort() to send a SIGBUS signal when the fault IPA maps to a hwpoisoned page. Commit 1559b7583ff6 ("KVM: arm/arm64: Re-check VMA on detecting a poisoned page") changed kvm_send_hwpoison_signal() to use the page shift instead of the VMA because at that point the code had already released the mmap lock, which means userspace could have modified the VMA. If userspace uses hugetlbfs for the VM memory, user_mem_abort() tries to map the guest fault IPA using block mappings in stage 2. That is not always possible, if, for example, userspace uses dirty page logging for the VM. Update the page shift appropriately in those cases when we downgrade the stage 2 entry from a block mapping to a page. Fixes: 1559b7583ff6 ("KVM: arm/arm64: Re-check VMA on detecting a poisoned page") Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20200901133357.52640-2-alexandru.elisei@arm.com --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 16b8660ddbcc..f58d657a898d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1871,6 +1871,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { force_pte = true; vma_pagesize = PAGE_SIZE; + vma_shift = PAGE_SHIFT; } /* From 7cad554887f1c5fd77e57e6bf4be38370c2160cb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 24 Aug 2020 12:54:14 +0200 Subject: [PATCH 194/648] iommu/amd: Do not force direct mapping when SME is active Do not force devices supporting IOMMUv2 to be direct mapped when memory encryption is active. This might cause them to be unusable because their DMA mask does not include the encryption bit. Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20200824105415.21000-2-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index a382d7a73eaa..07ae8b93887e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2659,7 +2659,12 @@ static int amd_iommu_def_domain_type(struct device *dev) if (!dev_data) return 0; - if (dev_data->iommu_v2) + /* + * Do not identity map IOMMUv2 capable devices when memory encryption is + * active, because some of those devices (AMD GPUs) don't have the + * encryption bit in their DMA-mask and require remapping. + */ + if (!mem_encrypt_active() && dev_data->iommu_v2) return IOMMU_DOMAIN_IDENTITY; return 0; From 2822e582501b65707089b097e773e6fd70774841 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 24 Aug 2020 12:54:15 +0200 Subject: [PATCH 195/648] iommu/amd: Do not use IOMMUv2 functionality when SME is active When memory encryption is active the device is likely not in a direct mapped domain. Forbid using IOMMUv2 functionality for now until finer grained checks for this have been implemented. Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20200824105415.21000-3-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu_v2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index c259108ab6dd..0d175aed1d92 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -737,6 +737,13 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) might_sleep(); + /* + * When memory encryption is active the device is likely not in a + * direct-mapped domain. Forbid using IOMMUv2 functionality for now. + */ + if (mem_encrypt_active()) + return -ENODEV; + if (!amd_iommu_v2_supported()) return -ENODEV; From 29aaebbca4abc4cceb38738483051abefafb6950 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 22 Aug 2020 17:02:09 +0100 Subject: [PATCH 196/648] iommu/vt-d: Handle 36bit addressing for x86-32 Beware that the address size for x86-32 may exceed unsigned long. [ 0.368971] UBSAN: shift-out-of-bounds in drivers/iommu/intel/iommu.c:128:14 [ 0.369055] shift exponent 36 is too large for 32-bit type 'long unsigned int' If we don't handle the wide addresses, the pages are mismapped and the device read/writes go astray, detected as DMAR faults and leading to device failure. The behaviour changed (from working to broken) in commit fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer"), but the error looks older. Fixes: fa954e683178 ("iommu/vt-d: Delegate the dma domain to upper layer") Signed-off-by: Chris Wilson Acked-by: Lu Baolu Cc: James Sewart Cc: Lu Baolu Cc: Joerg Roedel Cc: # v5.3+ Link: https://lore.kernel.org/r/20200822160209.28512-1-chris@chris-wilson.co.uk Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index acd537294c92..87b17bac04c2 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -123,29 +123,29 @@ static inline unsigned int level_to_offset_bits(int level) return (level - 1) * LEVEL_STRIDE; } -static inline int pfn_level_offset(unsigned long pfn, int level) +static inline int pfn_level_offset(u64 pfn, int level) { return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; } -static inline unsigned long level_mask(int level) +static inline u64 level_mask(int level) { - return -1UL << level_to_offset_bits(level); + return -1ULL << level_to_offset_bits(level); } -static inline unsigned long level_size(int level) +static inline u64 level_size(int level) { - return 1UL << level_to_offset_bits(level); + return 1ULL << level_to_offset_bits(level); } -static inline unsigned long align_to_level(unsigned long pfn, int level) +static inline u64 align_to_level(u64 pfn, int level) { return (pfn + level_size(level) - 1) & level_mask(level); } static inline unsigned long lvl_to_nr_pages(unsigned int lvl) { - return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); + return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); } /* VT-d pages must always be _smaller_ than MM pages. Otherwise things From fc7f148feb8975784bd40e41fff875d33f698be8 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 18 Aug 2020 09:25:11 +0200 Subject: [PATCH 197/648] drm/virtio: drop virtio_gpu_output->enabled Not needed, already tracked by drm_crtc_state->active. Signed-off-by: Gerd Hoffmann Acked-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20200818072511.6745-3-kraxel@redhat.com (cherry picked from commit 1174c8a0f33c1e5c442ac40381fe124248c08b3a) --- drivers/gpu/drm/virtio/virtgpu_display.c | 4 ---- drivers/gpu/drm/virtio/virtgpu_drv.h | 1 - drivers/gpu/drm/virtio/virtgpu_plane.c | 2 +- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 97170998cd0e..afd0f9200f90 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -97,9 +97,6 @@ static void virtio_gpu_crtc_mode_set_nofb(struct drm_crtc *crtc) static void virtio_gpu_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { - struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc); - - output->enabled = true; } static void virtio_gpu_crtc_atomic_disable(struct drm_crtc *crtc, @@ -111,7 +108,6 @@ static void virtio_gpu_crtc_atomic_disable(struct drm_crtc *crtc, virtio_gpu_cmd_set_scanout(vgdev, output->index, 0, 0, 0, 0, 0); virtio_gpu_notify(vgdev); - output->enabled = false; } static int virtio_gpu_crtc_atomic_check(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 4ab1b0ba2925..fbc04272db4f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -137,7 +137,6 @@ struct virtio_gpu_output { struct edid *edid; int cur_x; int cur_y; - bool enabled; bool needs_modeset; }; #define drm_crtc_to_virtio_gpu_output(x) \ diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 65757409d9ed..6a311cd93440 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -142,7 +142,7 @@ static void virtio_gpu_primary_plane_update(struct drm_plane *plane, if (WARN_ON(!output)) return; - if (!plane->state->fb || !output->enabled) { + if (!plane->state->fb || !output->crtc.state->active) { DRM_DEBUG("nofb\n"); virtio_gpu_cmd_set_scanout(vgdev, output->index, 0, plane->state->src_w >> 16, From 2356bb4b8221d7dc8c7beb810418122ed90254c9 Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Fri, 28 Aug 2020 17:02:46 +0530 Subject: [PATCH 198/648] tracing/kprobes, x86/ptrace: Fix regs argument order for i386 On i386, the order of parameters passed on regs is eax,edx,and ecx (as per regparm(3) calling conventions). Change the mapping in regs_get_kernel_argument(), so that arg1=ax arg2=dx, and arg3=cx. Running the selftests testcase kprobes_args_use.tc shows the result as passed. Fixes: 3c88ee194c28 ("x86: ptrace: Add function argument access API") Signed-off-by: Vamshi K Sthambamkadi Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Acked-by: Peter Zijlstra (Intel) Cc: Link: https://lkml.kernel.org/r/20200828113242.GA1424@cosmos --- arch/x86/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 40aa69d04862..d8324a236696 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -327,8 +327,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), From 662a0221893a3d58aa72719671844264306f6e4b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Sep 2020 15:25:50 +0200 Subject: [PATCH 199/648] x86/entry: Fix AC assertion The WARN added in commit 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks") unconditionally triggers on a IVB machine because it does not support SMAP. For !SMAP hardware the CLAC/STAC instructions are patched out and thus if userspace sets AC, it is still have set after entry. Fixes: 3c73b81a9164 ("x86/entry, selftests: Further improve user entry sanity checks") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Acked-by: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200902133200.666781610@infradead.org --- arch/x86/include/asm/entry-common.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index a8f9315b9eae..6fe54b2813c1 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -18,8 +18,16 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) * state, not the interrupt state as imagined by Xen. */ unsigned long flags = native_save_fl(); - WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | - X86_EFLAGS_NT)); + unsigned long mask = X86_EFLAGS_DF | X86_EFLAGS_NT; + + /* + * For !SMAP hardware we patch out CLAC on entry. + */ + if (boot_cpu_has(X86_FEATURE_SMAP) || + (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV))) + mask |= X86_EFLAGS_AC; + + WARN_ON_ONCE(flags & mask); /* We think we came from user mode. Make sure pt_regs agrees. */ WARN_ON_ONCE(!user_mode(regs)); From d5c678aed5eddb944b8e7ce451b107b39245962d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 2 Sep 2020 15:25:51 +0200 Subject: [PATCH 200/648] x86/debug: Allow a single level of #DB recursion Trying to clear DR7 around a #DB from usermode malfunctions if the tasks schedules when delivering SIGTRAP. Rather than trying to define a special no-recursion region, just allow a single level of recursion. The same mechanism is used for NMI, and it hasn't caused any problems yet. Fixes: 9f58fdde95c9 ("x86/db: Split out dr6/7 handling") Reported-by: Kyle Huey Debugged-by: Josh Poimboeuf Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Daniel Thompson Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/8b9bd05f187231df008d48cf818a6a311cbd5c98.1597882384.git.luto@kernel.org Link: https://lore.kernel.org/r/20200902133200.726584153@infradead.org --- arch/x86/kernel/traps.c | 65 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1f66d2d1e998..81a2fb711091 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -729,20 +729,9 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) +static __always_inline unsigned long debug_read_clear_dr6(void) { - /* - * Disable breakpoints during exception handling; recursive exceptions - * are exceedingly 'fun'. - * - * Since this function is NOKPROBE, and that also applies to - * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a - * HW_BREAKPOINT_W on our stack) - * - * Entry text is excluded for HW_BP_X and cpu_entry_area, which - * includes the entry stack is excluded for everything. - */ - *dr7 = local_db_save(); + unsigned long dr6; /* * The Intel SDM says: @@ -755,15 +744,12 @@ static __always_inline void debug_enter(unsigned long *dr6, unsigned long *dr7) * * Keep it simple: clear DR6 immediately. */ - get_debugreg(*dr6, 6); + get_debugreg(dr6, 6); set_debugreg(0, 6); /* Filter out all the reserved bits which are preset to 1 */ - *dr6 &= ~DR6_RESERVED; -} + dr6 &= ~DR6_RESERVED; -static __always_inline void debug_exit(unsigned long dr7) -{ - local_db_restore(dr7); + return dr6; } /* @@ -863,6 +849,18 @@ static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user) static __always_inline void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { + /* + * Disable breakpoints during exception handling; recursive exceptions + * are exceedingly 'fun'. + * + * Since this function is NOKPROBE, and that also applies to + * HW_BREAKPOINT_X, we can't hit a breakpoint before this (XXX except a + * HW_BREAKPOINT_W on our stack) + * + * Entry text is excluded for HW_BP_X and cpu_entry_area, which + * includes the entry stack is excluded for everything. + */ + unsigned long dr7 = local_db_save(); bool irq_state = idtentry_enter_nmi(regs); instrumentation_begin(); @@ -883,6 +881,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, instrumentation_end(); idtentry_exit_nmi(regs, irq_state); + + local_db_restore(dr7); } static __always_inline void exc_debug_user(struct pt_regs *regs, @@ -894,6 +894,15 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, */ WARN_ON_ONCE(!user_mode(regs)); + /* + * NB: We can't easily clear DR7 here because + * idtentry_exit_to_usermode() can invoke ptrace, schedule, access + * user memory, etc. This means that a recursive #DB is possible. If + * this happens, that #DB will hit exc_debug_kernel() and clear DR7. + * Since we're not on the IST stack right now, everything will be + * fine. + */ + irqentry_enter_from_user_mode(regs); instrumentation_begin(); @@ -907,36 +916,24 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); - exc_debug_kernel(regs, dr6); - debug_exit(dr7); + exc_debug_kernel(regs, debug_read_clear_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); - exc_debug_user(regs, dr6); - debug_exit(dr7); + exc_debug_user(regs, debug_read_clear_dr6()); } #else /* 32 bit does not have separate entry points. */ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6, dr7; - - debug_enter(&dr6, &dr7); + unsigned long dr6 = debug_read_clear_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); else exc_debug_kernel(regs, dr6); - - debug_exit(dr7); } #endif From 4facb95b7adaf77e2da73aafb9ba60996fe42a12 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Sep 2020 01:50:54 +0200 Subject: [PATCH 201/648] x86/entry: Unbreak 32bit fast syscall Andy reported that the syscall treacing for 32bit fast syscall fails: # ./tools/testing/selftests/x86/ptrace_syscall_32 ... [RUN] SYSEMU [FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732) ... [RUN] SYSCALL [FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732) The eason is that the conversion to generic entry code moved the retrieval of the sixth argument (EBP) after the point where the syscall entry work runs, i.e. ptrace, seccomp, audit... Unbreak it by providing a split up version of syscall_enter_from_user_mode(). - syscall_enter_from_user_mode_prepare() establishes state and enables interrupts - syscall_enter_from_user_mode_work() runs the entry work Replace the call to syscall_enter_from_user_mode() in the 32bit fast syscall C-entry with the split functions and stick the EBP retrieval between them. Fixes: 27d6b4d14f5c ("x86/entry: Use generic syscall entry function") Reported-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87k0xdjbtt.fsf@nanos.tec.linutronix.de --- arch/x86/entry/common.c | 29 +++++++++++++------- include/linux/entry-common.h | 51 +++++++++++++++++++++++++++++------- kernel/entry/common.c | 35 ++++++++++++++++++++----- 3 files changed, 91 insertions(+), 24 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 48512c7944e7..2f84c7ca74ea 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) { - unsigned int nr = (unsigned int)regs->orig_ax; - if (IS_ENABLED(CONFIG_IA32_EMULATION)) current_thread_info()->status |= TS_COMPAT; - /* - * Subtlety here: if ptrace pokes something larger than 2^32-1 into - * orig_ax, the unsigned int return value truncates it. This may - * or may not be necessary, but it matches the old asm behavior. - */ - return (unsigned int)syscall_enter_from_user_mode(regs, nr); + + return (unsigned int)regs->orig_ax; } /* @@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { unsigned int nr = syscall_32_enter(regs); + /* + * Subtlety here: if ptrace pokes something larger than 2^32-1 into + * orig_ax, the unsigned int return value truncates it. This may + * or may not be necessary, but it matches the old asm behavior. + */ + nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); } static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { - unsigned int nr = syscall_32_enter(regs); + unsigned int nr = syscall_32_enter(regs); int res; + /* + * This cannot use syscall_enter_from_user_mode() as it has to + * fetch EBP before invoking any of the syscall entry work + * functions. + */ + syscall_enter_from_user_mode_prepare(regs); + instrumentation_begin(); /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { @@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) return false; } + /* The case truncates any ptrace induced syscall nr > 2^32 -1 */ + nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr); + /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); syscall_exit_to_user_mode(regs); diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..159c7476b11b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #endif /** - * syscall_enter_from_user_mode - Check and handle work before invoking - * a syscall + * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts + * @regs: Pointer to currents pt_regs + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This handles lockdep, RCU (context tracking) and tracing state. + * + * This is invoked when there is extra architecture specific functionality + * to be done between establishing state and handling user mode entry work. + */ +void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); + +/** + * syscall_enter_from_user_mode_work - Check and handle work before invoking + * a syscall * @regs: Pointer to currents pt_regs * @syscall: The syscall number * * Invoked from architecture specific syscall entry code with interrupts - * disabled. The calling code has to be non-instrumentable. When the - * function returns all state is correct and the subsequent functions can be - * instrumented. + * enabled after invoking syscall_enter_from_user_mode_prepare() and extra + * architecture specific work. * * Returns: The original or a modified syscall number * @@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * syscall_set_return_value() first. If neither of those are called and -1 * is returned, then the syscall will fail with ENOSYS. * - * The following functionality is handled here: + * It handles the following work items: * - * 1) Establish state (lockdep, RCU (context tracking), tracing) - * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(), + * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), * __secure_computing(), trace_sys_enter() - * 3) Invocation of audit_syscall_entry() + * 2) Invocation of audit_syscall_entry() + */ +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); + +/** + * syscall_enter_from_user_mode - Establish state and check and handle work + * before invoking a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This is combination of syscall_enter_from_user_mode_prepare() and + * syscall_enter_from_user_mode_work(). + * + * Returns: The original or a modified syscall number. See + * syscall_enter_from_user_mode_work() for further explanation. */ long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index fcae019158ca..18683598edbc 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, return ret ? : syscall_get_nr(current, regs); } -noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +static __always_inline long +__syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { unsigned long ti_work; - enter_from_user_mode(regs); - instrumentation_begin(); - - local_irq_enable(); ti_work = READ_ONCE(current_thread_info()->flags); if (ti_work & SYSCALL_ENTER_WORK) syscall = syscall_trace_enter(regs, syscall, ti_work); - instrumentation_end(); return syscall; } +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + return __syscall_enter_from_user_work(regs, syscall); +} + +noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = __syscall_enter_from_user_work(regs, syscall); + instrumentation_end(); + + return ret; +} + +noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + instrumentation_begin(); + local_irq_enable(); + instrumentation_end(); +} + /** * exit_to_user_mode - Fixup state when exiting to user mode * From cfd54fa83a5068b61b7eb28d3c117d8354c74c7a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 1 Sep 2020 11:25:28 +0300 Subject: [PATCH 202/648] usb: Fix out of sync data toggle if a configured device is reconfigured Userspace drivers that use a SetConfiguration() request to "lightweight" reset an already configured usb device might cause data toggles to get out of sync between the device and host, and the device becomes unusable. The xHCI host requires endpoints to be dropped and added back to reset the toggle. If USB core notices the new configuration is the same as the current active configuration it will avoid these extra steps by calling usb_reset_configuration() instead of usb_set_configuration(). A SetConfiguration() request will reset the device side data toggles. Make sure usb_reset_configuration() function also drops and adds back the endpoints to ensure data toggles are in sync. To avoid code duplication split the current usb_disable_device() function and reuse the endpoint specific part. Cc: stable Tested-by: Martin Thierer Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200901082528.12557-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 91 ++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 49 deletions(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 6197938dcc2d..ae1de9cc4b09 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1205,6 +1205,34 @@ void usb_disable_interface(struct usb_device *dev, struct usb_interface *intf, } } +/* + * usb_disable_device_endpoints -- Disable all endpoints for a device + * @dev: the device whose endpoints are being disabled + * @skip_ep0: 0 to disable endpoint 0, 1 to skip it. + */ +static void usb_disable_device_endpoints(struct usb_device *dev, int skip_ep0) +{ + struct usb_hcd *hcd = bus_to_hcd(dev->bus); + int i; + + if (hcd->driver->check_bandwidth) { + /* First pass: Cancel URBs, leave endpoint pointers intact. */ + for (i = skip_ep0; i < 16; ++i) { + usb_disable_endpoint(dev, i, false); + usb_disable_endpoint(dev, i + USB_DIR_IN, false); + } + /* Remove endpoints from the host controller internal state */ + mutex_lock(hcd->bandwidth_mutex); + usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL); + mutex_unlock(hcd->bandwidth_mutex); + } + /* Second pass: remove endpoint pointers */ + for (i = skip_ep0; i < 16; ++i) { + usb_disable_endpoint(dev, i, true); + usb_disable_endpoint(dev, i + USB_DIR_IN, true); + } +} + /** * usb_disable_device - Disable all the endpoints for a USB device * @dev: the device whose endpoints are being disabled @@ -1218,7 +1246,6 @@ void usb_disable_interface(struct usb_device *dev, struct usb_interface *intf, void usb_disable_device(struct usb_device *dev, int skip_ep0) { int i; - struct usb_hcd *hcd = bus_to_hcd(dev->bus); /* getting rid of interfaces will disconnect * any drivers bound to them (a key side effect) @@ -1264,22 +1291,8 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) dev_dbg(&dev->dev, "%s nuking %s URBs\n", __func__, skip_ep0 ? "non-ep0" : "all"); - if (hcd->driver->check_bandwidth) { - /* First pass: Cancel URBs, leave endpoint pointers intact. */ - for (i = skip_ep0; i < 16; ++i) { - usb_disable_endpoint(dev, i, false); - usb_disable_endpoint(dev, i + USB_DIR_IN, false); - } - /* Remove endpoints from the host controller internal state */ - mutex_lock(hcd->bandwidth_mutex); - usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL); - mutex_unlock(hcd->bandwidth_mutex); - /* Second pass: remove endpoint pointers */ - } - for (i = skip_ep0; i < 16; ++i) { - usb_disable_endpoint(dev, i, true); - usb_disable_endpoint(dev, i + USB_DIR_IN, true); - } + + usb_disable_device_endpoints(dev, skip_ep0); } /** @@ -1522,6 +1535,9 @@ EXPORT_SYMBOL_GPL(usb_set_interface); * The caller must own the device lock. * * Return: Zero on success, else a negative error code. + * + * If this routine fails the device will probably be in an unusable state + * with endpoints disabled, and interfaces only partially enabled. */ int usb_reset_configuration(struct usb_device *dev) { @@ -1537,10 +1553,7 @@ int usb_reset_configuration(struct usb_device *dev) * calls during probe() are fine */ - for (i = 1; i < 16; ++i) { - usb_disable_endpoint(dev, i, true); - usb_disable_endpoint(dev, i + USB_DIR_IN, true); - } + usb_disable_device_endpoints(dev, 1); /* skip ep0*/ config = dev->actconfig; retval = 0; @@ -1553,34 +1566,10 @@ int usb_reset_configuration(struct usb_device *dev) mutex_unlock(hcd->bandwidth_mutex); return -ENOMEM; } - /* Make sure we have enough bandwidth for each alternate setting 0 */ - for (i = 0; i < config->desc.bNumInterfaces; i++) { - struct usb_interface *intf = config->interface[i]; - struct usb_host_interface *alt; - alt = usb_altnum_to_altsetting(intf, 0); - if (!alt) - alt = &intf->altsetting[0]; - if (alt != intf->cur_altsetting) - retval = usb_hcd_alloc_bandwidth(dev, NULL, - intf->cur_altsetting, alt); - if (retval < 0) - break; - } - /* If not, reinstate the old alternate settings */ + /* xHCI adds all endpoints in usb_hcd_alloc_bandwidth */ + retval = usb_hcd_alloc_bandwidth(dev, config, NULL, NULL); if (retval < 0) { -reset_old_alts: - for (i--; i >= 0; i--) { - struct usb_interface *intf = config->interface[i]; - struct usb_host_interface *alt; - - alt = usb_altnum_to_altsetting(intf, 0); - if (!alt) - alt = &intf->altsetting[0]; - if (alt != intf->cur_altsetting) - usb_hcd_alloc_bandwidth(dev, NULL, - alt, intf->cur_altsetting); - } usb_enable_lpm(dev); mutex_unlock(hcd->bandwidth_mutex); return retval; @@ -1589,8 +1578,12 @@ int usb_reset_configuration(struct usb_device *dev) USB_REQ_SET_CONFIGURATION, 0, config->desc.bConfigurationValue, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); - if (retval < 0) - goto reset_old_alts; + if (retval < 0) { + usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL); + usb_enable_lpm(dev); + mutex_unlock(hcd->bandwidth_mutex); + return retval; + } mutex_unlock(hcd->bandwidth_mutex); /* re-init hc/hcd interface/endpoint state */ From 290a405ce318d036666c4155d5899eb8cd6e0d97 Mon Sep 17 00:00:00 2001 From: Madhusudanarao Amara Date: Wed, 26 Aug 2020 00:08:11 +0530 Subject: [PATCH 203/648] usb: typec: intel_pmc_mux: Un-register the USB role switch Added missing code for un-register USB role switch in the remove and error path. Cc: Stable # v5.8 Reviewed-by: Heikki Krogerus Fixes: 6701adfa9693b ("usb: typec: driver for Intel PMC mux control") Signed-off-by: Madhusudanarao Amara Link: https://lore.kernel.org/r/20200825183811.7262-1-madhusudanarao.amara@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index e4021e13af40..fd9008f19208 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -497,6 +497,7 @@ static int pmc_usb_probe(struct platform_device *pdev) for (i = 0; i < pmc->num_ports; i++) { typec_switch_unregister(pmc->port[i].typec_sw); typec_mux_unregister(pmc->port[i].typec_mux); + usb_role_switch_unregister(pmc->port[i].usb_sw); } return ret; @@ -510,6 +511,7 @@ static int pmc_usb_remove(struct platform_device *pdev) for (i = 0; i < pmc->num_ports; i++) { typec_switch_unregister(pmc->port[i].typec_sw); typec_mux_unregister(pmc->port[i].typec_mux); + usb_role_switch_unregister(pmc->port[i].usb_sw); } return 0; From 1f3546ff3f0a1000971daef58406954bad3f7061 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Fri, 4 Sep 2020 14:09:18 +0300 Subject: [PATCH 204/648] usb: typec: ucsi: acpi: Check the _DEP dependencies Failing probe with -EPROBE_DEFER until all dependencies listed in the _DEP (Operation Region Dependencies) object have been met. This will fix an issue where on some platforms UCSI ACPI driver fails to probe because the address space handler for the operation region that the UCSI ACPI interface uses has not been loaded yet. Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200904110918.51546-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 9fc4f338e870..c0aca2f0f23f 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -112,11 +112,15 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) static int ucsi_acpi_probe(struct platform_device *pdev) { + struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); struct ucsi_acpi *ua; struct resource *res; acpi_status status; int ret; + if (adev->dep_unmet) + return -EPROBE_DEFER; + ua = devm_kzalloc(&pdev->dev, sizeof(*ua), GFP_KERNEL); if (!ua) return -ENOMEM; From a6498d51821edf9615b42b968fb419a40197a982 Mon Sep 17 00:00:00 2001 From: Amjad Ouled-Ameur Date: Thu, 27 Aug 2020 16:48:10 +0200 Subject: [PATCH 205/648] Revert "usb: dwc3: meson-g12a: fix shared reset control use" This reverts commit 7a410953d1fb4dbe91ffcfdee9cbbf889d19b0d7. This commit breaks USB on meson-gxl-s905x-libretech-cc. Reverting the change solves the issue. In fact, according to the reset framework code, consumers must not use reset_control_(de)assert() on shared reset lines when reset_control_reset has been used, and vice-versa. Moreover, with this commit, usb is not guaranted to be reset since the reset is likely to be initially deasserted. Reverting the commit will bring back the suspend warning mentioned in the commit description. Nevertheless, a warning is much less critical than breaking dwc3-meson-g12a USB completely. We will address the warning issue in another way as a 2nd step. Fixes: 7a410953d1fb ("usb: dwc3: meson-g12a: fix shared reset control use") Cc: stable Signed-off-by: Amjad Ouled-Ameur Reported-by: Jerome Brunet Acked-by: Neil Armstrong Acked-by: Philipp Zabel Link: https://lore.kernel.org/r/20200827144810.26657-1-aouledameur@baylibre.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-meson-g12a.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c index 88b75b5a039c..1f7f4d88ed9d 100644 --- a/drivers/usb/dwc3/dwc3-meson-g12a.c +++ b/drivers/usb/dwc3/dwc3-meson-g12a.c @@ -737,13 +737,13 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) goto err_disable_clks; } - ret = reset_control_deassert(priv->reset); + ret = reset_control_reset(priv->reset); if (ret) - goto err_assert_reset; + goto err_disable_clks; ret = dwc3_meson_g12a_get_phys(priv); if (ret) - goto err_assert_reset; + goto err_disable_clks; ret = priv->drvdata->setup_regmaps(priv, base); if (ret) @@ -752,7 +752,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) if (priv->vbus) { ret = regulator_enable(priv->vbus); if (ret) - goto err_assert_reset; + goto err_disable_clks; } /* Get dr_mode */ @@ -765,13 +765,13 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) ret = priv->drvdata->usb_init(priv); if (ret) - goto err_assert_reset; + goto err_disable_clks; /* Init PHYs */ for (i = 0 ; i < PHY_COUNT ; ++i) { ret = phy_init(priv->phys[i]); if (ret) - goto err_assert_reset; + goto err_disable_clks; } /* Set PHY Power */ @@ -809,9 +809,6 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) for (i = 0 ; i < PHY_COUNT ; ++i) phy_exit(priv->phys[i]); -err_assert_reset: - reset_control_assert(priv->reset); - err_disable_clks: clk_bulk_disable_unprepare(priv->drvdata->num_clks, priv->drvdata->clks); From a18cd6c9b6bc73dc17e8b7e9bd07decaa8833c97 Mon Sep 17 00:00:00 2001 From: Zeng Tao Date: Fri, 4 Sep 2020 14:37:44 +0800 Subject: [PATCH 206/648] usb: core: fix slab-out-of-bounds Read in read_descriptors The USB device descriptor may get changed between two consecutive enumerations on the same device for some reason, such as DFU or malicius device. In that case, we may access the changing descriptor if we don't take the device lock here. The issue is reported: https://syzkaller.appspot.com/bug?id=901a0d9e6519ef8dc7acab25344bd287dd3c7be9 Cc: stable Cc: Alan Stern Reported-by: syzbot+256e56ddde8b8957eabd@syzkaller.appspotmail.com Fixes: 217a9081d8e6 ("USB: add all configs to the "descriptors" attribute") Signed-off-by: Zeng Tao Link: https://lore.kernel.org/r/1599201467-11000-1-git-send-email-prime.zeng@hisilicon.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/sysfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index a2ca38e25e0c..8d134193fa0c 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -889,7 +889,11 @@ read_descriptors(struct file *filp, struct kobject *kobj, size_t srclen, n; int cfgno; void *src; + int retval; + retval = usb_lock_device_interruptible(udev); + if (retval < 0) + return -EINTR; /* The binary attribute begins with the device descriptor. * Following that are the raw descriptor entries for all the * configurations (config plus subsidiary descriptors). @@ -914,6 +918,7 @@ read_descriptors(struct file *filp, struct kobject *kobj, off -= srclen; } } + usb_unlock_device(udev); return count - nleft; } From b52a95eac112f6f21f0c93987c0bbf3e91bfbf88 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Mon, 31 Aug 2020 12:22:08 -0600 Subject: [PATCH 207/648] dyndbg: give %3u width in pr-format, cosmetic only Specify the print-width so log entries line up nicely. no functional changes. Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20200831182210.850852-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 1d012e597cc3..01b7d0210412 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -947,7 +947,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, list_add(&dt->link, &ddebug_tables); mutex_unlock(&ddebug_lock); - v2pr_info("%u debug prints in module %s\n", n, dt->mod_name); + v2pr_info("%3u debug prints in module %s\n", n, dt->mod_name); return 0; } From a2d375eda771a6a4866f3717a8ed81b63acb1dbd Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Mon, 31 Aug 2020 12:22:09 -0600 Subject: [PATCH 208/648] dyndbg: refine export, rename to dynamic_debug_exec_queries() commit 4c0d77828d4f ("dyndbg: export ddebug_exec_queries") had a few problems: - broken non DYNAMIC_DEBUG_CORE configs, sparse warning - the exported function modifies query string, breaks on RO strings. - func name follows internal convention, shouldn't be exposed as is. 1st is fixed in header with ifdefd function prototype or stub defn. Also remove an obsolete HAVE-symbol ifdef-comment, and add others. Fix others by wrapping existing internal function with a new one, named in accordance with module-prefix naming convention, before export hits v5.9.0. In new function, copy query string to a local buffer, so users can pass hard-coded/RO queries, and internal function can be used unchanged. Fixes: 4c0d77828d4f ("dyndbg: export ddebug_exec_queries") Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20200831182210.850852-3-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/dynamic_debug.h | 20 ++++++++++++++++---- lib/dynamic_debug.c | 27 +++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index aa9ff9e1c0b3..8aa0c7c2608c 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -49,6 +49,10 @@ struct _ddebug { #if defined(CONFIG_DYNAMIC_DEBUG_CORE) + +/* exported for module authors to exercise >control */ +int dynamic_debug_exec_queries(const char *query, const char *modname); + int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *modname); extern int ddebug_remove_module(const char *mod_name); @@ -105,7 +109,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, static_branch_unlikely(&descriptor.key.dd_key_false) #endif -#else /* !HAVE_JUMP_LABEL */ +#else /* !CONFIG_JUMP_LABEL */ #define _DPRINTK_KEY_INIT @@ -117,7 +121,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) #endif -#endif +#endif /* CONFIG_JUMP_LABEL */ #define __dynamic_func_call(id, fmt, func, ...) do { \ DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt); \ @@ -172,10 +176,11 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, KERN_DEBUG, prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii) -#else +#else /* !CONFIG_DYNAMIC_DEBUG_CORE */ #include #include +#include static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *modname) @@ -210,6 +215,13 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii); \ } while (0) -#endif + +static inline int dynamic_debug_exec_queries(const char *query, const char *modname) +{ + pr_warn("kernel not built with CONFIG_DYNAMIC_DEBUG_CORE\n"); + return 0; +} + +#endif /* !CONFIG_DYNAMIC_DEBUG_CORE */ #endif diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 01b7d0210412..08e4b057514c 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -525,7 +525,7 @@ static int ddebug_exec_query(char *query_string, const char *modname) last error or number of matching callsites. Module name is either in param (for boot arg) or perhaps in query string. */ -int ddebug_exec_queries(char *query, const char *modname) +static int ddebug_exec_queries(char *query, const char *modname) { char *split; int i, errs = 0, exitcode = 0, rc, nfound = 0; @@ -557,7 +557,30 @@ int ddebug_exec_queries(char *query, const char *modname) return exitcode; return nfound; } -EXPORT_SYMBOL_GPL(ddebug_exec_queries); + +/** + * dynamic_debug_exec_queries - select and change dynamic-debug prints + * @query: query-string described in admin-guide/dynamic-debug-howto + * @modname: string containing module name, usually &module.mod_name + * + * This uses the >/proc/dynamic_debug/control reader, allowing module + * authors to modify their dynamic-debug callsites. The modname is + * canonically struct module.mod_name, but can also be null or a + * module-wildcard, for example: "drm*". + */ +int dynamic_debug_exec_queries(const char *query, const char *modname) +{ + int rc; + char *qry = kstrndup(query, PAGE_SIZE, GFP_KERNEL); + + if (!query) + return -ENOMEM; + + rc = ddebug_exec_queries(qry, modname); + kfree(qry); + return rc; +} +EXPORT_SYMBOL_GPL(dynamic_debug_exec_queries); #define PREFIX_SIZE 64 From 42f07816ac0cc797928119cc039c414ae2b95d34 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Mon, 31 Aug 2020 12:22:10 -0600 Subject: [PATCH 209/648] dyndbg: fix problem parsing format="foo bar" commit 14775b049642 ("dyndbg: accept query terms like file=bar and module=foo") added the combined keyword=value parsing poorly; revert most of it, keeping the keyword & arg change. Instead, fix the tokenizer for the new input, by terminating the keyword (an unquoted word) on '=' as well as space, thus letting the tokenizer work on the quoted argument, like it would have previously. Also add a few debug-prints to show more parsing context, into tokenizer and parse-query, and use "keyword, value" in others. Fixes: 14775b049642 ("dyndbg: accept query terms like file=bar and module=foo") Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20200831182210.850852-4-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 08e4b057514c..04f4c80b0d16 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -237,6 +237,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) { int nwords = 0; + vpr_info("entry, buf:'%s'\n", buf); while (*buf) { char *end; @@ -247,6 +248,8 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) if (*buf == '#') break; /* token starts comment, skip rest of line */ + vpr_info("start-of-word:%d '%s'\n", nwords, buf); + /* find `end' of word, whitespace separated or quoted */ if (*buf == '"' || *buf == '\'') { int quote = *buf++; @@ -257,7 +260,9 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) return -EINVAL; /* unclosed quote */ } } else { - for (end = buf; *end && !isspace(*end); end++) + for (end = buf; + *end && *end != '=' && !isspace(*end); + end++) ; BUG_ON(end == buf); } @@ -373,30 +378,21 @@ static int ddebug_parse_query(char *words[], int nwords, unsigned int i; int rc = 0; char *fline; - char *keyword, *arg; - if (modname) + if (nwords % 2 != 0) { + pr_err("expecting pairs of match-spec \n"); + return -EINVAL; + } + if (modname) { /* support $modname.dyndbg= */ + vpr_info("module:%s queries:'%s'\n", modname); query->module = modname; + } + for (i = 0; i < nwords; i += 2) { + char *keyword = words[i]; + char *arg = words[i+1]; - for (i = 0; i < nwords; i++) { - /* accept keyword=arg */ - vpr_info("%d w:%s\n", i, words[i]); - - keyword = words[i]; - arg = strchr(keyword, '='); - if (arg) { - *arg++ = '\0'; - } else { - i++; /* next word is arg */ - if (!(i < nwords)) { - pr_err("missing arg to keyword: %s\n", keyword); - return -EINVAL; - } - arg = words[i]; - } - vpr_info("%d key:%s arg:%s\n", i, keyword, arg); - + vpr_info("keyword:'%s' value:'%s'\n", keyword, arg); if (!strcmp(keyword, "func")) { rc = check_set(&query->function, arg, "func"); } else if (!strcmp(keyword, "file")) { From bd018a6a75cebb511bb55a0e7690024be975fe93 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 31 Aug 2020 19:37:00 +0900 Subject: [PATCH 210/648] video: fbdev: fix OOB read in vga_8planes_imageblit() syzbot is reporting OOB read at vga_8planes_imageblit() [1], for "cdat[y] >> 4" can become a negative value due to "const char *cdat". [1] https://syzkaller.appspot.com/bug?id=0d7a0da1557dcd1989e00cb3692b26d4173b4132 Reported-by: syzbot Signed-off-by: Tetsuo Handa Cc: stable Link: https://lore.kernel.org/r/90b55ec3-d5b0-3307-9f7c-7ff5c5fd6ad3@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/vga16fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/vga16fb.c b/drivers/video/fbdev/vga16fb.c index a20eeb8308ff..578d3541e3d6 100644 --- a/drivers/video/fbdev/vga16fb.c +++ b/drivers/video/fbdev/vga16fb.c @@ -1121,7 +1121,7 @@ static void vga_8planes_imageblit(struct fb_info *info, const struct fb_image *i char oldop = setop(0); char oldsr = setsr(0); char oldmask = selectmask(); - const char *cdat = image->data; + const unsigned char *cdat = image->data; u32 dx = image->dx; char __iomem *where; int y; From e3b9fc7eec55e6fdc8beeed18f2ed207086341e2 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Tue, 11 Aug 2020 17:01:29 +0200 Subject: [PATCH 211/648] debugfs: Fix module state check condition The '#ifdef MODULE' check in the original commit does not work as intended. The code under the check is not built at all if CONFIG_DEBUG_FS=y. Fix this by using a correct check. Fixes: 275678e7a9be ("debugfs: Check module state before warning in {full/open}_proxy_open()") Signed-off-by: Vladis Dronov Cc: stable Link: https://lore.kernel.org/r/20200811150129.53343-1-vdronov@redhat.com Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index b167d2d02148..a768a09430c3 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -177,7 +177,7 @@ static int open_proxy_open(struct inode *inode, struct file *filp) goto out; if (!fops_get(real_fops)) { -#ifdef MODULE +#ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) goto out; @@ -312,7 +312,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp) goto out; if (!fops_get(real_fops)) { -#ifdef MODULE +#ifdef CONFIG_MODULES if (real_fops->owner && real_fops->owner->state == MODULE_STATE_GOING) goto out; From 693a8e936590f93451e6f5a3d748616f5a59c80b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 28 Aug 2020 18:14:35 +0200 Subject: [PATCH 212/648] driver code: print symbolic error code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev_err_probe() prepends the message with an error code. Let's make it more readable by translating the code to a more recognisable symbol. Fixes: a787e5400a1c ("driver core: add device probe log helper") Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/ea3f973e4708919573026fdce52c264db147626d.1598630856.git.mirq-linux@rere.qmqm.pl Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index ac1046a382bc..1a4706310b28 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4237,10 +4237,10 @@ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) vaf.va = &args; if (err != -EPROBE_DEFER) { - dev_err(dev, "error %d: %pV", err, &vaf); + dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } else { device_set_deferred_probe_reason(dev, &vaf); - dev_dbg(dev, "error %d: %pV", err, &vaf); + dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); } va_end(args); From 51de18bff23362bfaa3c7af33bb455de274b49c9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 27 Aug 2020 20:20:29 +0200 Subject: [PATCH 213/648] MAINTAINERS: Add the security document to SECURITY CONTACT When changing the document related to kernel security workflow, notify the security mailing list as its concerned by this. Cc: Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200827182029.3458-1-krzk@kernel.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f0068bceeb61..c5317a7ddee0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15546,6 +15546,7 @@ F: include/uapi/linux/sed* SECURITY CONTACT M: Security Officers S: Supported +F: Documentation/admin-guide/security-bugs.rst SECURITY SUBSYSTEM M: James Morris From 6b57b15abe11aa334ebf726e02c0deaf123ba040 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 1 Sep 2020 11:44:44 -0700 Subject: [PATCH 214/648] driver core: Fix device_pm_lock() locking for device links This commit fixes two issues: 1. The lockdep warning reported by Dong Aisheng [1]. It is a warning about a cycle (dpm_list_mtx --> kn->active#3 --> fw_lock) that was introduced when device-link devices were added to expose device link information in sysfs. The patch that "introduced" this cycle can't be reverted because it's fixes a real SRCU issue and also ensures that the device-link device is deleted as soon as the device-link is deleted. This is important to avoid sysfs name collisions if the device-link is create again immediately (this can happen a lot with deferred probing). 2. Inconsistency in grabbing device_pm_lock() during device link deletion Some device link deletion code paths grab device_pm_lock(), while others don't. The device_pm_lock() is grabbed during device_link_add() because it checks if the supplier is in the dpm_list and also reorders the dpm_list. However, when a device link is deleted, it does not do either of those and therefore device_pm_lock() is not necessary. Dropping the device_pm_lock() in all the device link deletion paths removes the inconsistency in locking. Thanks to Stephen Boyd for helping me understand the lockdep splat. Fixes: 843e600b8a2b ("driver core: Fix sleeping in invalid context during device link deletion") [1] - https://lore.kernel.org/lkml/CAA+hA=S4eAreb7vo69LAXSk2t5=DEKNxHaiY1wSpk4xTp9urLg@mail.gmail.com/ Reported-by: Dong Aisheng Signed-off-by: Saravana Kannan Tested-by: Peng Fan Link: https://lore.kernel.org/r/20200901184445.1736658-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 1a4706310b28..97812a5f42b8 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -807,9 +807,7 @@ static void device_link_put_kref(struct device_link *link) void device_link_del(struct device_link *link) { device_links_write_lock(); - device_pm_lock(); device_link_put_kref(link); - device_pm_unlock(); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_del); @@ -830,7 +828,6 @@ void device_link_remove(void *consumer, struct device *supplier) return; device_links_write_lock(); - device_pm_lock(); list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer == consumer) { @@ -839,7 +836,6 @@ void device_link_remove(void *consumer, struct device *supplier) } } - device_pm_unlock(); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_remove); From 34221545d2069dc947131f42392fd4cebabe1b39 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 3 Sep 2020 20:03:10 -0600 Subject: [PATCH 215/648] drm/msm: Split the a5xx preemption record The main a5xx preemption record can be marked as privileged to protect it from user access but the counters storage needs to be remain unprivileged. Split the buffers and mark the critical memory as privileged. Cc: stable@vger.kernel.org Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 1 + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 25 ++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index 54868d4e3958..1e5b1a15a70f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -31,6 +31,7 @@ struct a5xx_gpu { struct msm_ringbuffer *next_ring; struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS]; + struct drm_gem_object *preempt_counters_bo[MSM_GPU_MAX_RINGS]; struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS]; uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 9cf9353a7ff1..9f3fe177b00e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -226,19 +226,31 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, struct adreno_gpu *adreno_gpu = &a5xx_gpu->base; struct msm_gpu *gpu = &adreno_gpu->base; struct a5xx_preempt_record *ptr; - struct drm_gem_object *bo = NULL; - u64 iova = 0; + void *counters; + struct drm_gem_object *bo = NULL, *counters_bo = NULL; + u64 iova = 0, counters_iova = 0; ptr = msm_gem_kernel_new(gpu->dev, A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE, - MSM_BO_UNCACHED, gpu->aspace, &bo, &iova); + MSM_BO_UNCACHED | MSM_BO_MAP_PRIV, gpu->aspace, &bo, &iova); if (IS_ERR(ptr)) return PTR_ERR(ptr); + /* The buffer to store counters needs to be unprivileged */ + counters = msm_gem_kernel_new(gpu->dev, + A5XX_PREEMPT_COUNTER_SIZE, + MSM_BO_UNCACHED, gpu->aspace, &counters_bo, &counters_iova); + if (IS_ERR(counters)) { + msm_gem_kernel_put(bo, gpu->aspace, true); + return PTR_ERR(counters); + } + msm_gem_object_set_name(bo, "preempt"); + msm_gem_object_set_name(counters_bo, "preempt_counters"); a5xx_gpu->preempt_bo[ring->id] = bo; + a5xx_gpu->preempt_counters_bo[ring->id] = counters_bo; a5xx_gpu->preempt_iova[ring->id] = iova; a5xx_gpu->preempt[ring->id] = ptr; @@ -249,7 +261,7 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, ptr->data = 0; ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT; ptr->rptr_addr = rbmemptr(ring, rptr); - ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE; + ptr->counter = counters_iova; return 0; } @@ -260,8 +272,11 @@ void a5xx_preempt_fini(struct msm_gpu *gpu) struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); int i; - for (i = 0; i < gpu->nr_rings; i++) + for (i = 0; i < gpu->nr_rings; i++) { msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true); + msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i], + gpu->aspace, true); + } } void a5xx_preempt_init(struct msm_gpu *gpu) From 604234f33658cdd72f686be405a99646b397d0b3 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 3 Sep 2020 20:03:11 -0600 Subject: [PATCH 216/648] drm/msm: Enable expanded apriv support for a650 a650 supports expanded apriv support that allows us to map critical buffers (ringbuffer and memstore) as as privileged to protect them from corruption. Cc: stable@vger.kernel.org Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 +++++- drivers/gpu/drm/msm/msm_gpu.c | 2 +- drivers/gpu/drm/msm/msm_gpu.h | 11 +++++++++++ drivers/gpu/drm/msm/msm_ringbuffer.c | 4 ++-- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index c5a3e4d4c007..406efaac95a7 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -678,7 +678,8 @@ static int a6xx_hw_init(struct msm_gpu *gpu) A6XX_PROTECT_RDONLY(0x980, 0x4)); gpu_write(gpu, REG_A6XX_CP_PROTECT(25), A6XX_PROTECT_RW(0xa630, 0x0)); - if (adreno_is_a650(adreno_gpu)) { + /* Enable expanded apriv for targets that support it */ + if (gpu->hw_apriv) { gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, (1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1)); } @@ -1056,6 +1057,9 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = NULL; adreno_gpu->reg_offsets = a6xx_register_offsets; + if (adreno_is_a650(adreno_gpu)) + adreno_gpu->base.hw_apriv = true; + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) { a6xx_destroy(&(a6xx_gpu->base.base)); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index d5645472b25d..57ddc9438351 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -908,7 +908,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, memptrs = msm_gem_kernel_new(drm, sizeof(struct msm_rbmemptrs) * nr_rings, - MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, + check_apriv(gpu, MSM_BO_UNCACHED), gpu->aspace, &gpu->memptrs_bo, &memptrs_iova); if (IS_ERR(memptrs)) { diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 0db117a7339b..37cffac4cbe3 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -15,6 +15,7 @@ #include "msm_drv.h" #include "msm_fence.h" #include "msm_ringbuffer.h" +#include "msm_gem.h" struct msm_gem_submit; struct msm_gpu_perfcntr; @@ -139,6 +140,8 @@ struct msm_gpu { } devfreq; struct msm_gpu_state *crashstate; + /* True if the hardware supports expanded apriv (a650 and newer) */ + bool hw_apriv; }; /* It turns out that all targets use the same ringbuffer size */ @@ -327,4 +330,12 @@ static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu) mutex_unlock(&gpu->dev->struct_mutex); } +/* + * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can + * support expanded privileges + */ +#define check_apriv(gpu, flags) \ + (((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags)) + + #endif /* __MSM_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 39ecb5a18431..935bf9b1d941 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -27,8 +27,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, ring->id = id; ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, - MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &ring->bo, - &ring->iova); + check_apriv(gpu, MSM_BO_WC | MSM_BO_GPU_READONLY), + gpu->aspace, &ring->bo, &ring->iova); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); From 7b3f3948c8b7053d771acc9f79810cc410f5e2e0 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 3 Sep 2020 20:03:12 -0600 Subject: [PATCH 217/648] drm/msm: Disable preemption on all 5xx targets Temporarily disable preemption on a5xx targets pending some improvements to protect the RPTR shadow from being corrupted. Cc: stable@vger.kernel.org Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 9e63a190642c..e718f964d590 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1511,7 +1511,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) check_speed_bin(&pdev->dev); - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); + /* Restricting nr_rings to 1 to temporarily disable preemption */ + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); From f6828e0c4045f03f9cf2df6c2a768102641183f4 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 3 Sep 2020 20:03:13 -0600 Subject: [PATCH 218/648] drm/msm: Disable the RPTR shadow Disable the RPTR shadow across all targets. It will be selectively re-enabled later for targets that need it. Cc: stable@vger.kernel.org Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 5 +++++ drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 10 +++++++++ drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 10 +++++++++ drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 11 ++++++++-- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 +++++++ drivers/gpu/drm/msm/adreno/adreno_gpu.c | 27 ++----------------------- 6 files changed, 43 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 6021f8d9efd1..48fa49f69d6d 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -164,6 +164,11 @@ static int a2xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; + gpu_write(gpu, REG_AXXX_CP_RB_CNTL, + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + + gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); + /* NOTE: PM4/micro-engine firmware registers look to be the same * for a2xx and a3xx.. we could possibly push that part down to * adreno_gpu base class. Or push both PM4 and PFP but diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 0a5ea9f56cb8..f6471145a7a6 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -211,6 +211,16 @@ static int a3xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; + /* + * Use the default ringbuffer size and block size but disable the RPTR + * shadow + */ + gpu_write(gpu, REG_AXXX_CP_RB_CNTL, + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + + /* Set the ringbuffer address */ + gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); + /* setup access protection: */ gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007); diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index b9b26b2bf9c5..954753600625 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -267,6 +267,16 @@ static int a4xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; + /* + * Use the default ringbuffer size and block size but disable the RPTR + * shadow + */ + gpu_write(gpu, REG_A4XX_CP_RB_CNTL, + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + + /* Set the ringbuffer address */ + gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova)); + /* Load PM4: */ ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data); len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e718f964d590..ce3c0b5c167b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -703,8 +703,6 @@ static int a5xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; - a5xx_preempt_hw_init(gpu); - if (!adreno_is_a510(adreno_gpu)) a5xx_gpmu_ucode_init(gpu); @@ -712,6 +710,15 @@ static int a5xx_hw_init(struct msm_gpu *gpu) if (ret) return ret; + /* Set the ringbuffer address */ + gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI, + gpu->rb[0]->iova); + + gpu_write(gpu, REG_A5XX_CP_RB_CNTL, + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + + a5xx_preempt_hw_init(gpu); + /* Disable the interrupts through the initial bringup stage */ gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 406efaac95a7..74bc27eb4203 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -695,6 +695,13 @@ static int a6xx_hw_init(struct msm_gpu *gpu) if (ret) goto out; + /* Set the ringbuffer address */ + gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI, + gpu->rb[0]->iova); + + gpu_write(gpu, REG_A6XX_CP_RB_CNTL, + MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE); + /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index d2dbb6968cba..459f10a3710b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -400,26 +400,6 @@ int adreno_hw_init(struct msm_gpu *gpu) ring->memptrs->rptr = 0; } - /* - * Setup REG_CP_RB_CNTL. The same value is used across targets (with - * the excpetion of A430 that disables the RPTR shadow) - the cacluation - * for the ringbuffer size and block size is moved to msm_gpu.h for the - * pre-processor to deal with and the A430 variant is ORed in here - */ - adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - MSM_GPU_RB_CNTL_DEFAULT | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - - /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ - adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); - - if (!adreno_is_a430(adreno_gpu)) { - adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - REG_ADRENO_CP_RB_RPTR_ADDR_HI, - rbmemptr(gpu->rb[0], rptr)); - } - return 0; } @@ -427,11 +407,8 @@ int adreno_hw_init(struct msm_gpu *gpu) static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, struct msm_ringbuffer *ring) { - if (adreno_is_a430(adreno_gpu)) - return ring->memptrs->rptr = adreno_gpu_read( - adreno_gpu, REG_ADRENO_CP_RB_RPTR); - else - return ring->memptrs->rptr; + return ring->memptrs->rptr = adreno_gpu_read( + adreno_gpu, REG_ADRENO_CP_RB_RPTR); } struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) From 44a049c42681de71c783d75cd6e56b4e339488b0 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 2 Sep 2020 17:06:58 -0700 Subject: [PATCH 219/648] drivers/net/wan/hdlc_fr: Add needed_headroom for PVC devices PVC devices are virtual devices in this driver stacked on top of the actual HDLC device. They are the devices normal users would use. PVC devices have two types: normal PVC devices and Ethernet-emulating PVC devices. When transmitting data with PVC devices, the ndo_start_xmit function will prepend a header of 4 or 10 bytes. Currently this driver requests this headroom to be reserved for normal PVC devices by setting their hard_header_len to 10. However, this does not work when these devices are used with AF_PACKET/RAW sockets. Also, this driver does not request this headroom for Ethernet-emulating PVC devices (but deals with this problem by reallocating the skb when needed, which is not optimal). This patch replaces hard_header_len with needed_headroom, and set needed_headroom for Ethernet-emulating PVC devices, too. This makes the driver to request headroom for all PVC devices in all cases. Cc: Krzysztof Halasa Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: Jakub Kicinski --- drivers/net/wan/hdlc_fr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 9acad651ea1f..12b35404cd8e 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1041,7 +1041,7 @@ static void pvc_setup(struct net_device *dev) { dev->type = ARPHRD_DLCI; dev->flags = IFF_POINTOPOINT; - dev->hard_header_len = 10; + dev->hard_header_len = 0; dev->addr_len = 2; netif_keep_dst(dev); } @@ -1093,6 +1093,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) dev->mtu = HDLC_MAX_MTU; dev->min_mtu = 68; dev->max_mtu = HDLC_MAX_MTU; + dev->needed_headroom = 10; dev->priv_flags |= IFF_NO_QUEUE; dev->ml_priv = pvc; From c2b947879ca320ac5505c6c29a731ff17da5e805 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Fri, 4 Sep 2020 10:51:03 +0800 Subject: [PATCH 220/648] atm: eni: fix the missed pci_disable_device() for eni_init_one() eni_init_one() misses to call pci_disable_device() in an error path. Jump to err_disable to fix it. Fixes: ede58ef28e10 ("atm: remove deprecated use of pci api") Signed-off-by: Jing Xiangfeng Signed-off-by: Jakub Kicinski --- drivers/atm/eni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 39be444534d0..316a9947541f 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2224,7 +2224,7 @@ static int eni_init_one(struct pci_dev *pci_dev, rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); if (rc < 0) - goto out; + goto err_disable; rc = -ENOMEM; eni_dev = kmalloc(sizeof(struct eni_dev), GFP_KERNEL); From cc8e58f8325cdf14b9516b61c384cdfd02a4f408 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Sep 2020 19:10:11 -0700 Subject: [PATCH 221/648] act_ife: load meta modules before tcf_idr_check_alloc() The following deadlock scenario is triggered by syzbot: Thread A: Thread B: tcf_idr_check_alloc() ... populate_metalist() rtnl_unlock() rtnl_lock() ... request_module() tcf_idr_check_alloc() rtnl_lock() At this point, thread A is waiting for thread B to release RTNL lock, while thread B is waiting for thread A to commit the IDR change with tcf_idr_insert() later. Break this deadlock situation by preloading ife modules earlier, before tcf_idr_check_alloc(), this is fine because we only need to load modules we need potentially. Reported-and-tested-by: syzbot+80e32b5d1f9923f8ace6@syzkaller.appspotmail.com Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Jamal Hadi Salim Cc: Vlad Buslov Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: Jakub Kicinski --- net/sched/act_ife.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c1fcd85719d6..5c568757643b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -436,6 +436,25 @@ static void tcf_ife_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } +static int load_metalist(struct nlattr **tb, bool rtnl_held) +{ + int i; + + for (i = 1; i < max_metacnt; i++) { + if (tb[i]) { + void *val = nla_data(tb[i]); + int len = nla_len(tb[i]); + int rc; + + rc = load_metaops_and_vet(i, val, len, rtnl_held); + if (rc != 0) + return rc; + } + } + + return 0; +} + static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { @@ -449,10 +468,6 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(i, val, len, rtnl_held); - if (rc != 0) - return rc; - rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; @@ -509,6 +524,21 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!p) return -ENOMEM; + if (tb[TCA_IFE_METALST]) { + err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, + tb[TCA_IFE_METALST], NULL, + NULL); + if (err) { + kfree(p); + return err; + } + err = load_metalist(tb2, rtnl_held); + if (err) { + kfree(p); + return err; + } + } + index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) { @@ -570,15 +600,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, - tb[TCA_IFE_METALST], NULL, - NULL); - if (err) - goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; - } else { /* if no passed metadata allow list or passed allow-all * then here we process by adding as many supported metadatum From 7dda5b3384121181c4e79f6eaeac2b94c0622c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 4 Sep 2020 20:28:00 +0200 Subject: [PATCH 222/648] batman-adv: mcast/TT: fix wrongly dropped or rerouted packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unicast packet rerouting code makes several assumptions. For instance it assumes that there is always exactly one destination in the TT. This breaks for multicast frames in a unicast packets in several ways: For one thing if there is actually no TT entry and the destination node was selected due to the multicast tvlv flags it announced. Then an intermediate node will wrongly drop the packet. For another thing if there is a TT entry but the TTVN of this entry is newer than the originally addressed destination node: Then the intermediate node will wrongly redirect the packet, leading to duplicated multicast packets at a multicast listener and missing packets at other multicast listeners or multicast routers. Fixing this by not applying the unicast packet rerouting to batman-adv unicast packets with a multicast payload. We are not able to detect a roaming multicast listener at the moment and will just continue to send the multicast frame to both the new and old destination for a while in case of such a roaming multicast listener. Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/routing.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 27cdf5e4349a..9e5c71e406ff 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -826,6 +826,10 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, vid = batadv_get_vid(skb, hdr_len); ethhdr = (struct ethhdr *)(skb->data + hdr_len); + /* do not reroute multicast frames in a unicast header */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + return true; + /* check if the destination client was served by this node and it is now * roaming. In this case, it means that the node has got a ROAM_ADV * message and that it knows the new destination in the mesh to re-route From 66138621f2473e29625dfa6bb229872203b71b90 Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Tue, 1 Sep 2020 18:21:49 +0800 Subject: [PATCH 223/648] arm64: dts: imx8mp: correct sdma1 clk setting Correct sdma1 ahb clk, otherwise wrong 1:1 clk ratio will be chosed so that sdma1 function broken. sdma1 should use 1:2 clk, while sdma2/3 use 1:1. Fixes: 6d9b8d20431f ("arm64: dts: freescale: Add i.MX8MP dtsi support") Cc: Signed-off-by: Robin Gong Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 9de2aa1c573c..a5154f13a18e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -702,7 +702,7 @@ sdma1: dma-controller@30bd0000 { reg = <0x30bd0000 0x10000>; interrupts = ; clocks = <&clk IMX8MP_CLK_SDMA1_ROOT>, - <&clk IMX8MP_CLK_SDMA1_ROOT>; + <&clk IMX8MP_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; From 3ee99f6a2379eca87ab11122b7e9abd68f3441e2 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Thu, 3 Sep 2020 18:05:21 +0800 Subject: [PATCH 224/648] ARM: dts: imx6sx: fix the pad QSPI1B_SCLK mux mode for uart3 The pad QSPI1B_SCLK mux mode 0x1 is for function UART3_DTE_TX, correct the mux mode. Fixes: 743636f25f1d ("ARM: dts: imx: add pin function header for imx6sx") Signed-off-by: Fugang Duan Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sx-pinfunc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx-pinfunc.h b/arch/arm/boot/dts/imx6sx-pinfunc.h index 0b02c7e60c17..f4dc46207954 100644 --- a/arch/arm/boot/dts/imx6sx-pinfunc.h +++ b/arch/arm/boot/dts/imx6sx-pinfunc.h @@ -1026,7 +1026,7 @@ #define MX6SX_PAD_QSPI1B_DQS__SIM_M_HADDR_15 0x01B0 0x04F8 0x0000 0x7 0x0 #define MX6SX_PAD_QSPI1B_SCLK__QSPI1_B_SCLK 0x01B4 0x04FC 0x0000 0x0 0x0 #define MX6SX_PAD_QSPI1B_SCLK__UART3_DCE_RX 0x01B4 0x04FC 0x0840 0x1 0x4 -#define MX6SX_PAD_QSPI1B_SCLK__UART3_DTE_TX 0x01B4 0x04FC 0x0000 0x0 0x0 +#define MX6SX_PAD_QSPI1B_SCLK__UART3_DTE_TX 0x01B4 0x04FC 0x0000 0x1 0x0 #define MX6SX_PAD_QSPI1B_SCLK__ECSPI3_SCLK 0x01B4 0x04FC 0x0730 0x2 0x1 #define MX6SX_PAD_QSPI1B_SCLK__ESAI_RX_HF_CLK 0x01B4 0x04FC 0x0780 0x3 0x2 #define MX6SX_PAD_QSPI1B_SCLK__CSI1_DATA_16 0x01B4 0x04FC 0x06DC 0x4 0x1 From 644c9f40cf71969f29add32f32349e71d4995c0b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 4 Sep 2020 17:39:12 -0400 Subject: [PATCH 225/648] NFS: Zero-stateid SETATTR should first return delegation If a write delegation isn't available, the Linux NFS client uses a zero-stateid when performing a SETATTR. NFSv4.0 provides no mechanism for an NFS server to match such a request to a particular client. It recalls all delegations for that file, even delegations held by the client issuing the request. If that client happens to hold a read delegation, the server will recall it immediately, resulting in an NFS4ERR_DELAY/CB_RECALL/ DELEGRETURN sequence. Optimize out this pipeline bubble by having the client return any delegations it may hold on a file before it issues a SETATTR(zero-stateid) on that file. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4a6cfb497103..3e824110b470 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3293,8 +3293,10 @@ static int _nfs4_do_setattr(struct inode *inode, /* Servers should only apply open mode checks for file size changes */ truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false; - if (!truncate) + if (!truncate) { + nfs4_inode_make_writeable(inode); goto zero_stateid; + } if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) { /* Use that stateid */ From 8c6b6c793ed32b8f9770ebcdf1ba99af423c303b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 5 Sep 2020 10:03:26 -0400 Subject: [PATCH 226/648] SUNRPC: stop printk reading past end of string Since p points at raw xdr data, there's no guarantee that it's NULL terminated, so we should give a length. And probably escape any special characters too. Reported-by: Zhi Li Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c27123e6ba80..4a67685c83eb 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -982,8 +982,8 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr, p = xdr_inline_decode(xdr, len); if (unlikely(p == NULL)) goto out_fail; - dprintk("RPC: %5u RPCB_%s reply: %s\n", req->rq_task->tk_pid, - req->rq_task->tk_msg.rpc_proc->p_name, (char *)p); + dprintk("RPC: %5u RPCB_%s reply: %*pE\n", req->rq_task->tk_pid, + req->rq_task->tk_msg.rpc_proc->p_name, len, (char *)p); if (rpc_uaddr2sockaddr(req->rq_xprt->xprt_net, (char *)p, len, sap, sizeof(address)) == 0) From c183edff33fdcd639d222a8f473bf44602adc655 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Sep 2020 22:36:52 -0600 Subject: [PATCH 227/648] io_uring: fix explicit async read/write mapping for large segments If we exceed UIO_FASTIOV, we don't handle the transition correctly between an allocated vec for requests that are queued with IOSQE_ASYNC. Store the iovec appropriately and re-set it in the iter iov in case it changed. Fixes: ff6165b2d7f6 ("io_uring: retain iov_iter state over io_read/io_write calls") Reported-by: Nick Hill Tested-by: Norman Maurer Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 64c5a52fad12..f703182df2d4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2980,14 +2980,15 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw, bool force_nonblock) { struct io_async_rw *iorw = &req->io->rw; + struct iovec *iov; ssize_t ret; - iorw->iter.iov = iorw->fast_iov; - ret = __io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov, - &iorw->iter, !force_nonblock); + iorw->iter.iov = iov = iorw->fast_iov; + ret = __io_import_iovec(rw, req, &iov, &iorw->iter, !force_nonblock); if (unlikely(ret < 0)) return ret; + iorw->iter.iov = iov; io_req_map_rw(req, iorw->iter.iov, iorw->fast_iov, &iorw->iter); return 0; } From d2b86100245080cfdf1e95e9e07477474c1be2bd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 29 Jul 2020 01:19:40 +0300 Subject: [PATCH 228/648] rapidio: Replace 'select' DMAENGINES 'with depends on' Enabling a whole subsystem from a single driver 'select' is frowned upon and won't be accepted in new drivers, that need to use 'depends on' instead. Existing selection of DMAENGINES will then cause circular dependencies. Replace them with a dependency. Signed-off-by: Laurent Pinchart Acked-by: Randy Dunlap --- drivers/rapidio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index e4c422d806be..b9f8514909bf 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -37,7 +37,7 @@ config RAPIDIO_ENABLE_RX_TX_PORTS config RAPIDIO_DMA_ENGINE bool "DMA Engine support for RapidIO" depends on RAPIDIO - select DMADEVICES + depends on DMADEVICES select DMA_ENGINE help Say Y here if you want to use DMA Engine frameork for RapidIO data From 3e8b2403545efd46c6347002e27eae4708205fd4 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 29 Jul 2020 01:23:32 +0300 Subject: [PATCH 229/648] drm: xlnx: dpsub: Fix DMADEVICES Kconfig dependency The dpsub driver uses the DMA engine API, and thus selects DMA_ENGINE to provide that API. DMA_ENGINE depends on DMADEVICES, which can be deselected by the user, creating a possibly unmet indirect dependency: WARNING: unmet direct dependencies detected for DMA_ENGINE Depends on [n]: DMADEVICES [=n] Selected by [m]: - DRM_ZYNQMP_DPSUB [=m] && HAS_IOMEM [=y] && (ARCH_ZYNQMP || COMPILE_TEST [=y]) && COMMON_CLK [=y] && DRM [=m] && OF [=y] Add a dependency on DMADEVICES to fix this. Reported-by: Randy Dunlap Signed-off-by: Laurent Pinchart Acked-by: Randy Dunlap --- drivers/gpu/drm/xlnx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index aa6cd889bd11..b52c6cdfc0b8 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -2,6 +2,7 @@ config DRM_ZYNQMP_DPSUB tristate "ZynqMP DisplayPort Controller Driver" depends on ARCH_ZYNQMP || COMPILE_TEST depends on COMMON_CLK && DRM && OF + depends on DMADEVICES select DMA_ENGINE select DRM_GEM_CMA_HELPER select DRM_KMS_CMA_HELPER From 1ef6ea0efe8e68d0299dad44c39dc6ad9e5d1f39 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 5 Sep 2020 08:12:01 -0400 Subject: [PATCH 230/648] ext2: don't update mtime on COW faults When running in a dax mode, if the user maps a page with MAP_PRIVATE and PROT_WRITE, the ext2 filesystem would incorrectly update ctime and mtime when the user hits a COW fault. This breaks building of the Linux kernel. How to reproduce: 1. extract the Linux kernel tree on dax-mounted ext2 filesystem 2. run make clean 3. run make -j12 4. run make -j12 at step 4, make would incorrectly rebuild the whole kernel (although it was already built in step 3). The reason for the breakage is that almost all object files depend on objtool. When we run objtool, it takes COW page fault on its .data section, and these faults will incorrectly update the timestamp of the objtool binary. The updated timestamp causes make to rebuild the whole tree. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/ext2/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 60378ddf1424..96044f5dbc0e 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -93,8 +93,10 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf) struct inode *inode = file_inode(vmf->vma->vm_file); struct ext2_inode_info *ei = EXT2_I(inode); vm_fault_t ret; + bool write = (vmf->flags & FAULT_FLAG_WRITE) && + (vmf->vma->vm_flags & VM_SHARED); - if (vmf->flags & FAULT_FLAG_WRITE) { + if (write) { sb_start_pagefault(inode->i_sb); file_update_time(vmf->vma->vm_file); } @@ -103,7 +105,7 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf) ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops); up_read(&ei->dax_sem); - if (vmf->flags & FAULT_FLAG_WRITE) + if (write) sb_end_pagefault(inode->i_sb); return ret; } From b17164e258e3888d376a7434415013175d637377 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 5 Sep 2020 08:13:02 -0400 Subject: [PATCH 231/648] xfs: don't update mtime on COW faults When running in a dax mode, if the user maps a page with MAP_PRIVATE and PROT_WRITE, the xfs filesystem would incorrectly update ctime and mtime when the user hits a COW fault. This breaks building of the Linux kernel. How to reproduce: 1. extract the Linux kernel tree on dax-mounted xfs filesystem 2. run make clean 3. run make -j12 4. run make -j12 at step 4, make would incorrectly rebuild the whole kernel (although it was already built in step 3). The reason for the breakage is that almost all object files depend on objtool. When we run objtool, it takes COW page fault on its .data section, and these faults will incorrectly update the timestamp of the objtool binary. The updated timestamp causes make to rebuild the whole tree. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/xfs/xfs_file.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c31cd3be9fb2..a29f78a663ca 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1223,6 +1223,14 @@ __xfs_filemap_fault( return ret; } +static inline bool +xfs_is_write_fault( + struct vm_fault *vmf) +{ + return (vmf->flags & FAULT_FLAG_WRITE) && + (vmf->vma->vm_flags & VM_SHARED); +} + static vm_fault_t xfs_filemap_fault( struct vm_fault *vmf) @@ -1230,7 +1238,7 @@ xfs_filemap_fault( /* DAX can shortcut the normal fault path on write faults! */ return __xfs_filemap_fault(vmf, PE_SIZE_PTE, IS_DAX(file_inode(vmf->vma->vm_file)) && - (vmf->flags & FAULT_FLAG_WRITE)); + xfs_is_write_fault(vmf)); } static vm_fault_t @@ -1243,7 +1251,7 @@ xfs_filemap_huge_fault( /* DAX can shortcut the normal fault path on write faults! */ return __xfs_filemap_fault(vmf, pe_size, - (vmf->flags & FAULT_FLAG_WRITE)); + xfs_is_write_fault(vmf)); } static vm_fault_t From f1796544a0ca0f14386a679d3d05fbc69235015e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 4 Sep 2020 16:35:24 -0700 Subject: [PATCH 232/648] memcg: fix use-after-free in uncharge_batch syzbot has reported an use-after-free in the uncharge_batch path BUG: KASAN: use-after-free in instrument_atomic_write include/linux/instrumented.h:71 [inline] BUG: KASAN: use-after-free in atomic64_sub_return include/asm-generic/atomic-instrumented.h:970 [inline] BUG: KASAN: use-after-free in atomic_long_sub_return include/asm-generic/atomic-long.h:113 [inline] BUG: KASAN: use-after-free in page_counter_cancel mm/page_counter.c:54 [inline] BUG: KASAN: use-after-free in page_counter_uncharge+0x3d/0xc0 mm/page_counter.c:155 Write of size 8 at addr ffff8880371c0148 by task syz-executor.0/9304 CPU: 0 PID: 9304 Comm: syz-executor.0 Not tainted 5.8.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1f0/0x31e lib/dump_stack.c:118 print_address_description+0x66/0x620 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report+0x132/0x1d0 mm/kasan/report.c:530 check_memory_region_inline mm/kasan/generic.c:183 [inline] check_memory_region+0x2b5/0x2f0 mm/kasan/generic.c:192 instrument_atomic_write include/linux/instrumented.h:71 [inline] atomic64_sub_return include/asm-generic/atomic-instrumented.h:970 [inline] atomic_long_sub_return include/asm-generic/atomic-long.h:113 [inline] page_counter_cancel mm/page_counter.c:54 [inline] page_counter_uncharge+0x3d/0xc0 mm/page_counter.c:155 uncharge_batch+0x6c/0x350 mm/memcontrol.c:6764 uncharge_page+0x115/0x430 mm/memcontrol.c:6796 uncharge_list mm/memcontrol.c:6835 [inline] mem_cgroup_uncharge_list+0x70/0xe0 mm/memcontrol.c:6877 release_pages+0x13a2/0x1550 mm/swap.c:911 tlb_batch_pages_flush mm/mmu_gather.c:49 [inline] tlb_flush_mmu_free mm/mmu_gather.c:242 [inline] tlb_flush_mmu+0x780/0x910 mm/mmu_gather.c:249 tlb_finish_mmu+0xcb/0x200 mm/mmu_gather.c:328 exit_mmap+0x296/0x550 mm/mmap.c:3185 __mmput+0x113/0x370 kernel/fork.c:1076 exit_mm+0x4cd/0x550 kernel/exit.c:483 do_exit+0x576/0x1f20 kernel/exit.c:793 do_group_exit+0x161/0x2d0 kernel/exit.c:903 get_signal+0x139b/0x1d30 kernel/signal.c:2743 arch_do_signal+0x33/0x610 arch/x86/kernel/signal.c:811 exit_to_user_mode_loop kernel/entry/common.c:135 [inline] exit_to_user_mode_prepare+0x8d/0x1b0 kernel/entry/common.c:166 syscall_exit_to_user_mode+0x5e/0x1a0 kernel/entry/common.c:241 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Commit 1a3e1f40962c ("mm: memcontrol: decouple reference counting from page accounting") reworked the memcg lifetime to be bound the the struct page rather than charges. It also removed the css_put_many from uncharge_batch and that is causing the above splat. uncharge_batch() is supposed to uncharge accumulated charges for all pages freed from the same memcg. The queuing is done by uncharge_page which however drops the memcg reference after it adds charges to the batch. If the current page happens to be the last one holding the reference for its memcg then the memcg is OK to go and the next page to be freed will trigger batched uncharge which needs to access the memcg which is gone already. Fix the issue by taking a reference for the memcg in the current batch. Fixes: 1a3e1f40962c ("mm: memcontrol: decouple reference counting from page accounting") Reported-by: syzbot+b305848212deec86eabe@syzkaller.appspotmail.com Reported-by: syzbot+b5ea6fb6f139c8b9482b@syzkaller.appspotmail.com Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Roman Gushchin Cc: Hugh Dickins Link: https://lkml.kernel.org/r/20200820090341.GC5033@dhcp22.suse.cz Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b807952b4d43..cfa6cbad21d5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6774,6 +6774,9 @@ static void uncharge_batch(const struct uncharge_gather *ug) __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_pages); memcg_check_events(ug->memcg, ug->dummy_page); local_irq_restore(flags); + + /* drop reference from uncharge_page */ + css_put(&ug->memcg->css); } static void uncharge_page(struct page *page, struct uncharge_gather *ug) @@ -6797,6 +6800,9 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) uncharge_gather_clear(ug); } ug->memcg = page->mem_cgroup; + + /* pairs with css_put in uncharge_batch */ + css_get(&ug->memcg->css); } nr_pages = compound_nr(page); From e3336cab2579012b1e72b5265adf98e2d6e244ad Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Fri, 4 Sep 2020 16:35:27 -0700 Subject: [PATCH 233/648] mm: memcg: fix memcg reclaim soft lockup We've met softlockup with "CONFIG_PREEMPT_NONE=y", when the target memcg doesn't have any reclaimable memory. It can be easily reproduced as below: watchdog: BUG: soft lockup - CPU#0 stuck for 111s![memcg_test:2204] CPU: 0 PID: 2204 Comm: memcg_test Not tainted 5.9.0-rc2+ #12 Call Trace: shrink_lruvec+0x49f/0x640 shrink_node+0x2a6/0x6f0 do_try_to_free_pages+0xe9/0x3e0 try_to_free_mem_cgroup_pages+0xef/0x1f0 try_charge+0x2c1/0x750 mem_cgroup_charge+0xd7/0x240 __add_to_page_cache_locked+0x2fd/0x370 add_to_page_cache_lru+0x4a/0xc0 pagecache_get_page+0x10b/0x2f0 filemap_fault+0x661/0xad0 ext4_filemap_fault+0x2c/0x40 __do_fault+0x4d/0xf9 handle_mm_fault+0x1080/0x1790 It only happens on our 1-vcpu instances, because there's no chance for oom reaper to run to reclaim the to-be-killed process. Add a cond_resched() at the upper shrink_node_memcgs() to solve this issue, this will mean that we will get a scheduling point for each memcg in the reclaimed hierarchy without any dependency on the reclaimable memory in that memcg thus making it more predictable. Suggested-by: Michal Hocko Signed-off-by: Xunlei Pang Signed-off-by: Andrew Morton Acked-by: Chris Down Acked-by: Michal Hocko Acked-by: Johannes Weiner Link: http://lkml.kernel.org/r/1598495549-67324-1-git-send-email-xlpang@linux.alibaba.com Signed-off-by: Linus Torvalds --- mm/vmscan.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index 99e1796eb833..9727dd8e2581 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2615,6 +2615,14 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) unsigned long reclaimed; unsigned long scanned; + /* + * This loop can become CPU-bound when target memcgs + * aren't eligible for reclaim - either because they + * don't have any reclaimable pages, or because their + * memory is explicitly protected. Avoid soft lockups. + */ + cond_resched(); + mem_cgroup_calculate_protection(target_memcg, memcg); if (mem_cgroup_below_min(memcg)) { From dc07a728d49cf025f5da2c31add438d839d076c0 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Fri, 4 Sep 2020 16:35:30 -0700 Subject: [PATCH 234/648] mm: slub: fix conversion of freelist_corrupted() Commit 52f23478081ae0 ("mm/slub.c: fix corrupted freechain in deactivate_slab()") suffered an update when picked up from LKML [1]. Specifically, relocating 'freelist = NULL' into 'freelist_corrupted()' created a no-op statement. Fix it by sticking to the behavior intended in the original patch [1]. In addition, make freelist_corrupted() immune to passing NULL instead of &freelist. The issue has been spotted via static analysis and code review. [1] https://lore.kernel.org/linux-mm/20200331031450.12182-1-dongli.zhang@oracle.com/ Fixes: 52f23478081ae0 ("mm/slub.c: fix corrupted freechain in deactivate_slab()") Signed-off-by: Eugeniu Rosca Signed-off-by: Andrew Morton Cc: Dongli Zhang Cc: Joe Jin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Link: https://lkml.kernel.org/r/20200824130643.10291-1-erosca@de.adit-jv.com Signed-off-by: Linus Torvalds --- mm/slub.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 68c02b2eecd9..d4177aecedf6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -672,12 +672,12 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) } static bool freelist_corrupted(struct kmem_cache *s, struct page *page, - void *freelist, void *nextfree) + void **freelist, void *nextfree) { if ((s->flags & SLAB_CONSISTENCY_CHECKS) && - !check_valid_pointer(s, page, nextfree)) { - object_err(s, page, freelist, "Freechain corrupt"); - freelist = NULL; + !check_valid_pointer(s, page, nextfree) && freelist) { + object_err(s, page, *freelist, "Freechain corrupt"); + *freelist = NULL; slab_fix(s, "Isolate corrupted freechain"); return true; } @@ -1494,7 +1494,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} static bool freelist_corrupted(struct kmem_cache *s, struct page *page, - void *freelist, void *nextfree) + void **freelist, void *nextfree) { return false; } @@ -2184,7 +2184,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, * 'freelist' is already corrupted. So isolate all objects * starting at 'freelist'. */ - if (freelist_corrupted(s, page, freelist, nextfree)) + if (freelist_corrupted(s, page, &freelist, nextfree)) break; do { From f548a64570cbb7ca3760643b9e2d7c45b9fe2931 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 4 Sep 2020 16:35:33 -0700 Subject: [PATCH 235/648] MAINTAINERS: update Cavium/Marvell entries I am leaving Marvell and already do not have access to my @marvell.com email address. So switching over to my korg mail address or removing my address there another maintainer is already listed. For the entries there no other maintainer is listed I will keep looking into patches for Cavium systems for a while until someone from Marvell takes it over. Since I might have limited access to hardware and also limited time I changed state to 'Odd Fixes' for those entries. Signed-off-by: Robert Richter Signed-off-by: Andrew Morton Cc: Ganapatrao Kulkarni Cc: Sunil Goutham CC: Borislav Petkov Cc: Marc Zyngier Cc: Arnd Bergmann Cc: Wolfram Sang , Link: https://lkml.kernel.org/r/20200824122050.31164-1-rric@kernel.org Signed-off-by: Linus Torvalds --- MAINTAINERS | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index dca9bfd8c888..94cfce940c6c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1694,7 +1694,6 @@ F: arch/arm/mach-cns3xxx/ ARM/CAVIUM THUNDER NETWORK DRIVER M: Sunil Goutham -M: Robert Richter L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported F: drivers/net/ethernet/cavium/thunder/ @@ -3948,8 +3947,8 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/carl9170 F: drivers/net/wireless/ath/carl9170/ CAVIUM I2C DRIVER -M: Robert Richter -S: Supported +M: Robert Richter +S: Odd Fixes W: http://www.marvell.com F: drivers/i2c/busses/i2c-octeon* F: drivers/i2c/busses/i2c-thunderx* @@ -3964,8 +3963,8 @@ W: http://www.marvell.com F: drivers/net/ethernet/cavium/liquidio/ CAVIUM MMC DRIVER -M: Robert Richter -S: Supported +M: Robert Richter +S: Odd Fixes W: http://www.marvell.com F: drivers/mmc/host/cavium* @@ -3977,9 +3976,9 @@ W: http://www.marvell.com F: drivers/crypto/cavium/cpt/ CAVIUM THUNDERX2 ARM64 SOC -M: Robert Richter +M: Robert Richter L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained +S: Odd Fixes F: Documentation/devicetree/bindings/arm/cavium-thunder2.txt F: arch/arm64/boot/dts/cavium/thunder2-99xx* @@ -6191,16 +6190,15 @@ F: drivers/edac/highbank* EDAC-CAVIUM OCTEON M: Ralf Baechle -M: Robert Richter L: linux-edac@vger.kernel.org L: linux-mips@vger.kernel.org S: Supported F: drivers/edac/octeon_edac* EDAC-CAVIUM THUNDERX -M: Robert Richter +M: Robert Richter L: linux-edac@vger.kernel.org -S: Supported +S: Odd Fixes F: drivers/edac/thunderx_edac* EDAC-CORE @@ -6208,7 +6206,7 @@ M: Borislav Petkov M: Mauro Carvalho Chehab M: Tony Luck R: James Morse -R: Robert Richter +R: Robert Richter L: linux-edac@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next @@ -13446,10 +13444,10 @@ F: Documentation/devicetree/bindings/pci/axis,artpec* F: drivers/pci/controller/dwc/*artpec* PCIE DRIVER FOR CAVIUM THUNDERX -M: Robert Richter +M: Robert Richter L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Supported +S: Odd Fixes F: drivers/pci/controller/pci-thunder-* PCIE DRIVER FOR HISILICON @@ -17237,8 +17235,8 @@ S: Maintained F: drivers/net/thunderbolt.c THUNDERX GPIO DRIVER -M: Robert Richter -S: Maintained +M: Robert Richter +S: Odd Fixes F: drivers/gpio/gpio-thunderx.c TI AM437X VPFE DRIVER From b9644289657748314dbfe6502c316b3f09e251ed Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 4 Sep 2020 16:35:37 -0700 Subject: [PATCH 236/648] MAINTAINERS: add LLVM maintainers Nominate Nathan and myself to be point of contact for clang/LLVM related support, after a poll at the LLVM BoF at Linux Plumbers Conf 2020. While corporate sponsorship is beneficial, its important to not entrust the keys to the nukes with any one entity. Should Nathan and I find ourselves at the same employer, I would gladly step down. Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Reviewed-by: Sedat Dilek Acked-by: Nathan Chancellor Acked-by: Lukas Bulwahn Acked-by: Miguel Ojeda Acked-by: Masahiro Yamada Link: https://lkml.kernel.org/r/20200825143540.2948637-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 94cfce940c6c..9f5937cbf9de 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4257,6 +4257,8 @@ S: Maintained F: .clang-format CLANG/LLVM BUILD SUPPORT +M: Nathan Chancellor +M: Nick Desaulniers L: clang-built-linux@googlegroups.com S: Supported W: https://clangbuiltlinux.github.io/ From 9d90dd188d5e25742d41bd8b91dd5f22da587cdc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 4 Sep 2020 16:35:40 -0700 Subject: [PATCH 237/648] MAINTAINERS: IA64: mark Status as Odd Fixes only IA64 isn't really being maintained, so mark it as Odd Fixes only. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Tony Luck Cc: Fenghua Yu Link: http://lkml.kernel.org/r/7e719139-450f-52c2-59a2-7964a34eda1f@infradead.org Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9f5937cbf9de..cef55acca692 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8272,7 +8272,7 @@ IA64 (Itanium) PLATFORM M: Tony Luck M: Fenghua Yu L: linux-ia64@vger.kernel.org -S: Maintained +S: Odd Fixes T: git git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git F: Documentation/ia64/ F: arch/ia64/ From e80d3909be42f7e38cc350c1ba109cf0aa51956a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 4 Sep 2020 16:35:43 -0700 Subject: [PATCH 238/648] mm: track page table modifications in __apply_to_page_range() __apply_to_page_range() is also used to change and/or allocate page-table pages in the vmalloc area of the address space. Make sure these changes get synchronized to other page-tables in the system by calling arch_sync_kernel_mappings() when necessary. The impact appears limited to x86-32, where apply_to_page_range may miss updating the PMD. That leads to explosions in drivers like BUG: unable to handle page fault for address: fe036000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page *pde = 00000000 Oops: 0002 [#1] SMP CPU: 3 PID: 1300 Comm: gem_concurrent_ Not tainted 5.9.0-rc1+ #16 Hardware name: /NUC6i3SYB, BIOS SYSKLi35.86A.0024.2015.1027.2142 10/27/2015 EIP: __execlists_context_alloc+0x132/0x2d0 [i915] Code: 31 d2 89 f0 e8 2f 55 02 00 89 45 e8 3d 00 f0 ff ff 0f 87 11 01 00 00 8b 4d e8 03 4b 30 b8 5a 5a 5a 5a ba 01 00 00 00 8d 79 04 01 5a 5a 5a 5a c7 81 fc 0f 00 00 5a 5a 5a 5a 83 e7 fc 29 f9 81 EAX: 5a5a5a5a EBX: f60ca000 ECX: fe036000 EDX: 00000001 ESI: f43b7340 EDI: fe036004 EBP: f6389cb8 ESP: f6389c9c DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010286 CR0: 80050033 CR2: fe036000 CR3: 2d361000 CR4: 001506d0 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: fffe0ff0 DR7: 00000400 Call Trace: execlists_context_alloc+0x10/0x20 [i915] intel_context_alloc_state+0x3f/0x70 [i915] __intel_context_do_pin+0x117/0x170 [i915] i915_gem_do_execbuffer+0xcc7/0x2500 [i915] i915_gem_execbuffer2_ioctl+0xcd/0x1f0 [i915] drm_ioctl_kernel+0x8f/0xd0 drm_ioctl+0x223/0x3d0 __ia32_sys_ioctl+0x1ab/0x760 __do_fast_syscall_32+0x3f/0x70 do_fast_syscall_32+0x29/0x60 do_SYSENTER_32+0x15/0x20 entry_SYSENTER_32+0x9f/0xf2 EIP: 0xb7f28559 Code: 03 74 c0 01 10 05 03 74 b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 EAX: ffffffda EBX: 00000005 ECX: c0406469 EDX: bf95556c ESI: b7e68000 EDI: c0406469 EBP: 00000005 ESP: bf9554d8 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296 Modules linked in: i915 x86_pkg_temp_thermal intel_powerclamp crc32_pclmul crc32c_intel intel_cstate intel_uncore intel_gtt drm_kms_helper intel_pch_thermal video button autofs4 i2c_i801 i2c_smbus fan CR2: 00000000fe036000 It looks like kasan, xen and i915 are vulnerable. Actual impact is "on thinkpad X60 in 5.9-rc1, screen starts blinking after 30-or-so minutes, and machine is unusable" [sfr@canb.auug.org.au: ARCH_PAGE_TABLE_SYNC_MASK needs vmalloc.h] Link: https://lkml.kernel.org/r/20200825172508.16800a4f@canb.auug.org.au [chris@chris-wilson.co.uk: changelog addition] [pavel@ucw.cz: changelog addition] Fixes: 2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified") Fixes: 86cf69f1d893 ("x86/mm/32: implement arch_sync_kernel_mappings()") Signed-off-by: Joerg Roedel Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Tested-by: Chris Wilson [x86-32] Tested-by: Pavel Machek Acked-by: Linus Torvalds Cc: [5.8+] Link: https://lkml.kernel.org/r/20200821123746.16904-1-joro@8bytes.org Signed-off-by: Linus Torvalds --- mm/memory.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 602f4283122f..547b81a14059 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -73,6 +73,7 @@ #include #include #include +#include #include @@ -83,6 +84,7 @@ #include #include +#include "pgalloc-track.h" #include "internal.h" #if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) @@ -2206,7 +2208,8 @@ EXPORT_SYMBOL(vm_iomap_memory); static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { pte_t *pte; int err = 0; @@ -2214,7 +2217,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, if (create) { pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : + pte_alloc_kernel_track(pmd, addr, mask) : pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2235,6 +2238,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, break; } } while (addr += PAGE_SIZE, addr != end); + *mask |= PGTBL_PTE_MODIFIED; arch_leave_lazy_mmu_mode(); @@ -2245,7 +2249,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { pmd_t *pmd; unsigned long next; @@ -2254,7 +2259,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, BUG_ON(pud_huge(*pud)); if (create) { - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc_track(mm, pud, addr, mask); if (!pmd) return -ENOMEM; } else { @@ -2264,7 +2269,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, next = pmd_addr_end(addr, end); if (create || !pmd_none_or_clear_bad(pmd)) { err = apply_to_pte_range(mm, pmd, addr, next, fn, data, - create); + create, mask); if (err) break; } @@ -2274,14 +2279,15 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { pud_t *pud; unsigned long next; int err = 0; if (create) { - pud = pud_alloc(mm, p4d, addr); + pud = pud_alloc_track(mm, p4d, addr, mask); if (!pud) return -ENOMEM; } else { @@ -2291,7 +2297,7 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, next = pud_addr_end(addr, end); if (create || !pud_none_or_clear_bad(pud)) { err = apply_to_pmd_range(mm, pud, addr, next, fn, data, - create); + create, mask); if (err) break; } @@ -2301,14 +2307,15 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data, bool create) + pte_fn_t fn, void *data, bool create, + pgtbl_mod_mask *mask) { p4d_t *p4d; unsigned long next; int err = 0; if (create) { - p4d = p4d_alloc(mm, pgd, addr); + p4d = p4d_alloc_track(mm, pgd, addr, mask); if (!p4d) return -ENOMEM; } else { @@ -2318,7 +2325,7 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, next = p4d_addr_end(addr, end); if (create || !p4d_none_or_clear_bad(p4d)) { err = apply_to_pud_range(mm, p4d, addr, next, fn, data, - create); + create, mask); if (err) break; } @@ -2331,8 +2338,9 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, void *data, bool create) { pgd_t *pgd; - unsigned long next; + unsigned long start = addr, next; unsigned long end = addr + size; + pgtbl_mod_mask mask = 0; int err = 0; if (WARN_ON(addr >= end)) @@ -2343,11 +2351,14 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, next = pgd_addr_end(addr, end); if (!create && pgd_none_or_clear_bad(pgd)) continue; - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask); if (err) break; } while (pgd++, addr = next, addr != end); + if (mask & ARCH_PAGE_TABLE_SYNC_MASK) + arch_sync_kernel_mappings(start, start + size); + return err; } From fff1662cc423be00c5a7dffc6cf2332161c882d6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 4 Sep 2020 16:35:46 -0700 Subject: [PATCH 239/648] ipc: adjust proc_ipc_sem_dointvec definition to match prototype Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the signature of proc_ipc_sem_dointvec to match ctl_table.proc_handler which fixes the following sparse error/warning: ipc/ipc_sysctl.c:94:47: warning: incorrect type in argument 3 (different address spaces) ipc/ipc_sysctl.c:94:47: expected void *buffer ipc/ipc_sysctl.c:94:47: got void [noderef] __user *buffer ipc/ipc_sysctl.c:194:35: warning: incorrect type in initializer (incompatible argument 3 (different address spaces)) ipc/ipc_sysctl.c:194:35: expected int ( [usertype] *proc_handler )( ... ) ipc/ipc_sysctl.c:194:35: got int ( * )( ... ) Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Cc: Christoph Hellwig Cc: Alexander Viro Link: https://lkml.kernel.org/r/20200825105846.5193-1-tklauser@distanz.ch Signed-off-by: Linus Torvalds --- ipc/ipc_sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index d1b8644bfb88..3f312bf2b116 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -85,7 +85,7 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, } static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret, semmni; struct ipc_namespace *ns = current->nsproxy->ipc_ns; From b0daa2c73f3a8ab9183a9d298495b583b6c1bd19 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 4 Sep 2020 16:35:49 -0700 Subject: [PATCH 240/648] fork: adjust sysctl_max_threads definition to match prototype Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the definition of sysctl_max_threads to match its prototype in linux/sysctl.h which fixes the following sparse error/warning: kernel/fork.c:3050:47: warning: incorrect type in argument 3 (different address spaces) kernel/fork.c:3050:47: expected void * kernel/fork.c:3050:47: got void [noderef] __user *buffer kernel/fork.c:3036:5: error: symbol 'sysctl_max_threads' redeclared with different type (incompatible argument 3 (different address spaces)): kernel/fork.c:3036:5: int extern [addressable] [signed] [toplevel] sysctl_max_threads( ... ) kernel/fork.c: note: in included file (through include/linux/key.h, include/linux/cred.h, include/linux/sched/signal.h, include/linux/sched/cputime.h): include/linux/sysctl.h:242:5: note: previously declared as: include/linux/sysctl.h:242:5: int extern [addressable] [signed] [toplevel] sysctl_max_threads( ... ) Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Cc: Christoph Hellwig Cc: Al Viro Link: https://lkml.kernel.org/r/20200825093647.24263-1-tklauser@distanz.ch Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index 4d32190861bd..49677d668de4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3014,7 +3014,7 @@ int unshare_files(struct files_struct **displaced) } int sysctl_max_threads(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; From 13e45417cedbfc44b1926124b1846f5ee8c6ba4a Mon Sep 17 00:00:00 2001 From: Mrinal Pandey Date: Fri, 4 Sep 2020 16:35:52 -0700 Subject: [PATCH 241/648] checkpatch: fix the usage of capture group ( ... ) The usage of "capture group (...)" in the immediate condition after `&&` results in `$1` being uninitialized. This issues a warning "Use of uninitialized value $1 in regexp compilation at ./scripts/checkpatch.pl line 2638". I noticed this bug while running checkpatch on the set of commits from v5.7 to v5.8-rc1 of the kernel on the commits with a diff content in their commit message. This bug was introduced in the script by commit e518e9a59ec3 ("checkpatch: emit an error when there's a diff in a changelog"). It has been in the script since then. The author intended to store the match made by capture group in variable `$1`. This should have contained the name of the file as `[\w/]+` matched. However, this couldn't be accomplished due to usage of capture group and `$1` in the same regular expression. Fix this by placing the capture group in the condition before `&&`. Thus, `$1` can be initialized to the text that capture group matches thereby setting it to the desired and required value. Fixes: e518e9a59ec3 ("checkpatch: emit an error when there's a diff in a changelog") Signed-off-by: Mrinal Pandey Signed-off-by: Andrew Morton Tested-by: Lukas Bulwahn Reviewed-by: Lukas Bulwahn Cc: Joe Perches Link: https://lkml.kernel.org/r/20200714032352.f476hanaj2dlmiot@mrinalpandey Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 60d4a79674b6..504d2e431c60 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2639,8 +2639,8 @@ sub process { # Check if the commit log has what seems like a diff which can confuse patch if ($in_commit_log && !$commit_log_has_diff && - (($line =~ m@^\s+diff\b.*a/[\w/]+@ && - $line =~ m@^\s+diff\b.*a/([\w/]+)\s+b/$1\b@) || + (($line =~ m@^\s+diff\b.*a/([\w/]+)@ && + $line =~ m@^\s+diff\b.*a/[\w/]+\s+b/$1\b@) || $line =~ m@^\s*(?:\-\-\-\s+a/|\+\+\+\s+b/)@ || $line =~ m/^\s*\@\@ \-\d+,\d+ \+\d+,\d+ \@\@/)) { ERROR("DIFF_IN_COMMIT_MSG", From 7867fd7cc44e63c6673cd0f8fea155456d34d0de Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 4 Sep 2020 16:35:55 -0700 Subject: [PATCH 242/648] mm: madvise: fix vma user-after-free The syzbot reported the below use-after-free: BUG: KASAN: use-after-free in madvise_willneed mm/madvise.c:293 [inline] BUG: KASAN: use-after-free in madvise_vma mm/madvise.c:942 [inline] BUG: KASAN: use-after-free in do_madvise.part.0+0x1c8b/0x1cf0 mm/madvise.c:1145 Read of size 8 at addr ffff8880a6163eb0 by task syz-executor.0/9996 CPU: 0 PID: 9996 Comm: syz-executor.0 Not tainted 5.9.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x18f/0x20d lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 madvise_willneed mm/madvise.c:293 [inline] madvise_vma mm/madvise.c:942 [inline] do_madvise.part.0+0x1c8b/0x1cf0 mm/madvise.c:1145 do_madvise mm/madvise.c:1169 [inline] __do_sys_madvise mm/madvise.c:1171 [inline] __se_sys_madvise mm/madvise.c:1169 [inline] __x64_sys_madvise+0xd9/0x110 mm/madvise.c:1169 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Allocated by task 9992: kmem_cache_alloc+0x138/0x3a0 mm/slab.c:3482 vm_area_alloc+0x1c/0x110 kernel/fork.c:347 mmap_region+0x8e5/0x1780 mm/mmap.c:1743 do_mmap+0xcf9/0x11d0 mm/mmap.c:1545 vm_mmap_pgoff+0x195/0x200 mm/util.c:506 ksys_mmap_pgoff+0x43a/0x560 mm/mmap.c:1596 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 9992: kmem_cache_free.part.0+0x67/0x1f0 mm/slab.c:3693 remove_vma+0x132/0x170 mm/mmap.c:184 remove_vma_list mm/mmap.c:2613 [inline] __do_munmap+0x743/0x1170 mm/mmap.c:2869 do_munmap mm/mmap.c:2877 [inline] mmap_region+0x257/0x1780 mm/mmap.c:1716 do_mmap+0xcf9/0x11d0 mm/mmap.c:1545 vm_mmap_pgoff+0x195/0x200 mm/util.c:506 ksys_mmap_pgoff+0x43a/0x560 mm/mmap.c:1596 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 It is because vma is accessed after releasing mmap_lock, but someone else acquired the mmap_lock and the vma is gone. Releasing mmap_lock after accessing vma should fix the problem. Fixes: 692fe62433d4c ("mm: Handle MADV_WILLNEED through vfs_fadvise()") Reported-by: syzbot+b90df26038d1d5d85c97@syzkaller.appspotmail.com Signed-off-by: Yang Shi Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Reviewed-by: Jan Kara Cc: [5.4+] Link: https://lkml.kernel.org/r/20200816141204.162624-1-shy828301@gmail.com Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index dd1d43cf026d..d4aa5f776543 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -289,9 +289,9 @@ static long madvise_willneed(struct vm_area_struct *vma, */ *prev = NULL; /* tell sys_madvise we drop mmap_lock */ get_file(file); - mmap_read_unlock(current->mm); offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + mmap_read_unlock(current->mm); vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); fput(file); mmap_read_lock(current->mm); From ebdf8321eeeb623aed60f7ed16f7445363230118 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 4 Sep 2020 16:35:58 -0700 Subject: [PATCH 243/648] mm/migrate: fixup setting UFFD_WP flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit f45ec5ff16a75 ("userfaultfd: wp: support swap and page migration") introduced support for tracking the uffd wp bit during page migration. However the non-swap PTE variant was used to set the flag for zone device private pages which are a type of swap page. This leads to corruption of the swap offset if the original PTE has the uffd_wp flag set. Fixes: f45ec5ff16a75 ("userfaultfd: wp: support swap and page migration") Signed-off-by: Alistair Popple Signed-off-by: Andrew Morton Reviewed-by: Peter Xu Cc: Jérôme Glisse Cc: John Hubbard Cc: Ralph Campbell Link: https://lkml.kernel.org/r/20200825064232.10023-1-alistair@popple.id.au Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 34a842a8eb6a..ddb64253fe3e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -251,7 +251,7 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, entry = make_device_private_entry(new, pte_write(pte)); pte = swp_entry_to_pte(entry); if (pte_swp_uffd_wp(*pvmw.pte)) - pte = pte_mkuffd_wp(pte); + pte = pte_swp_mkuffd_wp(pte); } } From ad7df764b7e1c7dc64e016da7ada2e3e1bb90700 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 4 Sep 2020 16:36:01 -0700 Subject: [PATCH 244/648] mm/rmap: fixup copying of soft dirty and uffd ptes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During memory migration a pte is temporarily replaced with a migration swap pte. Some pte bits from the existing mapping such as the soft-dirty and uffd write-protect bits are preserved by copying these to the temporary migration swap pte. However these bits are not stored at the same location for swap and non-swap ptes. Therefore testing these bits requires using the appropriate helper function for the given pte type. Unfortunately several code locations were found where the wrong helper function is being used to test soft_dirty and uffd_wp bits which leads to them getting incorrectly set or cleared during page-migration. Fix these by using the correct tests based on pte type. Fixes: a5430dda8a3a ("mm/migrate: support un-addressable ZONE_DEVICE page in migration") Fixes: 8c3328f1f36a ("mm/migrate: migrate_vma() unmap page from vma while collecting pages") Fixes: f45ec5ff16a7 ("userfaultfd: wp: support swap and page migration") Signed-off-by: Alistair Popple Signed-off-by: Andrew Morton Reviewed-by: Peter Xu Cc: Jérôme Glisse Cc: John Hubbard Cc: Ralph Campbell Cc: Alistair Popple Cc: Link: https://lkml.kernel.org/r/20200825064232.10023-2-alistair@popple.id.au Signed-off-by: Linus Torvalds --- mm/migrate.c | 15 +++++++++++---- mm/rmap.c | 9 +++++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index ddb64253fe3e..12f63806d0ac 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2427,10 +2427,17 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, entry = make_migration_entry(page, mpfn & MIGRATE_PFN_WRITE); swp_pte = swp_entry_to_pte(entry); - if (pte_soft_dirty(pte)) - swp_pte = pte_swp_mksoft_dirty(swp_pte); - if (pte_uffd_wp(pte)) - swp_pte = pte_swp_mkuffd_wp(swp_pte); + if (pte_present(pte)) { + if (pte_soft_dirty(pte)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + if (pte_uffd_wp(pte)) + swp_pte = pte_swp_mkuffd_wp(swp_pte); + } else { + if (pte_swp_soft_dirty(pte)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + if (pte_swp_uffd_wp(pte)) + swp_pte = pte_swp_mkuffd_wp(swp_pte); + } set_pte_at(mm, addr, ptep, swp_pte); /* diff --git a/mm/rmap.c b/mm/rmap.c index 83cc459edc40..9425260774a1 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1511,9 +1511,14 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, */ entry = make_migration_entry(page, 0); swp_pte = swp_entry_to_pte(entry); - if (pte_soft_dirty(pteval)) + + /* + * pteval maps a zone device page and is therefore + * a swap pte. + */ + if (pte_swp_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - if (pte_uffd_wp(pteval)) + if (pte_swp_uffd_wp(pteval)) swp_pte = pte_swp_mkuffd_wp(swp_pte); set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); /* From 6128763fc3244d1b4868e5f0aa401f7f987b5c4d Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 4 Sep 2020 16:36:04 -0700 Subject: [PATCH 245/648] mm/migrate: remove unnecessary is_zone_device_page() check Patch series "mm/migrate: preserve soft dirty in remove_migration_pte()". I happened to notice this from code inspection after seeing Alistair Popple's patch ("mm/rmap: Fixup copying of soft dirty and uffd ptes"). This patch (of 2): The check for is_zone_device_page() and is_device_private_page() is unnecessary since the latter is sufficient to determine if the page is a device private page. Simplify the code for easier reading. Signed-off-by: Ralph Campbell Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Cc: Jerome Glisse Cc: Alistair Popple Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Bharata B Rao Link: https://lkml.kernel.org/r/20200831212222.22409-1-rcampbell@nvidia.com Link: https://lkml.kernel.org/r/20200831212222.22409-2-rcampbell@nvidia.com Signed-off-by: Linus Torvalds --- mm/migrate.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 12f63806d0ac..1d791d420725 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -246,13 +246,11 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, else if (pte_swp_uffd_wp(*pvmw.pte)) pte = pte_mkuffd_wp(pte); - if (unlikely(is_zone_device_page(new))) { - if (is_device_private_page(new)) { - entry = make_device_private_entry(new, pte_write(pte)); - pte = swp_entry_to_pte(entry); - if (pte_swp_uffd_wp(*pvmw.pte)) - pte = pte_swp_mkuffd_wp(pte); - } + if (unlikely(is_device_private_page(new))) { + entry = make_device_private_entry(new, pte_write(pte)); + pte = swp_entry_to_pte(entry); + if (pte_swp_uffd_wp(*pvmw.pte)) + pte = pte_swp_mkuffd_wp(pte); } #ifdef CONFIG_HUGETLB_PAGE From 3d321bf82c4be8e33261754a5775bc65fc5d2184 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 4 Sep 2020 16:36:07 -0700 Subject: [PATCH 246/648] mm/migrate: preserve soft dirty in remove_migration_pte() The code to remove a migration PTE and replace it with a device private PTE was not copying the soft dirty bit from the migration entry. This could lead to page contents not being marked dirty when faulting the page back from device private memory. Signed-off-by: Ralph Campbell Signed-off-by: Andrew Morton Reviewed-by: Christoph Hellwig Cc: Jerome Glisse Cc: Alistair Popple Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Bharata B Rao Link: https://lkml.kernel.org/r/20200831212222.22409-3-rcampbell@nvidia.com Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index 1d791d420725..941b89383cf3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -249,6 +249,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, if (unlikely(is_device_private_page(new))) { entry = make_device_private_entry(new, pte_write(pte)); pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*pvmw.pte)) + pte = pte_swp_mksoft_dirty(pte); if (pte_swp_uffd_wp(*pvmw.pte)) pte = pte_swp_mkuffd_wp(pte); } From 953f064aa6b29debcc211869b60bd59f26d19c34 Mon Sep 17 00:00:00 2001 From: Li Xinhai Date: Fri, 4 Sep 2020 16:36:10 -0700 Subject: [PATCH 247/648] mm/hugetlb: try preferred node first when alloc gigantic page from cma Since commit cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma"), the gigantic page would be allocated from node which is not the preferred node, although there are pages available from that node. The reason is that the nid parameter has been ignored in alloc_gigantic_page(). Besides, the __GFP_THISNODE also need be checked if user required to alloc only from the preferred node. After this patch, the preferred node is tried first before other allowed nodes, and don't try to allocate from other nodes if __GFP_THISNODE is specified. If user don't specify the preferred node, the current node will be used as preferred node, which makes sure consistent behavior of allocating gigantic and non-gigantic hugetlb page. Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") Signed-off-by: Li Xinhai Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Cc: Roman Gushchin Link: https://lkml.kernel.org/r/20200902025016.697260-1-lixinhai.lxh@gmail.com Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a301c2d672bf..5957dc80ebb1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1250,21 +1250,32 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { unsigned long nr_pages = 1UL << huge_page_order(h); + if (nid == NUMA_NO_NODE) + nid = numa_mem_id(); #ifdef CONFIG_CMA { struct page *page; int node; - for_each_node_mask(node, *nodemask) { - if (!hugetlb_cma[node]) - continue; - - page = cma_alloc(hugetlb_cma[node], nr_pages, - huge_page_order(h), true); + if (hugetlb_cma[nid]) { + page = cma_alloc(hugetlb_cma[nid], nr_pages, + huge_page_order(h), true); if (page) return page; } + + if (!(gfp_mask & __GFP_THISNODE)) { + for_each_node_mask(node, *nodemask) { + if (node == nid || !hugetlb_cma[node]) + continue; + + page = cma_alloc(hugetlb_cma[node], nr_pages, + huge_page_order(h), true); + if (page) + return page; + } + } } #endif From 17743798d81238ab13050e8e2833699b54e15467 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 4 Sep 2020 16:36:13 -0700 Subject: [PATCH 248/648] mm/hugetlb: fix a race between hugetlb sysctl handlers There is a race between the assignment of `table->data` and write value to the pointer of `table->data` in the __do_proc_doulongvec_minmax() on the other thread. CPU0: CPU1: proc_sys_write hugetlb_sysctl_handler proc_sys_call_handler hugetlb_sysctl_handler_common hugetlb_sysctl_handler table->data = &tmp; hugetlb_sysctl_handler_common table->data = &tmp; proc_doulongvec_minmax do_proc_doulongvec_minmax sysctl_head_finish __do_proc_doulongvec_minmax unuse_table i = table->data; *i = val; // corrupt CPU1's stack Fix this by duplicating the `table`, and only update the duplicate of it. And introduce a helper of proc_hugetlb_doulongvec_minmax() to simplify the code. The following oops was seen: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page Code: Bad RIP value. ... Call Trace: ? set_max_huge_pages+0x3da/0x4f0 ? alloc_pool_huge_page+0x150/0x150 ? proc_doulongvec_minmax+0x46/0x60 ? hugetlb_sysctl_handler_common+0x1c7/0x200 ? nr_hugepages_store+0x20/0x20 ? copy_fd_bitmaps+0x170/0x170 ? hugetlb_sysctl_handler+0x1e/0x20 ? proc_sys_call_handler+0x2f1/0x300 ? unregister_sysctl_table+0xb0/0xb0 ? __fd_install+0x78/0x100 ? proc_sys_write+0x14/0x20 ? __vfs_write+0x4d/0x90 ? vfs_write+0xef/0x240 ? ksys_write+0xc0/0x160 ? __ia32_sys_read+0x50/0x50 ? __close_fd+0x129/0x150 ? __x64_sys_write+0x43/0x50 ? do_syscall_64+0x6c/0x200 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: e5ff215941d5 ("hugetlb: multiple hstates for multiple page sizes") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Mike Kravetz Cc: Andi Kleen Link: http://lkml.kernel.org/r/20200828031146.43035-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5957dc80ebb1..67fc6383995b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3465,6 +3465,22 @@ static unsigned int allowed_mems_nr(struct hstate *h) } #ifdef CONFIG_SYSCTL +static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write, + void *buffer, size_t *length, + loff_t *ppos, unsigned long *out) +{ + struct ctl_table dup_table; + + /* + * In order to avoid races with __do_proc_doulongvec_minmax(), we + * can duplicate the @table and alter the duplicate of it. + */ + dup_table = *table; + dup_table.data = out; + + return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos); +} + static int hugetlb_sysctl_handler_common(bool obey_mempolicy, struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) @@ -3476,9 +3492,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy, if (!hugepages_supported()) return -EOPNOTSUPP; - table->data = &tmp; - table->maxlen = sizeof(unsigned long); - ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); + ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos, + &tmp); if (ret) goto out; @@ -3521,9 +3536,8 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, if (write && hstate_is_gigantic(h)) return -EINVAL; - table->data = &tmp; - table->maxlen = sizeof(unsigned long); - ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); + ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos, + &tmp); if (ret) goto out; From e5a59d308f52bb0052af5790c22173651b187465 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 4 Sep 2020 16:36:16 -0700 Subject: [PATCH 249/648] mm/khugepaged.c: fix khugepaged's request size in collapse_file collapse_file() in khugepaged passes PAGE_SIZE as the number of pages to be read to page_cache_sync_readahead(). The intent was probably to read a single page. Fix it to use the number of pages to the end of the window instead. Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS") Signed-off-by: David Howells Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Song Liu Acked-by: Yang Shi Acked-by: Pankaj Gupta Cc: Eric Biggers Link: https://lkml.kernel.org/r/20200903140844.14194-2-willy@infradead.org Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index e749e568e1ea..cfa0dba5fd3b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1709,7 +1709,7 @@ static void collapse_file(struct mm_struct *mm, xas_unlock_irq(&xas); page_cache_sync_readahead(mapping, &file->f_ra, file, index, - PAGE_SIZE); + end - index); /* drain pagevecs to help isolate_lru_page() */ lru_add_drain(); page = find_lock_page(mapping, index); From 428fc0aff4e59399ec719ffcc1f7a5d29a4ee476 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 4 Sep 2020 16:36:19 -0700 Subject: [PATCH 250/648] include/linux/log2.h: add missing () around n in roundup_pow_of_two() Otherwise gcc generates warnings if the expression is complicated. Fixes: 312a0c170945 ("[PATCH] LOG2: Alter roundup_pow_of_two() so that it can use a ilog2() on a constant") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/0-v1-8a2697e3c003+41165-log_brackets_jgg@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/log2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index 83a4a3ca3e8a..c619ec6eff4a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -173,7 +173,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define roundup_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n == 1) ? 1 : \ + ((n) == 1) ? 1 : \ (1UL << (ilog2((n) - 1) + 1)) \ ) : \ __roundup_pow_of_two(n) \ From b7ddce3cbf010edbfac6c6d8cc708560a7bcd7a4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Sep 2020 00:45:14 +0300 Subject: [PATCH 251/648] io_uring: fix cancel of deferred reqs with ->files While trying to cancel requests with ->files, it also should look for requests in ->defer_list, otherwise it might end up hanging a thread. Cancel all requests in ->defer_list up to the last request there with matching ->files, that's needed to follow drain ordering semantics. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index f703182df2d4..6129c67b0e3b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8098,12 +8098,39 @@ static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) io_timeout_remove_link(ctx, req); } +static void io_cancel_defer_files(struct io_ring_ctx *ctx, + struct files_struct *files) +{ + struct io_defer_entry *de = NULL; + LIST_HEAD(list); + + spin_lock_irq(&ctx->completion_lock); + list_for_each_entry_reverse(de, &ctx->defer_list, list) { + if ((de->req->flags & REQ_F_WORK_INITIALIZED) + && de->req->work.files == files) { + list_cut_position(&list, &ctx->defer_list, &de->list); + break; + } + } + spin_unlock_irq(&ctx->completion_lock); + + while (!list_empty(&list)) { + de = list_first_entry(&list, struct io_defer_entry, list); + list_del_init(&de->list); + req_set_fail_links(de->req); + io_put_req(de->req); + io_req_complete(de->req, -ECANCELED); + kfree(de); + } +} + static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { if (list_empty_careful(&ctx->inflight_list)) return; + io_cancel_defer_files(ctx, files); /* cancel all at once, should be faster than doing it one by one*/ io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true); From c127a2a1b7baa5eb40a7e2de4b7f0c51ccbbb2ef Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Sep 2020 00:45:15 +0300 Subject: [PATCH 252/648] io_uring: fix linked deferred ->files cancellation While looking for ->files in ->defer_list, consider that requests there may actually be links. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6129c67b0e3b..175fb647d099 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8024,6 +8024,28 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req) return false; } +static inline bool io_match_files(struct io_kiocb *req, + struct files_struct *files) +{ + return (req->flags & REQ_F_WORK_INITIALIZED) && req->work.files == files; +} + +static bool io_match_link_files(struct io_kiocb *req, + struct files_struct *files) +{ + struct io_kiocb *link; + + if (io_match_files(req, files)) + return true; + if (req->flags & REQ_F_LINK_HEAD) { + list_for_each_entry(link, &req->link_list, link_list) { + if (io_match_files(link, files)) + return true; + } + } + return false; +} + /* * We're looking to cancel 'req' because it's holding on to our files, but * 'req' could be a link to another request. See if it is, and cancel that @@ -8106,8 +8128,7 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, spin_lock_irq(&ctx->completion_lock); list_for_each_entry_reverse(de, &ctx->defer_list, list) { - if ((de->req->flags & REQ_F_WORK_INITIALIZED) - && de->req->work.files == files) { + if (io_match_link_files(de->req, files)) { list_cut_position(&list, &ctx->defer_list, &de->list); break; } From 4e4269ebe7e18038fffacf113e2dd5ded6d49942 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 4 Sep 2020 16:37:28 +0800 Subject: [PATCH 253/648] hinic: bump up the timeout of SET_FUNC_STATE cmd We free memory regardless of the return value of SET_FUNC_STATE cmd in hinic_close function to avoid memory leak and this cmd may timeout when fw is busy with handling other cmds, so we bump up the timeout of this cmd to ensure it won't return failure. Fixes: 00e57a6d4ad3 ("net-next/hinic: Add Tx operation") Signed-off-by: Luo bin Signed-off-by: Jakub Kicinski --- .../net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index c6ce5966284c..0d56c6ceccd9 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -47,6 +47,8 @@ #define MGMT_MSG_TIMEOUT 5000 +#define SET_FUNC_PORT_MBOX_TIMEOUT 30000 + #define SET_FUNC_PORT_MGMT_TIMEOUT 25000 #define mgmt_to_pfhwdev(pf_mgmt) \ @@ -361,16 +363,20 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, return -EINVAL; } - if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) - timeout = SET_FUNC_PORT_MGMT_TIMEOUT; + if (HINIC_IS_VF(hwif)) { + if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) + timeout = SET_FUNC_PORT_MBOX_TIMEOUT; - if (HINIC_IS_VF(hwif)) return hinic_mbox_to_pf(pf_to_mgmt->hwdev, mod, cmd, buf_in, - in_size, buf_out, out_size, 0); - else + in_size, buf_out, out_size, timeout); + } else { + if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) + timeout = SET_FUNC_PORT_MGMT_TIMEOUT; + return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size, buf_out, out_size, MGMT_DIRECT_SEND, MSG_NOT_RESP, timeout); + } } static void recv_mgmt_msg_work_handler(struct work_struct *work) From 0c97ee5fbdca1be3cc7af730832170f4f7c38e68 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 4 Sep 2020 16:37:29 +0800 Subject: [PATCH 254/648] hinic: bump up the timeout of UPDATE_FW cmd Firmware erases the entire flash region which may take several seconds before flashing, so we bump up the timeout to ensure this cmd won't return failure. Fixes: 5e126e7c4e52 ("hinic: add firmware update support") Signed-off-by: Luo bin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 0d56c6ceccd9..2ebae6cb5db5 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -51,6 +51,8 @@ #define SET_FUNC_PORT_MGMT_TIMEOUT 25000 +#define UPDATE_FW_MGMT_TIMEOUT 20000 + #define mgmt_to_pfhwdev(pf_mgmt) \ container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt) @@ -372,6 +374,8 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, } else { if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) timeout = SET_FUNC_PORT_MGMT_TIMEOUT; + else if (cmd == HINIC_PORT_CMD_UPDATE_FW) + timeout = UPDATE_FW_MGMT_TIMEOUT; return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size, buf_out, out_size, MGMT_DIRECT_SEND, From 94cc242a067a869c29800aa789d38b7676136e50 Mon Sep 17 00:00:00 2001 From: Ganji Aravind Date: Fri, 4 Sep 2020 15:58:18 +0530 Subject: [PATCH 255/648] cxgb4: Fix offset when clearing filter byte counters Pass the correct offset to clear the stale filter hit bytes counter. Otherwise, the counter starts incrementing from the stale information, instead of 0. Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters") Signed-off-by: Ganji Aravind Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 650db92cb11c..481498585ead 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1911,13 +1911,16 @@ int cxgb4_del_filter(struct net_device *dev, int filter_id, static int configure_filter_tcb(struct adapter *adap, unsigned int tid, struct filter_entry *f) { - if (f->fs.hitcnts) + if (f->fs.hitcnts) { set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W, - TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) | + TCB_TIMESTAMP_V(TCB_TIMESTAMP_M), + TCB_TIMESTAMP_V(0ULL), + 1); + set_tcb_field(adap, f, tid, TCB_RTT_TS_RECENT_AGE_W, TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M), - TCB_TIMESTAMP_V(0ULL) | TCB_RTT_TS_RECENT_AGE_V(0ULL), 1); + } if (f->fs.newdmac) set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1, From d7739b0b6d15ef9ad5c79424736b8ded5ed3e913 Mon Sep 17 00:00:00 2001 From: Parshuram Thombare Date: Sat, 5 Sep 2020 10:21:33 +0200 Subject: [PATCH 256/648] net: macb: fix for pause frame receive enable bit PAE bit of NCFGR register, when set, pauses transmission if a non-zero 802.3 classic pause frame is received. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Signed-off-by: Parshuram Thombare Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6761f404b8aa..9179f7b0b900 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -647,8 +647,7 @@ static void macb_mac_link_up(struct phylink_config *config, ctrl |= GEM_BIT(GBE); } - /* We do not support MLO_PAUSE_RX yet */ - if (tx_pause) + if (rx_pause) ctrl |= MACB_BIT(PAE); macb_set_tx_clk(bp->tx_clk, speed, ndev); From 4ddcaf1ebb5e4e99240f29d531ee69d4244fe416 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 5 Sep 2020 12:32:33 +0200 Subject: [PATCH 257/648] net: dsa: rtl8366: Properly clear member config When removing a port from a VLAN we are just erasing the member config for the VLAN, which is wrong: other ports can be using it. Just mask off the port and only zero out the rest of the member config once ports using of the VLAN are removed from it. Reported-by: Florian Fainelli Fixes: d8652956cf37 ("net: dsa: realtek-smi: Add Realtek SMI driver") Signed-off-by: Linus Walleij Signed-off-by: Jakub Kicinski --- drivers/net/dsa/rtl8366.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 8f40fbf70a82..a8c5a934c3d3 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -452,13 +452,19 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port, return ret; if (vid == vlanmc.vid) { - /* clear VLAN member configurations */ - vlanmc.vid = 0; - vlanmc.priority = 0; - vlanmc.member = 0; - vlanmc.untag = 0; - vlanmc.fid = 0; - + /* Remove this port from the VLAN */ + vlanmc.member &= ~BIT(port); + vlanmc.untag &= ~BIT(port); + /* + * If no ports are members of this VLAN + * anymore then clear the whole member + * config so it can be reused. + */ + if (!vlanmc.member && vlanmc.untag) { + vlanmc.vid = 0; + vlanmc.priority = 0; + vlanmc.fid = 0; + } ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); if (ret) { dev_err(smi->dev, From f4d51dffc6c01a9e94650d95ce0104964f8ae822 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Sep 2020 17:11:40 -0700 Subject: [PATCH 258/648] Linux 5.9-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ff5e0731d26d..ec2330ce0fc5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From 01ec372cef1e5afa4ab843bbaf88a6fcb64dc14c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 3 Sep 2020 10:02:39 +1000 Subject: [PATCH 259/648] cifs: fix DFS mount with cifsacl/modefromsid RHBZ: 1871246 If during cifs_lookup()/get_inode_info() we encounter a DFS link and we use the cifsacl or modefromsid mount options we must suppress any -EREMOTE errors that triggers or else we will not be able to follow the DFS link and automount the target. This fixes an issue with modefromsid/cifsacl where these mountoptions would break DFS and we would no longer be able to access the share. Signed-off-by: Ronnie Sahlberg Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 3989d08396ac..1f75b25e559a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1017,6 +1017,8 @@ cifs_get_inode_info(struct inode **inode, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) { rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, true, full_path, fid); + if (rc == -EREMOTE) + rc = 0; if (rc) { cifs_dbg(FYI, "%s: Get mode from SID failed. rc=%d\n", __func__, rc); @@ -1025,6 +1027,8 @@ cifs_get_inode_info(struct inode **inode, } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, false, full_path, fid); + if (rc == -EREMOTE) + rc = 0; if (rc) { cifs_dbg(FYI, "%s: Getting ACL failed with error: %d\n", __func__, rc); From 2cf9bfe9be75ed3656bbf882fb70c3e3047866e4 Mon Sep 17 00:00:00 2001 From: Raul E Rangel Date: Mon, 31 Aug 2020 15:10:32 -0600 Subject: [PATCH 260/648] mmc: sdhci-acpi: Clear amd_sdhci_host on reset The commit 61d7437ed1390 ("mmc: sdhci-acpi: Fix HS400 tuning for AMDI0040") broke resume for eMMC HS400. When the system suspends the eMMC controller is powered down. So, on resume we need to reinitialize the controller. Although, amd_sdhci_host was not getting cleared, so the DLL was never re-enabled on resume. This results in HS400 being non-functional. To fix the problem, this change clears the tuned_clock flag, clears the dll_enabled flag and disables the DLL on reset. Fixes: 61d7437ed1390 ("mmc: sdhci-acpi: Fix HS400 tuning for AMDI0040") Signed-off-by: Raul E Rangel Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200831150517.1.I93c78bfc6575771bb653c9d3fca5eb018a08417d@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 962f074ca174..284cba11e279 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -551,12 +551,18 @@ static int amd_select_drive_strength(struct mmc_card *card, return MMC_SET_DRIVER_TYPE_A; } -static void sdhci_acpi_amd_hs400_dll(struct sdhci_host *host) +static void sdhci_acpi_amd_hs400_dll(struct sdhci_host *host, bool enable) { + struct sdhci_acpi_host *acpi_host = sdhci_priv(host); + struct amd_sdhci_host *amd_host = sdhci_acpi_priv(acpi_host); + /* AMD Platform requires dll setting */ sdhci_writel(host, 0x40003210, SDHCI_AMD_RESET_DLL_REGISTER); usleep_range(10, 20); - sdhci_writel(host, 0x40033210, SDHCI_AMD_RESET_DLL_REGISTER); + if (enable) + sdhci_writel(host, 0x40033210, SDHCI_AMD_RESET_DLL_REGISTER); + + amd_host->dll_enabled = enable; } /* @@ -596,10 +602,8 @@ static void amd_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* DLL is only required for HS400 */ if (host->timing == MMC_TIMING_MMC_HS400 && - !amd_host->dll_enabled) { - sdhci_acpi_amd_hs400_dll(host); - amd_host->dll_enabled = true; - } + !amd_host->dll_enabled) + sdhci_acpi_amd_hs400_dll(host, true); } } @@ -620,10 +624,23 @@ static int amd_sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) return err; } +static void amd_sdhci_reset(struct sdhci_host *host, u8 mask) +{ + struct sdhci_acpi_host *acpi_host = sdhci_priv(host); + struct amd_sdhci_host *amd_host = sdhci_acpi_priv(acpi_host); + + if (mask & SDHCI_RESET_ALL) { + amd_host->tuned_clock = false; + sdhci_acpi_amd_hs400_dll(host, false); + } + + sdhci_reset(host, mask); +} + static const struct sdhci_ops sdhci_acpi_ops_amd = { .set_clock = sdhci_set_clock, .set_bus_width = sdhci_set_bus_width, - .reset = sdhci_reset, + .reset = amd_sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, }; From 9d5dcefb7b114d610aeb2371f6a6f119af316e43 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 27 Aug 2020 07:58:41 -0700 Subject: [PATCH 261/648] mmc: sdhci-msm: Add retries when all tuning phases are found valid As the comments in this patch say, if we tune and find all phases are valid it's _almost_ as bad as no phases being found valid. Probably all phases are not really reliable but we didn't detect where the unreliable place is. That means we'll essentially be guessing and hoping we get a good phase. This is not just a problem in theory. It was causing real problems on a real board. On that board, most often phase 10 is found as the only invalid phase, though sometimes 10 and 11 are invalid and sometimes just 11. Some percentage of the time, however, all phases are found to be valid. When this happens, the current logic will decide to use phase 11. Since phase 11 is sometimes found to be invalid, this is a bad choice. Sure enough, when phase 11 is picked we often get mmc errors later in boot. I have seen cases where all phases were found to be valid 3 times in a row, so increase the retry count to 10 just to be extra sure. Fixes: 415b5a75da43 ("mmc: sdhci-msm: Add platform_execute_tuning implementation") Signed-off-by: Douglas Anderson Reviewed-by: Veerabhadrarao Badiganti Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200827075809.1.If179abf5ecb67c963494db79c3bc4247d987419b@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 5a33389037cd..729868abd2db 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -1166,7 +1166,7 @@ static void sdhci_msm_set_cdr(struct sdhci_host *host, bool enable) static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); - int tuning_seq_cnt = 3; + int tuning_seq_cnt = 10; u8 phase, tuned_phases[16], tuned_phase_cnt = 0; int rc; struct mmc_ios ios = host->mmc->ios; @@ -1222,6 +1222,22 @@ static int sdhci_msm_execute_tuning(struct mmc_host *mmc, u32 opcode) } while (++phase < ARRAY_SIZE(tuned_phases)); if (tuned_phase_cnt) { + if (tuned_phase_cnt == ARRAY_SIZE(tuned_phases)) { + /* + * All phases valid is _almost_ as bad as no phases + * valid. Probably all phases are not really reliable + * but we didn't detect where the unreliable place is. + * That means we'll essentially be guessing and hoping + * we get a good phase. Better to try a few times. + */ + dev_dbg(mmc_dev(mmc), "%s: All phases valid; try again\n", + mmc_hostname(mmc)); + if (--tuning_seq_cnt) { + tuned_phase_cnt = 0; + goto retry; + } + } + rc = msm_find_most_appropriate_phase(host, tuned_phases, tuned_phase_cnt); if (rc < 0) From a395acf0f6dc6409a704dea6fc3cd71eb8e3e4ec Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 1 Sep 2020 17:04:38 +0200 Subject: [PATCH 262/648] mmc: mmc_spi: Allow the driver to be built when CONFIG_HAS_DMA is unset The commit cd57d07b1e4e ("sh: don't allow non-coherent DMA for NOMMU") made CONFIG_NO_DMA to be set for some platforms, for good reasons. Consequentially, CONFIG_HAS_DMA doesn't get set, which makes the DMA mapping interface to be built as stub functions, but also prevent the mmc_spi driver from being built as it depends on CONFIG_HAS_DMA. It turns out that for some odd cases, the driver still relied on the DMA mapping interface, even if the DMA was not actively being used. To fixup the behaviour, let's drop the build dependency for CONFIG_HAS_DMA. Moreover, as to allow the driver to succeed probing, let's move the DMA initializations behind "#ifdef CONFIG_HAS_DMA". Fixes: cd57d07b1e4e ("sh: don't allow non-coherent DMA for NOMMU") Reported-by: Rich Felker Signed-off-by: Ulf Hansson Tested-by: Rich Felker Link: https://lore.kernel.org/r/20200901150438.228887-1-ulf.hansson@linaro.org --- drivers/mmc/host/Kconfig | 2 +- drivers/mmc/host/mmc_spi.c | 86 +++++++++++++++++++++++--------------- 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 9c89a5b780e8..9a34c827c96e 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -602,7 +602,7 @@ config MMC_GOLDFISH config MMC_SPI tristate "MMC/SD/SDIO over SPI" - depends on SPI_MASTER && HAS_DMA + depends on SPI_MASTER select CRC7 select CRC_ITU_T help diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 39bb1e30c2d7..5055a7eb134a 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1278,6 +1278,52 @@ mmc_spi_detect_irq(int irq, void *mmc) return IRQ_HANDLED; } +#ifdef CONFIG_HAS_DMA +static int mmc_spi_dma_alloc(struct mmc_spi_host *host) +{ + struct spi_device *spi = host->spi; + struct device *dev; + + if (!spi->master->dev.parent->dma_mask) + return 0; + + dev = spi->master->dev.parent; + + host->ones_dma = dma_map_single(dev, host->ones, MMC_SPI_BLOCKSIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, host->ones_dma)) + return -ENOMEM; + + host->data_dma = dma_map_single(dev, host->data, sizeof(*host->data), + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, host->data_dma)) { + dma_unmap_single(dev, host->ones_dma, MMC_SPI_BLOCKSIZE, + DMA_TO_DEVICE); + return -ENOMEM; + } + + dma_sync_single_for_cpu(dev, host->data_dma, sizeof(*host->data), + DMA_BIDIRECTIONAL); + + host->dma_dev = dev; + return 0; +} + +static void mmc_spi_dma_free(struct mmc_spi_host *host) +{ + if (!host->dma_dev) + return; + + dma_unmap_single(host->dma_dev, host->ones_dma, MMC_SPI_BLOCKSIZE, + DMA_TO_DEVICE); + dma_unmap_single(host->dma_dev, host->data_dma, sizeof(*host->data), + DMA_BIDIRECTIONAL); +} +#else +static inline mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; } +static inline void mmc_spi_dma_free(struct mmc_spi_host *host) {} +#endif + static int mmc_spi_probe(struct spi_device *spi) { void *ones; @@ -1374,23 +1420,9 @@ static int mmc_spi_probe(struct spi_device *spi) if (!host->data) goto fail_nobuf1; - if (spi->master->dev.parent->dma_mask) { - struct device *dev = spi->master->dev.parent; - - host->dma_dev = dev; - host->ones_dma = dma_map_single(dev, ones, - MMC_SPI_BLOCKSIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dev, host->ones_dma)) - goto fail_ones_dma; - host->data_dma = dma_map_single(dev, host->data, - sizeof(*host->data), DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, host->data_dma)) - goto fail_data_dma; - - dma_sync_single_for_cpu(host->dma_dev, - host->data_dma, sizeof(*host->data), - DMA_BIDIRECTIONAL); - } + status = mmc_spi_dma_alloc(host); + if (status) + goto fail_dma; /* setup message for status/busy readback */ spi_message_init(&host->readback); @@ -1458,20 +1490,12 @@ static int mmc_spi_probe(struct spi_device *spi) fail_add_host: mmc_remove_host(mmc); fail_glue_init: - if (host->dma_dev) - dma_unmap_single(host->dma_dev, host->data_dma, - sizeof(*host->data), DMA_BIDIRECTIONAL); -fail_data_dma: - if (host->dma_dev) - dma_unmap_single(host->dma_dev, host->ones_dma, - MMC_SPI_BLOCKSIZE, DMA_TO_DEVICE); -fail_ones_dma: + mmc_spi_dma_free(host); +fail_dma: kfree(host->data); - fail_nobuf1: mmc_free_host(mmc); mmc_spi_put_pdata(spi); - nomem: kfree(ones); return status; @@ -1489,13 +1513,7 @@ static int mmc_spi_remove(struct spi_device *spi) mmc_remove_host(mmc); - if (host->dma_dev) { - dma_unmap_single(host->dma_dev, host->ones_dma, - MMC_SPI_BLOCKSIZE, DMA_TO_DEVICE); - dma_unmap_single(host->dma_dev, host->data_dma, - sizeof(*host->data), DMA_BIDIRECTIONAL); - } - + mmc_spi_dma_free(host); kfree(host->data); kfree(host->ones); From 060522d89705f9d961ef1762dc1468645dd21fbd Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 3 Sep 2020 13:20:29 +1200 Subject: [PATCH 263/648] mmc: sdhci-of-esdhc: Don't walk device-tree on every interrupt Commit b214fe592ab7 ("mmc: sdhci-of-esdhc: add erratum eSDHC7 support") added code to check for a specific compatible string in the device-tree on every esdhc interrupat. Instead of doing this record the quirk in struct sdhci_esdhc and lookup the struct in esdhc_irq. Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20200903012029.25673-1-chris.packham@alliedtelesis.co.nz Fixes: b214fe592ab7 ("mmc: sdhci-of-esdhc: add erratum eSDHC7 support") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-esdhc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 7c73d243dc6c..45881b309956 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -81,6 +81,7 @@ struct sdhci_esdhc { bool quirk_tuning_erratum_type2; bool quirk_ignore_data_inhibit; bool quirk_delay_before_data_reset; + bool quirk_trans_complete_erratum; bool in_sw_tuning; unsigned int peripheral_clock; const struct esdhc_clk_fixup *clk_fixup; @@ -1177,10 +1178,11 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, static u32 esdhc_irq(struct sdhci_host *host, u32 intmask) { + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host); u32 command; - if (of_find_compatible_node(NULL, NULL, - "fsl,p2020-esdhc")) { + if (esdhc->quirk_trans_complete_erratum) { command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)); if (command == MMC_WRITE_MULTIPLE_BLOCK && @@ -1334,8 +1336,10 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) esdhc->clk_fixup = match->data; np = pdev->dev.of_node; - if (of_device_is_compatible(np, "fsl,p2020-esdhc")) + if (of_device_is_compatible(np, "fsl,p2020-esdhc")) { esdhc->quirk_delay_before_data_reset = true; + esdhc->quirk_trans_complete_erratum = true; + } clk = of_clk_get(np, 0); if (!IS_ERR(clk)) { From f0c393e2104e48c8a881719a8bd37996f71b0aee Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 3 Sep 2020 11:20:07 +0300 Subject: [PATCH 264/648] mmc: sdio: Use mmc_pre_req() / mmc_post_req() SDHCI changed from using a tasklet to finish requests, to using an IRQ thread i.e. commit c07a48c2651965 ("mmc: sdhci: Remove finish_tasklet"). Because this increased the latency to complete requests, a preparatory change was made to complete the request from the IRQ handler if possible i.e. commit 19d2f695f4e827 ("mmc: sdhci: Call mmc_request_done() from IRQ handler if possible"). That alleviated the situation for MMC block devices because the MMC block driver makes use of mmc_pre_req() and mmc_post_req() so that successful requests are completed in the IRQ handler and any DMA unmapping is handled separately in mmc_post_req(). However SDIO was still affected, and an example has been reported with up to 20% degradation in performance. Looking at SDIO I/O helper functions, sdio_io_rw_ext_helper() appeared to be a possible candidate for making use of asynchronous requests within its I/O loops, but analysis revealed that these loops almost never iterate more than once, so the complexity of the change would not be warrented. Instead, mmc_pre_req() and mmc_post_req() are added before and after I/O submission (mmc_wait_for_req) in mmc_io_rw_extended(). This still has the potential benefit of reducing the duration of interrupt handlers, as well as addressing the latency issue for SDHCI. It also seems a more reasonable solution than forcing drivers to do everything in the IRQ handler. Reported-by: Dmitry Osipenko Fixes: c07a48c2651965 ("mmc: sdhci: Remove finish_tasklet") Signed-off-by: Adrian Hunter Tested-by: Dmitry Osipenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200903082007.18715-1-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/sdio_ops.c | 39 +++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/mmc/core/sdio_ops.c b/drivers/mmc/core/sdio_ops.c index 93d346c01110..4c229dd2b6e5 100644 --- a/drivers/mmc/core/sdio_ops.c +++ b/drivers/mmc/core/sdio_ops.c @@ -121,6 +121,7 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn, struct sg_table sgtable; unsigned int nents, left_size, i; unsigned int seg_size = card->host->max_seg_size; + int err; WARN_ON(blksz == 0); @@ -170,28 +171,32 @@ int mmc_io_rw_extended(struct mmc_card *card, int write, unsigned fn, mmc_set_data_timeout(&data, card); + mmc_pre_req(card->host, &mrq); + mmc_wait_for_req(card->host, &mrq); + if (cmd.error) + err = cmd.error; + else if (data.error) + err = data.error; + else if (mmc_host_is_spi(card->host)) + /* host driver already reported errors */ + err = 0; + else if (cmd.resp[0] & R5_ERROR) + err = -EIO; + else if (cmd.resp[0] & R5_FUNCTION_NUMBER) + err = -EINVAL; + else if (cmd.resp[0] & R5_OUT_OF_RANGE) + err = -ERANGE; + else + err = 0; + + mmc_post_req(card->host, &mrq, err); + if (nents > 1) sg_free_table(&sgtable); - if (cmd.error) - return cmd.error; - if (data.error) - return data.error; - - if (mmc_host_is_spi(card->host)) { - /* host driver already reported errors */ - } else { - if (cmd.resp[0] & R5_ERROR) - return -EIO; - if (cmd.resp[0] & R5_FUNCTION_NUMBER) - return -EINVAL; - if (cmd.resp[0] & R5_OUT_OF_RANGE) - return -ERANGE; - } - - return 0; + return err; } int sdio_reset(struct mmc_host *host) From c3cdf189276c2a63da62ee250615bd55e3fb680d Mon Sep 17 00:00:00 2001 From: Luke D Jones Date: Mon, 7 Sep 2020 20:19:59 +1200 Subject: [PATCH 265/648] ALSA: hda: fixup headset for ASUS GX502 laptop The GX502 requires a few steps to enable the headset i/o: pincfg, verbs to enable and unmute the amp used for headpone out, and a jacksense callback to toggle output via internal or jack using a verb. Signed-off-by: Luke D Jones Cc: BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208005 Link: https://lore.kernel.org/r/20200907081959.56186-1-luke@ljones.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 65 +++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c521a1f17096..abfc602c3b92 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5993,6 +5993,40 @@ static void alc_fixup_disable_mic_vref(struct hda_codec *codec, snd_hda_codec_set_pin_target(codec, 0x19, PIN_VREFHIZ); } + +static void alc294_gx502_toggle_output(struct hda_codec *codec, + struct hda_jack_callback *cb) +{ + /* The Windows driver sets the codec up in a very different way where + * it appears to leave 0x10 = 0x8a20 set. For Linux we need to toggle it + */ + if (snd_hda_jack_detect_state(codec, 0x21) == HDA_JACK_PRESENT) + alc_write_coef_idx(codec, 0x10, 0x8a20); + else + alc_write_coef_idx(codec, 0x10, 0x0a20); +} + +static void alc294_fixup_gx502_hp(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + /* Pin 0x21: headphones/headset mic */ + if (!is_jack_detectable(codec, 0x21)) + return; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_jack_detect_enable_callback(codec, 0x21, + alc294_gx502_toggle_output); + break; + case HDA_FIXUP_ACT_INIT: + /* Make sure to start in a correct state, i.e. if + * headphones have been plugged in before powering up the system + */ + alc294_gx502_toggle_output(codec, NULL); + break; + } +} + static void alc285_fixup_hp_gpio_amp_init(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6173,6 +6207,9 @@ enum { ALC285_FIXUP_THINKPAD_HEADSET_JACK, ALC294_FIXUP_ASUS_HPE, ALC294_FIXUP_ASUS_COEF_1B, + ALC294_FIXUP_ASUS_GX502_HP, + ALC294_FIXUP_ASUS_GX502_PINS, + ALC294_FIXUP_ASUS_GX502_VERBS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED, @@ -7338,6 +7375,33 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC }, + [ALC294_FIXUP_ASUS_GX502_PINS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a11050 }, /* front HP mic */ + { 0x1a, 0x01a11830 }, /* rear external mic */ + { 0x21, 0x03211020 }, /* front HP out */ + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_GX502_VERBS + }, + [ALC294_FIXUP_ASUS_GX502_VERBS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* set 0x15 to HP-OUT ctrl */ + { 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 }, + /* unmute the 0x15 amp */ + { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 }, + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_GX502_HP + }, + [ALC294_FIXUP_ASUS_GX502_HP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc294_fixup_gx502_hp, + }, [ALC294_FIXUP_ASUS_COEF_1B] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -7711,6 +7775,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), From 1264c1e0cfe55e2d6c35e869244093195529af37 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 26 Aug 2020 07:49:34 +0200 Subject: [PATCH 266/648] Revert "wlcore: Adding suppoprt for IGTK key in wlcore driver" This patch causes a regression betwen Kernel 5.7 and 5.8 at wlcore: with it applied, WiFi stops working, and the Kernel starts printing this message every second: wlcore: PHY firmware version: Rev 8.2.0.0.242 wlcore: firmware booted (Rev 8.9.0.0.79) wlcore: ERROR command execute failure 14 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 133 at drivers/net/wireless/ti/wlcore/main.c:795 wl12xx_queue_recovery_work.part.0+0x6c/0x74 [wlcore] Modules linked in: wl18xx wlcore mac80211 libarc4 cfg80211 rfkill snd_soc_hdmi_codec crct10dif_ce wlcore_sdio adv7511 cec kirin9xx_drm(C) kirin9xx_dw_drm_dsi(C) drm_kms_helper drm ip_tables x_tables ipv6 nf_defrag_ipv6 CPU: 0 PID: 133 Comm: kworker/0:1 Tainted: G WC 5.8.0+ #186 Hardware name: HiKey970 (DT) Workqueue: events_freezable ieee80211_restart_work [mac80211] pstate: 60000005 (nZCv daif -PAN -UAO BTYPE=--) pc : wl12xx_queue_recovery_work.part.0+0x6c/0x74 [wlcore] lr : wl12xx_queue_recovery_work+0x24/0x30 [wlcore] sp : ffff8000126c3a60 x29: ffff8000126c3a60 x28: 00000000000025de x27: 0000000000000010 x26: 0000000000000005 x25: ffff0001a5d49e80 x24: ffff8000092cf580 x23: ffff0001b7c12623 x22: ffff0001b6fcf2e8 x21: ffff0001b7e46200 x20: 00000000fffffffb x19: ffff0001a78e6400 x18: 0000000000000030 x17: 0000000000000001 x16: 0000000000000001 x15: ffff0001b7e46670 x14: ffffffffffffffff x13: ffff8000926c37d7 x12: ffff8000126c37e0 x11: ffff800011e01000 x10: ffff8000120526d0 x9 : 0000000000000000 x8 : 3431206572756c69 x7 : 6166206574756365 x6 : 0000000000000c2c x5 : 0000000000000000 x4 : ffff0001bf1361e8 x3 : ffff0001bf1790b0 x2 : 0000000000000000 x1 : ffff0001a5d49e80 x0 : 0000000000000001 Call trace: wl12xx_queue_recovery_work.part.0+0x6c/0x74 [wlcore] wl12xx_queue_recovery_work+0x24/0x30 [wlcore] wl1271_cmd_set_sta_key+0x258/0x25c [wlcore] wl1271_set_key+0x7c/0x2dc [wlcore] wlcore_set_key+0xe4/0x360 [wlcore] wl18xx_set_key+0x48/0x1d0 [wl18xx] wlcore_op_set_key+0xa4/0x180 [wlcore] ieee80211_key_enable_hw_accel+0xb0/0x2d0 [mac80211] ieee80211_reenable_keys+0x70/0x110 [mac80211] ieee80211_reconfig+0xa00/0xca0 [mac80211] ieee80211_restart_work+0xc4/0xfc [mac80211] process_one_work+0x1cc/0x350 worker_thread+0x13c/0x470 kthread+0x154/0x160 ret_from_fork+0x10/0x30 ---[ end trace b1f722abf9af5919 ]--- wlcore: WARNING could not set keys wlcore: ERROR Could not add or replace key wlan0: failed to set key (4, ff:ff:ff:ff:ff:ff) to hardware (-5) wlcore: Hardware recovery in progress. FW ver: Rev 8.9.0.0.79 wlcore: pc: 0x0, hint_sts: 0x00000040 count: 39 wlcore: down wlcore: down ieee80211 phy0: Hardware restart was requested mmc_host mmc0: Bus speed (slot 0) = 400000Hz (slot req 400000Hz, actual 400000HZ div = 0) mmc_host mmc0: Bus speed (slot 0) = 25000000Hz (slot req 25000000Hz, actual 25000000HZ div = 0) wlcore: PHY firmware version: Rev 8.2.0.0.242 wlcore: firmware booted (Rev 8.9.0.0.79) wlcore: ERROR command execute failure 14 ------------[ cut here ]------------ Tested on Hikey 970. This reverts commit 2b7aadd3b9e17e8b81eeb8d9cc46756ae4658265. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/f0a2cb7ea606f1a284d4c23cbf983da2954ce9b6.1598420968.git.mchehab+huawei@kernel.org --- drivers/net/wireless/ti/wlcore/cmd.h | 1 - drivers/net/wireless/ti/wlcore/main.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/cmd.h b/drivers/net/wireless/ti/wlcore/cmd.h index 9acd8a41ea61..f2609d5b6bf7 100644 --- a/drivers/net/wireless/ti/wlcore/cmd.h +++ b/drivers/net/wireless/ti/wlcore/cmd.h @@ -458,7 +458,6 @@ enum wl1271_cmd_key_type { KEY_TKIP = 2, KEY_AES = 3, KEY_GEM = 4, - KEY_IGTK = 5, }; struct wl1271_cmd_set_keys { diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 821ad1acd505..d2bbd5108f7e 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -3559,9 +3559,6 @@ int wlcore_set_key(struct wl1271 *wl, enum set_key_cmd cmd, case WL1271_CIPHER_SUITE_GEM: key_type = KEY_GEM; break; - case WLAN_CIPHER_SUITE_AES_CMAC: - key_type = KEY_IGTK; - break; default: wl1271_error("Unknown key algo 0x%x", key_conf->cipher); @@ -6231,7 +6228,6 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, WL1271_CIPHER_SUITE_GEM, - WLAN_CIPHER_SUITE_AES_CMAC, }; /* The tx descriptor buffer */ From 07ecc6693f9157cf293da5d165c73fb28fd69bf4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 3 Aug 2020 11:35:20 +0300 Subject: [PATCH 267/648] kobject: Drop unneeded conditional in __kobject_del() __kobject_del() is called from two places, in one where kobj is dereferenced before and thus can't be NULL, and in the other the NULL check is done before call. Drop unneeded conditional in __kobject_del(). Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20200803083520.5460-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/kobject.c b/lib/kobject.c index 9dce68c378e6..ea53b30cf483 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -604,9 +604,6 @@ static void __kobject_del(struct kobject *kobj) struct kernfs_node *sd; const struct kobj_type *ktype; - if (!kobj) - return; - sd = kobj->sd; ktype = get_ktype(kobj); From f44d04e696feaf13d192d942c4f14ad2e117065a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 3 Sep 2020 13:24:11 +0200 Subject: [PATCH 268/648] rbd: require global CAP_SYS_ADMIN for mapping and unmapping It turns out that currently we rely only on sysfs attribute permissions: $ ll /sys/bus/rbd/{add*,remove*} --w------- 1 root root 4096 Sep 3 20:37 /sys/bus/rbd/add --w------- 1 root root 4096 Sep 3 20:37 /sys/bus/rbd/add_single_major --w------- 1 root root 4096 Sep 3 20:37 /sys/bus/rbd/remove --w------- 1 root root 4096 Sep 3 20:38 /sys/bus/rbd/remove_single_major This means that images can be mapped and unmapped (i.e. block devices can be created and deleted) by a UID 0 process even after it drops all privileges or by any process with CAP_DAC_OVERRIDE in its user namespace as long as UID 0 is mapped into that user namespace. Be consistent with other virtual block devices (loop, nbd, dm, md, etc) and require CAP_SYS_ADMIN in the initial user namespace for mapping and unmapping, and also for dumping the configuration string and refreshing the image header. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- drivers/block/rbd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 011539039693..e77eaab5cf23 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5120,6 +5120,9 @@ static ssize_t rbd_config_info_show(struct device *dev, { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return sprintf(buf, "%s\n", rbd_dev->config_info); } @@ -5231,6 +5234,9 @@ static ssize_t rbd_image_refresh(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = rbd_dev_refresh(rbd_dev); if (ret) return ret; @@ -7059,6 +7065,9 @@ static ssize_t do_rbd_add(struct bus_type *bus, struct rbd_client *rbdc; int rc; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!try_module_get(THIS_MODULE)) return -ENODEV; @@ -7209,6 +7218,9 @@ static ssize_t do_rbd_remove(struct bus_type *bus, bool force = false; int ret; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + dev_id = -1; opt_buf[0] = '\0'; sscanf(buf, "%d %5s", &dev_id, opt_buf); From 20244b2a8a8728c63233d33146e007dcacbcc5c4 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Sep 2020 13:19:39 +0200 Subject: [PATCH 269/648] ASoC: core: Do not cleanup uninitialized dais on soc_pcm_open failure Introduce for_each_rtd_dais_rollback macro which behaves exactly like for_each_codec_dais_rollback and its cpu_dais equivalent but for all dais instead. Use newly added macro to fix soc_pcm_open error path and prevent uninitialized dais from being cleaned-up. Signed-off-by: Cezary Rojewski Fixes: 5d9fa03e6c35 ("ASoC: soc-pcm: tidyup soc_pcm_open() order") Acked-by: Liam Girdwood Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20200907111939.16169-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-pcm.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 4176071f61bf..fc4fcac72cf7 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1193,6 +1193,8 @@ struct snd_soc_pcm_runtime { ((i) < (rtd)->num_cpus + (rtd)->num_codecs) && \ ((dai) = (rtd)->dais[i]); \ (i)++) +#define for_each_rtd_dais_rollback(rtd, i, dai) \ + for (; (--(i) >= 0) && ((dai) = (rtd)->dais[i]);) void snd_soc_close_delayed_work(struct snd_soc_pcm_runtime *rtd); diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 00ac1cbf6f88..4c9d4cd8cf0b 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -812,7 +812,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) return 0; config_err: - for_each_rtd_dais(rtd, i, dai) + for_each_rtd_dais_rollback(rtd, i, dai) snd_soc_dai_shutdown(dai, substream); snd_soc_link_shutdown(substream); From ea8be08cc9358f811e4175ba7fa7fea23c5d393e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 5 Sep 2020 17:19:13 +0200 Subject: [PATCH 270/648] spi: stm32: Rate-limit the 'Communication suspended' message The 'spi_stm32 44004000.spi: Communication suspended' message means that when using PIO, the kernel did not read the FIFO fast enough and so the SPI controller paused the transfer. Currently, this is printed on every single such event, so if the kernel is busy and the controller is pausing the transfers often, the kernel will be all the more busy scrolling this message into the log buffer every few milliseconds. That is not helpful. Instead, rate-limit the message and print it every once in a while. It is not possible to use the default dev_warn_ratelimited(), because that is still too verbose, as it prints 10 lines (DEFAULT_RATELIMIT_BURST) every 5 seconds (DEFAULT_RATELIMIT_INTERVAL). The policy here is to print 1 line every 50 seconds (DEFAULT_RATELIMIT_INTERVAL * 10), because 1 line is more than enough and the cycles saved on printing are better left to the CPU to handle the SPI. However, dev_warn_once() is also not useful, as the user should be aware that this condition is possibly recurring or ongoing. Thus the custom rate-limit policy. Finally, turn the message from dev_warn() to dev_dbg(), since the system does not suffer any sort of malfunction if this message appears, it is just slowing down. This further reduces the printing into the log buffer and frees the CPU to do useful work. Fixes: dcbe0d84dfa5 ("spi: add driver for STM32 SPI controller") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Amelie Delaunay Cc: Antonio Borneo Cc: Mark Brown Link: https://lore.kernel.org/r/20200905151913.117775-1-marex@denx.de Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index d4b33b358a31..a00f6b51ccbf 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -936,7 +936,11 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) } if (sr & STM32H7_SPI_SR_SUSP) { - dev_warn(spi->dev, "Communication suspended\n"); + static DEFINE_RATELIMIT_STATE(rs, + DEFAULT_RATELIMIT_INTERVAL * 10, + 1); + if (__ratelimit(&rs)) + dev_dbg_ratelimited(spi->dev, "Communication suspended\n"); if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0))) stm32h7_spi_read_rxfifo(spi, false); /* From 141d170495beb4772fad653312364dac6f4716f5 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 30 Aug 2020 20:58:11 +0900 Subject: [PATCH 271/648] openrisc: Reserve memblock for initrd Recently OpenRISC added support for external initrd images, but I found some instability when using larger buildroot initrd images. It turned out that I forgot to reserve the memblock space for the initrd image. This patch fixes the instability issue by reserving memblock space. Fixes: ff6c923dbec3 ("openrisc: Add support for external initrd images") Signed-off-by: Stafford Horne Reviewed-by: Mike Rapoport --- arch/openrisc/kernel/setup.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index b18e775f8be3..13c87f1f872b 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -80,6 +80,16 @@ static void __init setup_memory(void) */ memblock_reserve(__pa(_stext), _end - _stext); +#ifdef CONFIG_BLK_DEV_INITRD + /* Then reserve the initrd, if any */ + if (initrd_start && (initrd_end > initrd_start)) { + unsigned long aligned_start = ALIGN_DOWN(initrd_start, PAGE_SIZE); + unsigned long aligned_end = ALIGN(initrd_end, PAGE_SIZE); + + memblock_reserve(__pa(aligned_start), aligned_end - aligned_start); + } +#endif /* CONFIG_BLK_DEV_INITRD */ + early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); From 3ae90d764093dfcd6ab8ab6875377302892c87d4 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 3 Sep 2020 05:48:58 +0900 Subject: [PATCH 272/648] openrisc: Fix cache API compile issue when not inlining I found this when compiling a kbuild random config with GCC 11. The config enables CONFIG_DEBUG_SECTION_MISMATCH, which sets CFLAGS -fno-inline-functions-called-once. This causes the call to cache_loop in cache.c to not be inlined causing the below compile error. In file included from arch/openrisc/mm/cache.c:13: arch/openrisc/mm/cache.c: In function 'cache_loop': ./arch/openrisc/include/asm/spr.h:16:27: warning: 'asm' operand 0 probably does not match constraints 16 | #define mtspr(_spr, _val) __asm__ __volatile__ ( \ | ^~~~~~~ arch/openrisc/mm/cache.c:25:3: note: in expansion of macro 'mtspr' 25 | mtspr(reg, line); | ^~~~~ ./arch/openrisc/include/asm/spr.h:16:27: error: impossible constraint in 'asm' 16 | #define mtspr(_spr, _val) __asm__ __volatile__ ( \ | ^~~~~~~ arch/openrisc/mm/cache.c:25:3: note: in expansion of macro 'mtspr' 25 | mtspr(reg, line); | ^~~~~ make[1]: *** [scripts/Makefile.build:283: arch/openrisc/mm/cache.o] Error 1 The asm constraint "K" requires a immediate constant argument to mtspr, however because of no inlining a register argument is passed causing a failure. Fix this by using __always_inline. Link: https://lore.kernel.org/lkml/202008200453.ohnhqkjQ%25lkp@intel.com/ Signed-off-by: Stafford Horne --- arch/openrisc/mm/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c index 08f56af387ac..534a52ec5e66 100644 --- a/arch/openrisc/mm/cache.c +++ b/arch/openrisc/mm/cache.c @@ -16,7 +16,7 @@ #include #include -static void cache_loop(struct page *page, const unsigned int reg) +static __always_inline void cache_loop(struct page *page, const unsigned int reg) { unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; unsigned long line = paddr & ~(L1_CACHE_BYTES - 1); From 1c30474826682bc970c3200700d8bcfa2b771278 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:42:52 -0700 Subject: [PATCH 273/648] PM: : fix @em_pd kernel-doc warning Fix kernel-doc warning in : ../include/linux/device.h:613: warning: Function parameter or member 'em_pd' not described in 'device' Fixes: 1bc138c62295 ("PM / EM: add support for other devices than CPUs in Energy Model") Signed-off-by: Randy Dunlap Cc: Rafael J. Wysocki Reviewed-by: Lukasz Luba Link: https://lore.kernel.org/r/d97f40ad-3033-703a-c3cb-2843ce0f6371@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/device.h b/include/linux/device.h index ca18da4768e3..9e6ea8931a52 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -454,6 +454,7 @@ struct dev_links_info { * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. + * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pinctl.rst for details. * @msi_list: Hosts MSI descriptors From fccc0007b8dc952c6bc0805cdf842eb8ea06a639 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Aug 2020 10:52:42 -0400 Subject: [PATCH 274/648] btrfs: fix lockdep splat in add_missing_dev Nikolay reported a lockdep splat in generic/476 that I could reproduce with btrfs/187. ====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc2+ #1 Tainted: G W ------------------------------------------------------ kswapd0/100 is trying to acquire lock: ffff9e8ef38b6268 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0x3f/0x330 but task is already holding lock: ffffffffa9d74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (fs_reclaim){+.+.}-{0:0}: fs_reclaim_acquire+0x65/0x80 slab_pre_alloc_hook.constprop.0+0x20/0x200 kmem_cache_alloc_trace+0x3a/0x1a0 btrfs_alloc_device+0x43/0x210 add_missing_dev+0x20/0x90 read_one_chunk+0x301/0x430 btrfs_read_sys_array+0x17b/0x1b0 open_ctree+0xa62/0x1896 btrfs_mount_root.cold+0x12/0xea legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 vfs_kern_mount.part.0+0x71/0xb0 btrfs_mount+0x10d/0x379 legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 path_mount+0x434/0xc00 __x64_sys_mount+0xe3/0x120 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}: __mutex_lock+0x7e/0x7e0 btrfs_chunk_alloc+0x125/0x3a0 find_free_extent+0xdf6/0x1210 btrfs_reserve_extent+0xb3/0x1b0 btrfs_alloc_tree_block+0xb0/0x310 alloc_tree_block_no_bg_flush+0x4a/0x60 __btrfs_cow_block+0x11a/0x530 btrfs_cow_block+0x104/0x220 btrfs_search_slot+0x52e/0x9d0 btrfs_lookup_inode+0x2a/0x8f __btrfs_update_delayed_inode+0x80/0x240 btrfs_commit_inode_delayed_inode+0x119/0x120 btrfs_evict_inode+0x357/0x500 evict+0xcf/0x1f0 vfs_rmdir.part.0+0x149/0x160 do_rmdir+0x136/0x1a0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (&delayed_node->mutex){+.+.}-{3:3}: __lock_acquire+0x1184/0x1fa0 lock_acquire+0xa4/0x3d0 __mutex_lock+0x7e/0x7e0 __btrfs_release_delayed_node.part.0+0x3f/0x330 btrfs_evict_inode+0x24c/0x500 evict+0xcf/0x1f0 dispose_list+0x48/0x70 prune_icache_sb+0x44/0x50 super_cache_scan+0x161/0x1e0 do_shrink_slab+0x178/0x3c0 shrink_slab+0x17c/0x290 shrink_node+0x2b2/0x6d0 balance_pgdat+0x30a/0x670 kswapd+0x213/0x4c0 kthread+0x138/0x160 ret_from_fork+0x1f/0x30 other info that might help us debug this: Chain exists of: &delayed_node->mutex --> &fs_info->chunk_mutex --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(&fs_info->chunk_mutex); lock(fs_reclaim); lock(&delayed_node->mutex); *** DEADLOCK *** 3 locks held by kswapd0/100: #0: ffffffffa9d74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 #1: ffffffffa9d65c50 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x115/0x290 #2: ffff9e8e9da260e0 (&type->s_umount_key#48){++++}-{3:3}, at: super_cache_scan+0x38/0x1e0 stack backtrace: CPU: 1 PID: 100 Comm: kswapd0 Tainted: G W 5.9.0-rc2+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x92/0xc8 check_noncircular+0x12d/0x150 __lock_acquire+0x1184/0x1fa0 lock_acquire+0xa4/0x3d0 ? __btrfs_release_delayed_node.part.0+0x3f/0x330 __mutex_lock+0x7e/0x7e0 ? __btrfs_release_delayed_node.part.0+0x3f/0x330 ? __btrfs_release_delayed_node.part.0+0x3f/0x330 ? lock_acquire+0xa4/0x3d0 ? btrfs_evict_inode+0x11e/0x500 ? find_held_lock+0x2b/0x80 __btrfs_release_delayed_node.part.0+0x3f/0x330 btrfs_evict_inode+0x24c/0x500 evict+0xcf/0x1f0 dispose_list+0x48/0x70 prune_icache_sb+0x44/0x50 super_cache_scan+0x161/0x1e0 do_shrink_slab+0x178/0x3c0 shrink_slab+0x17c/0x290 shrink_node+0x2b2/0x6d0 balance_pgdat+0x30a/0x670 kswapd+0x213/0x4c0 ? _raw_spin_unlock_irqrestore+0x46/0x60 ? add_wait_queue_exclusive+0x70/0x70 ? balance_pgdat+0x670/0x670 kthread+0x138/0x160 ? kthread_create_worker_on_cpu+0x40/0x40 ret_from_fork+0x1f/0x30 This is because we are holding the chunk_mutex when we call btrfs_alloc_device, which does a GFP_KERNEL allocation. We don't want to switch that to a GFP_NOFS lock because this is the only place where it matters. So instead use memalloc_nofs_save() around the allocation in order to avoid the lockdep splat. Reported-by: Nikolay Borisov CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 214856c4ccb1..117b43367629 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -6484,8 +6485,17 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, u64 devid, u8 *dev_uuid) { struct btrfs_device *device; + unsigned int nofs_flag; + /* + * We call this under the chunk_mutex, so we want to use NOFS for this + * allocation, however we don't want to change btrfs_alloc_device() to + * always do NOFS because we use it in a lot of other GFP_KERNEL safe + * places. + */ + nofs_flag = memalloc_nofs_save(); device = btrfs_alloc_device(NULL, &devid, dev_uuid); + memalloc_nofs_restore(nofs_flag); if (IS_ERR(device)) return device; From ea57788eb76dc81f6003245427356a1dcd0ac524 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 26 Aug 2020 17:26:43 +0800 Subject: [PATCH 275/648] btrfs: require only sector size alignment for parent eb bytenr [BUG] A completely sane converted fs will cause kernel warning at balance time: [ 1557.188633] BTRFS info (device sda7): relocating block group 8162107392 flags data [ 1563.358078] BTRFS info (device sda7): found 11722 extents [ 1563.358277] BTRFS info (device sda7): leaf 7989321728 gen 95 total ptrs 213 free space 3458 owner 2 [ 1563.358280] item 0 key (7984947200 169 0) itemoff 16250 itemsize 33 [ 1563.358281] extent refs 1 gen 90 flags 2 [ 1563.358282] ref#0: tree block backref root 4 [ 1563.358285] item 1 key (7985602560 169 0) itemoff 16217 itemsize 33 [ 1563.358286] extent refs 1 gen 93 flags 258 [ 1563.358287] ref#0: shared block backref parent 7985602560 [ 1563.358288] (parent 7985602560 is NOT ALIGNED to nodesize 16384) [ 1563.358290] item 2 key (7985635328 169 0) itemoff 16184 itemsize 33 ... [ 1563.358995] BTRFS error (device sda7): eb 7989321728 invalid extent inline ref type 182 [ 1563.358996] ------------[ cut here ]------------ [ 1563.359005] WARNING: CPU: 14 PID: 2930 at 0xffffffff9f231766 Then with transaction abort, and obviously failed to balance the fs. [CAUSE] That mentioned inline ref type 182 is completely sane, it's BTRFS_SHARED_BLOCK_REF_KEY, it's some extra check making kernel to believe it's invalid. Commit 64ecdb647ddb ("Btrfs: add one more sanity check for shared ref type") introduced extra checks for backref type. One of the requirement is, parent bytenr must be aligned to node size, which is not correct. One example is like this: 0 1G 1G+4K 2G 2G+4K | |///////////////////|//| <- A chunk starts at 1G+4K | | <- A tree block get reserved at bytenr 1G+4K Then we have a valid tree block at bytenr 1G+4K, but not aligned to nodesize (16K). Such chunk is not ideal, but current kernel can handle it pretty well. We may warn about such tree block in the future, but should not reject them. [FIX] Change the alignment requirement from node size alignment to sector size alignment. Also, to make our lives a little easier, also output @iref when btrfs_get_extent_inline_ref_type() failed, so we can locate the item easier. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=205475 Fixes: 64ecdb647ddb ("Btrfs: add one more sanity check for shared ref type") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo [ update comments and messages ] Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 19 +++++++++---------- fs/btrfs/print-tree.c | 12 +++++++----- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e9eedc053fc5..780b9c9a98fe 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -400,12 +400,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, if (type == BTRFS_SHARED_BLOCK_REF_KEY) { ASSERT(eb->fs_info); /* - * Every shared one has parent tree - * block, which must be aligned to - * nodesize. + * Every shared one has parent tree block, + * which must be aligned to sector size. */ if (offset && - IS_ALIGNED(offset, eb->fs_info->nodesize)) + IS_ALIGNED(offset, eb->fs_info->sectorsize)) return type; } } else if (is_data == BTRFS_REF_TYPE_DATA) { @@ -414,12 +413,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, if (type == BTRFS_SHARED_DATA_REF_KEY) { ASSERT(eb->fs_info); /* - * Every shared one has parent tree - * block, which must be aligned to - * nodesize. + * Every shared one has parent tree block, + * which must be aligned to sector size. */ if (offset && - IS_ALIGNED(offset, eb->fs_info->nodesize)) + IS_ALIGNED(offset, eb->fs_info->sectorsize)) return type; } } else { @@ -429,8 +427,9 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, } btrfs_print_leaf((struct extent_buffer *)eb); - btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d", - eb->start, type); + btrfs_err(eb->fs_info, + "eb %llu iref 0x%lx invalid extent inline ref type %d", + eb->start, (unsigned long)iref, type); WARN_ON(1); return BTRFS_REF_TYPE_INVALID; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 61f44e78e3c9..80567c11ec12 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -95,9 +95,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) * offset is supposed to be a tree block which * must be aligned to nodesize. */ - if (!IS_ALIGNED(offset, eb->fs_info->nodesize)) - pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n", - offset, (unsigned long long)eb->fs_info->nodesize); + if (!IS_ALIGNED(offset, eb->fs_info->sectorsize)) + pr_info( + "\t\t\t(parent %llu not aligned to sectorsize %u)\n", + offset, eb->fs_info->sectorsize); break; case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); @@ -112,8 +113,9 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type) * must be aligned to nodesize. */ if (!IS_ALIGNED(offset, eb->fs_info->nodesize)) - pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n", - offset, (unsigned long long)eb->fs_info->nodesize); + pr_info( + "\t\t\t(parent %llu not aligned to sectorsize %u)\n", + offset, eb->fs_info->sectorsize); break; default: pr_cont("(extent %llu has INVALID ref type %d)\n", From 9e3aa8054453d23d9f477f0cdae70a6a1ea6ec8a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 3 Sep 2020 14:29:50 -0400 Subject: [PATCH 276/648] btrfs: free data reloc tree on failed mount While testing a weird problem with -o degraded, I noticed I was getting leaked root errors BTRFS warning (device loop0): writable mount is not allowed due to too many missing devices BTRFS error (device loop0): open_ctree failed BTRFS error (device loop0): leaked root -9-0 refcount 1 This is the DATA_RELOC root, which gets read before the other fs roots, but is included in the fs roots radix tree. Handle this by adding a btrfs_drop_and_free_fs_root() on the data reloc root if it exists. This is ok to do here if we fail further up because we will only drop the ref if we delete the root from the radix tree, and all other cleanup won't be duplicated. CC: stable@vger.kernel.org # 5.8+ Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 465bc8372e09..9fe9ec6aab71 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3441,6 +3441,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device btrfs_put_block_group_cache(fs_info); fail_tree_roots: + if (fs_info->data_reloc_root) + btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root); free_root_pointers(fs_info, true); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); From 162343a876f14f7604881dd2bc53ca140c82c230 Mon Sep 17 00:00:00 2001 From: Rustam Kovhaev Date: Mon, 10 Aug 2020 08:36:50 -0700 Subject: [PATCH 277/648] scripts/tags.sh: exclude tools directory from tags generation when COMPILED_SOURCE is set, running 'make ARCH=x86_64 COMPILED_SOURCE=1 cscope tags' in KBUILD_OUTPUT directory produces lots of "No such file or directory" warnings: ... realpath: sigchain.h: No such file or directory realpath: orc_gen.c: No such file or directory realpath: objtool.c: No such file or directory ... let's exclude tools directory from tags generation Fixes: 4f491bb6ea2a ("scripts/tags.sh: collect compiled source precisely") Link: https://lore.kernel.org/lkml/20200809210056.GA1344537@thinkpad Signed-off-by: Rustam Kovhaev Link: https://lore.kernel.org/r/20200810153650.1822316-1-rkovhaev@gmail.com Signed-off-by: Greg Kroah-Hartman --- scripts/tags.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 32d3f53af10b..850f4ccb6afc 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -26,7 +26,11 @@ else fi # ignore userspace tools -ignore="$ignore ( -path ${tree}tools ) -prune -o" +if [ -n "$COMPILED_SOURCE" ]; then + ignore="$ignore ( -path ./tools ) -prune -o" +else + ignore="$ignore ( -path ${tree}tools ) -prune -o" +fi # Detect if ALLSOURCE_ARCHS is set. If not, we assume SRCARCH if [ "${ALLSOURCE_ARCHS}" = "" ]; then @@ -92,7 +96,7 @@ all_sources() all_compiled_sources() { realpath -es $([ -z "$KBUILD_ABS_SRCTREE" ] && echo --relative-to=.) \ - include/generated/autoconf.h $(find -name "*.cmd" -exec \ + include/generated/autoconf.h $(find $ignore -name "*.cmd" -exec \ grep -Poh '(?(?=^source_.* \K).*|(?=^ \K\S).*(?= \\))' {} \+ | awk '!a[$0]++') | sort -u } From 294955fd43dbf1e8f3a84cffa4797c6f22badc31 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Mon, 7 Sep 2020 17:21:51 +0300 Subject: [PATCH 278/648] usb: typec: intel_pmc_mux: Do not configure Altmode HPD High According to the PMC Type C Subsystem (TCSS) Mux programming guide rev 0.7, bit 14 is reserved in Alternate mode. In DP Alternate Mode state, if the HPD_STATE (bit 7) field in the status update command VDO is set to HPD_HIGH, HPD is configured via separate HPD mode request after configuring DP Alternate mode request. Configuring reserved bit may show unexpected behaviour. So do not configure them while issuing the Alternate Mode request. Fixes: 7990be48ef4d ("usb: typec: mux: intel: Handle alt mode HPD_HIGH") Cc: stable@vger.kernel.org Signed-off-by: Utkarsh Patel Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200907142152.35678-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index fd9008f19208..bb9cb4ec4d6f 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -68,7 +68,6 @@ enum { #define PMC_USB_ALTMODE_DP_MODE_SHIFT 8 /* TBT specific Mode Data bits */ -#define PMC_USB_ALTMODE_HPD_HIGH BIT(14) #define PMC_USB_ALTMODE_TBT_TYPE BIT(17) #define PMC_USB_ALTMODE_CABLE_TYPE BIT(18) #define PMC_USB_ALTMODE_ACTIVE_LINK BIT(20) @@ -185,9 +184,6 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state) req.mode_data |= (state->mode - TYPEC_STATE_MODAL) << PMC_USB_ALTMODE_DP_MODE_SHIFT; - if (data->status & DP_STATUS_HPD_STATE) - req.mode_data |= PMC_USB_ALTMODE_HPD_HIGH; - ret = pmc_usb_command(port, (void *)&req, sizeof(req)); if (ret) return ret; From 7c6bbdf086ac7f1374bcf1ef0994b15109ecaf48 Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Mon, 7 Sep 2020 17:21:52 +0300 Subject: [PATCH 279/648] usb: typec: intel_pmc_mux: Do not configure SBU and HSL Orientation in Alternate modes According to the PMC Type C Subsystem (TCSS) Mux programming guide rev 0.7, bits 4 and 5 are reserved in Alternate modes. SBU Orientation and HSL Orientation needs to be configured only during initial cable detection in USB connect flow based on device property of "sbu-orientation" and "hsl-orientation". Configuring these reserved bits in the Alternate modes may result in delay in display link training or some unexpected behaviour. So do not configure them while issuing Alternate Mode requests. Fixes: ff4a30d5e243 ("usb: typec: mux: intel_pmc_mux: Support for static SBU/HSL orientation") Signed-off-by: Utkarsh Patel Cc: stable Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200907142152.35678-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index bb9cb4ec4d6f..ec7da0fa3cf8 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -61,8 +61,6 @@ enum { #define PMC_USB_ALTMODE_ORI_SHIFT 1 #define PMC_USB_ALTMODE_UFP_SHIFT 3 -#define PMC_USB_ALTMODE_ORI_AUX_SHIFT 4 -#define PMC_USB_ALTMODE_ORI_HSL_SHIFT 5 /* DP specific Mode Data bits */ #define PMC_USB_ALTMODE_DP_MODE_SHIFT 8 @@ -178,9 +176,6 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state) req.mode_data = (port->orientation - 1) << PMC_USB_ALTMODE_ORI_SHIFT; req.mode_data |= (port->role - 1) << PMC_USB_ALTMODE_UFP_SHIFT; - req.mode_data |= sbu_orientation(port) << PMC_USB_ALTMODE_ORI_AUX_SHIFT; - req.mode_data |= hsl_orientation(port) << PMC_USB_ALTMODE_ORI_HSL_SHIFT; - req.mode_data |= (state->mode - TYPEC_STATE_MODAL) << PMC_USB_ALTMODE_DP_MODE_SHIFT; @@ -208,9 +203,6 @@ pmc_usb_mux_tbt(struct pmc_usb_port *port, struct typec_mux_state *state) req.mode_data = (port->orientation - 1) << PMC_USB_ALTMODE_ORI_SHIFT; req.mode_data |= (port->role - 1) << PMC_USB_ALTMODE_UFP_SHIFT; - req.mode_data |= sbu_orientation(port) << PMC_USB_ALTMODE_ORI_AUX_SHIFT; - req.mode_data |= hsl_orientation(port) << PMC_USB_ALTMODE_ORI_HSL_SHIFT; - if (TBT_ADAPTER(data->device_mode) == TBT_ADAPTER_TBT3) req.mode_data |= PMC_USB_ALTMODE_TBT_TYPE; From b340dc680ed48dcc05b56e1ebe1b9535813c3ee0 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 5 Sep 2020 22:55:36 -0400 Subject: [PATCH 280/648] bnxt_en: Avoid sending firmware messages when AER error is detected. When the driver goes through PCIe AER reset in error state, all firmware messages will timeout because the PCIe bus is no longer accessible. This can lead to AER reset taking many minutes to complete as each firmware command takes time to timeout. Define a new macro BNXT_NO_FW_ACCESS() to skip these firmware messages when either firmware is in fatal error state or when pci_channel_offline() is true. It now takes a more reasonable 20 to 30 seconds to complete AER recovery. Fixes: b4fff2079d10 ("bnxt_en: Do not send firmware messages if firmware is in error state.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b167066af450..619eb556683d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4305,7 +4305,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM; u16 dst = BNXT_HWRM_CHNL_CHIMP; - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (BNXT_NO_FW_ACCESS(bp)) return -EBUSY; if (msg_len > BNXT_HWRM_MAX_REQ_LEN) { @@ -5723,7 +5723,7 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp, struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr; u16 error_code; - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (BNXT_NO_FW_ACCESS(bp)) return 0; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1); @@ -7817,7 +7817,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) if (set_tpa) tpa_flags = bp->flags & BNXT_FLAG_TPA; - else if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + else if (BNXT_NO_FW_ACCESS(bp)) return 0; for (i = 0; i < bp->nr_vnics; i++) { rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 5a13eb66beda..0ef89dabfd61 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1737,6 +1737,10 @@ struct bnxt { #define BNXT_STATE_FW_FATAL_COND 6 #define BNXT_STATE_DRV_REGISTERED 7 +#define BNXT_NO_FW_ACCESS(bp) \ + (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ + pci_channel_offline((bp)->pdev)) + struct bnxt_irq *irq_tbl; int total_irqs; u8 mac_addr[ETH_ALEN]; From b16939b59cc00231a75d224fd058d22c9d064976 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 5 Sep 2020 22:55:37 -0400 Subject: [PATCH 281/648] bnxt_en: Fix NULL ptr dereference crash in bnxt_fw_reset_task() bnxt_fw_reset_task() which runs from a workqueue can race with bnxt_remove_one(). For example, if firmware reset and VF FLR are happening at about the same time. bnxt_remove_one() already cancels the workqueue and waits for it to finish, but we need to do this earlier before the devlink reporters are destroyed. This will guarantee that the devlink reporters will always be valid when bnxt_fw_reset_task() is still running. Fixes: b148bb238c02 ("bnxt_en: Fix possible crash in bnxt_fw_reset_task().") Reviewed-by: Edwin Peer Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 619eb556683d..8eb73fe39e84 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11779,6 +11779,10 @@ static void bnxt_remove_one(struct pci_dev *pdev) if (BNXT_PF(bp)) bnxt_sriov_disable(bp); + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bnxt_cancel_sp_work(bp); + bp->sp_event = 0; + bnxt_dl_fw_reporters_destroy(bp, true); if (BNXT_PF(bp)) devlink_port_type_clear(&bp->dl_port); @@ -11786,9 +11790,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) unregister_netdev(dev); bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_cancel_sp_work(bp); - bp->sp_event = 0; bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); From 2d892ccdc163a3d2e08c5ed1cea8b61bf7e4f531 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 4 Sep 2020 17:22:57 +0100 Subject: [PATCH 282/648] btrfs: fix NULL pointer dereference after failure to create snapshot When trying to get a new fs root for a snapshot during the transaction at transaction.c:create_pending_snapshot(), if btrfs_get_new_fs_root() fails we leave "pending->snap" pointing to an error pointer, and then later at ioctl.c:create_snapshot() we dereference that pointer, resulting in a crash: [12264.614689] BUG: kernel NULL pointer dereference, address: 00000000000007c4 [12264.615650] #PF: supervisor write access in kernel mode [12264.616487] #PF: error_code(0x0002) - not-present page [12264.617436] PGD 0 P4D 0 [12264.618328] Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [12264.619150] CPU: 0 PID: 2310635 Comm: fsstress Tainted: G W 5.9.0-rc3-btrfs-next-67 #1 [12264.619960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [12264.621769] RIP: 0010:btrfs_mksubvol+0x438/0x4a0 [btrfs] [12264.622528] Code: bc ef ff ff (...) [12264.624092] RSP: 0018:ffffaa6fc7277cd8 EFLAGS: 00010282 [12264.624669] RAX: 00000000fffffff4 RBX: ffff9d3e8f151a60 RCX: 0000000000000000 [12264.625249] RDX: 0000000000000001 RSI: ffffffff9d56c9be RDI: fffffffffffffff4 [12264.625830] RBP: ffff9d3e8f151b48 R08: 0000000000000000 R09: 0000000000000000 [12264.626413] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffff4 [12264.626994] R13: ffff9d3ede380538 R14: ffff9d3ede380500 R15: ffff9d3f61b2eeb8 [12264.627582] FS: 00007f140d5d8200(0000) GS:ffff9d3fb5e00000(0000) knlGS:0000000000000000 [12264.628176] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [12264.628773] CR2: 00000000000007c4 CR3: 000000020f8e8004 CR4: 00000000003706f0 [12264.629379] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [12264.629994] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [12264.630594] Call Trace: [12264.631227] btrfs_mksnapshot+0x7b/0xb0 [btrfs] [12264.631840] __btrfs_ioctl_snap_create+0x16f/0x1a0 [btrfs] [12264.632458] btrfs_ioctl_snap_create_v2+0xb0/0xf0 [btrfs] [12264.633078] btrfs_ioctl+0x1864/0x3130 [btrfs] [12264.633689] ? do_sys_openat2+0x1a7/0x2d0 [12264.634295] ? kmem_cache_free+0x147/0x3a0 [12264.634899] ? __x64_sys_ioctl+0x83/0xb0 [12264.635488] __x64_sys_ioctl+0x83/0xb0 [12264.636058] do_syscall_64+0x33/0x80 [12264.636616] entry_SYSCALL_64_after_hwframe+0x44/0xa9 (gdb) list *(btrfs_mksubvol+0x438) 0x7c7b8 is in btrfs_mksubvol (fs/btrfs/ioctl.c:858). 853 ret = 0; 854 pending_snapshot->anon_dev = 0; 855 fail: 856 /* Prevent double freeing of anon_dev */ 857 if (ret && pending_snapshot->snap) 858 pending_snapshot->snap->anon_dev = 0; 859 btrfs_put_root(pending_snapshot->snap); 860 btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv); 861 free_pending: 862 if (pending_snapshot->anon_dev) So fix this by setting "pending->snap" to NULL if we get an error from the call to btrfs_get_new_fs_root() at transaction.c:create_pending_snapshot(). Fixes: 2dfb1e43f57dd3 ("btrfs: preallocate anon block device at first phase of snapshot creation") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 20c6ac1a5de7..d2fc292ac61b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1636,6 +1636,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); + pending->snap = NULL; btrfs_abort_transaction(trans, ret); goto fail; } From eb02d39ad3099c3dcd96c5b3fdc0303541766ed1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:31:16 -0700 Subject: [PATCH 283/648] netdevice.h: fix proto_down_reason kernel-doc warning Fix kernel-doc warning in : ../include/linux/netdevice.h:2158: warning: Function parameter or member 'proto_down_reason' not described in 'net_device' Fixes: 829eb208e80d ("rtnetlink: add support for protodown reason") Signed-off-by: Randy Dunlap Acked-by: Roopa Prabhu Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0e303f6603f..bf0e313486be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1784,6 +1784,7 @@ enum netdev_priv_flags { * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * + * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one From ffa59b0b396cb2c0ea6712ff30ee05c35d4c9c8d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:32:30 -0700 Subject: [PATCH 284/648] netdevice.h: fix xdp_state kernel-doc warning Fix kernel-doc warning in : ../include/linux/netdevice.h:2158: warning: Function parameter or member 'xdp_state' not described in 'net_device' Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Signed-off-by: Randy Dunlap Cc: Andrii Nakryiko Cc: Alexei Starovoitov Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf0e313486be..7bd4fcdd0738 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1849,6 +1849,7 @@ enum netdev_priv_flags { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @xdp_state: stores info on attached XDP BPF programs * * FIXME: cleanup struct net_device such that network protocol info * moves out. From 8ae4dff882eb879c17bf46574201bd37fc6bc8b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 4 Sep 2020 21:07:49 -0700 Subject: [PATCH 285/648] ibmvnic: add missing parenthesis in do_reset() Indentation and logic clearly show that this code is missing parenthesis. Fixes: 9f1345737790 ("ibmvnic fix NULL tx_pools and rx_tools issue at do_reset") Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d3a774331afc..1b702a43a5d0 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2032,16 +2032,18 @@ static int do_reset(struct ibmvnic_adapter *adapter, } else { rc = reset_tx_pools(adapter); - if (rc) + if (rc) { netdev_dbg(adapter->netdev, "reset tx pools failed (%d)\n", rc); goto out; + } rc = reset_rx_pools(adapter); - if (rc) + if (rc) { netdev_dbg(adapter->netdev, "reset rx pools failed (%d)\n", rc); goto out; + } } ibmvnic_disable_irqs(adapter); } From e1f469cd5866499ac40bfdca87411e1c525a10c7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 7 Sep 2020 15:54:41 +0000 Subject: [PATCH 286/648] Revert "netns: don't disable BHs when locking "nsid_lock"" This reverts commit 8d7e5dee972f1cde2ba96c621f1541fa36e7d4f4. To protect netns id, the nsid_lock is used when netns id is being allocated and removed by peernet2id_alloc() and unhash_nsid(). The nsid_lock can be used in BH context but only spin_lock() is used in this code. Using spin_lock() instead of spin_lock_bh() can result in a deadlock in the following scenario reported by the lockdep. In order to avoid a deadlock, the spin_lock_bh() should be used instead of spin_lock() to acquire nsid_lock. Test commands: ip netns del nst ip netns add nst ip link add veth1 type veth peer name veth2 ip link set veth1 netns nst ip netns exec nst ip link add name br1 type bridge vlan_filtering 1 ip netns exec nst ip link set dev br1 up ip netns exec nst ip link set dev veth1 master br1 ip netns exec nst ip link set dev veth1 up ip netns exec nst ip link add macvlan0 link br1 up type macvlan Splat looks like: [ 33.615860][ T607] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected [ 33.617194][ T607] 5.9.0-rc1+ #665 Not tainted [ ... ] [ 33.670615][ T607] Chain exists of: [ 33.670615][ T607] &mc->mca_lock --> &bridge_netdev_addr_lock_key --> &net->nsid_lock [ 33.670615][ T607] [ 33.673118][ T607] Possible interrupt unsafe locking scenario: [ 33.673118][ T607] [ 33.674599][ T607] CPU0 CPU1 [ 33.675557][ T607] ---- ---- [ 33.676516][ T607] lock(&net->nsid_lock); [ 33.677306][ T607] local_irq_disable(); [ 33.678517][ T607] lock(&mc->mca_lock); [ 33.679725][ T607] lock(&bridge_netdev_addr_lock_key); [ 33.681166][ T607] [ 33.681791][ T607] lock(&mc->mca_lock); [ 33.682579][ T607] [ 33.682579][ T607] *** DEADLOCK *** [ ... ] [ 33.922046][ T607] stack backtrace: [ 33.922999][ T607] CPU: 3 PID: 607 Comm: ip Not tainted 5.9.0-rc1+ #665 [ 33.924099][ T607] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 33.925714][ T607] Call Trace: [ 33.926238][ T607] dump_stack+0x78/0xab [ 33.926905][ T607] check_irq_usage+0x70b/0x720 [ 33.927708][ T607] ? iterate_chain_key+0x60/0x60 [ 33.928507][ T607] ? check_path+0x22/0x40 [ 33.929201][ T607] ? check_noncircular+0xcf/0x180 [ 33.930024][ T607] ? __lock_acquire+0x1952/0x1f20 [ 33.930860][ T607] __lock_acquire+0x1952/0x1f20 [ 33.931667][ T607] lock_acquire+0xaf/0x3a0 [ 33.932366][ T607] ? peernet2id_alloc+0x3a/0x170 [ 33.933147][ T607] ? br_port_fill_attrs+0x54c/0x6b0 [bridge] [ 33.934140][ T607] ? br_port_fill_attrs+0x5de/0x6b0 [bridge] [ 33.935113][ T607] ? kvm_sched_clock_read+0x14/0x30 [ 33.935974][ T607] _raw_spin_lock+0x30/0x70 [ 33.936728][ T607] ? peernet2id_alloc+0x3a/0x170 [ 33.937523][ T607] peernet2id_alloc+0x3a/0x170 [ 33.938313][ T607] rtnl_fill_ifinfo+0xb5e/0x1400 [ 33.939091][ T607] rtmsg_ifinfo_build_skb+0x8a/0xf0 [ 33.939953][ T607] rtmsg_ifinfo_event.part.39+0x17/0x50 [ 33.940863][ T607] rtmsg_ifinfo+0x1f/0x30 [ 33.941571][ T607] __dev_notify_flags+0xa5/0xf0 [ 33.942376][ T607] ? __irq_work_queue_local+0x49/0x50 [ 33.943249][ T607] ? irq_work_queue+0x1d/0x30 [ 33.943993][ T607] ? __dev_set_promiscuity+0x7b/0x1a0 [ 33.944878][ T607] __dev_set_promiscuity+0x7b/0x1a0 [ 33.945758][ T607] dev_set_promiscuity+0x1e/0x50 [ 33.946582][ T607] br_port_set_promisc+0x1f/0x40 [bridge] [ 33.947487][ T607] br_manage_promisc+0x8b/0xe0 [bridge] [ 33.948388][ T607] __dev_set_promiscuity+0x123/0x1a0 [ 33.949244][ T607] __dev_set_rx_mode+0x68/0x90 [ 33.950021][ T607] dev_uc_add+0x50/0x60 [ 33.950720][ T607] macvlan_open+0x18e/0x1f0 [macvlan] [ 33.951601][ T607] __dev_open+0xd6/0x170 [ 33.952269][ T607] __dev_change_flags+0x181/0x1d0 [ 33.953056][ T607] rtnl_configure_link+0x2f/0xa0 [ 33.953884][ T607] __rtnl_newlink+0x6b9/0x8e0 [ 33.954665][ T607] ? __lock_acquire+0x95d/0x1f20 [ 33.955450][ T607] ? lock_acquire+0xaf/0x3a0 [ 33.956193][ T607] ? is_bpf_text_address+0x5/0xe0 [ 33.956999][ T607] rtnl_newlink+0x47/0x70 Acked-by: Guillaume Nault Fixes: 8d7e5dee972f ("netns: don't disable BHs when locking "nsid_lock"") Reported-by: syzbot+3f960c64a104eaa2c813@syzkaller.appspotmail.com Signed-off-by: Taehee Yoo Signed-off-by: Jakub Kicinski --- net/core/net_namespace.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dcd61aca343e..944ab214e5ae 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -251,10 +251,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) if (refcount_read(&net->count) == 0) return NETNSA_NSID_NOT_ASSIGNED; - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); if (id >= 0) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); return id; } @@ -264,12 +264,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp) * just been idr_remove()'d from there in cleanup_net(). */ if (!maybe_get_net(peer)) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); return NETNSA_NSID_NOT_ASSIGNED; } id = alloc_netid(net, peer, -1); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); put_net(peer); if (id < 0) @@ -534,20 +534,20 @@ static void unhash_nsid(struct net *net, struct net *last) for_each_net(tmp) { int id; - spin_lock(&tmp->nsid_lock); + spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock(&tmp->nsid_lock); + spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL, GFP_KERNEL); if (tmp == last) break; } - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); } static LLIST_HEAD(cleanup_list); @@ -760,9 +760,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, return PTR_ERR(peer); } - spin_lock(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); err = -EEXIST; NL_SET_BAD_ATTR(extack, nla); NL_SET_ERR_MSG(extack, @@ -771,7 +771,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, } err = alloc_netid(net, peer, nsid); - spin_unlock(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid, nlh, GFP_KERNEL); From 19162fd4063a3211843b997a454b505edb81d5ce Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 7 Sep 2020 00:13:39 -0700 Subject: [PATCH 287/648] hv_netvsc: Fix hibernation for mlx5 VF driver mlx5_suspend()/resume() keep the network interface, so during hibernation netvsc_unregister_vf() and netvsc_register_vf() are not called, and hence netvsc_resume() should call netvsc_vf_changed() to switch the data path back to the VF after hibernation. Note: after we close and re-open the vmbus channel of the netvsc NIC in netvsc_suspend() and netvsc_resume(), the data path is implicitly switched to the netvsc NIC. Similarly, netvsc_suspend() should not call netvsc_unregister_vf(), otherwise the VF can no longer be used after hibernation. For mlx4, since the VF network interafce is explicitly destroyed and re-created during hibernation (see mlx4_suspend()/resume()), hv_netvsc already explicitly switches the data path from and to the VF automatically via netvsc_register_vf() and netvsc_unregister_vf(), so mlx4 doesn't need this fix. Note: mlx4 can still work with the fix because in netvsc_suspend()/resume() ndev_ctx->vf_netdev is NULL for mlx4. Fixes: 0efeea5fb153 ("hv_netvsc: Add the support of hibernation") Signed-off-by: Dexuan Cui Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 64b0a74c1523..81c5c70b616a 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2587,8 +2587,8 @@ static int netvsc_remove(struct hv_device *dev) static int netvsc_suspend(struct hv_device *dev) { struct net_device_context *ndev_ctx; - struct net_device *vf_netdev, *net; struct netvsc_device *nvdev; + struct net_device *net; int ret; net = hv_get_drvdata(dev); @@ -2604,10 +2604,6 @@ static int netvsc_suspend(struct hv_device *dev) goto out; } - vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev); - if (vf_netdev) - netvsc_unregister_vf(vf_netdev); - /* Save the current config info */ ndev_ctx->saved_netvsc_dev_info = netvsc_devinfo_get(nvdev); @@ -2623,6 +2619,7 @@ static int netvsc_resume(struct hv_device *dev) struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info; + struct net_device *vf_netdev; int ret; rtnl_lock(); @@ -2635,6 +2632,15 @@ static int netvsc_resume(struct hv_device *dev) netvsc_devinfo_put(device_info); net_device_ctx->saved_netvsc_dev_info = NULL; + /* A NIC driver (e.g. mlx5) may keep the VF network interface across + * hibernation, but here the data path is implicitly switched to the + * netvsc NIC since the vmbus channel is closed and re-opened, so + * netvsc_vf_changed() must be used to switch the data path to the VF. + */ + vf_netdev = rtnl_dereference(net_device_ctx->vf_netdev); + if (vf_netdev && netvsc_vf_changed(vf_netdev) != NOTIFY_OK) + ret = -EINVAL; + rtnl_unlock(); return ret; From 437ef802e0adc9f162a95213a3488e8646e5fc03 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 8 Sep 2020 11:51:06 +1000 Subject: [PATCH 288/648] powerpc/dma: Fix dma_map_ops::get_required_mask There are 2 problems with it: 1. "<" vs expected "<<" 2. the shift number is an IOMMU page number mask, not an address mask as the IOMMU page shift is missing. This did not hit us before f1565c24b596 ("powerpc: use the generic dma_ops_bypass mode") because we had additional code to handle bypass mask so this chunk (almost?) never executed.However there were reports that aacraid does not work with "iommu=nobypass". After f1565c24b596, aacraid (and probably others which call dma_get_required_mask() before setting the mask) was unable to enable 64bit DMA and fall back to using IOMMU which was known not to work, one of the problems is double free of an IOMMU page. This fixes DMA for aacraid, both with and without "iommu=nobypass" in the kernel command line. Verified with "stress-ng -d 4". Fixes: 6a5c7be5e484 ("powerpc: Override dma_get_required_mask by platform hook and ops") Cc: stable@vger.kernel.org # v3.2+ Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200908015106.79661-1-aik@ozlabs.ru --- arch/powerpc/kernel/dma-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 569fecd7b5b2..9053fc9d20c7 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -120,7 +120,8 @@ u64 dma_iommu_get_required_mask(struct device *dev) if (!tbl) return 0; - mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1); + mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) + + tbl->it_page_shift - 1); mask += mask - 1; return mask; From 4993a8a378088be8b2f64fd9d00de9c6fb0a7ce9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 8 Sep 2020 15:40:43 +1000 Subject: [PATCH 289/648] Revert "drm/i915: Remove i915_gem_object_get_dirty_page()" These commits caused a regression on Lenovo t520 sandybridge machine belonging to reporter. We are reverting them for 5.10 for other reasons, so just do it for 5.9 as well. This reverts commit 763fedd6a216f94c2eb98d2f7ca21be3d3806e69. Reported-by: Harald Arnesen Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 4 ++++ drivers/gpu/drm/i915/gem/i915_gem_pages.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index e5b9276d254c..9cf4ad78ece6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -258,6 +258,10 @@ struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n); +struct page * +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, + unsigned int n); + dma_addr_t i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, unsigned long n, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index d15ff6748a50..e8a083743e09 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -548,6 +548,20 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) return nth_page(sg_page(sg), offset); } +/* Like i915_gem_object_get_page(), but mark the returned page dirty */ +struct page * +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, + unsigned int n) +{ + struct page *page; + + page = i915_gem_object_get_page(obj, n); + if (!obj->mm.dirty) + set_page_dirty(page); + + return page; +} + dma_addr_t i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, unsigned long n, From ad5d95e4d538737ed3fa25493777decf264a3011 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 8 Sep 2020 15:41:17 +1000 Subject: [PATCH 290/648] Revert "drm/i915/gem: Async GPU relocations only" These commits caused a regression on Lenovo t520 sandybridge machine belonging to reporter. We are reverting them for 5.10 for other reasons, so just do it for 5.9 as well. This reverts commit 9e0f9464e2ab36b864359a59b0e9058fdef0ce47. Reported-by: Harald Arnesen Signed-off-by: Dave Airlie --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 293 ++++++++++++++++-- .../i915/gem/selftests/i915_gem_execbuffer.c | 21 +- 2 files changed, 288 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 6b4ec66cb558..64901cf52b7a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -45,6 +45,13 @@ struct eb_vma_array { struct eb_vma vma[]; }; +enum { + FORCE_CPU_RELOC = 1, + FORCE_GTT_RELOC, + FORCE_GPU_RELOC, +#define DBG_FORCE_RELOC 0 /* choose one of the above! */ +}; + #define __EXEC_OBJECT_HAS_PIN BIT(31) #define __EXEC_OBJECT_HAS_FENCE BIT(30) #define __EXEC_OBJECT_NEEDS_MAP BIT(29) @@ -253,6 +260,8 @@ struct i915_execbuffer { */ struct reloc_cache { struct drm_mm_node node; /** temporary GTT binding */ + unsigned long vaddr; /** Current kmap address */ + unsigned long page; /** Currently mapped page index */ unsigned int gen; /** Cached value of INTEL_GEN */ bool use_64bit_reloc : 1; bool has_llc : 1; @@ -596,6 +605,23 @@ eb_add_vma(struct i915_execbuffer *eb, } } +static inline int use_cpu_reloc(const struct reloc_cache *cache, + const struct drm_i915_gem_object *obj) +{ + if (!i915_gem_object_has_struct_page(obj)) + return false; + + if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) + return true; + + if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) + return false; + + return (cache->has_llc || + obj->cache_dirty || + obj->cache_level != I915_CACHE_NONE); +} + static int eb_reserve_vma(const struct i915_execbuffer *eb, struct eb_vma *ev, u64 pin_flags) @@ -926,6 +952,8 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { + cache->page = -1; + cache->vaddr = 0; /* Must be a variable in the struct to allow GCC to unroll. */ cache->gen = INTEL_GEN(i915); cache->has_llc = HAS_LLC(i915); @@ -1049,6 +1077,181 @@ static int reloc_gpu_flush(struct reloc_cache *cache) return err; } +static void reloc_cache_reset(struct reloc_cache *cache) +{ + void *vaddr; + + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) { + if (cache->vaddr & CLFLUSH_AFTER) + mb(); + + kunmap_atomic(vaddr); + i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); + } else { + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + + intel_gt_flush_ggtt_writes(ggtt->vm.gt); + io_mapping_unmap_atomic((void __iomem *)vaddr); + + if (drm_mm_node_allocated(&cache->node)) { + ggtt->vm.clear_range(&ggtt->vm, + cache->node.start, + cache->node.size); + mutex_lock(&ggtt->vm.mutex); + drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); + } else { + i915_vma_unpin((struct i915_vma *)cache->node.mm); + } + } + + cache->vaddr = 0; + cache->page = -1; +} + +static void *reloc_kmap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long page) +{ + void *vaddr; + + if (cache->vaddr) { + kunmap_atomic(unmask_page(cache->vaddr)); + } else { + unsigned int flushes; + int err; + + err = i915_gem_object_prepare_write(obj, &flushes); + if (err) + return ERR_PTR(err); + + BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); + BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); + + cache->vaddr = flushes | KMAP; + cache->node.mm = (void *)obj; + if (flushes) + mb(); + } + + vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; + cache->page = page; + + return vaddr; +} + +static void *reloc_iomap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long page) +{ + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + unsigned long offset; + void *vaddr; + + if (cache->vaddr) { + intel_gt_flush_ggtt_writes(ggtt->vm.gt); + io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); + } else { + struct i915_vma *vma; + int err; + + if (i915_gem_object_is_tiled(obj)) + return ERR_PTR(-EINVAL); + + if (use_cpu_reloc(cache, obj)) + return NULL; + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) + return ERR_PTR(err); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK /* NOWARN */ | + PIN_NOEVICT); + if (IS_ERR(vma)) { + memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); + err = drm_mm_insert_node_in_range + (&ggtt->vm.mm, &cache->node, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); + if (err) /* no inactive aperture space, use cpu reloc */ + return NULL; + } else { + cache->node.start = vma->node.start; + cache->node.mm = (void *)vma; + } + } + + offset = cache->node.start; + if (drm_mm_node_allocated(&cache->node)) { + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, page), + offset, I915_CACHE_NONE, 0); + } else { + offset += page << PAGE_SHIFT; + } + + vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, + offset); + cache->page = page; + cache->vaddr = (unsigned long)vaddr; + + return vaddr; +} + +static void *reloc_vaddr(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long page) +{ + void *vaddr; + + if (cache->page == page) { + vaddr = unmask_page(cache->vaddr); + } else { + vaddr = NULL; + if ((cache->vaddr & KMAP) == 0) + vaddr = reloc_iomap(obj, cache, page); + if (!vaddr) + vaddr = reloc_kmap(obj, cache, page); + } + + return vaddr; +} + +static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) +{ + if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { + if (flushes & CLFLUSH_BEFORE) { + clflushopt(addr); + mb(); + } + + *addr = value; + + /* + * Writes to the same cacheline are serialised by the CPU + * (including clflush). On the write path, we only require + * that it hits memory in an orderly fashion and place + * mb barriers at the start and end of the relocation phase + * to ensure ordering of clflush wrt to the system. + */ + if (flushes & CLFLUSH_AFTER) + clflushopt(addr); + } else + *addr = value; +} + static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -1214,6 +1417,17 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, return cmd; } +static inline bool use_reloc_gpu(struct i915_vma *vma) +{ + if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) + return true; + + if (DBG_FORCE_RELOC) + return false; + + return !dma_resv_test_signaled_rcu(vma->resv, true); +} + static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) { struct page *page; @@ -1228,10 +1442,10 @@ static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) return addr + offset_in_page(offset); } -static int __reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) +static bool __reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) { const unsigned int gen = eb->reloc_cache.gen; unsigned int len; @@ -1247,7 +1461,7 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, batch = reloc_gpu(eb, vma, len); if (IS_ERR(batch)) - return PTR_ERR(batch); + return false; addr = gen8_canonical_addr(vma->node.start + offset); if (gen >= 8) { @@ -1296,21 +1510,55 @@ static int __reloc_entry_gpu(struct i915_execbuffer *eb, *batch++ = target_addr; } - return 0; + return true; +} + +static bool reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + if (eb->reloc_cache.vaddr) + return false; + + if (!use_reloc_gpu(vma)) + return false; + + return __reloc_entry_gpu(eb, vma, offset, target_addr); } static u64 -relocate_entry(struct i915_execbuffer *eb, - struct i915_vma *vma, +relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, + struct i915_execbuffer *eb, const struct i915_vma *target) { u64 target_addr = relocation_target(reloc, target); - int err; + u64 offset = reloc->offset; - err = __reloc_entry_gpu(eb, vma, reloc->offset, target_addr); - if (err) - return err; + if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; + +repeat: + vaddr = reloc_vaddr(vma->obj, + &eb->reloc_cache, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); + + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; + } + } return target->node.start | UPDATE; } @@ -1375,7 +1623,8 @@ eb_relocate_entry(struct i915_execbuffer *eb, * If the relocation already has the right value in it, no * more work needs to be done. */ - if (gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) + if (!DBG_FORCE_RELOC && + gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ @@ -1407,7 +1656,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, ev->flags &= ~EXEC_OBJECT_ASYNC; /* and update the user's relocation entry */ - return relocate_entry(eb, ev->vma, reloc, target->vma); + return relocate_entry(ev->vma, reloc, eb, target->vma); } static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) @@ -1445,8 +1694,10 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) * this is bad and so lockdep complains vehemently. */ copied = __copy_from_user(r, urelocs, count * sizeof(r[0])); - if (unlikely(copied)) - return -EFAULT; + if (unlikely(copied)) { + remain = -EFAULT; + goto out; + } remain -= count; do { @@ -1454,7 +1705,8 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) if (likely(offset == 0)) { } else if ((s64)offset < 0) { - return (int)offset; + remain = (int)offset; + goto out; } else { /* * Note that reporting an error now @@ -1484,8 +1736,9 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev) } while (r++, --count); urelocs += ARRAY_SIZE(stack); } while (remain); - - return 0; +out: + reloc_cache_reset(&eb->reloc_cache); + return remain; } static int eb_relocate(struct i915_execbuffer *eb) @@ -2392,7 +2645,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.i915 = i915; eb.file = file; eb.args = args; - if (!(args->flags & I915_EXEC_NO_RELOC)) + if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) args->flags |= __EXEC_HAS_RELOC; eb.exec = exec; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 57c14d3340cd..a49016f8ee0d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -37,14 +37,20 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb, return err; /* 8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); - if (err) + if (!__reloc_entry_gpu(eb, vma, + offsets[0] * sizeof(u32), + 0)) { + err = -EIO; goto unpin_vma; + } /* !8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); - if (err) + if (!__reloc_entry_gpu(eb, vma, + offsets[1] * sizeof(u32), + 1)) { + err = -EIO; goto unpin_vma; + } /* Skip to the end of the cmd page */ i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1; @@ -54,9 +60,12 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb, eb->reloc_cache.rq_size += i; /* Force batch chaining */ - err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); - if (err) + if (!__reloc_entry_gpu(eb, vma, + offsets[2] * sizeof(u32), + 2)) { + err = -EIO; goto unpin_vma; + } GEM_BUG_ON(!eb->reloc_cache.rq); rq = i915_request_get(eb->reloc_cache.rq); From 20561da3a2e1e0e827ef5510cb0f74bcfd377e41 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 8 Sep 2020 15:41:43 +1000 Subject: [PATCH 291/648] Revert "drm/i915/gem: Delete unused code" These commits caused a regression on Lenovo t520 sandybridge machine belonging to reporter. We are reverting them for 5.10 for other reasons, so just do it for 5.9 as well. This reverts commit 7ac2d2536dfa71c275a74813345779b1e7522c91. Reported-by: Harald Arnesen Signed-off-by: Dave Airlie --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 64901cf52b7a..446e76e95c38 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -965,6 +965,25 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->target = NULL; } +static inline void *unmask_page(unsigned long p) +{ + return (void *)(uintptr_t)(p & PAGE_MASK); +} + +static inline unsigned int unmask_flags(unsigned long p) +{ + return p & ~PAGE_MASK; +} + +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ + +static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) +{ + struct drm_i915_private *i915 = + container_of(cache, struct i915_execbuffer, reloc_cache)->i915; + return &i915->ggtt; +} + #define RELOC_TAIL 4 static int reloc_gpu_chain(struct reloc_cache *cache) From 06be67266a0c9a6a1ffb330a4ab50c2f21612e2b Mon Sep 17 00:00:00 2001 From: Tali Perry Date: Mon, 31 Aug 2020 00:31:21 +0300 Subject: [PATCH 292/648] i2c: npcm7xx: Fix timeout calculation timeout_usec value calculation was wrong, the calculated value was in msec instead of usec. Fixes: 56a1485b102e ("i2c: npcm7xx: Add Nuvoton NPCM I2C controller driver") Signed-off-by: Tali Perry Reviewed-by: Avi Fishman Reviewed-by: Joel Stanley Reviewed-by: Alex Qiu Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-npcm7xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 75f07138a6fa..dfcf04e1967f 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2093,8 +2093,12 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, } } - /* Adaptive TimeOut: astimated time in usec + 100% margin */ - timeout_usec = (2 * 10000 / bus->bus_freq) * (2 + nread + nwrite); + /* + * Adaptive TimeOut: estimated time in usec + 100% margin: + * 2: double the timeout for clock stretching case + * 9: bits per transaction (including the ack/nack) + */ + timeout_usec = (2 * 9 * USEC_PER_SEC / bus->bus_freq) * (2 + nread + nwrite); timeout = max(msecs_to_jiffies(35), usecs_to_jiffies(timeout_usec)); if (nwrite >= 32 * 1024 || nread >= 32 * 1024) { dev_err(bus->dev, "i2c%d buffer too big\n", bus->num); From 1d3ee7df009a46440c58508b8819213c09503acd Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 3 Sep 2020 14:57:27 +0530 Subject: [PATCH 293/648] cpuidle: pseries: Fix CEDE latency conversion from tb to us Commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for CEDE(0)") sets the exit latency of CEDE(0) based on the latency values of the Extended CEDE states advertised by the platform. The values advertised by the platform are in timebase ticks. However the cpuidle framework requires the latency values in microseconds. If the tb-ticks value advertised by the platform correspond to a value smaller than 1us, during the conversion from tb-ticks to microseconds, in the current code, the result becomes zero. This is incorrect as it puts a CEDE state on par with the snooze state. This patch fixes this by rounding up the result obtained while converting the latency value from tb-ticks to microseconds. It also prints a warning in case we discover an extended-cede state with wakeup latency to be 0. In such a case, ensure that CEDE(0) has a non-zero wakeup latency. Fixes: d947fb4c965c ("cpuidle: pseries: Fixup exit latency for CEDE(0)") Signed-off-by: Gautham R. Shenoy Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1599125247-28488-1-git-send-email-ego@linux.vnet.ibm.com --- drivers/cpuidle/cpuidle-pseries.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index ff6d99e923a4..a2b5c6f60cf0 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -361,7 +361,10 @@ static void __init fixup_cede0_latency(void) for (i = 0; i < nr_xcede_records; i++) { struct xcede_latency_record *record = &payload->records[i]; u64 latency_tb = be64_to_cpu(record->latency_ticks); - u64 latency_us = tb_to_ns(latency_tb) / NSEC_PER_USEC; + u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC); + + if (latency_us == 0) + pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i); if (latency_us < min_latency_us) min_latency_us = latency_us; @@ -378,10 +381,14 @@ static void __init fixup_cede0_latency(void) * Perform the fix-up. */ if (min_latency_us < dedicated_states[1].exit_latency) { - u64 cede0_latency = min_latency_us - 1; + /* + * We set a minimum of 1us wakeup latency for cede0 to + * distinguish it from snooze + */ + u64 cede0_latency = 1; - if (cede0_latency <= 0) - cede0_latency = min_latency_us; + if (min_latency_us > cede0_latency) + cede0_latency = min_latency_us - 1; dedicated_states[1].exit_latency = cede0_latency; dedicated_states[1].target_residency = 10 * (cede0_latency); From 1cc5ef91d2ff94d2bf2de3b3585423e8a1051cb6 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Mon, 24 Aug 2020 19:38:32 +0000 Subject: [PATCH 294/648] netfilter: ctnetlink: add a range check for l3/l4 protonum The indexes to the nf_nat_l[34]protos arrays come from userspace. So check the tuple's family, e.g. l3num, when creating the conntrack in order to prevent an OOB memory access during setup. Here is an example kernel panic on 4.14.180 when userspace passes in an index greater than NFPROTO_NUMPROTO. Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in:... Process poc (pid: 5614, stack limit = 0x00000000a3933121) CPU: 4 PID: 5614 Comm: poc Tainted: G S W O 4.14.180-g051355490483 Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM task: 000000002a3dfffe task.stack: 00000000a3933121 pc : __cfi_check_fail+0x1c/0x24 lr : __cfi_check_fail+0x1c/0x24 ... Call trace: __cfi_check_fail+0x1c/0x24 name_to_dev_t+0x0/0x468 nfnetlink_parse_nat_setup+0x234/0x258 ctnetlink_parse_nat_setup+0x4c/0x228 ctnetlink_new_conntrack+0x590/0xc40 nfnetlink_rcv_msg+0x31c/0x4d4 netlink_rcv_skb+0x100/0x184 nfnetlink_rcv+0xf4/0x180 netlink_unicast+0x360/0x770 netlink_sendmsg+0x5a0/0x6a4 ___sys_sendmsg+0x314/0x46c SyS_sendmsg+0xb4/0x108 el0_svc_naked+0x34/0x38 This crash is not happening since 5.4+, however, ctnetlink still allows for creating entries with unsupported layer 3 protocol number. Fixes: c1d10adb4a521 ("[NETFILTER]: Add ctnetlink port for nf_conntrack") Signed-off-by: Will McVicker [pablo@netfilter.org: rebased original patch on top of nf.git] Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 832eabecfbdd..d65846aa8059 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1404,7 +1404,8 @@ ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], if (err < 0) return err; - + if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) + return -EOPNOTSUPP; tuple->src.l3num = l3num; if (flags & CTA_FILTER_FLAG(CTA_IP_DST) || From 67cc570edaa02016a8685a06a0ee91f05a6277d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Aug 2020 19:28:42 +0200 Subject: [PATCH 295/648] netfilter: nf_tables: coalesce multiple notifications into one skbuff On x86_64, each notification results in one skbuff allocation which consumes at least 768 bytes due to the skbuff overhead. This patch coalesces several notifications into one single skbuff, so each notification consumes at least ~211 bytes, that ~3.5 times less memory consumption. As a result, this is reducing the chances to exhaust the netlink socket receive buffer. Rule of thumb is that each notification batch only contains netlink messages whose report flag is the same, nfnetlink_send() requires this to do appropriate delivery to userspace, either via unicast (echo mode) or multicast (monitor mode). The skbuff control buffer is used to annotate the report flag for later handling at the new coalescing routine. The batch skbuff notification size is NLMSG_GOODSIZE, using a larger skbuff would allow for more socket receiver buffer savings (to amortize the cost of the skbuff even more), however, going over that size might break userspace applications, so let's be conservative and stick to NLMSG_GOODSIZE. Reported-by: Phil Sutter Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 1 + net/netfilter/nf_tables_api.c | 70 ++++++++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index a1a8d45adb42..6c0806bd8d1e 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -8,6 +8,7 @@ struct netns_nftables { struct list_head tables; struct list_head commit_list; struct list_head module_list; + struct list_head notify_list; struct mutex commit_mutex; unsigned int base_seq; u8 gencursor; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7dc1cbf40ea..4603b667973a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -684,6 +684,18 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, return -1; } +struct nftnl_skb_parms { + bool report; +}; +#define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb)) + +static void nft_notify_enqueue(struct sk_buff *skb, bool report, + struct list_head *notify_list) +{ + NFT_CB(skb).report = report; + list_add_tail(&skb->list, notify_list); +} + static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; @@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, - gfp_flags); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nft_notify_enqueue(skb, report, &net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net) mutex_unlock(&net->nft.commit_mutex); } +static void nft_commit_notify(struct net *net, u32 portid) +{ + struct sk_buff *batch_skb = NULL, *nskb, *skb; + unsigned char *data; + int len; + + list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + if (!batch_skb) { +new_batch: + batch_skb = skb; + len = NLMSG_GOODSIZE - skb->len; + list_del(&skb->list); + continue; + } + len -= skb->len; + if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) { + data = skb_put(batch_skb, skb->len); + memcpy(data, skb->data, skb->len); + list_del(&skb->list); + kfree_skb(skb); + continue; + } + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + goto new_batch; + } + + if (batch_skb) { + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + } + + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_release(net); @@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft.module_list); + INIT_LIST_HEAD(&net->nft.notify_list); mutex_init(&net->nft.commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; @@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.module_list)); + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); } static struct pernet_operations nf_tables_net_ops = { From 6c0d95d1238d944fe54f0bbfc7ec017d78435daa Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 1 Sep 2020 08:56:19 +0200 Subject: [PATCH 296/648] netfilter: ctnetlink: fix mark based dump filtering regression conntrack mark based dump filtering may falsely skip entries if a mask is given: If the mask-based check does not filter out the entry, the else-if check is always true and compares the mark without considering the mask. The if/else-if logic seems wrong. Given that the mask during filter setup is implicitly set to 0xffffffff if not specified explicitly, the mark filtering flags seem to just complicate things. Restore the previously used approach by always matching against a zero mask is no filter mark is given. Fixes: cb8aa9a3affb ("netfilter: ctnetlink: add kernel side filtering for dump") Signed-off-by: Martin Willi Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d65846aa8059..c3a4214dc958 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -851,7 +851,6 @@ static int ctnetlink_done(struct netlink_callback *cb) } struct ctnetlink_filter { - u_int32_t cta_flags; u8 family; u_int32_t orig_flags; @@ -906,10 +905,6 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], struct nf_conntrack_zone *zone, u_int32_t flags); -/* applied on filters */ -#define CTA_FILTER_F_CTA_MARK (1 << 0) -#define CTA_FILTER_F_CTA_MARK_MASK (1 << 1) - static struct ctnetlink_filter * ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) { @@ -930,14 +925,10 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) #ifdef CONFIG_NF_CONNTRACK_MARK if (cda[CTA_MARK]) { filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK); - - if (cda[CTA_MARK_MASK]) { + if (cda[CTA_MARK_MASK]) filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK_MASK); - } else { + else filter->mark.mask = 0xffffffff; - } } else if (cda[CTA_MARK_MASK]) { err = -EINVAL; goto err_filter; @@ -1117,11 +1108,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK_MASK)) && - (ct->mark & filter->mark.mask) != filter->mark.val) - goto ignore_entry; - else if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK)) && - ct->mark != filter->mark.val) + if ((ct->mark & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif From 526e81b990e53e31ba40ba304a2285ffd098721f Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 1 Sep 2020 16:56:02 +0200 Subject: [PATCH 297/648] netfilter: conntrack: nf_conncount_init is failing with IPv6 disabled The openvswitch module fails initialization when used in a kernel without IPv6 enabled. nf_conncount_init() fails because the ct code unconditionally tries to initialize the netns IPv6 related bit, regardless of the build option. The change below ignores the IPv6 part if not enabled. Note that the corresponding _put() function already has this IPv6 configuration check. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Eelco Chaudron Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 95f79980348c..47e9319d2cf3 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -565,6 +565,7 @@ static int nf_ct_netns_inet_get(struct net *net) int err; err = nf_ct_netns_do_get(net, NFPROTO_IPV4); +#if IS_ENABLED(CONFIG_IPV6) if (err < 0) goto err1; err = nf_ct_netns_do_get(net, NFPROTO_IPV6); @@ -575,6 +576,7 @@ static int nf_ct_netns_inet_get(struct net *net) err2: nf_ct_netns_put(net, NFPROTO_IPV4); err1: +#endif return err; } From 0c92411bb81de9bc516d6924f50289d8d5f880e5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Sep 2020 19:00:52 +0200 Subject: [PATCH 298/648] netfilter: nft_meta: use socket user_ns to retrieve skuid and skgid ... instead of using init_user_ns. Fixes: 96518518cc41 ("netfilter: add nftables") Tested-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 7bc6537f3ccb..b37bd02448d8 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -147,11 +147,11 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key, switch (key) { case NFT_META_SKUID: - *dest = from_kuid_munged(&init_user_ns, + *dest = from_kuid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsuid); break; case NFT_META_SKGID: - *dest = from_kgid_munged(&init_user_ns, + *dest = from_kgid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsgid); break; default: From 0c4c801b31f89cfc1b97207abbab687f43d8e258 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 1 Sep 2020 18:10:36 +0300 Subject: [PATCH 299/648] drm/i915: fix regression leading to display audio probe failure on GLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 4f0b4352bd26 ("drm/i915: Extract cdclk requirements checking to separate function") the order of force_min_cdclk_changed check and intel_modeset_checks(), was reversed. This broke the mechanism to immediately force a new CDCLK minimum, and lead to driver probe errors for display audio on GLK platform with 5.9-rc1 kernel. Fix the issue by moving intel_modeset_checks() call later. [vsyrjala: It also broke the ability of planes to bump up the cdclk and thus could lead to underruns when eg. flipping from 32bpp to 64bpp framebuffer. To be clear, we still compute the new cdclk correctly but fail to actually program it to the hardware due to intel_set_cdclk_{pre,post}_plane_update() not getting called on account of state->modeset==false.] Fixes: 4f0b4352bd26 ("drm/i915: Extract cdclk requirements checking to separate function") BugLink: https://github.com/thesofproject/linux/issues/2410 Signed-off-by: Kai Vehmanen Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200901151036.1312357-1-kai.vehmanen@linux.intel.com (cherry picked from commit cf696856bc54a31f78e6538b84c8f7a006b6108b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 68325678f5ef..b18c5ac2934d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14956,12 +14956,6 @@ static int intel_atomic_check(struct drm_device *dev, if (dev_priv->wm.distrust_bios_wm) any_ms = true; - if (any_ms) { - ret = intel_modeset_checks(state); - if (ret) - goto fail; - } - intel_fbc_choose_crtc(dev_priv, state); ret = calc_watermark_data(state); if (ret) @@ -14976,6 +14970,10 @@ static int intel_atomic_check(struct drm_device *dev, goto fail; if (any_ms) { + ret = intel_modeset_checks(state); + if (ret) + goto fail; + ret = intel_modeset_calc_cdclk(state); if (ret) return ret; From 88ce2a530cc9865a894454b2e40eba5957a60e1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:15:06 +0200 Subject: [PATCH 300/648] block: restore a specific error code in bdev_del_partition mdadm relies on the fact that deleting an invalid partition returns -ENXIO or -ENOTTY to detect if a block device is a partition or a whole device. Fixes: 08fc1ab6d748 ("block: fix locking in bdev_del_partition") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index 5b4869c08fb3..722406b841df 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -537,7 +537,7 @@ int bdev_del_partition(struct block_device *bdev, int partno) bdevp = bdget_disk(bdev->bd_disk, partno); if (!bdevp) - return -ENOMEM; + return -ENXIO; mutex_lock(&bdevp->bd_mutex); mutex_lock_nested(&bdev->bd_mutex, 1); From 83048015ff7710b46e7c489458a93c6fe348229d Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Mon, 31 Aug 2020 18:37:20 +0530 Subject: [PATCH 301/648] spi: spi-cadence-quadspi: Fix mapping of buffers for DMA reads Buffers need to mapped to DMA channel's device pointer instead of SPI controller's device pointer as its system DMA that actually does data transfer. Data inconsistencies have been reported when reading from flash without this fix. Fixes: ffa639e069fb ("mtd: spi-nor: cadence-quadspi: Add DMA support for direct mode reads") Signed-off-by: Vignesh Raghavendra Tested-by: Jan Kiszka Link: https://lore.kernel.org/r/20200831130720.4524-1-vigneshr@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 508b219eabf8..c6795c684b16 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -907,14 +907,16 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, struct dma_async_tx_descriptor *tx; dma_cookie_t cookie; dma_addr_t dma_dst; + struct device *ddev; if (!cqspi->rx_chan || !virt_addr_valid(buf)) { memcpy_fromio(buf, cqspi->ahb_base + from, len); return 0; } - dma_dst = dma_map_single(dev, buf, len, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dma_dst)) { + ddev = cqspi->rx_chan->device->dev; + dma_dst = dma_map_single(ddev, buf, len, DMA_FROM_DEVICE); + if (dma_mapping_error(ddev, dma_dst)) { dev_err(dev, "dma mapping failed\n"); return -ENOMEM; } @@ -948,7 +950,7 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, } err_unmap: - dma_unmap_single(dev, dma_dst, len, DMA_FROM_DEVICE); + dma_unmap_single(ddev, dma_dst, len, DMA_FROM_DEVICE); return ret; } From 0ff4628f4c6c1ab87eef9f16b25355cadc426d64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Sep 2020 03:40:25 -0700 Subject: [PATCH 302/648] mac802154: tx: fix use-after-free syzbot reported a bug in ieee802154_tx() [1] A similar issue in ieee802154_xmit_worker() is also fixed in this patch. [1] BUG: KASAN: use-after-free in ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88 Read of size 4 at addr ffff8880251a8c70 by task syz-executor.3/928 CPU: 0 PID: 928 Comm: syz-executor.3 Not tainted 5.9.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130 __netdev_start_xmit include/linux/netdevice.h:4634 [inline] netdev_start_xmit include/linux/netdevice.h:4648 [inline] dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203 packet_snd net/packet/af_packet.c:2989 [inline] packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45d5b9 Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc98e749c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000002ccc0 RCX: 000000000045d5b9 RDX: 0000000000000000 RSI: 0000000020007780 RDI: 000000000000000b RBP: 000000000118d020 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cfec R13: 00007fff690c720f R14: 00007fc98e74a9c0 R15: 000000000118cfec Allocated by task 928: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461 slab_post_alloc_hook mm/slab.h:518 [inline] slab_alloc_node mm/slab.c:3254 [inline] kmem_cache_alloc_node+0x136/0x3e0 mm/slab.c:3574 __alloc_skb+0x71/0x550 net/core/skbuff.c:198 alloc_skb include/linux/skbuff.h:1094 [inline] alloc_skb_with_frags+0x92/0x570 net/core/skbuff.c:5771 sock_alloc_send_pskb+0x72a/0x880 net/core/sock.c:2348 packet_alloc_skb net/packet/af_packet.c:2837 [inline] packet_snd net/packet/af_packet.c:2932 [inline] packet_sendmsg+0x19fb/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 928: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56 kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422 __cache_free mm/slab.c:3418 [inline] kmem_cache_free.part.0+0x74/0x1e0 mm/slab.c:3693 kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:622 __kfree_skb net/core/skbuff.c:679 [inline] consume_skb net/core/skbuff.c:838 [inline] consume_skb+0xcf/0x160 net/core/skbuff.c:832 __dev_kfree_skb_any+0x9c/0xc0 net/core/dev.c:3107 fakelb_hw_xmit+0x20e/0x2a0 drivers/net/ieee802154/fakelb.c:81 drv_xmit_async net/mac802154/driver-ops.h:16 [inline] ieee802154_tx+0x282/0x480 net/mac802154/tx.c:81 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130 __netdev_start_xmit include/linux/netdevice.h:4634 [inline] netdev_start_xmit include/linux/netdevice.h:4648 [inline] dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203 packet_snd net/packet/af_packet.c:2989 [inline] packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff8880251a8c00 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 112 bytes inside of 224-byte region [ffff8880251a8c00, ffff8880251a8ce0) The buggy address belongs to the page: page:0000000062b6a4f1 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x251a8 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0000435c88 ffffea00028b6c08 ffff8880a9055d00 raw: 0000000000000000 ffff8880251a80c0 000000010000000c 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880251a8b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880251a8b80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880251a8c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880251a8c80: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff8880251a8d00: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: 409c3b0c5f03 ("mac802154: tx: move stats tx increment") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Alexander Aring Cc: Stefan Schmidt Cc: linux-wpan@vger.kernel.org Link: https://lore.kernel.org/r/20200908104025.4009085-1-edumazet@google.com Signed-off-by: Stefan Schmidt --- net/mac802154/tx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index ab52811523e9..c829e4a75325 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -34,11 +34,11 @@ void ieee802154_xmit_worker(struct work_struct *work) if (res) goto err_tx; - ieee802154_xmit_complete(&local->hw, skb, false); - dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; + ieee802154_xmit_complete(&local->hw, skb, false); + return; err_tx: @@ -78,6 +78,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) /* async is priority, otherwise sync is fallback */ if (local->ops->xmit_async) { + unsigned int len = skb->len; + ret = drv_xmit_async(local, skb); if (ret) { ieee802154_wake_queue(&local->hw); @@ -85,7 +87,7 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) } dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + dev->stats.tx_bytes += len; } else { local->tx_skb = skb; queue_work(local->workqueue, &local->tx_work); From e6a18d36118bea3bf497c9df4d9988b6df120689 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 8 Sep 2020 00:04:10 +0200 Subject: [PATCH 303/648] bpf: Fix clobbering of r2 in bpf_gen_ld_abs Bryce reported that he saw the following with: 0: r6 = r1 1: r1 = 12 2: r0 = *(u16 *)skb[r1] The xlated sequence was incorrectly clobbering r2 with pointer value of r6 ... 0: (bf) r6 = r1 1: (b7) r1 = 12 2: (bf) r1 = r6 3: (bf) r2 = r1 4: (85) call bpf_skb_load_helper_16_no_cache#7692160 ... and hence call to the load helper never succeeded given the offset was too high. Fix it by reordering the load of r6 to r1. Other than that the insn has similar calling convention than BPF helpers, that is, r0 - r5 are scratch regs, so nothing else affected after the insn. Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf") Reported-by: Bryce Kahle Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/cace836e4d07bb63b1a53e49c5dfb238a040c298.1599512096.git.daniel@iogearbox.net --- net/core/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index b2df52086445..2d62c25e0395 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7065,8 +7065,6 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig, bool indirect = BPF_MODE(orig->code) == BPF_IND; struct bpf_insn *insn = insn_buf; - /* We're guaranteed here that CTX is in R6. */ - *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); if (!indirect) { *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); } else { @@ -7074,6 +7072,8 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig, if (orig->imm) *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); } + /* We're guaranteed here that CTX is in R6. */ + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); switch (BPF_SIZE(orig->code)) { case BPF_B: From 1a5ce48fd667128e369fdc7fb87e21539aed21b5 Mon Sep 17 00:00:00 2001 From: Camel Guo Date: Tue, 8 Sep 2020 10:35:21 +0200 Subject: [PATCH 304/648] ASoC: tlv320adcx140: Wake up codec before accessing register According to its datasheet, after reset this codec goes into sleep mode. In this mode, any register accessing should be avoided except for exiting sleep mode. Hence this commit moves SLEEP_CFG access before any register accessing. Signed-off-by: Camel Guo Acked-by: Dan Murphy Link: https://lore.kernel.org/r/20200908083521.14105-2-camel.guo@axis.com Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320adcx140.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sound/soc/codecs/tlv320adcx140.c b/sound/soc/codecs/tlv320adcx140.c index 7ae6ec374be3..8efe20605f9b 100644 --- a/sound/soc/codecs/tlv320adcx140.c +++ b/sound/soc/codecs/tlv320adcx140.c @@ -842,6 +842,18 @@ static int adcx140_codec_probe(struct snd_soc_component *component) if (ret) goto out; + if (adcx140->supply_areg == NULL) + sleep_cfg_val |= ADCX140_AREG_INTERNAL; + + ret = regmap_write(adcx140->regmap, ADCX140_SLEEP_CFG, sleep_cfg_val); + if (ret) { + dev_err(adcx140->dev, "setting sleep config failed %d\n", ret); + goto out; + } + + /* 8.4.3: Wait >= 1ms after entering active mode. */ + usleep_range(1000, 100000); + pdm_count = device_property_count_u32(adcx140->dev, "ti,pdm-edge-select"); if (pdm_count <= ADCX140_NUM_PDM_EDGES && pdm_count > 0) { @@ -889,18 +901,6 @@ static int adcx140_codec_probe(struct snd_soc_component *component) if (ret) goto out; - if (adcx140->supply_areg == NULL) - sleep_cfg_val |= ADCX140_AREG_INTERNAL; - - ret = regmap_write(adcx140->regmap, ADCX140_SLEEP_CFG, sleep_cfg_val); - if (ret) { - dev_err(adcx140->dev, "setting sleep config failed %d\n", ret); - goto out; - } - - /* 8.4.3: Wait >= 1ms after entering active mode. */ - usleep_range(1000, 100000); - ret = regmap_update_bits(adcx140->regmap, ADCX140_BIAS_CFG, ADCX140_MIC_BIAS_VAL_MSK | ADCX140_MIC_BIAS_VREF_MSK, bias_cfg); From b63de8400a6e1001b5732286cf6f5ec27799b7b4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 28 Aug 2020 12:01:50 -0700 Subject: [PATCH 305/648] nvme: Revert: Fix controller creation races with teardown flow The indicated patch introduced a barrier in the sysfs_delete attribute for the controller that rejects the request if the controller isn't created. "Created" is defined as at least 1 call to nvme_start_ctrl(). This is problematic in error-injection testing. If an error occurs on the initial attempt to create an association and the controller enters reconnect(s) attempts, the admin cannot delete the controller until either there is a successful association created or ctrl_loss_tmo times out. Where this issue is particularly hurtful is when the "admin" is the nvme-cli, it is performing a connection to a discovery controller, and it is initiated via auto-connect scripts. With the FC transport, if the first connection attempt fails, the controller enters a normal reconnect state but returns control to the cli thread that created the controller. In this scenario, the cli attempts to read the discovery log via ioctl, which fails, causing the cli to see it as an empty log and then proceeds to delete the discovery controller. The delete is rejected and the controller is left live. If the discovery controller reconnect then succeeds, there is no action to delete it, and it sits live doing nothing. Cc: # v5.7+ Fixes: ce1518139e69 ("nvme: Fix controller creation races with teardown flow") Signed-off-by: James Smart CC: Israel Rukshin CC: Max Gurtovoy CC: Christoph Hellwig CC: Keith Busch CC: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 ----- drivers/nvme/host/nvme.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5702a3843746..8b75f6ca0b61 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3525,10 +3525,6 @@ static ssize_t nvme_sysfs_delete(struct device *dev, { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - /* Can't delete non-created controllers */ - if (!ctrl->created) - return -EBUSY; - if (device_remove_file_self(dev, attr)) nvme_delete_ctrl_sync(ctrl); return count; @@ -4403,7 +4399,6 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_queue_scan(ctrl); nvme_start_queues(ctrl); } - ctrl->created = true; } EXPORT_SYMBOL_GPL(nvme_start_ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2910f6caab7d..9fd45ff656da 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -307,7 +307,6 @@ struct nvme_ctrl { struct nvme_command ka_cmd; struct work_struct fw_act_work; unsigned long events; - bool created; #ifdef CONFIG_NVME_MULTIPATH /* asymmetric namespace access: */ From e126e8210e950bb83414c4f57b3120ddb8450742 Mon Sep 17 00:00:00 2001 From: David Milburn Date: Wed, 2 Sep 2020 17:42:54 -0500 Subject: [PATCH 306/648] nvme-fc: cancel async events before freeing event struct Cancel async event work in case async event has been queued up, and nvme_fc_submit_async_event() runs after event has been freed. Signed-off-by: David Milburn Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a7f474ddfff7..e8ef42b9d50c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2160,6 +2160,7 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; int i; + cancel_work_sync(&ctrl->ctrl.async_event_work); aen_op = ctrl->aen_ops; for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { __nvme_fc_exit_request(ctrl, aen_op); From 925dd04c1f9825194b9e444c12478084813b2b5d Mon Sep 17 00:00:00 2001 From: David Milburn Date: Wed, 2 Sep 2020 17:42:52 -0500 Subject: [PATCH 307/648] nvme-rdma: cancel async events before freeing event struct Cancel async event work in case async event has been queued up, and nvme_rdma_submit_async_event() runs after event has been freed. Signed-off-by: David Milburn Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1d8ea5305d60..3247d4ee46b1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -835,6 +835,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); } if (ctrl->async_event_sqe.data) { + cancel_work_sync(&ctrl->ctrl.async_event_work); nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); ctrl->async_event_sqe.data = NULL; From ceb1e0874dba5cbfc4e0b4145796a4bfb3716e6a Mon Sep 17 00:00:00 2001 From: David Milburn Date: Wed, 2 Sep 2020 17:42:53 -0500 Subject: [PATCH 308/648] nvme-tcp: cancel async events before freeing event struct Cancel async event work in case async event has been queued up, and nvme_tcp_submit_async_event() runs after event has been freed. Signed-off-by: David Milburn Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 873d6afcbc9e..245a73d0a1ca 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1597,6 +1597,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) { if (to_tcp_ctrl(ctrl)->async_req.pdu) { + cancel_work_sync(&ctrl->async_event_work); nvme_tcp_free_async_req(to_tcp_ctrl(ctrl)); to_tcp_ctrl(ctrl)->async_req.pdu = NULL; } From a566a9012acd7c9a4be7e30dc7acb7a811ec2260 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 1 Sep 2020 19:40:16 -0600 Subject: [PATCH 309/648] seccomp: don't leak memory when filter install races In seccomp_set_mode_filter() with TSYNC | NEW_LISTENER, we first initialize the listener fd, then check to see if we can actually use it later in seccomp_may_assign_mode(), which can fail if anyone else in our thread group has installed a filter and caused some divergence. If we can't, we partially clean up the newly allocated file: we put the fd, put the file, but don't actually clean up the *memory* that was allocated at filter->notif. Let's clean that up too. To accomplish this, let's hoist the actual "detach a notifier from a filter" code to its own helper out of seccomp_notify_release(), so that in case anyone adds stuff to init_listener(), they only have to add the cleanup code in one spot. This does a bit of extra locking and such on the failure path when the filter is not attached, but it's a slow failure path anyway. Fixes: 51891498f2da ("seccomp: allow TSYNC and USER_NOTIF together") Reported-by: syzbot+3ad9614a12f80994c32e@syzkaller.appspotmail.com Signed-off-by: Tycho Andersen Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20200902014017.934315-1-tycho@tycho.pizza Signed-off-by: Kees Cook --- kernel/seccomp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 3ee59ce0a323..bb0dd9ae699a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1109,13 +1109,12 @@ static long seccomp_set_mode_strict(void) } #ifdef CONFIG_SECCOMP_FILTER -static int seccomp_notify_release(struct inode *inode, struct file *file) +static void seccomp_notify_detach(struct seccomp_filter *filter) { - struct seccomp_filter *filter = file->private_data; struct seccomp_knotif *knotif; if (!filter) - return 0; + return; mutex_lock(&filter->notify_lock); @@ -1142,6 +1141,13 @@ static int seccomp_notify_release(struct inode *inode, struct file *file) kfree(filter->notif); filter->notif = NULL; mutex_unlock(&filter->notify_lock); +} + +static int seccomp_notify_release(struct inode *inode, struct file *file) +{ + struct seccomp_filter *filter = file->private_data; + + seccomp_notify_detach(filter); __put_seccomp_filter(filter); return 0; } @@ -1581,6 +1587,7 @@ static long seccomp_set_mode_filter(unsigned int flags, listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); + seccomp_notify_detach(prepared); } else { fd_install(listener, listener_f); ret = listener; From 19d1d49f2a8ce7adb10d93ff31909b0932c0d628 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 1 Sep 2020 19:40:17 -0600 Subject: [PATCH 310/648] mailmap, MAINTAINERS: move to tycho.pizza I've changed my e-mail address to tycho.pizza, so let's reflect that in these files. Signed-off-by: Tycho Andersen Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20200902014017.934315-2-tycho@tycho.pizza Signed-off-by: Kees Cook --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 332c7833057f..50096b96c85d 100644 --- a/.mailmap +++ b/.mailmap @@ -308,6 +308,7 @@ Tony Luck TripleX Chung TripleX Chung Tsuneo Yoshioka +Tycho Andersen Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König diff --git a/MAINTAINERS b/MAINTAINERS index f0068bceeb61..adc4f0619b19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9775,7 +9775,7 @@ F: drivers/scsi/53c700* LEAKING_ADDRESSES M: Tobin C. Harding -M: Tycho Andersen +M: Tycho Andersen L: kernel-hardening@lists.openwall.com S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tobin/leaks.git From e839317900e9f13c83d8711d684de88c625b307a Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Wed, 2 Sep 2020 08:09:53 -0600 Subject: [PATCH 311/648] seccomp: don't leave dangling ->notif if file allocation fails Christian and Kees both pointed out that this is a bit sloppy to open-code both places, and Christian points out that we leave a dangling pointer to ->notif if file allocation fails. Since we check ->notif for null in order to determine if it's ok to install a filter, this means people won't be able to install a filter if the file allocation fails for some reason, even if they subsequently should be able to. To fix this, let's hoist this free+null into its own little helper and use it. Reported-by: Kees Cook Reported-by: Christian Brauner Signed-off-by: Tycho Andersen Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20200902140953.1201956-1-tycho@tycho.pizza Signed-off-by: Kees Cook --- kernel/seccomp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index bb0dd9ae699a..676d4af62103 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1109,6 +1109,12 @@ static long seccomp_set_mode_strict(void) } #ifdef CONFIG_SECCOMP_FILTER +static void seccomp_notify_free(struct seccomp_filter *filter) +{ + kfree(filter->notif); + filter->notif = NULL; +} + static void seccomp_notify_detach(struct seccomp_filter *filter) { struct seccomp_knotif *knotif; @@ -1138,8 +1144,7 @@ static void seccomp_notify_detach(struct seccomp_filter *filter) complete(&knotif->ready); } - kfree(filter->notif); - filter->notif = NULL; + seccomp_notify_free(filter); mutex_unlock(&filter->notify_lock); } @@ -1494,7 +1499,7 @@ static struct file *init_listener(struct seccomp_filter *filter) out_notif: if (IS_ERR(ret)) - kfree(filter->notif); + seccomp_notify_free(filter); out: return ret; } From 746f534a4809e07f427f7d13d10f3a6a9641e5c3 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 5 Sep 2020 14:48:31 -0700 Subject: [PATCH 312/648] tools/libbpf: Avoid counting local symbols in ABI check Encountered the following failure building libbpf from kernel 5.8.5 sources with GCC 8.4.0 and binutils 2.34: (long paths shortened) Warning: Num of global symbols in sharedobjs/libbpf-in.o (234) does NOT match with num of versioned symbols in libbpf.so (236). Please make sure all LIBBPF_API symbols are versioned in libbpf.map. --- libbpf_global_syms.tmp 2020-09-02 07:30:58.920084380 +0000 +++ libbpf_versioned_syms.tmp 2020-09-02 07:30:58.924084388 +0000 @@ -1,3 +1,5 @@ +_fini +_init bpf_btf_get_fd_by_id bpf_btf_get_next_id bpf_create_map make[4]: *** [Makefile:210: check_abi] Error 1 Investigation shows _fini and _init are actually local symbols counted amongst global ones: $ readelf --dyn-syms --wide libbpf.so|head -10 Symbol table '.dynsym' contains 343 entries: Num: Value Size Type Bind Vis Ndx Name 0: 00000000 0 NOTYPE LOCAL DEFAULT UND 1: 00004098 0 SECTION LOCAL DEFAULT 11 2: 00004098 8 FUNC LOCAL DEFAULT 11 _init@@LIBBPF_0.0.1 3: 00023040 8 FUNC LOCAL DEFAULT 14 _fini@@LIBBPF_0.0.1 4: 00000000 0 OBJECT GLOBAL DEFAULT ABS LIBBPF_0.0.4 5: 00000000 0 OBJECT GLOBAL DEFAULT ABS LIBBPF_0.0.1 6: 0000ffa4 8 FUNC GLOBAL DEFAULT 12 bpf_object__find_map_by_offset@@LIBBPF_0.0.1 A previous commit filtered global symbols in sharedobjs/libbpf-in.o. Do the same with the libbpf.so DSO for consistent comparison. Fixes: 306b267cb3c4 ("libbpf: Verify versioned symbols") Signed-off-by: Tony Ambardar Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200905214831.1565465-1-Tony.Ambardar@gmail.com --- tools/lib/bpf/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index b78484e7a608..9ae8f4ef0aac 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -152,6 +152,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -219,6 +220,7 @@ check_abi: $(OUTPUT)libbpf.so awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ From c4440b8a457779adeec42c5e181cb4016f19ce0f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 Sep 2020 16:27:29 -0700 Subject: [PATCH 313/648] Input: i8042 - add Entroware Proteus EL07R4 to nomux and reset lists The keyboard drops keypresses early during boot unless both the nomux and reset quirks are set. Add DMI table entries for this. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1806085 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200907095656.13155-1-hdegoede@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 7d7f73702726..37fb9aa88f9c 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -548,6 +548,14 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5738"), }, }, + { + /* Entroware Proteus */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Entroware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Proteus"), + DMI_MATCH(DMI_PRODUCT_VERSION, "EL07R4"), + }, + }, { } }; @@ -676,6 +684,14 @@ static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "33474HU"), }, }, + { + /* Entroware Proteus */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Entroware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Proteus"), + DMI_MATCH(DMI_PRODUCT_VERSION, "EL07R4"), + }, + }, { } }; From e8a8a185051a460e3eb0617dca33f996f4e31516 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Sep 2020 13:46:37 -0700 Subject: [PATCH 314/648] block: only call sched requeue_request() for scheduled requests Yang Yang reported the following crash caused by requeueing a flush request in Kyber: [ 2.517297] Unable to handle kernel paging request at virtual address ffffffd8071c0b00 ... [ 2.517468] pc : clear_bit+0x18/0x2c [ 2.517502] lr : sbitmap_queue_clear+0x40/0x228 [ 2.517503] sp : ffffff800832bc60 pstate : 00c00145 ... [ 2.517599] Process ksoftirqd/5 (pid: 51, stack limit = 0xffffff8008328000) [ 2.517602] Call trace: [ 2.517606] clear_bit+0x18/0x2c [ 2.517619] kyber_finish_request+0x74/0x80 [ 2.517627] blk_mq_requeue_request+0x3c/0xc0 [ 2.517637] __scsi_queue_insert+0x11c/0x148 [ 2.517640] scsi_softirq_done+0x114/0x130 [ 2.517643] blk_done_softirq+0x7c/0xb0 [ 2.517651] __do_softirq+0x208/0x3bc [ 2.517657] run_ksoftirqd+0x34/0x60 [ 2.517663] smpboot_thread_fn+0x1c4/0x2c0 [ 2.517667] kthread+0x110/0x120 [ 2.517669] ret_from_fork+0x10/0x18 This happens because Kyber doesn't track flush requests, so kyber_finish_request() reads a garbage domain token. Only call the scheduler's requeue_request() hook if RQF_ELVPRIV is set (like we do for the finish_request() hook in blk_mq_free_request()). Now that we're handling it in blk-mq, also remove the check from BFQ. Reported-by: Yang Yang Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 12 ------------ block/blk-mq-sched.h | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index a4c0bec920cb..ee767fa000e4 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5895,18 +5895,6 @@ static void bfq_finish_requeue_request(struct request *rq) struct bfq_queue *bfqq = RQ_BFQQ(rq); struct bfq_data *bfqd; - /* - * Requeue and finish hooks are invoked in blk-mq without - * checking whether the involved request is actually still - * referenced in the scheduler. To handle this fact, the - * following two checks make this function exit in case of - * spurious invocations, for which there is nothing to do. - * - * First, check whether rq has nothing to do with an elevator. - */ - if (unlikely(!(rq->rq_flags & RQF_ELVPRIV))) - return; - /* * rq either is not associated with any icq, or is an already * requeued request that has not (yet) been re-inserted into diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 126021fc3a11..e81ca1bf6e10 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -66,7 +66,7 @@ static inline void blk_mq_sched_requeue_request(struct request *rq) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e && e->type->ops.requeue_request) + if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request) e->type->ops.requeue_request(rq); } From 2f1e8ea726e9020e01e9e2ae29c2d5eb11133032 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Sep 2020 02:48:42 +0300 Subject: [PATCH 315/648] net: dsa: link interfaces with the DSA master to get rid of lockdep warnings Since commit 845e0ebb4408 ("net: change addr_list_lock back to static key"), cascaded DSA setups (DSA switch port as DSA master for another DSA switch port) are emitting this lockdep warning: ============================================ WARNING: possible recursive locking detected 5.8.0-rc1-00133-g923e4b5032dd-dirty #208 Not tainted -------------------------------------------- dhcpcd/323 is trying to acquire lock: ffff000066dd4268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 but task is already holding lock: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&dsa_master_addr_list_lock_key/1); lock(&dsa_master_addr_list_lock_key/1); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by dhcpcd/323: #0: ffffdbd1381dda18 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x24/0x30 #1: ffff00006614b268 (_xmit_ETHER){+...}-{2:2}, at: dev_set_rx_mode+0x28/0x48 #2: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 stack backtrace: Call trace: dump_backtrace+0x0/0x1e0 show_stack+0x20/0x30 dump_stack+0xec/0x158 __lock_acquire+0xca0/0x2398 lock_acquire+0xe8/0x440 _raw_spin_lock_nested+0x64/0x90 dev_mc_sync+0x44/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_mc_sync+0x84/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_set_rx_mode+0x30/0x48 __dev_open+0x10c/0x180 __dev_change_flags+0x170/0x1c8 dev_change_flags+0x2c/0x70 devinet_ioctl+0x774/0x878 inet_ioctl+0x348/0x3b0 sock_do_ioctl+0x50/0x310 sock_ioctl+0x1f8/0x580 ksys_ioctl+0xb0/0xf0 __arm64_sys_ioctl+0x28/0x38 el0_svc_common.constprop.0+0x7c/0x180 do_el0_svc+0x2c/0x98 el0_sync_handler+0x9c/0x1b8 el0_sync+0x158/0x180 Since DSA never made use of the netdev API for describing links between upper devices and lower devices, the dev->lower_level value of a DSA switch interface would be 1, which would warn when it is a DSA master. We can use netdev_upper_dev_link() to describe the relationship between a DSA slave and a DSA master. To be precise, a DSA "slave" (switch port) is an "upper" to a DSA "master" (host port). The relationship is "many uppers to one lower", like in the case of VLAN. So, for that reason, we use the same function as VLAN uses. There might be a chance that somebody will try to take hold of this interface and use it immediately after register_netdev() and before netdev_upper_dev_link(). To avoid that, we do the registration and linkage while holding the RTNL, and we use the RTNL-locked cousin of register_netdev(), which is register_netdevice(). Since this warning was not there when lockdep was using dynamic keys for addr_list_lock, we are blaming the lockdep patch itself. The network stack _has_ been using static lockdep keys before, and it _is_ likely that stacked DSA setups have been triggering these lockdep warnings since forever, however I can't test very old kernels on this particular stacked DSA setup, to ensure I'm not in fact introducing regressions. Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key") Suggested-by: Cong Wang Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9af1a2d0cec4..16e5f98d4882 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1799,15 +1799,27 @@ int dsa_slave_create(struct dsa_port *port) dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); - ret = register_netdev(slave_dev); + rtnl_lock(); + + ret = register_netdevice(slave_dev); if (ret) { netdev_err(master, "error %d registering interface %s\n", ret, slave_dev->name); + rtnl_unlock(); goto out_phy; } + ret = netdev_upper_dev_link(master, slave_dev, NULL); + + rtnl_unlock(); + + if (ret) + goto out_unregister; + return 0; +out_unregister: + unregister_netdev(slave_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); @@ -1824,16 +1836,18 @@ int dsa_slave_create(struct dsa_port *port) void dsa_slave_destroy(struct net_device *slave_dev) { + struct net_device *master = dsa_slave_to_master(slave_dev); struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); netif_carrier_off(slave_dev); rtnl_lock(); + netdev_upper_dev_unlink(master, slave_dev); + unregister_netdevice(slave_dev); phylink_disconnect_phy(dp->pl); rtnl_unlock(); dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); - unregister_netdev(slave_dev); phylink_destroy(dp->pl); gro_cells_destroy(&p->gcells); free_percpu(p->stats64); From 843d926b003ea692468c8cc5bea1f9f58dfa8c75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Sep 2020 01:20:23 -0700 Subject: [PATCH 316/648] ipv6: avoid lockdep issue in fib6_del() syzbot reported twice a lockdep issue in fib6_del() [1] which I think is caused by net->ipv6.fib6_null_entry having a NULL fib6_table pointer. fib6_del() already checks for fib6_null_entry special case, we only need to return earlier. Bug seems to occur very rarely, I have thus chosen a 'bug origin' that makes backports not too complex. [1] WARNING: suspicious RCU usage 5.9.0-rc4-syzkaller #0 Not tainted ----------------------------- net/ipv6/ip6_fib.c:1996 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 4 locks held by syz-executor.5/8095: #0: ffffffff8a7ea708 (rtnl_mutex){+.+.}-{3:3}, at: ppp_release+0x178/0x240 drivers/net/ppp/ppp_generic.c:401 #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: spin_trylock_bh include/linux/spinlock.h:414 [inline] #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: fib6_run_gc+0x21b/0x2d0 net/ipv6/ip6_fib.c:2312 #2: ffffffff89bd6a40 (rcu_read_lock){....}-{1:2}, at: __fib6_clean_all+0x0/0x290 net/ipv6/ip6_fib.c:2613 #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:359 [inline] #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: __fib6_clean_all+0x107/0x290 net/ipv6/ip6_fib.c:2245 stack backtrace: CPU: 1 PID: 8095 Comm: syz-executor.5 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 fib6_del+0x12b4/0x1630 net/ipv6/ip6_fib.c:1996 fib6_clean_node+0x39b/0x570 net/ipv6/ip6_fib.c:2180 fib6_walk_continue+0x4aa/0x8e0 net/ipv6/ip6_fib.c:2102 fib6_walk+0x182/0x370 net/ipv6/ip6_fib.c:2150 fib6_clean_tree+0xdb/0x120 net/ipv6/ip6_fib.c:2230 __fib6_clean_all+0x120/0x290 net/ipv6/ip6_fib.c:2246 fib6_clean_all net/ipv6/ip6_fib.c:2257 [inline] fib6_run_gc+0x113/0x2d0 net/ipv6/ip6_fib.c:2320 ndisc_netdev_event+0x217/0x350 net/ipv6/ndisc.c:1805 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:2033 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline] call_netdevice_notifiers net/core/dev.c:2059 [inline] dev_close_many+0x30b/0x650 net/core/dev.c:1634 rollback_registered_many+0x3a8/0x1210 net/core/dev.c:9261 rollback_registered net/core/dev.c:9329 [inline] unregister_netdevice_queue+0x2dd/0x570 net/core/dev.c:10410 unregister_netdevice include/linux/netdevice.h:2774 [inline] ppp_release+0x216/0x240 drivers/net/ppp/ppp_generic.c:403 __fput+0x285/0x920 fs/file_table.c:281 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:163 [inline] exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 421842edeaf6 ("net/ipv6: Add fib6_null_entry") Signed-off-by: Eric Dumazet Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 25a90f3f705c..4a664ad4f4d4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1993,14 +1993,19 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, /* Need to own table->tb6_lock */ int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, - lockdep_is_held(&rt->fib6_table->tb6_lock)); - struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; struct fib6_info __rcu **rtp; struct fib6_info __rcu **rtp_next; + struct fib6_table *table; + struct fib6_node *fn; - if (!fn || rt == net->ipv6.fib6_null_entry) + if (rt == net->ipv6.fib6_null_entry) + return -ENOENT; + + table = rt->fib6_table; + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&table->tb6_lock)); + if (!fn) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); From 923f614cdba2842b6f7eaf0713cba1c5668c1b5f Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 8 Sep 2020 09:18:12 -0700 Subject: [PATCH 317/648] fib: fix fib_rule_ops indirect call wrappers when CONFIG_IPV6=m If CONFIG_IPV6=m, the IPV6 functions won't be found by the linker: ld: net/core/fib_rules.o: in function `fib_rules_lookup': fib_rules.c:(.text+0x606): undefined reference to `fib6_rule_match' ld: fib_rules.c:(.text+0x611): undefined reference to `fib6_rule_match' ld: fib_rules.c:(.text+0x68c): undefined reference to `fib6_rule_action' ld: fib_rules.c:(.text+0x693): undefined reference to `fib6_rule_action' ld: fib_rules.c:(.text+0x6aa): undefined reference to `fib6_rule_suppress' ld: fib_rules.c:(.text+0x6bc): undefined reference to `fib6_rule_suppress' make: *** [Makefile:1166: vmlinux] Error 1 Reported-by: Sven Joachim Fixes: b9aaec8f0be5 ("fib: use indirect call wrappers in the most common fib_rules_ops") Acked-by: Randy Dunlap # build-tested Signed-off-by: Brian Vazquez Signed-off-by: David S. Miller --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 51678a528f85..7bcfb16854cb 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,7 +16,7 @@ #include #include -#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES) #ifdef CONFIG_IP_MULTIPLE_TABLES #define INDIRECT_CALL_MT(f, f2, f1, ...) \ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) From 2a154988aa4b2b87bef5dc0678df675829448e32 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 8 Sep 2020 09:30:12 -0700 Subject: [PATCH 318/648] MAINTAINERS: remove John Allen from ibmvnic John's email has bounced and Thomas confirms he no longer works on ibmvnic. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index dca9bfd8c888..12be7ae4d989 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8322,7 +8322,6 @@ F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver M: Thomas Falcon -M: John Allen L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* From ba9e04a7ddf4f22a10e05bf9403db6b97743c7bf Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 8 Sep 2020 14:09:34 -0700 Subject: [PATCH 319/648] ip: fix tos reflection in ack and reset packets Currently, in tcp_v4_reqsk_send_ack() and tcp_v4_send_reset(), we echo the TOS value of the received packets in the response. However, we do not want to echo the lower 2 ECN bits in accordance with RFC 3168 6.1.5 robustness principles. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 61f802d5350c..e6f2ada9e7d5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -1703,7 +1704,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet_sk(sk)->tos = arg->tos; + inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; From 123aaf774f70ba48fb9ab064b6ce75a9b64d7b85 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 19 Aug 2020 10:34:48 +0900 Subject: [PATCH 320/648] f2fs: Fix type of section block count variables Commit da52f8ade40b ("f2fs: get the right gc victim section when section has several segments") added code to count blocks of each section using variables with type 'unsigned short', which has 2 bytes size in many systems. However, the counts can be larger than the 2 bytes range and type conversion results in wrong values. Especially when the f2fs sections have blocks as many as USHRT_MAX + 1, the count is handled as 0. This triggers eternal loop in init_dirty_segmap() at mount system call. Fix this by changing the type of the variables to block_t. Fixes: da52f8ade40b ("f2fs: get the right gc victim section when section has several segments") Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a65d357f89a9..e247a5ef3713 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -799,7 +799,7 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (__is_large_section(sbi)) { unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned short valid_blocks = + block_t valid_blocks = get_valid_blocks(sbi, segno, true); f2fs_bug_on(sbi, unlikely(!valid_blocks || @@ -815,7 +815,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned short valid_blocks; + block_t valid_blocks; if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type])) dirty_i->nr_dirty[dirty_type]--; @@ -4316,8 +4316,8 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno = 0, offset = 0, secno; - unsigned short valid_blocks; - unsigned short blks_per_sec = BLKS_PER_SEC(sbi); + block_t valid_blocks; + block_t blks_per_sec = BLKS_PER_SEC(sbi); while (1) { /* find dirty segment based on free segmap */ From e2cab031ba7b5003cd12185b3ef38f1a75e3dae8 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Tue, 18 Aug 2020 15:40:14 +0530 Subject: [PATCH 321/648] f2fs: fix indefinite loop scanning for free nid If the sbi->ckpt->next_free_nid is not NAT block aligned and if there are free nids in that NAT block between the start of the block and next_free_nid, then those free nids will not be scanned in scan_nat_page(). This results into mismatch between nm_i->available_nids and the sum of nm_i->free_nid_count of all NAT blocks scanned. And nm_i->available_nids will always be greater than the sum of free nids in all the blocks. Under this condition, if we use all the currently scanned free nids, then it will loop forever in f2fs_alloc_nid() as nm_i->available_nids is still not zero but nm_i->free_nid_count of that partially scanned NAT block is zero. Fix this to align the nm_i->next_scan_nid to the first nid of the corresponding NAT block. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3ad7bdbda5ca..cb1b5b61a1da 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2373,6 +2373,9 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (unlikely(nid >= nm_i->max_nid)) nid = 0; + if (unlikely(nid % NAT_ENTRY_PER_BLOCK)) + nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK; + /* Enough entries */ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) return 0; From 20d0a107fb35f37578b919f62bd474d6d358d579 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Aug 2020 16:07:31 -0400 Subject: [PATCH 322/648] f2fs: Return EOF on unaligned end of file DIO read Reading past end of file returns EOF for aligned reads but -EINVAL for unaligned reads on f2fs. While documentation is not strict about this corner case, most filesystem returns EOF on this case, like iomap filesystems. This patch consolidates the behavior for f2fs, by making it return EOF(0). it can be verified by a read loop on a file that does a partial read before EOF (A file that doesn't end at an aligned address). The following code fails on an unaligned file on f2fs, but not on btrfs, ext4, and xfs. while (done < total) { ssize_t delta = pread(fd, buf + done, total - done, off + done); if (!delta) break; ... } It is arguable whether filesystems should actually return EOF or -EINVAL, but since iomap filesystems support it, and so does the original DIO code, it seems reasonable to consolidate on that. Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ed2bca0fce92..73683e58a08d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3550,6 +3550,9 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, unsigned long align = offset | iov_iter_alignment(iter); struct block_device *bdev = inode->i_sb->s_bdev; + if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode)) + return 1; + if (align & blocksize_mask) { if (bdev) blkbits = blksize_bits(bdev_logical_block_size(bdev)); From 0460534b532e5518c657c7d6492b9337d975eaa3 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 7 Sep 2020 16:35:40 +0530 Subject: [PATCH 323/648] powerpc/papr_scm: Limit the readability of 'perf_stats' sysfs attribute The newly introduced 'perf_stats' attribute uses the default access mode of 0444, allowing non-root users to access performance stats of an nvdimm and potentially force the kernel into issuing a large number of expensive hypercalls. Since the information exposed by this attribute cannot be cached it is better to ward off access to this attribute from users who don't need to access to these performance statistics. Hence update the access mode of 'perf_stats' attribute to be only readable by root users. Fixes: 2d02bf835e57 ("powerpc/papr_scm: Fetch nvdimm performance stats from PHYP") Reported-by: Aneesh Kumar K.V Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200907110540.21349-1-vaibhav@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f439f0dfea7d..a88a707a608a 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -822,7 +822,7 @@ static ssize_t perf_stats_show(struct device *dev, kfree(stats); return rc ? rc : seq_buf_used(&s); } -DEVICE_ATTR_RO(perf_stats); +DEVICE_ATTR_ADMIN_RO(perf_stats); static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) From 73a5379937ec89b91e907bb315e2434ee9696a2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 8 Sep 2020 12:56:08 -0700 Subject: [PATCH 324/648] nvme-fabrics: allow to queue requests for live queues Right now we are failing requests based on the controller state (which is checked inline in nvmf_check_ready) however we should definitely accept requests if the queue is live. When entering controller reset, we transition the controller into NVME_CTRL_RESETTING, and then return BLK_STS_RESOURCE for non-mpath requests (have blk_noretry_request set). This is also the case for NVME_REQ_USER for the wrong reason. There shouldn't be any reason for us to reject this I/O in a controller reset. We do want to prevent passthru commands on the admin queue because we need the controller to fully initialize first before we let user passthru admin commands to be issued. In a non-mpath setup, this means that the requests will simply be requeued over and over forever not allowing the q_usage_counter to drop its final reference, causing controller reset to hang if running concurrently with heavy I/O. Fixes: 35897b920c8a ("nvme-fabrics: fix and refine state checks in __nvmf_check_ready") Reviewed-by: James Smart Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 32f61fc5f4c5..8575724734e0 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -565,10 +565,14 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, struct nvme_request *req = nvme_req(rq); /* - * If we are in some state of setup or teardown only allow - * internally generated commands. + * currently we have a problem sending passthru commands + * on the admin_q if the controller is not LIVE because we can't + * make sure that they are going out after the admin connect, + * controller enable and/or other commands in the initialization + * sequence. until the controller will be LIVE, fail with + * BLK_STS_RESOURCE so that they will be rescheduled. */ - if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD)) + if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD)) return false; /* @@ -577,7 +581,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, */ switch (ctrl->state) { case NVME_CTRL_CONNECTING: - if (nvme_is_fabrics(req->cmd) && + if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && req->cmd->fabrics.fctype == nvme_fabrics_type_connect) return true; break; From 76366050eb1b3151c4b4110c76538ff14dffb74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Wed, 19 Aug 2020 12:32:24 -0500 Subject: [PATCH 325/648] x86/defconfigs: Explicitly unset CONFIG_64BIT in i386_defconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A recent refresh of the defconfigs got rid of the following (unset) config: # CONFIG_64BIT is not set Innocuous as it seems, when the config file is saved again the behavior is changed so that CONFIG_64BIT=y. Currently, $ make i386_defconfig $ grep CONFIG_64BIT .config CONFIG_64BIT=y whereas previously (and with this patch): $ make i386_defconfig $ grep CONFIG_64BIT .config # CONFIG_64BIT is not set ( This was found with weird compiler errors on OpenEmbedded builds, as the compiler was unable to cope with 64-bits data types. ) Fixes: 1d0e12fd3a84 ("x86/defconfigs: Refresh defconfig files") Reported-by: Jarkko Nikula Reported-by: Andy Shevchenko Tested-by: Sedat Dilek Signed-off-by: Daniel Díaz Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index d7577fece9eb..4cfdf5755ab5 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -19,6 +19,7 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +# CONFIG_64BIT is not set CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y From 0a355aeb24081e4538d4d424cd189f16c0bbd983 Mon Sep 17 00:00:00 2001 From: Evan Nimmo Date: Wed, 9 Sep 2020 08:32:47 +1200 Subject: [PATCH 326/648] i2c: algo: pca: Reapply i2c bus settings after reset If something goes wrong (such as the SCL being stuck low) then we need to reset the PCA chip. The issue with this is that on reset we lose all config settings and the chip ends up in a disabled state which results in a lock up/high CPU usage. We need to re-apply any configuration that had previously been set and re-enable the chip. Signed-off-by: Evan Nimmo Reviewed-by: Chris Packham Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-pca.c | 35 +++++++++++++++++++++----------- include/linux/i2c-algo-pca.h | 15 ++++++++++++++ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 710fbef9a9c2..384af88e58ad 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -41,8 +41,22 @@ static void pca_reset(struct i2c_algo_pca_data *adap) pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_IPRESET); pca_outw(adap, I2C_PCA_IND, 0xA5); pca_outw(adap, I2C_PCA_IND, 0x5A); + + /* + * After a reset we need to re-apply any configuration + * (calculated in pca_init) to get the bus in a working state. + */ + pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_IMODE); + pca_outw(adap, I2C_PCA_IND, adap->bus_settings.mode); + pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_ISCLL); + pca_outw(adap, I2C_PCA_IND, adap->bus_settings.tlow); + pca_outw(adap, I2C_PCA_INDPTR, I2C_PCA_ISCLH); + pca_outw(adap, I2C_PCA_IND, adap->bus_settings.thi); + + pca_set_con(adap, I2C_PCA_CON_ENSIO); } else { adap->reset_chip(adap->data); + pca_set_con(adap, I2C_PCA_CON_ENSIO | adap->bus_settings.clock_freq); } } @@ -423,13 +437,14 @@ static int pca_init(struct i2c_adapter *adap) " Use the nominal frequency.\n", adap->name); } - pca_reset(pca_data); - clock = pca_clock(pca_data); printk(KERN_INFO "%s: Clock frequency is %dkHz\n", adap->name, freqs[clock]); - pca_set_con(pca_data, I2C_PCA_CON_ENSIO | clock); + /* Store settings as these will be needed when the PCA chip is reset */ + pca_data->bus_settings.clock_freq = clock; + + pca_reset(pca_data); } else { int clock; int mode; @@ -496,19 +511,15 @@ static int pca_init(struct i2c_adapter *adap) thi = tlow * min_thi / min_tlow; } + /* Store settings as these will be needed when the PCA chip is reset */ + pca_data->bus_settings.mode = mode; + pca_data->bus_settings.tlow = tlow; + pca_data->bus_settings.thi = thi; + pca_reset(pca_data); printk(KERN_INFO "%s: Clock frequency is %dHz\n", adap->name, clock * 100); - - pca_outw(pca_data, I2C_PCA_INDPTR, I2C_PCA_IMODE); - pca_outw(pca_data, I2C_PCA_IND, mode); - pca_outw(pca_data, I2C_PCA_INDPTR, I2C_PCA_ISCLL); - pca_outw(pca_data, I2C_PCA_IND, tlow); - pca_outw(pca_data, I2C_PCA_INDPTR, I2C_PCA_ISCLH); - pca_outw(pca_data, I2C_PCA_IND, thi); - - pca_set_con(pca_data, I2C_PCA_CON_ENSIO); } udelay(500); /* 500 us for oscillator to stabilise */ diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h index d03071732db4..7c522fdd9ea7 100644 --- a/include/linux/i2c-algo-pca.h +++ b/include/linux/i2c-algo-pca.h @@ -53,6 +53,20 @@ #define I2C_PCA_CON_SI 0x08 /* Serial Interrupt */ #define I2C_PCA_CON_CR 0x07 /* Clock Rate (MASK) */ +/** + * struct pca_i2c_bus_settings - The configured PCA i2c bus settings + * @mode: Configured i2c bus mode + * @tlow: Configured SCL LOW period + * @thi: Configured SCL HIGH period + * @clock_freq: The configured clock frequency + */ +struct pca_i2c_bus_settings { + int mode; + int tlow; + int thi; + int clock_freq; +}; + struct i2c_algo_pca_data { void *data; /* private low level data */ void (*write_byte) (void *data, int reg, int val); @@ -64,6 +78,7 @@ struct i2c_algo_pca_data { * For PCA9665, use the frequency you want here. */ unsigned int i2c_clock; unsigned int chip; + struct pca_i2c_bus_settings bus_settings; }; int i2c_pca_add_bus(struct i2c_adapter *); From fc19d559b0d31b5b831fd468b10d7dadafc0d0ec Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 9 Sep 2020 10:00:41 +0800 Subject: [PATCH 327/648] ALSA: hda/realtek - The Mic on a RedmiBook doesn't work The Mic connects to the Nid 0x19, but the configuration of Nid 0x19 is not defined to Mic, and also need to set the coeff to enable the auto detection on the Nid 0x19. After this change, the Mic plugging in or plugging out could be detected and could record the sound from the Mic. And the coeff value is suggested by Kailang of Realtek. Cc: Kailang Yang Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20200909020041.8967-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index abfc602c3b92..85e207173f5d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6228,6 +6228,7 @@ enum { ALC269_FIXUP_LEMOTE_A1802, ALC269_FIXUP_LEMOTE_A190X, ALC256_FIXUP_INTEL_NUC8_RUGGED, + ALC255_FIXUP_XIAOMI_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -7591,6 +7592,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC255_FIXUP_XIAOMI_HEADSET_MIC] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x45 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x5089 }, + { } + }, + .chained = true, + .chain_id = ALC289_FIXUP_ASUS_GA401 + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7888,6 +7899,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b35, 0x1236, "CZC TMI", ALC269_FIXUP_CZC_TMI), SND_PCI_QUIRK(0x1b35, 0x1237, "CZC L101", ALC269_FIXUP_CZC_L101), SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ + SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), @@ -8065,6 +8077,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_HUAWEI_MBX_STEREO, .name = "huawei-mbx-stereo"}, {.id = ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE, .name = "alc256-medion-headset"}, {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, + {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {} }; #define ALC225_STANDARD_PINS \ From f022ff7bf377ca94367be05de61277934d42ea74 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 7 Sep 2020 12:20:36 +0300 Subject: [PATCH 328/648] thunderbolt: Retry DROM read once if parsing fails Kai-Heng reported that sometimes DROM parsing of ASUS PA27AC Thunderbolt 3 monitor fails. This makes the driver to fail to add the device so only DisplayPort tunneling is functional. It is not clear what exactly happens but waiting for 100 ms and retrying the read seems to work this around so we do that here. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206493 Reported-by: Kai-Heng Feng Tested-by: Kai-Heng Feng Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg --- drivers/thunderbolt/eeprom.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c index 3ebca44ab3fa..0c8471be3e32 100644 --- a/drivers/thunderbolt/eeprom.c +++ b/drivers/thunderbolt/eeprom.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include "tb.h" @@ -389,8 +390,8 @@ static int tb_drom_parse_entries(struct tb_switch *sw) struct tb_drom_entry_header *entry = (void *) (sw->drom + pos); if (pos + 1 == drom_size || pos + entry->len > drom_size || !entry->len) { - tb_sw_warn(sw, "drom buffer overrun, aborting\n"); - return -EIO; + tb_sw_warn(sw, "DROM buffer overrun\n"); + return -EILSEQ; } switch (entry->type) { @@ -526,7 +527,8 @@ int tb_drom_read(struct tb_switch *sw) u16 size; u32 crc; struct tb_drom_header *header; - int res; + int res, retries = 1; + if (sw->drom) return 0; @@ -612,7 +614,17 @@ int tb_drom_read(struct tb_switch *sw) tb_sw_warn(sw, "drom device_rom_revision %#x unknown\n", header->device_rom_revision); - return tb_drom_parse_entries(sw); + res = tb_drom_parse_entries(sw); + /* If the DROM parsing fails, wait a moment and retry once */ + if (res == -EILSEQ && retries--) { + tb_sw_warn(sw, "parsing DROM failed, retrying\n"); + msleep(100); + res = tb_drom_read_n(sw, 0, sw->drom, size); + if (!res) + goto parse; + } + + return res; err: kfree(sw->drom); sw->drom = NULL; From 19873eec7e13fda140a0ebc75d6664e57c00bfb1 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 4 Sep 2020 19:55:55 -0700 Subject: [PATCH 329/648] Drivers: hv: vmbus: hibernation: do not hang forever in vmbus_bus_resume() After we Stop and later Start a VM that uses Accelerated Networking (NIC SR-IOV), currently the VF vmbus device's Instance GUID can change, so after vmbus_bus_resume() -> vmbus_request_offers(), vmbus_onoffer() can not find the original vmbus channel of the VF, and hence we can't complete() vmbus_connection.ready_for_resume_event in check_ready_for_resume_event(), and the VM hangs in vmbus_bus_resume() forever. Fix the issue by adding a timeout, so the resuming can still succeed, and the saved state is not lost, and according to my test, the user can disable Accelerated Networking and then will be able to SSH into the VM for further recovery. Also prevent the VM in question from suspending again. The host will be fixed so in future the Instance GUID will stay the same across hibernation. Fixes: d8bd2d442bb2 ("Drivers: hv: vmbus: Resume after fixing up old primary channels") Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20200905025555.45614-1-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 45c954d75e70..0f2d6a60f769 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2387,7 +2387,10 @@ static int vmbus_bus_suspend(struct device *dev) if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) wait_for_completion(&vmbus_connection.ready_for_suspend_event); - WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0); + if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { + pr_err("Can not suspend due to a previous failed resuming\n"); + return -EBUSY; + } mutex_lock(&vmbus_connection.channel_mutex); @@ -2463,7 +2466,9 @@ static int vmbus_bus_resume(struct device *dev) vmbus_request_offers(); - wait_for_completion(&vmbus_connection.ready_for_resume_event); + if (wait_for_completion_timeout( + &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0) + pr_err("Some vmbus device is missing after suspending?\n"); /* Reset the event for the next suspend. */ reinit_completion(&vmbus_connection.ready_for_suspend_event); From b59a7ca15464c78ea1ba3b280cfc5ac5ece11ade Mon Sep 17 00:00:00 2001 From: Gustav Wiklander Date: Tue, 8 Sep 2020 17:11:29 +0200 Subject: [PATCH 330/648] spi: Fix memory leak on splited transfers In the prepare_message callback the bus driver has the opportunity to split a transfer into smaller chunks. spi_map_msg is done after prepare_message. Function spi_res_release releases the splited transfers in the message. Therefore spi_res_release should be called after spi_map_msg. The previous try at this was commit c9ba7a16d0f1 which released the splited transfers after spi_finalize_current_message had been called. This introduced a race since the message struct could be out of scope because the spi_sync call got completed. Fixes this leak on spi bus driver spi-bcm2835.c when transfer size is greater than 65532: Kmemleak: sg_alloc_table+0x28/0xc8 spi_map_buf+0xa4/0x300 __spi_pump_messages+0x370/0x748 __spi_sync+0x1d4/0x270 spi_sync+0x34/0x58 spi_test_execute_msg+0x60/0x340 [spi_loopback_test] spi_test_run_iter+0x548/0x578 [spi_loopback_test] spi_test_run_test+0x94/0x140 [spi_loopback_test] spi_test_run_tests+0x150/0x180 [spi_loopback_test] spi_loopback_test_probe+0x50/0xd0 [spi_loopback_test] spi_drv_probe+0x84/0xe0 Signed-off-by: Gustav Wiklander Link: https://lore.kernel.org/r/20200908151129.15915-1-gustav.wiklander@axis.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 92b8fb416dca..832926b27c92 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1327,8 +1327,6 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, if (msg->status && ctlr->handle_err) ctlr->handle_err(ctlr, msg); - spi_res_release(ctlr, msg); - spi_finalize_current_message(ctlr); return ret; @@ -1727,6 +1725,13 @@ void spi_finalize_current_message(struct spi_controller *ctlr) spi_unmap_msg(ctlr, mesg); + /* In the prepare_messages callback the spi bus has the opportunity to + * split a transfer to smaller chunks. + * Release splited transfers here since spi_map_msg is done on the + * splited transfers. + */ + spi_res_release(ctlr, mesg); + if (ctlr->cur_msg_prepared && ctlr->unprepare_message) { ret = ctlr->unprepare_message(ctlr, mesg); if (ret) { From c170a5a3b6944ad8e76547c4a1d9fe81c8f23ac8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 9 Sep 2020 12:43:04 +0300 Subject: [PATCH 331/648] spi: stm32: fix pm_runtime_get_sync() error checking The pm_runtime_get_sync() can return either 0 or 1 on success but this code treats 1 as a failure. Fixes: db96bf976a4f ("spi: stm32: fixes suspend/resume management") Signed-off-by: Dan Carpenter Reviewed-by: Alain Volmat Link: https://lore.kernel.org/r/20200909094304.GA420136@mwanda Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index a00f6b51ccbf..3056428b09f3 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -2064,7 +2064,7 @@ static int stm32_spi_resume(struct device *dev) } ret = pm_runtime_get_sync(dev); - if (ret) { + if (ret < 0) { dev_err(dev, "Unable to power device:%d\n", ret); return ret; } From 2cd896a5e86fc326bda8614b96c0401dcc145868 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 9 Sep 2020 08:44:25 +0530 Subject: [PATCH 332/648] block: Set same_page to false in __bio_try_merge_page if ret is false If we hit the UINT_MAX limit of bio->bi_iter.bi_size and so we are anyway not merging this page in this bio, then it make sense to make same_page also as false before returning. Without this patch, we hit below WARNING in iomap. This mostly happens with very large memory system and / or after tweaking vm dirty threshold params to delay writeback of dirty data. WARNING: CPU: 18 PID: 5130 at fs/iomap/buffered-io.c:74 iomap_page_release+0x120/0x150 CPU: 18 PID: 5130 Comm: fio Kdump: loaded Tainted: G W 5.8.0-rc3 #6 Call Trace: __remove_mapping+0x154/0x320 (unreliable) iomap_releasepage+0x80/0x180 try_to_release_page+0x94/0xe0 invalidate_inode_page+0xc8/0x110 invalidate_mapping_pages+0x1dc/0x540 generic_fadvise+0x3c8/0x450 xfs_file_fadvise+0x2c/0xe0 [xfs] vfs_fadvise+0x3c/0x60 ksys_fadvise64_64+0x68/0xe0 sys_fadvise64+0x28/0x40 system_call_exception+0xf8/0x1c0 system_call_common+0xf0/0x278 Fixes: cc90bc68422 ("block: fix "check bi_size overflow before merge"") Reported-by: Shivaprasad G Bhat Suggested-by: Christoph Hellwig Signed-off-by: Anju T Sudhakar Signed-off-by: Ritesh Harjani Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index a9931f23d933..e865ea55b9f9 100644 --- a/block/bio.c +++ b/block/bio.c @@ -879,8 +879,10 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (page_is_mergeable(bv, page, len, off, same_page)) { - if (bio->bi_iter.bi_size > UINT_MAX - len) + if (bio->bi_iter.bi_size > UINT_MAX - len) { + *same_page = false; return false; + } bv->bv_len += len; bio->bi_iter.bi_size += len; return true; From 9e712446a80bba9ede824ff00f2af630ed9ac0be Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 5 Sep 2020 20:16:24 +0800 Subject: [PATCH 333/648] RDMA/bnxt_re: Remove set but not used variable 'qplib_ctx' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/infiniband/hw/bnxt_re/main.c:1012:25: warning: variable ‘qplib_ctx’ set but not used [-Wunused-but-set-variable] Fixes: f86b31c6a28f ("RDMA/bnxt_re: Static NQ depth allocation") Link: https://lore.kernel.org/r/20200905121624.32776-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 13bbeb42794f..53aee5a42ab8 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1009,7 +1009,6 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev) static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { struct bnxt_re_ring_attr rattr = {}; - struct bnxt_qplib_ctx *qplib_ctx; int num_vec_created = 0; int rc = 0, i; u8 type; @@ -1032,7 +1031,6 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) if (rc) goto dealloc_res; - qplib_ctx = &rdev->qplib_ctx; for (i = 0; i < rdev->num_msix - 1; i++) { struct bnxt_qplib_nq *nq; From 39c2d639ca183a400ba3259fa0825714cbb09c53 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 7 Sep 2020 15:52:16 +0530 Subject: [PATCH 334/648] RDMA/rtrs-srv: Set .release function for rtrs srv device during device init The device .release function was not being set during the device initialization. This was leading to the below warning, in error cases when put_srv was called before device_add was called. Warning: Device '(null)' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt. So, set the device .release function during device initialization in the __alloc_srv() function. Fixes: baa5b28b7a47 ("RDMA/rtrs-srv: Replace device_register with device_initialize and device_add") Link: https://lore.kernel.org/r/20200907102216.104041-1-haris.iqbal@cloud.ionos.com Signed-off-by: Md Haris Iqbal Reviewed-by: Leon Romanovsky Acked-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 8 -------- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 2f981ae97076..cf6a2be61695 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -152,13 +152,6 @@ static struct attribute_group rtrs_srv_stats_attr_group = { .attrs = rtrs_srv_stats_attrs, }; -static void rtrs_srv_dev_release(struct device *dev) -{ - struct rtrs_srv *srv = container_of(dev, struct rtrs_srv, dev); - - kfree(srv); -} - static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) { struct rtrs_srv *srv = sess->srv; @@ -172,7 +165,6 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) goto unlock; } srv->dev.class = rtrs_dev_class; - srv->dev.release = rtrs_srv_dev_release; err = dev_set_name(&srv->dev, "%s", sess->s.sessname); if (err) goto unlock; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index b61a18e57aeb..28f6414dfa3d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1319,6 +1319,13 @@ static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_sess *sess) return sess->cur_cq_vector; } +static void rtrs_srv_dev_release(struct device *dev) +{ + struct rtrs_srv *srv = container_of(dev, struct rtrs_srv, dev); + + kfree(srv); +} + static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, const uuid_t *paths_uuid) { @@ -1337,6 +1344,7 @@ static struct rtrs_srv *__alloc_srv(struct rtrs_srv_ctx *ctx, srv->queue_depth = sess_queue_depth; srv->ctx = ctx; device_initialize(&srv->dev); + srv->dev.release = rtrs_srv_dev_release; srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); From 0b089c1ef7047652b13b4cdfdb1e0e7dbdb8c9ab Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 4 Sep 2020 12:50:39 -0700 Subject: [PATCH 335/648] IB/isert: Fix unaligned immediate-data handling Currently we allocate rx buffers in a single contiguous buffers for headers (iser and iscsi) and data trailer. This means that most likely the data starting offset is aligned to 76 bytes (size of both headers). This worked fine for years, but at some point this broke, resulting in data corruptions in isert when a command comes with immediate data and the underlying backend device assumes 512 bytes buffer alignment. We assume a hard-requirement for all direct I/O buffers to be 512 bytes aligned. To fix this, we should avoid passing unaligned buffers for I/O. Instead, we allocate our recv buffers with some extra space such that we can have the data portion align to 512 byte boundary. This also means that we cannot reference headers or data using structure but rather accessors (as they may move based on alignment). Also, get rid of the wrong __packed annotation from iser_rx_desc as this has only harmful effects (not aligned to anything). This affects the rx descriptors for iscsi login and data plane. Fixes: 3d75ca0adef4 ("block: introduce multi-page bvec helpers") Link: https://lore.kernel.org/r/20200904195039.31687-1-sagi@grimberg.me Reported-by: Stephen Rust Tested-by: Doug Dumitru Signed-off-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/isert/ib_isert.c | 93 +++++++++++++------------ drivers/infiniband/ulp/isert/ib_isert.h | 41 ++++++++--- 2 files changed, 78 insertions(+), 56 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 61e2f7fc513d..62a61ca949c5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -140,15 +140,15 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) rx_desc = isert_conn->rx_descs; for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) { - dma_addr = ib_dma_map_single(ib_dev, (void *)rx_desc, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + dma_addr = ib_dma_map_single(ib_dev, rx_desc->buf, + ISER_RX_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(ib_dev, dma_addr)) goto dma_map_fail; rx_desc->dma_addr = dma_addr; rx_sg = &rx_desc->rx_sg; - rx_sg->addr = rx_desc->dma_addr; + rx_sg->addr = rx_desc->dma_addr + isert_get_hdr_offset(rx_desc); rx_sg->length = ISER_RX_PAYLOAD_SIZE; rx_sg->lkey = device->pd->local_dma_lkey; rx_desc->rx_cqe.done = isert_recv_done; @@ -160,7 +160,7 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn) rx_desc = isert_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) { ib_dma_unmap_single(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); } kfree(isert_conn->rx_descs); isert_conn->rx_descs = NULL; @@ -181,7 +181,7 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn) rx_desc = isert_conn->rx_descs; for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++) { ib_dma_unmap_single(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); } kfree(isert_conn->rx_descs); @@ -299,10 +299,9 @@ isert_free_login_buf(struct isert_conn *isert_conn) ISER_RX_PAYLOAD_SIZE, DMA_TO_DEVICE); kfree(isert_conn->login_rsp_buf); - ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, - DMA_FROM_DEVICE); - kfree(isert_conn->login_req_buf); + ib_dma_unmap_single(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); + kfree(isert_conn->login_desc); } static int @@ -311,25 +310,25 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, { int ret; - isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf), + isert_conn->login_desc = kzalloc(sizeof(*isert_conn->login_desc), GFP_KERNEL); - if (!isert_conn->login_req_buf) + if (!isert_conn->login_desc) return -ENOMEM; - isert_conn->login_req_dma = ib_dma_map_single(ib_dev, - isert_conn->login_req_buf, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma); + isert_conn->login_desc->dma_addr = ib_dma_map_single(ib_dev, + isert_conn->login_desc->buf, + ISER_RX_SIZE, DMA_FROM_DEVICE); + ret = ib_dma_mapping_error(ib_dev, isert_conn->login_desc->dma_addr); if (ret) { - isert_err("login_req_dma mapping error: %d\n", ret); - isert_conn->login_req_dma = 0; - goto out_free_login_req_buf; + isert_err("login_desc dma mapping error: %d\n", ret); + isert_conn->login_desc->dma_addr = 0; + goto out_free_login_desc; } isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { ret = -ENOMEM; - goto out_unmap_login_req_buf; + goto out_unmap_login_desc; } isert_conn->login_rsp_dma = ib_dma_map_single(ib_dev, @@ -346,11 +345,11 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, out_free_login_rsp_buf: kfree(isert_conn->login_rsp_buf); -out_unmap_login_req_buf: - ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); -out_free_login_req_buf: - kfree(isert_conn->login_req_buf); +out_unmap_login_desc: + ib_dma_unmap_single(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); +out_free_login_desc: + kfree(isert_conn->login_desc); return ret; } @@ -476,7 +475,7 @@ isert_connect_release(struct isert_conn *isert_conn) if (isert_conn->qp) isert_destroy_qp(isert_conn); - if (isert_conn->login_req_buf) + if (isert_conn->login_desc) isert_free_login_buf(isert_conn); isert_device_put(device); @@ -862,17 +861,18 @@ isert_login_post_recv(struct isert_conn *isert_conn) int ret; memset(&sge, 0, sizeof(struct ib_sge)); - sge.addr = isert_conn->login_req_dma; + sge.addr = isert_conn->login_desc->dma_addr + + isert_get_hdr_offset(isert_conn->login_desc); sge.length = ISER_RX_PAYLOAD_SIZE; sge.lkey = isert_conn->device->pd->local_dma_lkey; isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n", sge.addr, sge.length, sge.lkey); - isert_conn->login_req_buf->rx_cqe.done = isert_login_recv_done; + isert_conn->login_desc->rx_cqe.done = isert_login_recv_done; memset(&rx_wr, 0, sizeof(struct ib_recv_wr)); - rx_wr.wr_cqe = &isert_conn->login_req_buf->rx_cqe; + rx_wr.wr_cqe = &isert_conn->login_desc->rx_cqe; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; @@ -949,7 +949,7 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, static void isert_rx_login_req(struct isert_conn *isert_conn) { - struct iser_rx_desc *rx_desc = isert_conn->login_req_buf; + struct iser_rx_desc *rx_desc = isert_conn->login_desc; int rx_buflen = isert_conn->login_req_len; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_login *login = conn->conn_login; @@ -961,7 +961,7 @@ isert_rx_login_req(struct isert_conn *isert_conn) if (login->first_request) { struct iscsi_login_req *login_req = - (struct iscsi_login_req *)&rx_desc->iscsi_header; + (struct iscsi_login_req *)isert_get_iscsi_hdr(rx_desc); /* * Setup the initial iscsi_login values from the leading * login request PDU. @@ -980,13 +980,13 @@ isert_rx_login_req(struct isert_conn *isert_conn) login->tsih = be16_to_cpu(login_req->tsih); } - memcpy(&login->req[0], (void *)&rx_desc->iscsi_header, ISCSI_HDR_LEN); + memcpy(&login->req[0], isert_get_iscsi_hdr(rx_desc), ISCSI_HDR_LEN); size = min(rx_buflen, MAX_KEY_VALUE_PAIRS); isert_dbg("Using login payload size: %d, rx_buflen: %d " "MAX_KEY_VALUE_PAIRS: %d\n", size, rx_buflen, MAX_KEY_VALUE_PAIRS); - memcpy(login->req_buf, &rx_desc->data[0], size); + memcpy(login->req_buf, isert_get_data(rx_desc), size); if (login->first_request) { complete(&isert_conn->login_comp); @@ -1051,14 +1051,15 @@ isert_handle_scsi_cmd(struct isert_conn *isert_conn, if (imm_data_len != data_len) { sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE)); sg_copy_from_buffer(cmd->se_cmd.t_data_sg, sg_nents, - &rx_desc->data[0], imm_data_len); + isert_get_data(rx_desc), imm_data_len); isert_dbg("Copy Immediate sg_nents: %u imm_data_len: %d\n", sg_nents, imm_data_len); } else { sg_init_table(&isert_cmd->sg, 1); cmd->se_cmd.t_data_sg = &isert_cmd->sg; cmd->se_cmd.t_data_nents = 1; - sg_set_buf(&isert_cmd->sg, &rx_desc->data[0], imm_data_len); + sg_set_buf(&isert_cmd->sg, isert_get_data(rx_desc), + imm_data_len); isert_dbg("Transfer Immediate imm_data_len: %d\n", imm_data_len); } @@ -1127,9 +1128,9 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, } isert_dbg("Copying DataOut: sg_start: %p, sg_off: %u " "sg_nents: %u from %p %u\n", sg_start, sg_off, - sg_nents, &rx_desc->data[0], unsol_data_len); + sg_nents, isert_get_data(rx_desc), unsol_data_len); - sg_copy_from_buffer(sg_start, sg_nents, &rx_desc->data[0], + sg_copy_from_buffer(sg_start, sg_nents, isert_get_data(rx_desc), unsol_data_len); rc = iscsit_check_dataout_payload(cmd, hdr, false); @@ -1188,7 +1189,7 @@ isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd } cmd->text_in_ptr = text_in; - memcpy(cmd->text_in_ptr, &rx_desc->data[0], payload_length); + memcpy(cmd->text_in_ptr, isert_get_data(rx_desc), payload_length); return iscsit_process_text_cmd(conn, cmd, hdr); } @@ -1198,7 +1199,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, uint32_t read_stag, uint64_t read_va, uint32_t write_stag, uint64_t write_va) { - struct iscsi_hdr *hdr = &rx_desc->iscsi_header; + struct iscsi_hdr *hdr = isert_get_iscsi_hdr(rx_desc); struct iscsi_conn *conn = isert_conn->conn; struct iscsi_cmd *cmd; struct isert_cmd *isert_cmd; @@ -1296,8 +1297,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) struct isert_conn *isert_conn = wc->qp->qp_context; struct ib_device *ib_dev = isert_conn->cm_id->device; struct iser_rx_desc *rx_desc = cqe_to_rx_desc(wc->wr_cqe); - struct iscsi_hdr *hdr = &rx_desc->iscsi_header; - struct iser_ctrl *iser_ctrl = &rx_desc->iser_header; + struct iscsi_hdr *hdr = isert_get_iscsi_hdr(rx_desc); + struct iser_ctrl *iser_ctrl = isert_get_iser_hdr(rx_desc); uint64_t read_va = 0, write_va = 0; uint32_t read_stag = 0, write_stag = 0; @@ -1311,7 +1312,7 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) rx_desc->in_use = true; ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); isert_dbg("DMA: 0x%llx, iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n", rx_desc->dma_addr, hdr->opcode, hdr->itt, hdr->flags, @@ -1346,7 +1347,7 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) read_stag, read_va, write_stag, write_va); ib_dma_sync_single_for_device(ib_dev, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_SIZE, DMA_FROM_DEVICE); } static void @@ -1360,8 +1361,8 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } - ib_dma_sync_single_for_cpu(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); isert_conn->login_req_len = wc->byte_len - ISER_HEADERS_LEN; @@ -1376,8 +1377,8 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) complete(&isert_conn->login_req_comp); mutex_unlock(&isert_conn->mutex); - ib_dma_sync_single_for_device(ib_dev, isert_conn->login_req_dma, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ib_dma_sync_single_for_device(ib_dev, isert_conn->login_desc->dma_addr, + ISER_RX_SIZE, DMA_FROM_DEVICE); } static void diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index c55f7d9bfced..7fee4a65e181 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -59,9 +59,11 @@ ISERT_MAX_TX_MISC_PDUS + \ ISERT_MAX_RX_MISC_PDUS) -#define ISER_RX_PAD_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \ - (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \ - sizeof(struct ib_cqe) + sizeof(bool))) +/* + * RX size is default of 8k plus headers, but data needs to align to + * 512 boundary, so use 1024 to have the extra space for alignment. + */ +#define ISER_RX_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 1024) /* Maximum support is 16MB I/O size */ #define ISCSI_ISER_MAX_SG_TABLESIZE 4096 @@ -81,21 +83,41 @@ enum iser_conn_state { }; struct iser_rx_desc { - struct iser_ctrl iser_header; - struct iscsi_hdr iscsi_header; - char data[ISCSI_DEF_MAX_RECV_SEG_LEN]; + char buf[ISER_RX_SIZE]; u64 dma_addr; struct ib_sge rx_sg; struct ib_cqe rx_cqe; bool in_use; - char pad[ISER_RX_PAD_SIZE]; -} __packed; +}; static inline struct iser_rx_desc *cqe_to_rx_desc(struct ib_cqe *cqe) { return container_of(cqe, struct iser_rx_desc, rx_cqe); } +static void *isert_get_iser_hdr(struct iser_rx_desc *desc) +{ + return PTR_ALIGN(desc->buf + ISER_HEADERS_LEN, 512) - ISER_HEADERS_LEN; +} + +static size_t isert_get_hdr_offset(struct iser_rx_desc *desc) +{ + return isert_get_iser_hdr(desc) - (void *)desc->buf; +} + +static void *isert_get_iscsi_hdr(struct iser_rx_desc *desc) +{ + return isert_get_iser_hdr(desc) + sizeof(struct iser_ctrl); +} + +static void *isert_get_data(struct iser_rx_desc *desc) +{ + void *data = isert_get_iser_hdr(desc) + ISER_HEADERS_LEN; + + WARN_ON((uintptr_t)data & 511); + return data; +} + struct iser_tx_desc { struct iser_ctrl iser_header; struct iscsi_hdr iscsi_header; @@ -142,9 +164,8 @@ struct isert_conn { u32 responder_resources; u32 initiator_depth; bool pi_support; - struct iser_rx_desc *login_req_buf; + struct iser_rx_desc *login_desc; char *login_rsp_buf; - u64 login_req_dma; int login_req_len; u64 login_rsp_dma; struct iser_rx_desc *rx_descs; From 3ca1a42a52ca4b4f02061683851692ad65fefac8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Sep 2020 01:27:39 -0700 Subject: [PATCH 336/648] net: qrtr: check skb_put_padto() return value If skb_put_padto() returns an error, skb has been freed. Better not touch it anymore, as reported by syzbot [1] Note to qrtr maintainers : this suggests qrtr_sendmsg() should adjust sock_alloc_send_skb() second parameter to account for the potential added alignment to avoid reallocation. [1] BUG: KASAN: use-after-free in __skb_insert include/linux/skbuff.h:1907 [inline] BUG: KASAN: use-after-free in __skb_queue_before include/linux/skbuff.h:2016 [inline] BUG: KASAN: use-after-free in __skb_queue_tail include/linux/skbuff.h:2049 [inline] BUG: KASAN: use-after-free in skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146 Write of size 8 at addr ffff88804d8ab3c0 by task syz-executor.4/4316 CPU: 1 PID: 4316 Comm: syz-executor.4 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1d6/0x29e lib/dump_stack.c:118 print_address_description+0x66/0x620 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report+0x132/0x1d0 mm/kasan/report.c:530 __skb_insert include/linux/skbuff.h:1907 [inline] __skb_queue_before include/linux/skbuff.h:2016 [inline] __skb_queue_tail include/linux/skbuff.h:2049 [inline] skb_queue_tail+0x6b/0x120 net/core/skbuff.c:3146 qrtr_tun_send+0x1a/0x40 net/qrtr/tun.c:23 qrtr_node_enqueue+0x44f/0xc00 net/qrtr/qrtr.c:364 qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] sock_write_iter+0x317/0x470 net/socket.c:998 call_write_iter include/linux/fs.h:1882 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0xa96/0xd10 fs/read_write.c:578 ksys_write+0x11b/0x220 fs/read_write.c:631 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45d5b9 Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f84b5b81c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000038b40 RCX: 000000000045d5b9 RDX: 0000000000000055 RSI: 0000000020001240 RDI: 0000000000000003 RBP: 00007f84b5b81ca0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000f R13: 00007ffcbbf86daf R14: 00007f84b5b829c0 R15: 000000000118cf4c Allocated by task 4316: kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc+0x100/0x130 mm/kasan/common.c:461 slab_post_alloc_hook+0x3e/0x290 mm/slab.h:518 slab_alloc mm/slab.c:3312 [inline] kmem_cache_alloc+0x1c1/0x2d0 mm/slab.c:3482 skb_clone+0x1b2/0x370 net/core/skbuff.c:1449 qrtr_bcast_enqueue+0x6d/0x140 net/qrtr/qrtr.c:857 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] sock_write_iter+0x317/0x470 net/socket.c:998 call_write_iter include/linux/fs.h:1882 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0xa96/0xd10 fs/read_write.c:578 ksys_write+0x11b/0x220 fs/read_write.c:631 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 4316: kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track+0x3d/0x70 mm/kasan/common.c:56 kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0xdd/0x110 mm/kasan/common.c:422 __cache_free mm/slab.c:3418 [inline] kmem_cache_free+0x82/0xf0 mm/slab.c:3693 __skb_pad+0x3f5/0x5a0 net/core/skbuff.c:1823 __skb_put_padto include/linux/skbuff.h:3233 [inline] skb_put_padto include/linux/skbuff.h:3252 [inline] qrtr_node_enqueue+0x62f/0xc00 net/qrtr/qrtr.c:360 qrtr_bcast_enqueue+0xbe/0x140 net/qrtr/qrtr.c:861 qrtr_sendmsg+0x680/0x9c0 net/qrtr/qrtr.c:960 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] sock_write_iter+0x317/0x470 net/socket.c:998 call_write_iter include/linux/fs.h:1882 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0xa96/0xd10 fs/read_write.c:578 ksys_write+0x11b/0x220 fs/read_write.c:631 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff88804d8ab3c0 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 0 bytes inside of 224-byte region [ffff88804d8ab3c0, ffff88804d8ab4a0) The buggy address belongs to the page: page:00000000ea8cccfb refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88804d8abb40 pfn:0x4d8ab flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0002237ec8 ffffea00029b3388 ffff88821bb66800 raw: ffff88804d8abb40 ffff88804d8ab000 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Fixes: ce57785bf91b ("net: qrtr: fix len of skb_put_padto in qrtr_node_enqueue") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Carl Huang Cc: Wen Gong Cc: Bjorn Andersson Cc: Manivannan Sadhasivam Acked-by: Manivannan Sadhasivam Reviewed-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 90c558f89d46..957aa9263ba4 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -332,8 +332,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, { struct qrtr_hdr_v1 *hdr; size_t len = skb->len; - int rc = -ENODEV; - int confirm_rx; + int rc, confirm_rx; confirm_rx = qrtr_tx_wait(node, to->sq_node, to->sq_port, type); if (confirm_rx < 0) { @@ -357,15 +356,17 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->size = cpu_to_le32(len); hdr->confirm_rx = !!confirm_rx; - skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); - - mutex_lock(&node->ep_lock); - if (node->ep) - rc = node->ep->xmit(node->ep, skb); - else - kfree_skb(skb); - mutex_unlock(&node->ep_lock); + rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); + if (!rc) { + mutex_lock(&node->ep_lock); + rc = -ENODEV; + if (node->ep) + rc = node->ep->xmit(node->ep, skb); + else + kfree_skb(skb); + mutex_unlock(&node->ep_lock); + } /* Need to ensure that a subsequent message carries the otherwise lost * confirm_rx flag if we dropped this one */ if (rc && confirm_rx) From 4a009cb04aeca0de60b73f37b102573354214b52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Sep 2020 01:27:40 -0700 Subject: [PATCH 337/648] net: add __must_check to skb_put_padto() skb_put_padto() and __skb_put_padto() callers must check return values or risk use-after-free. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ed9bea924dc3..04a18e01b362 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3223,8 +3223,9 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error if @free_on_error is true. */ -static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, - bool free_on_error) +static inline int __must_check __skb_put_padto(struct sk_buff *skb, + unsigned int len, + bool free_on_error) { unsigned int size = skb->len; @@ -3247,7 +3248,7 @@ static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); } From b87f9fe1ac9441b75656dfd95eba70ef9f0375e0 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 9 Sep 2020 17:38:21 +0800 Subject: [PATCH 338/648] hsr: avoid newline at end of message in NL_SET_ERR_MSG_MOD clean follow coccicheck warning: net//hsr/hsr_netlink.c:94:8-42: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD net//hsr/hsr_netlink.c:87:30-57: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD net//hsr/hsr_netlink.c:79:29-53: WARNING avoid newline at end of message in NL_SET_ERR_MSG_MOD Signed-off-by: Ye Bin Signed-off-by: David S. Miller --- net/hsr/hsr_netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 06c3cd988760..0e4681cf71db 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -76,7 +76,7 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]); if (proto >= HSR_PROTOCOL_MAX) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol\n"); + NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol"); return -EINVAL; } @@ -84,14 +84,14 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, proto_version = HSR_V0; } else { if (proto == HSR_PROTOCOL_PRP) { - NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported\n"); + NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported"); return -EINVAL; } proto_version = nla_get_u8(data[IFLA_HSR_VERSION]); if (proto_version > HSR_V1) { NL_SET_ERR_MSG_MOD(extack, - "Only HSR version 0/1 supported\n"); + "Only HSR version 0/1 supported"); return -EINVAL; } } From 9179ba31367bcf481c3c79b5f028c94faad9f30a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Sep 2020 13:58:14 +0200 Subject: [PATCH 339/648] wireguard: noise: take lock when removing handshake entry from table Eric reported that syzkaller found a race of this variety: CPU 1 CPU 2 -------------------------------------------|--------------------------------------- wg_index_hashtable_replace(old, ...) | if (hlist_unhashed(&old->index_hash)) | | wg_index_hashtable_remove(old) | hlist_del_init_rcu(&old->index_hash) | old->index_hash.pprev = NULL hlist_replace_rcu(&old->index_hash, ...) | *old->index_hash.pprev | Syzbot wasn't actually able to reproduce this more than once or create a reproducer, because the race window between checking "hlist_unhashed" and calling "hlist_replace_rcu" is just so small. Adding an mdelay(5) or similar there helps make this demonstrable using this simple script: #!/bin/bash set -ex trap 'kill $pid1; kill $pid2; ip link del wg0; ip link del wg1' EXIT ip link add wg0 type wireguard ip link add wg1 type wireguard wg set wg0 private-key <(wg genkey) listen-port 9999 wg set wg1 private-key <(wg genkey) peer $(wg show wg0 public-key) endpoint 127.0.0.1:9999 persistent-keepalive 1 wg set wg0 peer $(wg show wg1 public-key) ip link set wg0 up yes link set wg1 up | ip -force -batch - & pid1=$! yes link set wg1 down | ip -force -batch - & pid2=$! wait The fundumental underlying problem is that we permit calls to wg_index_ hashtable_remove(handshake.entry) without requiring the caller to take the handshake mutex that is intended to protect members of handshake during mutations. This is consistently the case with calls to wg_index_ hashtable_insert(handshake.entry) and wg_index_hashtable_replace( handshake.entry), but it's missing from a pertinent callsite of wg_ index_hashtable_remove(handshake.entry). So, this patch makes sure that mutex is taken. The original code was a little bit funky though, in the form of: remove(handshake.entry) lock(), memzero(handshake.some_members), unlock() remove(handshake.entry) The original intention of that double removal pattern outside the lock appears to be some attempt to prevent insertions that might happen while locks are dropped during expensive crypto operations, but actually, all callers of wg_index_hashtable_insert(handshake.entry) take the write lock and then explicitly check handshake.state, as they should, which the aforementioned memzero clears, which means an insertion should already be impossible. And regardless, the original intention was necessarily racy, since it wasn't guaranteed that something else would run after the unlock() instead of after the remove(). So, from a soundness perspective, it seems positive to remove what looks like a hack at best. The crash from both syzbot and from the script above is as follows: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 0 PID: 7395 Comm: kworker/0:3 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: wg-kex-wg1 wg_packet_handshake_receive_worker RIP: 0010:hlist_replace_rcu include/linux/rculist.h:505 [inline] RIP: 0010:wg_index_hashtable_replace+0x176/0x330 drivers/net/wireguard/peerlookup.c:174 Code: 00 fc ff df 48 89 f9 48 c1 e9 03 80 3c 01 00 0f 85 44 01 00 00 48 b9 00 00 00 00 00 fc ff df 48 8b 45 10 48 89 c6 48 c1 ee 03 <80> 3c 0e 00 0f 85 06 01 00 00 48 85 d2 4c 89 28 74 47 e8 a3 4f b5 RSP: 0018:ffffc90006a97bf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff888050ffc4f8 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88808e04e010 RBP: ffff88808e04e000 R08: 0000000000000001 R09: ffff8880543d0000 R10: ffffed100a87a000 R11: 000000000000016e R12: ffff8880543d0000 R13: ffff88808e04e008 R14: ffff888050ffc508 R15: ffff888050ffc500 FS: 0000000000000000(0000) GS:ffff8880ae600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000f5505db0 CR3: 0000000097cf7000 CR4: 00000000001526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: wg_noise_handshake_begin_session+0x752/0xc9a drivers/net/wireguard/noise.c:820 wg_receive_handshake_packet drivers/net/wireguard/receive.c:183 [inline] wg_packet_handshake_receive_worker+0x33b/0x730 drivers/net/wireguard/receive.c:220 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 Reported-by: syzbot Reported-by: Eric Dumazet Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/ Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/noise.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index 3dd3b76790d0..c0cfd9b36c0b 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -87,15 +87,12 @@ static void handshake_zero(struct noise_handshake *handshake) void wg_noise_handshake_clear(struct noise_handshake *handshake) { + down_write(&handshake->lock); wg_index_hashtable_remove( handshake->entry.peer->device->index_hashtable, &handshake->entry); - down_write(&handshake->lock); handshake_zero(handshake); up_write(&handshake->lock); - wg_index_hashtable_remove( - handshake->entry.peer->device->index_hashtable, - &handshake->entry); } static struct noise_keypair *keypair_create(struct wg_peer *peer) From 6147f7b1e90ff09bd52afc8b9206a7fcd133daf7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 9 Sep 2020 13:58:15 +0200 Subject: [PATCH 340/648] wireguard: peerlookup: take lock before checking hash in replace operation Eric's suggested fix for the previous commit's mentioned race condition was to simply take the table->lock in wg_index_hashtable_replace(). The table->lock of the hash table is supposed to protect the bucket heads, not the entires, but actually, since all the mutator functions are already taking it, it makes sense to take it too for the test to hlist_unhashed, as a defense in depth measure, so that it no longer races with deletions, regardless of what other locks are protecting individual entries. This is sensible from a performance perspective because, as Eric pointed out, the case of being unhashed is already the unlikely case, so this won't add common contention. And comparing instructions, this basically doesn't make much of a difference other than pushing and popping %r13, used by the new `bool ret`. More generally, I like the idea of locking consistency across table mutator functions, and this might let me rest slightly easier at night. Suggested-by: Eric Dumazet Link: https://lore.kernel.org/wireguard/20200908145911.4090480-1-edumazet@google.com/ Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/peerlookup.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c index e4deb331476b..f2783aa7a88f 100644 --- a/drivers/net/wireguard/peerlookup.c +++ b/drivers/net/wireguard/peerlookup.c @@ -167,9 +167,13 @@ bool wg_index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new) { - if (unlikely(hlist_unhashed(&old->index_hash))) - return false; + bool ret; + spin_lock_bh(&table->lock); + ret = !hlist_unhashed(&old->index_hash); + if (unlikely(!ret)) + goto out; + new->index = old->index; hlist_replace_rcu(&old->index_hash, &new->index_hash); @@ -180,8 +184,9 @@ bool wg_index_hashtable_replace(struct index_hashtable *table, * simply gets dropped, which isn't terrible. */ INIT_HLIST_NODE(&old->index_hash); +out: spin_unlock_bh(&table->lock); - return true; + return ret; } void wg_index_hashtable_remove(struct index_hashtable *table, From fcd2e4b9ca20faf6de959f67df5b454a5b055c56 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Aug 2020 16:40:13 -0700 Subject: [PATCH 341/648] dt-bindings: spi: Fix spi-bcm-qspi compatible ordering The binding is currently incorrectly defining the compatible strings from least specifice to most specific instead of the converse. Re-order them from most specific (left) to least specific (right) and fix the examples as well. Fixes: 5fc78f4c842a ("spi: Broadcom BRCMSTB, NSP, NS2 SoC bindings") Reviewed-by: Rob Herring Signed-off-by: Florian Fainelli --- .../bindings/spi/brcm,spi-bcm-qspi.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt index f5e518d099f2..62d4ed2d7fd7 100644 --- a/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt +++ b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt @@ -23,8 +23,8 @@ Required properties: - compatible: Must be one of : - "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-qspi" : MSPI+BSPI on BRCMSTB SoCs - "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI + "brcm,spi-brcmstb-qspi", "brcm,spi-bcm-qspi" : MSPI+BSPI on BRCMSTB SoCs + "brcm,spi-brcmstb-mspi", "brcm,spi-bcm-qspi" : Second Instance of MSPI BRCMSTB SoCs "brcm,spi-bcm7425-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI BRCMSTB SoCs @@ -36,8 +36,8 @@ Required properties: BRCMSTB SoCs "brcm,spi-bcm7278-qspi", "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI BRCMSTB SoCs - "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi" : MSPI+BSPI on Cygnus, NSP - "brcm,spi-bcm-qspi", "brcm,spi-ns2-qspi" : NS2 SoCs + "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi" : MSPI+BSPI on Cygnus, NSP + "brcm,spi-ns2-qspi", "brcm,spi-bcm-qspi" : NS2 SoCs - reg: Define the bases and ranges of the associated I/O address spaces. @@ -86,7 +86,7 @@ BRCMSTB SoC Example: spi@f03e3400 { #address-cells = <0x1>; #size-cells = <0x0>; - compatible = "brcm,spi-brcmstb-qspi", "brcm,spi-brcmstb-qspi"; + compatible = "brcm,spi-brcmstb-qspi", "brcm,spi-bcm-qspi"; reg = <0xf03e0920 0x4 0xf03e3400 0x188 0xf03e3200 0x50>; reg-names = "cs_reg", "mspi", "bspi"; interrupts = <0x6 0x5 0x4 0x3 0x2 0x1 0x0>; @@ -149,7 +149,7 @@ BRCMSTB SoC Example: #address-cells = <1>; #size-cells = <0>; clocks = <&upg_fixed>; - compatible = "brcm,spi-brcmstb-qspi", "brcm,spi-brcmstb-mspi"; + compatible = "brcm,spi-brcmstb-mspi", "brcm,spi-bcm-qspi"; reg = <0xf0416000 0x180>; reg-names = "mspi"; interrupts = <0x14>; @@ -160,7 +160,7 @@ BRCMSTB SoC Example: iProc SoC Example: qspi: spi@18027200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"; + compatible = "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi"; reg = <0x18027200 0x184>, <0x18027000 0x124>, <0x1811c408 0x004>, @@ -191,7 +191,7 @@ iProc SoC Example: NS2 SoC Example: qspi: spi@66470200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-ns2-qspi"; + compatible = "brcm,spi-ns2-qspi", "brcm,spi-bcm-qspi"; reg = <0x66470200 0x184>, <0x66470000 0x124>, <0x67017408 0x004>, From d663186293a818af97c648624bee6c7a59e8218b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Aug 2020 16:43:41 -0700 Subject: [PATCH 342/648] ARM: dts: bcm: HR2: Fixed QSPI compatible string The string was incorrectly defined before from least to most specific, swap the compatible strings accordingly. Fixes: b9099ec754b5 ("ARM: dts: Add Broadcom Hurricane 2 DTS include file") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-hr2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm-hr2.dtsi b/arch/arm/boot/dts/bcm-hr2.dtsi index cbebed5f050e..e8df458aad39 100644 --- a/arch/arm/boot/dts/bcm-hr2.dtsi +++ b/arch/arm/boot/dts/bcm-hr2.dtsi @@ -217,7 +217,7 @@ rng: rng@33000 { }; qspi: spi@27200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"; + compatible = "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi"; reg = <0x027200 0x184>, <0x027000 0x124>, <0x11c408 0x004>, From d1ecc40a954fd0f5e3789b91fa80f15e82284e39 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Aug 2020 16:44:25 -0700 Subject: [PATCH 343/648] ARM: dts: NSP: Fixed QSPI compatible string The string was incorrectly defined before from least to most specific, swap the compatible strings accordingly. Fixes: 329f98c1974e ("ARM: dts: NSP: Add QSPI nodes to NSPI and bcm958625k DTSes") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-nsp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 0346ea621f0f..c846fa3c244d 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -284,7 +284,7 @@ nand: nand@26000 { }; qspi: spi@27200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"; + compatible = "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi"; reg = <0x027200 0x184>, <0x027000 0x124>, <0x11c408 0x004>, From b793dab8d811e103665d6bddaaea1c25db3776eb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Aug 2020 16:45:29 -0700 Subject: [PATCH 344/648] ARM: dts: BCM5301X: Fixed QSPI compatible string The string was incorrectly defined before from least to most specific, swap the compatible strings accordingly. Fixes: 1c8f40650723 ("ARM: dts: BCM5301X: convert to iProc QSPI") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm5301x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 2d9b4dd05830..0016720ce530 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -488,7 +488,7 @@ nand: nand@18028000 { }; spi@18029200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"; + compatible = "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi"; reg = <0x18029200 0x184>, <0x18029000 0x124>, <0x1811b408 0x004>, From 686e0a0c8c61e0e3f55321d0181fece3efd92777 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 26 Aug 2020 16:49:19 -0700 Subject: [PATCH 345/648] arm64: dts: ns2: Fixed QSPI compatible string The string was incorrectly defined before from least to most specific, swap the compatible strings accordingly. Fixes: ff73917d38a6 ("ARM64: dts: Add QSPI Device Tree node for NS2") Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 15f7b0ed3836..39802066232e 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -745,7 +745,7 @@ nand: nand@66460000 { }; qspi: spi@66470200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-ns2-qspi"; + compatible = "brcm,spi-ns2-qspi", "brcm,spi-bcm-qspi"; reg = <0x66470200 0x184>, <0x66470000 0x124>, <0x67017408 0x004>, From 2d2fe8433796603091ac8ea235b9165ac5a85f9a Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 9 Sep 2020 20:43:08 +0300 Subject: [PATCH 346/648] net: qed: Disable aRFS for NPAR and 100G In CMT and NPAR the PF is unknown when the GFS block processes the packet. Therefore cannot use searcher as it has a per PF database, and thus ARFS must be disabled. Fixes: d51e4af5c209 ("qed: aRFS infrastructure support") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 11 ++++++++++- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 +++ drivers/net/ethernet/qlogic/qed/qed_main.c | 2 ++ include/linux/qed/qed_if.h | 1 + 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index b8f076e4e6b8..3db181f3617a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -4253,7 +4253,8 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) | BIT(QED_MF_LLH_PROTO_CLSS) | BIT(QED_MF_LL2_NON_UNICAST) | - BIT(QED_MF_INTER_PF_SWITCH); + BIT(QED_MF_INTER_PF_SWITCH) | + BIT(QED_MF_DISABLE_ARFS); break; case NVM_CFG1_GLOB_MF_MODE_DEFAULT: cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) | @@ -4266,6 +4267,14 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n", cdev->mf_bits); + + /* In CMT the PF is unknown when the GFS block processes the + * packet. Therefore cannot use searcher as it has a per PF + * database, and thus ARFS must be disabled. + * + */ + if (QED_IS_CMT(cdev)) + cdev->mf_bits |= BIT(QED_MF_DISABLE_ARFS); } DP_INFO(p_hwfn, "Multi function mode is 0x%lx\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 4c6ac8862744..07824bf9d68d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1980,6 +1980,9 @@ void qed_arfs_mode_configure(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_arfs_config_params *p_cfg_params) { + if (test_bit(QED_MF_DISABLE_ARFS, &p_hwfn->cdev->mf_bits)) + return; + if (p_cfg_params->mode != QED_FILTER_CONFIG_MODE_DISABLE) { qed_gft_config(p_hwfn, p_ptt, p_hwfn->rel_pf_id, p_cfg_params->tcp, diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index f39f629242a1..50e5eb22e60a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -444,6 +444,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->fw_eng = FW_ENGINEERING_VERSION; dev_info->b_inter_pf_switch = test_bit(QED_MF_INTER_PF_SWITCH, &cdev->mf_bits); + if (!test_bit(QED_MF_DISABLE_ARFS, &cdev->mf_bits)) + dev_info->b_arfs_capable = true; dev_info->tx_switching = true; if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME) diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index cd6a5c7e56eb..cdd73afc4c46 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -623,6 +623,7 @@ struct qed_dev_info { #define QED_MFW_VERSION_3_OFFSET 24 u32 flash_size; + bool b_arfs_capable; bool b_inter_pf_switch; bool tx_switching; bool rdma_supported; From 0367f05885b9f21d062447bd2ba1302ba3cc7392 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 9 Sep 2020 20:43:09 +0300 Subject: [PATCH 347/648] net: qede: Disable aRFS for NPAR and 100G In some configurations ARFS cannot be used, so disable it if device is not capable. Fixes: e4917d46a653 ("qede: Add aRFS support") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 3 +++ drivers/net/ethernet/qlogic/qede/qede_main.c | 11 +++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f961f65d9372..c59b72c90293 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -311,6 +311,9 @@ int qede_alloc_arfs(struct qede_dev *edev) { int i; + if (!edev->dev_info.common.b_arfs_capable) + return -EINVAL; + edev->arfs = vzalloc(sizeof(*edev->arfs)); if (!edev->arfs) return -ENOMEM; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 140a392a81bb..9e1f41ba766c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -804,7 +804,7 @@ static void qede_init_ndev(struct qede_dev *edev) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_TC; - if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) + if (edev->dev_info.common.b_arfs_capable) hw_features |= NETIF_F_NTUPLE; if (edev->dev_info.common.vxlan_enable || @@ -2274,7 +2274,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, qede_vlan_mark_nonconfigured(edev); edev->ops->fastpath_stop(edev->cdev); - if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) { + if (edev->dev_info.common.b_arfs_capable) { qede_poll_for_freeing_arfs_filters(edev); qede_free_arfs(edev); } @@ -2341,10 +2341,9 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, if (rc) goto err2; - if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) { - rc = qede_alloc_arfs(edev); - if (rc) - DP_NOTICE(edev, "aRFS memory allocation failed\n"); + if (qede_alloc_arfs(edev)) { + edev->ndev->features &= ~NETIF_F_NTUPLE; + edev->dev_info.common.b_arfs_capable = false; } qede_napi_add_enable(edev); From ce1cf9e5025f4e2d2198728391f1847b3e168bc6 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Wed, 9 Sep 2020 20:43:10 +0300 Subject: [PATCH 348/648] net: qed: RDMA personality shouldn't fail VF load Fix the assert during VF driver installation when the personality is iWARP Fixes: 1fe614d10f45 ("qed: Relax VF firmware requirements") Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index f1f75b6d0421..b8dc5c4591ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -71,6 +71,7 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf) p_ramrod->personality = PERSONALITY_ETH; break; case QED_PCI_ETH_ROCE: + case QED_PCI_ETH_IWARP: p_ramrod->personality = PERSONALITY_RDMA_AND_ETH; break; default: From e1e1b5356eb48dce4307f5cae10e4d6d5bd3df74 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 13 Aug 2020 13:26:38 +0200 Subject: [PATCH 349/648] i40e: fix return of uninitialized aq_ret in i40e_set_vsi_promisc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c: In function ‘i40e_set_vsi_promisc’: drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c:1176:14: error: ‘aq_ret’ may be used uninitialized in this function [-Werror=maybe-uninitialized] i40e_status aq_ret; In case the code inside the if statement and the for loop does not get executed aq_ret will be uninitialized when the variable gets returned at the end of the function. Avoid this by changing num_vlans from int to u16, so aq_ret always gets set. Making this change in additional places as num_vlans should never be negative. Fixes: 37d318d7805f ("i40e: Remove scheduling while atomic possibility") Signed-off-by: Stefan Assmann Acked-by: Jakub Kicinski Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8e133d6545bd..5defcb777e92 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1115,7 +1115,7 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf) static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; - int num_vlans = 0, bkt; + u16 num_vlans = 0, bkt; hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) @@ -1134,8 +1134,8 @@ static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) * * Called to get number of VLANs and VLAN list present in mac_filter_hash. **/ -static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, int *num_vlans, - s16 **vlan_list) +static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, u16 *num_vlans, + s16 **vlan_list) { struct i40e_mac_filter *f; int i = 0; @@ -1169,7 +1169,7 @@ static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, int *num_vlans, **/ static i40e_status i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, - bool unicast_enable, s16 *vl, int num_vlans) + bool unicast_enable, s16 *vl, u16 num_vlans) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; @@ -1258,7 +1258,7 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf, i40e_status aq_ret = I40E_SUCCESS; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi; - int num_vlans; + u16 num_vlans; s16 *vl; vsi = i40e_find_vsi_from_id(pf, vsi_id); From b6f23d3817b965bcd6d72aab1f438ff6d16a0691 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 20 Aug 2020 13:53:12 +0200 Subject: [PATCH 350/648] i40e: always propagate error value in i40e_set_vsi_promisc() The for loop in i40e_set_vsi_promisc() reports errors via dev_err() but does not propagate the error up the call chain. Instead it continues the loop and potentially overwrites the reported error value. This results in the error being recorded in the log buffer, but the caller might never know anything went the wrong way. To avoid this situation i40e_set_vsi_promisc() needs to temporarily store the error after reporting it. This is still not optimal as multiple different errors may occur, so store the first error and hope that's the main issue. Fixes: 37d318d7805f (i40e: Remove scheduling while atomic possibility) Reported-by: Michal Schmidt Signed-off-by: Stefan Assmann Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 5defcb777e92..47bfb2e95e2d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1171,9 +1171,9 @@ static i40e_status i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, bool unicast_enable, s16 *vl, u16 num_vlans) { + i40e_status aq_ret, aq_tmp = 0; struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; - i40e_status aq_ret; int i; /* No VLAN to set promisc on, set on VSI */ @@ -1222,6 +1222,9 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, vf->vf_id, i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); + + if (!aq_tmp) + aq_tmp = aq_ret; } aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, seid, @@ -1235,8 +1238,15 @@ i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable, vf->vf_id, i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); + + if (!aq_tmp) + aq_tmp = aq_ret; } } + + if (aq_tmp) + aq_ret = aq_tmp; + return aq_ret; } From f03369b9bfab8e23ac28ca7ab7e6631c374b7cbe Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 18 Aug 2020 16:40:01 -0700 Subject: [PATCH 351/648] igc: Fix wrong timestamp latency numbers The previous timestamping latency numbers were obtained by interpolating the i210 numbers with the i225 crystal clock value. That calculation was wrong. Use the correct values from real measurements. Fixes: 81b055205e8b ("igc: Add support for RX timestamping") Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 3070dfdb7eb4..2d566f3c827b 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -299,18 +299,14 @@ extern char igc_driver_name[]; #define IGC_RX_HDR_LEN IGC_RXBUFFER_256 /* Transmit and receive latency (for PTP timestamps) */ -/* FIXME: These values were estimated using the ones that i225 has as - * basis, they seem to provide good numbers with ptp4l/phc2sys, but we - * need to confirm them. - */ -#define IGC_I225_TX_LATENCY_10 9542 -#define IGC_I225_TX_LATENCY_100 1024 -#define IGC_I225_TX_LATENCY_1000 178 -#define IGC_I225_TX_LATENCY_2500 64 -#define IGC_I225_RX_LATENCY_10 20662 -#define IGC_I225_RX_LATENCY_100 2213 -#define IGC_I225_RX_LATENCY_1000 448 -#define IGC_I225_RX_LATENCY_2500 160 +#define IGC_I225_TX_LATENCY_10 240 +#define IGC_I225_TX_LATENCY_100 58 +#define IGC_I225_TX_LATENCY_1000 80 +#define IGC_I225_TX_LATENCY_2500 1325 +#define IGC_I225_RX_LATENCY_10 6450 +#define IGC_I225_RX_LATENCY_100 185 +#define IGC_I225_RX_LATENCY_1000 300 +#define IGC_I225_RX_LATENCY_2500 1485 /* RX and TX descriptor control thresholds. * PTHRESH - MAC will consider prefetch if it has fewer than this number of From 4406e977a4a1e997818b6d77c3218ef8d15b2abf Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Tue, 18 Aug 2020 16:40:02 -0700 Subject: [PATCH 352/648] igc: Fix not considering the TX delay for timestamps When timestamping a packet there's a delay between the start of the packet and the point where the hardware actually captures the timestamp. This difference needs to be considered if we want accurate timestamps. This was done on the RX side, but not on the TX side. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: Vinicius Costa Gomes Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 36c999250fcc..6a9b5102aa55 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -364,6 +364,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) struct sk_buff *skb = adapter->ptp_tx_skb; struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; + int adjust = 0; u64 regval; if (WARN_ON_ONCE(!skb)) @@ -373,6 +374,24 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) regval |= (u64)rd32(IGC_TXSTMPH) << 32; igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGC_I225_TX_LATENCY_10; + break; + case SPEED_100: + adjust = IGC_I225_TX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGC_I225_TX_LATENCY_1000; + break; + case SPEED_2500: + adjust = IGC_I225_TX_LATENCY_2500; + break; + } + + shhwtstamps.hwtstamp = + ktime_add_ns(shhwtstamps.hwtstamp, adjust); + /* Clear the lock early before calling skb_tstamp_tx so that * applications are not woken up before the lock bit is clear. We use * a copy of the skb pointer to ensure other threads can't change it From 244359c99fd90f1c61c3944f93250f8219435c75 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Sep 2020 15:58:36 +0300 Subject: [PATCH 353/648] scsi: libsas: Fix error path in sas_notify_lldd_dev_found() In sas_notify_lldd_dev_found(), if we can't allocate the necessary resources, then it seems like the wrong thing to mark the device as found and to increment the reference count. None of the callers ever drop the reference in that situation. [mkp: tweaked commit desc based on feedback from John] Link: https://lore.kernel.org/r/20200905125836.GF183976@mwanda Fixes: 735f7d2fedf5 ("[SCSI] libsas: fix domain_device leak") Reviewed-by: Jason Yan Acked-by: John Garry Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_discover.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index daf951b0b3f5..13ad2b3d314e 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -182,10 +182,11 @@ int sas_notify_lldd_dev_found(struct domain_device *dev) pr_warn("driver on host %s cannot handle device %016llx, error:%d\n", dev_name(sas_ha->dev), SAS_ADDR(dev->sas_addr), res); + return res; } set_bit(SAS_DEV_FOUND, &dev->state); kref_get(&dev->kref); - return res; + return 0; } From 73a203b0dc56d2e8d4facc3820b0fcab43a300e1 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 3 Sep 2020 10:26:36 +0200 Subject: [PATCH 354/648] clk: bcm: dvp: Select the reset framework The DVP driver depends both on the RESET_SIMPLE driver but also on the reset framework itself. Let's make sure we have it enabled. Fixes: 1bc95972715a ("clk: bcm: Add BCM2711 DVP driver") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20200903082636.3844629-1-maxime@cerno.tech Acked-by: Nicolas Saenz Julienne Signed-off-by: Stephen Boyd --- drivers/clk/bcm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/bcm/Kconfig b/drivers/clk/bcm/Kconfig index 784f12c72365..ec738f74a026 100644 --- a/drivers/clk/bcm/Kconfig +++ b/drivers/clk/bcm/Kconfig @@ -5,6 +5,7 @@ config CLK_BCM2711_DVP depends on ARCH_BCM2835 ||COMPILE_TEST depends on COMMON_CLK default ARCH_BCM2835 + select RESET_CONTROLLER select RESET_SIMPLE help Enable common clock framework support for the Broadcom BCM2711 From da9c43dc0e2ec5c42a3d414e389feb30467000e2 Mon Sep 17 00:00:00 2001 From: Sumera Priyadarsini Date: Sat, 29 Aug 2020 23:27:04 +0530 Subject: [PATCH 355/648] clk: versatile: Add of_node_put() before return statement Every iteration of for_each_available_child_of_node() decrements the reference count of the previous node, however when control is transferred from the middle of the loop, as in the case of a return or break or goto, there is no decrement thus ultimately resulting in a memory leak. Fix a potential memory leak in clk-impd1.c by inserting of_node_put() before a return statement. Issue found with Coccinelle. Signed-off-by: Sumera Priyadarsini Link: https://lore.kernel.org/r/20200829175704.GA10998@Kaladin Reviewed-by: Linus Walleij Signed-off-by: Stephen Boyd --- drivers/clk/versatile/clk-impd1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/versatile/clk-impd1.c b/drivers/clk/versatile/clk-impd1.c index ca798249544d..85c395df9c00 100644 --- a/drivers/clk/versatile/clk-impd1.c +++ b/drivers/clk/versatile/clk-impd1.c @@ -109,8 +109,10 @@ static int integrator_impd1_clk_probe(struct platform_device *pdev) for_each_available_child_of_node(np, child) { ret = integrator_impd1_clk_spawn(dev, np, child); - if (ret) + if (ret) { + of_node_put(child); break; + } } return ret; From 46cf789b68b25744be3dc99efd4afce4a5381b5c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Sep 2020 08:40:13 -0700 Subject: [PATCH 356/648] connector: Move maintainence under networking drivers umbrella. Evgeniy does not have the time nor capacity to maintain the connector subsystem any longer, so just move it under networking as that is effectively what has been happening lately. Signed-off-by: David S. Miller --- MAINTAINERS | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 12be7ae4d989..2783a5f68d2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4407,12 +4407,6 @@ T: git git://git.infradead.org/users/hch/configfs.git F: fs/configfs/ F: include/linux/configfs.h -CONNECTOR -M: Evgeniy Polyakov -L: netdev@vger.kernel.org -S: Maintained -F: drivers/connector/ - CONSOLE SUBSYSTEM M: Greg Kroah-Hartman S: Supported @@ -12046,6 +12040,7 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/devicetree/bindings/net/ +F: drivers/connector/ F: drivers/net/ F: include/linux/etherdevice.h F: include/linux/fcdevice.h From baaabecfc80fad255f866563b53b8c7a3eec176e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Sep 2020 15:53:54 -0700 Subject: [PATCH 357/648] test_firmware: Test platform fw loading on non-EFI systems On non-EFI systems, it wasn't possible to test the platform firmware loader because it will have never set "checked_fw" during __init. Instead, allow the test code to override this check. Additionally split the declarations into a private symbol namespace so there is greater enforcement of the symbol visibility. Fixes: 548193cba2a7 ("test_firmware: add support for firmware_request_platform") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20200909225354.3118328-1-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/embedded-firmware.c | 10 +++++----- include/linux/efi_embedded_fw.h | 6 ++---- lib/test_firmware.c | 9 +++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/embedded-firmware.c b/drivers/firmware/efi/embedded-firmware.c index e97a9c9d010c..21ae0c48232a 100644 --- a/drivers/firmware/efi/embedded-firmware.c +++ b/drivers/firmware/efi/embedded-firmware.c @@ -16,9 +16,9 @@ /* Exported for use by lib/test_firmware.c only */ LIST_HEAD(efi_embedded_fw_list); -EXPORT_SYMBOL_GPL(efi_embedded_fw_list); - -static bool checked_for_fw; +EXPORT_SYMBOL_NS_GPL(efi_embedded_fw_list, TEST_FIRMWARE); +bool efi_embedded_fw_checked; +EXPORT_SYMBOL_NS_GPL(efi_embedded_fw_checked, TEST_FIRMWARE); static const struct dmi_system_id * const embedded_fw_table[] = { #ifdef CONFIG_TOUCHSCREEN_DMI @@ -116,14 +116,14 @@ void __init efi_check_for_embedded_firmwares(void) } } - checked_for_fw = true; + efi_embedded_fw_checked = true; } int efi_get_embedded_fw(const char *name, const u8 **data, size_t *size) { struct efi_embedded_fw *iter, *fw = NULL; - if (!checked_for_fw) { + if (!efi_embedded_fw_checked) { pr_warn("Warning %s called while we did not check for embedded fw\n", __func__); return -ENOENT; diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h index 57eac5241303..a97a12bb2c9e 100644 --- a/include/linux/efi_embedded_fw.h +++ b/include/linux/efi_embedded_fw.h @@ -8,8 +8,8 @@ #define EFI_EMBEDDED_FW_PREFIX_LEN 8 /* - * This struct and efi_embedded_fw_list are private to the efi-embedded fw - * implementation they are in this header for use by lib/test_firmware.c only! + * This struct is private to the efi-embedded fw implementation. + * They are in this header for use by lib/test_firmware.c only! */ struct efi_embedded_fw { struct list_head list; @@ -18,8 +18,6 @@ struct efi_embedded_fw { size_t length; }; -extern struct list_head efi_embedded_fw_list; - /** * struct efi_embedded_fw_desc - This struct is used by the EFI embedded-fw * code to search for embedded firmwares. diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 9fee2b93a8d1..06c955057756 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -26,6 +26,8 @@ #include #include +MODULE_IMPORT_NS(TEST_FIRMWARE); + #define TEST_FIRMWARE_NAME "test-firmware.bin" #define TEST_FIRMWARE_NUM_REQS 4 #define TEST_FIRMWARE_BUF_SIZE SZ_1K @@ -489,6 +491,9 @@ static ssize_t trigger_request_store(struct device *dev, static DEVICE_ATTR_WO(trigger_request); #ifdef CONFIG_EFI_EMBEDDED_FIRMWARE +extern struct list_head efi_embedded_fw_list; +extern bool efi_embedded_fw_checked; + static ssize_t trigger_request_platform_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -501,6 +506,7 @@ static ssize_t trigger_request_platform_store(struct device *dev, }; struct efi_embedded_fw efi_embedded_fw; const struct firmware *firmware = NULL; + bool saved_efi_embedded_fw_checked; char *name; int rc; @@ -513,6 +519,8 @@ static ssize_t trigger_request_platform_store(struct device *dev, efi_embedded_fw.data = (void *)test_data; efi_embedded_fw.length = sizeof(test_data); list_add(&efi_embedded_fw.list, &efi_embedded_fw_list); + saved_efi_embedded_fw_checked = efi_embedded_fw_checked; + efi_embedded_fw_checked = true; pr_info("loading '%s'\n", name); rc = firmware_request_platform(&firmware, name, dev); @@ -530,6 +538,7 @@ static ssize_t trigger_request_platform_store(struct device *dev, rc = count; out: + efi_embedded_fw_checked = saved_efi_embedded_fw_checked; release_firmware(firmware); list_del(&efi_embedded_fw.list); kfree(name); From 7f6e1f3072b6842b2491b2cce28360e8cfea12ad Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Sep 2020 18:42:38 +0200 Subject: [PATCH 358/648] Revert "dyndbg: fix problem parsing format="foo bar"" This reverts commit 42f07816ac0cc797928119cc039c414ae2b95d34 as it still causes problems. It will be resolved later, let's revert it so we can also revert the original patch this was supposed to be helping with. Reported-by: Naresh Kamboju Fixes: 42f07816ac0c ("dyndbg: fix problem parsing format="foo bar"") Cc: Jim Cromie Cc: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 04f4c80b0d16..08e4b057514c 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -237,7 +237,6 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) { int nwords = 0; - vpr_info("entry, buf:'%s'\n", buf); while (*buf) { char *end; @@ -248,8 +247,6 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) if (*buf == '#') break; /* token starts comment, skip rest of line */ - vpr_info("start-of-word:%d '%s'\n", nwords, buf); - /* find `end' of word, whitespace separated or quoted */ if (*buf == '"' || *buf == '\'') { int quote = *buf++; @@ -260,9 +257,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) return -EINVAL; /* unclosed quote */ } } else { - for (end = buf; - *end && *end != '=' && !isspace(*end); - end++) + for (end = buf; *end && !isspace(*end); end++) ; BUG_ON(end == buf); } @@ -378,21 +373,30 @@ static int ddebug_parse_query(char *words[], int nwords, unsigned int i; int rc = 0; char *fline; + char *keyword, *arg; - if (nwords % 2 != 0) { - pr_err("expecting pairs of match-spec \n"); - return -EINVAL; - } - if (modname) { + if (modname) /* support $modname.dyndbg= */ - vpr_info("module:%s queries:'%s'\n", modname); query->module = modname; - } - for (i = 0; i < nwords; i += 2) { - char *keyword = words[i]; - char *arg = words[i+1]; - vpr_info("keyword:'%s' value:'%s'\n", keyword, arg); + for (i = 0; i < nwords; i++) { + /* accept keyword=arg */ + vpr_info("%d w:%s\n", i, words[i]); + + keyword = words[i]; + arg = strchr(keyword, '='); + if (arg) { + *arg++ = '\0'; + } else { + i++; /* next word is arg */ + if (!(i < nwords)) { + pr_err("missing arg to keyword: %s\n", keyword); + return -EINVAL; + } + arg = words[i]; + } + vpr_info("%d key:%s arg:%s\n", i, keyword, arg); + if (!strcmp(keyword, "func")) { rc = check_set(&query->function, arg, "func"); } else if (!strcmp(keyword, "file")) { From 952e934d7f682a961c92eb9bbd521a4876e201fe Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Sep 2020 18:45:03 +0200 Subject: [PATCH 359/648] Revert "dyndbg: accept query terms like file=bar and module=foo" This reverts commit 14775b04964264189caa4a0862eac05dab8c0502 as there were still some parsing problems with it, and the follow-on patch for it. Let's revisit it later, just drop it for now. Cc: Cc: Jim Cromie Reported-by: Naresh Kamboju Cc: Stephen Rothwell Fixes: 14775b049642 ("dyndbg: accept query terms like file=bar and module=foo") Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/dynamic-debug-howto.rst | 1 - lib/dynamic_debug.c | 53 +++++++------------ 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst index e5a8def45f3f..6c04aea8f4cd 100644 --- a/Documentation/admin-guide/dynamic-debug-howto.rst +++ b/Documentation/admin-guide/dynamic-debug-howto.rst @@ -156,7 +156,6 @@ against. Possible keywords are::: ``line-range`` cannot contain space, e.g. "1-30" is valid range but "1 - 30" is not. - ``module=foo`` combined keyword=value form is interchangably accepted The meanings of each keyword are: diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 08e4b057514c..2d4dfd44b0fa 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -353,8 +353,7 @@ static int check_set(const char **dest, char *src, char *name) /* * Parse words[] as a ddebug query specification, which is a series - * of (keyword, value) pairs or combined keyword=value terms, - * chosen from these possibilities: + * of (keyword, value) pairs chosen from these possibilities: * * func * file @@ -373,34 +372,22 @@ static int ddebug_parse_query(char *words[], int nwords, unsigned int i; int rc = 0; char *fline; - char *keyword, *arg; + + /* check we have an even number of words */ + if (nwords % 2 != 0) { + pr_err("expecting pairs of match-spec \n"); + return -EINVAL; + } if (modname) /* support $modname.dyndbg= */ query->module = modname; - for (i = 0; i < nwords; i++) { - /* accept keyword=arg */ - vpr_info("%d w:%s\n", i, words[i]); - - keyword = words[i]; - arg = strchr(keyword, '='); - if (arg) { - *arg++ = '\0'; - } else { - i++; /* next word is arg */ - if (!(i < nwords)) { - pr_err("missing arg to keyword: %s\n", keyword); - return -EINVAL; - } - arg = words[i]; - } - vpr_info("%d key:%s arg:%s\n", i, keyword, arg); - - if (!strcmp(keyword, "func")) { - rc = check_set(&query->function, arg, "func"); - } else if (!strcmp(keyword, "file")) { - if (check_set(&query->filename, arg, "file")) + for (i = 0; i < nwords; i += 2) { + if (!strcmp(words[i], "func")) { + rc = check_set(&query->function, words[i+1], "func"); + } else if (!strcmp(words[i], "file")) { + if (check_set(&query->filename, words[i+1], "file")) return -EINVAL; /* tail :$info is function or line-range */ @@ -416,18 +403,18 @@ static int ddebug_parse_query(char *words[], int nwords, if (parse_linerange(query, fline)) return -EINVAL; } - } else if (!strcmp(keyword, "module")) { - rc = check_set(&query->module, arg, "module"); - } else if (!strcmp(keyword, "format")) { - string_unescape_inplace(arg, UNESCAPE_SPACE | + } else if (!strcmp(words[i], "module")) { + rc = check_set(&query->module, words[i+1], "module"); + } else if (!strcmp(words[i], "format")) { + string_unescape_inplace(words[i+1], UNESCAPE_SPACE | UNESCAPE_OCTAL | UNESCAPE_SPECIAL); - rc = check_set(&query->format, arg, "format"); - } else if (!strcmp(keyword, "line")) { - if (parse_linerange(query, arg)) + rc = check_set(&query->format, words[i+1], "format"); + } else if (!strcmp(words[i], "line")) { + if (parse_linerange(query, words[i+1])) return -EINVAL; } else { - pr_err("unknown keyword \"%s\"\n", keyword); + pr_err("unknown keyword \"%s\"\n", words[i]); return -EINVAL; } if (rc) From 57a2fb068a9513bf0fe51a1f2057235423330709 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 10 Sep 2020 15:48:58 +0800 Subject: [PATCH 360/648] powercap/intel_rapl: add support for TigerLake Desktop Add intel_rapl support for the TigerLake desktop platform. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 6f55aaef8afc..60dbe0e43dd4 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1035,6 +1035,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), From 64e5f367155fe64854a0555bfa809af45f6e7e39 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 10 Sep 2020 15:49:11 +0800 Subject: [PATCH 361/648] powercap/intel_rapl: add support for RocketLake Add intel_rapl support for the RocketLake platform. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 60dbe0e43dd4..55a219e968b3 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1036,6 +1036,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), From ba92a4201167d945ccdc5a84e6a0994f7ab71870 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 10 Sep 2020 15:49:21 +0800 Subject: [PATCH 362/648] powercap/intel_rapl: add support for AlderLake Add intel_rapl support for the AlderLake platform. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 55a219e968b3..25c764905f9b 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1037,6 +1037,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), From 95035eac763294eb4543aea9afd48d2f7c8caa5c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 6 Sep 2020 20:42:52 -0700 Subject: [PATCH 363/648] PM: : fix @em_pd kernel-doc warning Fix kernel-doc warning in : ../include/linux/device.h:613: warning: Function parameter or member 'em_pd' not described in 'device' Fixes: 1bc138c62295 ("PM / EM: add support for other devices than CPUs in Energy Model") Signed-off-by: Randy Dunlap Reviewed-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/device.h b/include/linux/device.h index ca18da4768e3..9e6ea8931a52 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -454,6 +454,7 @@ struct dev_links_info { * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. + * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pinctl.rst for details. * @msi_list: Hosts MSI descriptors From cc88b78c0870ebcab2123ba9e73689d97fbf3b14 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Thu, 10 Sep 2020 15:57:46 +0530 Subject: [PATCH 364/648] powercap: make documentation reflect code Fix up the documentation of the struct powercap_control_type members to match the code. Also fixup stray whitespace. Signed-off-by: Amit Kucheria [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/powercap.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/powercap.h b/include/linux/powercap.h index 4537f57f9e42..3d557bbcd2c7 100644 --- a/include/linux/powercap.h +++ b/include/linux/powercap.h @@ -44,19 +44,18 @@ struct powercap_control_type_ops { }; /** - * struct powercap_control_type- Defines a powercap control_type - * @name: name of control_type + * struct powercap_control_type - Defines a powercap control_type * @dev: device for this control_type * @idr: idr to have unique id for its child - * @root_node: Root holding power zones for this control_type + * @nr_zones: counter for number of zones of this type * @ops: Pointer to callback struct - * @node_lock: mutex for control type + * @lock: mutex for control type * @allocated: This is possible that client owns the memory * used by this structure. In this case * this flag is set to false by framework to * prevent deallocation during release process. * Otherwise this flag is set to true. - * @ctrl_inst: link to the control_type list + * @node: linked-list node * * Defines powercap control_type. This acts as a container for power * zones, which use same method to control power. E.g. RAPL, RAPL-PCI etc. @@ -129,7 +128,7 @@ struct powercap_zone_ops { * this flag is set to false by framework to * prevent deallocation during release process. * Otherwise this flag is set to true. - * @constraint_ptr: List of constraints for this zone. + * @constraints: List of constraints for this zone. * * This defines a power zone instance. The fields of this structure are * private, and should not be used by client drivers. From a4b5cc9e10803ecba64a7d54c0f47e4564b4a980 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 5 Sep 2020 15:14:47 +0900 Subject: [PATCH 365/648] tipc: fix shutdown() of connection oriented socket I confirmed that the problem fixed by commit 2a63866c8b51a3f7 ("tipc: fix shutdown() of connectionless socket") also applies to stream socket. ---------- #include #include #include int main(int argc, char *argv[]) { int fds[2] = { -1, -1 }; socketpair(PF_TIPC, SOCK_STREAM /* or SOCK_DGRAM */, 0, fds); if (fork() == 0) _exit(read(fds[0], NULL, 1)); shutdown(fds[0], SHUT_RDWR); /* This must make read() return. */ wait(NULL); /* To be woken up by _exit(). */ return 0; } ---------- Since shutdown(SHUT_RDWR) should affect all processes sharing that socket, unconditionally setting sk->sk_shutdown to SHUTDOWN_MASK will be the right behavior. Signed-off-by: Tetsuo Handa Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ebd280e767bd..11b27ddc75ba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2771,10 +2771,7 @@ static int tipc_shutdown(struct socket *sock, int how) trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); - if (tipc_sk_type_connectionless(sk)) - sk->sk_shutdown = SHUTDOWN_MASK; - else - sk->sk_shutdown = SEND_SHUTDOWN; + sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ From 57025817eaa42c0b6e2a907f28a125c74e3b2fc6 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 8 Sep 2020 10:49:38 +0800 Subject: [PATCH 366/648] mptcp: fix subflow's local_id issues In mptcp_pm_nl_get_local_id, skc_local is the same as msk_local, so it always return 0. Thus every subflow's local_id is 0. It's incorrect. This patch fixed this issue. Also, we need to ignore the zero address here, like 0.0.0.0 in IPv4. When we use the zero address as a local address, it means that we can use any one of the local addresses. The zero address is not a new address, we don't need to add it to PM, so this patch added a new function address_zero to check whether an address is the zero address, if it is, we ignore this address. Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Reported-by: kernel test robot Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c8820c4156e6..6b41d1d939a0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -66,6 +66,16 @@ static bool addresses_equal(const struct mptcp_addr_info *a, return a->port == b->port; } +static bool address_zero(const struct mptcp_addr_info *addr) +{ + struct mptcp_addr_info zero; + + memset(&zero, 0, sizeof(zero)); + zero.family = addr->family; + + return addresses_equal(addr, &zero, false); +} + static void local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { @@ -323,10 +333,13 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) * addr */ local_address((struct sock_common *)msk, &msk_local); - local_address((struct sock_common *)msk, &skc_local); + local_address((struct sock_common *)skc, &skc_local); if (addresses_equal(&msk_local, &skc_local, false)) return 0; + if (address_zero(&skc_local)) + return 0; + pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); rcu_read_lock(); From 2ff0e566faa4e92cba8138c5b396d6ba96a215f1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 8 Sep 2020 10:49:39 +0800 Subject: [PATCH 367/648] mptcp: fix subflow's remote_id issues This patch set the init remote_id to zero, otherwise it will be a random number. Then it added the missing subflow's remote_id setting code both in __mptcp_subflow_connect and in subflow_ulp_clone. Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM") Fixes: ec3edaa7ca6ce ("mptcp: Add handling of outgoing MP_JOIN requests") Fixes: f296234c98a8f ("mptcp: Add handling of incoming MP_JOIN requests") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 2 +- net/mptcp/subflow.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 6b41d1d939a0..5ea121d0222e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -181,9 +181,9 @@ static void check_work_pending(struct mptcp_sock *msk) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { + struct mptcp_addr_info remote = { 0 }; struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *local; - struct mptcp_addr_info remote; struct pm_nl_pernet *pernet; pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e8cac2655c82..9ead43f79023 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1063,6 +1063,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; struct sockaddr_storage addr; + int remote_id = remote->id; int local_id = loc->id; struct socket *sf; struct sock *ssk; @@ -1107,10 +1108,11 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, goto failed; mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); - pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token, - local_id); + pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, + remote_token, local_id, remote_id); subflow->remote_token = remote_token; subflow->local_id = local_id; + subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = 1; mptcp_info2sockaddr(remote, &addr); @@ -1347,6 +1349,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->backup = subflow_req->backup; new_ctx->local_id = subflow_req->local_id; + new_ctx->remote_id = subflow_req->remote_id; new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; } From f612eb76f349919128d5f5cc4e8cc4251a16bf30 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 9 Sep 2020 11:01:24 +0800 Subject: [PATCH 368/648] mptcp: fix kmalloc flag in mptcp_pm_nl_get_local_id mptcp_pm_nl_get_local_id may be called in interrupt context, so we need to use GFP_ATOMIC flag to allocate memory to avoid sleeping in atomic context. [ 280.209809] BUG: sleeping function called from invalid context at mm/slab.h:498 [ 280.209812] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1680, name: kworker/1:3 [ 280.209814] INFO: lockdep is turned off. [ 280.209816] CPU: 1 PID: 1680 Comm: kworker/1:3 Tainted: G W 5.9.0-rc3-mptcp+ #146 [ 280.209818] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 280.209820] Workqueue: events mptcp_worker [ 280.209822] Call Trace: [ 280.209824] [ 280.209826] dump_stack+0x77/0xa0 [ 280.209829] ___might_sleep.cold+0xa6/0xb6 [ 280.209832] kmem_cache_alloc_trace+0x1d1/0x290 [ 280.209835] mptcp_pm_nl_get_local_id+0x23c/0x410 [ 280.209840] subflow_init_req+0x1e9/0x2ea [ 280.209843] ? inet_reqsk_alloc+0x1c/0x120 [ 280.209845] ? kmem_cache_alloc+0x264/0x290 [ 280.209849] tcp_conn_request+0x303/0xae0 [ 280.209854] ? printk+0x53/0x6a [ 280.209857] ? tcp_rcv_state_process+0x28f/0x1374 [ 280.209859] tcp_rcv_state_process+0x28f/0x1374 [ 280.209864] ? tcp_v4_do_rcv+0xb3/0x1f0 [ 280.209866] tcp_v4_do_rcv+0xb3/0x1f0 [ 280.209869] tcp_v4_rcv+0xed6/0xfa0 [ 280.209873] ip_protocol_deliver_rcu+0x28/0x270 [ 280.209875] ip_local_deliver_finish+0x89/0x120 [ 280.209877] ip_local_deliver+0x180/0x220 [ 280.209881] ip_rcv+0x166/0x210 [ 280.209885] __netif_receive_skb_one_core+0x82/0x90 [ 280.209888] process_backlog+0xd6/0x230 [ 280.209891] net_rx_action+0x13a/0x410 [ 280.209895] __do_softirq+0xcf/0x468 [ 280.209899] asm_call_on_stack+0x12/0x20 [ 280.209901] [ 280.209903] ? ip_finish_output2+0x240/0x9a0 [ 280.209906] do_softirq_own_stack+0x4d/0x60 [ 280.209908] do_softirq.part.0+0x2b/0x60 [ 280.209911] __local_bh_enable_ip+0x9a/0xa0 [ 280.209913] ip_finish_output2+0x264/0x9a0 [ 280.209916] ? rcu_read_lock_held+0x4d/0x60 [ 280.209920] ? ip_output+0x7a/0x250 [ 280.209922] ip_output+0x7a/0x250 [ 280.209925] ? __ip_finish_output+0x330/0x330 [ 280.209928] __ip_queue_xmit+0x1dc/0x5a0 [ 280.209931] __tcp_transmit_skb+0xa0f/0xc70 [ 280.209937] tcp_connect+0xb03/0xff0 [ 280.209939] ? lockdep_hardirqs_on_prepare+0xe7/0x190 [ 280.209942] ? ktime_get_with_offset+0x125/0x150 [ 280.209944] ? trace_hardirqs_on+0x1c/0xe0 [ 280.209948] tcp_v4_connect+0x449/0x550 [ 280.209953] __inet_stream_connect+0xbb/0x320 [ 280.209955] ? mark_held_locks+0x49/0x70 [ 280.209958] ? lockdep_hardirqs_on_prepare+0xe7/0x190 [ 280.209960] ? __local_bh_enable_ip+0x6b/0xa0 [ 280.209963] inet_stream_connect+0x32/0x50 [ 280.209966] __mptcp_subflow_connect+0x1fd/0x242 [ 280.209972] mptcp_pm_create_subflow_or_signal_addr+0x2db/0x600 [ 280.209975] mptcp_worker+0x543/0x7a0 [ 280.209980] process_one_work+0x26d/0x5b0 [ 280.209984] ? process_one_work+0x5b0/0x5b0 [ 280.209987] worker_thread+0x48/0x3d0 [ 280.209990] ? process_one_work+0x5b0/0x5b0 [ 280.209993] kthread+0x117/0x150 [ 280.209996] ? kthread_park+0x80/0x80 [ 280.209998] ret_from_fork+0x22/0x30 Fixes: 01cacb00b35cb ("mptcp: add netlink-based PM") Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5ea121d0222e..770da3627848 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -354,7 +354,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return ret; /* address not found, add to local list */ - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return -ENOMEM; From edecfa98f602a597666e3c5cab2677ada38d93c5 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Tue, 8 Sep 2020 10:01:38 +0200 Subject: [PATCH 369/648] net: dsa: microchip: look for phy-mode in port nodes Documentation/devicetree/bindings/net/dsa/dsa.txt says that the phy-mode property should be specified on port nodes. However, the microchip drivers read it from the switch node. Let the driver use the per-port property and fall back to the old location with a warning. Fix in-tree users. Signed-off-by: Helmut Grohne Link: https://lore.kernel.org/netdev/20200617082235.GA1523@laureti-dev/ Acked-by: Alexandre Belloni Signed-off-by: David S. Miller --- arch/arm/boot/dts/at91-sama5d2_icp.dts | 2 +- drivers/net/dsa/microchip/ksz8795.c | 18 +++++++++++----- drivers/net/dsa/microchip/ksz9477.c | 29 +++++++++++++++++--------- drivers/net/dsa/microchip/ksz_common.c | 13 +++++++++++- drivers/net/dsa/microchip/ksz_common.h | 3 ++- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts index 8d19925fc09e..6783cf16ff81 100644 --- a/arch/arm/boot/dts/at91-sama5d2_icp.dts +++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts @@ -116,7 +116,6 @@ spi2: spi@400 { switch0: ksz8563@0 { compatible = "microchip,ksz8563"; reg = <0>; - phy-mode = "mii"; reset-gpios = <&pioA PIN_PD4 GPIO_ACTIVE_LOW>; spi-max-frequency = <500000>; @@ -140,6 +139,7 @@ port@2 { reg = <2>; label = "cpu"; ethernet = <&macb0>; + phy-mode = "mii"; fixed-link { speed = <100>; full-duplex; diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 8f1d15ea15d9..f7d59d7b2cbe 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -932,11 +932,19 @@ static void ksz8795_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_ENABLE, true); if (cpu_port) { + if (!p->interface && dev->compat_interface) { + dev_warn(dev->dev, + "Using legacy switch \"phy-mode\" property, because it is missing on port %d node. " + "Please update your device tree.\n", + port); + p->interface = dev->compat_interface; + } + /* Configure MII interface for proper network communication. */ ksz_read8(dev, REG_PORT_5_CTRL_6, &data8); data8 &= ~PORT_INTERFACE_TYPE; data8 &= ~PORT_GMII_1GPS_MODE; - switch (dev->interface) { + switch (p->interface) { case PHY_INTERFACE_MODE_MII: p->phydev.speed = SPEED_100; break; @@ -952,11 +960,11 @@ static void ksz8795_port_setup(struct ksz_device *dev, int port, bool cpu_port) default: data8 &= ~PORT_RGMII_ID_IN_ENABLE; data8 &= ~PORT_RGMII_ID_OUT_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_RXID) data8 |= PORT_RGMII_ID_IN_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_TXID) data8 |= PORT_RGMII_ID_OUT_ENABLE; data8 |= PORT_GMII_1GPS_MODE; data8 |= PORT_INTERFACE_RGMII; diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 3cb22d149813..2f5506ac7d19 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1208,7 +1208,7 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) /* configure MAC to 1G & RGMII mode */ ksz_pread8(dev, port, REG_PORT_XMII_CTRL_1, &data8); - switch (dev->interface) { + switch (p->interface) { case PHY_INTERFACE_MODE_MII: ksz9477_set_xmii(dev, 0, &data8); ksz9477_set_gbit(dev, false, &data8); @@ -1229,11 +1229,11 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) ksz9477_set_gbit(dev, true, &data8); data8 &= ~PORT_RGMII_ID_IG_ENABLE; data8 &= ~PORT_RGMII_ID_EG_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_RXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_RXID) data8 |= PORT_RGMII_ID_IG_ENABLE; - if (dev->interface == PHY_INTERFACE_MODE_RGMII_ID || - dev->interface == PHY_INTERFACE_MODE_RGMII_TXID) + if (p->interface == PHY_INTERFACE_MODE_RGMII_ID || + p->interface == PHY_INTERFACE_MODE_RGMII_TXID) data8 |= PORT_RGMII_ID_EG_ENABLE; p->phydev.speed = SPEED_1000; break; @@ -1269,23 +1269,32 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds) dev->cpu_port = i; dev->host_mask = (1 << dev->cpu_port); dev->port_mask |= dev->host_mask; + p = &dev->ports[i]; /* Read from XMII register to determine host port * interface. If set specifically in device tree * note the difference to help debugging. */ interface = ksz9477_get_interface(dev, i); - if (!dev->interface) - dev->interface = interface; - if (interface && interface != dev->interface) + if (!p->interface) { + if (dev->compat_interface) { + dev_warn(dev->dev, + "Using legacy switch \"phy-mode\" property, because it is missing on port %d node. " + "Please update your device tree.\n", + i); + p->interface = dev->compat_interface; + } else { + p->interface = interface; + } + } + if (interface && interface != p->interface) dev_info(dev->dev, "use %s instead of %s\n", - phy_modes(dev->interface), + phy_modes(p->interface), phy_modes(interface)); /* enable cpu port */ ksz9477_port_setup(dev, i, true); - p = &dev->ports[dev->cpu_port]; p->vid_member = dev->port_mask; p->on = 1; } diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 8d53b12d40a8..8e755b50c9c1 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -388,6 +388,8 @@ int ksz_switch_register(struct ksz_device *dev, const struct ksz_dev_ops *ops) { phy_interface_t interface; + struct device_node *port; + unsigned int port_num; int ret; if (dev->pdata) @@ -421,10 +423,19 @@ int ksz_switch_register(struct ksz_device *dev, /* Host port interface will be self detected, or specifically set in * device tree. */ + for (port_num = 0; port_num < dev->port_cnt; ++port_num) + dev->ports[port_num].interface = PHY_INTERFACE_MODE_NA; if (dev->dev->of_node) { ret = of_get_phy_mode(dev->dev->of_node, &interface); if (ret == 0) - dev->interface = interface; + dev->compat_interface = interface; + for_each_available_child_of_node(dev->dev->of_node, port) { + if (of_property_read_u32(port, "reg", &port_num)) + continue; + if (port_num >= dev->port_cnt) + return -EINVAL; + of_get_phy_mode(port, &dev->ports[port_num].interface); + } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); } diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 206838160f49..cf866e48ff66 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -39,6 +39,7 @@ struct ksz_port { u32 freeze:1; /* MIB counter freeze is enabled */ struct ksz_port_mib mib; + phy_interface_t interface; }; struct ksz_device { @@ -72,7 +73,7 @@ struct ksz_device { int mib_cnt; int mib_port_cnt; int last_port; /* ports after that not used */ - phy_interface_t interface; + phy_interface_t compat_interface; u32 regs_size; bool phy_errata_9477; bool synclko_125; From 2fb541c862c987d02dfdf28f1545016deecfa0d5 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 8 Sep 2020 19:02:34 +0800 Subject: [PATCH 370/648] net: sch_generic: aviod concurrent reset and enqueue op for lockless qdisc Currently there is concurrent reset and enqueue operation for the same lockless qdisc when there is no lock to synchronize the q->enqueue() in __dev_xmit_skb() with the qdisc reset operation in qdisc_deactivate() called by dev_deactivate_queue(), which may cause out-of-bounds access for priv->ring[] in hns3 driver if user has requested a smaller queue num when __dev_xmit_skb() still enqueue a skb with a larger queue_mapping after the corresponding qdisc is reset, and call hns3_nic_net_xmit() with that skb later. Reused the existing synchronize_net() in dev_deactivate_many() to make sure skb with larger queue_mapping enqueued to old qdisc(which is saved in dev_queue->qdisc_sleeping) will always be reset when dev_reset_queue() is called. Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 48 ++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 265a61d011df..54c417244642 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1131,24 +1131,10 @@ EXPORT_SYMBOL(dev_activate); static void qdisc_deactivate(struct Qdisc *qdisc) { - bool nolock = qdisc->flags & TCQ_F_NOLOCK; - if (qdisc->flags & TCQ_F_BUILTIN) return; - if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state)) - return; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - - qdisc_reset(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) - spin_unlock_bh(&qdisc->seqlock); } static void dev_deactivate_queue(struct net_device *dev, @@ -1165,6 +1151,30 @@ static void dev_deactivate_queue(struct net_device *dev, } } +static void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; + + qdisc = dev_queue->qdisc_sleeping; + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) + spin_unlock_bh(&qdisc->seqlock); +} + static bool some_qdisc_is_busy(struct net_device *dev) { unsigned int i; @@ -1213,12 +1223,20 @@ void dev_deactivate_many(struct list_head *head) dev_watchdog_down(dev); } - /* Wait for outstanding qdisc-less dev_queue_xmit calls. + /* Wait for outstanding qdisc-less dev_queue_xmit calls or + * outstanding qdisc enqueuing calls. * This is avoided if all devices are in dismantle phase : * Caller will call synchronize_net() for us */ synchronize_net(); + list_for_each_entry(dev, head, close_list) { + netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); + + if (dev_ingress_queue(dev)) + dev_reset_queue(dev, dev_ingress_queue(dev), NULL); + } + /* Wait for outstanding qdisc_run calls. */ list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) { From de214e52de1bba5392b5b7054924a08dbd57c2f6 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 8 Sep 2020 21:07:32 -0700 Subject: [PATCH 371/648] hv_netvsc: Switch the data path at the right time during hibernation When netvsc_resume() is called, the mlx5 VF NIC has not been resumed yet, so in the future the host might sliently fail the call netvsc_vf_changed() -> netvsc_switch_datapath() there, even if the call works now. Call netvsc_vf_changed() in the NETDEV_CHANGE event handler: at that time the mlx5 VF NIC has been resumed. Fixes: 19162fd4063a ("hv_netvsc: Fix hibernation for mlx5 VF driver") Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 81c5c70b616a..4a25886e2346 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2619,7 +2619,6 @@ static int netvsc_resume(struct hv_device *dev) struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info; - struct net_device *vf_netdev; int ret; rtnl_lock(); @@ -2632,15 +2631,6 @@ static int netvsc_resume(struct hv_device *dev) netvsc_devinfo_put(device_info); net_device_ctx->saved_netvsc_dev_info = NULL; - /* A NIC driver (e.g. mlx5) may keep the VF network interface across - * hibernation, but here the data path is implicitly switched to the - * netvsc NIC since the vmbus channel is closed and re-opened, so - * netvsc_vf_changed() must be used to switch the data path to the VF. - */ - vf_netdev = rtnl_dereference(net_device_ctx->vf_netdev); - if (vf_netdev && netvsc_vf_changed(vf_netdev) != NOTIFY_OK) - ret = -EINVAL; - rtnl_unlock(); return ret; @@ -2701,6 +2691,7 @@ static int netvsc_netdev_event(struct notifier_block *this, return netvsc_unregister_vf(event_dev); case NETDEV_UP: case NETDEV_DOWN: + case NETDEV_CHANGE: return netvsc_vf_changed(event_dev); default: return NOTIFY_DONE; From da26658c3d7005aa67a706dceff7b2807b59e123 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 8 Sep 2020 21:08:19 -0700 Subject: [PATCH 372/648] hv_netvsc: Cache the current data path to avoid duplicate call and message The previous change "hv_netvsc: Switch the data path at the right time during hibernation" adds the call of netvsc_vf_changed() upon NETDEV_CHANGE, so it's necessary to avoid the duplicate call and message when the VF is brought UP or DOWN. Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 21 ++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 2181d4538ab7..ff33f27cdcd3 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -974,6 +974,9 @@ struct net_device_context { /* Serial number of the VF to team with */ u32 vf_serial; + /* Is the current data path through the VF NIC? */ + bool data_path_is_vf; + /* Used to temporarily save the config info across hibernation */ struct netvsc_device_info *saved_netvsc_dev_info; }; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 4a25886e2346..b7db3766f5b9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2366,7 +2366,16 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_OK; } -/* VF up/down change detected, schedule to change data path */ +/* Change the data path when VF UP/DOWN/CHANGE are detected. + * + * Typically a UP or DOWN event is followed by a CHANGE event, so + * net_device_ctx->data_path_is_vf is used to cache the current data path + * to avoid the duplicate call of netvsc_switch_datapath() and the duplicate + * message. + * + * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network + * interface, there is only the CHANGE event and no UP or DOWN event. + */ static int netvsc_vf_changed(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; @@ -2383,6 +2392,10 @@ static int netvsc_vf_changed(struct net_device *vf_netdev) if (!netvsc_dev) return NOTIFY_DONE; + if (net_device_ctx->data_path_is_vf == vf_is_up) + return NOTIFY_OK; + net_device_ctx->data_path_is_vf = vf_is_up; + netvsc_switch_datapath(ndev, vf_is_up); netdev_info(ndev, "Data path switched %s VF: %s\n", vf_is_up ? "to" : "from", vf_netdev->name); @@ -2624,6 +2637,12 @@ static int netvsc_resume(struct hv_device *dev) rtnl_lock(); net_device_ctx = netdev_priv(net); + + /* Reset the data path to the netvsc NIC before re-opening the vmbus + * channel. Later netvsc_netdev_event() will switch the data path to + * the VF upon the UP or CHANGE event. + */ + net_device_ctx->data_path_is_vf = false; device_info = net_device_ctx->saved_netvsc_dev_info; ret = netvsc_attach(net, device_info); From 7d3ba9360c6dac7c077fbd6631e08f32ea2bcd53 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 9 Sep 2020 14:43:14 +0900 Subject: [PATCH 373/648] net: phy: call phy_disable_interrupts() in phy_attach_direct() instead Since the micrel phy driver calls phy_init_hw() as a workaround, the commit 9886a4dbd2aa ("net: phy: call phy_disable_interrupts() in phy_init_hw()") disables the interrupt unexpectedly. So, call phy_disable_interrupts() in phy_attach_direct() instead. Otherwise, the phy cannot link up after the ethernet cable was disconnected. Note that other drivers (like at803x.c) also calls phy_init_hw(). So, perhaps, the driver caused a similar issue too. Fixes: 9886a4dbd2aa ("net: phy: call phy_disable_interrupts() in phy_init_hw()") Signed-off-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8adfbad0a1e8..b93b40cda54a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1143,10 +1143,6 @@ int phy_init_hw(struct phy_device *phydev) if (ret < 0) return ret; - ret = phy_disable_interrupts(phydev); - if (ret) - return ret; - if (phydev->drv->config_init) ret = phydev->drv->config_init(phydev); @@ -1423,6 +1419,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (err) goto error; + err = phy_disable_interrupts(phydev); + if (err) + return err; + phy_resume(phydev); phy_led_triggers_register(phydev); From 66d42ed8b25b64eb63111a2b8582c5afc8bf1105 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 9 Sep 2020 12:46:48 +0300 Subject: [PATCH 374/648] hdlc_ppp: add range checks in ppp_cp_parse_cr() There are a couple bugs here: 1) If opt[1] is zero then this results in a forever loop. If the value is less than 2 then it is invalid. 2) It assumes that "len" is more than sizeof(valid_accm) or 6 which can result in memory corruption. In the case of LCP_OPTION_ACCM, then we should check "opt[1]" instead of "len" because, if "opt[1]" is less than sizeof(valid_accm) then "nak_len" gets out of sync and it can lead to memory corruption in the next iterations through the loop. In case of LCP_OPTION_MAGIC, the only valid value for opt[1] is 6, but the code is trying to log invalid data so we should only discard the data when "len" is less than 6 because that leads to a read overflow. Reported-by: ChenNan Of Chaitin Security Research Lab Fixes: e022c2f07ae5 ("WAN: new synchronous PPP implementation for generic HDLC.") Signed-off-by: Dan Carpenter Reviewed-by: Eric Dumazet Reviewed-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_ppp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 48ced3912576..16f33d1ffbfb 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -383,11 +383,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } for (opt = data; len; len -= opt[1], opt += opt[1]) { - if (len < 2 || len < opt[1]) { - dev->stats.rx_errors++; - kfree(out); - return; /* bad packet, drop silently */ - } + if (len < 2 || opt[1] < 2 || len < opt[1]) + goto err_out; if (pid == PID_LCP) switch (opt[0]) { @@ -395,6 +392,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, continue; /* MRU always OK and > 1500 bytes? */ case LCP_OPTION_ACCM: /* async control character map */ + if (opt[1] < sizeof(valid_accm)) + goto err_out; if (!memcmp(opt, valid_accm, sizeof(valid_accm))) continue; @@ -406,6 +405,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } break; case LCP_OPTION_MAGIC: + if (len < 6) + goto err_out; if (opt[1] != 6 || (!opt[2] && !opt[3] && !opt[4] && !opt[5])) break; /* reject invalid magic number */ @@ -424,6 +425,11 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, ppp_cp_event(dev, pid, RCR_GOOD, CP_CONF_ACK, id, req_len, data); kfree(out); + return; + +err_out: + dev->stats.rx_errors++; + kfree(out); } static int ppp_rx(struct sk_buff *skb) From 1be107de2ee4b3f0808e2071529364cf4d9a67b9 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 10 Sep 2020 04:41:53 -0400 Subject: [PATCH 375/648] net: Correct the comment of dst_dev_put() Since commit 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries"), we use blackhole_netdev to invalidate dst entries instead of loopback device anymore. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index d6b6ced0d451..0c01bd8d9d81 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -144,7 +144,7 @@ static void dst_destroy_rcu(struct rcu_head *head) /* Operations to mark dst as DEAD and clean up the net device referenced * by dst: - * 1. put the dst under loopback interface and discard all tx/rx packets + * 1. put the dst under blackhole interface and discard all tx/rx packets * on this route. * 2. release the net_device * This function should be called when removing routes from the fib tree From 83896b0bd8223ac33bcc609bcc82a57a587002ff Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 10 Sep 2020 04:46:40 -0400 Subject: [PATCH 376/648] net: Fix broken NETIF_F_CSUM_MASK spell in netdev_features.h Remove the weird space inside the NETIF_F_CSUM_MASK. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 2cc3cf80b49a..0b17c4322b09 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -193,7 +193,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_GSO_MASK (__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \ __NETIF_F_BIT(NETIF_F_GSO_SHIFT)) -/* List of IP checksum features. Note that NETIF_F_ HW_CSUM should not be +/* List of IP checksum features. Note that NETIF_F_HW_CSUM should not be * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- * this would be contradictory */ From 5bf490e6807bf56f49b5991b4be817407dd32656 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 10 Sep 2020 11:05:18 +0200 Subject: [PATCH 377/648] s390/qeth: delay draining the TX buffers Wait until the QDIO data connection is severed. Otherwise the device might still be processing the buffers, and end up accessing skb data that we already freed. Fixes: 8b5026bc1693 ("s390/qeth: fix qdio teardown after early init error") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 3a94f6cad167..6384f7adba66 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -284,11 +284,11 @@ static void qeth_l2_stop_card(struct qeth_card *card) if (card->state == CARD_STATE_SOFTSETUP) { qeth_clear_ipacmd_list(card); - qeth_drain_output_queues(card); card->state = CARD_STATE_DOWN; } qeth_qdio_clear_card(card, 0); + qeth_drain_output_queues(card); qeth_clear_working_pool_list(card); flush_workqueue(card->event_wq); qeth_flush_local_addrs(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4d461960370d..09ef518ca1ea 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1168,11 +1168,11 @@ static void qeth_l3_stop_card(struct qeth_card *card) if (card->state == CARD_STATE_SOFTSETUP) { qeth_l3_clear_ip_htable(card, 1); qeth_clear_ipacmd_list(card); - qeth_drain_output_queues(card); card->state = CARD_STATE_DOWN; } qeth_qdio_clear_card(card, 0); + qeth_drain_output_queues(card); qeth_clear_working_pool_list(card); flush_workqueue(card->event_wq); qeth_flush_local_addrs(card); From d2249bf25c565b6e310453962fef63f8d38677a6 Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Thu, 27 Aug 2020 22:16:29 +0800 Subject: [PATCH 378/648] clk: qcom: lpass: Correct goto target in lpass_core_sc7180_probe() lpass_core_sc7180_probe() misses to call pm_clk_destroy() and pm_runtime_disable() in error paths. Correct goto target to fix it. This issue is found by code inspection. Signed-off-by: Jing Xiangfeng Link: https://lore.kernel.org/r/20200827141629.101802-1-jingxiangfeng@huawei.com Fixes: edab812d802d ("clk: qcom: lpass: Add support for LPASS clock controller for SC7180") Signed-off-by: Stephen Boyd --- drivers/clk/qcom/lpasscorecc-sc7180.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/lpasscorecc-sc7180.c b/drivers/clk/qcom/lpasscorecc-sc7180.c index d4c1864e1ee9..228d08f5d26f 100644 --- a/drivers/clk/qcom/lpasscorecc-sc7180.c +++ b/drivers/clk/qcom/lpasscorecc-sc7180.c @@ -420,17 +420,18 @@ static int lpass_core_sc7180_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); ret = pm_clk_create(&pdev->dev); if (ret) - return ret; + goto disable_pm_runtime; ret = pm_clk_add(&pdev->dev, "iface"); if (ret < 0) { dev_err(&pdev->dev, "failed to acquire iface clock\n"); - goto disable_pm_runtime; + goto destroy_pm_clk; } + ret = -EINVAL; clk_probe = of_device_get_match_data(&pdev->dev); if (!clk_probe) - return -EINVAL; + goto destroy_pm_clk; ret = clk_probe(pdev); if (ret) From 9d3b2d3e4942cb82630f0d638de4b2253c0af56d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 10 Sep 2020 11:08:01 +0200 Subject: [PATCH 379/648] net: mvneta: fix possible use-after-free in mvneta_xdp_put_buff Release first buffer as last one since it contains references to subsequent fragments. This code will be optimized introducing multi-buffer bit in xdp_buff structure. Fixes: ca0e014609f05 ("net: mvneta: move skb build after descriptors processing") Signed-off-by: Lorenzo Bianconi Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index dfcb1767acbb..69a900081165 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2029,11 +2029,11 @@ mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); int i; - page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), - sync_len, napi); for (i = 0; i < sinfo->nr_frags; i++) page_pool_put_full_page(rxq->page_pool, skb_frag_page(&sinfo->frags[i]), napi); + page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), + sync_len, napi); } static int From e1b9efe6baebe79019a2183176686a0e709388ae Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 10 Sep 2020 14:01:26 +0300 Subject: [PATCH 380/648] net: Fix bridge enslavement failure When a netdev is enslaved to a bridge, its parent identifier is queried. This is done so that packets that were already forwarded in hardware will not be forwarded again by the bridge device between netdevs belonging to the same hardware instance. The operation fails when the netdev is an upper of netdevs with different parent identifiers. Instead of failing the enslavement, have dev_get_port_parent_id() return '-EOPNOTSUPP' which will signal the bridge to skip the query operation. Other callers of the function are not affected by this change. Fixes: 7e1146e8c10c ("net: devlink: introduce devlink_compat_switch_id_get() helper") Signed-off-by: Ido Schimmel Reported-by: Vasundhara Volam Reviewed-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4086d335978c..266073e300b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8647,7 +8647,7 @@ int dev_get_port_parent_id(struct net_device *dev, if (!first.id_len) first = *ppid; else if (memcmp(&first, ppid, sizeof(*ppid))) - return -ENODATA; + return -EOPNOTSUPP; } return err; From 6374a5606990ea4de7fe0f7035dd04422c4690f5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 10 Sep 2020 14:01:27 +0300 Subject: [PATCH 381/648] selftests: rtnetlink: Test bridge enslavement with different parent IDs Test that an upper device of netdevs with different parent IDs can be enslaved to a bridge. The test fails without previous commit. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 47 ++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7c38a909f8b8..8a2fe6d64bf2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1175,6 +1175,51 @@ kci_test_neigh_get() echo "PASS: neigh get" } +kci_test_bridge_parent_id() +{ + local ret=0 + sysfsnet=/sys/bus/netdevsim/devices/netdevsim + probed=false + + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + modprobe -q netdevsim + check_err $? + if [ $ret -ne 0 ]; then + echo "SKIP: bridge_parent_id can't load netdevsim" + return $ksft_skip + fi + probed=true + fi + + echo "10 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}10 ] ; do :; done + echo "20 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}20 ] ; do :; done + udevadm settle + dev10=`ls ${sysfsnet}10/net/` + dev20=`ls ${sysfsnet}20/net/` + + ip link add name test-bond0 type bond mode 802.3ad + ip link set dev $dev10 master test-bond0 + ip link set dev $dev20 master test-bond0 + ip link add name test-br0 type bridge + ip link set dev test-bond0 master test-br0 + check_err $? + + # clean up any leftovers + ip link del dev test-br0 + ip link del dev test-bond0 + echo 20 > /sys/bus/netdevsim/del_device + echo 10 > /sys/bus/netdevsim/del_device + $probed && rmmod netdevsim + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge_parent_id" + return 1 + fi + echo "PASS: bridge_parent_id" +} + kci_test_rtnl() { local ret=0 @@ -1224,6 +1269,8 @@ kci_test_rtnl() check_err $? kci_test_neigh_get check_err $? + kci_test_bridge_parent_id + check_err $? kci_del_dummy return $ret From 297e77e53eadb332d5062913447b104a772dc33b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 10 Sep 2020 14:09:05 +0200 Subject: [PATCH 382/648] net: DCB: Validate DCB_ATTR_DCB_BUFFER argument The parameter passed via DCB_ATTR_DCB_BUFFER is a struct dcbnl_buffer. The field prio2buffer is an array of IEEE_8021Q_MAX_PRIORITIES bytes, where each value is a number of a buffer to direct that priority's traffic to. That value is however never validated to lie within the bounds set by DCBX_MAX_BUFFERS. The only driver that currently implements the callback is mlx5 (maintainers CCd), and that does not do any validation either, in particual allowing incorrect configuration if the prio2buffer value does not fit into 4 bits. Instead of offloading the need to validate the buffer index to drivers, do it right there in core, and bounce the request if the value is too large. CC: Parav Pandit CC: Saeed Mahameed Fixes: e549f6f9c098 ("net/dcb: Add dcbnl buffer attribute") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 84dde5a2066e..16014ad19406 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1426,6 +1426,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, { const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int prio; int err; if (!ops) @@ -1475,6 +1476,13 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, struct dcbnl_buffer *buffer = nla_data(ieee[DCB_ATTR_DCB_BUFFER]); + for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) { + if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) { + err = -EINVAL; + goto err; + } + } + err = ops->dcbnl_setbuffer(netdev, buffer); if (err) goto err; From 553d87b658fed0e22a0f86b4f1b093c39d3e3074 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 10 Sep 2020 15:34:39 +0200 Subject: [PATCH 383/648] netlink: fix doc about nlmsg_parse/nla_validate There is no @validate argument. CC: Johannes Berg Fixes: 3de644035446 ("netlink: re-add parse/validate functions in strict mode") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/netlink.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index c0411f14fb53..8e0eb2c9c528 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -726,7 +726,6 @@ static inline int __nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected - * @validate: validation strictness * @extack: extended ACK report struct * * See nla_parse() @@ -824,7 +823,6 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy - * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the From ee460417d254d941dfea5fb7cff841f589643992 Mon Sep 17 00:00:00 2001 From: Lucy Yan Date: Thu, 10 Sep 2020 12:05:09 -0700 Subject: [PATCH 384/648] net: dec: de2104x: Increase receive ring size for Tulip Increase Rx ring size to address issue where hardware is reaching the receive work limit. Before: [ 102.223342] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.245695] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.251387] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.267444] de2104x 0000:17:00.0 eth0: rx work limit reached Signed-off-by: Lucy Yan Reviewed-by: Moritz Fischer Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de2104x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index cb116b530f5e..2610efe4f873 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -85,7 +85,7 @@ MODULE_PARM_DESC (rx_copybreak, "de2104x Breakpoint at which Rx packets are copi #define DSL CONFIG_DE2104X_DSL #endif -#define DE_RX_RING_SIZE 64 +#define DE_RX_RING_SIZE 128 #define DE_TX_RING_SIZE 64 #define DE_RING_BYTES \ ((sizeof(struct de_desc) * DE_RX_RING_SIZE) + \ From fde6dedfb794ec8a89c3c980a245a2e43dad2411 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 10 Sep 2020 15:52:45 -0700 Subject: [PATCH 385/648] docs/bpf: Fix ringbuf documentation Remove link to litmus tests that didn't make it to upstream. Fix ringbuf benchmark link. I wasn't able to test this with `make htmldocs`, unfortunately, because of Sphinx dependencies. But bench_ringbufs.c path is certainly correct now. Fixes: 97abb2b39682 ("docs/bpf: Add BPF ring buffer design notes") Reported-by: Mauro Carvalho Chehab Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200910225245.2896991-1-andriin@fb.com --- Documentation/bpf/ringbuf.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/bpf/ringbuf.rst b/Documentation/bpf/ringbuf.rst index 75f943f0009d..4d4f3bcb1477 100644 --- a/Documentation/bpf/ringbuf.rst +++ b/Documentation/bpf/ringbuf.rst @@ -182,9 +182,6 @@ in the order of reservations, but only after all previous records where already committed. It is thus possible for slow producers to temporarily hold off submitted records, that were reserved later. -Reservation/commit/consumer protocol is verified by litmus tests in -Documentation/litmus_tests/bpf-rb/_. - One interesting implementation bit, that significantly simplifies (and thus speeds up as well) implementation of both producers and consumers is how data area is mapped twice contiguously back-to-back in the virtual memory. This @@ -200,7 +197,7 @@ a self-pacing notifications of new data being availability. being available after commit only if consumer has already caught up right up to the record being committed. If not, consumer still has to catch up and thus will see new data anyways without needing an extra poll notification. -Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbuf.c_) show that +Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbufs.c_) show that this allows to achieve a very high throughput without having to resort to tricks like "notify only every Nth sample", which are necessary with perf buffer. For extreme cases, when BPF program wants more manual control of From 40249c6962075c040fd071339acae524f18bfac9 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Thu, 10 Sep 2020 14:52:01 +0200 Subject: [PATCH 386/648] gcov: add support for GCC 10.1 Using gcov to collect coverage data for kernels compiled with GCC 10.1 causes random malfunctions and kernel crashes. This is the result of a changed GCOV_COUNTERS value in GCC 10.1 that causes a mismatch between the layout of the gcov_info structure created by GCC profiling code and the related structure used by the kernel. Fix this by updating the in-kernel GCOV_COUNTERS value. Also re-enable config GCOV_KERNEL for use with GCC 10. Reported-by: Colin Ian King Reported-by: Leon Romanovsky Signed-off-by: Peter Oberparleiter Tested-by: Leon Romanovsky Tested-and-Acked-by: Colin Ian King Signed-off-by: Linus Torvalds --- kernel/gcov/Kconfig | 1 - kernel/gcov/gcc_4_7.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index bb4b680e8455..3110c77230c7 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -4,7 +4,6 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS - depends on !CC_IS_GCC || GCC_VERSION < 100000 select CONSTRUCTORS if !UML default n help diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 908fdf5098c3..53c67c87f141 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -19,7 +19,9 @@ #include #include "gcov.h" -#if (__GNUC__ >= 7) +#if (__GNUC__ >= 10) +#define GCOV_COUNTERS 8 +#elif (__GNUC__ >= 7) #define GCOV_COUNTERS 9 #elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) #define GCOV_COUNTERS 10 From 43fea4e42599c3eb4109996698f5c25761d3f815 Mon Sep 17 00:00:00 2001 From: Peter Shier Date: Thu, 20 Aug 2020 16:05:45 -0700 Subject: [PATCH 387/648] KVM: nVMX: Update VMCS02 when L2 PAE PDPTE updates detected When L2 uses PAE, L0 intercepts of L2 writes to CR0/CR3/CR4 call load_pdptrs to read the possibly updated PDPTEs from the guest physical address referenced by CR3. It loads them into vcpu->arch.walk_mmu->pdptrs and sets VCPU_EXREG_PDPTR in vcpu->arch.regs_dirty. At the subsequent assumed reentry into L2, the mmu will call vmx_load_mmu_pgd which calls ept_load_pdptrs. ept_load_pdptrs sees VCPU_EXREG_PDPTR set in vcpu->arch.regs_dirty and loads VMCS02.GUEST_PDPTRn from vcpu->arch.walk_mmu->pdptrs[]. This all works if the L2 CRn write intercept always resumes L2. The resume path calls vmx_check_nested_events which checks for exceptions, MTF, and expired VMX preemption timers. If vmx_check_nested_events finds any of these conditions pending it will reflect the corresponding exit into L1. Live migration at this point would also cause a missed immediate reentry into L2. After L1 exits, vmx_vcpu_run calls vmx_register_cache_reset which clears VCPU_EXREG_PDPTR in vcpu->arch.regs_dirty. When L2 next resumes, ept_load_pdptrs finds VCPU_EXREG_PDPTR clear in vcpu->arch.regs_dirty and does not load VMCS02.GUEST_PDPTRn from vcpu->arch.walk_mmu->pdptrs[]. prepare_vmcs02 will then load VMCS02.GUEST_PDPTRn from vmcs12->pdptr0/1/2/3 which contain the stale values stored at last L2 exit. A repro of this bug showed L2 entering triple fault immediately due to the bad VMCS02.GUEST_PDPTRn values. When L2 is in PAE paging mode add a call to ept_load_pdptrs before leaving L2. This will update VMCS02.GUEST_PDPTRn if they are dirty in vcpu->arch.walk_mmu->pdptrs[]. Tested: kvm-unit-tests with new directed test: vmx_mtf_pdpte_test. Verified that test fails without the fix. Also ran Google internal VMM with an Ubuntu 16.04 4.4.0-83 guest running a custom hypervisor with a 32-bit Windows XP L2 guest using PAE. Prior to fix would repro readily. Ran 14 simultaneous L2s for 140 iterations with no failures. Signed-off-by: Peter Shier Reviewed-by: Jim Mattson Message-Id: <20200820230545.2411347-1-pshier@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 8 ++++++++ arch/x86/kvm/vmx/vmx.c | 4 ++-- arch/x86/kvm/vmx/vmx.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 23b58c28a1c9..d7482ccf6a8d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4404,6 +4404,14 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) kvm_vcpu_flush_tlb_current(vcpu); + /* + * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between + * now and the new vmentry. Ensure that the VMCS02 PDPTR fields are + * up-to-date before switching to L1. + */ + if (enable_ept && is_pae_paging(vcpu)) + vmx_ept_load_pdptrs(vcpu); + leave_guest_mode(vcpu); if (nested_cpu_has_preemption_timer(vmcs12)) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 46ba2e03a892..19a599bebd5c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2971,7 +2971,7 @@ static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) vpid_sync_context(to_vmx(vcpu)->vpid); } -static void ept_load_pdptrs(struct kvm_vcpu *vcpu) +void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; @@ -3114,7 +3114,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd, guest_cr3 = vcpu->arch.cr3; else /* vmcs01.GUEST_CR3 is already up-to-date. */ update_guest_cr3 = false; - ept_load_pdptrs(vcpu); + vmx_ept_load_pdptrs(vcpu); } else { guest_cr3 = pgd; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 26175a4759fa..a2f82127c170 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -356,6 +356,7 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); int vmx_find_msr_index(struct vmx_msrs *m, u32 msr); int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r, struct x86_exception *e); +void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 From 0f990222108d214a0924d920e6095b58107d7b59 Mon Sep 17 00:00:00 2001 From: Haiwei Li Date: Tue, 1 Sep 2020 19:41:37 +0800 Subject: [PATCH 388/648] KVM: Check the allocation of pv cpu mask check the allocation of per-cpu __pv_cpu_mask. Initialize ops only when successful. Signed-off-by: Haiwei Li Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 08320b0b2b27..9e7dd3a96873 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -654,7 +654,6 @@ static void __init kvm_guest_init(void) } if (pv_tlb_flush_supported()) { - pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.tlb_remove_table = tlb_remove_table; pr_info("KVM setup pv remote TLB flush\n"); } @@ -767,6 +766,14 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); +static void kvm_free_pv_cpu_mask(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + free_cpumask_var(per_cpu(__pv_cpu_mask, cpu)); +} + static __init int kvm_alloc_cpumask(void) { int cpu; @@ -785,11 +792,20 @@ static __init int kvm_alloc_cpumask(void) if (alloc) for_each_possible_cpu(cpu) { - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu), - GFP_KERNEL, cpu_to_node(cpu)); + if (!zalloc_cpumask_var_node( + per_cpu_ptr(&__pv_cpu_mask, cpu), + GFP_KERNEL, cpu_to_node(cpu))) { + goto zalloc_cpumask_fail; + } } + apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself; + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; return 0; + +zalloc_cpumask_fail: + kvm_free_pv_cpu_mask(); + return -ENOMEM; } arch_initcall(kvm_alloc_cpumask); From f65886606c2d3b562716de030706dfe1bea4ed5e Mon Sep 17 00:00:00 2001 From: Rustam Kovhaev Date: Mon, 7 Sep 2020 11:55:35 -0700 Subject: [PATCH 389/648] KVM: fix memory leak in kvm_io_bus_unregister_dev() when kmalloc() fails in kvm_io_bus_unregister_dev(), before removing the bus, we should iterate over all other devices linked to it and call kvm_iodevice_destructor() for them Fixes: 90db10434b16 ("KVM: kvm_io_bus_unregister_dev() should never fail") Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+f196caa45793d6374707@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=f196caa45793d6374707 Signed-off-by: Rustam Kovhaev Reviewed-by: Vitaly Kuznetsov Message-Id: <20200907185535.233114-1-rkovhaev@gmail.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 67cd0b88a6b6..cf88233b819a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4332,7 +4332,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev) { - int i; + int i, j; struct kvm_io_bus *new_bus, *bus; bus = kvm_get_bus(kvm, bus_idx); @@ -4349,17 +4349,20 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1), GFP_KERNEL_ACCOUNT); - if (!new_bus) { + if (new_bus) { + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); + new_bus->dev_count--; + memcpy(new_bus->range + i, bus->range + i + 1, + (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); + } else { pr_err("kvm: failed to shrink bus, removing it completely\n"); - goto broken; + for (j = 0; j < bus->dev_count; j++) { + if (j == i) + continue; + kvm_iodevice_destructor(bus->range[j].dev); + } } - memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); - new_bus->dev_count--; - memcpy(new_bus->range + i, bus->range + i + 1, - (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); - -broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); From c6b177a3beb9140dc0ba05b61c5142fcec5f2bf7 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 28 Aug 2020 16:56:21 +0800 Subject: [PATCH 390/648] KVM: nVMX: Fix the update value of nested load IA32_PERF_GLOBAL_CTRL control A minor fix for the update of VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL field in exit_ctls_high. Fixes: 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control") Signed-off-by: Chenyi Qiang Reviewed-by: Xiaoyao Li Message-Id: <20200828085622.8365-5-chenyi.qiang@intel.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d7482ccf6a8d..1bb6b31eb646 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4676,7 +4676,7 @@ void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu) vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; vmx->nested.msrs.exit_ctls_high &= - ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; } } From f6f6195b888c28a0b59ceb0562daff92a2be86c3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 2 Sep 2020 21:54:21 +0800 Subject: [PATCH 391/648] kvm x86/mmu: use KVM_REQ_MMU_SYNC to sync when needed When kvm_mmu_get_page() gets a page with unsynced children, the spt pagetable is unsynchronized with the guest pagetable. But the guest might not issue a "flush" operation on it when the pagetable entry is changed from zero or other cases. The hypervisor has the responsibility to synchronize the pagetables. KVM behaved as above for many years, But commit 8c8560b83390 ("KVM: x86/mmu: Use KVM_REQ_TLB_FLUSH_CURRENT for MMU specific flushes") inadvertently included a line of code to change it without giving any reason in the changelog. It is clear that the commit's intention was to change KVM_REQ_TLB_FLUSH -> KVM_REQ_TLB_FLUSH_CURRENT, so we don't needlessly flush other contexts; however, one of the hunks changed a nearby KVM_REQ_MMU_SYNC instead. This patch changes it back. Link: https://lore.kernel.org/lkml/20200320212833.3507-26-sean.j.christopherson@intel.com/ Cc: Sean Christopherson Cc: Vitaly Kuznetsov Signed-off-by: Lai Jiangshan Message-Id: <20200902135421.31158-1-jiangshanlai@gmail.com> fixes: 8c8560b83390 ("KVM: x86/mmu: Use KVM_REQ_TLB_FLUSH_CURRENT for MMU specific flushes") Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a5d0207e7189..76c5826e29a2 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2469,7 +2469,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, } if (sp->unsync_children) - kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); __clear_sp_write_flooding_count(sp); From 15e9e35cd1dec2bc138464de6bf8ef828df19235 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 10 Sep 2020 18:33:51 +0800 Subject: [PATCH 392/648] KVM: MIPS: Change the definition of kvm type MIPS defines two kvm types: #define KVM_VM_MIPS_TE 0 #define KVM_VM_MIPS_VZ 1 In Documentation/virt/kvm/api.rst it is said that "You probably want to use 0 as machine type", which implies that type 0 be the "automatic" or "default" type. And, in user-space libvirt use the null-machine (with type 0) to detect the kvm capability, which returns "KVM not supported" on a VZ platform. I try to fix it in QEMU but it is ugly: https://lists.nongnu.org/archive/html/qemu-devel/2020-08/msg05629.html And Thomas Huth suggests me to change the definition of kvm type: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg03281.html So I define like this: #define KVM_VM_MIPS_AUTO 0 #define KVM_VM_MIPS_VZ 1 #define KVM_VM_MIPS_TE 2 Since VZ and TE cannot co-exists, using type 0 on a TE platform will still return success (so old user-space tools have no problems on new kernels); the advantage is that using type 0 on a VZ platform will not return failure. So, the only problem is "new user-space tools use type 2 on old kernels", but if we treat this as a kernel bug, we can backport this patch to old stable kernels. Signed-off-by: Huacai Chen Message-Id: <1599734031-28746-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 2 ++ include/uapi/linux/kvm.h | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 7de85d2253ff..0c50ac444222 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -137,6 +137,8 @@ extern void kvm_init_loongson_ipi(struct kvm *kvm); int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { switch (type) { + case KVM_VM_MIPS_AUTO: + break; #ifdef CONFIG_KVM_MIPS_VZ case KVM_VM_MIPS_VZ: #else diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3d8023474f2a..7d8eced6f459 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -790,9 +790,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 -/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ -#define KVM_VM_MIPS_TE 0 +/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */ +#define KVM_VM_MIPS_AUTO 0 #define KVM_VM_MIPS_VZ 1 +#define KVM_VM_MIPS_TE 2 #define KVM_S390_SIE_PAGE_OFFSET 1 From 7be74942f184fdfba34ddd19a0d995deb34d4a03 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 25 Aug 2020 12:56:28 -0700 Subject: [PATCH 393/648] KVM: SVM: Periodically schedule when unregistering regions on destroy There may be many encrypted regions that need to be unregistered when a SEV VM is destroyed. This can lead to soft lockups. For example, on a host running 4.15: watchdog: BUG: soft lockup - CPU#206 stuck for 11s! [t_virtual_machi:194348] CPU: 206 PID: 194348 Comm: t_virtual_machi RIP: 0010:free_unref_page_list+0x105/0x170 ... Call Trace: [<0>] release_pages+0x159/0x3d0 [<0>] sev_unpin_memory+0x2c/0x50 [kvm_amd] [<0>] __unregister_enc_region_locked+0x2f/0x70 [kvm_amd] [<0>] svm_vm_destroy+0xa9/0x200 [kvm_amd] [<0>] kvm_arch_destroy_vm+0x47/0x200 [<0>] kvm_put_kvm+0x1a8/0x2f0 [<0>] kvm_vm_release+0x25/0x30 [<0>] do_exit+0x335/0xc10 [<0>] do_group_exit+0x3f/0xa0 [<0>] get_signal+0x1bc/0x670 [<0>] do_signal+0x31/0x130 Although the CLFLUSH is no longer issued on every encrypted region to be unregistered, there are no other changes that can prevent soft lockups for very large SEV VMs in the latest kernel. Periodically schedule if necessary. This still holds kvm->lock across the resched, but since this only happens when the VM is destroyed this is assumed to be acceptable. Signed-off-by: David Rientjes Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 402dc4234e39..7bf7bf734979 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1106,6 +1106,7 @@ void sev_vm_destroy(struct kvm *kvm) list_for_each_safe(pos, q, head) { __unregister_enc_region_locked(kvm, list_entry(pos, struct enc_region, list)); + cond_resched(); } } From d831de177217cd494bfb99f2c849a0d40c2a7890 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 11 Sep 2020 11:31:47 +0200 Subject: [PATCH 394/648] KVM: x86: always allow writing '0' to MSR_KVM_ASYNC_PF_EN Even without in-kernel LAPIC we should allow writing '0' to MSR_KVM_ASYNC_PF_EN as we're not enabling the mechanism. In particular, QEMU with 'kernel-irqchip=off' fails to start a guest with qemu-system-x86_64: error: failed to set MSR 0x4b564d02 to 0x0 Fixes: 9d3c447c72fb2 ("KVM: X86: Fix async pf caused null-ptr-deref") Reported-by: Dr. David Alan Gilbert Signed-off-by: Vitaly Kuznetsov Message-Id: <20200911093147.484565-1-vkuznets@redhat.com> [Actually commit the version proposed by Sean Christopherson. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 432ef34b9ea9..e3de0fe5af37 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2735,7 +2735,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 1; if (!lapic_in_kernel(vcpu)) - return 1; + return data ? 1 : 0; vcpu->arch.apf.msr_en_val = data; From 66d18dbda8469a944dfec6c49d26d5946efba218 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 24 Aug 2020 17:21:22 -0700 Subject: [PATCH 395/648] RISC-V: Take text_mutex in ftrace_init_nop() Without this we get lockdep failures. They're spurious failures as SMP isn't up when ftrace_init_nop() is called. As far as I can tell the easiest fix is to just take the lock, which also seems like the safest fix. Signed-off-by: Palmer Dabbelt Acked-by: Guo Ren Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 7 +++++++ arch/riscv/kernel/ftrace.c | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index ace8a6e2d11d..845002cc2e57 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -66,6 +66,13 @@ do { \ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. */ #define MCOUNT_INSN_SIZE 8 + +#ifndef __ASSEMBLY__ +struct dyn_ftrace; +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop +#endif + #endif #endif /* _ASM_RISCV_FTRACE_H */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 2ff63d0cbb50..99e12faa5498 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -97,6 +97,25 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return __ftrace_modify_call(rec->ip, addr, false); } + +/* + * This is called early on, and isn't wrapped by + * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold + * text_mutex, which triggers a lockdep failure. SMP isn't running so we could + * just directly poke the text, but it's simpler to just take the lock + * ourselves. + */ +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + int out; + + ftrace_arch_code_modify_prepare(); + out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ftrace_arch_code_modify_post_process(); + + return out; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { int ret = __ftrace_modify_call((unsigned long)&ftrace_call, From cdb0e6dccc1fd0f7096d333d90cad0ac81c2b342 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Fri, 11 Sep 2020 13:16:35 +0300 Subject: [PATCH 396/648] enetc: Fix mdio bus removal on PF probe bailout This is the correct resolution for the conflict from merging the "net" tree fix: commit 26cb7085c898 ("enetc: Remove the mdio bus on PF probe bailout") with the "net-next" new work: commit 07095c025ac2 ("net: enetc: Use DT protocol information to set up the ports") that moved mdio bus allocation to an ealier stage of the PF probing routine. Fixes: a57066b1a019 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 26d5981b798f..177334f0adb1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1053,7 +1053,6 @@ static int enetc_pf_probe(struct pci_dev *pdev, err_reg_netdev: enetc_teardown_serdes(priv); - enetc_mdio_remove(pf); enetc_free_msix(priv); err_alloc_msix: enetc_free_si_resources(priv); @@ -1061,6 +1060,7 @@ static int enetc_pf_probe(struct pci_dev *pdev, si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: + enetc_mdio_remove(pf); enetc_of_put_phy(pf); err_map_pf_space: enetc_pci_remove(pdev); From b5b73b26b3ca34574124ed7ae9c5ba8391a7f176 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 9 Sep 2020 17:03:11 -0700 Subject: [PATCH 397/648] taprio: Fix allowing too small intervals It's possible that the user specifies an interval that couldn't allow any packet to be transmitted. This also avoids the issue of the hrtimer handler starving the other threads because it's running too often. The solution is to reject interval sizes that according to the current link speed wouldn't allow any packet to be transmitted. Reported-by: syzbot+8267241609ae8c23b248@syzkaller.appspotmail.com Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index fe53c1e38c7d..b0ad7687ee2c 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -777,9 +777,11 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, }; -static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, +static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, + struct sched_entry *entry, struct netlink_ext_ack *extack) { + int min_duration = length_to_duration(q, ETH_ZLEN); u32 interval = 0; if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) @@ -794,7 +796,10 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, interval = nla_get_u32( tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); - if (interval == 0) { + /* The interval should allow at least the minimum ethernet + * frame to go out. + */ + if (interval < min_duration) { NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); return -EINVAL; } @@ -804,8 +809,9 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, return 0; } -static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, - int index, struct netlink_ext_ack *extack) +static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n, + struct sched_entry *entry, int index, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; int err; @@ -819,10 +825,10 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, entry->index = index; - return fill_sched_entry(tb, entry, extack); + return fill_sched_entry(q, tb, entry, extack); } -static int parse_sched_list(struct nlattr *list, +static int parse_sched_list(struct taprio_sched *q, struct nlattr *list, struct sched_gate_list *sched, struct netlink_ext_ack *extack) { @@ -847,7 +853,7 @@ static int parse_sched_list(struct nlattr *list, return -ENOMEM; } - err = parse_sched_entry(n, entry, i, extack); + err = parse_sched_entry(q, n, entry, i, extack); if (err < 0) { kfree(entry); return err; @@ -862,7 +868,7 @@ static int parse_sched_list(struct nlattr *list, return i; } -static int parse_taprio_schedule(struct nlattr **tb, +static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, struct sched_gate_list *new, struct netlink_ext_ack *extack) { @@ -883,8 +889,8 @@ static int parse_taprio_schedule(struct nlattr **tb, new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) - err = parse_sched_list( - tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack); + err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], + new, extack); if (err < 0) return err; @@ -1473,7 +1479,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto free_sched; } - err = parse_taprio_schedule(tb, new_admin, extack); + err = parse_taprio_schedule(q, tb, new_admin, extack); if (err < 0) goto free_sched; From a1b80e0143a1b878f8e21d82fd55f3f46f0014be Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 10 Sep 2020 22:04:40 +0800 Subject: [PATCH 398/648] hinic: fix rewaking txq after netif_tx_disable When calling hinic_close in hinic_set_channels, all queues are stopped after netif_tx_disable, but some queue may be rewaken in free_tx_poll by mistake while drv is handling tx irq. If one queue is rewaken core may call hinic_xmit_frame to send pkt after netif_tx_disable within a short time which may results in accessing memory that has been already freed in hinic_close. So we call napi_disable before netif_tx_disable in hinic_close to fix this bug. Fixes: 2eed5a8b614b ("hinic: add set_channels ethtool_ops support") Signed-off-by: Luo bin Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../net/ethernet/huawei/hinic/hinic_main.c | 24 +++++++++++++++++++ drivers/net/ethernet/huawei/hinic/hinic_tx.c | 20 ++++------------ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 501056fd32ee..28581bd8ce07 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -174,6 +174,24 @@ static int create_txqs(struct hinic_dev *nic_dev) return err; } +static void enable_txqs_napi(struct hinic_dev *nic_dev) +{ + int num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev); + int i; + + for (i = 0; i < num_txqs; i++) + napi_enable(&nic_dev->txqs[i].napi); +} + +static void disable_txqs_napi(struct hinic_dev *nic_dev) +{ + int num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev); + int i; + + for (i = 0; i < num_txqs; i++) + napi_disable(&nic_dev->txqs[i].napi); +} + /** * free_txqs - Free the Logical Tx Queues of specific NIC device * @nic_dev: the specific NIC device @@ -400,6 +418,8 @@ int hinic_open(struct net_device *netdev) goto err_create_txqs; } + enable_txqs_napi(nic_dev); + err = create_rxqs(nic_dev); if (err) { netif_err(nic_dev, drv, netdev, @@ -484,6 +504,7 @@ int hinic_open(struct net_device *netdev) } err_create_rxqs: + disable_txqs_napi(nic_dev); free_txqs(nic_dev); err_create_txqs: @@ -497,6 +518,9 @@ int hinic_close(struct net_device *netdev) struct hinic_dev *nic_dev = netdev_priv(netdev); unsigned int flags; + /* Disable txq napi firstly to aviod rewaking txq in free_tx_poll */ + disable_txqs_napi(nic_dev); + down(&nic_dev->mgmt_lock); flags = nic_dev->flags; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index a97498ee6914..2b418b568767 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -745,18 +745,6 @@ static int free_tx_poll(struct napi_struct *napi, int budget) return budget; } -static void tx_napi_add(struct hinic_txq *txq, int weight) -{ - netif_napi_add(txq->netdev, &txq->napi, free_tx_poll, weight); - napi_enable(&txq->napi); -} - -static void tx_napi_del(struct hinic_txq *txq) -{ - napi_disable(&txq->napi); - netif_napi_del(&txq->napi); -} - static irqreturn_t tx_irq(int irq, void *data) { struct hinic_txq *txq = data; @@ -790,7 +778,7 @@ static int tx_request_irq(struct hinic_txq *txq) qp = container_of(sq, struct hinic_qp, sq); - tx_napi_add(txq, nic_dev->tx_weight); + netif_napi_add(txq->netdev, &txq->napi, free_tx_poll, nic_dev->tx_weight); hinic_hwdev_msix_set(nic_dev->hwdev, sq->msix_entry, TX_IRQ_NO_PENDING, TX_IRQ_NO_COALESC, @@ -807,14 +795,14 @@ static int tx_request_irq(struct hinic_txq *txq) if (err) { netif_err(nic_dev, drv, txq->netdev, "Failed to set TX interrupt coalescing attribute\n"); - tx_napi_del(txq); + netif_napi_del(&txq->napi); return err; } err = request_irq(sq->irq, tx_irq, 0, txq->irq_name, txq); if (err) { dev_err(&pdev->dev, "Failed to request Tx irq\n"); - tx_napi_del(txq); + netif_napi_del(&txq->napi); return err; } @@ -826,7 +814,7 @@ static void tx_free_irq(struct hinic_txq *txq) struct hinic_sq *sq = txq->sq; free_irq(sq->irq, txq); - tx_napi_del(txq); + netif_napi_del(&txq->napi); } /** From c047dc1d260f2593035d63747d616c3512f9d6b6 Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Thu, 10 Sep 2020 18:41:52 +0300 Subject: [PATCH 399/648] net: ipa: fix u32_replace_bits by u32p_xxx version Looks like u32p_replace_bits() should be used instead of u32_replace_bits() which does not modifies the value but returns the modified version. Fixes: 2b9feef2b6c2 ("soc: qcom: ipa: filter and routing tables") Signed-off-by: Vadym Kochan Reviewed-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index 2098ca2f2c90..b3790aa952a1 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -521,7 +521,7 @@ static void ipa_filter_tuple_zero(struct ipa_endpoint *endpoint) val = ioread32(endpoint->ipa->reg_virt + offset); /* Zero all filter-related fields, preserving the rest */ - u32_replace_bits(val, 0, IPA_REG_ENDP_FILTER_HASH_MSK_ALL); + u32p_replace_bits(&val, 0, IPA_REG_ENDP_FILTER_HASH_MSK_ALL); iowrite32(val, endpoint->ipa->reg_virt + offset); } @@ -573,7 +573,7 @@ static void ipa_route_tuple_zero(struct ipa *ipa, u32 route_id) val = ioread32(ipa->reg_virt + offset); /* Zero all route-related fields, preserving the rest */ - u32_replace_bits(val, 0, IPA_REG_ENDP_ROUTER_HASH_MSK_ALL); + u32p_replace_bits(&val, 0, IPA_REG_ENDP_ROUTER_HASH_MSK_ALL); iowrite32(val, ipa->reg_virt + offset); } From 5760d9acbe9514eec68eb70821d6fa5764f57042 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 10 Sep 2020 23:52:29 +0300 Subject: [PATCH 400/648] net: ethernet: ti: cpsw_new: fix suspend/resume Add missed suspend/resume callbacks to properly restore networking after suspend/resume cycle. Fixes: ed3525eda4c4 ("net: ethernet: ti: introduce cpsw switchdev based driver part 1 - dual-emac") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_new.c | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 8ed78577cded..15672d0a4de6 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -2070,9 +2071,61 @@ static int cpsw_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused cpsw_suspend(struct device *dev) +{ + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; + + rtnl_lock(); + + for (i = 0; i < cpsw->data.slaves; i++) { + struct net_device *ndev = cpsw->slaves[i].ndev; + + if (!(ndev && netif_running(ndev))) + continue; + + cpsw_ndo_stop(ndev); + } + + rtnl_unlock(); + + /* Select sleep pin state */ + pinctrl_pm_select_sleep_state(dev); + + return 0; +} + +static int __maybe_unused cpsw_resume(struct device *dev) +{ + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; + + /* Select default pin state */ + pinctrl_pm_select_default_state(dev); + + /* shut up ASSERT_RTNL() warning in netif_set_real_num_tx/rx_queues */ + rtnl_lock(); + + for (i = 0; i < cpsw->data.slaves; i++) { + struct net_device *ndev = cpsw->slaves[i].ndev; + + if (!(ndev && netif_running(ndev))) + continue; + + cpsw_ndo_open(ndev); + } + + rtnl_unlock(); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(cpsw_pm_ops, cpsw_suspend, cpsw_resume); + static struct platform_driver cpsw_driver = { .driver = { .name = "cpsw-switch", + .pm = &cpsw_pm_ops, .of_match_table = cpsw_of_mtable, }, .probe = cpsw_probe, From e42c68281b444f9a20d72a062f8c6fd0d31e4de8 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sat, 12 Sep 2020 02:16:39 -0400 Subject: [PATCH 401/648] KVM: SVM: avoid emulation with stale next_rip svm->next_rip is reset in svm_vcpu_run() only after calling svm_exit_handlers_fastpath(), which will cause SVM's skip_emulated_instruction() to write a stale RIP. We can move svm_exit_handlers_fastpath towards the end of svm_vcpu_run(). To align VMX with SVM, keep svm_complete_interrupts() close as well. Suggested-by: Sean Christopherson Cc: Paul K. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Wanpeng Li [Also move vmcb_mark_all_clean before any possible write to the VMCB. - Paolo] --- arch/x86/kvm/svm/svm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 03dd7bac8034..8ba4a32843fa 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2938,8 +2938,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (npt_enabled) vcpu->arch.cr3 = svm->vmcb->save.cr3; - svm_complete_interrupts(svm); - if (is_guest_mode(vcpu)) { int vmexit; @@ -3504,7 +3502,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) stgi(); /* Any pending NMI will happen here */ - exit_fastpath = svm_exit_handlers_fastpath(vcpu); if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_after_interrupt(&svm->vcpu); @@ -3518,6 +3515,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) } svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; + vmcb_mark_all_clean(svm->vmcb); /* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) @@ -3537,7 +3535,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) SVM_EXIT_EXCP_BASE + MC_VECTOR)) svm_handle_mce(svm); - vmcb_mark_all_clean(svm->vmcb); + svm_complete_interrupts(svm); + exit_fastpath = svm_exit_handlers_fastpath(vcpu); return exit_fastpath; } From 99b82a1437cb31340dbb2c437a2923b9814a7b15 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 19 Aug 2020 16:55:27 +0800 Subject: [PATCH 402/648] KVM: VMX: Don't freeze guest when event delivery causes an APIC-access exit According to SDM 27.2.4, Event delivery causes an APIC-access VM exit. Don't report internal error and freeze guest when event delivery causes an APIC-access exit, it is handleable and the event will be re-injected during the next vmentry. Signed-off-by: Wanpeng Li Message-Id: <1597827327-25055-2-git-send-email-wanpengli@tencent.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 19a599bebd5c..75cd720c9e8c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6054,6 +6054,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) (exit_reason != EXIT_REASON_EXCEPTION_NMI && exit_reason != EXIT_REASON_EPT_VIOLATION && exit_reason != EXIT_REASON_PML_FULL && + exit_reason != EXIT_REASON_APIC_ACCESS && exit_reason != EXIT_REASON_TASK_SWITCH)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; From 244081f9073fe934adbcb2db6496b91b8fc51655 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 8 Sep 2020 15:53:49 +0200 Subject: [PATCH 403/648] x86/kvm: properly use DEFINE_IDTENTRY_SYSVEC() macro DEFINE_IDTENTRY_SYSVEC() already contains irqentry_enter()/ irqentry_exit(). Signed-off-by: Vitaly Kuznetsov Message-Id: <20200908135350.355053-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9e7dd3a96873..02d15485ff1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -270,9 +270,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); u32 token; - irqentry_state_t state; - - state = irqentry_enter(regs); inc_irq_stat(irq_hv_callback_count); @@ -283,7 +280,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1); } - irqentry_exit(regs, state); set_irq_regs(old_regs); } From cc17b22559d9b9c8b7540810df172f3d7af901ce Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 8 Sep 2020 15:53:50 +0200 Subject: [PATCH 404/648] x86/kvm: don't forget to ACK async PF IRQ Merge commit 26d05b368a5c0 ("Merge branch 'kvm-async-pf-int' into HEAD") tried to adapt the new interrupt based async PF mechanism to the newly introduced IDTENTRY magic but unfortunately it missed the fact that DEFINE_IDTENTRY_SYSVEC() doesn't call ack_APIC_irq() on its own and all DEFINE_IDTENTRY_SYSVEC() users have to call it manually. As the result all multi-CPU KVM guest hang on boot when KVM_FEATURE_ASYNC_PF_INT is present. The breakage went unnoticed because no KVM userspace (e.g. QEMU) currently set it (and thus async PF mechanism is currently disabled) but we're about to change that. Fixes: 26d05b368a5c0 ("Merge branch 'kvm-async-pf-int' into HEAD") Signed-off-by: Vitaly Kuznetsov Message-Id: <20200908135350.355053-3-vkuznets@redhat.com> Tested-by: Ingo Molnar Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 02d15485ff1d..1b51b727b140 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -271,6 +271,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) struct pt_regs *old_regs = set_irq_regs(regs); u32 token; + ack_APIC_irq(); + inc_irq_stat(irq_hv_callback_count); if (__this_cpu_read(apf_reason.enabled)) { From d877322bc1adcab9850732275670409e8bcca4c4 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 3 Sep 2020 05:54:40 +0900 Subject: [PATCH 405/648] openrisc: Fix issue with get_user for 64-bit values A build failure was raised by kbuild with the following error. drivers/android/binder.c: Assembler messages: drivers/android/binder.c:3861: Error: unrecognized keyword/register name `l.lwz ?ap,4(r24)' drivers/android/binder.c:3866: Error: unrecognized keyword/register name `l.addi ?ap,r0,0' The issue is with 64-bit get_user() calls on openrisc. I traced this to a problem where in the internally in the get_user macros there is a cast to long __gu_val this causes GCC to think the get_user call is 32-bit. This binder code is really long and GCC allocates register r30, which triggers the issue. The 64-bit get_user asm tries to get the 64-bit pair register, which for r30 overflows the general register names and returns the dummy register ?ap. The fix here is to move the temporary variables into the asm macros. We use a 32-bit __gu_tmp for 32-bit and smaller macro and a 64-bit tmp in the 64-bit macro. The cast in the 64-bit macro has a trick of casting through __typeof__((x)-(x)) which avoids the below warning. This was barrowed from riscv. arch/openrisc/include/asm/uaccess.h:240:8: warning: cast to pointer from integer of different size I tested this in a small unit test to check reading between 64-bit and 32-bit pointers to 64-bit and 32-bit values in all combinations. Also I ran make C=1 to confirm no new sparse warnings came up. It all looks clean to me. Link: https://lore.kernel.org/lkml/202008200453.ohnhqkjQ%25lkp@intel.com/ Signed-off-by: Stafford Horne Reviewed-by: Luc Van Oostenryck --- arch/openrisc/include/asm/uaccess.h | 33 ++++++++++++++++++----------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index f0390211236b..120f5005461b 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -165,19 +165,19 @@ struct __large_struct { #define __get_user_nocheck(x, ptr, size) \ ({ \ - long __gu_err, __gu_val; \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + long __gu_err; \ + __get_user_size((x), (ptr), (size), __gu_err); \ __gu_err; \ }) #define __get_user_check(x, ptr, size) \ ({ \ - long __gu_err = -EFAULT, __gu_val = 0; \ + long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - if (access_ok(__gu_addr, size)) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + if (access_ok(__gu_addr, size)) \ + __get_user_size((x), __gu_addr, (size), __gu_err); \ + else \ + (x) = (__typeof__(*(ptr))) 0; \ __gu_err; \ }) @@ -191,11 +191,13 @@ do { \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ case 8: __get_user_asm2(x, ptr, retval); break; \ - default: (x) = __get_user_bad(); \ + default: (x) = (__typeof__(*(ptr)))__get_user_bad(); \ } \ } while (0) #define __get_user_asm(x, addr, err, op) \ +{ \ + unsigned long __gu_tmp; \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ "2:\n" \ @@ -209,10 +211,14 @@ do { \ " .align 2\n" \ " .long 1b,3b\n" \ ".previous" \ - : "=r"(err), "=r"(x) \ - : "r"(addr), "i"(-EFAULT), "0"(err)) + : "=r"(err), "=r"(__gu_tmp) \ + : "r"(addr), "i"(-EFAULT), "0"(err)); \ + (x) = (__typeof__(*(addr)))__gu_tmp; \ +} #define __get_user_asm2(x, addr, err) \ +{ \ + unsigned long long __gu_tmp; \ __asm__ __volatile__( \ "1: l.lwz %1,0(%2)\n" \ "2: l.lwz %H1,4(%2)\n" \ @@ -229,8 +235,11 @@ do { \ " .long 1b,4b\n" \ " .long 2b,4b\n" \ ".previous" \ - : "=r"(err), "=&r"(x) \ - : "r"(addr), "i"(-EFAULT), "0"(err)) + : "=r"(err), "=&r"(__gu_tmp) \ + : "r"(addr), "i"(-EFAULT), "0"(err)); \ + (x) = (__typeof__(*(addr)))( \ + (__typeof__((x)-(x)))__gu_tmp); \ +} /* more complex routines */ From 9883764ad0ce037c554ac0ef302dcf671f8d1ccb Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 27 Aug 2020 19:27:18 +0300 Subject: [PATCH 406/648] SVM: nSVM: correctly restore GIF on vmexit from nesting after migration Currently code in svm_set_nested_state copies the current vmcb control area to L1 control area (hsave->control), under assumption that it mostly reflects the defaults that kvm choose, and later qemu overrides these defaults with L2 state using standard KVM interfaces, like KVM_SET_REGS. However nested GIF (which is AMD specific thing) is by default is true, and it is copied to hsave area as such. This alone is not a big deal since on VMexit, GIF is always set to false, regardless of what it was on VM entry. However in nested_svm_vmexit we were first were setting GIF to false, but then we overwrite the control fields with value from the hsave area. (including the nested GIF field itself if GIF virtualization is enabled). Now on normal vm entry this is not a problem, since GIF is usually false prior to normal vm entry, and this is the value that copied to hsave, and then restored, but this is not always the case when the nested state is loaded as explained above. To fix this issue, move svm_set_gif after we restore the L1 control state in nested_svm_vmexit, so that even with wrong GIF in the saved L1 control area, we still clear GIF as the spec says. Signed-off-by: Maxim Levitsky Message-Id: <20200827162720.278690-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index fb68467e6049..95fdf068fe4c 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -586,7 +586,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; /* Give the current vmcb to the guest */ - svm_set_gif(svm, false); nested_vmcb->save.es = vmcb->save.es; nested_vmcb->save.cs = vmcb->save.cs; @@ -632,6 +631,9 @@ int nested_svm_vmexit(struct vcpu_svm *svm) /* Restore the original control entries */ copy_vmcb_control_area(&vmcb->control, &hsave->control); + /* On vmexit the GIF is set to false */ + svm_set_gif(svm, false); + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset; From 772b81bb2f9b191a046ba7bba1f232eb7b109b84 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 27 Aug 2020 19:27:19 +0300 Subject: [PATCH 407/648] SVM: nSVM: setup nested msr permission bitmap on nested state load This code was missing and was forcing the L2 run with L1's msr permission bitmap Signed-off-by: Maxim Levitsky Message-Id: <20200827162720.278690-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 95fdf068fe4c..e90bc436f584 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1134,6 +1134,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, load_nested_vmcb_control(svm, &ctl); nested_prepare_vmcb_control(svm); + if (!nested_svm_vmrun_msrpm(svm)) + return -EINVAL; + out_set_gif: svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); return 0; From 3ebb5d2617fbf45567975f878232178c5b292d58 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 27 Aug 2020 19:27:20 +0300 Subject: [PATCH 408/648] KVM: nSVM: more strict SMM checks when returning to nested guest * check that guest is 64 bit guest, otherwise the SVM related fields in the smm state area are not defined * If the SMM area indicates that SMM interrupted a running guest, check that EFER.SVME which is also saved in this area is set, otherwise the guest might have tampered with SMM save area, and so indicate emulation failure which should triple fault the guest. * Check that that guest CPUID supports SVM (due to the same issue as above) Signed-off-by: Maxim Levitsky Message-Id: <20200827162720.278690-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8ba4a32843fa..5764b87379cf 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3899,21 +3899,28 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *nested_vmcb; struct kvm_host_map map; - u64 guest; - u64 vmcb; int ret = 0; - guest = GET_SMSTATE(u64, smstate, 0x7ed8); - vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { + u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); + u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8); + u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); - if (guest) { - if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) - return 1; - nested_vmcb = map.hva; - ret = enter_svm_guest_mode(svm, vmcb, nested_vmcb); - kvm_vcpu_unmap(&svm->vcpu, &map, true); + if (guest) { + if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) + return 1; + + if (!(saved_efer & EFER_SVME)) + return 1; + + if (kvm_vcpu_map(&svm->vcpu, + gpa_to_gfn(vmcb), &map) == -EINVAL) + return 1; + + ret = enter_svm_guest_mode(svm, vmcb, map.hva); + kvm_vcpu_unmap(&svm->vcpu, &map, true); + } } return ret; From 37f66bbef0920429b8cb5eddba849ec4308a9f8e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 27 Aug 2020 20:11:44 +0300 Subject: [PATCH 409/648] KVM: emulator: more strict rsm checks. Don't ignore return values in rsm_load_state_64/32 to avoid loading invalid state from SMM state area if it was tampered with by the guest. This is primarly intended to avoid letting guest set bits in EFER (like EFER.SVME when nesting is disabled) by manipulating SMM save area. Signed-off-by: Maxim Levitsky Message-Id: <20200827171145.374620-8-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d0e2825ae617..1d450d7710d6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2505,9 +2505,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); val = GET_SMSTATE(u32, smstate, 0x7fcc); - ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); + + if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1)) + return X86EMUL_UNHANDLEABLE; + val = GET_SMSTATE(u32, smstate, 0x7fc8); - ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); + + if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1)) + return X86EMUL_UNHANDLEABLE; selector = GET_SMSTATE(u32, smstate, 0x7fc4); set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); @@ -2560,16 +2565,23 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; val = GET_SMSTATE(u32, smstate, 0x7f68); - ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1); + + if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1)) + return X86EMUL_UNHANDLEABLE; + val = GET_SMSTATE(u32, smstate, 0x7f60); - ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); + + if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1)) + return X86EMUL_UNHANDLEABLE; cr0 = GET_SMSTATE(u64, smstate, 0x7f58); cr3 = GET_SMSTATE(u64, smstate, 0x7f50); cr4 = GET_SMSTATE(u64, smstate, 0x7f48); ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); val = GET_SMSTATE(u64, smstate, 0x7ed0); - ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA); + + if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA)) + return X86EMUL_UNHANDLEABLE; selector = GET_SMSTATE(u32, smstate, 0x7e90); rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); From ed888cb0d1ebce69f12794e89fbd5e2c86d40b8d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 11 Sep 2020 19:16:11 +0100 Subject: [PATCH 410/648] arm64: Allow CPUs unffected by ARM erratum 1418040 to come in late Now that we allow CPUs affected by erratum 1418040 to come in late, this prevents their unaffected sibblings from coming in late (or coming back after a suspend or hotplug-off, which amounts to the same thing). To allow this, we need to add ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU, which amounts to set .type to ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE. Fixes: bf87bb0881d0 ("arm64: Allow booting of late CPUs affected by erratum 1418040") Reported-by: Matthias Kaehlcke Signed-off-by: Marc Zyngier Tested-by: Sai Prakash Ranjan Tested-by: Matthias Kaehlcke Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200911181611.2073183-1-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c332d49780dc..560ba69e13c1 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -910,8 +910,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM erratum 1418040", .capability = ARM64_WORKAROUND_1418040, ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), - .type = (ARM64_CPUCAP_SCOPE_LOCAL_CPU | - ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU), + /* + * We need to allow affected CPUs to come in late, but + * also need the non-affected CPUs to be able to come + * in at any point in time. Wonderful. + */ + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, }, #endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT From 202700e18acbed55970dbb9d4d518ac59b1172c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 12 Sep 2020 13:18:10 -0600 Subject: [PATCH 411/648] io_uring: grab any needed state during defer prep Always grab work environment for deferred links. The assumption that we will be running it always from the task in question is false, as exiting tasks may mean that we're deferring this one to a thread helper. And at that point it's too late to grab the work environment. Fixes: debb85f496c9 ("io_uring: factor out grab_env() from defer_prep()") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 175fb647d099..be9d628e7854 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5449,6 +5449,8 @@ static int io_req_defer_prep(struct io_kiocb *req, if (unlikely(ret)) return ret; + io_prep_async_work(req); + switch (req->opcode) { case IORING_OP_NOP: break; From 856deb866d16e29bd65952e0289066f6078af773 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Sep 2020 16:06:00 -0700 Subject: [PATCH 412/648] Linux 5.9-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2b1061ec98ed..19d012810fbb 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From ca6345de57a46ba1bd35bd15b0ceb42e05b3d71f Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 2 Sep 2020 13:40:17 -0400 Subject: [PATCH 413/648] sh: remove spurious circular inclusion from asm/smp.h Commit 0cd39f4600ed4de8 added inclusion of smp.h to lockdep.h, creating a circular include dependency where arch/sh's asm/smp.h in turn includes spinlock.h which depends on lockdep.h. Since our asm/smp.h does not actually need spinlock.h, just remove it. Fixes: 0cd39f4600ed4de8 ("locking/seqlock, headers: Untangle the spaghetti monster") Tested-by: Rob Landley Signed-off-by: Rich Felker --- arch/sh/include/asm/smp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index 1a0d7cf71c10..100bf241340b 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -8,7 +8,6 @@ #ifdef CONFIG_SMP -#include #include #include #include From b0cfc315ff38c423a5ce9ce159bd5baa4135e688 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 28 Aug 2020 21:01:41 -0400 Subject: [PATCH 414/648] sh: fix syscall tracing Addition of SECCOMP_FILTER exposed a longstanding bug in do_syscall_trace_enter, whereby r0 (the 5th argument register) was mistakenly used where r3 (syscall_nr) was intended. By overwriting r0 rather than r3 with -1 when attempting to block a syscall, the existing code would instead have caused the syscall to execute with an argument clobbered. Commit 0bb605c2c7f2b4b3 then introduced skipping of the syscall when do_syscall_trace_enter returns -1, so that the return value set by seccomp filters would not be clobbered by -ENOSYS. This eliminated the clobbering of the 5th argument register, but instead caused syscalls made with a 5th argument of -1 to be misinterpreted as a request by do_syscall_trace_enter to suppress the syscall. Fixes: 0bb605c2c7f2b4b3 ("sh: Add SECCOMP_FILTER") Fixes: ab99c733ae73cce3 ("sh: Make syscall tracer use tracehook notifiers, add TIF_NOTIFY_RESUME.") Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker --- arch/sh/kernel/entry-common.S | 1 - arch/sh/kernel/ptrace_32.c | 15 +++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index ad963104d22d..91ab2607a1ff 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -370,7 +370,6 @@ syscall_trace_entry: nop cmp/eq #-1, r0 bt syscall_exit - mov.l r0, @(OFF_R0,r15) ! Save return value ! Reload R0-R4 from kernel stack, where the ! parent may have modified them using ! ptrace(POKEUSR). (Note that R0-R2 are diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index b05bf92f9c32..5281685f6ad1 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -455,16 +455,11 @@ long arch_ptrace(struct task_struct *child, long request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; - if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) - /* - * Tracing decided this syscall should not happen. - * We'll return a bogus call number to get an ENOSYS - * error, but leave the original number in regs->regs[0]. - */ - ret = -1L; + tracehook_report_syscall_entry(regs)) { + regs->regs[0] = -ENOSYS; + return -1; + } if (secure_computing() == -1) return -1; @@ -475,7 +470,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) audit_syscall_entry(regs->regs[3], regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); - return ret ?: regs->regs[0]; + return 0; } asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) From 1a1d6db23ddacde0b15ea589e9103373e05af8de Mon Sep 17 00:00:00 2001 From: Eddie James Date: Wed, 9 Sep 2020 15:30:57 -0500 Subject: [PATCH 415/648] i2c: aspeed: Mask IRQ status to relevant bits Mask the IRQ status to only the bits that the driver checks. This prevents excessive driver warnings when operating in slave mode when additional bits are set that the driver doesn't handle. Signed-off-by: Eddie James Reviewed-by: Tao Ren Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-aspeed.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 31268074c422..724bf30600d6 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -69,6 +69,7 @@ * These share bit definitions, so use the same values for the enable & * status bits. */ +#define ASPEED_I2CD_INTR_RECV_MASK 0xf000ffff #define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT BIT(14) #define ASPEED_I2CD_INTR_BUS_RECOVER_DONE BIT(13) #define ASPEED_I2CD_INTR_SLAVE_MATCH BIT(7) @@ -604,6 +605,7 @@ static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id) writel(irq_received & ~ASPEED_I2CD_INTR_RX_DONE, bus->base + ASPEED_I2C_INTR_STS_REG); readl(bus->base + ASPEED_I2C_INTR_STS_REG); + irq_received &= ASPEED_I2CD_INTR_RECV_MASK; irq_remaining = irq_received; #if IS_ENABLED(CONFIG_I2C_SLAVE) From 66d402e2e9455cf0213c42b97f22a0493372d7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volker=20R=C3=BCmelin?= Date: Tue, 1 Sep 2020 15:22:21 +0200 Subject: [PATCH 416/648] i2c: i801: Fix resume bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On suspend the original host configuration gets restored. The resume routine has to undo this, otherwise the SMBus master may be left in disabled state or in i2c mode. [JD: Rebased on v5.8, moved the write into i801_setup_hstcfg.] Signed-off-by: Volker Rümelin Signed-off-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org --- drivers/i2c/busses/i2c-i801.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e32ef3f01fe8..22597fa415f1 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1709,6 +1709,16 @@ static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; } static inline void i801_acpi_remove(struct i801_priv *priv) { } #endif +static unsigned char i801_setup_hstcfg(struct i801_priv *priv) +{ + unsigned char hstcfg = priv->original_hstcfg; + + hstcfg &= ~SMBHSTCFG_I2C_EN; /* SMBus timing */ + hstcfg |= SMBHSTCFG_HST_EN; + pci_write_config_byte(priv->pci_dev, SMBHSTCFG, hstcfg); + return hstcfg; +} + static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) { unsigned char temp; @@ -1830,14 +1840,10 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) return err; } - pci_read_config_byte(priv->pci_dev, SMBHSTCFG, &temp); - priv->original_hstcfg = temp; - temp &= ~SMBHSTCFG_I2C_EN; /* SMBus timing */ - if (!(temp & SMBHSTCFG_HST_EN)) { + pci_read_config_byte(priv->pci_dev, SMBHSTCFG, &priv->original_hstcfg); + temp = i801_setup_hstcfg(priv); + if (!(priv->original_hstcfg & SMBHSTCFG_HST_EN)) dev_info(&dev->dev, "Enabling SMBus device\n"); - temp |= SMBHSTCFG_HST_EN; - } - pci_write_config_byte(priv->pci_dev, SMBHSTCFG, temp); if (temp & SMBHSTCFG_SMB_SMI_EN) { dev_dbg(&dev->dev, "SMBus using interrupt SMI#\n"); @@ -1963,6 +1969,7 @@ static int i801_resume(struct device *dev) { struct i801_priv *priv = dev_get_drvdata(dev); + i801_setup_hstcfg(priv); i801_enable_host_notify(&priv->adapter); return 0; From 811a6e18a8633ef66c6c9bad09f359dd9daa9ec1 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 1 Sep 2020 15:28:37 +0200 Subject: [PATCH 417/648] i2c: i801: Simplify the suspend callback We don't actually need to derive the PCI device from the device structure, as we already have a pointer to it in our private data structure. Signed-off-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 22597fa415f1..ebb4c0b03057 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1958,10 +1958,9 @@ static void i801_shutdown(struct pci_dev *dev) #ifdef CONFIG_PM_SLEEP static int i801_suspend(struct device *dev) { - struct pci_dev *pci_dev = to_pci_dev(dev); - struct i801_priv *priv = pci_get_drvdata(pci_dev); + struct i801_priv *priv = dev_get_drvdata(dev); - pci_write_config_byte(pci_dev, SMBHSTCFG, priv->original_hstcfg); + pci_write_config_byte(priv->pci_dev, SMBHSTCFG, priv->original_hstcfg); return 0; } From f20d4e924b4465822c4a35290b85aadf88ddf466 Mon Sep 17 00:00:00 2001 From: Collin Walling Date: Thu, 25 Jun 2020 11:07:23 -0400 Subject: [PATCH 418/648] docs: kvm: add documentation for KVM_CAP_S390_DIAG318 Documentation for the s390 DIAGNOSE 0x318 instruction handling. Signed-off-by: Collin Walling Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/kvm/20200625150724.10021-2-walling@linux.ibm.com/ Message-Id: <20200625150724.10021-2-walling@linux.ibm.com> Signed-off-by: Christian Borntraeger --- Documentation/virt/kvm/api.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d2b733dc7892..51191b56e61c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6173,3 +6173,23 @@ specific interfaces must be consistent, i.e. if one says the feature is supported, than the other should as well and vice versa. For arm64 see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL". For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME". + +8.25 KVM_CAP_S390_DIAG318 +------------------------- + +:Architectures: s390 + +This capability enables a guest to set information about its control program +(i.e. guest kernel type and version). The information is helpful during +system/firmware service events, providing additional data about the guest +environments running on the machine. + +The information is associated with the DIAGNOSE 0x318 instruction, which sets +an 8-byte value consisting of a one-byte Control Program Name Code (CPNC) and +a 7-byte Control Program Version Code (CPVC). The CPNC determines what +environment the control program is running in (e.g. Linux, z/VM...), and the +CPVC is used for information specific to OS (e.g. Linux version, Linux +distribution...) + +If this capability is available, then the CPNC and CPVC can be synchronized +between KVM and userspace via the sync regs mechanism (KVM_SYNC_DIAG318). From ca589ea8d1b64938329c016a5c07fc2eea985712 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Sep 2020 15:30:31 +0200 Subject: [PATCH 419/648] s390/idle: fix suspicious RCU usage After commit eb1f00237aca ("lockdep,trace: Expose tracepoints") the lock tracepoints are visible to lockdep and RCU-lockdep is finding a bunch more RCU violations that were previously hidden. Switch the idle->seqcount over to using raw_write_*() to avoid the lockdep annotation and thus the lock tracepoints. Reported-by: Guenter Roeck Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/idle.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index c73f50649e7e..f7f1e64e0d98 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -39,14 +39,13 @@ void enabled_wait(void) local_irq_restore(flags); /* Account time spent with enabled wait psw loaded as idle time. */ - /* XXX seqcount has tracepoints that require RCU */ - write_seqcount_begin(&idle->seqcount); + raw_write_seqcount_begin(&idle->seqcount); idle_time = idle->clock_idle_exit - idle->clock_idle_enter; idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; idle->idle_time += idle_time; idle->idle_count++; account_idle_time(cputime_to_nsecs(idle_time)); - write_seqcount_end(&idle->seqcount); + raw_write_seqcount_end(&idle->seqcount); } NOKPROBE_SYMBOL(enabled_wait); From b6186d7fb53349efd274263a45f0b08749ccaa2d Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 9 Sep 2020 11:59:43 +0200 Subject: [PATCH 420/648] s390/zcrypt: fix kmalloc 256k failure Tests showed that under stress conditions the kernel may temporary fail to allocate 256k with kmalloc. However, this fix reworks the related code in the cca_findcard2() function to use kvmalloc instead. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Cc: Stable Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_ccamisc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 3f5b61351cde..c793dcabd551 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -1692,9 +1692,9 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, *nr_apqns = 0; /* fetch status of all crypto cards */ - device_status = kmalloc_array(MAX_ZDEV_ENTRIES_EXT, - sizeof(struct zcrypt_device_status_ext), - GFP_KERNEL); + device_status = kvmalloc_array(MAX_ZDEV_ENTRIES_EXT, + sizeof(struct zcrypt_device_status_ext), + GFP_KERNEL); if (!device_status) return -ENOMEM; zcrypt_device_status_mask_ext(device_status); @@ -1762,7 +1762,7 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, verify = 0; } - kfree(device_status); + kvfree(device_status); return rc; } EXPORT_SYMBOL(cca_findcard2); From fcb2b70cdb194157678fb1a75f9ff499aeba3d2a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 9 Sep 2020 14:27:25 +0200 Subject: [PATCH 421/648] s390/init: add missing __init annotations Add __init to reserve_memory_end, reserve_oldmem and remove_oldmem. Sometimes these functions are not inlined, and then the build complains about section mismatch. Signed-off-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e600f6953d7c..c2c1b4e723ea 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -619,7 +619,7 @@ static struct notifier_block kdump_mem_nb = { /* * Make sure that the area behind memory_end is protected */ -static void reserve_memory_end(void) +static void __init reserve_memory_end(void) { if (memory_end_set) memblock_reserve(memory_end, ULONG_MAX); @@ -628,7 +628,7 @@ static void reserve_memory_end(void) /* * Make sure that oldmem, where the dump is stored, is protected */ -static void reserve_oldmem(void) +static void __init reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) @@ -640,7 +640,7 @@ static void reserve_oldmem(void) /* * Make sure that oldmem, where the dump is stored, is protected */ -static void remove_oldmem(void) +static void __init remove_oldmem(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) From afdf9550e54627fcf4dd609bdc1153059378cdf5 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 3 Sep 2020 13:42:57 +0200 Subject: [PATCH 422/648] s390/pci: fix leak of DMA tables on hard unplug commit f606b3ef47c9 ("s390/pci: adapt events for zbus") removed the zpci_disable_device() call for a zPCI event with PEC 0x0304 because the device is already deconfigured by the platform. This however skips the Linux side of the disable in particular it leads to leaking the DMA tables and bitmaps because zpci_dma_exit_device() is never called on the device. If the device transitions to the Reserved state we call zpci_zdev_put() but zpci_release_device() will not call zpci_disable_device() because the state of the zPCI function is already ZPCI_FN_STATE_STANDBY. If the device is put into the Standby state, zpci_disable_device() is not called and the device is assumed to have been put in Standby through platform action. At this point the device may be removed by a subsequent event with PEC 0x0308 or 0x0306 which calls zpci_zdev_put() with the same problem as above or the device may be configured again in which case zpci_disable_device() is also not called. Fix this by calling zpci_disable_device() explicitly for PEC 0x0304 as before. To make it more clear that zpci_disable_device() may be called, even if the lower level device has already been disabled by the platform, add a comment to zpci_disable_device(). Cc: # 5.8 Fixes: f606b3ef47c9 ("s390/pci: adapt events for zbus") Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci.c | 4 ++++ arch/s390/pci/pci_event.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 4b62d6b55024..1804230dd8d8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -668,6 +668,10 @@ EXPORT_SYMBOL_GPL(zpci_enable_device); int zpci_disable_device(struct zpci_dev *zdev) { zpci_dma_exit_device(zdev); + /* + * The zPCI function may already be disabled by the platform, this is + * detected in clp_disable_fh() which becomes a no-op. + */ return clp_disable_fh(zdev); } EXPORT_SYMBOL_GPL(zpci_disable_device); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 9a3a291cad43..d9ae7456dd4c 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -143,6 +143,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zpci_remove_device(zdev); } + zdev->fh = ccdf->fh; + zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; if (!clp_get_state(ccdf->fid, &state) && state == ZPCI_FN_STATE_RESERVED) { From 73ac74c7d489756d2313219a108809921dbfaea1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 10 Sep 2020 12:24:53 +0200 Subject: [PATCH 423/648] lockdep: fix order in trace_hardirqs_off_caller() Switch order so that locking state is consistent even if the IRQ tracer calls into lockdep again. Acked-by: Peter Zijlstra Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- kernel/trace/trace_preemptirq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index f10073e62603..f4938040c228 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -102,14 +102,14 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { + lockdep_hardirqs_off(CALLER_ADDR0); + if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, caller_addr); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); } - - lockdep_hardirqs_off(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_off_caller); NOKPROBE_SYMBOL(trace_hardirqs_off_caller); From cd4d3d5f21ddbfae3f686ac0ff405f21f7847ad3 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 8 Sep 2020 09:05:04 -0400 Subject: [PATCH 424/648] s390: add 3f program exception handler Program exception 3f (secure storage violation) can only be detected when the CPU is running in SIE with a format 4 state description, e.g. running a protected guest. Because of this and because user space partly controls the guest memory mapping and can trigger this exception, we want to send a SIGSEGV to the process running the guest and not panic the kernel. Signed-off-by: Janosch Frank Cc: # 5.7 Fixes: 084ea4d611a3 ("s390/mm: add (non)secure page access exceptions handlers") Reviewed-by: Claudio Imbrenda Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.h | 1 + arch/s390/kernel/pgm_check.S | 2 +- arch/s390/mm/fault.c | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index faca269d5f27..a44ddc2f2dec 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -26,6 +26,7 @@ void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); void do_secure_storage_access(struct pt_regs *regs); void do_non_secure_storage_access(struct pt_regs *regs); +void do_secure_storage_violation(struct pt_regs *regs); void addressing_exception(struct pt_regs *regs); void data_exception(struct pt_regs *regs); diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 2c27907a5ffc..9a92638360ee 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -80,7 +80,7 @@ PGM_CHECK(do_dat_exception) /* 3b */ PGM_CHECK_DEFAULT /* 3c */ PGM_CHECK(do_secure_storage_access) /* 3d */ PGM_CHECK(do_non_secure_storage_access) /* 3e */ -PGM_CHECK_DEFAULT /* 3f */ +PGM_CHECK(do_secure_storage_violation) /* 3f */ PGM_CHECK(monitor_event_exception) /* 40 */ PGM_CHECK_DEFAULT /* 41 */ PGM_CHECK_DEFAULT /* 42 */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 4c8c063bce5b..996884dcc9fd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -859,6 +859,21 @@ void do_non_secure_storage_access(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_non_secure_storage_access); +void do_secure_storage_violation(struct pt_regs *regs) +{ + /* + * Either KVM messed up the secure guest mapping or the same + * page is mapped into multiple secure guests. + * + * This exception is only triggered when a guest 2 is running + * and can therefore never occur in kernel context. + */ + printk_ratelimited(KERN_WARNING + "Secure storage violation in task: %s, pid %d\n", + current->comm, current->pid); + send_sig(SIGSEGV, current, 0); +} + #else void do_secure_storage_access(struct pt_regs *regs) { @@ -869,4 +884,9 @@ void do_non_secure_storage_access(struct pt_regs *regs) { default_trap_handler(regs); } + +void do_secure_storage_violation(struct pt_regs *regs) +{ + default_trap_handler(regs); +} #endif From 09e43968db40c33a73e9ddbfd937f46d5c334924 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 11 Aug 2020 20:43:08 -0400 Subject: [PATCH 425/648] x86/boot/compressed: Disable relocation relaxation The x86-64 psABI [0] specifies special relocation types (R_X86_64_[REX_]GOTPCRELX) for indirection through the Global Offset Table, semantically equivalent to R_X86_64_GOTPCREL, which the linker can take advantage of for optimization (relaxation) at link time. This is supported by LLD and binutils versions 2.26 onwards. The compressed kernel is position-independent code, however, when using LLD or binutils versions before 2.27, it must be linked without the -pie option. In this case, the linker may optimize certain instructions into a non-position-independent form, by converting foo@GOTPCREL(%rip) to $foo. This potential issue has been present with LLD and binutils-2.26 for a long time, but it has never manifested itself before now: - LLD and binutils-2.26 only relax movq foo@GOTPCREL(%rip), %reg to leaq foo(%rip), %reg which is still position-independent, rather than mov $foo, %reg which is permitted by the psABI when -pie is not enabled. - GCC happens to only generate GOTPCREL relocations on mov instructions. - CLang does generate GOTPCREL relocations on non-mov instructions, but when building the compressed kernel, it uses its integrated assembler (due to the redefinition of KBUILD_CFLAGS dropping -no-integrated-as), which has so far defaulted to not generating the GOTPCRELX relocations. Nick Desaulniers reports [1,2]: "A recent change [3] to a default value of configuration variable (ENABLE_X86_RELAX_RELOCATIONS OFF -> ON) in LLVM now causes Clang's integrated assembler to emit R_X86_64_GOTPCRELX/R_X86_64_REX_GOTPCRELX relocations. LLD will relax instructions with these relocations based on whether the image is being linked as position independent or not. When not, then LLD will relax these instructions to use absolute addressing mode (R_RELAX_GOT_PC_NOPIC). This causes kernels built with Clang and linked with LLD to fail to boot." Patch series [4] is a solution to allow the compressed kernel to be linked with -pie unconditionally, but even if merged is unlikely to be backported. As a simple solution that can be applied to stable as well, prevent the assembler from generating the relaxed relocation types using the -mrelax-relocations=no option. For ease of backporting, do this unconditionally. [0] https://gitlab.com/x86-psABIs/x86-64-ABI/-/blob/master/x86-64-ABI/linker-optimization.tex#L65 [1] https://lore.kernel.org/lkml/20200807194100.3570838-1-ndesaulniers@google.com/ [2] https://github.com/ClangBuiltLinux/linux/issues/1121 [3] https://reviews.llvm.org/rGc41a18cf61790fc898dcda1055c3efbf442c14c0 [4] https://lore.kernel.org/lkml/20200731202738.2577854-1-nivedita@alum.mit.edu/ Reported-by: Nick Desaulniers Signed-off-by: Arvind Sankar Signed-off-by: Ingo Molnar Tested-by: Nick Desaulniers Tested-by: Sedat Dilek Acked-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200812004308.1448603-1-nivedita@alum.mit.edu --- arch/x86/boot/compressed/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3962f592633d..ff7894f39e0e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -43,6 +43,8 @@ KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += -D__DISABLE_EXPORTS +# Disable relocation relaxation in case the link is not PIE. +KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n From 911e1987efc8f3e6445955fbae7f54b428b92bd3 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 13 Sep 2020 12:47:29 -0700 Subject: [PATCH 426/648] Drivers: hv: vmbus: Add timeout to vmbus_wait_for_unload vmbus_wait_for_unload() looks for a CHANNELMSG_UNLOAD_RESPONSE message coming from Hyper-V. But if the message isn't found for some reason, the panic path gets hung forever. Add a timeout of 10 seconds to prevent this. Fixes: 415719160de3 ("Drivers: hv: vmbus: avoid scheduling in interrupt context in vmbus_initiate_unload()") Signed-off-by: Michael Kelley Reviewed-by: Dexuan Cui Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/1600026449-23651-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 417a95e5094d..af7832e13167 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -750,7 +750,7 @@ static void vmbus_wait_for_unload(void) void *page_addr; struct hv_message *msg; struct vmbus_channel_message_header *hdr; - u32 message_type; + u32 message_type, i; /* * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was @@ -760,8 +760,11 @@ static void vmbus_wait_for_unload(void) * functional and vmbus_unload_response() will complete * vmbus_connection.unload_event. If not, the last thing we can do is * read message pages for all CPUs directly. + * + * Wait no more than 10 seconds so that the panic path can't get + * hung forever in case the response message isn't seen. */ - while (1) { + for (i = 0; i < 1000; i++) { if (completion_done(&vmbus_connection.unload_event)) break; From 1c78544eaa4660096aeb6a57ec82b42cdb3bfe5a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Sep 2020 09:01:04 +0100 Subject: [PATCH 427/648] btrfs: fix wrong address when faulting in pages in the search ioctl When faulting in the pages for the user supplied buffer for the search ioctl, we are passing only the base address of the buffer to the function fault_in_pages_writeable(). This means that after the first iteration of the while loop that searches for leaves, when we have a non-zero offset, stored in 'sk_offset', we try to fault in a wrong page range. So fix this by adding the offset in 'sk_offset' to the base address of the user supplied buffer when calling fault_in_pages_writeable(). Several users have reported that the applications compsize and bees have started to operate incorrectly since commit a48b73eca4ceb9 ("btrfs: fix potential deadlock in the search ioctl") was added to stable trees, and these applications make heavy use of the search ioctls. This fixes their issues. Link: https://lore.kernel.org/linux-btrfs/632b888d-a3c3-b085-cdf5-f9bb61017d92@lechevalier.se/ Link: https://github.com/kilobyte/compsize/issues/34 Fixes: a48b73eca4ceb9 ("btrfs: fix potential deadlock in the search ioctl") CC: stable@vger.kernel.org # 4.4+ Tested-by: A L Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ac45f022b495..2d9109d9e98f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2193,7 +2193,8 @@ static noinline int search_ioctl(struct inode *inode, key.offset = sk->min_offset; while (1) { - ret = fault_in_pages_writeable(ubuf, *buf_size - sk_offset); + ret = fault_in_pages_writeable(ubuf + sk_offset, + *buf_size - sk_offset); if (ret) break; From 7d90a9b2693a753abfc7034eacea5bbcbb566560 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 4 Sep 2020 15:47:19 +0800 Subject: [PATCH 428/648] Revert "mtd: spi-nor: Disable the flash quad mode in spi_nor_restore()" Previous patch intends to restore the flash's QE bit when removed/shutdown, but may have some problems and break the flash: - for those originally in Quad mode, this patch will clear the QE bit when unloaded the flash, which is incorrect. - even with above problem solved, it may still break the flash as some flash's QE bit is non-volatile and lots of set/reset will wear out the bit. - the restore method cannot be proved to be valid as if a hard reset or accident crash happened, the spi_nor_restore() won't be performed the the QE bit will not be restored as we expected to. So let's revert it to fix this. The discussion can be found at [1]. This reverts commit cc59e6bb6cd69d3347c06ccce088c5c6052e041e. [1] https://lore.kernel.org/linux-mtd/CAO8h3eFLVLRmw7u+rurKsg7=Nh2q-HVq-HgVXig8gf5Dffk8MA@mail.gmail.com/ Reported-by: Matthias Weisser Suggested-by: Vignesh Raghavendra Signed-off-by: Yicong Yang Signed-off-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/1599205640-26690-1-git-send-email-yangyicong@hisilicon.com --- drivers/mtd/spi-nor/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 65eff4ce6ab1..ab8f55438e21 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -3000,8 +3000,6 @@ void spi_nor_restore(struct spi_nor *nor) if (nor->addr_width == 4 && !(nor->flags & SNOR_F_4B_OPCODES) && nor->flags & SNOR_F_BROKEN_RESET) nor->params->set_4byte_addr_mode(nor, false); - - spi_nor_quad_enable(nor, false); } EXPORT_SYMBOL_GPL(spi_nor_restore); From 1afc0c89f6a155c54c76d65ffcf72cd1232daf3a Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 4 Sep 2020 15:47:20 +0800 Subject: [PATCH 429/648] Revert "mtd: spi-nor: Add capability to disable flash quad mode" As the only user has been removed in previous patch, let's revert this one together. This reverts commit be192209d5a33c912caa4a05d6f92b89328d8db8. Reported-by: Matthias Weisser Suggested-by: Vignesh Raghavendra Signed-off-by: Yicong Yang Signed-off-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/1599205640-26690-2-git-send-email-yangyicong@hisilicon.com --- drivers/mtd/spi-nor/core.c | 55 +++++++++++++------------------------- drivers/mtd/spi-nor/core.h | 10 +++---- 2 files changed, 24 insertions(+), 41 deletions(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index ab8f55438e21..0369d98b2d12 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -1907,16 +1907,15 @@ static int spi_nor_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) } /** - * spi_nor_sr1_bit6_quad_enable() - Set/Unset the Quad Enable BIT(6) in the - * Status Register 1. + * spi_nor_sr1_bit6_quad_enable() - Set the Quad Enable BIT(6) in the Status + * Register 1. * @nor: pointer to a 'struct spi_nor' - * @enable: true to enable Quad mode, false to disable Quad mode. * * Bit 6 of the Status Register 1 is the QE bit for Macronix like QSPI memories. * * Return: 0 on success, -errno otherwise. */ -int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor, bool enable) +int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor) { int ret; @@ -1924,56 +1923,45 @@ int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor, bool enable) if (ret) return ret; - if ((enable && (nor->bouncebuf[0] & SR1_QUAD_EN_BIT6)) || - (!enable && !(nor->bouncebuf[0] & SR1_QUAD_EN_BIT6))) + if (nor->bouncebuf[0] & SR1_QUAD_EN_BIT6) return 0; - if (enable) - nor->bouncebuf[0] |= SR1_QUAD_EN_BIT6; - else - nor->bouncebuf[0] &= ~SR1_QUAD_EN_BIT6; + nor->bouncebuf[0] |= SR1_QUAD_EN_BIT6; return spi_nor_write_sr1_and_check(nor, nor->bouncebuf[0]); } /** - * spi_nor_sr2_bit1_quad_enable() - set/unset the Quad Enable BIT(1) in the - * Status Register 2. + * spi_nor_sr2_bit1_quad_enable() - set the Quad Enable BIT(1) in the Status + * Register 2. * @nor: pointer to a 'struct spi_nor'. - * @enable: true to enable Quad mode, false to disable Quad mode. * * Bit 1 of the Status Register 2 is the QE bit for Spansion like QSPI memories. * * Return: 0 on success, -errno otherwise. */ -int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor, bool enable) +int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor) { int ret; if (nor->flags & SNOR_F_NO_READ_CR) - return spi_nor_write_16bit_cr_and_check(nor, - enable ? SR2_QUAD_EN_BIT1 : 0); + return spi_nor_write_16bit_cr_and_check(nor, SR2_QUAD_EN_BIT1); ret = spi_nor_read_cr(nor, nor->bouncebuf); if (ret) return ret; - if ((enable && (nor->bouncebuf[0] & SR2_QUAD_EN_BIT1)) || - (!enable && !(nor->bouncebuf[0] & SR2_QUAD_EN_BIT1))) + if (nor->bouncebuf[0] & SR2_QUAD_EN_BIT1) return 0; - if (enable) - nor->bouncebuf[0] |= SR2_QUAD_EN_BIT1; - else - nor->bouncebuf[0] &= ~SR2_QUAD_EN_BIT1; + nor->bouncebuf[0] |= SR2_QUAD_EN_BIT1; return spi_nor_write_16bit_cr_and_check(nor, nor->bouncebuf[0]); } /** - * spi_nor_sr2_bit7_quad_enable() - set/unset QE bit in Status Register 2. + * spi_nor_sr2_bit7_quad_enable() - set QE bit in Status Register 2. * @nor: pointer to a 'struct spi_nor' - * @enable: true to enable Quad mode, false to disable Quad mode. * * Set the Quad Enable (QE) bit in the Status Register 2. * @@ -1983,7 +1971,7 @@ int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor, bool enable) * * Return: 0 on success, -errno otherwise. */ -int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor, bool enable) +int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor) { u8 *sr2 = nor->bouncebuf; int ret; @@ -1993,15 +1981,11 @@ int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor, bool enable) ret = spi_nor_read_sr2(nor, sr2); if (ret) return ret; - if ((enable && (*sr2 & SR2_QUAD_EN_BIT7)) || - (!enable && !(*sr2 & SR2_QUAD_EN_BIT7))) + if (*sr2 & SR2_QUAD_EN_BIT7) return 0; /* Update the Quad Enable bit. */ - if (enable) - *sr2 |= SR2_QUAD_EN_BIT7; - else - *sr2 &= ~SR2_QUAD_EN_BIT7; + *sr2 |= SR2_QUAD_EN_BIT7; ret = spi_nor_write_sr2(nor, sr2); if (ret) @@ -2914,13 +2898,12 @@ static int spi_nor_init_params(struct spi_nor *nor) } /** - * spi_nor_quad_enable() - enable/disable Quad I/O if needed. + * spi_nor_quad_enable() - enable Quad I/O if needed. * @nor: pointer to a 'struct spi_nor' - * @enable: true to enable Quad mode. false to disable Quad mode. * * Return: 0 on success, -errno otherwise. */ -static int spi_nor_quad_enable(struct spi_nor *nor, bool enable) +static int spi_nor_quad_enable(struct spi_nor *nor) { if (!nor->params->quad_enable) return 0; @@ -2929,7 +2912,7 @@ static int spi_nor_quad_enable(struct spi_nor *nor, bool enable) spi_nor_get_protocol_width(nor->write_proto) == 4)) return 0; - return nor->params->quad_enable(nor, enable); + return nor->params->quad_enable(nor); } /** @@ -2953,7 +2936,7 @@ static int spi_nor_init(struct spi_nor *nor) { int err; - err = spi_nor_quad_enable(nor, true); + err = spi_nor_quad_enable(nor); if (err) { dev_dbg(nor->dev, "quad mode not supported\n"); return err; diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h index 95aa32f3ceb1..6f2f6b27173f 100644 --- a/drivers/mtd/spi-nor/core.h +++ b/drivers/mtd/spi-nor/core.h @@ -198,7 +198,7 @@ struct spi_nor_locking_ops { * higher index in the array, the higher priority. * @erase_map: the erase map parsed from the SFDP Sector Map Parameter * Table. - * @quad_enable: enables/disables SPI NOR Quad mode. + * @quad_enable: enables SPI NOR quad mode. * @set_4byte_addr_mode: puts the SPI NOR in 4 byte addressing mode. * @convert_addr: converts an absolute address into something the flash * will understand. Particularly useful when pagesize is @@ -219,7 +219,7 @@ struct spi_nor_flash_parameter { struct spi_nor_erase_map erase_map; - int (*quad_enable)(struct spi_nor *nor, bool enable); + int (*quad_enable)(struct spi_nor *nor); int (*set_4byte_addr_mode)(struct spi_nor *nor, bool enable); u32 (*convert_addr)(struct spi_nor *nor, u32 addr); int (*setup)(struct spi_nor *nor, const struct spi_nor_hwcaps *hwcaps); @@ -406,9 +406,9 @@ int spi_nor_write_ear(struct spi_nor *nor, u8 ear); int spi_nor_wait_till_ready(struct spi_nor *nor); int spi_nor_lock_and_prep(struct spi_nor *nor); void spi_nor_unlock_and_unprep(struct spi_nor *nor); -int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor, bool enable); -int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor, bool enable); -int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor, bool enable); +int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor); +int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor); +int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor); int spi_nor_xread_sr(struct spi_nor *nor, u8 *sr); ssize_t spi_nor_read_data(struct spi_nor *nor, loff_t from, size_t len, From 87ceb6a6b81eca000911403446d4c6043b4e4f82 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Sep 2020 08:20:12 -0600 Subject: [PATCH 430/648] io_uring: drop 'ctx' ref on task work cancelation If task_work ends up being marked for cancelation, we go through a cancelation helper instead of the queue path. In converting task_work to always hold a ctx reference, this path was missed. Make sure that io_req_task_cancel() puts the reference that is being held against the ctx. Fixes: 6d816e088c35 ("io_uring: hold 'ctx' reference around task_work queue + execute") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index be9d628e7854..01756a131be6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1787,8 +1787,10 @@ static void __io_req_task_cancel(struct io_kiocb *req, int error) static void io_req_task_cancel(struct callback_head *cb) { struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + struct io_ring_ctx *ctx = req->ctx; __io_req_task_cancel(req, -ECANCELED); + percpu_ref_put(&ctx->refs); } static void __io_req_task_submit(struct io_kiocb *req) From 6200b0ae4ea28a4bfd8eb434e33e6201b7a6a282 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 13 Sep 2020 14:38:30 -0600 Subject: [PATCH 431/648] io_uring: don't run task work on an exiting task This isn't safe, and isn't needed either. We are guaranteed that any work we queue is on a live task (and will be run), or it goes to our backup io-wq threads if the task is exiting. Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 01756a131be6..a29c8913b1f0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1753,6 +1753,9 @@ static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb, struct io_ring_ctx *ctx = req->ctx; int ret, notify; + if (tsk->flags & PF_EXITING) + return -ESRCH; + /* * SQPOLL kernel thread doesn't need notification, just a wakeup. For * all other cases, use TWA_SIGNAL unconditionally to ensure we're @@ -2012,6 +2015,12 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) static inline bool io_run_task_work(void) { + /* + * Not safe to run on exiting task, and the task_work handling will + * not add work to such a task. + */ + if (unlikely(current->flags & PF_EXITING)) + return false; if (current->task_works) { __set_current_state(TASK_RUNNING); task_work_run(); @@ -8184,6 +8193,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, /* cancel this request, or head link requests */ io_attempt_cancel(ctx, cancel_req); io_put_req(cancel_req); + /* cancellations _may_ trigger task work */ + io_run_task_work(); schedule(); finish_wait(&ctx->inflight_wait, &wait); } From 50145474f6ef4a9c19205b173da6264a644c7489 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 7 Sep 2020 11:45:27 -0700 Subject: [PATCH 432/648] fbcon: remove soft scrollback code This (and the VGA soft scrollback) turns out to have various nasty small special cases that nobody really is willing to fight. The soft scrollback code was really useful a few decades ago when you typically used the console interactively as the main way to interact with the machine, but that just isn't the case any more. So it's not worth dragging along. Tested-by: Yuan Ming Tested-by: Willy Tarreau Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Daniel Vetter Reviewed-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/video/fbdev/core/fbcon.c | 334 +------------------------------ 1 file changed, 4 insertions(+), 330 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 66167830fefd..983472dbd2aa 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -122,12 +122,6 @@ static int logo_lines; /* logo_shown is an index to vc_cons when >= 0; otherwise follows FBCON_LOGO enums. */ static int logo_shown = FBCON_LOGO_CANSHOW; -/* Software scrollback */ -static int fbcon_softback_size = 32768; -static unsigned long softback_buf, softback_curr; -static unsigned long softback_in; -static unsigned long softback_top, softback_end; -static int softback_lines; /* console mappings */ static int first_fb_vc; static int last_fb_vc = MAX_NR_CONSOLES - 1; @@ -167,8 +161,6 @@ static int margin_color; static const struct consw fb_con; -#define CM_SOFTBACK (8) - #define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row) static int fbcon_set_origin(struct vc_data *); @@ -373,18 +365,6 @@ static int get_color(struct vc_data *vc, struct fb_info *info, return color; } -static void fbcon_update_softback(struct vc_data *vc) -{ - int l = fbcon_softback_size / vc->vc_size_row; - - if (l > 5) - softback_end = softback_buf + l * vc->vc_size_row; - else - /* Smaller scrollback makes no sense, and 0 would screw - the operation totally */ - softback_top = 0; -} - static void fb_flashcursor(struct work_struct *work) { struct fb_info *info = container_of(work, struct fb_info, queue); @@ -414,7 +394,7 @@ static void fb_flashcursor(struct work_struct *work) c = scr_readw((u16 *) vc->vc_pos); mode = (!ops->cursor_flash || ops->cursor_state.enable) ? CM_ERASE : CM_DRAW; - ops->cursor(vc, info, mode, softback_lines, get_color(vc, info, c, 1), + ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), get_color(vc, info, c, 0)); console_unlock(); } @@ -471,13 +451,7 @@ static int __init fb_console_setup(char *this_opt) } if (!strncmp(options, "scrollback:", 11)) { - options += 11; - if (*options) { - fbcon_softback_size = simple_strtoul(options, &options, 0); - if (*options == 'k' || *options == 'K') { - fbcon_softback_size *= 1024; - } - } + pr_warn("Ignoring scrollback size option\n"); continue; } @@ -1022,31 +996,6 @@ static const char *fbcon_startup(void) set_blitting_type(vc, info); - if (info->fix.type != FB_TYPE_TEXT) { - if (fbcon_softback_size) { - if (!softback_buf) { - softback_buf = - (unsigned long) - kvmalloc(fbcon_softback_size, - GFP_KERNEL); - if (!softback_buf) { - fbcon_softback_size = 0; - softback_top = 0; - } - } - } else { - if (softback_buf) { - kvfree((void *) softback_buf); - softback_buf = 0; - softback_top = 0; - } - } - if (softback_buf) - softback_in = softback_top = softback_curr = - softback_buf; - softback_lines = 0; - } - /* Setup default font */ if (!p->fontdata && !vc->vc_font.data) { if (!fontname[0] || !(font = find_font(fontname))) @@ -1220,9 +1169,6 @@ static void fbcon_init(struct vc_data *vc, int init) if (logo) fbcon_prepare_logo(vc, info, cols, rows, new_cols, new_rows); - if (vc == svc && softback_buf) - fbcon_update_softback(vc); - if (ops->rotate_font && ops->rotate_font(info, vc)) { ops->rotate = FB_ROTATE_UR; set_blitting_type(vc, info); @@ -1385,7 +1331,6 @@ static void fbcon_cursor(struct vc_data *vc, int mode) { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_ops *ops = info->fbcon_par; - int y; int c = scr_readw((u16 *) vc->vc_pos); ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms); @@ -1399,16 +1344,8 @@ static void fbcon_cursor(struct vc_data *vc, int mode) fbcon_add_cursor_timer(info); ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1; - if (mode & CM_SOFTBACK) { - mode &= ~CM_SOFTBACK; - y = softback_lines; - } else { - if (softback_lines) - fbcon_set_origin(vc); - y = 0; - } - ops->cursor(vc, info, mode, y, get_color(vc, info, c, 1), + ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), get_color(vc, info, c, 0)); } @@ -1479,8 +1416,6 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var, if (con_is_visible(vc)) { update_screen(vc); - if (softback_buf) - fbcon_update_softback(vc); } } @@ -1618,99 +1553,6 @@ static __inline__ void ypan_down_redraw(struct vc_data *vc, int t, int count) scrollback_current = 0; } -static void fbcon_redraw_softback(struct vc_data *vc, struct fbcon_display *p, - long delta) -{ - int count = vc->vc_rows; - unsigned short *d, *s; - unsigned long n; - int line = 0; - - d = (u16 *) softback_curr; - if (d == (u16 *) softback_in) - d = (u16 *) vc->vc_origin; - n = softback_curr + delta * vc->vc_size_row; - softback_lines -= delta; - if (delta < 0) { - if (softback_curr < softback_top && n < softback_buf) { - n += softback_end - softback_buf; - if (n < softback_top) { - softback_lines -= - (softback_top - n) / vc->vc_size_row; - n = softback_top; - } - } else if (softback_curr >= softback_top - && n < softback_top) { - softback_lines -= - (softback_top - n) / vc->vc_size_row; - n = softback_top; - } - } else { - if (softback_curr > softback_in && n >= softback_end) { - n += softback_buf - softback_end; - if (n > softback_in) { - n = softback_in; - softback_lines = 0; - } - } else if (softback_curr <= softback_in && n > softback_in) { - n = softback_in; - softback_lines = 0; - } - } - if (n == softback_curr) - return; - softback_curr = n; - s = (u16 *) softback_curr; - if (s == (u16 *) softback_in) - s = (u16 *) vc->vc_origin; - while (count--) { - unsigned short *start; - unsigned short *le; - unsigned short c; - int x = 0; - unsigned short attr = 1; - - start = s; - le = advance_row(s, 1); - do { - c = scr_readw(s); - if (attr != (c & 0xff00)) { - attr = c & 0xff00; - if (s > start) { - fbcon_putcs(vc, start, s - start, - line, x); - x += s - start; - start = s; - } - } - if (c == scr_readw(d)) { - if (s > start) { - fbcon_putcs(vc, start, s - start, - line, x); - x += s - start + 1; - start = s + 1; - } else { - x++; - start++; - } - } - s++; - d++; - } while (s < le); - if (s > start) - fbcon_putcs(vc, start, s - start, line, x); - line++; - if (d == (u16 *) softback_end) - d = (u16 *) softback_buf; - if (d == (u16 *) softback_in) - d = (u16 *) vc->vc_origin; - if (s == (u16 *) softback_end) - s = (u16 *) softback_buf; - if (s == (u16 *) softback_in) - s = (u16 *) vc->vc_origin; - } -} - static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p, int line, int count, int dy) { @@ -1850,31 +1692,6 @@ static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p, } } -static inline void fbcon_softback_note(struct vc_data *vc, int t, - int count) -{ - unsigned short *p; - - if (vc->vc_num != fg_console) - return; - p = (unsigned short *) (vc->vc_origin + t * vc->vc_size_row); - - while (count) { - scr_memcpyw((u16 *) softback_in, p, vc->vc_size_row); - count--; - p = advance_row(p, 1); - softback_in += vc->vc_size_row; - if (softback_in == softback_end) - softback_in = softback_buf; - if (softback_in == softback_top) { - softback_top += vc->vc_size_row; - if (softback_top == softback_end) - softback_top = softback_buf; - } - } - softback_curr = softback_in; -} - static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, enum con_scroll dir, unsigned int count) { @@ -1897,8 +1714,6 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, case SM_UP: if (count > vc->vc_rows) /* Maximum realistic size */ count = vc->vc_rows; - if (softback_top) - fbcon_softback_note(vc, t, count); if (logo_shown >= 0) goto redraw_up; switch (p->scrollmode) { @@ -2269,14 +2084,6 @@ static int fbcon_switch(struct vc_data *vc) info = registered_fb[con2fb_map[vc->vc_num]]; ops = info->fbcon_par; - if (softback_top) { - if (softback_lines) - fbcon_set_origin(vc); - softback_top = softback_curr = softback_in = softback_buf; - softback_lines = 0; - fbcon_update_softback(vc); - } - if (logo_shown >= 0) { struct vc_data *conp2 = vc_cons[logo_shown].d; @@ -2600,9 +2407,6 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int cnt; char *old_data = NULL; - if (con_is_visible(vc) && softback_lines) - fbcon_set_origin(vc); - resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); if (p->userfont) old_data = vc->vc_font.data; @@ -2628,8 +2432,6 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, cols /= w; rows /= h; vc_resize(vc, cols, rows); - if (con_is_visible(vc) && softback_buf) - fbcon_update_softback(vc); } else if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) { fbcon_clear_margins(vc, 0); @@ -2788,19 +2590,7 @@ static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table) static u16 *fbcon_screen_pos(struct vc_data *vc, int offset) { - unsigned long p; - int line; - - if (vc->vc_num != fg_console || !softback_lines) - return (u16 *) (vc->vc_origin + offset); - line = offset / vc->vc_size_row; - if (line >= softback_lines) - return (u16 *) (vc->vc_origin + offset - - softback_lines * vc->vc_size_row); - p = softback_curr + offset; - if (p >= softback_end) - p += softback_buf - softback_end; - return (u16 *) p; + return (u16 *) (vc->vc_origin + offset); } static unsigned long fbcon_getxy(struct vc_data *vc, unsigned long pos, @@ -2814,22 +2604,7 @@ static unsigned long fbcon_getxy(struct vc_data *vc, unsigned long pos, x = offset % vc->vc_cols; y = offset / vc->vc_cols; - if (vc->vc_num == fg_console) - y += softback_lines; ret = pos + (vc->vc_cols - x) * 2; - } else if (vc->vc_num == fg_console && softback_lines) { - unsigned long offset = pos - softback_curr; - - if (pos < softback_curr) - offset += softback_end - softback_buf; - offset /= 2; - x = offset % vc->vc_cols; - y = offset / vc->vc_cols; - ret = pos + (vc->vc_cols - x) * 2; - if (ret == softback_end) - ret = softback_buf; - if (ret == softback_in) - ret = vc->vc_origin; } else { /* Should not happen */ x = y = 0; @@ -2857,106 +2632,11 @@ static void fbcon_invert_region(struct vc_data *vc, u16 * p, int cnt) a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) | (((a) & 0x0700) << 4); scr_writew(a, p++); - if (p == (u16 *) softback_end) - p = (u16 *) softback_buf; - if (p == (u16 *) softback_in) - p = (u16 *) vc->vc_origin; } } -static void fbcon_scrolldelta(struct vc_data *vc, int lines) -{ - struct fb_info *info = registered_fb[con2fb_map[fg_console]]; - struct fbcon_ops *ops = info->fbcon_par; - struct fbcon_display *disp = &fb_display[fg_console]; - int offset, limit, scrollback_old; - - if (softback_top) { - if (vc->vc_num != fg_console) - return; - if (vc->vc_mode != KD_TEXT || !lines) - return; - if (logo_shown >= 0) { - struct vc_data *conp2 = vc_cons[logo_shown].d; - - if (conp2->vc_top == logo_lines - && conp2->vc_bottom == conp2->vc_rows) - conp2->vc_top = 0; - if (logo_shown == vc->vc_num) { - unsigned long p, q; - int i; - - p = softback_in; - q = vc->vc_origin + - logo_lines * vc->vc_size_row; - for (i = 0; i < logo_lines; i++) { - if (p == softback_top) - break; - if (p == softback_buf) - p = softback_end; - p -= vc->vc_size_row; - q -= vc->vc_size_row; - scr_memcpyw((u16 *) q, (u16 *) p, - vc->vc_size_row); - } - softback_in = softback_curr = p; - update_region(vc, vc->vc_origin, - logo_lines * vc->vc_cols); - } - logo_shown = FBCON_LOGO_CANSHOW; - } - fbcon_cursor(vc, CM_ERASE | CM_SOFTBACK); - fbcon_redraw_softback(vc, disp, lines); - fbcon_cursor(vc, CM_DRAW | CM_SOFTBACK); - return; - } - - if (!scrollback_phys_max) - return; - - scrollback_old = scrollback_current; - scrollback_current -= lines; - if (scrollback_current < 0) - scrollback_current = 0; - else if (scrollback_current > scrollback_max) - scrollback_current = scrollback_max; - if (scrollback_current == scrollback_old) - return; - - if (fbcon_is_inactive(vc, info)) - return; - - fbcon_cursor(vc, CM_ERASE); - - offset = disp->yscroll - scrollback_current; - limit = disp->vrows; - switch (disp->scrollmode) { - case SCROLL_WRAP_MOVE: - info->var.vmode |= FB_VMODE_YWRAP; - break; - case SCROLL_PAN_MOVE: - case SCROLL_PAN_REDRAW: - limit -= vc->vc_rows; - info->var.vmode &= ~FB_VMODE_YWRAP; - break; - } - if (offset < 0) - offset += limit; - else if (offset >= limit) - offset -= limit; - - ops->var.xoffset = 0; - ops->var.yoffset = offset * vc->vc_font.height; - ops->update_start(info); - - if (!scrollback_current) - fbcon_cursor(vc, CM_DRAW); -} - static int fbcon_set_origin(struct vc_data *vc) { - if (softback_lines) - fbcon_scrolldelta(vc, softback_lines); return 0; } @@ -3020,8 +2700,6 @@ static void fbcon_modechanged(struct fb_info *info) fbcon_set_palette(vc, color_table); update_screen(vc); - if (softback_buf) - fbcon_update_softback(vc); } } @@ -3432,7 +3110,6 @@ static const struct consw fb_con = { .con_font_default = fbcon_set_def_font, .con_font_copy = fbcon_copy_font, .con_set_palette = fbcon_set_palette, - .con_scrolldelta = fbcon_scrolldelta, .con_set_origin = fbcon_set_origin, .con_invert_region = fbcon_invert_region, .con_screen_pos = fbcon_screen_pos, @@ -3667,9 +3344,6 @@ static void fbcon_exit(void) } #endif - kvfree((void *)softback_buf); - softback_buf = 0UL; - for_each_registered_fb(i) { int pending = 0; From 06a0df4d1b8b13b551668e47b11fd7629033b7df Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 8 Sep 2020 10:56:27 -0700 Subject: [PATCH 433/648] fbcon: remove now unusued 'softback_lines' cursor() argument Since the softscroll code got removed, this argument is always zero and makes no sense any more. Tested-by: Yuan Ming Tested-by: Willy Tarreau Reviewed-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/video/fbdev/core/bitblit.c | 11 +---------- drivers/video/fbdev/core/fbcon.c | 4 ++-- drivers/video/fbdev/core/fbcon.h | 2 +- drivers/video/fbdev/core/fbcon_ccw.c | 11 +---------- drivers/video/fbdev/core/fbcon_cw.c | 11 +---------- drivers/video/fbdev/core/fbcon_ud.c | 11 +---------- drivers/video/fbdev/core/tileblit.c | 2 +- 7 files changed, 8 insertions(+), 44 deletions(-) diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c index 4e6cbc24346d..9725ecd1255b 100644 --- a/drivers/video/fbdev/core/bitblit.c +++ b/drivers/video/fbdev/core/bitblit.c @@ -234,7 +234,7 @@ static void bit_clear_margins(struct vc_data *vc, struct fb_info *info, } static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode, - int softback_lines, int fg, int bg) + int fg, int bg) { struct fb_cursor cursor; struct fbcon_ops *ops = info->fbcon_par; @@ -247,15 +247,6 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode, cursor.set = 0; - if (softback_lines) { - if (y + softback_lines >= vc->vc_rows) { - mode = CM_ERASE; - ops->cursor_flash = 0; - return; - } else - y += softback_lines; - } - c = scr_readw((u16 *) vc->vc_pos); attribute = get_attribute(info, c); src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height)); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 983472dbd2aa..0b49b0f44edf 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -394,7 +394,7 @@ static void fb_flashcursor(struct work_struct *work) c = scr_readw((u16 *) vc->vc_pos); mode = (!ops->cursor_flash || ops->cursor_state.enable) ? CM_ERASE : CM_DRAW; - ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), + ops->cursor(vc, info, mode, get_color(vc, info, c, 1), get_color(vc, info, c, 0)); console_unlock(); } @@ -1345,7 +1345,7 @@ static void fbcon_cursor(struct vc_data *vc, int mode) ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1; - ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), + ops->cursor(vc, info, mode, get_color(vc, info, c, 1), get_color(vc, info, c, 0)); } diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h index 20dea853765f..78bb14c03643 100644 --- a/drivers/video/fbdev/core/fbcon.h +++ b/drivers/video/fbdev/core/fbcon.h @@ -62,7 +62,7 @@ struct fbcon_ops { void (*clear_margins)(struct vc_data *vc, struct fb_info *info, int color, int bottom_only); void (*cursor)(struct vc_data *vc, struct fb_info *info, int mode, - int softback_lines, int fg, int bg); + int fg, int bg); int (*update_start)(struct fb_info *info); int (*rotate_font)(struct fb_info *info, struct vc_data *vc); struct fb_var_screeninfo var; /* copy of the current fb_var_screeninfo */ diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c index 5b177131e062..bbd869efd03b 100644 --- a/drivers/video/fbdev/core/fbcon_ccw.c +++ b/drivers/video/fbdev/core/fbcon_ccw.c @@ -219,7 +219,7 @@ static void ccw_clear_margins(struct vc_data *vc, struct fb_info *info, } static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode, - int softback_lines, int fg, int bg) + int fg, int bg) { struct fb_cursor cursor; struct fbcon_ops *ops = info->fbcon_par; @@ -236,15 +236,6 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode, cursor.set = 0; - if (softback_lines) { - if (y + softback_lines >= vc->vc_rows) { - mode = CM_ERASE; - ops->cursor_flash = 0; - return; - } else - y += softback_lines; - } - c = scr_readw((u16 *) vc->vc_pos); attribute = get_attribute(info, c); src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.width)); diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c index 894d01a62f30..a34cbe8e9874 100644 --- a/drivers/video/fbdev/core/fbcon_cw.c +++ b/drivers/video/fbdev/core/fbcon_cw.c @@ -202,7 +202,7 @@ static void cw_clear_margins(struct vc_data *vc, struct fb_info *info, } static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode, - int softback_lines, int fg, int bg) + int fg, int bg) { struct fb_cursor cursor; struct fbcon_ops *ops = info->fbcon_par; @@ -219,15 +219,6 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode, cursor.set = 0; - if (softback_lines) { - if (y + softback_lines >= vc->vc_rows) { - mode = CM_ERASE; - ops->cursor_flash = 0; - return; - } else - y += softback_lines; - } - c = scr_readw((u16 *) vc->vc_pos); attribute = get_attribute(info, c); src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.width)); diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c index 01b87f278d79..199cbc7abe35 100644 --- a/drivers/video/fbdev/core/fbcon_ud.c +++ b/drivers/video/fbdev/core/fbcon_ud.c @@ -249,7 +249,7 @@ static void ud_clear_margins(struct vc_data *vc, struct fb_info *info, } static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode, - int softback_lines, int fg, int bg) + int fg, int bg) { struct fb_cursor cursor; struct fbcon_ops *ops = info->fbcon_par; @@ -267,15 +267,6 @@ static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode, cursor.set = 0; - if (softback_lines) { - if (y + softback_lines >= vc->vc_rows) { - mode = CM_ERASE; - ops->cursor_flash = 0; - return; - } else - y += softback_lines; - } - c = scr_readw((u16 *) vc->vc_pos); attribute = get_attribute(info, c); src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.height)); diff --git a/drivers/video/fbdev/core/tileblit.c b/drivers/video/fbdev/core/tileblit.c index 1dfaff0881fb..31b85b71cc37 100644 --- a/drivers/video/fbdev/core/tileblit.c +++ b/drivers/video/fbdev/core/tileblit.c @@ -80,7 +80,7 @@ static void tile_clear_margins(struct vc_data *vc, struct fb_info *info, } static void tile_cursor(struct vc_data *vc, struct fb_info *info, int mode, - int softback_lines, int fg, int bg) + int fg, int bg) { struct fb_tilecursor cursor; int use_sw = vc->vc_cursor_type & CUR_SW; From 973c096f6a85e5b5f2a295126ba6928d9a6afd45 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 9 Sep 2020 14:53:50 -0700 Subject: [PATCH 434/648] vgacon: remove software scrollback support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yunhai Zhang recently fixed a VGA software scrollback bug in commit ebfdfeeae8c0 ("vgacon: Fix for missing check in scrollback handling"), but that then made people look more closely at some of this code, and there were more problems on the vgacon side, but also the fbcon software scrollback. We don't really have anybody who maintains this code - probably because nobody actually _uses_ it any more. Sure, people still use both VGA and the framebuffer consoles, but they are no longer the main user interfaces to the kernel, and haven't been for decades, so these kinds of extra features end up bitrotting and not really being used. So rather than try to maintain a likely unused set of code, I'll just aggressively remove it, and see if anybody even notices. Maybe there are people who haven't jumped on the whole GUI badnwagon yet, and think it's just a fad. And maybe those people use the scrollback code. If that turns out to be the case, we can resurrect this again, once we've found the sucker^Wmaintainer for it who actually uses it. Reported-by: NopNop Nop Tested-by: Willy Tarreau Cc: 张云海 Acked-by: Andy Lutomirski Acked-by: Willy Tarreau Reviewed-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- arch/powerpc/configs/pasemi_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - drivers/video/console/Kconfig | 46 ------ drivers/video/console/vgacon.c | 221 +------------------------- 6 files changed, 1 insertion(+), 270 deletions(-) diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index af9af03059e4..15ed8d0aa014 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -108,7 +108,6 @@ CONFIG_FB_NVIDIA=y CONFIG_FB_NVIDIA_I2C=y CONFIG_FB_RADEON=y # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 5e6f92ba3210..66e9a0fd64ff 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -743,7 +743,6 @@ CONFIG_FB_TRIDENT=m CONFIG_FB_SM501=m CONFIG_FB_IBM_GXT4500=y CONFIG_LCD_PLATFORM=m -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index d7577fece9eb..f556827dea58 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -186,7 +186,6 @@ CONFIG_DRM_I915=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index f85600143747..9936528e1939 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -181,7 +181,6 @@ CONFIG_DRM_I915=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index 5e850cc9f891..39deb22a4180 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -22,52 +22,6 @@ config VGA_CONSOLE Say Y. -config VGACON_SOFT_SCROLLBACK - bool "Enable Scrollback Buffer in System RAM" - depends on VGA_CONSOLE - default n - help - The scrollback buffer of the standard VGA console is located in - the VGA RAM. The size of this RAM is fixed and is quite small. - If you require a larger scrollback buffer, this can be placed in - System RAM which is dynamically allocated during initialization. - Placing the scrollback buffer in System RAM will slightly slow - down the console. - - If you want this feature, say 'Y' here and enter the amount of - RAM to allocate for this buffer. If unsure, say 'N'. - -config VGACON_SOFT_SCROLLBACK_SIZE - int "Scrollback Buffer Size (in KB)" - depends on VGACON_SOFT_SCROLLBACK - range 1 1024 - default "64" - help - Enter the amount of System RAM to allocate for scrollback - buffers of VGA consoles. Each 64KB will give you approximately - 16 80x25 screenfuls of scrollback buffer. - -config VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT - bool "Persistent Scrollback History for each console by default" - depends on VGACON_SOFT_SCROLLBACK - default n - help - Say Y here if the scrollback history should persist by default when - switching between consoles. Otherwise, the scrollback history will be - flushed each time the console is switched. This feature can also be - enabled using the boot command line parameter - 'vgacon.scrollback_persistent=1'. - - This feature might break your tool of choice to flush the scrollback - buffer, e.g. clear(1) will work fine but Debian's clear_console(1) - will be broken, which might cause security issues. - You can use the escape sequence \e[3J instead if this feature is - activated. - - Note that a buffer of VGACON_SOFT_SCROLLBACK_SIZE is taken for each - created tty device. - So if you use a RAM-constrained system, say N here. - config MDA_CONSOLE depends on !M68K && !PARISC && ISA tristate "MDA text console (dual-headed)" diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index a52bb3740073..17876f0179b5 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -165,214 +165,6 @@ static inline void vga_set_mem_top(struct vc_data *c) write_vga(12, (c->vc_visible_origin - vga_vram_base) / 2); } -#ifdef CONFIG_VGACON_SOFT_SCROLLBACK -/* software scrollback */ -struct vgacon_scrollback_info { - void *data; - int tail; - int size; - int rows; - int cnt; - int cur; - int save; - int restore; -}; - -static struct vgacon_scrollback_info *vgacon_scrollback_cur; -static struct vgacon_scrollback_info vgacon_scrollbacks[MAX_NR_CONSOLES]; -static bool scrollback_persistent = \ - IS_ENABLED(CONFIG_VGACON_SOFT_SCROLLBACK_PERSISTENT_ENABLE_BY_DEFAULT); -module_param_named(scrollback_persistent, scrollback_persistent, bool, 0000); -MODULE_PARM_DESC(scrollback_persistent, "Enable persistent scrollback for all vga consoles"); - -static void vgacon_scrollback_reset(int vc_num, size_t reset_size) -{ - struct vgacon_scrollback_info *scrollback = &vgacon_scrollbacks[vc_num]; - - if (scrollback->data && reset_size > 0) - memset(scrollback->data, 0, reset_size); - - scrollback->cnt = 0; - scrollback->tail = 0; - scrollback->cur = 0; -} - -static void vgacon_scrollback_init(int vc_num) -{ - int pitch = vga_video_num_columns * 2; - size_t size = CONFIG_VGACON_SOFT_SCROLLBACK_SIZE * 1024; - int rows = size / pitch; - void *data; - - data = kmalloc_array(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE, 1024, - GFP_NOWAIT); - - vgacon_scrollbacks[vc_num].data = data; - vgacon_scrollback_cur = &vgacon_scrollbacks[vc_num]; - - vgacon_scrollback_cur->rows = rows - 1; - vgacon_scrollback_cur->size = rows * pitch; - - vgacon_scrollback_reset(vc_num, size); -} - -static void vgacon_scrollback_switch(int vc_num) -{ - if (!scrollback_persistent) - vc_num = 0; - - if (!vgacon_scrollbacks[vc_num].data) { - vgacon_scrollback_init(vc_num); - } else { - if (scrollback_persistent) { - vgacon_scrollback_cur = &vgacon_scrollbacks[vc_num]; - } else { - size_t size = CONFIG_VGACON_SOFT_SCROLLBACK_SIZE * 1024; - - vgacon_scrollback_reset(vc_num, size); - } - } -} - -static void vgacon_scrollback_startup(void) -{ - vgacon_scrollback_cur = &vgacon_scrollbacks[0]; - vgacon_scrollback_init(0); -} - -static void vgacon_scrollback_update(struct vc_data *c, int t, int count) -{ - void *p; - - if (!vgacon_scrollback_cur->data || !vgacon_scrollback_cur->size || - c->vc_num != fg_console) - return; - - p = (void *) (c->vc_origin + t * c->vc_size_row); - - while (count--) { - if ((vgacon_scrollback_cur->tail + c->vc_size_row) > - vgacon_scrollback_cur->size) - vgacon_scrollback_cur->tail = 0; - - scr_memcpyw(vgacon_scrollback_cur->data + - vgacon_scrollback_cur->tail, - p, c->vc_size_row); - - vgacon_scrollback_cur->cnt++; - p += c->vc_size_row; - vgacon_scrollback_cur->tail += c->vc_size_row; - - if (vgacon_scrollback_cur->tail >= vgacon_scrollback_cur->size) - vgacon_scrollback_cur->tail = 0; - - if (vgacon_scrollback_cur->cnt > vgacon_scrollback_cur->rows) - vgacon_scrollback_cur->cnt = vgacon_scrollback_cur->rows; - - vgacon_scrollback_cur->cur = vgacon_scrollback_cur->cnt; - } -} - -static void vgacon_restore_screen(struct vc_data *c) -{ - c->vc_origin = c->vc_visible_origin; - vgacon_scrollback_cur->save = 0; - - if (!vga_is_gfx && !vgacon_scrollback_cur->restore) { - scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf, - c->vc_screenbuf_size > vga_vram_size ? - vga_vram_size : c->vc_screenbuf_size); - vgacon_scrollback_cur->restore = 1; - vgacon_scrollback_cur->cur = vgacon_scrollback_cur->cnt; - } -} - -static void vgacon_scrolldelta(struct vc_data *c, int lines) -{ - int start, end, count, soff; - - if (!lines) { - vgacon_restore_screen(c); - return; - } - - if (!vgacon_scrollback_cur->data) - return; - - if (!vgacon_scrollback_cur->save) { - vgacon_cursor(c, CM_ERASE); - vgacon_save_screen(c); - c->vc_origin = (unsigned long)c->vc_screenbuf; - vgacon_scrollback_cur->save = 1; - } - - vgacon_scrollback_cur->restore = 0; - start = vgacon_scrollback_cur->cur + lines; - end = start + abs(lines); - - if (start < 0) - start = 0; - - if (start > vgacon_scrollback_cur->cnt) - start = vgacon_scrollback_cur->cnt; - - if (end < 0) - end = 0; - - if (end > vgacon_scrollback_cur->cnt) - end = vgacon_scrollback_cur->cnt; - - vgacon_scrollback_cur->cur = start; - count = end - start; - soff = vgacon_scrollback_cur->tail - - ((vgacon_scrollback_cur->cnt - end) * c->vc_size_row); - soff -= count * c->vc_size_row; - - if (soff < 0) - soff += vgacon_scrollback_cur->size; - - count = vgacon_scrollback_cur->cnt - start; - - if (count > c->vc_rows) - count = c->vc_rows; - - if (count) { - int copysize; - - int diff = c->vc_rows - count; - void *d = (void *) c->vc_visible_origin; - void *s = (void *) c->vc_screenbuf; - - count *= c->vc_size_row; - /* how much memory to end of buffer left? */ - copysize = min(count, vgacon_scrollback_cur->size - soff); - scr_memcpyw(d, vgacon_scrollback_cur->data + soff, copysize); - d += copysize; - count -= copysize; - - if (count) { - scr_memcpyw(d, vgacon_scrollback_cur->data, count); - d += count; - } - - if (diff) - scr_memcpyw(d, s, diff * c->vc_size_row); - } else - vgacon_cursor(c, CM_MOVE); -} - -static void vgacon_flush_scrollback(struct vc_data *c) -{ - size_t size = CONFIG_VGACON_SOFT_SCROLLBACK_SIZE * 1024; - - vgacon_scrollback_reset(c->vc_num, size); -} -#else -#define vgacon_scrollback_startup(...) do { } while (0) -#define vgacon_scrollback_init(...) do { } while (0) -#define vgacon_scrollback_update(...) do { } while (0) -#define vgacon_scrollback_switch(...) do { } while (0) - static void vgacon_restore_screen(struct vc_data *c) { if (c->vc_origin != c->vc_visible_origin) @@ -386,11 +178,6 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines) vga_set_mem_top(c); } -static void vgacon_flush_scrollback(struct vc_data *c) -{ -} -#endif /* CONFIG_VGACON_SOFT_SCROLLBACK */ - static const char *vgacon_startup(void) { const char *display_desc = NULL; @@ -573,10 +360,7 @@ static const char *vgacon_startup(void) vgacon_xres = screen_info.orig_video_cols * VGA_FONTWIDTH; vgacon_yres = vga_scan_lines; - if (!vga_init_done) { - vgacon_scrollback_startup(); - vga_init_done = true; - } + vga_init_done = true; return display_desc; } @@ -869,7 +653,6 @@ static int vgacon_switch(struct vc_data *c) vgacon_doresize(c, c->vc_cols, c->vc_rows); } - vgacon_scrollback_switch(c->vc_num); return 0; /* Redrawing not needed */ } @@ -1386,7 +1169,6 @@ static bool vgacon_scroll(struct vc_data *c, unsigned int t, unsigned int b, oldo = c->vc_origin; delta = lines * c->vc_size_row; if (dir == SM_UP) { - vgacon_scrollback_update(c, t, lines); if (c->vc_scr_end + delta >= vga_vram_end) { scr_memcpyw((u16 *) vga_vram_base, (u16 *) (oldo + delta), @@ -1450,7 +1232,6 @@ const struct consw vga_con = { .con_save_screen = vgacon_save_screen, .con_build_attr = vgacon_build_attr, .con_invert_region = vgacon_invert_region, - .con_flush_scrollback = vgacon_flush_scrollback, }; EXPORT_SYMBOL(vga_con); From 6c77545af100a72bf5e28142b510ba042a17648d Mon Sep 17 00:00:00 2001 From: Vincent Huang Date: Mon, 14 Sep 2020 12:19:08 -0700 Subject: [PATCH 435/648] Input: trackpoint - add new trackpoint variant IDs Add trackpoint variant IDs to allow supported control on Synaptics trackpoints. Signed-off-by: Vincent Huang Link: https://lore.kernel.org/r/20200914120327.2592-1-vincent.huang@tw.synaptics.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 10 ++++++---- drivers/input/mouse/trackpoint.h | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 3eefee2ee2a1..854d5e758724 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -17,10 +17,12 @@ #include "trackpoint.h" static const char * const trackpoint_variants[] = { - [TP_VARIANT_IBM] = "IBM", - [TP_VARIANT_ALPS] = "ALPS", - [TP_VARIANT_ELAN] = "Elan", - [TP_VARIANT_NXP] = "NXP", + [TP_VARIANT_IBM] = "IBM", + [TP_VARIANT_ALPS] = "ALPS", + [TP_VARIANT_ELAN] = "Elan", + [TP_VARIANT_NXP] = "NXP", + [TP_VARIANT_JYT_SYNAPTICS] = "JYT_Synaptics", + [TP_VARIANT_SYNAPTICS] = "Synaptics", }; /* diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h index 5cb93ed26085..eb5412904fe0 100644 --- a/drivers/input/mouse/trackpoint.h +++ b/drivers/input/mouse/trackpoint.h @@ -24,10 +24,12 @@ * 0x01 was the original IBM trackpoint, others implement very limited * subset of trackpoint features. */ -#define TP_VARIANT_IBM 0x01 -#define TP_VARIANT_ALPS 0x02 -#define TP_VARIANT_ELAN 0x03 -#define TP_VARIANT_NXP 0x04 +#define TP_VARIANT_IBM 0x01 +#define TP_VARIANT_ALPS 0x02 +#define TP_VARIANT_ELAN 0x03 +#define TP_VARIANT_NXP 0x04 +#define TP_VARIANT_JYT_SYNAPTICS 0x05 +#define TP_VARIANT_SYNAPTICS 0x06 /* * Commands From 4bba9dab86b6ac15ca560ef1f2b5aa4529cbf784 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 14 Sep 2020 13:58:16 +0200 Subject: [PATCH 436/648] batman-adv: Add missing include for in_interrupt() The fix for receiving (internally generated) bla packets outside the interrupt context introduced the usage of in_interrupt(). But this functionality is only defined in linux/preempt.h which was not included with the same patch. Fixes: 279e89b2281a ("batman-adv: bla: use netif_rx_ni when not in interrupt context") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index d8c5d3170676..08419a2e6b95 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include From b6ec413461034d49f9e586845825adb35ba308f6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Sep 2020 17:58:26 -0700 Subject: [PATCH 437/648] core/entry: Report syscall correctly for trace and audit On v5.8 when doing seccomp syscall rewrites (e.g. getpid into getppid as seen in the seccomp selftests), trace (and audit) correctly see the rewritten syscall on entry and exit: seccomp_bpf-1307 [000] .... 22974.874393: sys_enter: NR 110 (... seccomp_bpf-1307 [000] .N.. 22974.874401: sys_exit: NR 110 = 1304 With mainline we see a mismatched enter and exit (the original syscall is incorrectly visible on entry): seccomp_bpf-1030 [000] .... 21.806766: sys_enter: NR 39 (... seccomp_bpf-1030 [000] .... 21.806767: sys_exit: NR 110 = 1027 When ptrace or seccomp change the syscall, this needs to be visible to trace and audit at that time as well. Update the syscall earlier so they see the correct value. Fixes: d88d59b64ca3 ("core/entry: Respect syscall number rewrites") Reported-by: Michael Ellerman Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200912005826.586171-1-keescook@chromium.org --- kernel/entry/common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 18683598edbc..6fdb6105e6d6 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -60,13 +60,15 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, return ret; } + /* Either of the above might have changed the syscall number */ + syscall = syscall_get_nr(current, regs); + if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall); syscall_enter_audit(regs, syscall); - /* The above might have changed the syscall number */ - return ret ? : syscall_get_nr(current, regs); + return ret ? : syscall; } static __always_inline long From 8a39e8c4d9baf65d88f66d49ac684df381e30055 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 11 Sep 2020 15:00:05 +0200 Subject: [PATCH 438/648] perf test: Fix the "signal" test inline assembly When compiling with DEBUG=1 on Fedora 32 I'm getting crash for 'perf test signal': Program received signal SIGSEGV, Segmentation fault. 0x0000000000c68548 in __test_function () (gdb) bt #0 0x0000000000c68548 in __test_function () #1 0x00000000004d62e9 in test_function () at tests/bp_signal.c:61 #2 0x00000000004d689a in test__bp_signal (test=0xa8e280 DW_AT_producer : (indirect string, offset: 0x254a): GNU C99 10.2.1 20200723 (Red Hat 10.2.1-1) -mtune=generic -march=x86-64 -ggdb3 -std=gnu99 -fno-omit-frame-pointer -funwind-tables -fstack-protector-all ^^^^^ ^^^^^ ^^^^^ $ Before: $ perf test signal 20: Breakpoint overflow signal handler : FAILED! $ After: $ perf test signal 20: Breakpoint overflow signal handler : Ok $ Fixes: 8fd34e1cce18 ("perf test: Improve bp_signal") Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lore.kernel.org/lkml/20200911130005.1842138-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_signal.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index da8ec1e8e064..cc9fbcedb364 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -45,10 +45,13 @@ volatile long the_var; #if defined (__x86_64__) extern void __test_function(volatile long *ptr); asm ( + ".pushsection .text;" ".globl __test_function\n" + ".type __test_function, @function;" "__test_function:\n" "incq (%rdi)\n" - "ret\n"); + "ret\n" + ".popsection\n"); #else static void __test_function(volatile long *ptr) { From d2c73501a767514b6c85c7feff9457a165d51057 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 11 Sep 2020 22:37:25 -0700 Subject: [PATCH 439/648] perf bench: Fix 2 memory sanitizer warnings Memory sanitizer warns if a write is performed where the memory being read for the write is uninitialized. Avoid this warning by initializing the memory. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200912053725.1405857-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 71d830d7b923..cecce93ccc63 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -66,11 +66,10 @@ static void fdpair(int fds[2]) /* Block until we're ready to go */ static void ready(int ready_out, int wakefd) { - char dummy; struct pollfd pollfd = { .fd = wakefd, .events = POLLIN }; /* Tell them we're ready. */ - if (write(ready_out, &dummy, 1) != 1) + if (write(ready_out, "R", 1) != 1) err(EXIT_FAILURE, "CLIENT: ready write"); /* Wait for "GO" signal */ @@ -85,6 +84,7 @@ static void *sender(struct sender_context *ctx) unsigned int i, j; ready(ctx->ready_out, ctx->wakefd); + memset(data, 'S', sizeof(data)); /* Now pump to every receiver. */ for (i = 0; i < nr_loops; i++) { From 4202c9fdf03d79dedaa94b2c4cf574f25793d669 Mon Sep 17 00:00:00 2001 From: Olympia Giannou Date: Fri, 11 Sep 2020 14:17:24 +0000 Subject: [PATCH 440/648] rndis_host: increase sleep time in the query-response loop Some WinCE devices face connectivity issues via the NDIS interface. They fail to register, resulting in -110 timeout errors and failures during the probe procedure. In this kind of WinCE devices, the Windows-side ndis driver needs quite more time to be loaded and configured, so that the linux rndis host queries to them fail to be responded correctly on time. More specifically, when INIT is called on the WinCE side - no other requests can be served by the Client and this results in a failed QUERY afterwards. The increase of the waiting time on the side of the linux rndis host in the command-response loop leaves the INIT process to complete and respond to a QUERY, which comes afterwards. The WinCE devices with this special "feature" in their ndis driver are satisfied by this fix. Signed-off-by: Olympia Giannou Signed-off-by: David S. Miller --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bd9c07888ebb..6fa7a009a24a 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -201,7 +201,7 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) dev_dbg(&info->control->dev, "rndis response error, code %d\n", retval); } - msleep(20); + msleep(40); } dev_dbg(&info->control->dev, "rndis response timeout\n"); return -ETIMEDOUT; From ce4326d2752fd51726be50e94b750d3f74670ae3 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 11 Sep 2020 19:56:52 -0700 Subject: [PATCH 441/648] perf record: Set PERF_RECORD_PERIOD if attr->freq is set. evsel__config() would only set PERF_RECORD_PERIOD if it set attr->freq from perf record options. When it is set by libpfm events, it would not get set. This changes evsel__config to see if attr->freq is set outside of whether or not it changes attr->freq itself. Signed-off-by: David Sharp Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Athira Jajeev Cc: Daniel Borkmann Cc: Ian Rogers Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Cc: david sharp Link: http://lore.kernel.org/lkml/20200912025655.1337192-2-irogers@google.com Signed-off-by: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fd865002cbbd..3e985016da7e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -979,13 +979,18 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (!attr->sample_period || (opts->user_freq != UINT_MAX || opts->user_interval != ULLONG_MAX)) { if (opts->freq) { - evsel__set_sample_bit(evsel, PERIOD); attr->freq = 1; attr->sample_freq = opts->freq; } else { attr->sample_period = opts->default_interval; } } + /* + * If attr->freq was set (here or earlier), ask for period + * to be sampled. + */ + if (attr->freq) + evsel__set_sample_bit(evsel, PERIOD); if (opts->no_samples) attr->sample_freq = 0; From ae5dcc8abe311ceafd8753fb488e38c4f5e8e699 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 11 Sep 2020 19:56:53 -0700 Subject: [PATCH 442/648] perf record: Prevent override of attr->sample_period for libpfm4 events Before: $ perf record -c 10000 --pfm-events=cycles:period=77777 Would yield a cycles event with period=10000, instead of 77777. the event string and perf record initializing the event. This was due to an ordering issue between libpfm4 parsing events with attr->sample_period != 0 by the time intent of the author. perf_evsel__config() is invoked. This seems to have been the This patch fixes the problem by preventing override for Signed-off-by: Stephane Eranian Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Athira Jajeev Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20200912025655.1337192-3-irogers@google.com Signed-off-by: Ian Rogers Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3e985016da7e..459b51e90063 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -976,8 +976,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period || (opts->user_freq != UINT_MAX || - opts->user_interval != ULLONG_MAX)) { + if (!attr->sample_period) { if (opts->freq) { attr->freq = 1; attr->sample_freq = opts->freq; From dea36631e6f186d4b853af67a4aef2e35cfa8bb7 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:26 +0200 Subject: [PATCH 443/648] net: lantiq: Wake TX queue again The call to netif_wake_queue() when the TX descriptors were freed was missing. When there are no TX buffers available the TX queue will be stopped, but it was not started again when they are available again, this is fixed in this patch. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 1645e4e7ebdb..1feb9fc710e0 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -268,6 +268,9 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) net_dev->stats.tx_bytes += bytes; netdev_completed_queue(ch->priv->net_dev, pkts, bytes); + if (netif_queue_stopped(net_dev)) + netif_wake_queue(net_dev); + if (pkts < budget) { napi_complete(&ch->napi); ltq_dma_enable_irq(&ch->dma); From 74c7b80e222b58d3cea731d31e2a31a77fea8345 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:27 +0200 Subject: [PATCH 444/648] net: lantiq: use netif_tx_napi_add() for TX NAPI netif_tx_napi_add() should be used for NAPI in the TX direction instead of the netif_napi_add() function. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 1feb9fc710e0..f34e4dc8c661 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -502,7 +502,7 @@ static int xrx200_probe(struct platform_device *pdev) /* setup NAPI */ netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32); - netif_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); + netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); platform_set_drvdata(pdev, priv); From c582a7fea9dad4d309437d1a7e22e6d2cb380e2e Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:28 +0200 Subject: [PATCH 445/648] net: lantiq: Use napi_complete_done() Use napi_complete_done() and activate the interrupts when this function returns true. This way the generic NAPI code can take care of activating the interrupts. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index f34e4dc8c661..abee7d61074c 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -230,8 +230,8 @@ static int xrx200_poll_rx(struct napi_struct *napi, int budget) } if (rx < budget) { - napi_complete(&ch->napi); - ltq_dma_enable_irq(&ch->dma); + if (napi_complete_done(&ch->napi, rx)) + ltq_dma_enable_irq(&ch->dma); } return rx; @@ -272,8 +272,8 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) netif_wake_queue(net_dev); if (pkts < budget) { - napi_complete(&ch->napi); - ltq_dma_enable_irq(&ch->dma); + if (napi_complete_done(&ch->napi, pkts)) + ltq_dma_enable_irq(&ch->dma); } return pkts; From 9423361da52356cb68642db5b2729b6b85aad330 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Sep 2020 21:36:29 +0200 Subject: [PATCH 446/648] net: lantiq: Disable IRQs only if NAPI gets scheduled The napi_schedule() call will only schedule the NAPI if it is not already running. To make sure that we do not deactivate interrupts without scheduling NAPI only deactivate the interrupts in case NAPI also gets scheduled. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index abee7d61074c..635ff3a5dcfb 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -345,10 +345,12 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr) { struct xrx200_chan *ch = ptr; - ltq_dma_disable_irq(&ch->dma); - ltq_dma_ack_irq(&ch->dma); + if (napi_schedule_prep(&ch->napi)) { + __napi_schedule(&ch->napi); + ltq_dma_disable_irq(&ch->dma); + } - napi_schedule(&ch->napi); + ltq_dma_ack_irq(&ch->dma); return IRQ_HANDLED; } From 1869e226a7b3ef75b4f70ede2f1b7229f7157fa4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 13 Sep 2020 12:43:39 -0600 Subject: [PATCH 447/648] ipv4: Initialize flowi4_multipath_hash in data path flowi4_multipath_hash was added by the commit referenced below for tunnels. Unfortunately, the patch did not initialize the new field for several fast path lookups that do not initialize the entire flow struct to 0. Fix those locations. Currently, flowi4_multipath_hash is random garbage and affects the hash value computed by fib_multipath_hash for multipath selection. Fixes: 24ba14406c5c ("route: Add multipath_hash in flowi_common to make user-define hash") Signed-off-by: David Ahern Cc: wenxu Signed-off-by: David S. Miller --- include/net/flow.h | 1 + net/core/filter.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 1 + 4 files changed, 4 insertions(+) diff --git a/include/net/flow.h b/include/net/flow.h index 929d3ca614d0..b2531df3f65f 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -116,6 +116,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->saddr = saddr; fl4->fl4_dport = dport; fl4->fl4_sport = sport; + fl4->flowi4_multipath_hash = 0; } /* Reset some input parameters after previous lookup */ diff --git a/net/core/filter.c b/net/core/filter.c index 1f647ab986b6..1b168371ba96 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4838,6 +4838,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl4.saddr = params->ipv4_src; fl4.fl4_sport = params->sport; fl4.fl4_dport = params->dport; + fl4.flowi4_multipath_hash = 0; if (flags & BPF_FIB_LOOKUP_DIRECT) { u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 41079490a118..86a23e4a6a50 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -362,6 +362,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8ca6bcab7b03..e5f210d00851 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2147,6 +2147,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; From 8d761d2ccc3d41107b7805383a0baeeff05e91c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Sep 2020 19:02:18 -0300 Subject: [PATCH 448/648] tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: 15e9e35cd1dec2bc ("KVM: MIPS: Change the definition of kvm type") 004a01241c5a0d37 ("arm64/x86: KVM: Introduce steal-time cap") That do not result in any change in tooling, as the additions are not being used in any table generator. This silences these perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Andrew Jones Cc: Huacai Chen Cc: Jiri Olsa Cc: Marc Zyngier Cc: Namhyung Kim Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f6d86033c4fa..7d8eced6f459 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -790,9 +790,10 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_PPC_HV 1 #define KVM_VM_PPC_PR 2 -/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */ -#define KVM_VM_MIPS_TE 0 +/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */ +#define KVM_VM_MIPS_AUTO 0 #define KVM_VM_MIPS_VZ 1 +#define KVM_VM_MIPS_TE 2 #define KVM_S390_SIE_PAGE_OFFSET 1 @@ -1035,6 +1036,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_LAST_CPU 184 #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 +#define KVM_CAP_STEAL_TIME 187 #ifdef KVM_CAP_IRQ_ROUTING From 2fa3fc9579c9f7682d832cd1dfdc56f46f4d2b67 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Sep 2020 19:06:41 -0300 Subject: [PATCH 449/648] tools headers UAPI: update linux/in.h copy To get the changes from: 645f08975f49441b ("net: Fix some comments") That don't cause any changes in tooling, its just a typo fix. This silences this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: Adrian Hunter Cc: David S. Miller Cc: Jiri Olsa Cc: Miaohe Lin Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 3d0d8231dc19..7d6687618d80 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -135,7 +135,7 @@ struct in_addr { * this socket to prevent accepting spoofed ones. */ #define IP_PMTUDISC_INTERFACE 4 -/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get +/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get * fragmented if they exeed the interface mtu */ #define IP_PMTUDISC_OMIT 5 From 3b0a18c1aa6cbfa7b0dd513b6be9893ef6e6ac30 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 11 Sep 2020 19:56:54 -0700 Subject: [PATCH 450/648] perf record: Don't clear event's period if set by a term If events in a group explicitly set a frequency or period with leader sampling, don't disable the samples on those events. Prior to 5.8: perf record -e '{cycles/period=12345000/,instructions/period=6789000/}:S' would clear the attributes then apply the config terms. In commit 5f34278867b7 leader sampling configuration was moved to after applying the config terms, in the example, making the instructions' event have its period cleared. This change makes it so that sampling is only disabled if configuration terms aren't present. Committer testing: Before: # perf record -e '{cycles/period=1/,instructions/period=2/}:S' sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.051 MB perf.data (6 samples) ] # # perf evlist -v cycles/period=1/: size: 120, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|READ|ID, read_format: ID|GROUP, disabled: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1 instructions/period=2/: size: 120, config: 0x1, sample_type: IP|TID|TIME|READ|ID, read_format: ID|GROUP, sample_id_all: 1, exclude_guest: 1 # After: # perf record -e '{cycles/period=1/,instructions/period=2/}:S' sleep 0.0001 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.052 MB perf.data (4 samples) ] # perf evlist -v cycles/period=1/: size: 120, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|READ|ID, read_format: ID|GROUP, disabled: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1 instructions/period=2/: size: 120, config: 0x1, { sample_period, sample_freq }: 2, sample_type: IP|TID|TIME|READ|ID, read_format: ID|GROUP, sample_id_all: 1, exclude_guest: 1 # Fixes: 5f34278867b7 ("perf evlist: Move leader-sampling configuration") Signed-off-by: Ian Rogers Acked-by: Adrian Hunter Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Athira Jajeev Cc: Daniel Borkmann Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20200912025655.1337192-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/record.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index a4cc11592f6b..ea9aa1d7cf50 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,6 +2,7 @@ #include "debug.h" #include "evlist.h" #include "evsel.h" +#include "evsel_config.h" #include "parse-events.h" #include #include @@ -33,11 +34,24 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl return leader; } +static u64 evsel__config_term_mask(struct evsel *evsel) +{ + struct evsel_config_term *term; + struct list_head *config_terms = &evsel->config_terms; + u64 term_types = 0; + + list_for_each_entry(term, config_terms, list) { + term_types |= 1 << term->type; + } + return term_types; +} + static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) { struct perf_event_attr *attr = &evsel->core.attr; struct evsel *leader = evsel->leader; struct evsel *read_sampler; + u64 term_types, freq_mask; if (!leader->sample_read) return; @@ -47,16 +61,20 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev if (evsel == read_sampler) return; + term_types = evsel__config_term_mask(evsel); /* - * Disable sampling for all group members other than the leader in - * case the leader 'leads' the sampling, except when the leader is an - * AUX area event, in which case the 2nd event in the group is the one - * that 'leads' the sampling. + * Disable sampling for all group members except those with explicit + * config terms or the leader. In the case of an AUX area event, the 2nd + * event in the group is the one that 'leads' the sampling. */ - attr->freq = 0; - attr->sample_freq = 0; - attr->sample_period = 0; - attr->write_backward = 0; + freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD); + if ((term_types & freq_mask) == 0) { + attr->freq = 0; + attr->sample_freq = 0; + attr->sample_period = 0; + } + if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0) + attr->write_backward = 0; /* * We don't get a sample for slave events, we make them when delivering From 880a784344fb8a7c498a1145fb541fe8e8d2040f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 11 Sep 2020 19:56:55 -0700 Subject: [PATCH 451/648] perf test: Leader sampling shouldn't clear sample period Add test that a sibling with leader sampling doesn't have its period cleared. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Athira Jajeev Cc: Daniel Borkmann Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20200912025655.1337192-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/README | 1 + tools/perf/tests/attr/test-record-group2 | 29 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 tools/perf/tests/attr/test-record-group2 diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README index 6cd408108595..a36f49fb4dbe 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/attr/README @@ -49,6 +49,7 @@ Following tests are defined (with perf commands): perf record --call-graph fp kill (test-record-graph-fp) perf record --group -e cycles,instructions kill (test-record-group) perf record -e '{cycles,instructions}' kill (test-record-group1) + perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2) perf record -D kill (test-record-no-delay) perf record -i kill (test-record-no-inherit) perf record -n kill (test-record-no-samples) diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/attr/test-record-group2 new file mode 100644 index 000000000000..6b9f8d182ce1 --- /dev/null +++ b/tools/perf/tests/attr/test-record-group2 @@ -0,0 +1,29 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 +ret = 1 + +[event-1:base-record] +fd=1 +group_fd=-1 +config=0|1 +sample_period=1234000 +sample_type=87 +read_format=12 +inherit=0 +freq=0 + +[event-2:base-record] +fd=2 +group_fd=1 +config=0|1 +sample_period=6789000 +sample_type=87 +read_format=12 +disabled=0 +inherit=0 +mmap=0 +comm=0 +freq=0 +enable_on_exec=0 +task=0 From bb3a420d47ab00d7e1e5083286cab15235a96680 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Sun, 13 Sep 2020 04:06:05 -0400 Subject: [PATCH 452/648] tipc: Fix memory leak in tipc_group_create_member() tipc_group_add_to_tree() returns silently if `key` matches `nkey` of an existing node, causing tipc_group_create_member() to leak memory. Let tipc_group_add_to_tree() return an error in such a case, so that tipc_group_create_member() can handle it properly. Fixes: 75da2163dbb6 ("tipc: introduce communication groups") Reported-and-tested-by: syzbot+f95d90c454864b3b5bc9@syzkaller.appspotmail.com Cc: Hillf Danton Link: https://syzkaller.appspot.com/bug?id=048390604fe1b60df34150265479202f10e13aff Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/tipc/group.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 588c2d2b0c69..b1fcd2ad5ecf 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -273,8 +273,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, return NULL; } -static void tipc_group_add_to_tree(struct tipc_group *grp, - struct tipc_member *m) +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) { u64 nkey, key = (u64)m->node << 32 | m->port; struct rb_node **n, *parent = NULL; @@ -291,10 +291,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, else if (key > nkey) n = &(*n)->rb_right; else - return; + return -EEXIST; } rb_link_node(&m->tree_node, parent, n); rb_insert_color(&m->tree_node, &grp->members); + return 0; } static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, @@ -302,6 +303,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 instance, int state) { struct tipc_member *m; + int ret; m = kzalloc(sizeof(*m), GFP_ATOMIC); if (!m) @@ -314,8 +316,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->port = port; m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } grp->member_cnt++; - tipc_group_add_to_tree(grp, m); tipc_nlist_add(&grp->dests, m->node); m->state = state; return m; From ff48b6222e65ebdba5a403ef1deba6214e749193 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:37:31 +0800 Subject: [PATCH 453/648] tipc: use skb_unshare() instead in tipc_buf_append() In tipc_buf_append() it may change skb's frag_list, and it causes problems when this skb is cloned. skb_unclone() doesn't really make this skb's flag_list available to change. Shuang Li has reported an use-after-free issue because of this when creating quite a few macvlan dev over the same dev, where the broadcast packets will be cloned and go up to the stack: [ ] BUG: KASAN: use-after-free in pskb_expand_head+0x86d/0xea0 [ ] Call Trace: [ ] dump_stack+0x7c/0xb0 [ ] print_address_description.constprop.7+0x1a/0x220 [ ] kasan_report.cold.10+0x37/0x7c [ ] check_memory_region+0x183/0x1e0 [ ] pskb_expand_head+0x86d/0xea0 [ ] process_backlog+0x1df/0x660 [ ] net_rx_action+0x3b4/0xc90 [ ] [ ] Allocated by task 1786: [ ] kmem_cache_alloc+0xbf/0x220 [ ] skb_clone+0x10a/0x300 [ ] macvlan_broadcast+0x2f6/0x590 [macvlan] [ ] macvlan_process_broadcast+0x37c/0x516 [macvlan] [ ] process_one_work+0x66a/0x1060 [ ] worker_thread+0x87/0xb10 [ ] [ ] Freed by task 3253: [ ] kmem_cache_free+0x82/0x2a0 [ ] skb_release_data+0x2c3/0x6e0 [ ] kfree_skb+0x78/0x1d0 [ ] tipc_recvmsg+0x3be/0xa40 [tipc] So fix it by using skb_unshare() instead, which would create a new skb for the cloned frag and it'll be safe to change its frag_list. The similar things were also done in sctp_make_reassembled_event(), which is using skb_copy(). Reported-by: Shuang Li Fixes: 37e22164a8a3 ("tipc: rename and move message reassembly function") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/msg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 848fae674532..52e93ba4d8e2 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) goto err; head = *headbuf = frag; *buf = NULL; From 13e6ce98aa65ab5ce19351c020419360dfe8af29 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:51:50 +0800 Subject: [PATCH 454/648] net: sched: only keep the available bits when setting vxlan md->gbp As we can see from vxlan_build/parse_gbp_hdr(), when processing metadata on vxlan rx/tx path, only dont_learn/policy_applied/policy_id fields can be set to or parse from the packet for vxlan gbp option. So we'd better do the mask when set it in act_tunnel_key and cls_flower. Otherwise, when users don't know these bits, they may configure with a value which can never be matched. Reported-by: Shuang Li Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/vxlan.h | 3 +++ net/sched/act_tunnel_key.c | 1 + net/sched/cls_flower.c | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3a41627cbdfe..08537aa14f7c 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -121,6 +121,9 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) +#define VXLAN_GBP_MASK (VXLAN_GBP_DONT_LEARN | VXLAN_GBP_POLICY_APPLIED | \ + VXLAN_GBP_ID_MASK) + /* * VXLAN Generic Protocol Extension (VXLAN_F_GPE): * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 536c4bc31be6..37f1e10f35e0 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -156,6 +156,7 @@ tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len, struct vxlan_metadata *md = dst; md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; } return sizeof(struct vxlan_metadata); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a4f7ef1de7e7..e8fda1bd4d9d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1175,8 +1175,10 @@ static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key, return -EINVAL; } - if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) + if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) { md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]); + md->gbp &= VXLAN_GBP_MASK; + } return sizeof(*md); } From 681d2cfb790339a3e95b98bc140baf1f816a896a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:51:51 +0800 Subject: [PATCH 455/648] lwtunnel: only keep the available bits when setting vxlan md->gbp As we can see from vxlan_build/parse_gbp_hdr(), when processing metadata on vxlan rx/tx path, only dont_learn/policy_applied/policy_id fields can be set to or parse from the packet for vxlan gbp option. So do the mask when set it in lwtunnel, as it does in act_tunnel_key and cls_flower. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 75c6013ff9a4..b2ea1a8c5fd6 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -554,6 +554,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr, attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP]; md->gbp = nla_get_u32(attr); + md->gbp &= VXLAN_GBP_MASK; info->key.tun_flags |= TUNNEL_VXLAN_OPT; } From 8e1b3ac4786680c2d2b5a24e38a2d714c3bcd1ef Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 13 Sep 2020 19:43:03 +0800 Subject: [PATCH 456/648] net: sched: initialize with 0 before setting erspan md->u In fl_set_erspan_opt(), all bits of erspan md was set 1, as this function is also used to set opt MASK. However, when setting for md->u.index for opt VALUE, the rest bits of the union md->u will be left 1. It would cause to fail the match of the whole md when version is 1 and only index is set. This patch is to fix by initializing with 0 before setting erspan md->u. Reported-by: Shuang Li Fixes: 79b1011cb33d ("net: sched: allow flower to match erspan options") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e8fda1bd4d9d..fed18fd2c50b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1223,6 +1223,7 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, } if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) { nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]; + memset(&md->u, 0x00, sizeof(md->u)); md->u.index = nla_get_be32(nla); } } else if (md->version == 2) { From 2b1667e54caf95e1e4249d9068eea7a3089a5229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Thu, 10 Sep 2020 09:56:09 +0200 Subject: [PATCH 457/648] xsk: Fix number of pinned pages/umem size discrepancy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For AF_XDP sockets, there was a discrepancy between the number of of pinned pages and the size of the umem region. The size of the umem region is used to validate the AF_XDP descriptor addresses. The logic that pinned the pages covered by the region only took whole pages into consideration, creating a mismatch between the size and pinned pages. A user could then pass AF_XDP addresses outside the range of pinned pages, but still within the size of the region, crashing the kernel. This change correctly calculates the number of pages to be pinned. Further, the size check for the aligned mode is simplified. Now the code simply checks if the size is divisible by the chunk size. Fixes: bbff2f321a86 ("xsk: new descriptor addressing scheme") Reported-by: Ciara Loftus Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Tested-by: Ciara Loftus Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200910075609.7904-1-bjorn.topel@gmail.com --- net/xdp/xdp_umem.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index e97db37354e4..b010bfde0149 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -303,10 +303,10 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { + u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; - u32 chunk_size = mr->chunk_size, headroom = mr->headroom; u64 npgs, addr = mr->addr, size = mr->len; - unsigned int chunks, chunks_per_page; + unsigned int chunks, chunks_rem; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -336,19 +336,18 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if ((addr + size) < addr) return -EINVAL; - npgs = size >> PAGE_SHIFT; + npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); + if (npgs_rem) + npgs++; if (npgs > U32_MAX) return -EINVAL; - chunks = (unsigned int)div_u64(size, chunk_size); + chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); if (chunks == 0) return -EINVAL; - if (!unaligned_chunks) { - chunks_per_page = PAGE_SIZE / chunk_size; - if (chunks < chunks_per_page || chunks % chunks_per_page) - return -EINVAL; - } + if (!unaligned_chunks && chunks_rem) + return -EINVAL; if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; From 709192d531e5b0a91f20aa14abfe2fc27ddd47af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Mon, 14 Sep 2020 13:56:47 +0200 Subject: [PATCH 458/648] s390/dasd: Fix zero write for FBA devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A discard request that writes zeros using the global kernel internal ZERO_PAGE will fail for machines with more than 2GB of memory due to the location of the ZERO_PAGE. Fix this by using a driver owned global zero page allocated with GFP_DMA flag set. Fixes: 28b841b3a7cb ("s390/dasd: Add discard support for FBA devices") Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Cc: # 4.14+ Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_fba.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index cbb770824226..1a44e321b54e 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -40,6 +40,7 @@ MODULE_LICENSE("GPL"); static struct dasd_discipline dasd_fba_discipline; +static void *dasd_fba_zero_page; struct dasd_fba_private { struct dasd_fba_characteristics rdc_data; @@ -270,7 +271,7 @@ static void ccw_write_zero(struct ccw1 *ccw, int count) ccw->cmd_code = DASD_FBA_CCW_WRITE; ccw->flags |= CCW_FLAG_SLI; ccw->count = count; - ccw->cda = (__u32) (addr_t) page_to_phys(ZERO_PAGE(0)); + ccw->cda = (__u32) (addr_t) dasd_fba_zero_page; } /* @@ -830,6 +831,11 @@ dasd_fba_init(void) int ret; ASCEBC(dasd_fba_discipline.ebcname, 4); + + dasd_fba_zero_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!dasd_fba_zero_page) + return -ENOMEM; + ret = ccw_driver_register(&dasd_fba_driver); if (!ret) wait_for_device_probe(); @@ -841,6 +847,7 @@ static void __exit dasd_fba_cleanup(void) { ccw_driver_unregister(&dasd_fba_driver); + free_page((unsigned long)dasd_fba_zero_page); } module_init(dasd_fba_init); From 65dce596e2b839bc324b9543eac99a923eb6d794 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 14 Sep 2020 17:50:31 -0700 Subject: [PATCH 459/648] docs/bpf: Remove source code links Make path to bench_ringbufs.c just a text, not a special link. Fixes: 97abb2b39682 ("docs/bpf: Add BPF ring buffer design notes") Reported-by: Mauro Carvalho Chehab Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200915005031.2748397-1-andriin@fb.com --- Documentation/bpf/ringbuf.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/bpf/ringbuf.rst b/Documentation/bpf/ringbuf.rst index 4d4f3bcb1477..6a615cd62bda 100644 --- a/Documentation/bpf/ringbuf.rst +++ b/Documentation/bpf/ringbuf.rst @@ -197,7 +197,7 @@ a self-pacing notifications of new data being availability. being available after commit only if consumer has already caught up right up to the record being committed. If not, consumer still has to catch up and thus will see new data anyways without needing an extra poll notification. -Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbufs.c_) show that +Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbufs.c) show that this allows to achieve a very high throughput without having to resort to tricks like "notify only every Nth sample", which are necessary with perf buffer. For extreme cases, when BPF program wants more manual control of From ce4cc3133dc72c31bd49ddcf22d0f9eeff47a761 Mon Sep 17 00:00:00 2001 From: David Milburn Date: Thu, 10 Sep 2020 16:18:50 -0500 Subject: [PATCH 460/648] nvme-pci: disable the write zeros command for Intel 600P/P3100 The write zeros command does not work with 4k range. bash-4.4# ./blkdiscard /dev/nvme0n1p2 bash-4.4# strace -efallocate xfs_io -c "fzero 536895488 2048" /dev/nvme0n1p2 fallocate(3, FALLOC_FL_ZERO_RANGE, 536895488, 2048) = 0 +++ exited with 0 +++ bash-4.4# dd bs=1 if=/dev/nvme0n1p2 skip=536895488 count=512 | hexdump -C 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 00000200 bash-4.4# ./blkdiscard /dev/nvme0n1p2 bash-4.4# strace -efallocate xfs_io -c "fzero 536895488 4096" /dev/nvme0n1p2 fallocate(3, FALLOC_FL_ZERO_RANGE, 536895488, 4096) = 0 +++ exited with 0 +++ bash-4.4# dd bs=1 if=/dev/nvme0n1p2 skip=536895488 count=512 | hexdump -C 00000000 5c 61 5c b0 96 21 1b 5e 85 0c 07 32 9c 8c eb 3c |\a\..!.^...2...<| 00000010 4a a2 06 ca 67 15 2d 8e 29 8d a8 a0 7e 46 8c 62 |J...g.-.)...~F.b| 00000020 bb 4c 6c c1 6b f5 ae a5 e4 a9 bc 93 4f 60 ff 7a |.Ll.k.......O`.z| Reported-by: Eric Sandeen Signed-off-by: David Milburn Tested-by: Eric Sandeen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5e07d5628864..d31e298669a9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3153,7 +3153,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_MEDIUM_PRIO_SQ | - NVME_QUIRK_NO_TEMP_THRESH_CHANGE }, + NVME_QUIRK_NO_TEMP_THRESH_CHANGE | + NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ From af5ad17854f96a6d3c9775e776bd01ab262672a1 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Mon, 14 Sep 2020 18:01:21 +0300 Subject: [PATCH 461/648] nvme-tcp: fix kconfig dependency warning when !CRYPTO When NVME_TCP is enabled and CRYPTO is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for CRYPTO_CRC32C Depends on [n]: CRYPTO [=n] Selected by [y]: - NVME_TCP [=y] && INET [=y] && BLK_DEV_NVME [=y] The reason is that NVME_TCP selects CRYPTO_CRC32C without depending on or selecting CRYPTO while CRYPTO_CRC32C is subordinate to CRYPTO. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Fixes: 79fd751d61aa ("nvme: tcp: selects CRYPTO_CRC32C for nvme-tcp") Signed-off-by: Necip Fazil Yildiran Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 3ed9786b88d8..a44d49d63968 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -73,6 +73,7 @@ config NVME_TCP depends on INET depends on BLK_DEV_NVME select NVME_FABRICS + select CRYPTO select CRYPTO_CRC32C help This provides support for the NVMe over Fabrics protocol using From cd8100f1f3be406a4e0e122e7307ac0fca6d57cc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 11 Sep 2020 12:55:55 +0200 Subject: [PATCH 462/648] EDAC/ghes: Clear scanned data on unload Commit b972fdba8665 ("EDAC/ghes: Fix NULL pointer dereference in ghes_edac_register()") didn't clear all the information from the scanned system and, more specifically, left ghes_hw.num_dimms to its previous value. On a second load (CONFIG_DEBUG_TEST_DRIVER_REMOVE=y), the driver would use the leftover num_dimms value which is not 0 and thus the 0 check in enumerate_dimms() will get bypassed and it would go directly to the pointer deref: d = &hw->dimms[hw->num_dimms]; which is, of course, NULL: #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP CPU: 7 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc4+ #7 Hardware name: GIGABYTE MZ01-CE1-00/MZ01-CE1-00, BIOS F02 08/29/2018 RIP: 0010:enumerate_dimms.cold+0x7b/0x375 Reset the whole ghes_hw on driver unregister so that no stale values are used on a second system scan. Fixes: b972fdba8665 ("EDAC/ghes: Fix NULL pointer dereference in ghes_edac_register()") Cc: Shiju Jose Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200911164817.GA19320@zn.tnic --- drivers/edac/ghes_edac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 54ebc8afc6b1..bf7fb9f3e117 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -632,6 +632,7 @@ void ghes_edac_unregister(struct ghes *ghes) mutex_lock(&ghes_reg_mutex); system_scanned = false; + memset(&ghes_hw, 0, sizeof(struct ghes_hw_desc)); if (!refcount_dec_and_test(&ghes_refcount)) goto unlock; From 251c54ea26fa6029b01a76161a37a12fde5124e4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 11 Sep 2020 18:17:30 +0200 Subject: [PATCH 463/648] EDAC/ghes: Check whether the driver is on the safe list correctly With CONFIG_DEBUG_TEST_DRIVER_REMOVE=y, a system would try to probe, unregister and probe again a driver. When ghes_edac is attempted to be loaded on a system which is not on the safe platforms list, ghes_edac_register() would return early. The unregister counterpart ghes_edac_unregister() would still attempt to unregister and exit early at the refcount test, leading to the refcount underflow below. In order to not do *anything* on the unregister path too, reuse the force_load parameter and check it on that path too, before fumbling with the refcount. ghes_edac: ghes_edac_register: entry ghes_edac: ghes_edac_register: return -ENODEV ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 10 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0xb9/0x100 Modules linked in: CPU: 10 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc4+ #12 Hardware name: GIGABYTE MZ01-CE1-00/MZ01-CE1-00, BIOS F02 08/29/2018 RIP: 0010:refcount_warn_saturate+0xb9/0x100 Code: 82 e8 fb 8f 4d 00 90 0f 0b 90 90 c3 80 3d 55 4c f5 00 00 75 88 c6 05 4c 4c f5 00 01 90 48 c7 c7 d0 8a 10 82 e8 d8 8f 4d 00 90 <0f> 0b 90 90 c3 80 3d 30 4c f5 00 00 0f 85 61 ff ff ff c6 05 23 4c RSP: 0018:ffffc90000037d58 EFLAGS: 00010292 RAX: 0000000000000026 RBX: ffff88840b8da000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffff8216b24f RDI: 00000000ffffffff RBP: ffff88840c662e00 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000046 R12: 0000000000000000 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88840ee80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000800002211000 CR4: 00000000003506e0 Call Trace: ghes_edac_unregister ghes_remove platform_drv_remove really_probe driver_probe_device device_driver_attach __driver_attach ? device_driver_attach ? device_driver_attach bus_for_each_dev bus_add_driver driver_register ? bert_init ghes_init do_one_initcall ? rcu_read_lock_sched_held kernel_init_freeable ? rest_init kernel_init ret_from_fork ... ghes_edac: ghes_edac_unregister: FALSE, refcount: -1073741824 Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200911164950.GB19320@zn.tnic --- drivers/edac/ghes_edac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index bf7fb9f3e117..94d1e3165052 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -508,6 +508,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) if (!force_load && idx < 0) return -ENODEV; } else { + force_load = true; idx = 0; } @@ -629,6 +630,9 @@ void ghes_edac_unregister(struct ghes *ghes) struct mem_ctl_info *mci; unsigned long flags; + if (!force_load) + return; + mutex_lock(&ghes_reg_mutex); system_scanned = false; From 3236d215ad38a3f5372e65cd1e0a52cf93d3c6a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 15 Sep 2020 09:54:08 +0200 Subject: [PATCH 464/648] batman-adv: mcast: fix duplicate mcast packets in BLA backbone from LAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario: * Multicast frame send from a BLA backbone (multiple nodes with their bat0 bridged together, with BLA enabled) Issue: * BLA backbone nodes receive the frame multiple times on bat0 For multicast frames received via batman-adv broadcast packets the originator of the broadcast packet is checked before decapsulating and forwarding the frame to bat0 (batadv_bla_is_backbone_gw()-> batadv_recv_bcast_packet()). If it came from a node which shares the same BLA backbone with us then it is not forwarded to bat0 to avoid a loop. When sending a multicast frame in a non-4-address batman-adv unicast packet we are currently missing this check - and cannot do so because the batman-adv unicast packet has no originator address field. However, we can simply fix this on the sender side by only sending the multicast frame via unicasts to interested nodes which do not share the same BLA backbone with us. This also nicely avoids some unnecessary transmissions on mesh side. Note that no infinite loop was observed, probably because of dropping via batadv_interface_tx()->batadv_bla_tx(). However the duplicates still utterly confuse switches/bridges, ICMPv6 duplicate address detection and neighbor discovery and therefore leads to long delays before being able to establish TCP connections, for instance. And it also leads to the Linux bridge printing messages like: "br-lan: received packet on eth1 with own address as source address ..." Fixes: 2d3f6ccc4ea5 ("batman-adv: Modified forwarding behaviour for multicast packets") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 46 ++++++++++++++++++++++++++------- net/batman-adv/multicast.h | 15 +++++++++++ net/batman-adv/soft-interface.c | 5 ++-- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index bdc4a1fba1c6..ca24a2e522b7 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -51,6 +51,7 @@ #include #include +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" @@ -1434,6 +1435,35 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, return BATADV_FORW_ALL; } +/** + * batadv_mcast_forw_send_orig() - send a multicast packet to an originator + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to send + * @vid: the vlan identifier + * @orig_node: the originator to send the packet to + * + * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. + */ +int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node) +{ + /* Avoid sending multicast-in-unicast packets to other BLA + * gateways - they already got the frame from the LAN side + * we share with them. + * TODO: Refactor to take BLA into account earlier, to avoid + * reducing the mcast_fanout count. + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) { + dev_kfree_skb(skb); + return NET_XMIT_SUCCESS; + } + + return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0, + orig_node, vid); +} + /** * batadv_mcast_forw_tt() - forwards a packet to multicast listeners * @bat_priv: the bat priv with all the soft interface information @@ -1471,8 +1501,8 @@ batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_entry->orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, + orig_entry->orig_node); } rcu_read_unlock(); @@ -1513,8 +1543,7 @@ batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1551,8 +1580,7 @@ batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1618,8 +1646,7 @@ batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; @@ -1656,8 +1683,7 @@ batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv, break; } - batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0, - orig_node, vid); + batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node); } rcu_read_unlock(); return ret; diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index ebf825991ecd..3e114bc5ca3b 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -46,6 +46,11 @@ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_orig_node **mcast_single_orig); +int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node); + int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); @@ -71,6 +76,16 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, return BATADV_FORW_ALL; } +static inline int +batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct batadv_orig_node *orig_node) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} + static inline int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 23833a0ba5e6..3d037b17f3a7 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -364,9 +364,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, goto dropped; ret = batadv_send_skb_via_gw(bat_priv, skb, vid); } else if (mcast_single_orig) { - ret = batadv_send_skb_unicast(bat_priv, skb, - BATADV_UNICAST, 0, - mcast_single_orig, vid); + ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid, + mcast_single_orig); } else if (forw_mode == BATADV_FORW_SOME) { ret = batadv_mcast_forw_send(bat_priv, skb, vid); } else { From 74c09b7275126da1b642b90c9cdc3ae8b729ad4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 15 Sep 2020 09:54:09 +0200 Subject: [PATCH 465/648] batman-adv: mcast: fix duplicate mcast packets in BLA backbone from mesh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario: * Multicast frame send from mesh to a BLA backbone (multiple nodes with their bat0 bridged together, with BLA enabled) Issue: * BLA backbone nodes receive the frame multiple times on bat0, once from mesh->bat0 and once from each backbone_gw from LAN For unicast, a node will send only to the best backbone gateway according to the TQ. However for multicast we currently cannot determine if multiple destination nodes share the same backbone if they don't share the same backbone with us. So we need to keep sending the unicasts to all backbone gateways and let the backbone gateways decide which one will forward the frame. We can use the CLAIM mechanism to make this decision. One catch: The batman-adv gateway feature for DHCP packets potentially sends multicast packets in the same batman-adv unicast header as the multicast optimizations code. And we are not allowed to drop those even if we did not claim the source address of the sender, as for such packets there is only this one multicast-in-unicast packet. How can we distinguish the two cases? The gateway feature uses a batman-adv unicast 4 address header. While the multicast-to-unicasts feature uses a simple, 3 address batman-adv unicast header. So let's use this to distinguish. Fixes: fe2da6ff27c7 ("batman-adv: check incoming packet type for bla") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 34 +++++++++++++++++++------- net/batman-adv/bridge_loop_avoidance.h | 4 +-- net/batman-adv/soft-interface.c | 6 ++--- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 08419a2e6b95..68715783a742 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1814,7 +1814,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, * @bat_priv: the bat priv with all the soft interface information * @skb: the frame to be checked * @vid: the VLAN ID of the frame - * @is_bcast: the packet came in a broadcast packet type. + * @packet_type: the batman packet type this frame came in * * batadv_bla_rx avoidance checks if: * * we have to race for a claim @@ -1826,7 +1826,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, * further process the skb. */ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast) + unsigned short vid, int packet_type) { struct batadv_bla_backbone_gw *backbone_gw; struct ethhdr *ethhdr; @@ -1848,9 +1848,24 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, goto handled; if (unlikely(atomic_read(&bat_priv->bla.num_requests))) - /* don't allow broadcasts while requests are in flight */ - if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) - goto handled; + /* don't allow multicast packets while requests are in flight */ + if (is_multicast_ether_addr(ethhdr->h_dest)) + /* Both broadcast flooding or multicast-via-unicasts + * delivery might send to multiple backbone gateways + * sharing the same LAN and therefore need to coordinate + * which backbone gateway forwards into the LAN, + * by claiming the payload source address. + * + * Broadcast flooding and multicast-via-unicasts + * delivery use the following two batman packet types. + * Note: explicitly exclude BATADV_UNICAST_4ADDR, + * as the DHCP gateway feature will send explicitly + * to only one BLA gateway, so the claiming process + * should be avoided there. + */ + if (packet_type == BATADV_BCAST || + packet_type == BATADV_UNICAST) + goto handled; ether_addr_copy(search_claim.addr, ethhdr->h_source); search_claim.vid = vid; @@ -1885,13 +1900,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, goto allow; } - /* if it is a broadcast ... */ - if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) { + /* if it is a multicast ... */ + if (is_multicast_ether_addr(ethhdr->h_dest) && + (packet_type == BATADV_BCAST || packet_type == BATADV_UNICAST)) { /* ... drop it. the responsible gateway is in charge. * - * We need to check is_bcast because with the gateway + * We need to check packet type because with the gateway * feature, broadcasts (like DHCP requests) may be sent - * using a unicast packet type. + * using a unicast 4 address packet type. See comment above. */ goto handled; } else { diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 41edb2c4a327..a81c41b636f9 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -35,7 +35,7 @@ static inline bool batadv_bla_is_loopdetect_mac(const uint8_t *mac) #ifdef CONFIG_BATMAN_ADV_BLA bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid, bool is_bcast); + unsigned short vid, int packet_type); bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid); bool batadv_bla_is_backbone_gw(struct sk_buff *skb, @@ -66,7 +66,7 @@ bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, static inline bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, - bool is_bcast) + int packet_type) { return false; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 3d037b17f3a7..cdde943c1b83 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -424,10 +424,10 @@ void batadv_interface_rx(struct net_device *soft_iface, struct vlan_ethhdr *vhdr; struct ethhdr *ethhdr; unsigned short vid; - bool is_bcast; + int packet_type; batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data; - is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST); + packet_type = batadv_bcast_packet->packet_type; skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); @@ -470,7 +470,7 @@ void batadv_interface_rx(struct net_device *soft_iface, /* Let the bridge loop avoidance check the packet. If will * not handle it, we can safely push it up. */ - if (batadv_bla_rx(bat_priv, skb, vid, is_bcast)) + if (batadv_bla_rx(bat_priv, skb, vid, packet_type)) goto out; if (orig_node) From 2369e827046920ef0599e6a36b975ac5c0a359c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 15 Sep 2020 09:54:10 +0200 Subject: [PATCH 466/648] batman-adv: mcast: fix duplicate mcast packets from BLA backbone to mesh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario: * Multicast frame send from BLA backbone gateways (multiple nodes with their bat0 bridged together, with BLA enabled) sharing the same LAN to nodes in the mesh Issue: * Nodes receive the frame multiple times on bat0 from the mesh, once from each foreign BLA backbone gateway which shares the same LAN with another For multicast frames via batman-adv broadcast packets coming from the same BLA backbone but from different backbone gateways duplicates are currently detected via a CRC history of previously received packets. However this CRC so far was not performed for multicast frames received via batman-adv unicast packets. Fixing this by appyling the same check for such packets, too. Room for improvements in the future: Ideally we would introduce the possibility to not only claim a client, but a complete originator, too. This would allow us to only send a multicast-in-unicast packet from a BLA backbone gateway claiming the node and by that avoid potential redundant transmissions in the first place. Fixes: 279e89b2281a ("batman-adv: add broadcast duplicate check") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 103 +++++++++++++++++++++---- 1 file changed, 87 insertions(+), 16 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 68715783a742..c350ab63cd54 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1581,13 +1581,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv) } /** - * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. + * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup. * @bat_priv: the bat priv with all the soft interface information - * @skb: contains the bcast_packet to be checked + * @skb: contains the multicast packet to be checked + * @payload_ptr: pointer to position inside the head buffer of the skb + * marking the start of the data to be CRC'ed + * @orig: originator mac address, NULL if unknown * - * check if it is on our broadcast list. Another gateway might - * have sent the same packet because it is connected to the same backbone, - * so we have to remove this duplicate. + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. * * This is performed by checking the CRC, which will tell us * with a good chance that it is the same packet. If it is furthermore @@ -1596,19 +1599,17 @@ int batadv_bla_init(struct batadv_priv *bat_priv) * * Return: true if a packet is in the duplicate list, false otherwise. */ -bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, - struct sk_buff *skb) +static bool batadv_bla_check_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb, u8 *payload_ptr, + const u8 *orig) { - int i, curr; - __be32 crc; - struct batadv_bcast_packet *bcast_packet; struct batadv_bcast_duplist_entry *entry; bool ret = false; - - bcast_packet = (struct batadv_bcast_packet *)skb->data; + int i, curr; + __be32 crc; /* calculate the crc ... */ - crc = batadv_skb_crc32(skb, (u8 *)(bcast_packet + 1)); + crc = batadv_skb_crc32(skb, payload_ptr); spin_lock_bh(&bat_priv->bla.bcast_duplist_lock); @@ -1627,8 +1628,21 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, if (entry->crc != crc) continue; - if (batadv_compare_eth(entry->orig, bcast_packet->orig)) - continue; + /* are the originators both known and not anonymous? */ + if (orig && !is_zero_ether_addr(orig) && + !is_zero_ether_addr(entry->orig)) { + /* If known, check if the new frame came from + * the same originator: + * We are safe to take identical frames from the + * same orig, if known, as multiplications in + * the mesh are detected via the (orig, seqno) pair. + * So we can be a bit more liberal here and allow + * identical frames from the same orig which the source + * host might have sent multiple times on purpose. + */ + if (batadv_compare_eth(entry->orig, orig)) + continue; + } /* this entry seems to match: same crc, not too old, * and from another gw. therefore return true to forbid it. @@ -1644,7 +1658,14 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, entry = &bat_priv->bla.bcast_duplist[curr]; entry->crc = crc; entry->entrytime = jiffies; - ether_addr_copy(entry->orig, bcast_packet->orig); + + /* known originator */ + if (orig) + ether_addr_copy(entry->orig, orig); + /* anonymous originator */ + else + eth_zero_addr(entry->orig); + bat_priv->bla.bcast_duplist_curr = curr; out: @@ -1653,6 +1674,48 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, return ret; } +/** + * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup. + * @bat_priv: the bat priv with all the soft interface information + * @skb: contains the multicast packet to be checked, decapsulated from a + * unicast_packet + * + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. + * + * Return: true if a packet is in the duplicate list, false otherwise. + */ +static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_bla_check_duplist(bat_priv, skb, (u8 *)skb->data, NULL); +} + +/** + * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup. + * @bat_priv: the bat priv with all the soft interface information + * @skb: contains the bcast_packet to be checked + * + * Check if it is on our broadcast list. Another gateway might have sent the + * same packet because it is connected to the same backbone, so we have to + * remove this duplicate. + * + * Return: true if a packet is in the duplicate list, false otherwise. + */ +bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct batadv_bcast_packet *bcast_packet; + u8 *payload_ptr; + + bcast_packet = (struct batadv_bcast_packet *)skb->data; + payload_ptr = (u8 *)(bcast_packet + 1); + + return batadv_bla_check_duplist(bat_priv, skb, payload_ptr, + bcast_packet->orig); +} + /** * batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for * the VLAN identified by vid. @@ -1867,6 +1930,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, packet_type == BATADV_UNICAST) goto handled; + /* potential duplicates from foreign BLA backbone gateways via + * multicast-in-unicast packets + */ + if (is_multicast_ether_addr(ethhdr->h_dest) && + packet_type == BATADV_UNICAST && + batadv_bla_check_ucast_duplist(bat_priv, skb)) + goto handled; + ether_addr_copy(search_claim.addr, ethhdr->h_source); search_claim.vid = vid; claim = batadv_claim_hash_find(bat_priv, &search_claim); From 564c836fd945a94b5dd46597d6b7adb464092650 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 14 Sep 2020 18:05:00 +0200 Subject: [PATCH 467/648] MIPS: SNI: Fix MIPS_L1_CACHE_SHIFT Commit 930beb5ac09a ("MIPS: introduce MIPS_L1_CACHE_SHIFT_") forgot to select the correct MIPS_L1_CACHE_SHIFT for SNI RM. This breaks non coherent DMA because of a wrong allocation alignment. Fixes: 930beb5ac09a ("MIPS: introduce MIPS_L1_CACHE_SHIFT_") Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c95fa3a2484c..8f328298f8cc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -877,6 +877,7 @@ config SNI_RM select I8253 select I8259 select ISA + select MIPS_L1_CACHE_SHIFT_6 select SWAP_IO_SPACE if CPU_BIG_ENDIAN select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 From 56f3a1cdafc25c5f78e4267ba9e2f341da3eec16 Mon Sep 17 00:00:00 2001 From: Henry Burns Date: Mon, 14 Sep 2020 20:40:49 -0400 Subject: [PATCH 468/648] perf vendor events amd: Remove trailing commas The amdzen2/core.json and amdzen/core.json vendor events files have the occasional trailing comma. Since that goes against the JSON standard, lets remove it. Signed-off-by: Henry Burns Acked-by: Kim Phillips Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Vijay Thakkar Link: http://lore.kernel.org/lkml/20200915004125.971-1-henrywolfeburns@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen1/core.json | 2 +- tools/perf/pmu-events/arch/x86/amdzen2/core.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json index 7e1aa8273935..653b11b23399 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -61,7 +61,7 @@ { "EventName": "ex_ret_brn_ind_misp", "EventCode": "0xca", - "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted." }, { "EventName": "ex_ret_mmx_fp_instr.sse_instr", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json index de89e5a44ff1..4b75183da94a 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json @@ -125,6 +125,6 @@ { "EventName": "ex_ret_fus_brnch_inst", "EventCode": "0x1d0", - "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.", + "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8." } ] From 22fe5a25b5d8c4f8008dc4a8738d6d8a5f5ddbe9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:18 +0900 Subject: [PATCH 469/648] perf test: Free aliases for PMU event map aliases test The aliases were never released causing the following leaks: Indirect leak of 1224 byte(s) in 9 object(s) allocated from: #0 0x7feefb830628 in malloc (/lib/x86_64-linux-gnu/libasan.so.5+0x107628) #1 0x56332c8f1b62 in __perf_pmu__new_alias util/pmu.c:322 #2 0x56332c8f401f in pmu_add_cpu_aliases_map util/pmu.c:778 #3 0x56332c792ce9 in __test__pmu_event_aliases tests/pmu-events.c:295 #4 0x56332c792ce9 in test_aliases tests/pmu-events.c:367 #5 0x56332c76a09b in run_test tests/builtin-test.c:410 #6 0x56332c76a09b in test_and_print tests/builtin-test.c:440 #7 0x56332c76ce69 in __cmd_test tests/builtin-test.c:695 #8 0x56332c76ce69 in cmd_test tests/builtin-test.c:807 #9 0x56332c7d2214 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:312 #10 0x56332c6701a8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:364 #11 0x56332c6701a8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:408 #12 0x56332c6701a8 in main /home/namhyung/project/linux/tools/perf/perf.c:538 #13 0x7feefb359cc9 in __libc_start_main ../csu/libc-start.c:308 Fixes: 956a78356c24c ("perf test: Test pmu-events aliases") Signed-off-by: Namhyung Kim Reviewed-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu-events.c | 5 +++++ tools/perf/util/pmu.c | 2 +- tools/perf/util/pmu.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index eb19f9a0bc15..d3517a74d95e 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -274,6 +274,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count) int res = 0; bool use_uncore_table; struct pmu_events_map *map = __test_pmu_get_events_map(); + struct perf_pmu_alias *a, *tmp; if (!map) return -1; @@ -347,6 +348,10 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count) pmu_name, alias->name); } + list_for_each_entry_safe(a, tmp, &aliases, list) { + list_del(&a->list); + perf_pmu_free_alias(a); + } free(pmu); return res; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f1688e1f6ed7..555cb3524c25 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -274,7 +274,7 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old, } /* Delete an alias entry. */ -static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +void perf_pmu_free_alias(struct perf_pmu_alias *newalias) { zfree(&newalias->name); zfree(&newalias->desc); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 44ccbdbb1c37..b63c4c5e335e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -113,6 +113,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); +void perf_pmu_free_alias(struct perf_pmu_alias *alias); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); From 4f57a1ed749a81ec553d89233cab53db9365e193 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:09 +0900 Subject: [PATCH 470/648] perf metric: Fix some memory leaks I found some memory leaks while reading the metric code. Some are real and others only occur in the error path. When it failed during metric or event parsing, it should release all resources properly. Fixes: b18f3e365019d ("perf stat: Support JSON metrics in perf stat") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: John Garry Cc: Kajol Jain Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8831b964288f..af664d6218d6 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -530,6 +530,9 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, continue; strlist__add(me->metrics, s); } + + if (!raw) + free(s); } free(omg); } @@ -1040,7 +1043,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, ret = metricgroup__add_metric_list(str, metric_no_group, &extra_events, &metric_list, map); if (ret) - return ret; + goto out; pr_debug("adding %s\n", extra_events.buf); bzero(&parse_error, sizeof(parse_error)); ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu); @@ -1048,11 +1051,11 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, parse_events_print_error(&parse_error, extra_events.buf); goto out; } - strbuf_release(&extra_events); ret = metricgroup__setup_events(&metric_list, metric_no_merge, perf_evlist, metric_events); out: metricgroup__free_metrics(&metric_list); + strbuf_release(&extra_events); return ret; } From b033ab11ad0c7f9a9c9fa4a1ac2ffad14454a9f3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:10 +0900 Subject: [PATCH 471/648] perf metric: Fix some memory leaks - part 2 The metric_event_delete() missed to free expr->metric_events and it should free an expr when metric_refs allocation failed. Fixes: 4ea2896715e67 ("perf metric: Collect referenced metrics in struct metric_expr") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: John Garry Cc: Kajol Jain Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index af664d6218d6..b28c09447c10 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -85,6 +85,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, list_for_each_entry_safe(expr, tmp, &me->head, nd) { free(expr->metric_refs); + free(expr->metric_events); free(expr); } @@ -316,6 +317,7 @@ static int metricgroup__setup_events(struct list_head *groups, if (!metric_refs) { ret = -ENOMEM; free(metric_events); + free(expr); break; } From bfd1b83d75e44a9f65de30accb3dd3b5940bd3ac Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:11 +0900 Subject: [PATCH 472/648] perf evlist: Fix cpu/thread map leak Asan reported leak of cpu and thread maps as they have one more refcount than released. I found that after setting evlist maps it should release it's refcount. It seems to be broken from the beginning so I chose the original commit as the culprit. But not sure how it's applied to stable trees since there are many changes in the code after that. Fixes: 7e2ed097538c5 ("perf evlist: Store pointer to the cpu and thread maps") Fixes: 4112eb1899c0e ("perf evlist: Default to syswide target when no thread/cpu maps set") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e3fa3bf7498a..c0768c61eb43 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -946,6 +946,10 @@ int perf_evlist__create_maps(struct evlist *evlist, struct target *target) perf_evlist__set_maps(&evlist->core, cpus, threads); + /* as evlist now has references, put count here */ + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); + return 0; out_delete_threads: @@ -1273,11 +1277,12 @@ static int perf_evlist__create_syswide_maps(struct evlist *evlist) goto out_put; perf_evlist__set_maps(&evlist->core, cpus, threads); -out: - return err; + + perf_thread_map__put(threads); out_put: perf_cpu_map__put(cpus); - goto out; +out: + return err; } int evlist__open(struct evlist *evlist) From b12eea5ad8e77f8a380a141e3db67c07432dde16 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:13 +0900 Subject: [PATCH 473/648] perf parse-event: Fix memory leak in evsel->unit The evsel->unit borrows a pointer of pmu event or alias instead of owns a string. But tool event (duration_time) passes a result of strdup() caused a leak. It was found by ASAN during metric test: Direct leak of 210 byte(s) in 70 object(s) allocated from: #0 0x7fe366fca0b5 in strdup (/lib/x86_64-linux-gnu/libasan.so.5+0x920b5) #1 0x559fbbcc6ea3 in add_event_tool util/parse-events.c:414 #2 0x559fbbcc6ea3 in parse_events_add_tool util/parse-events.c:1414 #3 0x559fbbd8474d in parse_events_parse util/parse-events.y:439 #4 0x559fbbcc95da in parse_events__scanner util/parse-events.c:2096 #5 0x559fbbcc95da in __parse_events util/parse-events.c:2141 #6 0x559fbbc28555 in check_parse_id tests/pmu-events.c:406 #7 0x559fbbc28555 in check_parse_id tests/pmu-events.c:393 #8 0x559fbbc28555 in check_parse_cpu tests/pmu-events.c:415 #9 0x559fbbc28555 in test_parsing tests/pmu-events.c:498 #10 0x559fbbc0109b in run_test tests/builtin-test.c:410 #11 0x559fbbc0109b in test_and_print tests/builtin-test.c:440 #12 0x559fbbc03e69 in __cmd_test tests/builtin-test.c:695 #13 0x559fbbc03e69 in cmd_test tests/builtin-test.c:807 #14 0x559fbbc691f4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:312 #15 0x559fbbb071a8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:364 #16 0x559fbbb071a8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:408 #17 0x559fbbb071a8 in main /home/namhyung/project/linux/tools/perf/perf.c:538 #18 0x7fe366b68cc9 in __libc_start_main ../csu/libc-start.c:308 Fixes: f0fbb114e3025 ("perf stat: Implement duration_time as a proper event") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c4d2394e2b2d..667cbca1547a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -411,7 +411,7 @@ static int add_event_tool(struct list_head *list, int *idx, return -ENOMEM; evsel->tool_event = tool_event; if (tool_event == PERF_TOOL_DURATION_TIME) - evsel->unit = strdup("ns"); + evsel->unit = "ns"; return 0; } From f5a56570a3f2c01e5307a972ae7d4636edff7b13 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:14 +0900 Subject: [PATCH 474/648] perf test: Fix memory leaks in parse-metric test It didn't release resources when there's an error so the test_recursion_fail() will leak some memory. Fixes: 0a507af9c681a ("perf tests: Add parse metric test for ipc metric") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 23db8acc492d..cd7331aac3bd 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -153,8 +153,10 @@ static int __compute_metric(const char *name, struct value *vals, return -ENOMEM; cpus = perf_cpu_map__new("0"); - if (!cpus) + if (!cpus) { + evlist__delete(evlist); return -ENOMEM; + } perf_evlist__set_maps(&evlist->core, cpus, NULL); @@ -163,10 +165,11 @@ static int __compute_metric(const char *name, struct value *vals, false, false, &metric_events); if (err) - return err; + goto out; - if (perf_evlist__alloc_stats(evlist, false)) - return -1; + err = perf_evlist__alloc_stats(evlist, false); + if (err) + goto out; /* Load the runtime stats with given numbers for events. */ runtime_stat__init(&st); @@ -178,13 +181,14 @@ static int __compute_metric(const char *name, struct value *vals, if (name2 && ratio2) *ratio2 = compute_single(&metric_events, evlist, &st, name2); +out: /* ... clenup. */ metricgroup__rblist_exit(&metric_events); runtime_stat__exit(&st); perf_evlist__free_stats(evlist); perf_cpu_map__put(cpus); evlist__delete(evlist); - return 0; + return err; } static int compute_metric(const char *name, struct value *vals, double *ratio) From 437822bf38c1dc3bda6ffde52689e4e667046a6a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:15 +0900 Subject: [PATCH 475/648] perf metric: Release expr_parse_ctx after testing The test_generic_metric() missed to release entries in the pctx. Asan reported following leak (and more): Direct leak of 128 byte(s) in 1 object(s) allocated from: #0 0x7f4c9396980e in calloc (/lib/x86_64-linux-gnu/libasan.so.5+0x10780e) #1 0x55f7e748cc14 in hashmap_grow (/home/namhyung/project/linux/tools/perf/perf+0x90cc14) #2 0x55f7e748d497 in hashmap__insert (/home/namhyung/project/linux/tools/perf/perf+0x90d497) #3 0x55f7e7341667 in hashmap__set /home/namhyung/project/linux/tools/perf/util/hashmap.h:111 #4 0x55f7e7341667 in expr__add_ref util/expr.c:120 #5 0x55f7e7292436 in prepare_metric util/stat-shadow.c:783 #6 0x55f7e729556d in test_generic_metric util/stat-shadow.c:858 #7 0x55f7e712390b in compute_single tests/parse-metric.c:128 #8 0x55f7e712390b in __compute_metric tests/parse-metric.c:180 #9 0x55f7e712446d in compute_metric tests/parse-metric.c:196 #10 0x55f7e712446d in test_dcache_l2 tests/parse-metric.c:295 #11 0x55f7e712446d in test__parse_metric tests/parse-metric.c:355 #12 0x55f7e70be09b in run_test tests/builtin-test.c:410 #13 0x55f7e70be09b in test_and_print tests/builtin-test.c:440 #14 0x55f7e70c101a in __cmd_test tests/builtin-test.c:661 #15 0x55f7e70c101a in cmd_test tests/builtin-test.c:807 #16 0x55f7e7126214 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:312 #17 0x55f7e6fc41a8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:364 #18 0x55f7e6fc41a8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:408 #19 0x55f7e6fc41a8 in main /home/namhyung/project/linux/tools/perf/perf.c:538 #20 0x7f4c93492cc9 in __libc_start_main ../csu/libc-start.c:308 Fixes: 6d432c4c8aa56 ("perf tools: Add test_generic_metric function") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index e1ba6c1b916a..a5f42c22c484 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -853,14 +853,16 @@ static void generic_metric(struct perf_stat_config *config, double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) { struct expr_parse_ctx pctx; - double ratio; + double ratio = 0.0; if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0) - return 0.; + goto out; if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1)) - return 0.; + ratio = 0.0; +out: + expr__ctx_clear(&pctx); return ratio; } From 27adafcda3d8b8a818e85d58eea95b85b5f513f9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:16 +0900 Subject: [PATCH 476/648] perf metric: Free metric when it failed to resolve The metricgroup__add_metric() can find multiple match for a metric group and it's possible to fail. Also it can fail in the middle like in resolve_metric() even for single metric. In those cases, the intermediate list and ids will be leaked like: Direct leak of 3 byte(s) in 1 object(s) allocated from: #0 0x7f4c938f40b5 in strdup (/lib/x86_64-linux-gnu/libasan.so.5+0x920b5) #1 0x55f7e71c1bef in __add_metric util/metricgroup.c:683 #2 0x55f7e71c31d0 in add_metric util/metricgroup.c:906 #3 0x55f7e71c3844 in metricgroup__add_metric util/metricgroup.c:940 #4 0x55f7e71c488d in metricgroup__add_metric_list util/metricgroup.c:993 #5 0x55f7e71c488d in parse_groups util/metricgroup.c:1045 #6 0x55f7e71c60a4 in metricgroup__parse_groups_test util/metricgroup.c:1087 #7 0x55f7e71235ae in __compute_metric tests/parse-metric.c:164 #8 0x55f7e7124650 in compute_metric tests/parse-metric.c:196 #9 0x55f7e7124650 in test_recursion_fail tests/parse-metric.c:318 #10 0x55f7e7124650 in test__parse_metric tests/parse-metric.c:356 #11 0x55f7e70be09b in run_test tests/builtin-test.c:410 #12 0x55f7e70be09b in test_and_print tests/builtin-test.c:440 #13 0x55f7e70c101a in __cmd_test tests/builtin-test.c:661 #14 0x55f7e70c101a in cmd_test tests/builtin-test.c:807 #15 0x55f7e7126214 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:312 #16 0x55f7e6fc41a8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:364 #17 0x55f7e6fc41a8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:408 #18 0x55f7e6fc41a8 in main /home/namhyung/project/linux/tools/perf/perf.c:538 #19 0x7f4c93492cc9 in __libc_start_main ../csu/libc-start.c:308 Fixes: 83de0b7d535de ("perf metric: Collect referenced metrics in struct metric_ref_node") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b28c09447c10..c8904e471a71 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -939,7 +939,7 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids); if (ret) - return ret; + goto out; /* * Process any possible referenced metrics @@ -948,12 +948,14 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, ret = resolve_metric(metric_no_group, &list, map, &ids); if (ret) - return ret; + goto out; } /* End of pmu events. */ - if (!has_match) - return -EINVAL; + if (!has_match) { + ret = -EINVAL; + goto out; + } list_for_each_entry(m, &list, nd) { if (events->len > 0) @@ -968,9 +970,14 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, } } +out: + /* + * add to metric_list so that they can be released + * even if it's failed + */ list_splice(&list, metric_list); expr_ids__exit(&ids); - return 0; + return ret; } static int metricgroup__add_metric_list(const char *list, bool metric_no_group, From 6f47ed6cd12a0ea9b55b19b5d6e4a3b490a97bdf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:17 +0900 Subject: [PATCH 477/648] perf metric: Do not free metric when failed to resolve It's dangerous to free the original metric when it's called from resolve_metric() as it's already in the metric_list and might have other resources too. Instead, it'd better let them bail out and be released properly at the later stage. So add a check when it's called from metricgroup__add_metric() and release it. Also make sure that mp is set properly. Fixes: 83de0b7d535de ("perf metric: Collect referenced metrics in struct metric_ref_node") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c8904e471a71..ab5030fcfed4 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -672,7 +672,6 @@ static int __add_metric(struct list_head *metric_list, m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); INIT_LIST_HEAD(&m->metric_refs); m->metric_refs_cnt = 0; - *mp = m; parent = expr_ids__alloc(ids); if (!parent) { @@ -685,6 +684,7 @@ static int __add_metric(struct list_head *metric_list, free(m); return -ENOMEM; } + *mp = m; } else { /* * We got here for the referenced metric, via the @@ -719,8 +719,11 @@ static int __add_metric(struct list_head *metric_list, * all the metric's IDs and add it to the parent context. */ if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) { - expr__ctx_clear(&m->pctx); - free(m); + if (m->metric_refs_cnt == 0) { + expr__ctx_clear(&m->pctx); + free(m); + *mp = NULL; + } return -EINVAL; } From d26383dcb2b4b8629fde05270b4e3633be9e3d4b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Sep 2020 12:18:19 +0900 Subject: [PATCH 478/648] perf test: Free formats for perf pmu parse test The following leaks were detected by ASAN: Indirect leak of 360 byte(s) in 9 object(s) allocated from: #0 0x7fecc305180e in calloc (/lib/x86_64-linux-gnu/libasan.so.5+0x10780e) #1 0x560578f6dce5 in perf_pmu__new_format util/pmu.c:1333 #2 0x560578f752fc in perf_pmu_parse util/pmu.y:59 #3 0x560578f6a8b7 in perf_pmu__format_parse util/pmu.c:73 #4 0x560578e07045 in test__pmu tests/pmu.c:155 #5 0x560578de109b in run_test tests/builtin-test.c:410 #6 0x560578de109b in test_and_print tests/builtin-test.c:440 #7 0x560578de401a in __cmd_test tests/builtin-test.c:661 #8 0x560578de401a in cmd_test tests/builtin-test.c:807 #9 0x560578e49354 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:312 #10 0x560578ce71a8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:364 #11 0x560578ce71a8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:408 #12 0x560578ce71a8 in main /home/namhyung/project/linux/tools/perf/perf.c:538 #13 0x7fecc2b7acc9 in __libc_start_main ../csu/libc-start.c:308 Fixes: cff7f956ec4a1 ("perf tests: Move pmu tests into separate object") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200915031819.386559-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu.c | 1 + tools/perf/util/pmu.c | 11 +++++++++++ tools/perf/util/pmu.h | 1 + 3 files changed, 13 insertions(+) diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 5c11fe2b3040..714e6830a758 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -173,6 +173,7 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused) ret = 0; } while (0); + perf_pmu__del_formats(&formats); test_format_dir_put(format); return ret; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 555cb3524c25..d41caeb35cf6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1354,6 +1354,17 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) set_bit(b, bits); } +void perf_pmu__del_formats(struct list_head *formats) +{ + struct perf_pmu_format *fmt, *tmp; + + list_for_each_entry_safe(fmt, tmp, formats, list) { + list_del(&fmt->list); + free(fmt->name); + free(fmt); + } +} + static int sub_non_neg(int a, int b) { if (b > a) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b63c4c5e335e..a64e9c9ce731 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -94,6 +94,7 @@ int perf_pmu__new_format(struct list_head *list, char *name, int config, unsigned long *bits); void perf_pmu__set_format(unsigned long *bits, long from, long to); int perf_pmu__format_parse(char *dir, struct list_head *head); +void perf_pmu__del_formats(struct list_head *formats); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); From 46908326c6b801201f1e46f5ed0db6e85bef74ae Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 15 Sep 2020 18:12:09 +0300 Subject: [PATCH 479/648] efi: efibc: check for efivars write capability Branden reports that commit f88814cc2578c1 ("efi/efivars: Expose RT service availability via efivars abstraction") regresses UEFI platforms that implement GetVariable but not SetVariable when booting kernels that have EFIBC (bootloader control) enabled. The reason is that EFIBC is a user of the efivars abstraction, which was updated to permit users that rely only on the read capability, but not on the write capability. EFIBC is in the latter category, so it has to check explicitly whether efivars supports writes. Fixes: f88814cc2578c1 ("efi/efivars: Expose RT service availability via efivars abstraction") Tested-by: Branden Sherrell Link: https://lore.kernel.org/linux-efi/AE217103-C96F-4AFC-8417-83EC11962004@gmail.com/ Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efibc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c index 35dccc88ac0a..15a47539dc56 100644 --- a/drivers/firmware/efi/efibc.c +++ b/drivers/firmware/efi/efibc.c @@ -84,7 +84,7 @@ static int __init efibc_init(void) { int ret; - if (!efi_enabled(EFI_RUNTIME_SERVICES)) + if (!efivars_kobject() || !efivar_supports_writes()) return -ENODEV; ret = register_reboot_notifier(&efibc_reboot_notifier); From d3f2ef1887e18a091ac25e3ec8985ecb9a5c76fc Mon Sep 17 00:00:00 2001 From: Dany Madden Date: Mon, 14 Sep 2020 20:35:35 -0400 Subject: [PATCH 480/648] ibmvnic: update MAINTAINERS Update supporters for IBM Power SRIOV Virtual NIC Device Driver. Thomas Falcon is moving on to other works. Dany Madden, Lijun Pan and Sukadev Bhattiprolu are the current supporters. Signed-off-by: Dany Madden Signed-off-by: David S. Miller --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2783a5f68d2c..923c69ad4eec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8315,7 +8315,9 @@ S: Supported F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver -M: Thomas Falcon +M: Dany Madden +M: Lijun Pan +M: Sukadev Bhattiprolu L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* From 2e5117ba9f582262e93a1fdf8e1a7b9affd5121c Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Tue, 15 Sep 2020 10:39:55 +0800 Subject: [PATCH 481/648] net: tipc: kerneldoc fixes Fix parameter description of tipc_link_bc_create() Reported-by: Hulk Robot Fixes: 16ad3f4022bb ("tipc: introduce variable window congestion control") Signed-off-by: Lu Wei Signed-off-by: David S. Miller --- net/tipc/link.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index b7362556da95..cef38a910107 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -532,7 +532,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, * tipc_link_bc_create - create new link to be used for broadcast * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers - * @window: send window to be used + * @min_win: minimal send window to be used by link + * @max_win: maximal send window to be used by link * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link From 66a5710beaf42903d553378f609166034bd219c7 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 2 Sep 2020 12:57:59 +0800 Subject: [PATCH 482/648] drm/kfd: fix a system crash issue during GPU recovery The crash log as the below: [Thu Aug 20 23:18:14 2020] general protection fault: 0000 [#1] SMP NOPTI [Thu Aug 20 23:18:14 2020] CPU: 152 PID: 1837 Comm: kworker/152:1 Tainted: G OE 5.4.0-42-generic #46~18.04.1-Ubuntu [Thu Aug 20 23:18:14 2020] Hardware name: GIGABYTE G482-Z53-YF/MZ52-G40-00, BIOS R12 05/13/2020 [Thu Aug 20 23:18:14 2020] Workqueue: events amdgpu_ras_do_recovery [amdgpu] [Thu Aug 20 23:18:14 2020] RIP: 0010:evict_process_queues_cpsch+0xc9/0x130 [amdgpu] [Thu Aug 20 23:18:14 2020] Code: 49 8d 4d 10 48 39 c8 75 21 eb 44 83 fa 03 74 36 80 78 72 00 74 0c 83 ab 68 01 00 00 01 41 c6 45 41 00 48 8b 00 48 39 c8 74 25 <80> 78 70 00 c6 40 6d 01 74 ee 8b 50 28 c6 40 70 00 83 ab 60 01 00 [Thu Aug 20 23:18:14 2020] RSP: 0018:ffffb29b52f6fc90 EFLAGS: 00010213 [Thu Aug 20 23:18:14 2020] RAX: 1c884edb0a118914 RBX: ffff8a0d45ff3c00 RCX: ffff8a2d83e41038 [Thu Aug 20 23:18:14 2020] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff8a0e2e4178c0 [Thu Aug 20 23:18:14 2020] RBP: ffffb29b52f6fcb0 R08: 0000000000001b64 R09: 0000000000000004 [Thu Aug 20 23:18:14 2020] R10: ffffb29b52f6fb78 R11: 0000000000000001 R12: ffff8a0d45ff3d28 [Thu Aug 20 23:18:14 2020] R13: ffff8a2d83e41028 R14: 0000000000000000 R15: 0000000000000000 [Thu Aug 20 23:18:14 2020] FS: 0000000000000000(0000) GS:ffff8a0e2e400000(0000) knlGS:0000000000000000 [Thu Aug 20 23:18:14 2020] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [Thu Aug 20 23:18:14 2020] CR2: 000055c783c0e6a8 CR3: 00000034a1284000 CR4: 0000000000340ee0 [Thu Aug 20 23:18:14 2020] Call Trace: [Thu Aug 20 23:18:14 2020] kfd_process_evict_queues+0x43/0xd0 [amdgpu] [Thu Aug 20 23:18:14 2020] kfd_suspend_all_processes+0x60/0xf0 [amdgpu] [Thu Aug 20 23:18:14 2020] kgd2kfd_suspend.part.7+0x43/0x50 [amdgpu] [Thu Aug 20 23:18:14 2020] kgd2kfd_pre_reset+0x46/0x60 [amdgpu] [Thu Aug 20 23:18:14 2020] amdgpu_amdkfd_pre_reset+0x1a/0x20 [amdgpu] [Thu Aug 20 23:18:14 2020] amdgpu_device_gpu_recover+0x377/0xf90 [amdgpu] [Thu Aug 20 23:18:14 2020] ? amdgpu_ras_error_query+0x1b8/0x2a0 [amdgpu] [Thu Aug 20 23:18:14 2020] amdgpu_ras_do_recovery+0x159/0x190 [amdgpu] [Thu Aug 20 23:18:14 2020] process_one_work+0x20f/0x400 [Thu Aug 20 23:18:14 2020] worker_thread+0x34/0x410 When GPU hang, user process will fail to create a compute queue whose struct object will be freed later, but driver wrongly add this queue to queue list of the proccess. And then kfd_process_evict_queues will access a freed memory, which cause a system crash. v2: The failure to execute_queues should probably not be reported to the caller of create_queue, because the queue was already created. Therefore change to ignore the return value from execute_queues. Reviewed-by: Felix Kuehling Signed-off-by: Dennis Li Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e0e60b0d0669..ef78492ef956 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1326,7 +1326,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.is_active) { increment_queue_count(dqm, q->properties.type); - retval = execute_queues_cpsch(dqm, + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } From 087d764159996ae378b08c0fdd557537adfd6899 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 2 Sep 2020 17:11:09 +0800 Subject: [PATCH 483/648] drm/amdkfd: fix a memory leak issue In the resume stage of GPU recovery, start_cpsch will call pm_init which set pm->allocated as false, cause the next pm_release_ib has no chance to release ib memory. Add pm_release_ib in stop_cpsch which will be called in the suspend stage of GPU recovery. Reviewed-by: Felix Kuehling Signed-off-by: Dennis Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ef78492ef956..0f4508b4903e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1216,6 +1216,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) dqm->sched_running = false; dqm_unlock(dqm); + pm_release_ib(&dqm->packets); + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets, hanging); From cc8e66e769ebd1d10c406a3152474bab24ac1730 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Mon, 14 Sep 2020 14:42:51 +0800 Subject: [PATCH 484/648] drm/amd/pm: support runtime pptable update for sienna_cichlid etc. This avoids smu issue when enabling runtime pptable update for sienna_cichlid and so on. Runtime pptable udpate is needed for test and debug purpose. Signed-off-by: Jiansong Chen Reviewed-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 0826625573dc..63f945f9f331 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1126,7 +1126,7 @@ static int smu_disable_dpms(struct smu_context *smu) */ if (smu->uploading_custom_pp_table && (adev->asic_type >= CHIP_NAVI10) && - (adev->asic_type <= CHIP_NAVI12)) + (adev->asic_type <= CHIP_NAVY_FLOUNDER)) return 0; /* @@ -1211,7 +1211,9 @@ static int smu_hw_fini(void *handle) int smu_reset(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int ret = 0; + int ret; + + amdgpu_gfx_off_ctrl(smu->adev, false); ret = smu_hw_fini(adev); if (ret) @@ -1222,8 +1224,12 @@ int smu_reset(struct smu_context *smu) return ret; ret = smu_late_init(adev); + if (ret) + return ret; - return ret; + amdgpu_gfx_off_ctrl(smu->adev, true); + + return 0; } static int smu_suspend(void *handle) From 4cdd7b332ed139b1e37faeb82409a14490adb644 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 28 Aug 2020 11:09:38 -0400 Subject: [PATCH 485/648] drm/amd/display: Don't use DRM_ERROR() for DTM add topology [Why] Previously we were only calling add_topology when hdcp was being enabled. Now we call add_topology by default so the ERROR messages are printed if the firmware is not loaded. This error message is not relevant for normal display functionality so no need to print a ERROR message. [How] Change DRM_ERROR to DRM_INFO Signed-off-by: Bhawanpreet Lakha Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index fb1161dd7ea8..3a367a5968ae 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -88,7 +88,7 @@ enum mod_hdcp_status mod_hdcp_add_display_to_topology(struct mod_hdcp *hdcp, enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; if (!psp->dtm_context.dtm_initialized) { - DRM_ERROR("Failed to add display topology, DTM TA is not initialized."); + DRM_INFO("Failed to add display topology, DTM TA is not initialized."); display->state = MOD_HDCP_DISPLAY_INACTIVE; return MOD_HDCP_STATUS_FAILURE; } From c4790a8894232f39c25c7c546c06efe074e63384 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Thu, 3 Sep 2020 16:17:46 -0400 Subject: [PATCH 486/648] drm/amd/display: update nv1x stutter latencies [why] Recent characterization shows increased stutter latencies on some SKUs, leading to underflow. [how] Update SOC params to account for this worst case latency. Signed-off-by: Jun Lei Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 9140b3fc767a..f31f48dd0da2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -409,8 +409,8 @@ static struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 8.6, - .sr_enter_plus_exit_time_us = 10.9, + .sr_exit_time_us = 11.6, + .sr_enter_plus_exit_time_us = 13.9, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, From 5367eb6d8a98b8682877961f4ccee1088d5735f3 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 10 Sep 2020 13:59:33 -0400 Subject: [PATCH 487/648] drm/amdgpu: Include sienna_cichlid in USBC PD FW support. Create sysfs interface also for sienna_cichlid. Reviewed-by: Alex Deucher Signed-off-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index d8c6520ff74a..06757681b2ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -178,7 +178,7 @@ static int psp_sw_init(void *handle) return ret; } - if (adev->asic_type == CHIP_NAVI10) { + if (adev->asic_type == CHIP_NAVI10 || adev->asic_type == CHIP_SIENNA_CICHLID) { ret= psp_sysfs_init(adev); if (ret) { return ret; From 40eab0f8956724b0c2bb9e5679269632afb72b26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 9 Sep 2020 13:12:46 +0200 Subject: [PATCH 488/648] drm/radeon: revert "Prefer lower feedback dividers" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out this breaks a lot of different hardware. This reverts commit fc8c70526bd30733ea8667adb8b8ffebea30a8ed. Signed-off-by: Christian König Acked-by: Nirmoy Das Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 7b69d6dfe44a..e0ae911ef427 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -933,7 +933,7 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div, /* get matching reference and feedback divider */ *ref_div = min(max(den/post_div, 1u), ref_div_max); - *fb_div = max(nom * *ref_div * post_div / den, 1u); + *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den); /* limit fb divider to its maximum */ if (*fb_div > fb_div_max) { From 2f228aab21bbc74e90e267a721215ec8be51daf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 4 Sep 2020 12:43:04 +0200 Subject: [PATCH 489/648] drm/amdgpu/dc: Require primary plane to be enabled whenever the CRTC is MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't check drm_crtc_state::active for this either, per its documentation in include/drm/drm_crtc.h: * Hence drivers must not consult @active in their various * &drm_mode_config_funcs.atomic_check callback to reject an atomic * commit. atomic_remove_fb disables the CRTC as needed for disabling the primary plane. This prevents at least the following problems if the primary plane gets disabled (e.g. due to destroying the FB assigned to the primary plane, as happens e.g. with mutter in Wayland mode): * The legacy cursor ioctl returned EINVAL for a non-0 cursor FB ID (which enables the cursor plane). * If the cursor plane was enabled, changing the legacy DPMS property value from off to on returned EINVAL. v2: * Minor changes to code comment and commit log, per review feedback. GitLab: https://gitlab.gnome.org/GNOME/mutter/-/issues/1108 GitLab: https://gitlab.gnome.org/GNOME/mutter/-/issues/1165 GitLab: https://gitlab.gnome.org/GNOME/mutter/-/issues/1344 Suggested-by: Daniel Vetter Acked-by: Daniel Vetter Reviewed-by: Nicholas Kazlauskas Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 32 ++++++------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b51c527a3f0d..4ba8b54a2695 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5278,19 +5278,6 @@ static void dm_crtc_helper_disable(struct drm_crtc *crtc) { } -static bool does_crtc_have_active_cursor(struct drm_crtc_state *new_crtc_state) -{ - struct drm_device *dev = new_crtc_state->crtc->dev; - struct drm_plane *plane; - - drm_for_each_plane_mask(plane, dev, new_crtc_state->plane_mask) { - if (plane->type == DRM_PLANE_TYPE_CURSOR) - return true; - } - - return false; -} - static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) { struct drm_atomic_state *state = new_crtc_state->state; @@ -5354,19 +5341,20 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, return ret; } + /* + * We require the primary plane to be enabled whenever the CRTC is, otherwise + * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other + * planes are disabled, which is not supported by the hardware. And there is legacy + * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. + */ + if (state->enable && + !(state->plane_mask & drm_plane_mask(crtc->primary))) + return -EINVAL; + /* In some use cases, like reset, no stream is attached */ if (!dm_crtc_state->stream) return 0; - /* - * We want at least one hardware plane enabled to use - * the stream with a cursor enabled. - */ - if (state->enable && state->active && - does_crtc_have_active_cursor(state) && - dm_crtc_state->active_planes == 0) - return -EINVAL; - if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) return 0; From 2fbc6e89b2f1403189e624cabaf73e189c5e50c6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 14 Sep 2020 21:03:54 -0600 Subject: [PATCH 490/648] ipv4: Update exception handling for multipath routes via same device Kfir reported that pmtu exceptions are not created properly for deployments where multipath routes use the same device. After some digging I see 2 compounding problems: 1. ip_route_output_key_hash_rcu is updating the flowi4_oif *after* the route lookup. This is the second use case where this has been a problem (the first is related to use of vti devices with VRF). I can not find any reason for the oif to be changed after the lookup; the code goes back to the start of git. It does not seem logical so remove it. 2. fib_lookups for exceptions do not call fib_select_path to handle multipath route selection based on the hash. The end result is that the fib_lookup used to add the exception always creates it based using the first leg of the route. An example topology showing the problem: | host1 +------+ | eth0 | .209 +------+ | +------+ switch | br0 | +------+ | +---------+---------+ | host2 | host3 +------+ +------+ | eth0 | .250 | eth0 | 192.168.252.252 +------+ +------+ +-----+ +-----+ | vti | .2 | vti | 192.168.247.3 +-----+ +-----+ \ / ================================= tunnels 192.168.247.1/24 for h in host1 host2 host3; do ip netns add ${h} ip -netns ${h} link set lo up ip netns exec ${h} sysctl -wq net.ipv4.ip_forward=1 done ip netns add switch ip -netns switch li set lo up ip -netns switch link add br0 type bridge stp 0 ip -netns switch link set br0 up for n in 1 2 3; do ip -netns switch link add eth-sw type veth peer name eth-h${n} ip -netns switch li set eth-h${n} master br0 up ip -netns switch li set eth-sw netns host${n} name eth0 done ip -netns host1 addr add 192.168.252.209/24 dev eth0 ip -netns host1 link set dev eth0 up ip -netns host1 route add 192.168.247.0/24 \ nexthop via 192.168.252.250 dev eth0 nexthop via 192.168.252.252 dev eth0 ip -netns host2 addr add 192.168.252.250/24 dev eth0 ip -netns host2 link set dev eth0 up ip -netns host2 addr add 192.168.252.252/24 dev eth0 ip -netns host3 link set dev eth0 up ip netns add tunnel ip -netns tunnel li set lo up ip -netns tunnel li add br0 type bridge ip -netns tunnel li set br0 up for n in $(seq 11 20); do ip -netns tunnel addr add dev br0 192.168.247.${n}/24 done for n in 2 3 do ip -netns tunnel link add vti${n} type veth peer name eth${n} ip -netns tunnel link set eth${n} mtu 1360 master br0 up ip -netns tunnel link set vti${n} netns host${n} mtu 1360 up ip -netns host${n} addr add dev vti${n} 192.168.247.${n}/24 done ip -netns tunnel ro add default nexthop via 192.168.247.2 nexthop via 192.168.247.3 ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.11 ip netns exec host1 ping -M do -s 1400 -c3 -I 192.168.252.209 192.168.247.15 ip -netns host1 ro ls cache Before this patch the cache always shows exceptions against the first leg in the multipath route; 192.168.252.250 per this example. Since the hash has an initial random seed, you may need to vary the final octet more than what is listed. In my tests, using addresses between 11 and 19 usually found 1 that used both legs. With this patch, the cache will have exceptions for both legs. Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions") Reported-by: Kfir Itzhak Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e5f210d00851..58642b29a499 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -786,8 +786,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, skb); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); @@ -1013,6 +1015,7 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + struct net *net = dev_net(dst->dev); u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; @@ -1033,9 +1036,11 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + if (fib_lookup(net, fl4, &res, 0) == 0) { + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, NULL); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } @@ -2668,8 +2673,6 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, fib_select_path(net, res, fl4, skb); dev_out = FIB_RES_DEV(*res); - fl4->flowi4_oif = dev_out->ifindex; - make_route: rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); From 8c33dadc3e0eef1599811a55d748a0b95da0317d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 15 Sep 2020 11:29:59 -0700 Subject: [PATCH 491/648] bpf: Bpf_skc_to_* casting helpers require a NULL check on sk The bpf_skc_to_* type casting helpers are available to BPF_PROG_TYPE_TRACING. The traced PTR_TO_BTF_ID may be NULL. For example, the skb->sk may be NULL. Thus, these casting helpers need to check "!sk" also and this patch fixes them. Fixes: 0d4fad3e57df ("bpf: Add bpf_skc_to_udp6_sock() helper") Fixes: 478cfbdf5f13 ("bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers") Fixes: af7ec1383361 ("bpf: Add bpf_skc_to_tcp6_sock() helper") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200915182959.241101-1-kafai@fb.com --- net/core/filter.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 2d62c25e0395..23e8ded0ec97 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9522,7 +9522,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct tcp6_sock); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk->sk_family == AF_INET6) return (unsigned long)sk; @@ -9540,7 +9540,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) { - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; return (unsigned long)NULL; @@ -9558,12 +9558,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) { #ifdef CONFIG_INET - if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) + if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) - if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) + if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif @@ -9582,12 +9582,12 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) { #ifdef CONFIG_INET - if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif #if IS_BUILTIN(CONFIG_IPV6) - if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) + if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif @@ -9609,7 +9609,7 @@ BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk) * trigger an explicit type generation here. */ BTF_TYPE_EMIT(struct udp6_sock); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6) return (unsigned long)sk; From ce880cb825fcc22d4e39046a6c3a3a7f6603883d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 15 Sep 2020 17:44:01 -0700 Subject: [PATCH 492/648] bpf: Fix a rcu warning for bpffs map pretty-print Running selftest ./btf_btf -p the kernel had the following warning: [ 51.528185] WARNING: CPU: 3 PID: 1756 at kernel/bpf/hashtab.c:717 htab_map_get_next_key+0x2eb/0x300 [ 51.529217] Modules linked in: [ 51.529583] CPU: 3 PID: 1756 Comm: test_btf Not tainted 5.9.0-rc1+ #878 [ 51.530346] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.el7.centos 04/01/2014 [ 51.531410] RIP: 0010:htab_map_get_next_key+0x2eb/0x300 ... [ 51.542826] Call Trace: [ 51.543119] map_seq_next+0x53/0x80 [ 51.543528] seq_read+0x263/0x400 [ 51.543932] vfs_read+0xad/0x1c0 [ 51.544311] ksys_read+0x5f/0xe0 [ 51.544689] do_syscall_64+0x33/0x40 [ 51.545116] entry_SYSCALL_64_after_hwframe+0x44/0xa9 The related source code in kernel/bpf/hashtab.c: 709 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 710 { 711 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 712 struct hlist_nulls_head *head; 713 struct htab_elem *l, *next_l; 714 u32 hash, key_size; 715 int i = 0; 716 717 WARN_ON_ONCE(!rcu_read_lock_held()); In kernel/bpf/inode.c, bpffs map pretty print calls map->ops->map_get_next_key() without holding a rcu_read_lock(), hence causing the above warning. To fix the issue, just surrounding map->ops->map_get_next_key() with rcu read lock. Fixes: a26ca7c982cb ("bpf: btf: Add pretty print support to the basic arraymap") Reported-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200916004401.146277-1-yhs@fb.com --- kernel/bpf/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fb878ba3f22f..18f4969552ac 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -226,10 +226,12 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) else prev_key = key; + rcu_read_lock(); if (map->ops->map_get_next_key(map, prev_key, key)) { map_iter(m)->done = true; - return NULL; + key = NULL; } + rcu_read_unlock(); return key; } From e7d95527f27a6d9edcffbd74eee38e5cb6b91785 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Jul 2020 10:28:56 +0100 Subject: [PATCH 493/648] drm/i915/gem: Delay tracking the GEM context until it is registered Avoid exposing a partially constructed context by deferring the list_add() from the initial construction to the end of registration. Otherwise, if we peek into the list of contexts from inside debugfs, we may see the partially constructed context and chase down some dangling incomplete pointers. Reported-by: CQ Tang Fixes: 3aa9945a528e ("drm/i915: Separate GEM context construction and registration to userspace") References: f6e8aa387171 ("drm/i915: Report the number of closed vma held by each context in debugfs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: CQ Tang Cc: # v5.2+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200730092856.23615-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Joonas Lahtinen (cherry picked from commit eb4dedae920a07c485328af3da2202ec5184fb17) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index d0bdb6d447ed..efc4ba34c06e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -713,6 +713,7 @@ __create_context(struct drm_i915_private *i915) ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); @@ -740,10 +741,6 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - spin_lock(&i915->gem.contexts.lock); - list_add_tail(&ctx->link, &i915->gem.contexts.list); - spin_unlock(&i915->gem.contexts.lock); - return ctx; err_free: @@ -931,6 +928,7 @@ static int gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv, u32 *id) { + struct drm_i915_private *i915 = ctx->i915; struct i915_address_space *vm; int ret; @@ -949,8 +947,16 @@ static int gem_context_register(struct i915_gem_context *ctx, /* And finally expose ourselves to userspace via the idr */ ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); if (ret) - put_pid(fetch_and_zero(&ctx->pid)); + goto err_pid; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + + return 0; + +err_pid: + put_pid(fetch_and_zero(&ctx->pid)); return ret; } From c2314b8bd4c009793b6f9d57bc8363af034e02ca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 6 Aug 2020 11:59:54 +0100 Subject: [PATCH 494/648] drm/i915/gem: Reduce context termination list iteration guard to RCU As we now protect the timeline list using RCU, we can drop the timeline->mutex for guarding the list iteration during context close, as we are searching for an inflight request. Any new request will see the context is banned and not be submitted. In doing so, pull the checks for a concurrent submission of the request (notably the i915_request_completed()) under the engine spinlock, to fully serialise with __i915_request_submit()). That is in the case of preempt-to-busy where the request may be completed during the __i915_request_submit(), we need to be careful that we sample the request status after serialising so that we don't miss the request the engine is actually submitting. Fixes: 4a3174152147 ("drm/i915/gem: Refine occupancy test in kill_context()") References: d22d2d073ef8 ("drm/i915: Protect i915_request_await_start from early waits") # rcu protection of timeline->requests References: https://gitlab.freedesktop.org/drm/intel/-/issues/1622 References: https://gitlab.freedesktop.org/drm/intel/-/issues/2158 Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200806105954.7766-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi Signed-off-by: Joonas Lahtinen (cherry picked from commit 736e785f9b28cd9ef2d16a80960a04fd00e64b22) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 32 ++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index efc4ba34c06e..ef755dd5e68f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -439,29 +439,36 @@ static bool __cancel_engine(struct intel_engine_cs *engine) return __reset_engine(engine); } -static struct intel_engine_cs *__active_engine(struct i915_request *rq) +static bool +__active_engine(struct i915_request *rq, struct intel_engine_cs **active) { struct intel_engine_cs *engine, *locked; + bool ret = false; /* * Serialise with __i915_request_submit() so that it sees * is-banned?, or we know the request is already inflight. + * + * Note that rq->engine is unstable, and so we double + * check that we have acquired the lock on the final engine. */ locked = READ_ONCE(rq->engine); spin_lock_irq(&locked->active.lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); locked = engine; + spin_lock(&locked->active.lock); } - engine = NULL; - if (i915_request_is_active(rq) && rq->fence.error != -EIO) - engine = rq->engine; + if (!i915_request_completed(rq)) { + if (i915_request_is_active(rq) && rq->fence.error != -EIO) + *active = locked; + ret = true; + } spin_unlock_irq(&locked->active.lock); - return engine; + return ret; } static struct intel_engine_cs *active_engine(struct intel_context *ce) @@ -472,17 +479,16 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (!ce->timeline) return NULL; - mutex_lock(&ce->timeline->mutex); - list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { - if (i915_request_completed(rq)) - break; + rcu_read_lock(); + list_for_each_entry_rcu(rq, &ce->timeline->requests, link) { + if (i915_request_is_active(rq) && i915_request_completed(rq)) + continue; /* Check with the backend if the request is inflight */ - engine = __active_engine(rq); - if (engine) + if (__active_engine(rq, &engine)) break; } - mutex_unlock(&ce->timeline->mutex); + rcu_read_unlock(); return engine; } From b82a8b93b4f9008de97d6920184948563980df37 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Jul 2020 15:22:06 +0100 Subject: [PATCH 495/648] drm/i915: Be wary of data races when reading the active execlists To implement preempt-to-busy (and so efficient timeslicing and best utilization of the hardware submission ports) we let the GPU run asynchronously in respect to the ELSP submission queue. This created challenges in keeping and accessing the driver state mirroring the asynchronous GPU execution. The latest occurence of this was spotted by KCSAN: [ 1413.563200] BUG: KCSAN: data-race in __await_execution+0x217/0x370 [i915] [ 1413.563221] [ 1413.563236] race at unknown origin, with read to 0xffff88885bb6c478 of 8 bytes by task 9654 on cpu 1: [ 1413.563548] __await_execution+0x217/0x370 [i915] [ 1413.563891] i915_request_await_dma_fence+0x4eb/0x6a0 [i915] [ 1413.564235] i915_request_await_object+0x421/0x490 [i915] [ 1413.564577] i915_gem_do_execbuffer+0x29b7/0x3c40 [i915] [ 1413.564967] i915_gem_execbuffer2_ioctl+0x22f/0x5c0 [i915] [ 1413.564998] drm_ioctl_kernel+0x156/0x1b0 [ 1413.565022] drm_ioctl+0x2ff/0x480 [ 1413.565046] __x64_sys_ioctl+0x87/0xd0 [ 1413.565069] do_syscall_64+0x4d/0x80 [ 1413.565094] entry_SYSCALL_64_after_hwframe+0x44/0xa9 To complicate matters, we have to both avoid the read tearing of *active and avoid any write tearing as perform the pending[] -> inflight[] promotion of the execlists. This is because we cannot rely on the memcpy doing u64 aligned copies on all kernels/platforms and so we opt to open-code it with explicit WRITE_ONCE annotations to satisfy KCSAN. v2: When in doubt, write the same comment again. v3: Expanded commit message. Fixes: b55230e5e800 ("drm/i915: Check for awaits on still currently executing requests") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200716142207.13003-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi [Joonas: Rebased and reordered into drm-intel-gt-next branch] [Joonas: Added expanded commit message from Tvrtko and Chris] Signed-off-by: Joonas Lahtinen (cherry picked from commit b4d9145b0154f8c71dafc2db5fd445f1f3db9426) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_lrc.c | 15 +++++++++++---- drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 24322ef08aa4..9eeaca957a7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2060,6 +2060,14 @@ static inline void clear_ports(struct i915_request **ports, int count) memset_p((void **)ports, NULL, count); } +static inline void +copy_ports(struct i915_request **dst, struct i915_request **src, int count) +{ + /* A memcpy_p() would be very useful here! */ + while (count--) + WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -2648,10 +2656,9 @@ static void process_csb(struct intel_engine_cs *engine) /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); + copy_ports(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists)); smp_wmb(); /* complete the seqlock */ WRITE_ONCE(execlists->active, execlists->inflight); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0b2fe55e6194..781a6783affe 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -388,17 +388,38 @@ static bool __request_in_flight(const struct i915_request *signal) * As we know that there are always preemption points between * requests, we know that only the currently executing request * may be still active even though we have cleared the flag. - * However, we can't rely on our tracking of ELSP[0] to known + * However, we can't rely on our tracking of ELSP[0] to know * which request is currently active and so maybe stuck, as * the tracking maybe an event behind. Instead assume that * if the context is still inflight, then it is still active * even if the active flag has been cleared. + * + * To further complicate matters, if there a pending promotion, the HW + * may either perform a context switch to the second inflight execlists, + * or it may switch to the pending set of execlists. In the case of the + * latter, it may send the ACK and we process the event copying the + * pending[] over top of inflight[], _overwriting_ our *active. Since + * this implies the HW is arbitrating and not struck in *active, we do + * not worry about complete accuracy, but we do require no read/write + * tearing of the pointer [the read of the pointer must be valid, even + * as the array is being overwritten, for which we require the writes + * to avoid tearing.] + * + * Note that the read of *execlists->active may race with the promotion + * of execlists->pending[] to execlists->inflight[], overwritting + * the value at *execlists->active. This is fine. The promotion implies + * that we received an ACK from the HW, and so the context is not + * stuck -- if we do not see ourselves in *active, the inflight status + * is valid. If instead we see ourselves being copied into *active, + * we are inflight and may signal the callback. */ if (!intel_context_inflight(signal->context)) return false; rcu_read_lock(); - for (port = __engine_active(signal->engine); (rq = *port); port++) { + for (port = __engine_active(signal->engine); + (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */ + port++) { if (rq->context == signal->context) { inflight = i915_seqno_passed(rq->fence.seqno, signal->fence.seqno); From 20612303a0b45de748d31331407e84300c38e497 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Jul 2020 16:21:44 +0100 Subject: [PATCH 496/648] drm/i915: Filter wake_flags passed to default_wake_function (NOTE: This is the minimal backportable fix, a full fix is being developed at https://patchwork.freedesktop.org/patch/388048/) The flags passed to the wait_entry.func are passed onwards to try_to_wake_up(), which has a very particular interpretation for its wake_flags. In particular, beyond the published WF_SYNC, it has a few internal flags as well. Since we passed the fence->error down the chain via the flags argument, these ended up in the default_wake_function confusing the kernel/sched. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2110 Fixes: ef4688497512 ("drm/i915: Propagate fence errors") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: # v5.4+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200728152144.1100-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi [Joonas: Rebased and reordered into drm-intel-gt-next branch] [Joonas: Added a note and link about more complete fix] Signed-off-by: Joonas Lahtinen (cherry picked from commit f4b3c395540aa3d4f5a6275c5bdd83ab89034806) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_sw_fence.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 295b9829e2da..4cd2038cbe35 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -164,9 +164,13 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, do { list_for_each_entry_safe(pos, next, &x->head, entry) { - pos->func(pos, - TASK_NORMAL, fence->error, - &extra); + int wake_flags; + + wake_flags = fence->error; + if (pos->func == autoremove_wake_function) + wake_flags = 0; + + pos->func(pos, TASK_NORMAL, wake_flags, &extra); } if (list_empty(&extra)) From 130a96d698d7bee9f339832d1e47ab26aad8dbf1 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 16 Sep 2020 12:00:33 +0300 Subject: [PATCH 497/648] usb: typec: ucsi: acpi: Increase command completion timeout value UCSI specification quite clearly states that if a command can't be completed in 10ms, the firmware must notify about BUSY condition. Unfortunately almost none of the platforms (the firmware on them) generate the BUSY notification even if a command can't be completed in time. The driver already considered that, and used a timeout value of 5 seconds, but processing especially the alternate mode discovery commands takes often considerable amount of time from the firmware, much more than the 5 seconds. That happens especially after bootup when devices are already connected to the USB Type-C connector. For now on those platforms the alternate mode discovery has simply failed because of the timeout. To improve the situation, increasing the timeout value for the command completion to 1 minute. That should give enough time for even the slowest firmware to process the commands. Fixes: f56de278e8ec ("usb: typec: ucsi: acpi: Move to the new API") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200916090034.25119-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index c0aca2f0f23f..fbfe8f5933af 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -78,7 +78,7 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, if (ret) goto out_clear_bit; - if (!wait_for_completion_timeout(&ua->complete, msecs_to_jiffies(5000))) + if (!wait_for_completion_timeout(&ua->complete, 60 * HZ)) ret = -ETIMEDOUT; out_clear_bit: From 386e15a650447f53de3d2d8819ce9393f31650a4 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 16 Sep 2020 12:00:34 +0300 Subject: [PATCH 498/648] usb: typec: ucsi: Prevent mode overrun Sometimes the embedded controller firmware does not terminate the list of alternate modes that the partner supports in its response to the GET_ALTERNATE_MODES command. Instead the firmware returns the supported alternate modes over and over again until the driver stops requesting them. If that happens, the number of modes for each alternate mode will exceed the maximum 6 that is defined in the USB Power Delivery specification. Making sure that can't happen by adding a check for it. This fixes NULL pointer dereference that is caused by the overrun. Fixes: ad74b8649beaf ("usb: typec: ucsi: Preliminary support for alternate modes") Cc: stable@vger.kernel.org Reported-by: Zwane Mwaikambo Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200916090034.25119-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index e680fcfdee60..758b988ac518 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -216,14 +216,18 @@ void ucsi_altmode_update_active(struct ucsi_connector *con) con->partner_altmode[i] == altmode); } -static u8 ucsi_altmode_next_mode(struct typec_altmode **alt, u16 svid) +static int ucsi_altmode_next_mode(struct typec_altmode **alt, u16 svid) { u8 mode = 1; int i; - for (i = 0; alt[i]; i++) + for (i = 0; alt[i]; i++) { + if (i > MODE_DISCOVERY_MAX) + return -ERANGE; + if (alt[i]->svid == svid) mode++; + } return mode; } @@ -258,8 +262,11 @@ static int ucsi_register_altmode(struct ucsi_connector *con, goto err; } - desc->mode = ucsi_altmode_next_mode(con->port_altmode, - desc->svid); + ret = ucsi_altmode_next_mode(con->port_altmode, desc->svid); + if (ret < 0) + return ret; + + desc->mode = ret; switch (desc->svid) { case USB_TYPEC_DP_SID: @@ -292,8 +299,11 @@ static int ucsi_register_altmode(struct ucsi_connector *con, goto err; } - desc->mode = ucsi_altmode_next_mode(con->partner_altmode, - desc->svid); + ret = ucsi_altmode_next_mode(con->partner_altmode, desc->svid); + if (ret < 0) + return ret; + + desc->mode = ret; alt = typec_partner_register_altmode(con->partner, desc); if (IS_ERR(alt)) { From 325b008723b2dd31de020e85ab9d2e9aa4637d35 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 16 Sep 2020 11:40:25 +0200 Subject: [PATCH 499/648] USB: UAS: fix disconnect by unplugging a hub The SCSI layer can go into an ugly loop if you ignore that a device is gone. You need to report an error in the command rather than in the return value of the queue method. We need to specifically check for ENODEV. The issue goes back to the introduction of the driver. Fixes: 115bb1ffa54c3 ("USB: Add UAS driver") Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20200916094026.30085-2-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 08f9296431e9..8183504e3abb 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -662,8 +662,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, if (devinfo->resetting) { cmnd->result = DID_ERROR << 16; cmnd->scsi_done(cmnd); - spin_unlock_irqrestore(&devinfo->lock, flags); - return 0; + goto zombie; } /* Find a free uas-tag */ @@ -699,6 +698,16 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, cmdinfo->state &= ~(SUBMIT_DATA_IN_URB | SUBMIT_DATA_OUT_URB); err = uas_submit_urbs(cmnd, devinfo); + /* + * in case of fatal errors the SCSI layer is peculiar + * a command that has finished is a success for the purpose + * of queueing, no matter how fatal the error + */ + if (err == -ENODEV) { + cmnd->result = DID_ERROR << 16; + cmnd->scsi_done(cmnd); + goto zombie; + } if (err) { /* If we did nothing, give up now */ if (cmdinfo->state & SUBMIT_STATUS_URB) { @@ -709,6 +718,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd, } devinfo->cmnd[idx] = cmnd; +zombie: spin_unlock_irqrestore(&devinfo->lock, flags); return 0; } From bcea6dafeeef7d1a6a8320a249aabf981d63b881 Mon Sep 17 00:00:00 2001 From: Penghao Date: Mon, 7 Sep 2020 10:30:26 +0800 Subject: [PATCH 500/648] USB: quirks: Add USB_QUIRK_IGNORE_REMOTE_WAKEUP quirk for BYD zhaoxin notebook Add a USB_QUIRK_IGNORE_REMOTE_WAKEUP quirk for the BYD zhaoxin notebook. This notebook come with usb touchpad. And we would like to disable touchpad wakeup on this notebook by default. Signed-off-by: Penghao Cc: stable Link: https://lore.kernel.org/r/20200907023026.28189-1-penghao@uniontech.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f232914de5fd..10574fa3f927 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -397,6 +397,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Generic RTL8153 based ethernet adapters */ { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM }, + /* SONiX USB DEVICE Touchpad */ + { USB_DEVICE(0x0c45, 0x7056), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, From d69030c91b3765934048151792f141f0571efa86 Mon Sep 17 00:00:00 2001 From: Madhusudanarao Amara Date: Wed, 16 Sep 2020 12:11:02 +0300 Subject: [PATCH 501/648] usb: typec: intel_pmc_mux: Handle SCU IPC error conditions Check and return if there are errors. The response bits are valid only on no errors. Fixes: b7404a29cd3d ("usb: typec: intel_pmc_mux: Definitions for response status bits") Signed-off-by: Madhusudanarao Amara Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200916091102.27118-4-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index ec7da0fa3cf8..676b525c2a66 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -125,13 +125,19 @@ static int hsl_orientation(struct pmc_usb_port *port) static int pmc_usb_command(struct pmc_usb_port *port, u8 *msg, u32 len) { u8 response[4]; + int ret; /* * Error bit will always be 0 with the USBC command. - * Status can be checked from the response message. + * Status can be checked from the response message if the + * function intel_scu_ipc_dev_command succeeds. */ - intel_scu_ipc_dev_command(port->pmc->ipc, PMC_USBC_CMD, 0, msg, len, - response, sizeof(response)); + ret = intel_scu_ipc_dev_command(port->pmc->ipc, PMC_USBC_CMD, 0, msg, + len, response, sizeof(response)); + + if (ret) + return ret; + if (response[2] & PMC_USB_RESP_STATUS_FAILURE) { if (response[2] & PMC_USB_RESP_STATUS_FATAL) return -EIO; From fe88c6489264eaea23570dfdf03e1d3f5f47f423 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Sep 2020 16:31:00 +0200 Subject: [PATCH 502/648] serial: core: fix port-lock initialisation Commit f743061a85f5 ("serial: core: Initialise spin lock before use in uart_configure_port()") tried to work around a breakage introduced by commit a3cb39d258ef ("serial: core: Allow detach and attach serial device for console") by adding a second initialisation of the port lock when registering the port. As reported by the build robots [1], this doesn't really solve the regression introduced by the console-detach changes and also adds a second redundant initialisation of the lock for normal ports. Start cleaning up this mess by removing the redundant initialisation and making sure that the port lock is again initialised once-only for ports that aren't already in use as a console. [1] https://lore.kernel.org/r/20200802054852.GR23458@shao2-debian Fixes: f743061a85f5 ("serial: core: Initialise spin lock before use in uart_configure_port()") Fixes: a3cb39d258ef ("serial: core: Allow detach and attach serial device for console") Cc: stable # 5.7 Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200909143101.15389-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index f797c971cd82..53b79e1fcbc8 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2378,13 +2378,6 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, /* Power up port for set_mctrl() */ uart_change_pm(state, UART_PM_STATE_ON); - /* - * If this driver supports console, and it hasn't been - * successfully registered yet, initialise spin lock for it. - */ - if (port->cons && !(port->cons->flags & CON_ENABLED)) - __uart_port_spin_lock_init(port); - /* * Ensure that the modem control lines are de-activated. * keep the DTR setting that is set in uart_set_options() @@ -2900,7 +2893,12 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) goto out; } - uart_port_spin_lock_init(uport); + /* + * If this port is in use as a console then the spinlock is already + * initialised. + */ + if (!uart_console_enabled(uport)) + __uart_port_spin_lock_init(uport); if (uport->cons && uport->dev) of_console_check(uport->dev->of_node, uport->cons->name, uport->line); From e0830dbf71f191851ed3772d2760f007b7c5bc3a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 9 Sep 2020 16:31:01 +0200 Subject: [PATCH 503/648] serial: core: fix console port-lock regression Fix the port-lock initialisation regression introduced by commit a3cb39d258ef ("serial: core: Allow detach and attach serial device for console") by making sure that the lock is again initialised during console setup. The console may be registered before the serial controller has been probed in which case the port lock needs to be initialised during console setup by a call to uart_set_options(). The console-detach changes introduced a regression in several drivers by effectively removing that initialisation by not initialising the lock when the port is used as a console (which is always the case during console setup). Add back the early lock initialisation and instead use a new console-reinit flag to handle the case where a console is being re-attached through sysfs. The question whether the console-detach interface should have been added in the first place is left for another discussion. Note that the console-enabled check in uart_set_options() is not redundant because of kgdboc, which can end up reinitialising an already enabled console (see commit 42b6a1baa3ec ("serial_core: Don't re-initialize a previously initialized spinlock.")). Fixes: a3cb39d258ef ("serial: core: Allow detach and attach serial device for console") Cc: stable # 5.7 Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200909143101.15389-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 32 +++++++++++++++----------------- include/linux/serial_core.h | 1 + 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 53b79e1fcbc8..124524ecfe26 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1916,24 +1916,12 @@ static inline bool uart_console_enabled(struct uart_port *port) return uart_console(port) && (port->cons->flags & CON_ENABLED); } -static void __uart_port_spin_lock_init(struct uart_port *port) +static void uart_port_spin_lock_init(struct uart_port *port) { spin_lock_init(&port->lock); lockdep_set_class(&port->lock, &port_lock_key); } -/* - * Ensure that the serial console lock is initialised early. - * If this port is a console, then the spinlock is already initialised. - */ -static inline void uart_port_spin_lock_init(struct uart_port *port) -{ - if (uart_console(port)) - return; - - __uart_port_spin_lock_init(port); -} - #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(CONFIG_CONSOLE_POLL) /** * uart_console_write - write a console message to a serial port @@ -2086,7 +2074,15 @@ uart_set_options(struct uart_port *port, struct console *co, struct ktermios termios; static struct ktermios dummy; - uart_port_spin_lock_init(port); + /* + * Ensure that the serial-console lock is initialised early. + * + * Note that the console-enabled check is needed because of kgdboc, + * which can end up calling uart_set_options() for an already enabled + * console via tty_find_polling_driver() and uart_poll_init(). + */ + if (!uart_console_enabled(port) && !port->console_reinit) + uart_port_spin_lock_init(port); memset(&termios, 0, sizeof(struct ktermios)); @@ -2794,10 +2790,12 @@ static ssize_t console_store(struct device *dev, if (oldconsole && !newconsole) { ret = unregister_console(uport->cons); } else if (!oldconsole && newconsole) { - if (uart_console(uport)) + if (uart_console(uport)) { + uport->console_reinit = 1; register_console(uport->cons); - else + } else { ret = -ENOENT; + } } } else { ret = -ENXIO; @@ -2898,7 +2896,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) * initialised. */ if (!uart_console_enabled(uport)) - __uart_port_spin_lock_init(uport); + uart_port_spin_lock_init(uport); if (uport->cons && uport->dev) of_console_check(uport->dev->of_node, uport->cons->name, uport->line); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 01fc4d9c9c54..8a99279a579b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -248,6 +248,7 @@ struct uart_port { unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; + unsigned char console_reinit; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ From 3c5a87be170aba8ac40982182f812dcff6ed1ad1 Mon Sep 17 00:00:00 2001 From: Tobias Diedrich Date: Mon, 14 Sep 2020 19:36:28 +0200 Subject: [PATCH 504/648] serial: 8250_pci: Add Realtek 816a and 816b These serial ports are exposed by the OOB-management-engine on RealManage-enabled network cards (e.g. AMD DASH enabled systems using Realtek cards). Because these have 3 BARs, they fail the "num_iomem <= 1" check in serial_pci_guess_board. I've manually checked the two IOMEM regions and BAR 2 doesn't seem to respond to reads, but BAR 4 seems to be an MMIO version of the IO ports (untested). With this change, the ports are detected: 0000:02:00.1: ttyS0 at I/O 0x2200 (irq = 82, base_baud = 115200) is a 16550A 0000:02:00.2: ttyS1 at I/O 0x2100 (irq = 55, base_baud = 115200) is a 16550A lspci output: 02:00.1 0700: 10ec:816a (rev 0e) (prog-if 02 [16550]) Subsystem: 17aa:5082 Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort+ SERR- Cc: stable Link: https://lore.kernel.org/r/20200914173628.GA22508@yamamaya.is-a-geek.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 3eb2d485eaeb..55bb7b897d97 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5566,6 +5566,17 @@ static const struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_wch384_4 }, + /* + * Realtek RealManage + */ + { PCI_VENDOR_ID_REALTEK, 0x816a, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b0_1_115200 }, + + { PCI_VENDOR_ID_REALTEK, 0x816b, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b0_1_115200 }, + /* Fintek PCI serial cards */ { PCI_DEVICE(0x1c29, 0x1104), .driver_data = pbn_fintek_4 }, { PCI_DEVICE(0x1c29, 0x1108), .driver_data = pbn_fintek_8 }, From e1c2d96cd0196383e6c390c8abf0b9045a9616b2 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 21 Aug 2020 11:48:10 -0700 Subject: [PATCH 505/648] powercap: RAPL: Add support for Lakefield Simply add Lakefield model ID. No additional changes are needed. Signed-off-by: Ricardo Neri [ rjw: Minor subject edit ] Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 25c764905f9b..983d75bd5bd1 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1039,6 +1039,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), + X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &rapl_defaults_cht), From ec0972adecb391a8d8650832263a4790f3bfb4df Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 11 Sep 2020 07:57:06 +0900 Subject: [PATCH 506/648] fbcon: Fix user font detection test at fbcon_resize(). syzbot is reporting OOB read at fbcon_resize() [1], for commit 39b3cffb8cf31117 ("fbcon: prevent user font height or width change from causing potential out-of-bounds access") is by error using registered_fb[con2fb_map[vc->vc_num]]->fbcon_par->p->userfont (which was set to non-zero) instead of fb_display[vc->vc_num].userfont (which remains zero for that display). We could remove tricky userfont flag [2], for we can determine it by comparing address of the font data and addresses of built-in font data. But since that commit is failing to fix the original OOB read [3], this patch keeps the change minimal in case we decide to revert altogether. [1] https://syzkaller.appspot.com/bug?id=ebcbbb6576958a496500fee9cf7aa83ea00b5920 [2] https://syzkaller.appspot.com/text?tag=Patch&x=14030853900000 [3] https://syzkaller.appspot.com/bug?id=6fba8c186d97cf1011ab17660e633b1cc4e080c9 Reported-by: syzbot Signed-off-by: Tetsuo Handa Fixes: 39b3cffb8cf31117 ("fbcon: prevent user font height or width change from causing potential out-of-bounds access") Cc: George Kennedy Link: https://lore.kernel.org/r/f6e3e611-8704-1263-d163-f52c906a4f06@I-love.SAKURA.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 66167830fefd..dae7ae7f225a 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2203,7 +2203,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width, struct fb_var_screeninfo var = info->var; int x_diff, y_diff, virt_w, virt_h, virt_fw, virt_fh; - if (ops->p && ops->p->userfont && FNTSIZE(vc->vc_font.data)) { + if (p->userfont && FNTSIZE(vc->vc_font.data)) { int size; int pitch = PITCH(vc->vc_font.width); From ce9c13f31b10560d4e4d801f9f94895f595756aa Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Wed, 16 Sep 2020 18:48:51 +0800 Subject: [PATCH 507/648] perf stat: Fix the ratio comments of miss-events 'perf stat' displays miss ratio of L1-dcache, L1-icache, dTLB cache, iTLB cache and LL-cache. Take L1-dcache for example, miss ratio is caculated as "L1-dcache-load-misses/L1-dcache-loads". So "of all L1-dcache hits" is unsuitable to describe it, and "of all L1-dcache accesses" seems better. The comments of L1-icache, dTLB cache, iTLB cache and LL-cache are fixed in the same way. Signed-off-by: Qi Liu Reviewed-by: Andi Kleen Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1600253331-10535-1-git-send-email-liuqi115@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a5f42c22c484..924b54d15d54 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -517,7 +517,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio); } static void print_l1_icache_misses(struct perf_stat_config *config, @@ -538,7 +538,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio); } static void print_dtlb_cache_misses(struct perf_stat_config *config, @@ -558,7 +558,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio); } static void print_itlb_cache_misses(struct perf_stat_config *config, @@ -578,7 +578,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio); } static void print_ll_cache_misses(struct perf_stat_config *config, @@ -598,7 +598,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio); } /* @@ -920,7 +920,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) print_l1_dcache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I | @@ -930,7 +930,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) print_l1_icache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB | @@ -940,7 +940,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB | @@ -950,7 +950,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) print_itlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( evsel->core.attr.type == PERF_TYPE_HW_CACHE && evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL | @@ -960,7 +960,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) print_ll_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); From e6b1a44eccfcab5e5e280be376f65478c3b2c7a2 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 15 Sep 2020 22:07:50 +0800 Subject: [PATCH 508/648] locking/percpu-rwsem: Use this_cpu_{inc,dec}() for read_count The __this_cpu*() accessors are (in general) IRQ-unsafe which, given that percpu-rwsem is a blocking primitive, should be just fine. However, file_end_write() is used from IRQ context and will cause load-store issues on architectures where the per-cpu accessors are not natively irq-safe. Fix it by using the IRQ-safe this_cpu_*() for operations on read_count. This will generate more expensive code on a number of platforms, which might cause a performance regression for some of the other percpu-rwsem users. If any such is reported, we can consider alternative solutions. Fixes: 70fe2f48152e ("aio: fix freeze protection of aio writes") Signed-off-by: Hou Tao Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Acked-by: Oleg Nesterov Link: https://lkml.kernel.org/r/20200915140750.137881-1-houtao1@huawei.com --- include/linux/percpu-rwsem.h | 8 ++++---- kernel/locking/percpu-rwsem.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5e033fe1ff4e..5fda40f97fe9 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -60,7 +60,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) * anything we did within this RCU-sched read-size critical section. */ if (likely(rcu_sync_is_idle(&sem->rss))) - __this_cpu_inc(*sem->read_count); + this_cpu_inc(*sem->read_count); else __percpu_down_read(sem, false); /* Unconditional memory barrier */ /* @@ -79,7 +79,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) * Same as in percpu_down_read(). */ if (likely(rcu_sync_is_idle(&sem->rss))) - __this_cpu_inc(*sem->read_count); + this_cpu_inc(*sem->read_count); else ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ preempt_enable(); @@ -103,7 +103,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) * Same as in percpu_down_read(). */ if (likely(rcu_sync_is_idle(&sem->rss))) { - __this_cpu_dec(*sem->read_count); + this_cpu_dec(*sem->read_count); } else { /* * slowpath; reader will only ever wake a single blocked @@ -115,7 +115,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) * aggregate zero, as that is the only time it matters) they * will also see our critical section. */ - __this_cpu_dec(*sem->read_count); + this_cpu_dec(*sem->read_count); rcuwait_wake_up(&sem->writer); } preempt_enable(); diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 8bbafe3e5203..70a32a576f3f 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem); static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { - __this_cpu_inc(*sem->read_count); + this_cpu_inc(*sem->read_count); /* * Due to having preemption disabled the decrement happens on @@ -71,7 +71,7 @@ static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) if (likely(!atomic_read_acquire(&sem->block))) return true; - __this_cpu_dec(*sem->read_count); + this_cpu_dec(*sem->read_count); /* Prod writer to re-evaluate readers_active_check() */ rcuwait_wake_up(&sem->writer); From aa6b43d57f995f6eec0e9e744dcb200e09ecb8e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Sep 2020 12:31:58 +0200 Subject: [PATCH 509/648] ACPI: processor: Use CPUIDLE_FLAG_TIMER_STOP Make acpi_processor_idle use the common broadcast code, there's no reason not to. This also removes some RCU usage after rcu_idle_enter(). Signed-off-by: Peter Zijlstra (Intel) Reported-by: Borislav Petkov Tested-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 47 +++++++++++------------------------ 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 71a30b0d0f05..01ea986b6197 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -161,18 +161,10 @@ static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) } /* Power(C) State timer broadcast control */ -static void lapic_timer_state_broadcast(struct acpi_processor *pr, - struct acpi_processor_cx *cx, - int broadcast) +static bool lapic_timer_needs_broadcast(struct acpi_processor *pr, + struct acpi_processor_cx *cx) { - int state = cx - pr->power.states; - - if (state >= pr->power.timer_broadcast_on_state) { - if (broadcast) - tick_broadcast_enter(); - else - tick_broadcast_exit(); - } + return cx - pr->power.states >= pr->power.timer_broadcast_on_state; } #else @@ -180,9 +172,9 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr, static void lapic_timer_check_state(int state, struct acpi_processor *pr, struct acpi_processor_cx *cstate) { } static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { } -static void lapic_timer_state_broadcast(struct acpi_processor *pr, - struct acpi_processor_cx *cx, - int broadcast) + +static bool lapic_timer_needs_broadcast(struct acpi_processor *pr, + struct acpi_processor_cx *cx) { } @@ -568,20 +560,12 @@ static DEFINE_RAW_SPINLOCK(c3_lock); * acpi_idle_enter_bm - enters C3 with proper BM handling * @pr: Target processor * @cx: Target state context - * @timer_bc: Whether or not to change timer mode to broadcast */ static void acpi_idle_enter_bm(struct acpi_processor *pr, - struct acpi_processor_cx *cx, bool timer_bc) + struct acpi_processor_cx *cx) { acpi_unlazy_tlb(smp_processor_id()); - /* - * Must be done before busmaster disable as we might need to - * access HPET ! - */ - if (timer_bc) - lapic_timer_state_broadcast(pr, cx, 1); - /* * disable bus master * bm_check implies we need ARB_DIS @@ -609,9 +593,6 @@ static void acpi_idle_enter_bm(struct acpi_processor *pr, c3_cpu_count--; raw_spin_unlock(&c3_lock); } - - if (timer_bc) - lapic_timer_state_broadcast(pr, cx, 0); } static int acpi_idle_enter(struct cpuidle_device *dev, @@ -630,7 +611,7 @@ static int acpi_idle_enter(struct cpuidle_device *dev, cx = per_cpu(acpi_cstate[index], dev->cpu); } else if (cx->type == ACPI_STATE_C3 && pr->flags.bm_check) { if (cx->bm_sts_skip || !acpi_idle_bm_check()) { - acpi_idle_enter_bm(pr, cx, true); + acpi_idle_enter_bm(pr, cx); return index; } else if (drv->safe_state_index >= 0) { index = drv->safe_state_index; @@ -642,15 +623,11 @@ static int acpi_idle_enter(struct cpuidle_device *dev, } } - lapic_timer_state_broadcast(pr, cx, 1); - if (cx->type == ACPI_STATE_C3) ACPI_FLUSH_CPU_CACHE(); acpi_idle_do_entry(cx); - lapic_timer_state_broadcast(pr, cx, 0); - return index; } @@ -666,7 +643,7 @@ static int acpi_idle_enter_s2idle(struct cpuidle_device *dev, return 0; if (pr->flags.bm_check) { - acpi_idle_enter_bm(pr, cx, false); + acpi_idle_enter_bm(pr, cx); return 0; } else { ACPI_FLUSH_CPU_CACHE(); @@ -682,6 +659,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, { int i, count = ACPI_IDLE_STATE_START; struct acpi_processor_cx *cx; + struct cpuidle_state *state; if (max_cstate == 0) max_cstate = 1; @@ -694,6 +672,11 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, per_cpu(acpi_cstate[count], dev->cpu) = cx; + if (lapic_timer_needs_broadcast(pr, cx)) { + state = &acpi_idle_driver.states[count]; + state->flags |= CPUIDLE_FLAG_TIMER_STOP; + } + count++; if (count == CPUIDLE_STATE_MAX) break; From a889a23a98fee183c9b6b0b14b2fd70583429c5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Sep 2020 12:31:59 +0200 Subject: [PATCH 510/648] ACPI: processor: Use CPUIDLE_FLAG_TLB_FLUSHED Make acpi_processor_idle() use the generic TLB flushing code. This again removes RCU usage after rcu_idle_enter(). (XXX make every C3 invalidate TLBs, not just C3-BM) Signed-off-by: Peter Zijlstra (Intel) Tested-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- arch/ia64/include/asm/acpi.h | 2 -- arch/x86/include/asm/acpi.h | 2 -- drivers/acpi/processor_idle.c | 10 +++++----- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index b66ba907019c..87927eb824cc 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -74,8 +74,6 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf) buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; } -#define acpi_unlazy_tlb(x) - #ifdef CONFIG_ACPI_NUMA extern cpumask_t early_cpu_possible_map; #define for_each_possible_early_cpu(cpu) \ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index ca0976456a6b..6d2df1ee427b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -159,8 +159,6 @@ static inline u64 x86_default_get_root_pointer(void) extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ -#define acpi_unlazy_tlb(x) leave_mm(x) - #ifdef CONFIG_ACPI_APEI static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) { diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 01ea986b6197..9ec504067f98 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -564,8 +564,6 @@ static DEFINE_RAW_SPINLOCK(c3_lock); static void acpi_idle_enter_bm(struct acpi_processor *pr, struct acpi_processor_cx *cx) { - acpi_unlazy_tlb(smp_processor_id()); - /* * disable bus master * bm_check implies we need ARB_DIS @@ -665,6 +663,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, max_cstate = 1; for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { + state = &acpi_idle_driver.states[count]; cx = &pr->power.states[i]; if (!cx->valid) @@ -672,10 +671,11 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, per_cpu(acpi_cstate[count], dev->cpu) = cx; - if (lapic_timer_needs_broadcast(pr, cx)) { - state = &acpi_idle_driver.states[count]; + if (lapic_timer_needs_broadcast(pr, cx)) state->flags |= CPUIDLE_FLAG_TIMER_STOP; - } + + if (cx->type == ACPI_STATE_C3) + state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; count++; if (count == CPUIDLE_STATE_MAX) From 8747f2022fe8d8029193707ee86ff5c792cbef9b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Sep 2020 12:32:00 +0200 Subject: [PATCH 511/648] cpuidle: Allow cpuidle drivers to take over RCU-idle Some drivers have to do significant work, some of which relies on RCU still being active. Instead of using RCU_NONIDLE in the drivers and flipping RCU back on, allow drivers to take over RCU-idle duty. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ulf Hansson Tested-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 15 ++++++++++----- include/linux/cpuidle.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 04becd70cc41..6c7e5621cf9a 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -138,6 +138,7 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { ktime_t time_start, time_end; + struct cpuidle_state *target_state = &drv->states[index]; time_start = ns_to_ktime(local_clock()); @@ -153,8 +154,9 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, * suspended is generally unsafe. */ stop_critical_timings(); - rcu_idle_enter(); - drv->states[index].enter_s2idle(dev, drv, index); + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) + rcu_idle_enter(); + target_state->enter_s2idle(dev, drv, index); if (WARN_ON_ONCE(!irqs_disabled())) local_irq_disable(); /* @@ -162,7 +164,8 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, * first CPU executing it calls functions containing RCU read-side * critical sections, so tell RCU about that. */ - rcu_idle_exit(); + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) + rcu_idle_exit(); tick_unfreeze(); start_critical_timings(); @@ -239,9 +242,11 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, time_start = ns_to_ktime(local_clock()); stop_critical_timings(); - rcu_idle_enter(); + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) + rcu_idle_enter(); entered_state = target_state->enter(dev, drv, index); - rcu_idle_exit(); + if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) + rcu_idle_exit(); start_critical_timings(); sched_clock_idle_wakeup_event(); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 75895e6363b8..6175c77bf25e 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -82,6 +82,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_UNUSABLE BIT(3) /* avoid using this state */ #define CPUIDLE_FLAG_OFF BIT(4) /* disable this state by default */ #define CPUIDLE_FLAG_TLB_FLUSHED BIT(5) /* idle-state flushes TLBs */ +#define CPUIDLE_FLAG_RCU_IDLE BIT(6) /* idle-state takes care of RCU */ struct cpuidle_device_kobj; struct cpuidle_state_kobj; From 1fecfdbb7acc6624655450a609221c89b5197a06 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Sep 2020 12:32:01 +0200 Subject: [PATCH 512/648] ACPI: processor: Take over RCU-idle for C3-BM idle The C3 BusMaster idle code takes lock in a number of places, some deep inside the ACPI code. Instead of wrapping it all in RCU_NONIDLE, have the driver take over RCU-idle duty and avoid flipping RCU state back and forth a lot. ( by marking 'C3 && bm_check' as RCU_IDLE, we _must_ call enter_bm() for that combination, otherwise we'll loose RCU-idle, this requires shuffling some code around ) Signed-off-by: Peter Zijlstra (Intel) Tested-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 69 +++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 9ec504067f98..7ecb90e90afd 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -558,22 +558,43 @@ static DEFINE_RAW_SPINLOCK(c3_lock); /** * acpi_idle_enter_bm - enters C3 with proper BM handling + * @drv: cpuidle driver * @pr: Target processor * @cx: Target state context + * @index: index of target state */ -static void acpi_idle_enter_bm(struct acpi_processor *pr, - struct acpi_processor_cx *cx) +static int acpi_idle_enter_bm(struct cpuidle_driver *drv, + struct acpi_processor *pr, + struct acpi_processor_cx *cx, + int index) { + static struct acpi_processor_cx safe_cx = { + .entry_method = ACPI_CSTATE_HALT, + }; + /* * disable bus master * bm_check implies we need ARB_DIS * bm_control implies whether we can do ARB_DIS * - * That leaves a case where bm_check is set and bm_control is - * not set. In that case we cannot do much, we enter C3 - * without doing anything. + * That leaves a case where bm_check is set and bm_control is not set. + * In that case we cannot do much, we enter C3 without doing anything. */ - if (pr->flags.bm_control) { + bool dis_bm = pr->flags.bm_control; + + /* If we can skip BM, demote to a safe state. */ + if (!cx->bm_sts_skip && acpi_idle_bm_check()) { + dis_bm = false; + index = drv->safe_state_index; + if (index >= 0) { + cx = this_cpu_read(acpi_cstate[index]); + } else { + cx = &safe_cx; + index = -EBUSY; + } + } + + if (dis_bm) { raw_spin_lock(&c3_lock); c3_cpu_count++; /* Disable bus master arbitration when all CPUs are in C3 */ @@ -582,15 +603,21 @@ static void acpi_idle_enter_bm(struct acpi_processor *pr, raw_spin_unlock(&c3_lock); } + rcu_idle_enter(); + acpi_idle_do_entry(cx); + rcu_idle_exit(); + /* Re-enable bus master arbitration */ - if (pr->flags.bm_control) { + if (dis_bm) { raw_spin_lock(&c3_lock); acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0); c3_cpu_count--; raw_spin_unlock(&c3_lock); } + + return index; } static int acpi_idle_enter(struct cpuidle_device *dev, @@ -604,20 +631,13 @@ static int acpi_idle_enter(struct cpuidle_device *dev, return -EINVAL; if (cx->type != ACPI_STATE_C1) { + if (cx->type == ACPI_STATE_C3 && pr->flags.bm_check) + return acpi_idle_enter_bm(drv, pr, cx, index); + + /* C2 to C1 demotion. */ if (acpi_idle_fallback_to_c1(pr) && num_online_cpus() > 1) { index = ACPI_IDLE_STATE_START; cx = per_cpu(acpi_cstate[index], dev->cpu); - } else if (cx->type == ACPI_STATE_C3 && pr->flags.bm_check) { - if (cx->bm_sts_skip || !acpi_idle_bm_check()) { - acpi_idle_enter_bm(pr, cx); - return index; - } else if (drv->safe_state_index >= 0) { - index = drv->safe_state_index; - cx = per_cpu(acpi_cstate[index], dev->cpu); - } else { - acpi_safe_halt(); - return -EBUSY; - } } } @@ -641,7 +661,13 @@ static int acpi_idle_enter_s2idle(struct cpuidle_device *dev, return 0; if (pr->flags.bm_check) { - acpi_idle_enter_bm(pr, cx); + u8 bm_sts_skip = cx->bm_sts_skip; + + /* Don't check BM_STS, do an unconditional ARB_DIS for S2IDLE */ + cx->bm_sts_skip = 1; + acpi_idle_enter_bm(drv, pr, cx, index); + cx->bm_sts_skip = bm_sts_skip; + return 0; } else { ACPI_FLUSH_CPU_CACHE(); @@ -674,8 +700,11 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, if (lapic_timer_needs_broadcast(pr, cx)) state->flags |= CPUIDLE_FLAG_TIMER_STOP; - if (cx->type == ACPI_STATE_C3) + if (cx->type == ACPI_STATE_C3) { state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; + if (pr->flags.bm_check) + state->flags |= CPUIDLE_FLAG_RCU_IDLE; + } count++; if (count == CPUIDLE_STATE_MAX) From 6279e774b0e4abfab20ff6cfaff99e752a32f27a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 15 Sep 2020 11:27:41 +0300 Subject: [PATCH 513/648] MAINTAINERS: Fix Max's and Shravan's emails Max's and Shravan's usernames were changed while @mellanox.com emails were transferred to be @nvidia.com. Fixes: f6da70d99c96 ("MAINTAINERS: Update Mellanox and Cumulus Network addresses to new domain") Signed-off-by: Leon Romanovsky Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0d0862b19ce5..d746519253c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6180,7 +6180,7 @@ F: Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt F: drivers/edac/aspeed_edac.c EDAC-BLUEFIELD -M: Shravan Kumar Ramani +M: Shravan Kumar Ramani S: Supported F: drivers/edac/bluefield_edac.c @@ -9251,7 +9251,7 @@ F: drivers/firmware/iscsi_ibft* ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR M: Sagi Grimberg -M: Max Gurtovoy +M: Max Gurtovoy L: linux-rdma@vger.kernel.org S: Supported W: http://www.openfabrics.org From b959b97860d0fee8c8f6a3e641d3c2ad76eab6be Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 16 Sep 2020 15:54:37 +0200 Subject: [PATCH 514/648] MIPS: SNI: Fix spurious interrupts On A20R machines the interrupt pending bits in cause register need to be updated by requesting the chipset to do it. This needs to be done to find the interrupt cause and after interrupt service. In commit 0b888c7f3a03 ("MIPS: SNI: Convert to new irq_chip functions") the function to do after service update got lost, which caused spurious interrupts. Fixes: 0b888c7f3a03 ("MIPS: SNI: Convert to new irq_chip functions") Signed-off-by: Thomas Bogendoerfer --- arch/mips/sni/a20r.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/mips/sni/a20r.c b/arch/mips/sni/a20r.c index b09dc844985a..eeeec18c420a 100644 --- a/arch/mips/sni/a20r.c +++ b/arch/mips/sni/a20r.c @@ -143,7 +143,10 @@ static struct platform_device sc26xx_pdev = { }, }; -static u32 a20r_ack_hwint(void) +/* + * Trigger chipset to update CPU's CAUSE IP field + */ +static u32 a20r_update_cause_ip(void) { u32 status = read_c0_status(); @@ -205,12 +208,14 @@ static void a20r_hwint(void) int irq; clear_c0_status(IE_IRQ0); - status = a20r_ack_hwint(); + status = a20r_update_cause_ip(); cause = read_c0_cause(); irq = ffs(((cause & status) >> 8) & 0xf8); if (likely(irq > 0)) do_IRQ(SNI_A20R_IRQ_BASE + irq - 1); + + a20r_update_cause_ip(); set_c0_status(IE_IRQ0); } From 855f3e08d5565811ab109d9c0f64ee11c24feabf Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 19 Aug 2020 10:58:29 +0800 Subject: [PATCH 515/648] drm/mediatek: Remove duplicated include Remove mtk_drm_ddp.h which is included more than once Fixes: 9aef5867c86c ("drm/mediatek: drop use of drmP.h") Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 040a8f393fe2..a34fd805ce6d 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -27,7 +27,6 @@ #include "mtk_drm_crtc.h" #include "mtk_drm_ddp.h" -#include "mtk_drm_ddp.h" #include "mtk_drm_ddp_comp.h" #include "mtk_drm_drv.h" #include "mtk_drm_gem.h" From f85acdad07fe36b91f2244263a890bf372528326 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Tue, 1 Sep 2020 07:39:52 +0800 Subject: [PATCH 516/648] drm/mediatek: Use CPU when fail to get cmdq event Even though cmdq client is created successfully, without the cmdq event, cmdq could not work correctly, so use CPU when fail to get cmdq event. Fixes: 60fa8c13ab1a ("drm/mediatek: Move gce event property to mutex device node") Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 3fc5511330b9..4d29568be3f5 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -831,13 +831,19 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, drm_crtc_index(&mtk_crtc->base)); mtk_crtc->cmdq_client = NULL; } - ret = of_property_read_u32_index(priv->mutex_node, - "mediatek,gce-events", - drm_crtc_index(&mtk_crtc->base), - &mtk_crtc->cmdq_event); - if (ret) - dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n", - drm_crtc_index(&mtk_crtc->base)); + + if (mtk_crtc->cmdq_client) { + ret = of_property_read_u32_index(priv->mutex_node, + "mediatek,gce-events", + drm_crtc_index(&mtk_crtc->base), + &mtk_crtc->cmdq_event); + if (ret) { + dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n", + drm_crtc_index(&mtk_crtc->base)); + cmdq_mbox_destroy(mtk_crtc->cmdq_client); + mtk_crtc->cmdq_client = NULL; + } + } #endif return 0; } From d494c257271153633a05c11e6dec85ddfc7700ee Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 5 Sep 2020 16:30:58 +0800 Subject: [PATCH 517/648] drm/mediatek: Add missing put_device() call in mtk_ddp_comp_init() if of_find_device_by_node() succeed, mtk_ddp_comp_init() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling for this function implementation. Fixes: d0afe37f5209 ("drm/mediatek: support CMDQ interface in ddp component") Signed-off-by: Yu Kuai Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 57c88de9a329..526648885b97 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -496,6 +496,7 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node, #if IS_REACHABLE(CONFIG_MTK_CMDQ) if (of_address_to_resource(node, 0, &res) != 0) { dev_err(dev, "Missing reg in %s node\n", node->full_name); + put_device(&larb_pdev->dev); return -EINVAL; } comp->regs_pa = res.start; From 64c194c00789889b0f9454f583712f079ba414ee Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 9 Sep 2020 16:49:42 +0800 Subject: [PATCH 518/648] drm/mediatek: Add exception handing in mtk_drm_probe() if component init fail mtk_ddp_comp_init() is called in a loop in mtk_drm_probe(), if it fail, previous successive init component is not proccessed. Thus uninitialize valid component and put their device if component init failed. Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Yu Kuai Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index a34fd805ce6d..215f3f7979e0 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -543,8 +543,13 @@ static int mtk_drm_probe(struct platform_device *pdev) pm_runtime_disable(dev); err_node: of_node_put(private->mutex_node); - for (i = 0; i < DDP_COMPONENT_ID_MAX; i++) + for (i = 0; i < DDP_COMPONENT_ID_MAX; i++) { of_node_put(private->comp_node[i]); + if (private->ddp_comp[i]) { + put_device(private->ddp_comp[i]->larb_dev); + private->ddp_comp[i] = NULL; + } + } return ret; } From 2132940f2192824acf160d115192755f7c58a847 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 11 Sep 2020 19:21:19 +0800 Subject: [PATCH 519/648] drm/mediatek: Add missing put_device() call in mtk_drm_kms_init() if of_find_device_by_node() succeed, mtk_drm_kms_init() doesn't have a corresponding put_device(). Thus add jump target to fix the exception handling for this function implementation. Fixes: 119f5173628a ("drm/mediatek: Add DRM Driver for Mediatek SoC MT8173.") Signed-off-by: Yu Kuai Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 215f3f7979e0..2d982740b1a4 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -164,7 +164,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) ret = drmm_mode_config_init(drm); if (ret) - return ret; + goto put_mutex_dev; drm->mode_config.min_width = 64; drm->mode_config.min_height = 64; @@ -181,7 +181,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) ret = component_bind_all(drm->dev, drm); if (ret) - return ret; + goto put_mutex_dev; /* * We currently support two fixed data streams, each optional, @@ -228,7 +228,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) } if (!dma_dev->dma_parms) { ret = -ENOMEM; - goto err_component_unbind; + goto put_dma_dev; } ret = dma_set_max_seg_size(dma_dev, (unsigned int)DMA_BIT_MASK(32)); @@ -255,9 +255,12 @@ static int mtk_drm_kms_init(struct drm_device *drm) err_unset_dma_parms: if (private->dma_parms_allocated) dma_dev->dma_parms = NULL; +put_dma_dev: + put_device(private->dma_dev); err_component_unbind: component_unbind_all(drm->dev, drm); - +put_mutex_dev: + put_device(private->mutex_dev); return ret; } From 0680a622318b8d657323b94082f4b9a44038dfee Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 11 Sep 2020 19:21:51 +0800 Subject: [PATCH 520/648] drm/mediatek: Add missing put_device() call in mtk_hdmi_dt_parse_pdata() if of_find_device_by_node() succeed, mtk_drm_kms_init() doesn't have a corresponding put_device(). Thus add jump target to fix the exception handling for this function implementation. Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Signed-off-by: Yu Kuai Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index f2e9b429960b..a97725680d4e 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1507,25 +1507,30 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, dev_err(dev, "Failed to get system configuration registers: %d\n", ret); - return ret; + goto put_device; } hdmi->sys_regmap = regmap; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); hdmi->regs = devm_ioremap_resource(dev, mem); - if (IS_ERR(hdmi->regs)) - return PTR_ERR(hdmi->regs); + if (IS_ERR(hdmi->regs)) { + ret = PTR_ERR(hdmi->regs); + goto put_device; + } remote = of_graph_get_remote_node(np, 1, 0); - if (!remote) - return -EINVAL; + if (!remote) { + ret = -EINVAL; + goto put_device; + } if (!of_device_is_compatible(remote, "hdmi-connector")) { hdmi->next_bridge = of_drm_find_bridge(remote); if (!hdmi->next_bridge) { dev_err(dev, "Waiting for external bridge\n"); of_node_put(remote); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto put_device; } } @@ -1534,7 +1539,8 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, dev_err(dev, "Failed to find ddc-i2c-bus node in %pOF\n", remote); of_node_put(remote); - return -EINVAL; + ret = -EINVAL; + goto put_device; } of_node_put(remote); @@ -1542,10 +1548,14 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi, of_node_put(i2c_np); if (!hdmi->ddc_adpt) { dev_err(dev, "Failed to get ddc i2c adapter by node\n"); - return -EINVAL; + ret = -EINVAL; + goto put_device; } return 0; +put_device: + put_device(hdmi->cec_dev); + return ret; } /* From ffbc3dd1975f1e2dac7b1752aa8b5cac3cd5b459 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 16 Sep 2020 23:18:43 +0300 Subject: [PATCH 521/648] fs: fix cast in fsparam_u32hex() macro Signed-off-by: Alexey Dobriyan Signed-off-by: Al Viro --- include/linux/fs_parser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 2eab6d5f6736..aab0ffc6bac6 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -120,7 +120,7 @@ static inline bool fs_validate_description(const char *name, #define fsparam_u32oct(NAME, OPT) \ __fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8) #define fsparam_u32hex(NAME, OPT) \ - __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *16)) + __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16) #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL) #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL) #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) From 78edc005f477a4987ee0a5d96bfe117295c231fd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Aug 2020 08:09:40 -0700 Subject: [PATCH 522/648] rcu-tasks: Prevent complaints of unused show_rcu_tasks_classic_gp_kthread() Commit 8344496e8b49 ("rcu-tasks: Conditionally compile show_rcu_tasks_gp_kthreads()") introduced conditional compilation of several functions, but forgot one occurrence of show_rcu_tasks_classic_gp_kthread() that causes the compiler to warn of an unused static function. This commit uses "static inline" to avoid these complaints and possibly also to avoid emitting an actual definition of this function. Fixes: 8344496e8b49 ("rcu-tasks: Conditionally compile show_rcu_tasks_gp_kthreads()") Cc: # 5.8.x Reported-by: Laurent Pinchart Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 835e2df8590a..05d3e1375e4c 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -590,7 +590,7 @@ void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu) } #else /* #ifdef CONFIG_TASKS_RCU */ -static void show_rcu_tasks_classic_gp_kthread(void) { } +static inline void show_rcu_tasks_classic_gp_kthread(void) { } void exit_tasks_rcu_start(void) { } void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } #endif /* #else #ifdef CONFIG_TASKS_RCU */ From 5f1ab0f493f81517a7b47d859cd5d32e6af1eb9f Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Wed, 16 Sep 2020 10:04:28 +0800 Subject: [PATCH 523/648] net: hns: kerneldoc fixes Fix some parameter description or spelling mistakes. Signed-off-by: Lu Wei Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns/hns_ethtool.c | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 4eb50296f653..14e60c9e491d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -463,8 +463,8 @@ static int __lb_clean_rings(struct hns_nic_priv *priv, /** * nic_run_loopback_test - run loopback test - * @nic_dev: net device - * @loopback_type: loopback type + * @ndev: net device + * @loop_mode: loopback mode */ static int __lb_run_test(struct net_device *ndev, enum hnae_loop loop_mode) @@ -572,7 +572,7 @@ static int __lb_down(struct net_device *ndev, enum hnae_loop loop) /** * hns_nic_self_test - self test - * @dev: net device + * @ndev: net device * @eth_test: test cmd * @data: test result */ @@ -633,7 +633,7 @@ static void hns_nic_self_test(struct net_device *ndev, /** * hns_nic_get_drvinfo - get net driver info - * @dev: net device + * @net_dev: net device * @drvinfo: driver info */ static void hns_nic_get_drvinfo(struct net_device *net_dev, @@ -658,7 +658,7 @@ static void hns_nic_get_drvinfo(struct net_device *net_dev, /** * hns_get_ringparam - get ring parameter - * @dev: net device + * @net_dev: net device * @param: ethtool parameter */ static void hns_get_ringparam(struct net_device *net_dev, @@ -683,7 +683,7 @@ static void hns_get_ringparam(struct net_device *net_dev, /** * hns_get_pauseparam - get pause parameter - * @dev: net device + * @net_dev: net device * @param: pause parameter */ static void hns_get_pauseparam(struct net_device *net_dev, @@ -701,7 +701,7 @@ static void hns_get_pauseparam(struct net_device *net_dev, /** * hns_set_pauseparam - set pause parameter - * @dev: net device + * @net_dev: net device * @param: pause parameter * * Return 0 on success, negative on failure @@ -725,7 +725,7 @@ static int hns_set_pauseparam(struct net_device *net_dev, /** * hns_get_coalesce - get coalesce info. - * @dev: net device + * @net_dev: net device * @ec: coalesce info. * * Return 0 on success, negative on failure. @@ -769,7 +769,7 @@ static int hns_get_coalesce(struct net_device *net_dev, /** * hns_set_coalesce - set coalesce info. - * @dev: net device + * @net_dev: net device * @ec: coalesce info. * * Return 0 on success, negative on failure. @@ -808,7 +808,7 @@ static int hns_set_coalesce(struct net_device *net_dev, /** * hns_get_channels - get channel info. - * @dev: net device + * @net_dev: net device * @ch: channel info. */ static void @@ -825,7 +825,7 @@ hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch) /** * get_ethtool_stats - get detail statistics. - * @dev: net device + * @netdev: net device * @stats: statistics info. * @data: statistics data. */ @@ -883,8 +883,8 @@ static void hns_get_ethtool_stats(struct net_device *netdev, /** * get_strings: Return a set of strings that describe the requested objects - * @dev: net device - * @stats: string set ID. + * @netdev: net device + * @stringset: string set ID. * @data: objects data. */ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) @@ -972,7 +972,7 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) /** * nic_get_sset_count - get string set count witch returned by nic_get_strings. - * @dev: net device + * @netdev: net device * @stringset: string set index, 0: self test string; 1: statistics string. * * Return string set count. @@ -1006,7 +1006,7 @@ static int hns_get_sset_count(struct net_device *netdev, int stringset) /** * hns_phy_led_set - set phy LED status. - * @dev: net device + * @netdev: net device * @value: LED state. * * Return 0 on success, negative on failure. @@ -1028,7 +1028,7 @@ static int hns_phy_led_set(struct net_device *netdev, int value) /** * nic_set_phys_id - set phy identify LED. - * @dev: net device + * @netdev: net device * @state: LED state. * * Return 0 on success, negative on failure. @@ -1104,9 +1104,9 @@ hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) /** * hns_get_regs - get net device register - * @dev: net device + * @net_dev: net device * @cmd: ethtool cmd - * @date: register data + * @data: register data */ static void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd, void *data) @@ -1126,7 +1126,7 @@ static void hns_get_regs(struct net_device *net_dev, struct ethtool_regs *cmd, /** * nic_get_regs_len - get total register len. - * @dev: net device + * @net_dev: net device * * Return total register len. */ @@ -1151,7 +1151,7 @@ static int hns_get_regs_len(struct net_device *net_dev) /** * hns_nic_nway_reset - nway reset - * @dev: net device + * @netdev: net device * * Return 0 on success, negative on failure */ From 34beb21594519ce64a55a498c2fe7d567bc1ca20 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Wed, 16 Sep 2020 05:19:35 -0400 Subject: [PATCH 524/648] geneve: add transport ports in route lookup for geneve This patch adds transport ports information for route lookup so that IPsec can select Geneve tunnel traffic to do encryption. This is needed for OVS/OVN IPsec with encrypted Geneve tunnels. This can be tested by configuring a host-host VPN using an IKE daemon and specifying port numbers. For example, for an Openswan-type configuration, the following parameters should be configured on both hosts and IPsec set up as-per normal: $ cat /etc/ipsec.conf conn in ... left=$IP1 right=$IP2 ... leftprotoport=udp/6081 rightprotoport=udp ... conn out ... left=$IP1 right=$IP2 ... leftprotoport=udp rightprotoport=udp/6081 ... The tunnel can then be setup using "ip" on both hosts (but changing the relevant IP addresses): $ ip link add tun type geneve id 1000 remote $IP2 $ ip addr add 192.168.0.1/24 dev tun $ ip link set tun up This can then be tested by pinging from $IP1: $ ping 192.168.0.2 Without this patch the traffic is unencrypted on the wire. Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Signed-off-by: Qiuyu Xiao Signed-off-by: Mark Gray Reviewed-by: Greg Rose Signed-off-by: David S. Miller --- drivers/net/geneve.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index c71f994fbc73..974a244f45ba 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -777,7 +777,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs4, struct flowi4 *fl4, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -793,6 +794,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->flowi4_proto = IPPROTO_UDP; fl4->daddr = info->key.u.ipv4.dst; fl4->saddr = info->key.u.ipv4.src; + fl4->fl4_dport = dport; + fl4->fl4_sport = sport; tos = info->key.tos; if ((tos == 1) && !geneve->cfg.collect_md) { @@ -827,7 +830,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs6, struct flowi6 *fl6, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -843,6 +847,9 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->flowi6_proto = IPPROTO_UDP; fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; + fl6->fl6_dport = dport; + fl6->fl6_sport = sport; + prio = info->key.tos; if ((prio == 1) && !geneve->cfg.collect_md) { prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb); @@ -889,7 +896,9 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -919,7 +928,6 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; } - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->cfg.collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -974,7 +982,9 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -1003,7 +1013,6 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; } - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->cfg.collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -1085,13 +1094,18 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); + __be16 sport; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct flowi4 fl4; - struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); + + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1101,9 +1115,13 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct flowi6 fl6; - struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); + + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->cfg.info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -1114,8 +1132,7 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return -EINVAL; } - info->key.tp_src = udp_flow_src_port(geneve->net, skb, - 1, USHRT_MAX, true); + info->key.tp_src = sport; info->key.tp_dst = geneve->cfg.info.key.tp_dst; return 0; } From fd944dc24336922656a48f4608bfb41abdcdc4aa Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 16 Sep 2020 12:08:39 +0200 Subject: [PATCH 525/648] net: dsa: microchip: ksz8795: really set the correct number of ports The KSZ9477 and KSZ8795 use the port_cnt field differently: For the KSZ9477, it includes the CPU port(s), while for the KSZ8795, it doesn't. It would be a good cleanup to make the handling of both drivers match, but as a first step, fix the recently broken assignment of num_ports in the KSZ8795 driver (which completely broke probing, as the CPU port index was always failing the num_ports check). Fixes: af199a1a9cb0 ("net: dsa: microchip: set the correct number of ports") Signed-off-by: Matthias Schiffer Reviewed-by: Codrin Ciubotariu Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index f7d59d7b2cbe..f5779e152377 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1260,7 +1260,7 @@ static int ksz8795_switch_init(struct ksz_device *dev) } /* set the real number of ports */ - dev->ds->num_ports = dev->port_cnt; + dev->ds->num_ports = dev->port_cnt + 1; return 0; } From e60c27f1ffc733e729319662f75419f4d4fb6a80 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Wed, 16 Sep 2020 19:17:20 +0800 Subject: [PATCH 526/648] drm/amdgpu: declare ta firmware for navy_flounder The firmware provided via MODULE_FIRMWARE appears in the module information. External tools(eg. dracut) may use the list of fw files to include them as appropriate in an initramfs, thus missing declaration will lead to request firmware failure in boot time. Signed-off-by: Jiansong Chen Reviewed-by: Tianci Yin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index e16874f30d5d..6c5d9612abcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -58,7 +58,7 @@ MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin"); -MODULE_FIRMWARE("amdgpu/navy_flounder_asd.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 From 875d369d8f75275d30e59421602d9366426abff7 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Tue, 15 Sep 2020 17:26:29 -0400 Subject: [PATCH 527/648] drm/amd/display: Don't log hdcp module warnings in dmesg [Why] DTM topology updates happens by default now. This results in DTM warnings when hdcp is not even being enabled. This spams the dmesg and doesn't effect normal display functionality so it is better to log it using DRM_DEBUG_KMS() [How] Change the DRM_WARN() to DRM_DEBUG_KMS() Signed-off-by: Bhawanpreet Lakha Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h index d3192b9d0c3d..47f8ee2832ff 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h @@ -27,7 +27,7 @@ #define MOD_HDCP_LOG_H_ #ifdef CONFIG_DRM_AMD_DC_HDCP -#define HDCP_LOG_ERR(hdcp, ...) DRM_WARN(__VA_ARGS__) +#define HDCP_LOG_ERR(hdcp, ...) DRM_DEBUG_KMS(__VA_ARGS__) #define HDCP_LOG_VER(hdcp, ...) DRM_DEBUG_KMS(__VA_ARGS__) #define HDCP_LOG_FSM(hdcp, ...) DRM_DEBUG_KMS(__VA_ARGS__) #define HDCP_LOG_TOP(hdcp, ...) pr_debug("[HDCP_TOP]:"__VA_ARGS__) From 29231826f3bd65500118c473fccf31c0cf14dbc0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 16 Sep 2020 18:18:25 +0100 Subject: [PATCH 528/648] ehci-hcd: Move include to keep CRC stable The CRC calculation done by genksyms is triggered when the parser hits EXPORT_SYMBOL*() macros. At this point, genksyms recursively expands the types of the function parameters, and uses that as the input for the CRC calculation. In the case of forward-declared structs, the type expands to 'UNKNOWN'. Following this, it appears that the result of the expansion of each type is cached somewhere, and seems to be re-used when/if the same type is seen again for another exported symbol in the same C file. Unfortunately, this can cause CRC 'stability' issues when a struct definition becomes visible in the middle of a C file. For example, let's assume code with the following pattern: struct foo; int bar(struct foo *arg) { /* Do work ... */ } EXPORT_SYMBOL_GPL(bar); /* This contains struct foo's definition */ #include "foo.h" int baz(struct foo *arg) { /* Do more work ... */ } EXPORT_SYMBOL_GPL(baz); Here, baz's CRC will be computed using the expansion of struct foo that was cached after bar's CRC calculation ('UNKOWN' here). But if EXPORT_SYMBOL_GPL(bar) is removed from the file (because of e.g. symbol trimming using CONFIG_TRIM_UNUSED_KSYMS), struct foo will be expanded late, during baz's CRC calculation, which now has visibility over the full struct definition, hence resulting in a different CRC for baz. The proper fix for this certainly is in genksyms, but that will take me some time to get right. In the meantime, we have seen one occurrence of this in the ehci-hcd code which hits this problem because of the way it includes C files halfway through the code together with an unlucky mix of symbol trimming. In order to workaround this, move the include done in ehci-hub.c early in ehci-hcd.c, hence making sure the struct definitions are visible to the entire file. This improves CRC stability of the ehci-hcd exports even when symbol trimming is enabled. Acked-by: Alan Stern Cc: stable Signed-off-by: Quentin Perret Link: https://lore.kernel.org/r/20200916171825.3228122-1-qperret@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 1 + drivers/usb/host/ehci-hub.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 6257be4110ca..3575b7201881 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index ce0eaf7d7c12..087402aec5cb 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -14,7 +14,6 @@ */ /*-------------------------------------------------------------------------*/ -#include #define PORT_WAKE_BITS (PORT_WKOC_E|PORT_WKDISC_E|PORT_WKCONN_E) From 52a3974feb1a3eec25d8836d37a508b67b0a9cd0 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Sep 2020 20:53:25 -0700 Subject: [PATCH 529/648] nvme-core: get/put ctrl and transport module in nvme_dev_open/release() Get and put the reference to the ctrl in the nvme_dev_open() and nvme_dev_release() before and after module get/put for ctrl in char device file operations. Introduce char_dev relase function, get/put the controller and module which allows us to fix the potential Oops which can be easily reproduced with a passthru ctrl (although the problem also exists with pure user access): Entering kdb (current=0xffff8887f8290000, pid 3128) on processor 30 Oops: (null) due to oops @ 0xffffffffa01019ad CPU: 30 PID: 3128 Comm: bash Tainted: G W OE 5.8.0-rc4nvme-5.9+ #35 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.4 RIP: 0010:nvme_free_ctrl+0x234/0x285 [nvme_core] Code: 57 10 a0 e8 73 bf 02 e1 ba 3d 11 00 00 48 c7 c6 98 33 10 a0 48 c7 c7 1d 57 10 a0 e8 5b bf 02 e1 8 RSP: 0018:ffffc90001d63de0 EFLAGS: 00010246 RAX: ffffffffa05c0440 RBX: ffff8888119e45a0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8888177e9550 RDI: ffff8888119e43b0 RBP: ffff8887d4768000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffffc90001d63c90 R12: ffff8888119e43b0 R13: ffff8888119e5108 R14: dead000000000100 R15: ffff8888119e5108 FS: 00007f1ef27b0740(0000) GS:ffff888817600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa05c0470 CR3: 00000007f6bee000 CR4: 00000000003406e0 Call Trace: device_release+0x27/0x80 kobject_put+0x98/0x170 nvmet_passthru_ctrl_disable+0x4a/0x70 [nvmet] nvmet_passthru_enable_store+0x4c/0x90 [nvmet] configfs_write_file+0xe6/0x150 vfs_write+0xba/0x1e0 ksys_write+0x5f/0xe0 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f1ef1eb2840 Code: Bad RIP value. RSP: 002b:00007fffdbff0eb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f1ef1eb2840 RDX: 0000000000000002 RSI: 00007f1ef27d2000 RDI: 0000000000000001 RBP: 00007f1ef27d2000 R08: 000000000000000a R09: 00007f1ef27b0740 R10: 0000000000000001 R11: 0000000000000246 R12: 00007f1ef2186400 R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000000 With this patch fix we take the module ref count in nvme_dev_open() and release that ref count in newly introduced nvme_dev_release(). Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8b75f6ca0b61..c013eb52fdc8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3261,10 +3261,24 @@ static int nvme_dev_open(struct inode *inode, struct file *file) return -EWOULDBLOCK; } + nvme_get_ctrl(ctrl); + if (!try_module_get(ctrl->ops->module)) + return -EINVAL; + file->private_data = ctrl; return 0; } +static int nvme_dev_release(struct inode *inode, struct file *file) +{ + struct nvme_ctrl *ctrl = + container_of(inode->i_cdev, struct nvme_ctrl, cdev); + + module_put(ctrl->ops->module); + nvme_put_ctrl(ctrl); + return 0; +} + static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) { struct nvme_ns *ns; @@ -3327,6 +3341,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, static const struct file_operations nvme_dev_fops = { .owner = THIS_MODULE, .open = nvme_dev_open, + .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, }; From 3a6b076168e20a50289d71f601f1dd02be0f8e88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Sep 2020 18:11:01 -0700 Subject: [PATCH 530/648] nvmet: get transport reference for passthru ctrl Grab a reference to the transport driver to ensure it can't be unloaded while a passthrough controller is active. Fixes: c1fef73f793b ("nvmet: add passthru code to process commands") Reported-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe --- drivers/nvme/target/passthru.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 8bd7f656e240..dacfa7435d0b 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -517,6 +517,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys) subsys->ver = NVME_VS(1, 2, 1); } + __module_get(subsys->passthru_ctrl->ops->module); mutex_unlock(&subsys->lock); return 0; @@ -531,6 +532,7 @@ static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys) { if (subsys->passthru_ctrl) { xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid); + module_put(subsys->passthru_ctrl->ops->module); nvme_put_ctrl(subsys->passthru_ctrl); } subsys->passthru_ctrl = NULL; From 32f6865c7aa3c422f710903baa6eb81abc6f559b Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Thu, 17 Sep 2020 11:49:25 +0300 Subject: [PATCH 531/648] arm64: bpf: Fix branch offset in JIT Running the eBPF test_verifier leads to random errors looking like this: [ 6525.735488] Unexpected kernel BRK exception at EL1 [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP [ 6525.741609] Modules linked in: nls_utf8 cifs libdes libarc4 dns_resolver fscache binfmt_misc nls_ascii nls_cp437 vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher ghash_ce gf128mul efi_pstore sha2_ce sha256_arm64 sha1_ce evdev efivars efivarfs ip_tables x_tables autofs4 btrfs blake2b_generic xor xor_neon zstd_compress raid6_pq libcrc32c crc32c_generic ahci xhci_pci libahci xhci_hcd igb libata i2c_algo_bit nvme realtek usbcore nvme_core scsi_mod t10_pi netsec mdio_devres of_mdio gpio_keys fixed_phy libphy gpio_mb86s7x [ 6525.787760] CPU: 3 PID: 7881 Comm: test_verifier Tainted: G W 5.9.0-rc1+ #47 [ 6525.796111] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun 6 2020 [ 6525.804812] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--) [ 6525.810390] pc : bpf_prog_c3d01833289b6311_F+0xc8/0x9f4 [ 6525.815613] lr : bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c [ 6525.820832] sp : ffff8000130cbb80 [ 6525.824141] x29: ffff8000130cbbb0 x28: 0000000000000000 [ 6525.829451] x27: 000005ef6fcbf39b x26: 0000000000000000 [ 6525.834759] x25: ffff8000130cbb80 x24: ffff800011dc7038 [ 6525.840067] x23: ffff8000130cbd00 x22: ffff0008f624d080 [ 6525.845375] x21: 0000000000000001 x20: ffff800011dc7000 [ 6525.850682] x19: 0000000000000000 x18: 0000000000000000 [ 6525.855990] x17: 0000000000000000 x16: 0000000000000000 [ 6525.861298] x15: 0000000000000000 x14: 0000000000000000 [ 6525.866606] x13: 0000000000000000 x12: 0000000000000000 [ 6525.871913] x11: 0000000000000001 x10: ffff8000000a660c [ 6525.877220] x9 : ffff800010951810 x8 : ffff8000130cbc38 [ 6525.882528] x7 : 0000000000000000 x6 : 0000009864cfa881 [ 6525.887836] x5 : 00ffffffffffffff x4 : 002880ba1a0b3e9f [ 6525.893144] x3 : 0000000000000018 x2 : ffff8000000a4374 [ 6525.898452] x1 : 000000000000000a x0 : 0000000000000009 [ 6525.903760] Call trace: [ 6525.906202] bpf_prog_c3d01833289b6311_F+0xc8/0x9f4 [ 6525.911076] bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c [ 6525.915957] bpf_dispatcher_xdp_func+0x14/0x20 [ 6525.920398] bpf_test_run+0x70/0x1b0 [ 6525.923969] bpf_prog_test_run_xdp+0xec/0x190 [ 6525.928326] __do_sys_bpf+0xc88/0x1b28 [ 6525.932072] __arm64_sys_bpf+0x24/0x30 [ 6525.935820] el0_svc_common.constprop.0+0x70/0x168 [ 6525.940607] do_el0_svc+0x28/0x88 [ 6525.943920] el0_sync_handler+0x88/0x190 [ 6525.947838] el0_sync+0x140/0x180 [ 6525.951154] Code: d4202000 d4202000 d4202000 d4202000 (d4202000) [ 6525.957249] ---[ end trace cecc3f93b14927e2 ]--- The reason is the offset[] creation and later usage, while building the eBPF body. The code currently omits the first instruction, since build_insn() will increase our ctx->idx before saving it. That was fine up until bounded eBPF loops were introduced. After that introduction, offset[0] must be the offset of the end of prologue which is the start of the 1st insn while, offset[n] holds the offset of the end of n-th insn. When "taken loop with back jump to 1st insn" test runs, it will eventually call bpf2a64_offset(-1, 2, ctx). Since negative indexing is permitted, the current outcome depends on the value stored in ctx->offset[-1], which has nothing to do with our array. If the value happens to be 0 the tests will work. If not this error triggers. commit 7c2e988f400e ("bpf: fix x64 JIT code generation for jmp to 1st insn") fixed an indentical bug on x86 when eBPF bounded loops were introduced. So let's fix it by creating the ctx->offset[] differently. Track the beginning of instruction and account for the extra instruction while calculating the arm instruction offsets. Fixes: 2589726d12a1 ("bpf: introduce bounded loops") Reported-by: Naresh Kamboju Reported-by: Jiri Olsa Co-developed-by: Jean-Philippe Brucker Co-developed-by: Yauheni Kaliuta Signed-off-by: Jean-Philippe Brucker Signed-off-by: Yauheni Kaliuta Signed-off-by: Ilias Apalodimas Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200917084925.177348-1-ilias.apalodimas@linaro.org Signed-off-by: Catalin Marinas --- arch/arm64/net/bpf_jit_comp.c | 43 +++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index f8912e45be7a..ef9f1d5e989d 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -143,14 +143,17 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } -static inline int bpf2a64_offset(int bpf_to, int bpf_from, +static inline int bpf2a64_offset(int bpf_insn, int off, const struct jit_ctx *ctx) { - int to = ctx->offset[bpf_to]; - /* -1 to account for the Branch instruction */ - int from = ctx->offset[bpf_from] - 1; - - return to - from; + /* BPF JMP offset is relative to the next instruction */ + bpf_insn++; + /* + * Whereas arm64 branch instructions encode the offset + * from the branch itself, so we must subtract 1 from the + * instruction offset. + */ + return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1); } static void jit_fill_hole(void *area, unsigned int size) @@ -642,7 +645,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, /* JUMP off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2a64_offset(i + off, i, ctx); + jmp_offset = bpf2a64_offset(i, off, ctx); check_imm26(jmp_offset); emit(A64_B(jmp_offset), ctx); break; @@ -669,7 +672,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_JMP32 | BPF_JSLE | BPF_X: emit(A64_CMP(is64, dst, src), ctx); emit_cond_jmp: - jmp_offset = bpf2a64_offset(i + off, i, ctx); + jmp_offset = bpf2a64_offset(i, off, ctx); check_imm19(jmp_offset); switch (BPF_OP(code)) { case BPF_JEQ: @@ -908,10 +911,21 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass) const struct bpf_prog *prog = ctx->prog; int i; + /* + * - offset[0] offset of the end of prologue, + * start of the 1st instruction. + * - offset[1] - offset of the end of 1st instruction, + * start of the 2nd instruction + * [....] + * - offset[3] - offset of the end of 3rd instruction, + * start of 4th instruction + */ for (i = 0; i < prog->len; i++) { const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; ret = build_insn(insn, ctx, extra_pass); if (ret > 0) { i++; @@ -919,11 +933,16 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass) ctx->offset[i] = ctx->idx; continue; } - if (ctx->image == NULL) - ctx->offset[i] = ctx->idx; if (ret) return ret; } + /* + * offset is allocated with prog->len + 1 so fill in + * the last element with the offset after the last + * instruction (end of program) + */ + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; return 0; } @@ -1002,7 +1021,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + ctx.offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; goto out_off; @@ -1089,7 +1108,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog->jited_len = prog_size; if (!prog->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(prog, ctx.offset); + bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: kfree(ctx.offset); kfree(jit_data); From 9cdabcb3ef8c24ca3a456e4db7b012befb688e73 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Sep 2020 12:34:27 +0200 Subject: [PATCH 532/648] usblp: fix race between disconnect() and read() read() needs to check whether the device has been disconnected before it tries to talk to the device. Signed-off-by: Oliver Neukum Reported-by: syzbot+be5b5f86a162a6c281e6@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200917103427.15740-1-oneukum@suse.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 084c48c5848f..67cbd42421be 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -827,6 +827,11 @@ static ssize_t usblp_read(struct file *file, char __user *buffer, size_t len, lo if (rv < 0) return rv; + if (!usblp->present) { + count = -ENODEV; + goto done; + } + if ((avail = usblp->rstatus) < 0) { printk(KERN_ERR "usblp%d: error %d reading from printer\n", usblp->minor, (int)avail); From 75df529bec9110dad43ab30e2d9490242529e8b8 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 16 Sep 2020 17:45:30 +0200 Subject: [PATCH 533/648] arm64: paravirt: Initialize steal time when cpu is online Steal time initialization requires mapping a memory region which invokes a memory allocation. Doing this at CPU starting time results in the following trace when CONFIG_DEBUG_ATOMIC_SLEEP is enabled: BUG: sleeping function called from invalid context at mm/slab.h:498 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/1 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc5+ #1 Call trace: dump_backtrace+0x0/0x208 show_stack+0x1c/0x28 dump_stack+0xc4/0x11c ___might_sleep+0xf8/0x130 __might_sleep+0x58/0x90 slab_pre_alloc_hook.constprop.101+0xd0/0x118 kmem_cache_alloc_node_trace+0x84/0x270 __get_vm_area_node+0x88/0x210 get_vm_area_caller+0x38/0x40 __ioremap_caller+0x70/0xf8 ioremap_cache+0x78/0xb0 memremap+0x9c/0x1a8 init_stolen_time_cpu+0x54/0xf0 cpuhp_invoke_callback+0xa8/0x720 notify_cpu_starting+0xc8/0xd8 secondary_start_kernel+0x114/0x180 CPU1: Booted secondary processor 0x0000000001 [0x431f0a11] However we don't need to initialize steal time at CPU starting time. We can simply wait until CPU online time, just sacrificing a bit of accuracy by returning zero for steal time until we know better. While at it, add __init to the functions that are only called by pv_time_init() which is __init. Signed-off-by: Andrew Jones Fixes: e0685fa228fd ("arm64: Retrieve stolen time as paravirtualized guest") Cc: stable@vger.kernel.org Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20200916154530.40809-1-drjones@redhat.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/paravirt.c | 26 +++++++++++++++----------- include/linux/cpuhotplug.h | 1 - 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 295d66490584..c07d7a034941 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -50,16 +50,19 @@ static u64 pv_steal_clock(int cpu) struct pv_time_stolen_time_region *reg; reg = per_cpu_ptr(&stolen_time_region, cpu); - if (!reg->kaddr) { - pr_warn_once("stolen time enabled but not configured for cpu %d\n", - cpu); + + /* + * paravirt_steal_clock() may be called before the CPU + * online notification callback runs. Until the callback + * has run we just return zero. + */ + if (!reg->kaddr) return 0; - } return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); } -static int stolen_time_dying_cpu(unsigned int cpu) +static int stolen_time_cpu_down_prepare(unsigned int cpu) { struct pv_time_stolen_time_region *reg; @@ -73,7 +76,7 @@ static int stolen_time_dying_cpu(unsigned int cpu) return 0; } -static int init_stolen_time_cpu(unsigned int cpu) +static int stolen_time_cpu_online(unsigned int cpu) { struct pv_time_stolen_time_region *reg; struct arm_smccc_res res; @@ -103,19 +106,20 @@ static int init_stolen_time_cpu(unsigned int cpu) return 0; } -static int pv_time_init_stolen_time(void) +static int __init pv_time_init_stolen_time(void) { int ret; - ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING, - "hypervisor/arm/pvtime:starting", - init_stolen_time_cpu, stolen_time_dying_cpu); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "hypervisor/arm/pvtime:online", + stolen_time_cpu_online, + stolen_time_cpu_down_prepare); if (ret < 0) return ret; return 0; } -static bool has_pv_steal_clock(void) +static bool __init has_pv_steal_clock(void) { struct arm_smccc_res res; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 3215023d4852..bf9181cef444 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -142,7 +142,6 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, - CPUHP_AP_ARM_KVMPV_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM_CORESIGHT_CTI_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, From 5ef64cc8987a9211d3f3667331ba3411a94ddc79 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Sep 2020 14:05:35 -0700 Subject: [PATCH 534/648] mm: allow a controlled amount of unfairness in the page lock Commit 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic") made the page locking entirely fair, in that if a waiter came in while the lock was held, the lock would be transferred to the lockers strictly in order. That was intended to finally get rid of the long-reported watchdog failures that involved the page lock under extreme load, where a process could end up waiting essentially forever, as other page lockers stole the lock from under it. It also improved some benchmarks, but it ended up causing huge performance regressions on others, simply because fair lock behavior doesn't end up giving out the lock as aggressively, causing better worst-case latency, but potentially much worse average latencies and throughput. Instead of reverting that change entirely, this introduces a controlled amount of unfairness, with a sysctl knob to tune it if somebody needs to. But the default value should hopefully be good for any normal load, allowing a few rounds of lock stealing, but enforcing the strict ordering before the lock has been stolen too many times. There is also a hint from Matthieu Baerts that the fair page coloring may end up exposing an ABBA deadlock that is hidden by the usual optimistic lock stealing, and while the unfairness doesn't fix the fundamental issue (and I'm still looking at that), it avoids it in practice. The amount of unfairness can be modified by writing a new value to the 'sysctl_page_lock_unfairness' variable (default value of 5, exposed through /proc/sys/vm/page_lock_unfairness), but that is hopefully something we'd use mainly for debugging rather than being necessary for any deep system tuning. This whole issue has exposed just how critical the page lock can be, and how contended it gets under certain locks. And the main contention doesn't really seem to be anything related to IO (which was the origin of this lock), but for things like just verifying that the page file mapping is stable while faulting in the page into a page table. Link: https://lore.kernel.org/linux-fsdevel/ed8442fd-6f54-dd84-cd4a-941e8b7ee603@MichaelLarabel.com/ Link: https://www.phoronix.com/scan.php?page=article&item=linux-50-59&num=1 Link: https://lore.kernel.org/linux-fsdevel/c560a38d-8313-51fb-b1ec-e904bd8836bc@tessares.net/ Reported-and-tested-by: Michael Larabel Tested-by: Matthieu Baerts Cc: Dave Chinner Cc: Matthew Wilcox Cc: Chris Mason Cc: Jan Kara Cc: Amir Goldstein Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 + include/linux/wait.h | 1 + kernel/sysctl.c | 8 +++ mm/filemap.c | 160 ++++++++++++++++++++++++++++++++++--------- 4 files changed, 140 insertions(+), 31 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ca6e6a81576b..b2f370f0b420 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -41,6 +41,8 @@ struct writeback_control; struct bdi_writeback; struct pt_regs; +extern int sysctl_page_lock_unfairness; + void init_mm_internals(void); #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 898c890fc153..27fb99cfeb02 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -21,6 +21,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int #define WQ_FLAG_WOKEN 0x02 #define WQ_FLAG_BOOKMARK 0x04 #define WQ_FLAG_CUSTOM 0x08 +#define WQ_FLAG_DONE 0x10 /* * A single wait-queue entry structure: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e70ee2332e..afad085960b8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2912,6 +2912,14 @@ static struct ctl_table vm_table[] = { .proc_handler = percpu_pagelist_fraction_sysctl_handler, .extra1 = SYSCTL_ZERO, }, + { + .procname = "page_lock_unfairness", + .data = &sysctl_page_lock_unfairness, + .maxlen = sizeof(sysctl_page_lock_unfairness), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, #ifdef CONFIG_MMU { .procname = "max_map_count", diff --git a/mm/filemap.c b/mm/filemap.c index 1aaea26556cc..6aa08e7714ce 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -988,9 +988,43 @@ void __init pagecache_init(void) page_writeback_init(); } +/* + * The page wait code treats the "wait->flags" somewhat unusually, because + * we have multiple different kinds of waits, not just he usual "exclusive" + * one. + * + * We have: + * + * (a) no special bits set: + * + * We're just waiting for the bit to be released, and when a waker + * calls the wakeup function, we set WQ_FLAG_WOKEN and wake it up, + * and remove it from the wait queue. + * + * Simple and straightforward. + * + * (b) WQ_FLAG_EXCLUSIVE: + * + * The waiter is waiting to get the lock, and only one waiter should + * be woken up to avoid any thundering herd behavior. We'll set the + * WQ_FLAG_WOKEN bit, wake it up, and remove it from the wait queue. + * + * This is the traditional exclusive wait. + * + * (b) WQ_FLAG_EXCLUSIVE | WQ_FLAG_CUSTOM: + * + * The waiter is waiting to get the bit, and additionally wants the + * lock to be transferred to it for fair lock behavior. If the lock + * cannot be taken, we stop walking the wait queue without waking + * the waiter. + * + * This is the "fair lock handoff" case, and in addition to setting + * WQ_FLAG_WOKEN, we set WQ_FLAG_DONE to let the waiter easily see + * that it now has the lock. + */ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) { - int ret; + unsigned int flags; struct wait_page_key *key = arg; struct wait_page_queue *wait_page = container_of(wait, struct wait_page_queue, wait); @@ -999,35 +1033,44 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, return 0; /* - * If it's an exclusive wait, we get the bit for it, and - * stop walking if we can't. - * - * If it's a non-exclusive wait, then the fact that this - * wake function was called means that the bit already - * was cleared, and we don't care if somebody then - * re-took it. + * If it's a lock handoff wait, we get the bit for it, and + * stop walking (and do not wake it up) if we can't. */ - ret = 0; - if (wait->flags & WQ_FLAG_EXCLUSIVE) { - if (test_and_set_bit(key->bit_nr, &key->page->flags)) + flags = wait->flags; + if (flags & WQ_FLAG_EXCLUSIVE) { + if (test_bit(key->bit_nr, &key->page->flags)) return -1; - ret = 1; + if (flags & WQ_FLAG_CUSTOM) { + if (test_and_set_bit(key->bit_nr, &key->page->flags)) + return -1; + flags |= WQ_FLAG_DONE; + } } - wait->flags |= WQ_FLAG_WOKEN; + /* + * We are holding the wait-queue lock, but the waiter that + * is waiting for this will be checking the flags without + * any locking. + * + * So update the flags atomically, and wake up the waiter + * afterwards to avoid any races. This store-release pairs + * with the load-acquire in wait_on_page_bit_common(). + */ + smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN); wake_up_state(wait->private, mode); /* * Ok, we have successfully done what we're waiting for, * and we can unconditionally remove the wait entry. * - * Note that this has to be the absolute last thing we do, - * since after list_del_init(&wait->entry) the wait entry + * Note that this pairs with the "finish_wait()" in the + * waiter, and has to be the absolute last thing we do. + * After this list_del_init(&wait->entry) the wait entry * might be de-allocated and the process might even have * exited. */ list_del_init_careful(&wait->entry); - return ret; + return (flags & WQ_FLAG_EXCLUSIVE) != 0; } static void wake_up_page_bit(struct page *page, int bit_nr) @@ -1107,8 +1150,8 @@ enum behavior { }; /* - * Attempt to check (or get) the page bit, and mark the - * waiter woken if successful. + * Attempt to check (or get) the page bit, and mark us done + * if successful. */ static inline bool trylock_page_bit_common(struct page *page, int bit_nr, struct wait_queue_entry *wait) @@ -1119,13 +1162,17 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr, } else if (test_bit(bit_nr, &page->flags)) return false; - wait->flags |= WQ_FLAG_WOKEN; + wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE; return true; } +/* How many times do we accept lock stealing from under a waiter? */ +int sysctl_page_lock_unfairness = 5; + static inline int wait_on_page_bit_common(wait_queue_head_t *q, struct page *page, int bit_nr, int state, enum behavior behavior) { + int unfairness = sysctl_page_lock_unfairness; struct wait_page_queue wait_page; wait_queue_entry_t *wait = &wait_page.wait; bool thrashing = false; @@ -1143,11 +1190,18 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, } init_wait(wait); - wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0; wait->func = wake_page_function; wait_page.page = page; wait_page.bit_nr = bit_nr; +repeat: + wait->flags = 0; + if (behavior == EXCLUSIVE) { + wait->flags = WQ_FLAG_EXCLUSIVE; + if (--unfairness < 0) + wait->flags |= WQ_FLAG_CUSTOM; + } + /* * Do one last check whether we can get the * page bit synchronously. @@ -1170,27 +1224,63 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, /* * From now on, all the logic will be based on - * the WQ_FLAG_WOKEN flag, and the and the page - * bit testing (and setting) will be - or has - * already been - done by the wake function. + * the WQ_FLAG_WOKEN and WQ_FLAG_DONE flag, to + * see whether the page bit testing has already + * been done by the wake function. * * We can drop our reference to the page. */ if (behavior == DROP) put_page(page); + /* + * Note that until the "finish_wait()", or until + * we see the WQ_FLAG_WOKEN flag, we need to + * be very careful with the 'wait->flags', because + * we may race with a waker that sets them. + */ for (;;) { + unsigned int flags; + set_current_state(state); - if (signal_pending_state(state, current)) + /* Loop until we've been woken or interrupted */ + flags = smp_load_acquire(&wait->flags); + if (!(flags & WQ_FLAG_WOKEN)) { + if (signal_pending_state(state, current)) + break; + + io_schedule(); + continue; + } + + /* If we were non-exclusive, we're done */ + if (behavior != EXCLUSIVE) break; - if (wait->flags & WQ_FLAG_WOKEN) + /* If the waker got the lock for us, we're done */ + if (flags & WQ_FLAG_DONE) break; - io_schedule(); + /* + * Otherwise, if we're getting the lock, we need to + * try to get it ourselves. + * + * And if that fails, we'll have to retry this all. + */ + if (unlikely(test_and_set_bit(bit_nr, &page->flags))) + goto repeat; + + wait->flags |= WQ_FLAG_DONE; + break; } + /* + * If a signal happened, this 'finish_wait()' may remove the last + * waiter from the wait-queues, but the PageWaiters bit will remain + * set. That's ok. The next wakeup will take care of it, and trying + * to do it here would be difficult and prone to races. + */ finish_wait(q, wait); if (thrashing) { @@ -1200,12 +1290,20 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, } /* - * A signal could leave PageWaiters set. Clearing it here if - * !waitqueue_active would be possible (by open-coding finish_wait), - * but still fail to catch it in the case of wait hash collision. We - * already can fail to clear wait hash collision cases, so don't - * bother with signals either. + * NOTE! The wait->flags weren't stable until we've done the + * 'finish_wait()', and we could have exited the loop above due + * to a signal, and had a wakeup event happen after the signal + * test but before the 'finish_wait()'. + * + * So only after the finish_wait() can we reliably determine + * if we got woken up or not, so we can now figure out the final + * return value based on that state without races. + * + * Also note that WQ_FLAG_WOKEN is sufficient for a non-exclusive + * waiter, but an exclusive one requires WQ_FLAG_DONE. */ + if (behavior == EXCLUSIVE) + return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR; return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR; } From b3b33d3c43bbe0177d70653f4e889c78cc37f097 Mon Sep 17 00:00:00 2001 From: Sunghyun Jin Date: Thu, 3 Sep 2020 21:41:16 +0900 Subject: [PATCH 535/648] percpu: fix first chunk size calculation for populated bitmap Variable populated, which is a member of struct pcpu_chunk, is used as a unit of size of unsigned long. However, size of populated is miscounted. So, I fix this minor part. Fixes: 8ab16c43ea79 ("percpu: change the number of pages marked in the first_chunk pop bitmap") Cc: # 4.14+ Signed-off-by: Sunghyun Jin Signed-off-by: Dennis Zhou --- mm/percpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index f4709629e6de..1ed1a349eab8 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1316,7 +1316,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, /* allocate chunk */ alloc_size = sizeof(struct pcpu_chunk) + - BITS_TO_LONGS(region_size >> PAGE_SHIFT); + BITS_TO_LONGS(region_size >> PAGE_SHIFT) * sizeof(unsigned long); chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!chunk) panic("%s: Failed to allocate %zu bytes\n", __func__, From 21653a4181ff292480599dad996a2b759ccf050f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 9 Sep 2020 12:32:33 +0200 Subject: [PATCH 536/648] i2c: core: Call i2c_acpi_install_space_handler() before i2c_acpi_register_devices() Some ACPI i2c-devices _STA method (which is used to detect if the device is present) use autodetection code which probes which device is present over i2c. This requires the I2C ACPI OpRegion handler to be registered before we enumerate i2c-clients under the i2c-adapter. This fixes the i2c touchpad on the Lenovo ThinkBook 14-IIL and ThinkBook 15 IIL not getting an i2c-client instantiated and thus not working. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1842039 Signed-off-by: Hans de Goede Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 5ec082e2039d..573b5da145d1 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1464,8 +1464,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap) /* create pre-declared device nodes */ of_i2c_register_devices(adap); - i2c_acpi_register_devices(adap); i2c_acpi_install_space_handler(adap); + i2c_acpi_register_devices(adap); if (adap->nr < __i2c_first_dynamic_bus_num) i2c_scan_static_board_info(adap); From 933a3752babcf6513117d5773d2b70782d6ad149 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Sep 2020 17:26:56 -0400 Subject: [PATCH 537/648] fuse: fix the ->direct_IO() treatment of iov_iter the callers rely upon having any iov_iter_truncate() done inside ->direct_IO() countered by iov_iter_reexpand(). Reported-by: Qian Cai Tested-by: Qian Cai Signed-off-by: Al Viro --- fs/fuse/file.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6611ef3269a8..43c165e796da 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3091,11 +3091,10 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ssize_t ret = 0; struct file *file = iocb->ki_filp; struct fuse_file *ff = file->private_data; - bool async_dio = ff->fc->async_dio; loff_t pos = 0; struct inode *inode; loff_t i_size; - size_t count = iov_iter_count(iter); + size_t count = iov_iter_count(iter), shortened = 0; loff_t offset = iocb->ki_pos; struct fuse_io_priv *io; @@ -3103,17 +3102,9 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) inode = file->f_mapping->host; i_size = i_size_read(inode); - if ((iov_iter_rw(iter) == READ) && (offset > i_size)) + if ((iov_iter_rw(iter) == READ) && (offset >= i_size)) return 0; - /* optimization for short read */ - if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { - if (offset >= i_size) - return 0; - iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); - count = iov_iter_count(iter); - } - io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); if (!io) return -ENOMEM; @@ -3129,15 +3120,22 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) * By default, we want to optimize all I/Os with async request * submission to the client filesystem if supported. */ - io->async = async_dio; + io->async = ff->fc->async_dio; io->iocb = iocb; io->blocking = is_sync_kiocb(iocb); + /* optimization for short read */ + if (io->async && !io->write && offset + count > i_size) { + iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); + shortened = count - iov_iter_count(iter); + count -= shortened; + } + /* * We cannot asynchronously extend the size of a file. * In such case the aio will behave exactly like sync io. */ - if ((offset + count > i_size) && iov_iter_rw(iter) == WRITE) + if ((offset + count > i_size) && io->write) io->blocking = true; if (io->async && io->blocking) { @@ -3155,6 +3153,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else { ret = __fuse_direct_read(io, iter, &pos); } + iov_iter_reexpand(iter, iov_iter_count(iter) + shortened); if (io->async) { bool blocking = io->blocking; From 44144185951a0ff9b50bf21c0cd1f79ff688e5ca Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Wed, 16 Sep 2020 11:47:27 +0200 Subject: [PATCH 538/648] hv_netvsc: Add validation for untrusted Hyper-V values For additional robustness in the face of Hyper-V errors or malicious behavior, validate all values that originate from packets that Hyper-V has sent to the guest in the host-to-guest ring buffer. Ensure that invalid values cannot cause indexing off the end of an array, or subvert an existing validation via integer overflow. Ensure that outgoing packets do not have any leftover guest memory that has not been zeroed out. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 4 + drivers/net/hyperv/netvsc.c | 124 ++++++++++++++++++++++++++---- drivers/net/hyperv/netvsc_drv.c | 7 ++ drivers/net/hyperv/rndis_filter.c | 73 ++++++++++++++++-- 4 files changed, 188 insertions(+), 20 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index ff33f27cdcd3..a0f338cf1424 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -847,6 +847,10 @@ struct nvsp_message { #define NETVSC_XDP_HDRM 256 +#define NETVSC_XFER_HEADER_SIZE(rng_cnt) \ + (offsetof(struct vmtransfer_page_packet_header, ranges) + \ + (rng_cnt) * sizeof(struct vmtransfer_page_range)) + struct multi_send_data { struct sk_buff *skb; /* skb containing the pkt */ struct hv_netvsc_packet *pkt; /* netvsc pkt pending */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 41f5cf0bb997..5a57d1985bae 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -388,6 +388,15 @@ static int netvsc_init_buf(struct hv_device *device, net_device->recv_section_size = resp->sections[0].sub_alloc_size; net_device->recv_section_cnt = resp->sections[0].num_sub_allocs; + /* Ensure buffer will not overflow */ + if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size * + (u64)net_device->recv_section_cnt > (u64)buf_size) { + netdev_err(ndev, "invalid recv_section_size %u\n", + net_device->recv_section_size); + ret = -EINVAL; + goto cleanup; + } + /* Setup receive completion ring. * Add 1 to the recv_section_cnt because at least one entry in a * ring buffer has to be empty. @@ -460,6 +469,12 @@ static int netvsc_init_buf(struct hv_device *device, /* Parse the response */ net_device->send_section_size = init_packet->msg. v1_msg.send_send_buf_complete.section_size; + if (net_device->send_section_size < NETVSC_MTU_MIN) { + netdev_err(ndev, "invalid send_section_size %u\n", + net_device->send_section_size); + ret = -EINVAL; + goto cleanup; + } /* Section count is simply the size divided by the section size. */ net_device->send_section_cnt = buf_size / net_device->send_section_size; @@ -731,12 +746,49 @@ static void netvsc_send_completion(struct net_device *ndev, int budget) { const struct nvsp_message *nvsp_packet = hv_pkt_data(desc); + u32 msglen = hv_pkt_datalen(desc); + + /* Ensure packet is big enough to read header fields */ + if (msglen < sizeof(struct nvsp_message_header)) { + netdev_err(ndev, "nvsp_message length too small: %u\n", msglen); + return; + } switch (nvsp_packet->hdr.msg_type) { case NVSP_MSG_TYPE_INIT_COMPLETE: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_message_init_complete)) { + netdev_err(ndev, "nvsp_msg length too small: %u\n", + msglen); + return; + } + fallthrough; + case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_1_message_send_receive_buffer_complete)) { + netdev_err(ndev, "nvsp_msg1 length too small: %u\n", + msglen); + return; + } + fallthrough; + case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_1_message_send_send_buffer_complete)) { + netdev_err(ndev, "nvsp_msg1 length too small: %u\n", + msglen); + return; + } + fallthrough; + case NVSP_MSG5_TYPE_SUBCHANNEL: + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_5_subchannel_complete)) { + netdev_err(ndev, "nvsp_msg5 length too small: %u\n", + msglen); + return; + } /* Copy the response back */ memcpy(&net_device->channel_init_pkt, nvsp_packet, sizeof(struct nvsp_message)); @@ -1117,19 +1169,28 @@ static void enq_receive_complete(struct net_device *ndev, static int netvsc_receive(struct net_device *ndev, struct netvsc_device *net_device, struct netvsc_channel *nvchan, - const struct vmpacket_descriptor *desc, - const struct nvsp_message *nvsp) + const struct vmpacket_descriptor *desc) { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct vmbus_channel *channel = nvchan->channel; const struct vmtransfer_page_packet_header *vmxferpage_packet = container_of(desc, const struct vmtransfer_page_packet_header, d); + const struct nvsp_message *nvsp = hv_pkt_data(desc); + u32 msglen = hv_pkt_datalen(desc); u16 q_idx = channel->offermsg.offer.sub_channel_index; char *recv_buf = net_device->recv_buf; u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; + /* Ensure packet is big enough to read header fields */ + if (msglen < sizeof(struct nvsp_message_header)) { + netif_err(net_device_ctx, rx_err, ndev, + "invalid nvsp header, length too small: %u\n", + msglen); + return 0; + } + /* Make sure this is a valid nvsp packet */ if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { netif_err(net_device_ctx, rx_err, ndev, @@ -1138,6 +1199,14 @@ static int netvsc_receive(struct net_device *ndev, return 0; } + /* Validate xfer page pkt header */ + if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) { + netif_err(net_device_ctx, rx_err, ndev, + "Invalid xfer page pkt, offset too small: %u\n", + desc->offset8 << 3); + return 0; + } + if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) { netif_err(net_device_ctx, rx_err, ndev, "Invalid xfer page set id - expecting %x got %x\n", @@ -1148,6 +1217,14 @@ static int netvsc_receive(struct net_device *ndev, count = vmxferpage_packet->range_cnt; + /* Check count for a valid value */ + if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) { + netif_err(net_device_ctx, rx_err, ndev, + "Range count is not valid: %d\n", + count); + return 0; + } + /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ for (i = 0; i < count; i++) { u32 offset = vmxferpage_packet->ranges[i].byte_offset; @@ -1155,7 +1232,8 @@ static int netvsc_receive(struct net_device *ndev, void *data; int ret; - if (unlikely(offset + buflen > net_device->recv_buf_size)) { + if (unlikely(offset > net_device->recv_buf_size || + buflen > net_device->recv_buf_size - offset)) { nvchan->rsc.cnt = 0; status = NVSP_STAT_FAIL; netif_err(net_device_ctx, rx_err, ndev, @@ -1194,6 +1272,13 @@ static void netvsc_send_table(struct net_device *ndev, u32 count, offset, *tab; int i; + /* Ensure packet is big enough to read send_table fields */ + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_5_send_indirect_table)) { + netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen); + return; + } + count = nvmsg->msg.v5_msg.send_table.count; offset = nvmsg->msg.v5_msg.send_table.offset; @@ -1225,10 +1310,18 @@ static void netvsc_send_table(struct net_device *ndev, } static void netvsc_send_vf(struct net_device *ndev, - const struct nvsp_message *nvmsg) + const struct nvsp_message *nvmsg, + u32 msglen) { struct net_device_context *net_device_ctx = netdev_priv(ndev); + /* Ensure packet is big enough to read its fields */ + if (msglen < sizeof(struct nvsp_message_header) + + sizeof(struct nvsp_4_send_vf_association)) { + netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen); + return; + } + net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; netdev_info(ndev, "VF slot %u %s\n", @@ -1238,16 +1331,24 @@ static void netvsc_send_vf(struct net_device *ndev, static void netvsc_receive_inband(struct net_device *ndev, struct netvsc_device *nvscdev, - const struct nvsp_message *nvmsg, - u32 msglen) + const struct vmpacket_descriptor *desc) { + const struct nvsp_message *nvmsg = hv_pkt_data(desc); + u32 msglen = hv_pkt_datalen(desc); + + /* Ensure packet is big enough to read header fields */ + if (msglen < sizeof(struct nvsp_message_header)) { + netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen); + return; + } + switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: netvsc_send_table(ndev, nvscdev, nvmsg, msglen); break; case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: - netvsc_send_vf(ndev, nvmsg); + netvsc_send_vf(ndev, nvmsg, msglen); break; } } @@ -1261,23 +1362,20 @@ static int netvsc_process_raw_pkt(struct hv_device *device, { struct vmbus_channel *channel = nvchan->channel; const struct nvsp_message *nvmsg = hv_pkt_data(desc); - u32 msglen = hv_pkt_datalen(desc); trace_nvsp_recv(ndev, channel, nvmsg); switch (desc->type) { case VM_PKT_COMP: - netvsc_send_completion(ndev, net_device, channel, - desc, budget); + netvsc_send_completion(ndev, net_device, channel, desc, budget); break; case VM_PKT_DATA_USING_XFER_PAGES: - return netvsc_receive(ndev, net_device, nvchan, - desc, nvmsg); + return netvsc_receive(ndev, net_device, nvchan, desc); break; case VM_PKT_DATA_INBAND: - netvsc_receive_inband(ndev, net_device, nvmsg, msglen); + netvsc_receive_inband(ndev, net_device, desc); break; default: diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b7db3766f5b9..9869e390875e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -748,6 +748,13 @@ void netvsc_linkstatus_callback(struct net_device *net, struct netvsc_reconfig *event; unsigned long flags; + /* Ensure the packet is big enough to access its fields */ + if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_indicate_status)) { + netdev_err(net, "invalid rndis_indicate_status packet, len: %u\n", + resp->msg_len); + return; + } + /* Update the physical link speed when changing to another vSwitch */ if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index b81ceba38218..12ad471ac5e1 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -275,6 +275,16 @@ static void rndis_filter_receive_response(struct net_device *ndev, return; } + /* Ensure the packet is big enough to read req_id. Req_id is the 1st + * field in any request/response message, so the payload should have at + * least sizeof(u32) bytes + */ + if (resp->msg_len - RNDIS_HEADER_SIZE < sizeof(u32)) { + netdev_err(ndev, "rndis msg_len too small: %u\n", + resp->msg_len); + return; + } + spin_lock_irqsave(&dev->request_lock, flags); list_for_each_entry(request, &dev->req_list, list_ent) { /* @@ -331,8 +341,9 @@ static void rndis_filter_receive_response(struct net_device *ndev, * Get the Per-Packet-Info with the specified type * return NULL if not found. */ -static inline void *rndis_get_ppi(struct rndis_packet *rpkt, - u32 type, u8 internal) +static inline void *rndis_get_ppi(struct net_device *ndev, + struct rndis_packet *rpkt, + u32 rpkt_len, u32 type, u8 internal) { struct rndis_per_packet_info *ppi; int len; @@ -340,11 +351,36 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, if (rpkt->per_pkt_info_offset == 0) return NULL; + /* Validate info_offset and info_len */ + if (rpkt->per_pkt_info_offset < sizeof(struct rndis_packet) || + rpkt->per_pkt_info_offset > rpkt_len) { + netdev_err(ndev, "Invalid per_pkt_info_offset: %u\n", + rpkt->per_pkt_info_offset); + return NULL; + } + + if (rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) { + netdev_err(ndev, "Invalid per_pkt_info_len: %u\n", + rpkt->per_pkt_info_len); + return NULL; + } + ppi = (struct rndis_per_packet_info *)((ulong)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_len; while (len > 0) { + /* Validate ppi_offset and ppi_size */ + if (ppi->size > len) { + netdev_err(ndev, "Invalid ppi size: %u\n", ppi->size); + continue; + } + + if (ppi->ppi_offset >= ppi->size) { + netdev_err(ndev, "Invalid ppi_offset: %u\n", ppi->ppi_offset); + continue; + } + if (ppi->type == type && ppi->internal == internal) return (void *)((ulong)ppi + ppi->ppi_offset); len -= ppi->size; @@ -388,14 +424,29 @@ static int rndis_filter_receive_data(struct net_device *ndev, const struct ndis_pkt_8021q_info *vlan; const struct rndis_pktinfo_id *pktinfo_id; const u32 *hash_info; - u32 data_offset; + u32 data_offset, rpkt_len; void *data; bool rsc_more = false; int ret; + /* Ensure data_buflen is big enough to read header fields */ + if (data_buflen < RNDIS_HEADER_SIZE + sizeof(struct rndis_packet)) { + netdev_err(ndev, "invalid rndis pkt, data_buflen too small: %u\n", + data_buflen); + return NVSP_STAT_FAIL; + } + + /* Validate rndis_pkt offset */ + if (rndis_pkt->data_offset >= data_buflen - RNDIS_HEADER_SIZE) { + netdev_err(ndev, "invalid rndis packet offset: %u\n", + rndis_pkt->data_offset); + return NVSP_STAT_FAIL; + } + /* Remove the rndis header and pass it back up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; + rpkt_len = data_buflen - RNDIS_HEADER_SIZE; data_buflen -= data_offset; /* @@ -410,13 +461,13 @@ static int rndis_filter_receive_data(struct net_device *ndev, return NVSP_STAT_FAIL; } - vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO, 0); + vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0); - csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO, 0); + csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0); - hash_info = rndis_get_ppi(rndis_pkt, NBL_HASH_VALUE, 0); + hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0); - pktinfo_id = rndis_get_ppi(rndis_pkt, RNDIS_PKTINFO_ID, 1); + pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1); data = (void *)msg + data_offset; @@ -474,6 +525,14 @@ int rndis_filter_receive(struct net_device *ndev, if (netif_msg_rx_status(net_device_ctx)) dump_rndis_message(ndev, rndis_msg); + /* Validate incoming rndis_message packet */ + if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE || + buflen < rndis_msg->msg_len) { + netdev_err(ndev, "Invalid rndis_msg (buflen: %u, msg_len: %u)\n", + buflen, rndis_msg->msg_len); + return NVSP_STAT_FAIL; + } + switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: return rndis_filter_receive_data(ndev, net_dev, nvchan, From f4a26a9b311d7ff9db461278faf2869d06496ef8 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 16 Sep 2020 21:50:39 +0530 Subject: [PATCH 539/648] cxgb4: fix memory leak during module unload Fix the memory leak in mps during module unload path by freeing mps reference entries if the list adpter->mps_ref is not already empty Fixes: 28b3870578ef ("cxgb4: Re-work the logic for mps refcounting") Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c index b1a073eea60b..a020e8490681 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c @@ -229,7 +229,7 @@ void cxgb4_free_mps_ref_entries(struct adapter *adap) { struct mps_entries_ref *mps_entry, *tmp; - if (!list_empty(&adap->mps_ref)) + if (list_empty(&adap->mps_ref)) return; spin_lock(&adap->mps_ref_lock); From 83f9a9c8c1edc222846dc1bde6e3479703e8e5a3 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 16 Sep 2020 09:49:18 -0700 Subject: [PATCH 540/648] drivers/net/wan/lapbether: Make skb->protocol consistent with the header This driver is a virtual driver stacked on top of Ethernet interfaces. When this driver transmits data on the Ethernet device, the skb->protocol setting is inconsistent with the Ethernet header prepended to the skb. This causes a user listening on the Ethernet interface with an AF_PACKET socket, to see different sll_protocol values for incoming and outgoing frames, because incoming frames would have this value set by parsing the Ethernet header. This patch changes the skb->protocol value for outgoing Ethernet frames, making it consistent with the Ethernet header prepended. This makes a user listening on the Ethernet device with an AF_PACKET socket, to see the same sll_protocol value for incoming and outgoing frames. Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 732a6c1851f5..b6be2454b8bd 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -198,8 +198,6 @@ static void lapbeth_data_transmit(struct net_device *ndev, struct sk_buff *skb) struct net_device *dev; int size = skb->len; - skb->protocol = htons(ETH_P_X25); - ptr = skb_push(skb, 2); *ptr++ = size % 256; @@ -210,6 +208,8 @@ static void lapbeth_data_transmit(struct net_device *ndev, struct sk_buff *skb) skb->dev = dev = lapbeth->ethdev; + skb->protocol = htons(ETH_P_DEC); + skb_reset_network_header(skb); dev_hard_header(skb, dev, ETH_P_DEC, bcast_addr, NULL, 0); From 9fb030a70431a2a2a1b292dbf0b2f399cc072c16 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 16 Sep 2020 14:25:07 -0700 Subject: [PATCH 541/648] drivers/net/wan/hdlc: Set skb->protocol before transmitting This patch sets skb->protocol before transmitting frames on the HDLC device, so that a user listening on the HDLC device with an AF_PACKET socket will see outgoing frames' sll_protocol field correctly set and consistent with that of incoming frames. 1. Control frames in hdlc_cisco and hdlc_ppp When these drivers send control frames, skb->protocol is not set. This value should be set to htons(ETH_P_HDLC), because when receiving control frames, their skb->protocol is set to htons(ETH_P_HDLC). When receiving, hdlc_type_trans in hdlc.h is called, which then calls cisco_type_trans or ppp_type_trans. The skb->protocol of control frames is set to htons(ETH_P_HDLC) so that the control frames can be received by hdlc_rcv in hdlc.c, which calls cisco_rx or ppp_rx to process the control frames. 2. hdlc_fr When this driver sends control frames, skb->protocol is set to internal values used in this driver. When this driver sends data frames (from upper stacked PVC devices), skb->protocol is the same as that of the user data packet being sent on the upper PVC device (for normal PVC devices), or is htons(ETH_P_802_3) (for Ethernet-emulating PVC devices). However, skb->protocol for both control frames and data frames should be set to htons(ETH_P_HDLC), because when receiving, all frames received on the HDLC device will have their skb->protocol set to htons(ETH_P_HDLC). When receiving, hdlc_type_trans in hdlc.h is called, and because this driver doesn't provide a type_trans function in struct hdlc_proto, all frames will have their skb->protocol set to htons(ETH_P_HDLC). The frames are then received by hdlc_rcv in hdlc.c, which calls fr_rx to process the frames (control frames are consumed and data frames are re-received on upper PVC devices). Cc: Krzysztof Halasa Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_cisco.c | 1 + drivers/net/wan/hdlc_fr.c | 3 +++ drivers/net/wan/hdlc_ppp.c | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 444130655d8e..cb5898f7d68c 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -118,6 +118,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, skb_put(skb, sizeof(struct cisco_packet)); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); dev_queue_xmit(skb); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 12b35404cd8e..d6cfd51613ed 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -433,6 +433,8 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) if (pvc->state.fecn) /* TX Congestion counter */ dev->stats.tx_compressed++; skb->dev = pvc->frad; + skb->protocol = htons(ETH_P_HDLC); + skb_reset_network_header(skb); dev_queue_xmit(skb); return NETDEV_TX_OK; } @@ -555,6 +557,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) skb_put(skb, i); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); dev_queue_xmit(skb); diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 16f33d1ffbfb..64f855651336 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -251,6 +251,7 @@ static void ppp_tx_cp(struct net_device *dev, u16 pid, u8 code, skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); skb_queue_tail(&tx_queue, skb); } From 19a83d36f9837e8bd27435ebb31564a717a5d15a Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 17 Sep 2020 01:04:10 +0200 Subject: [PATCH 542/648] ethtool: add and use message type for tunnel info reply Tunnel offload info code uses ETHTOOL_MSG_TUNNEL_INFO_GET message type (cmd field in genetlink header) for replies to tunnel info netlink request, i.e. the same value as the request have. This is a problem because we are using two separate enums for userspace to kernel and kernel to userspace message types so that this ETHTOOL_MSG_TUNNEL_INFO_GET (28) collides with ETHTOOL_MSG_CABLE_TEST_TDR_NTF which is what message type 28 means for kernel to userspace messages. As the tunnel info request reached mainline in 5.9 merge window, we should still be able to fix the reply message type without breaking backward compatibility. Fixes: c7d759eb7b12 ("ethtool: add tunnel info interface") Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 3 +++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/tunnels.c | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d53bcb31645a..b5a79881551f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -206,6 +206,7 @@ Userspace to kernel: ``ETHTOOL_MSG_TSINFO_GET`` get timestamping info ``ETHTOOL_MSG_CABLE_TEST_ACT`` action start cable test ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT`` action start raw TDR cable test + ``ETHTOOL_MSG_TUNNEL_INFO_GET`` get tunnel offload info ===================================== ================================ Kernel to userspace: @@ -239,6 +240,7 @@ Kernel to userspace: ``ETHTOOL_MSG_TSINFO_GET_REPLY`` timestamping info ``ETHTOOL_MSG_CABLE_TEST_NTF`` Cable test results ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF`` Cable test TDR results + ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1363,4 +1365,5 @@ are netlink only. ``ETHTOOL_SFECPARAM`` n/a n/a ''ETHTOOL_MSG_CABLE_TEST_ACT'' n/a ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT'' + n/a ``ETHTOOL_MSG_TUNNEL_INFO_GET`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 5dcd24cb33ea..72ba36be9655 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -79,6 +79,7 @@ enum { ETHTOOL_MSG_TSINFO_GET_REPLY, ETHTOOL_MSG_CABLE_TEST_NTF, ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index 84f23289475b..d93bf2da0f34 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -200,7 +200,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.dev, - ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, ETHTOOL_A_TUNNEL_INFO_HEADER, info, &reply_payload); if (!rskb) { @@ -273,7 +273,7 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) goto cont; ehdr = ethnl_dump_put(skb, cb, - ETHTOOL_MSG_TUNNEL_INFO_GET); + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); if (!ehdr) { ret = -EMSGSIZE; goto out; From c2b727df7caa33876e7066bde090f40001b6d643 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 16 Sep 2020 20:43:09 -0700 Subject: [PATCH 543/648] net: phy: Avoid NPD upon phy_detach() when driver is unbound If we have unbound the PHY driver prior to calling phy_detach() (often via phy_disconnect()) then we can cause a NULL pointer de-reference accessing the driver owner member. The steps to reproduce are: echo unimac-mdio-0:01 > /sys/class/net/eth0/phydev/driver/unbind ip link set eth0 down Fixes: cafe8df8b9bc ("net: phy: Fix lack of reference count on PHY driver") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b93b40cda54a..5dab6be6fc38 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1682,7 +1682,8 @@ void phy_detach(struct phy_device *phydev) phy_led_triggers_unregister(phydev); - module_put(phydev->mdio.dev.driver->owner); + if (phydev->mdio.dev.driver) + module_put(phydev->mdio.dev.driver->owner); /* If the device had no specific driver before (i.e. - it * was using the generic driver), we unbind the device From 5116a8ade333b6c2e180782139c9c516a437b21c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 16 Sep 2020 20:43:10 -0700 Subject: [PATCH 544/648] net: phy: Do not warn in phy_stop() on PHY_DOWN When phy_is_started() was added to catch incorrect PHY states, phy_stop() would not be qualified against PHY_DOWN. It is possible to reach that state when the PHY driver has been unbound and the network device is then brought down. Fixes: 2b3e88ea6528 ("net: phy: improve phy state checking") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 735a806045ac..8947d58f2a25 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -996,7 +996,7 @@ void phy_stop(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; - if (!phy_is_started(phydev)) { + if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) { WARN(1, "called from state %s\n", phy_state_to_str(phydev->state)); return; From ce000c61b0bf96506ad4a537f3218a716b6fadcd Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 17 Sep 2020 20:29:50 +0800 Subject: [PATCH 545/648] hinic: fix potential resource leak In rx_request_irq(), it will just return what irq_set_affinity_hint() returns. If it is failed, the napi and irq requested are not freed properly. So add exits for failures to handle these. Signed-off-by: Wei Li Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_rx.c | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 5bee951fe9d4..d0072f5e7efc 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -543,18 +543,25 @@ static int rx_request_irq(struct hinic_rxq *rxq) if (err) { netif_err(nic_dev, drv, rxq->netdev, "Failed to set RX interrupt coalescing attribute\n"); - rx_del_napi(rxq); - return err; + goto err_req_irq; } err = request_irq(rq->irq, rx_irq, 0, rxq->irq_name, rxq); - if (err) { - rx_del_napi(rxq); - return err; - } + if (err) + goto err_req_irq; cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask); - return irq_set_affinity_hint(rq->irq, &rq->affinity_mask); + err = irq_set_affinity_hint(rq->irq, &rq->affinity_mask); + if (err) + goto err_irq_affinity; + + return 0; + +err_irq_affinity: + free_irq(rq->irq, rxq); +err_req_irq: + rx_del_napi(rxq); + return err; } static void rx_free_irq(struct hinic_rxq *rxq) From 5f6857e808a8bd078296575b417c4b9d160b9779 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Sep 2020 10:52:57 -0700 Subject: [PATCH 546/648] nfp: use correct define to return NONE fec struct ethtool_fecparam carries bitmasks not bit numbers. We want to return 1 (NONE), not 0. Fixes: 0d0870938337 ("nfp: implement ethtool FEC mode settings") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Jesse Brandeburg Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 6eb9fb9a1814..9c9ae33d84ce 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -829,8 +829,8 @@ nfp_port_get_fecparam(struct net_device *netdev, struct nfp_eth_table_port *eth_port; struct nfp_port *port; - param->active_fec = ETHTOOL_FEC_NONE_BIT; - param->fec = ETHTOOL_FEC_NONE_BIT; + param->active_fec = ETHTOOL_FEC_NONE; + param->fec = ETHTOOL_FEC_NONE; port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); From 6f9885a36c006d798319661fa849f9c2922223b9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 14 Sep 2020 12:04:22 -0500 Subject: [PATCH 547/648] x86/unwind/fp: Fix FP unwinding in ret_from_fork There have been some reports of "bad bp value" warnings printed by the frame pointer unwinder: WARNING: kernel stack regs at 000000005bac7112 in sh:1014 has bad 'bp' value 0000000000000000 This warning happens when unwinding from an interrupt in ret_from_fork(). If entry code gets interrupted, the state of the frame pointer (rbp) may be undefined, which can confuse the unwinder, resulting in warnings like the above. There's an in_entry_code() check which normally silences such warnings for entry code. But in this case, ret_from_fork() is getting interrupted. It recently got moved out of .entry.text, so the in_entry_code() check no longer works. It could be moved back into .entry.text, but that would break the noinstr validation because of the call to schedule_tail(). Instead, initialize each new task's RBP to point to the task's entry regs via an encoded frame pointer. That will allow the unwinder to reach the end of the stack gracefully. Fixes: b9f6976bfb94 ("x86/entry/64: Move non entry code into .text section") Reported-by: Naresh Kamboju Reported-by: Logan Gunthorpe Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/f366bbf5a8d02e2318ee312f738112d0af74d16f.1600103007.git.jpoimboe@redhat.com --- arch/x86/include/asm/frame.h | 19 +++++++++++++++++++ arch/x86/kernel/process.c | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 296b346184b2..fb42659f6e98 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -60,12 +60,26 @@ #define FRAME_END "pop %" _ASM_BP "\n" #ifdef CONFIG_X86_64 + #define ENCODE_FRAME_POINTER \ "lea 1(%rsp), %rbp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs + 1; +} + #else /* !CONFIG_X86_64 */ + #define ENCODE_FRAME_POINTER \ "movl %esp, %ebp\n\t" \ "andl $0x7fffffff, %ebp\n\t" + +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return (unsigned long)regs & 0x7fffffff; +} + #endif /* CONFIG_X86_64 */ #endif /* __ASSEMBLY__ */ @@ -83,6 +97,11 @@ #define ENCODE_FRAME_POINTER +static inline unsigned long encode_frame_pointer(struct pt_regs *regs) +{ + return 0; +} + #endif #define FRAME_BEGIN diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 13ce616cc7af..ba4593a913fa 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "process.h" @@ -133,7 +134,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; - frame->bp = 0; + frame->bp = encode_frame_pointer(childregs); frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; From 14c4acc5ed22c21f9821103be7c48efdf9763584 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 10 Sep 2020 18:16:21 +0100 Subject: [PATCH 548/648] iommu/amd: Fix potential @entry null deref After commit 26e495f34107 ("iommu/amd: Restore IRTE.RemapEn bit after programming IRTE"), smatch warns: drivers/iommu/amd/iommu.c:3870 amd_iommu_deactivate_guest_mode() warn: variable dereferenced before check 'entry' (see line 3867) Fix this by moving the @valid assignment to after @entry has been checked for NULL. Fixes: 26e495f34107 ("iommu/amd: Restore IRTE.RemapEn bit after programming IRTE") Reported-by: Dan Carpenter Signed-off-by: Joao Martins Reviewed-by: Suravee Suthikulpanit Cc: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20200910171621.12879-1-joao.m.martins@oracle.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 07ae8b93887e..8abe1c7ad45b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3864,12 +3864,14 @@ int amd_iommu_deactivate_guest_mode(void *data) struct amd_ir_data *ir_data = (struct amd_ir_data *)data; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; struct irq_cfg *cfg = ir_data->cfg; - u64 valid = entry->lo.fields_remap.valid; + u64 valid; if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry || !entry->lo.fields_vapic.guest_mode) return 0; + valid = entry->lo.fields_remap.valid; + entry->lo.val = 0; entry->hi.val = 0; From e97685abd5d711c885053d4949178f7ab9acbaef Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 16 Sep 2020 11:17:20 +0000 Subject: [PATCH 549/648] iommu/amd: Restore IRTE.RemapEn bit for amd_iommu_activate_guest_mode Commit e52d58d54a32 ("iommu/amd: Use cmpxchg_double() when updating 128-bit IRTE") removed an assumption that modify_irte_ga always set the valid bit, which requires the callers to set the appropriate value for the struct irte_ga.valid bit before calling the function. Similar to the commit 26e495f34107 ("iommu/amd: Restore IRTE.RemapEn bit after programming IRTE"), which is for the function amd_iommu_deactivate_guest_mode(). The same change is also needed for the amd_iommu_activate_guest_mode(). Otherwise, this could trigger IO_PAGE_FAULT for the VFIO based VMs with AVIC enabled. Fixes: e52d58d54a321 ("iommu/amd: Use cmpxchg_double() when updating 128-bit IRTE") Reported-by: Maxim Levitsky Signed-off-by: Suravee Suthikulpanit Tested-by: Maxim Levitsky Reviewed-by: Joao Martins Reviewed-by: Maxim Levitsky Cc: Joao Martins Link: https://lore.kernel.org/r/20200916111720.43913-1-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 8abe1c7ad45b..10e4200d3552 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3840,14 +3840,18 @@ int amd_iommu_activate_guest_mode(void *data) { struct amd_ir_data *ir_data = (struct amd_ir_data *)data; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; + u64 valid; if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry || entry->lo.fields_vapic.guest_mode) return 0; + valid = entry->lo.fields_vapic.valid; + entry->lo.val = 0; entry->hi.val = 0; + entry->lo.fields_vapic.valid = valid; entry->lo.fields_vapic.guest_mode = 1; entry->lo.fields_vapic.ga_log_intr = 1; entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr; From 1f38b8c564b8c24132428f6a4d04e05366fa10f4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 15 Sep 2020 10:59:44 +0200 Subject: [PATCH 550/648] mac80211: extend AQL aggregation estimation to HE and fix unit mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unit of the return value of ieee80211_get_rate_duration is nanoseconds, not microseconds. Adjust the duration checks to account for that. For higher data rates, allow larger estimated aggregation sizes, and add some values for HE as well, which can use much larger aggregates. Since small packets with high data rates can now lead to duration values too small for info->tx_time_est, return a minimum of 4us. Fixes: f01cfbaf9b29 ("mac80211: improve AQL aggregation estimation for low data rates") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20200915085945.3782-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/airtime.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 314973033d03..45140e535151 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -668,20 +668,26 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, * This will not be very accurate, but much better than simply * assuming un-aggregated tx in all cases. */ - if (duration > 400) /* <= VHT20 MCS2 1S */ + if (duration > 400 * 1024) /* <= VHT20 MCS2 1S */ agg_shift = 1; - else if (duration > 250) /* <= VHT20 MCS3 1S or MCS1 2S */ + else if (duration > 250 * 1024) /* <= VHT20 MCS3 1S or MCS1 2S */ agg_shift = 2; - else if (duration > 150) /* <= VHT20 MCS5 1S or MCS3 2S */ + else if (duration > 150 * 1024) /* <= VHT20 MCS5 1S or MCS2 2S */ agg_shift = 3; - else + else if (duration > 70 * 1024) /* <= VHT20 MCS5 2S */ agg_shift = 4; + else if (stat.encoding != RX_ENC_HE || + duration > 20 * 1024) /* <= HE40 MCS6 2S */ + agg_shift = 5; + else + agg_shift = 6; duration *= len; duration /= AVG_PKT_SIZE; duration /= 1024; + duration += (overhead >> agg_shift); - return duration + (overhead >> agg_shift); + return max_t(u32, duration, 4); } if (!conf) From 8e280369b9076dc08ce13c802c1687b81b4cccd4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 15 Sep 2020 10:59:45 +0200 Subject: [PATCH 551/648] mac80211: add AQL support for VHT160 tx rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting from struct ieee80211_tx_rate to ieee80211_rx_status, there was one check missing to fill in the bandwidth for 160 MHz Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20200915085945.3782-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/airtime.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 45140e535151..26d2f8ba7029 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -560,7 +560,9 @@ static int ieee80211_fill_rx_status(struct ieee80211_rx_status *stat, if (rate->idx < 0 || !rate->count) return -1; - if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + stat->bw = RATE_INFO_BW_160; + else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) stat->bw = RATE_INFO_BW_80; else if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) stat->bw = RATE_INFO_BW_40; From b959ba9f468b1c581f40e92661ad58b093abaa03 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Wed, 9 Sep 2020 12:54:53 +0300 Subject: [PATCH 552/648] lib80211: fix unmet direct dependendices config warning when !CRYPTO When LIB80211_CRYPT_CCMP is enabled and CRYPTO is disabled, it results in unmet direct dependencies config warning. The reason is that LIB80211_CRYPT_CCMP selects CRYPTO_AES and CRYPTO_CCM, which are subordinate to CRYPTO. This is reproducible with CRYPTO disabled and R8188EU enabled, where R8188EU selects LIB80211_CRYPT_CCMP but does not select or depend on CRYPTO. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Fixes: a11e2f85481c ("lib80211: use crypto API ccm(aes) transform for CCMP processing") Signed-off-by: Necip Fazil Yildiran Link: https://lore.kernel.org/r/20200909095452.3080-1-fazilyildiran@gmail.com Signed-off-by: Johannes Berg --- net/wireless/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index faf74850a1b5..27026f587fa6 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -217,6 +217,7 @@ config LIB80211_CRYPT_WEP config LIB80211_CRYPT_CCMP tristate + select CRYPTO select CRYPTO_AES select CRYPTO_CCM From 412a84b5714af56f3eb648bba155107b5edddfdf Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Fri, 11 Sep 2020 01:11:35 +0000 Subject: [PATCH 553/648] mac80211: Fix radiotap header channel flag for 6GHz band Radiotap header field 'Channel flags' has '2 GHz spectrum' set to 'true' for 6GHz packet. Change it to 5GHz as there isn't a separate option available for 6GHz. Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/010101747ab7b703-1d7c9851-1594-43bf-81f7-f79ce7a67cc6-000000@us-west-2.amazonses.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 836cde516a18..a959ebf56852 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -451,7 +451,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->bw == RATE_INFO_BW_5) channel_flags |= IEEE80211_CHAN_QUARTER; - if (status->band == NL80211_BAND_5GHZ) + if (status->band == NL80211_BAND_5GHZ || + status->band == NL80211_BAND_6GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; else if (status->encoding != RX_ENC_LEGACY) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; From 780a8c9efc65f6d86acd44794495cedcd32eeb26 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 11 Sep 2020 10:29:02 +0000 Subject: [PATCH 554/648] mac80211: do not disable HE if HT is missing on 2.4 GHz VHT is not supported on 2.4 GHz, but HE is; don't disable HE if HT is missing there, do that only on 5 GHz (6 GHz is only HE). Fixes: 57fa5e85d53ce51 ("mac80211: determine chandef from HE 6 GHz operation") Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/010101747cb617f2-593c5410-1648-4a42-97a0-f3646a5a6dd1-000000@us-west-2.amazonses.com [rewrite the commit message] Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ac870309b911..2e400b0ff696 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4861,6 +4861,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; struct cfg80211_chan_def chandef; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; + bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; struct ieee80211_bss *bss = (void *)cbss->priv; int ret; u32 i; @@ -4879,7 +4880,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (!sband->vht_cap.vht_supported && !is_6ghz) { + if (!sband->vht_cap.vht_supported && is_5ghz) { ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } From c0de8776af6543e10d1a5c8969679fd9f6b66fa9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 17 Sep 2020 11:52:23 +0200 Subject: [PATCH 555/648] cfg80211: fix 6 GHz channel conversion We shouldn't accept any channels bigger than 233, fix that. Reported-by: Amar Fixes: d1a1646c0de7 ("cfg80211: adapt to new channelization of the 6GHz band") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200917115222.312ba6f1d461.I3a8c8fbcc3cc019814fd9cd0aced7eb591626136@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 4a9ff9ef513f..6fa99df52f86 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -95,7 +95,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) /* see 802.11ax D6.1 27.3.23.2 */ if (chan == 2) return MHZ_TO_KHZ(5935); - if (chan <= 253) + if (chan <= 233) return MHZ_TO_KHZ(5950 + chan * 5); break; case NL80211_BAND_60GHZ: From 3bd5c7a28a7c3aba07a2d300d43f8e988809e147 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 17 Sep 2020 14:50:31 +0200 Subject: [PATCH 556/648] mac80211: do not allow bigger VHT MPDUs than the hardware supports Limit maximum VHT MPDU size by local capability. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200917125031.45009-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 9c6045f9c24d..d1b64d0751f2 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -168,10 +168,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* take some capabilities as-is */ cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); vht_cap->cap = cap_info; - vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | - IEEE80211_VHT_CAP_RXLDPC | + vht_cap->cap &= IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_VHT_TXOP_PS | IEEE80211_VHT_CAP_HTC_VHT | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | @@ -180,6 +177,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + vht_cap->cap |= min_t(u32, cap_info & IEEE80211_VHT_CAP_MAX_MPDU_MASK, + own_cap.cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK); + /* and some based on our own capabilities */ switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: From 75bcbd6913de649601f4e7d3fb6d2b5effc24e9e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 18 Sep 2020 13:53:04 +0200 Subject: [PATCH 557/648] mac80211: fix 80 MHz association to 160/80+80 AP on 6 GHz When trying to associate to an AP support 180 or 80+80 MHz on 6 GHz with a STA that only has 80 Mhz support the cf2 field inside the chandef will get set causing the association to fail when trying to validate the chandef. Fix this by checking the support flags prior to setting cf2. Fixes: 57fa5e85d53ce ("mac80211: determine chandef from HE 6 GHz operation") Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20200918115304.1135693-1-john@phrozen.org [reword commit message a bit] Signed-off-by: Johannes Berg --- net/mac80211/util.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c8504ffc71a1..8d3bfc0fe176 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3353,9 +3353,10 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, he_chandef.center_freq1 = ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0, NL80211_BAND_6GHZ); - he_chandef.center_freq2 = - ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, - NL80211_BAND_6GHZ); + if (support_80_80 || support_160) + he_chandef.center_freq2 = + ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, + NL80211_BAND_6GHZ); } if (!cfg80211_chandef_valid(&he_chandef)) { From a608b6a646e8816bc0db156baad2e0679fa4d137 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 9 Sep 2020 07:16:37 +0900 Subject: [PATCH 558/648] kconfig: qconf: use delete[] instead of delete to free array (again) Commit c9b09a9249e6 ("kconfig: qconf: use delete[] instead of delete to free array") fixed two lines, but there is one more. (cppcheck does not report it for some reason...) This was detected by Clang. "make HOSTCXX=clang++ xconfig" reports the following: scripts/kconfig/qconf.cc:1279:2: warning: 'delete' applied to a pointer that was allocated with 'new[]'; did you mean 'delete[]'? [-Wmismatched-new-delete] delete data; ^ [] scripts/kconfig/qconf.cc:1239:15: note: allocated with 'new[]' here char *data = new char[count + 1]; ^ Fixes: c4f7398bee9c ("kconfig: qconf: make debug links work again") Fixes: c9b09a9249e6 ("kconfig: qconf: use delete[] instead of delete to free array") Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers --- scripts/kconfig/qconf.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index 8638785328a7..c7216b9110fc 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1276,7 +1276,7 @@ void ConfigInfoView::clicked(const QUrl &url) } free(result); - delete data; + delete[] data; } void ConfigInfoView::contextMenuEvent(QContextMenuEvent *event) From 3031313eb3d549b7ad6f9fbcc52ba04412e3eb9e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 1 Sep 2020 00:12:07 +0900 Subject: [PATCH 559/648] kprobes: Fix to check probe enabled before disarm_kprobe_ftrace() Commit 0cb2f1372baa ("kprobes: Fix NULL pointer dereference at kprobe_ftrace_handler") fixed one bug but not completely fixed yet. If we run a kprobe_module.tc of ftracetest, kernel showed a warning as below. # ./ftracetest test.d/kprobe/kprobe_module.tc === Ftrace unit tests === [1] Kprobe dynamic event - probing module ... [ 22.400215] ------------[ cut here ]------------ [ 22.400962] Failed to disarm kprobe-ftrace at trace_printk_irq_work+0x0/0x7e [trace_printk] (-2) [ 22.402139] WARNING: CPU: 7 PID: 200 at kernel/kprobes.c:1091 __disarm_kprobe_ftrace.isra.0+0x7e/0xa0 [ 22.403358] Modules linked in: trace_printk(-) [ 22.404028] CPU: 7 PID: 200 Comm: rmmod Not tainted 5.9.0-rc2+ #66 [ 22.404870] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 [ 22.406139] RIP: 0010:__disarm_kprobe_ftrace.isra.0+0x7e/0xa0 [ 22.406947] Code: 30 8b 03 eb c9 80 3d e5 09 1f 01 00 75 dc 49 8b 34 24 89 c2 48 c7 c7 a0 c2 05 82 89 45 e4 c6 05 cc 09 1f 01 01 e8 a9 c7 f0 ff <0f> 0b 8b 45 e4 eb b9 89 c6 48 c7 c7 70 c2 05 82 89 45 e4 e8 91 c7 [ 22.409544] RSP: 0018:ffffc90000237df0 EFLAGS: 00010286 [ 22.410385] RAX: 0000000000000000 RBX: ffffffff83066024 RCX: 0000000000000000 [ 22.411434] RDX: 0000000000000001 RSI: ffffffff810de8d3 RDI: ffffffff810de8d3 [ 22.412687] RBP: ffffc90000237e10 R08: 0000000000000001 R09: 0000000000000001 [ 22.413762] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88807c478640 [ 22.414852] R13: ffffffff8235ebc0 R14: ffffffffa00060c0 R15: 0000000000000000 [ 22.415941] FS: 00000000019d48c0(0000) GS:ffff88807d7c0000(0000) knlGS:0000000000000000 [ 22.417264] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 22.418176] CR2: 00000000005bb7e3 CR3: 0000000078f7a000 CR4: 00000000000006a0 [ 22.419309] Call Trace: [ 22.419990] kill_kprobe+0x94/0x160 [ 22.420652] kprobes_module_callback+0x64/0x230 [ 22.421470] notifier_call_chain+0x4f/0x70 [ 22.422184] blocking_notifier_call_chain+0x49/0x70 [ 22.422979] __x64_sys_delete_module+0x1ac/0x240 [ 22.423733] do_syscall_64+0x38/0x50 [ 22.424366] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 22.425176] RIP: 0033:0x4bb81d [ 22.425741] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e0 ff ff ff f7 d8 64 89 01 48 [ 22.428726] RSP: 002b:00007ffc70fef008 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 [ 22.430169] RAX: ffffffffffffffda RBX: 00000000019d48a0 RCX: 00000000004bb81d [ 22.431375] RDX: 0000000000000000 RSI: 0000000000000880 RDI: 00007ffc70fef028 [ 22.432543] RBP: 0000000000000880 R08: 00000000ffffffff R09: 00007ffc70fef320 [ 22.433692] R10: 0000000000656300 R11: 0000000000000246 R12: 00007ffc70fef028 [ 22.434635] R13: 0000000000000000 R14: 0000000000000002 R15: 0000000000000000 [ 22.435682] irq event stamp: 1169 [ 22.436240] hardirqs last enabled at (1179): [] console_unlock+0x422/0x580 [ 22.437466] hardirqs last disabled at (1188): [] console_unlock+0x7b/0x580 [ 22.438608] softirqs last enabled at (866): [] __do_softirq+0x38e/0x490 [ 22.439637] softirqs last disabled at (859): [] asm_call_on_stack+0x12/0x20 [ 22.440690] ---[ end trace 1e7ce7e1e4567276 ]--- [ 22.472832] trace_kprobe: This probe might be able to register after target module is loaded. Continue. This is because the kill_kprobe() calls disarm_kprobe_ftrace() even if the given probe is not enabled. In that case, ftrace_set_filter_ip() fails because the given probe point is not registered to ftrace. Fix to check the given (going) probe is enabled before invoking disarm_kprobe_ftrace(). Link: https://lkml.kernel.org/r/159888672694.1411785.5987998076694782591.stgit@devnote2 Fixes: 0cb2f1372baa ("kprobes: Fix NULL pointer dereference at kprobe_ftrace_handler") Cc: Ingo Molnar Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Muchun Song Cc: Chengming Zhou Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/kprobes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 287b263c9cb9..d43b48ecdb4f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2159,9 +2159,10 @@ static void kill_kprobe(struct kprobe *p) /* * The module is going away. We should disarm the kprobe which - * is using ftrace. + * is using ftrace, because ftrace framework is still available at + * MODULE_STATE_GOING notification. */ - if (kprobe_ftrace(p)) + if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) disarm_kprobe_ftrace(p); } From d5e47505e02666d5b3fe99212e0100791847dc8d Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Mon, 31 Aug 2020 14:26:31 +0200 Subject: [PATCH 560/648] ftrace: Free the trampoline when ftrace_startup() fails Commit fc0ea795f53c ("ftrace: Add symbols for ftrace trampolines") missed to remove ops from new ftrace_ops_trampoline_list in ftrace_startup() if ftrace_hash_ipmodify_enable() fails there. It may lead to BUG if such ops come from a module which may be removed. Moreover, the trampoline itself is not freed in this case. Fix it by calling ftrace_trampoline_free() during the rollback. Link: https://lkml.kernel.org/r/20200831122631.28057-1-mbenes@suse.cz Fixes: fc0ea795f53c ("ftrace: Add symbols for ftrace trampolines") Fixes: f8b8be8a310a ("ftrace, kprobes: Support IPMODIFY flag to find IP modify conflict") Signed-off-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 275441254bb5..656d7cb5a78c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2862,6 +2862,8 @@ int ftrace_startup(struct ftrace_ops *ops, int command) __unregister_ftrace_function(ops); ftrace_start_up--; ops->flags &= ~FTRACE_OPS_FL_ENABLED; + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + ftrace_trampoline_free(ops); return ret; } From 478ece9573f04aed834e05e0bbf034320d240f9f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 1 Sep 2020 12:16:17 +0300 Subject: [PATCH 561/648] ftrace: Fix missing synchronize_rcu() removing trampoline from kallsyms Add synchronize_rcu() after list_del_rcu() in ftrace_remove_trampoline_from_kallsyms() to protect readers of ftrace_ops_trampoline_list (in ftrace_get_trampoline_kallsym) which is used when kallsyms is read. Link: https://lkml.kernel.org/r/20200901091617.31837-1-adrian.hunter@intel.com Fixes: fc0ea795f53c8d ("ftrace: Add symbols for ftrace trampolines") Signed-off-by: Adrian Hunter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 656d7cb5a78c..e9f893388245 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2782,6 +2782,7 @@ static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops) { lockdep_assert_held(&ftrace_lock); list_del_rcu(&ops->list); + synchronize_rcu(); } /* From 795d6379a47bcbb88bd95a69920e4acc52849f88 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Sep 2020 10:23:31 +0200 Subject: [PATCH 562/648] tracing: Make the space reserved for the pid wider For 64bit CONFIG_BASE_SMALL=0 systems PID_MAX_LIMIT is set by default to 4194304. During boot the kernel sets a new value based on number of CPUs but no lower than 32768. It is 1024 per CPU so with 128 CPUs the default becomes 131072 which needs six digits. This value can be increased during run time but must not exceed the initial upper limit. Systemd sometime after v241 sets it to the upper limit during boot. The result is that when the pid exceeds five digits, the trace output is a little hard to read because it is no longer properly padded (same like on big iron with 98+ CPUs). Increase the pid padding to seven digits. Link: https://lkml.kernel.org/r/20200904082331.dcdkrr3bkn3e4qlg@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 38 ++++++++++++++++++------------------- kernel/trace/trace_output.c | 12 ++++++------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f40d850ebabc..2a7c26345e83 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3782,14 +3782,14 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off \n" - "# | / _----=> need-resched \n" - "# || / _---=> hardirq/softirq \n" - "# ||| / _--=> preempt-depth \n" - "# |||| / delay \n" - "# cmd pid ||||| time | caller \n" - "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# _------=> CPU# \n" + "# / _-----=> irqs-off \n" + "# | / _----=> need-resched \n" + "# || / _---=> hardirq/softirq \n" + "# ||| / _--=> preempt-depth \n" + "# |||| / delay \n" + "# cmd pid ||||| time | caller \n" + "# \\ / ||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) @@ -3810,26 +3810,26 @@ static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, print_event_info(buf, m); - seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); - seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); + seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); + seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char *space = " "; - int prec = tgid ? 10 : 2; + const char *space = " "; + int prec = tgid ? 12 : 2; print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); } void diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 4d1893564912..000e9dc224c6 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -497,7 +497,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%8.8s-%-5d %3d", + trace_seq_printf(s, "%8.8s-%-7d %3d", comm, entry->pid, cpu); return trace_print_lat_fmt(s, entry); @@ -588,15 +588,15 @@ int trace_print_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid); if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { unsigned int tgid = trace_find_tgid(entry->pid); if (!tgid) - trace_seq_printf(s, "(-----) "); + trace_seq_printf(s, "(-------) "); else - trace_seq_printf(s, "(%5d) ", tgid); + trace_seq_printf(s, "(%7d) ", tgid); } trace_seq_printf(s, "[%03d] ", iter->cpu); @@ -636,7 +636,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); trace_seq_printf( - s, "%16s %5d %3d %d %08x %08lx ", + s, "%16s %7d %3d %d %08x %08lx ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx); } else { @@ -917,7 +917,7 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, S = task_index_to_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); trace_seq_printf(&iter->seq, - " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", + " %7d:%3d:%c %s [%03d] %7d:%3d:%c %s\n", field->prev_pid, field->prev_prio, S, delim, From c4ad98e4b72cb5be30ea282fce935248f2300e62 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 15 Sep 2020 11:42:17 +0100 Subject: [PATCH 563/648] KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch KVM currently assumes that an instruction abort can never be a write. This is in general true, except when the abort is triggered by a S1PTW on instruction fetch that tries to update the S1 page tables (to set AF, for example). This can happen if the page tables have been paged out and brought back in without seeing a direct write to them (they are thus marked read only), and the fault handling code will make the PT executable(!) instead of writable. The guest gets stuck forever. In these conditions, the permission fault must be considered as a write so that the Stage-1 update can take place. This is essentially the I-side equivalent of the problem fixed by 60e21a0ef54c ("arm64: KVM: Take S1 walks into account when determining S2 write faults"). Update kvm_is_write_fault() to return true on IABT+S1PTW, and introduce kvm_vcpu_trap_is_exec_fault() that only return true when no faulting on a S1 fault. Additionally, kvm_vcpu_dabt_iss1tw() is renamed to kvm_vcpu_abt_iss1tw(), as the above makes it plain that it isn't specific to data abort. Signed-off-by: Marc Zyngier Reviewed-by: Will Deacon Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200915104218.1284701-2-maz@kernel.org --- arch/arm64/include/asm/kvm_emulate.h | 12 ++++++++++-- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- arch/arm64/kvm/mmu.c | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 49a55be2b9a2..4f618af660ba 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -298,7 +298,7 @@ static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) return (kvm_vcpu_get_esr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } -static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW); } @@ -306,7 +306,7 @@ static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) || - kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ + kvm_vcpu_abt_iss1tw(vcpu); /* AF/DBM update */ } static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) @@ -335,6 +335,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW; } +static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); +} + static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC; @@ -372,6 +377,9 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { + if (kvm_vcpu_abt_iss1tw(vcpu)) + return true; + if (kvm_vcpu_trap_is_iabt(vcpu)) return false; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 426ef65601dd..d64c5d56c860 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -445,7 +445,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && kvm_vcpu_dabt_isvalid(vcpu) && !kvm_vcpu_abt_issea(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); + !kvm_vcpu_abt_iss1tw(vcpu); if (valid) { int ret = __vgic_v2_perform_cpuif_access(vcpu); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index f58d657a898d..9aec1ce491d2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1843,7 +1843,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_s2_mmu *mmu = vcpu->arch.hw_mmu; write_fault = kvm_is_write_fault(vcpu); - exec_fault = kvm_vcpu_trap_is_iabt(vcpu); + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); if (fault_status == FSC_PERM && !write_fault && !exec_fault) { @@ -2125,7 +2125,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) goto out; } - if (kvm_vcpu_dabt_iss1tw(vcpu)) { + if (kvm_vcpu_abt_iss1tw(vcpu)) { kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); ret = 1; goto out_unlock; From 620cf45f7a516bf5fe9e5dce675a652e935c8bde Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 15 Sep 2020 11:42:18 +0100 Subject: [PATCH 564/648] KVM: arm64: Remove S1PTW check from kvm_vcpu_dabt_iswrite() Now that kvm_vcpu_trap_is_write_fault() checks for S1PTW, there is no need for kvm_vcpu_dabt_iswrite() to do the same thing, as we already check for this condition on all existing paths. Drop the check and add a comment instead. Signed-off-by: Marc Zyngier Reviewed-by: Will Deacon Link: https://lore.kernel.org/r/20200915104218.1284701-3-maz@kernel.org --- arch/arm64/include/asm/kvm_emulate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4f618af660ba..1cc5f5f72d0b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -303,10 +303,10 @@ static __always_inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_S1PTW); } +/* Always check for S1PTW *before* using this. */ static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR) || - kvm_vcpu_abt_iss1tw(vcpu); /* AF/DBM update */ + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_WNR; } static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) From 54fa9ba564b717fc2cf689427e195c360315999d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 7 Sep 2020 11:32:07 +0200 Subject: [PATCH 565/648] ftrace: Let ftrace_enable_sysctl take a kernel pointer buffer Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the signature of ftrace_enable_sysctl to match ctl_table.proc_handler which fixes the following sparse warning: kernel/trace/ftrace.c:7544:43: warning: incorrect type in argument 3 (different address spaces) kernel/trace/ftrace.c:7544:43: expected void * kernel/trace/ftrace.c:7544:43: got void [noderef] __user *buffer Link: https://lkml.kernel.org/r/20200907093207.13540-1-tklauser@distanz.ch Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Cc: Andrew Morton Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Tobias Klauser Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ce2c06f72e86..e5c2d5cc6e6a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -85,8 +85,7 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); struct ftrace_ops; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e9f893388245..603255f5f085 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7534,8 +7534,7 @@ static bool is_permanent_ops_registered(void) int ftrace_enable_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = -ENODEV; From 46bbe5c671e06f070428b9be142cc4ee5cedebac Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 7 Sep 2020 06:58:45 -0700 Subject: [PATCH 566/648] tracing: fix double free clang static analyzer reports this problem trace_events_hist.c:3824:3: warning: Attempt to free released memory kfree(hist_data->attrs->var_defs.name[i]); In parse_var_defs() if there is a problem allocating var_defs.expr, the earlier var_defs.name is freed. This free is duplicated by free_var_defs() which frees the rest of the list. Because free_var_defs() has to run anyway, remove the second free fom parse_var_defs(). Link: https://lkml.kernel.org/r/20200907135845.15804-1-trix@redhat.com Cc: stable@vger.kernel.org Fixes: 30350d65ac56 ("tracing: Add variable support to hist triggers") Reviewed-by: Tom Zanussi Signed-off-by: Tom Rix Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0b933546142e..1b2ef6490229 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3865,7 +3865,6 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { - kfree(hist_data->attrs->var_defs.name[n_vars]); ret = -ENOMEM; goto free; } From db6c6a0df840e3f52c84cc302cc1a08ba11a4416 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 10 Sep 2020 10:24:57 -0500 Subject: [PATCH 567/648] objtool: Fix noreturn detection for ignored functions When a function is annotated with STACK_FRAME_NON_STANDARD, objtool doesn't validate its code paths. It also skips sibling call detection within the function. But sibling call detection is actually needed for the case where the ignored function doesn't have any return instructions. Otherwise objtool naively marks the function as implicit static noreturn, which affects the reachability of its callers, resulting in "unreachable instruction" warnings. Fix it by just enabling sibling call detection for ignored functions. The 'insn->ignore' check in add_jump_destinations() is no longer needed after e6da9567959e ("objtool: Don't use ignore flag for fake jumps"). Fixes the following warning: arch/x86/kvm/vmx/vmx.o: warning: objtool: vmx_handle_exit_irqoff()+0x142: unreachable instruction which triggers on an allmodconfig with CONFIG_GCOV_KERNEL unset. Reported-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Acked-by: Linus Torvalds Link: https://lkml.kernel.org/r/5b1e2536cdbaa5246b60d7791b76130a74082c62.1599751464.git.jpoimboe@redhat.com --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e034a8f24f46..90a66891441a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -619,7 +619,7 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET) + if (insn->offset == FAKE_JUMP_OFFSET) continue; reloc = find_reloc_by_dest_range(file->elf, insn->sec, From 82d083ab60c3693201c6f5c7a5f23a6ed422098d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2020 17:55:05 +0900 Subject: [PATCH 568/648] kprobes: tracing/kprobes: Fix to kill kprobes on initmem after boot Since kprobe_event= cmdline option allows user to put kprobes on the functions in initmem, kprobe has to make such probes gone after boot. Currently the probes on the init functions in modules will be handled by module callback, but the kernel init text isn't handled. Without this, kprobes may access non-exist text area to disable or remove it. Link: https://lkml.kernel.org/r/159972810544.428528.1839307531600646955.stgit@devnote2 Fixes: 970988e19eb0 ("tracing/kprobe: Add kprobe_event= boot parameter") Cc: Jonathan Corbet Cc: Shuah Khan Cc: Randy Dunlap Cc: Ingo Molnar Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/kprobes.h | 5 +++++ init/main.c | 2 ++ kernel/kprobes.c | 22 ++++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 9be1bff4f586..8aab327b5539 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -373,6 +373,8 @@ void unregister_kretprobes(struct kretprobe **rps, int num); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); +void kprobe_free_init_mem(void); + int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); @@ -435,6 +437,9 @@ static inline void unregister_kretprobes(struct kretprobe **rps, int num) static inline void kprobe_flush_task(struct task_struct *tk) { } +static inline void kprobe_free_init_mem(void) +{ +} static inline int disable_kprobe(struct kprobe *kp) { return -ENOSYS; diff --git a/init/main.c b/init/main.c index ae78fb68d231..038128b2a755 100644 --- a/init/main.c +++ b/init/main.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1402,6 +1403,7 @@ static int __ref kernel_init(void *unused) kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); + kprobe_free_init_mem(); ftrace_free_init_mem(); free_initmem(); mark_readonly(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d43b48ecdb4f..d750e025d1ff 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2453,6 +2453,28 @@ static struct notifier_block kprobe_module_nb = { extern unsigned long __start_kprobe_blacklist[]; extern unsigned long __stop_kprobe_blacklist[]; +void kprobe_free_init_mem(void) +{ + void *start = (void *)(&__init_begin); + void *end = (void *)(&__init_end); + struct hlist_head *head; + struct kprobe *p; + int i; + + mutex_lock(&kprobe_mutex); + + /* Kill all kprobes on initmem */ + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry(p, head, hlist) { + if (start <= (void *)p->addr && (void *)p->addr < end) + kill_kprobe(p); + } + } + + mutex_unlock(&kprobe_mutex); +} + static int __init init_kprobes(void) { int i, err = 0; From ff6f3aff46beb3c29e0802cffcc559e1756c4814 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Thu, 17 Sep 2020 19:55:41 +0800 Subject: [PATCH 569/648] i2c: mediatek: Fix generic definitions for bus frequency The max frequency of mediatek i2c controller driver is I2C_MAX_HIGH_SPEED_MODE_FREQ, not I2C_MAX_FAST_MODE_PLUS_FREQ. Fix it. Fixes: 90224e6468e1 ("i2c: drivers: Use generic definitions for bus frequencies") Reviewed-by: Yingjoe Chen Reviewed-by: Andy Shevchenko Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index efc14041d45b..a1978ebd8177 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -681,8 +681,8 @@ static int mtk_i2c_calculate_speed(struct mtk_i2c *i2c, unsigned int clk_src, unsigned int cnt_mul; int ret = -EINVAL; - if (target_speed > I2C_MAX_FAST_MODE_PLUS_FREQ) - target_speed = I2C_MAX_FAST_MODE_PLUS_FREQ; + if (target_speed > I2C_MAX_HIGH_SPEED_MODE_FREQ) + target_speed = I2C_MAX_HIGH_SPEED_MODE_FREQ; max_step_cnt = mtk_i2c_max_step_cnt(target_speed); base_step_cnt = max_step_cnt; From b44658e755b5a733e9df04449facbc738df09170 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Thu, 17 Sep 2020 19:55:42 +0800 Subject: [PATCH 570/648] i2c: mediatek: Send i2c master code at more than 1MHz The master code needs to being sent when the speed is more than I2C_MAX_FAST_MODE_PLUS_FREQ, not I2C_MAX_FAST_MODE_FREQ in the latest I2C-bus specification and user manual. Signed-off-by: Qii Wang Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index a1978ebd8177..0cbdfbe605b5 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -759,7 +759,7 @@ static int mtk_i2c_set_speed(struct mtk_i2c *i2c, unsigned int parent_clk) for (clk_div = 1; clk_div <= max_clk_div; clk_div++) { clk_src = parent_clk / clk_div; - if (target_speed > I2C_MAX_FAST_MODE_FREQ) { + if (target_speed > I2C_MAX_FAST_MODE_PLUS_FREQ) { /* Set master code speed register */ ret = mtk_i2c_calculate_speed(i2c, clk_src, I2C_MAX_FAST_MODE_FREQ, From 9dda66acddcbcd5c9e94a1292c679738ba999345 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:23 +0300 Subject: [PATCH 571/648] net: mscc: ocelot: fix race condition with TX timestamping The TX-timestampable skb is added late to the ocelot_port->tx_skbs. It is in a race with the TX timestamp IRQ, which checks that queue trying to match the timestamp with the skb by the ts_id. The skb should be added to the queue before the IRQ can fire. Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_net.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 0668d23cdbfa..cacabc23215a 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -330,6 +330,7 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) u8 grp = 0; /* Send everything on CPU group 0 */ unsigned int i, count, last; int port = priv->chip_port; + bool do_tstamp; val = ocelot_read(ocelot, QS_INJ_STATUS); if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp))) || @@ -344,10 +345,14 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) info.vid = skb_vlan_tag_get(skb); /* Check if timestamping is needed */ + do_tstamp = (ocelot_port_add_txtstamp_skb(ocelot_port, skb) == 0); + if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) { info.rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { info.rew_op |= (ocelot_port->ts_id % 4) << 3; + ocelot_port->ts_id++; + } } ocelot_gen_ifh(ifh, &info); @@ -380,12 +385,9 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - if (!ocelot_port_add_txtstamp_skb(ocelot_port, skb)) { - ocelot_port->ts_id++; - return NETDEV_TX_OK; - } + if (!do_tstamp) + dev_kfree_skb_any(skb); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; } From 6565243c0677aa2befa5a953cf11bc7b4a6f0a47 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:24 +0300 Subject: [PATCH 572/648] net: mscc: ocelot: add locking for the port TX timestamp ID The ocelot_port->ts_id is used to: (a) populate skb->cb[0] for matching the TX timestamp in the PTP IRQ with an skb. (b) populate the REW_OP from the injection header of the ongoing skb. Only then is ocelot_port->ts_id incremented. This is a problem because, at least theoretically, another timestampable skb might use the same ocelot_port->ts_id before that is incremented. Normally all transmit calls are serialized by the netdev transmit spinlock, but in this case, ocelot_port_add_txtstamp_skb() is also called by DSA, which has started declaring the NETIF_F_LLTX feature since commit 2b86cb829976 ("net: dsa: declare lockless TX feature for slave ports"). So the logic of using and incrementing the timestamp id should be atomic per port. The solution is to use the global ocelot_port->ts_id only while protected by the associated ocelot_port->ts_id_lock. That's where we populate skb->cb[0]. Note that for ocelot, ocelot_port_add_txtstamp_skb is called for the actual skb, but for felix, it is called for the skb's clone. That is something which will also be changed in the future. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 8 +++++++- drivers/net/ethernet/mscc/ocelot_net.c | 6 ++---- include/soc/mscc/ocelot.h | 1 + net/dsa/tag_ocelot.c | 11 +++++++---- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5abb7d2b0a9e..83eb7c325061 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -421,10 +421,15 @@ int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port, if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + spin_lock(&ocelot_port->ts_id_lock); + shinfo->tx_flags |= SKBTX_IN_PROGRESS; /* Store timestamp ID in cb[0] of sk_buff */ - skb->cb[0] = ocelot_port->ts_id % 4; + skb->cb[0] = ocelot_port->ts_id; + ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; skb_queue_tail(&ocelot_port->tx_skbs, skb); + + spin_unlock(&ocelot_port->ts_id_lock); return 0; } return -ENODATA; @@ -1300,6 +1305,7 @@ void ocelot_init_port(struct ocelot *ocelot, int port) struct ocelot_port *ocelot_port = ocelot->ports[port]; skb_queue_head_init(&ocelot_port->tx_skbs); + spin_lock_init(&ocelot_port->ts_id_lock); /* Basic L2 initialization */ diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index cacabc23215a..8490e42e9e2d 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -349,10 +349,8 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) { info.rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - info.rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + info.rew_op |= skb->cb[0] << 3; } ocelot_gen_ifh(ifh, &info); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index da369b12005f..4521dd602ddc 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -566,6 +566,7 @@ struct ocelot_port { u8 ptp_cmd; struct sk_buff_head tx_skbs; u8 ts_id; + spinlock_t ts_id_lock; phy_interface_t phy_mode; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 42f327c06dca..b4fc05cafaa6 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -160,11 +160,14 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct sk_buff *clone = DSA_SKB_CB(skb)->clone; + rew_op = ocelot_port->ptp_cmd; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - rew_op |= (ocelot_port->ts_id % 4) << 3; - ocelot_port->ts_id++; - } + /* Retrieve timestamp ID populated inside skb->cb[0] of the + * clone by ocelot_port_add_txtstamp_skb + */ + if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) + rew_op |= clone->cb[0] << 3; packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0); } From a63ed92d217fcc2a36a0fbb77d80d8b9ee31aac1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:25 +0300 Subject: [PATCH 573/648] net: dsa: seville: fix buffer size of the queue system The VSC9953 Seville switch has 2 megabits of buffer split into 4360 words of 60 bytes each. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/seville_vsc9953.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 625b1891d955..0fdeff22a76c 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1008,7 +1008,7 @@ static const struct felix_info seville_info_vsc9953 = { .vcap_is2_keys = vsc9953_vcap_is2_keys, .vcap_is2_actions = vsc9953_vcap_is2_actions, .vcap = vsc9953_vcap_props, - .shared_queue_sz = 128 * 1024, + .shared_queue_sz = 2048 * 1024, .num_mact_rows = 2048, .num_ports = 10, .mdio_bus_alloc = vsc9953_mdio_bus_alloc, From c9d4b2cf16021d1f65e764f4035d7d1616e3bda3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:26 +0300 Subject: [PATCH 574/648] net: mscc: ocelot: check for errors on memory allocation of ports Do not proceed probing if we couldn't allocate memory for the ports array, just error out. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 65408bc994c4..904ea299a5e8 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -993,6 +993,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->ports = devm_kcalloc(&pdev->dev, ocelot->num_phys_ports, sizeof(struct ocelot_port *), GFP_KERNEL); + if (!ocelot->ports) { + err = -ENOMEM; + goto out_put_ports; + } ocelot->vcap_is2_keys = vsc7514_vcap_is2_keys; ocelot->vcap_is2_actions = vsc7514_vcap_is2_actions; From d1cc0e932039cc40357baf037ab403d1d2e228fd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:27 +0300 Subject: [PATCH 575/648] net: mscc: ocelot: error checking when calling ocelot_init() ocelot_init() allocates memory, resets the switch and polls for a status register, things which can fail. Stop probing the driver in that case, and propagate the error result. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 5 ++++- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 04bfa6e465ff..66da447c4096 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -585,7 +585,10 @@ static int felix_setup(struct dsa_switch *ds) if (err) return err; - ocelot_init(ocelot); + err = ocelot_init(ocelot); + if (err) + return err; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 904ea299a5e8..a1cbb20a7757 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1002,7 +1002,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->vcap_is2_actions = vsc7514_vcap_is2_actions; ocelot->vcap = vsc7514_vcap_props; - ocelot_init(ocelot); + err = ocelot_init(ocelot); + if (err) + goto out_put_ports; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { From 7c411799e1b3e0efa3a50e44e98cce6a771441c6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:28 +0300 Subject: [PATCH 576/648] net: mscc: ocelot: refactor ports parsing code into a dedicated function mscc_ocelot_probe() is already pretty large and hard to follow. So move the code for parsing ports in a separate function. This makes it easier for the next patch to just call mscc_ocelot_release_ports from the error path of mscc_ocelot_init_ports. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 209 +++++++++++---------- 1 file changed, 110 insertions(+), 99 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index a1cbb20a7757..ff4a01424953 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -896,11 +896,115 @@ static struct ptp_clock_info ocelot_ptp_clock_info = { .enable = ocelot_ptp_enable, }; +static int mscc_ocelot_init_ports(struct platform_device *pdev, + struct device_node *ports) +{ + struct ocelot *ocelot = platform_get_drvdata(pdev); + struct device_node *portnp; + int err; + + ocelot->ports = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports, + sizeof(struct ocelot_port *), GFP_KERNEL); + if (!ocelot->ports) + return -ENOMEM; + + /* No NPI port */ + ocelot_configure_cpu(ocelot, -1, OCELOT_TAG_PREFIX_NONE, + OCELOT_TAG_PREFIX_NONE); + + for_each_available_child_of_node(ports, portnp) { + struct ocelot_port_private *priv; + struct ocelot_port *ocelot_port; + struct device_node *phy_node; + phy_interface_t phy_mode; + struct phy_device *phy; + struct regmap *target; + struct resource *res; + struct phy *serdes; + char res_name[8]; + u32 port; + + if (of_property_read_u32(portnp, "reg", &port)) + continue; + + snprintf(res_name, sizeof(res_name), "port%d", port); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + res_name); + target = ocelot_regmap_init(ocelot, res); + if (IS_ERR(target)) + continue; + + phy_node = of_parse_phandle(portnp, "phy-handle", 0); + if (!phy_node) + continue; + + phy = of_phy_find_device(phy_node); + of_node_put(phy_node); + if (!phy) + continue; + + err = ocelot_probe_port(ocelot, port, target, phy); + if (err) { + of_node_put(portnp); + return err; + } + + ocelot_port = ocelot->ports[port]; + priv = container_of(ocelot_port, struct ocelot_port_private, + port); + + of_get_phy_mode(portnp, &phy_mode); + + ocelot_port->phy_mode = phy_mode; + + switch (ocelot_port->phy_mode) { + case PHY_INTERFACE_MODE_NA: + continue; + case PHY_INTERFACE_MODE_SGMII: + break; + case PHY_INTERFACE_MODE_QSGMII: + /* Ensure clock signals and speed is set on all + * QSGMII links + */ + ocelot_port_writel(ocelot_port, + DEV_CLOCK_CFG_LINK_SPEED + (OCELOT_SPEED_1000), + DEV_CLOCK_CFG); + break; + default: + dev_err(ocelot->dev, + "invalid phy mode for port%d, (Q)SGMII only\n", + port); + of_node_put(portnp); + return -EINVAL; + } + + serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); + if (IS_ERR(serdes)) { + err = PTR_ERR(serdes); + if (err == -EPROBE_DEFER) + dev_dbg(ocelot->dev, "deferring probe\n"); + else + dev_err(ocelot->dev, + "missing SerDes phys for port%d\n", + port); + + of_node_put(portnp); + return err; + } + + priv->serdes = serdes; + } + + return 0; +} + static int mscc_ocelot_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - struct device_node *ports, *portnp; int err, irq_xtr, irq_ptp_rdy; + struct device_node *ports; struct ocelot *ocelot; struct regmap *hsio; unsigned int i; @@ -985,19 +1089,12 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ports = of_get_child_by_name(np, "ethernet-ports"); if (!ports) { - dev_err(&pdev->dev, "no ethernet-ports child node found\n"); + dev_err(ocelot->dev, "no ethernet-ports child node found\n"); return -ENODEV; } ocelot->num_phys_ports = of_get_child_count(ports); - ocelot->ports = devm_kcalloc(&pdev->dev, ocelot->num_phys_ports, - sizeof(struct ocelot_port *), GFP_KERNEL); - if (!ocelot->ports) { - err = -ENOMEM; - goto out_put_ports; - } - ocelot->vcap_is2_keys = vsc7514_vcap_is2_keys; ocelot->vcap_is2_actions = vsc7514_vcap_is2_actions; ocelot->vcap = vsc7514_vcap_props; @@ -1006,6 +1103,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_put_ports; + err = mscc_ocelot_init_ports(pdev, ports); + if (err) + goto out_put_ports; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { @@ -1015,96 +1116,6 @@ static int mscc_ocelot_probe(struct platform_device *pdev) } } - /* No NPI port */ - ocelot_configure_cpu(ocelot, -1, OCELOT_TAG_PREFIX_NONE, - OCELOT_TAG_PREFIX_NONE); - - for_each_available_child_of_node(ports, portnp) { - struct ocelot_port_private *priv; - struct ocelot_port *ocelot_port; - struct device_node *phy_node; - phy_interface_t phy_mode; - struct phy_device *phy; - struct regmap *target; - struct resource *res; - struct phy *serdes; - char res_name[8]; - u32 port; - - if (of_property_read_u32(portnp, "reg", &port)) - continue; - - snprintf(res_name, sizeof(res_name), "port%d", port); - - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, - res_name); - target = ocelot_regmap_init(ocelot, res); - if (IS_ERR(target)) - continue; - - phy_node = of_parse_phandle(portnp, "phy-handle", 0); - if (!phy_node) - continue; - - phy = of_phy_find_device(phy_node); - of_node_put(phy_node); - if (!phy) - continue; - - err = ocelot_probe_port(ocelot, port, target, phy); - if (err) { - of_node_put(portnp); - goto out_put_ports; - } - - ocelot_port = ocelot->ports[port]; - priv = container_of(ocelot_port, struct ocelot_port_private, - port); - - of_get_phy_mode(portnp, &phy_mode); - - ocelot_port->phy_mode = phy_mode; - - switch (ocelot_port->phy_mode) { - case PHY_INTERFACE_MODE_NA: - continue; - case PHY_INTERFACE_MODE_SGMII: - break; - case PHY_INTERFACE_MODE_QSGMII: - /* Ensure clock signals and speed is set on all - * QSGMII links - */ - ocelot_port_writel(ocelot_port, - DEV_CLOCK_CFG_LINK_SPEED - (OCELOT_SPEED_1000), - DEV_CLOCK_CFG); - break; - default: - dev_err(ocelot->dev, - "invalid phy mode for port%d, (Q)SGMII only\n", - port); - of_node_put(portnp); - err = -EINVAL; - goto out_put_ports; - } - - serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); - if (IS_ERR(serdes)) { - err = PTR_ERR(serdes); - if (err == -EPROBE_DEFER) - dev_dbg(ocelot->dev, "deferring probe\n"); - else - dev_err(ocelot->dev, - "missing SerDes phys for port%d\n", - port); - - of_node_put(portnp); - goto out_put_ports; - } - - priv->serdes = serdes; - } - register_netdevice_notifier(&ocelot_netdevice_nb); register_switchdev_notifier(&ocelot_switchdev_nb); register_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb); From 22cdb493de54ca4cb161553376f7b02fcaca2328 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:29 +0300 Subject: [PATCH 577/648] net: mscc: ocelot: unregister net devices on unbind This driver was not unregistering its network interfaces on unbind. Now it is. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index ff4a01424953..252c49b5f22b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -896,6 +896,26 @@ static struct ptp_clock_info ocelot_ptp_clock_info = { .enable = ocelot_ptp_enable, }; +static void mscc_ocelot_release_ports(struct ocelot *ocelot) +{ + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port_private *priv; + struct ocelot_port *ocelot_port; + + ocelot_port = ocelot->ports[port]; + if (!ocelot_port) + continue; + + priv = container_of(ocelot_port, struct ocelot_port_private, + port); + + unregister_netdev(priv->dev); + free_netdev(priv->dev); + } +} + static int mscc_ocelot_init_ports(struct platform_device *pdev, struct device_node *ports) { @@ -1132,6 +1152,7 @@ static int mscc_ocelot_remove(struct platform_device *pdev) struct ocelot *ocelot = platform_get_drvdata(pdev); ocelot_deinit_timestamp(ocelot); + mscc_ocelot_release_ports(ocelot); ocelot_deinit(ocelot); unregister_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb); unregister_switchdev_notifier(&ocelot_switchdev_nb); From e5fb512d81d021b7c7a0c2547c3dafb9de759285 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 18 Sep 2020 04:07:30 +0300 Subject: [PATCH 578/648] net: mscc: ocelot: deinitialize only initialized ports Currently mscc_ocelot_init_ports() will skip initializing a port when it doesn't have a phy-handle, so the ocelot->ports[port] pointer will be NULL. Take this into consideration when tearing down the driver, and add a new function ocelot_deinit_port() to the switch library, mirror of ocelot_init_port(), which needs to be called by the driver for all ports it has initialized. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Alexandre Belloni Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix.c | 3 +++ drivers/net/ethernet/mscc/ocelot.c | 16 ++++++++-------- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 2 ++ include/soc/mscc/ocelot.h | 1 + 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 66da447c4096..01427cd08448 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -643,10 +643,13 @@ static void felix_teardown(struct dsa_switch *ds) { struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); + int port; if (felix->info->mdio_bus_free) felix->info->mdio_bus_free(ocelot); + for (port = 0; port < ocelot->num_phys_ports; port++) + ocelot_deinit_port(ocelot, port); ocelot_deinit_timestamp(ocelot); /* stop workqueue thread */ ocelot_deinit(ocelot); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 83eb7c325061..8518e1d60da4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1550,18 +1550,18 @@ EXPORT_SYMBOL(ocelot_init); void ocelot_deinit(struct ocelot *ocelot) { - struct ocelot_port *port; - int i; - cancel_delayed_work(&ocelot->stats_work); destroy_workqueue(ocelot->stats_queue); mutex_destroy(&ocelot->stats_lock); - - for (i = 0; i < ocelot->num_phys_ports; i++) { - port = ocelot->ports[i]; - skb_queue_purge(&port->tx_skbs); - } } EXPORT_SYMBOL(ocelot_deinit); +void ocelot_deinit_port(struct ocelot *ocelot, int port) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + skb_queue_purge(&ocelot_port->tx_skbs); +} +EXPORT_SYMBOL(ocelot_deinit_port); + MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 252c49b5f22b..e02fb8bfab63 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -908,6 +908,8 @@ static void mscc_ocelot_release_ports(struct ocelot *ocelot) if (!ocelot_port) continue; + ocelot_deinit_port(ocelot, port); + priv = container_of(ocelot_port, struct ocelot_port_private, port); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 4521dd602ddc..0ac4e7fba086 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -678,6 +678,7 @@ void ocelot_configure_cpu(struct ocelot *ocelot, int npi, int ocelot_init(struct ocelot *ocelot); void ocelot_deinit(struct ocelot *ocelot); void ocelot_init_port(struct ocelot *ocelot, int port); +void ocelot_deinit_port(struct ocelot *ocelot, int port); /* DSA callbacks */ void ocelot_port_enable(struct ocelot *ocelot, int port, From fc25fa97976b1c2fd226ae2b43e1f5abb61b77c9 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 18 Sep 2020 12:09:38 +0800 Subject: [PATCH 579/648] hinic: fix sending pkts from core while self testing Call netif_tx_disable firstly before starting doing self-test to avoid sending packet from networking core and self-test packet simultaneously which may cause self-test failure or hw abnormal. Fixes: 4aa218a4fe77 ("hinic: add self test support") Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 4 ++++ drivers/net/ethernet/huawei/hinic/hinic_tx.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c index 6bb65ade1d77..c340d9acba80 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c @@ -1654,6 +1654,7 @@ static void hinic_diag_test(struct net_device *netdev, } netif_carrier_off(netdev); + netif_tx_disable(netdev); err = do_lp_test(nic_dev, eth_test->flags, LP_DEFAULT_TIME, &test_index); @@ -1662,9 +1663,12 @@ static void hinic_diag_test(struct net_device *netdev, data[test_index] = 1; } + netif_tx_wake_all_queues(netdev); + err = hinic_port_link_state(nic_dev, &link_state); if (!err && link_state == HINIC_LINK_STATE_UP) netif_carrier_on(netdev); + } static int hinic_set_phys_id(struct net_device *netdev, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 2b418b568767..c1f81e9144a1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -717,8 +717,8 @@ static int free_tx_poll(struct napi_struct *napi, int budget) netdev_txq = netdev_get_tx_queue(txq->netdev, qp->q_id); __netif_tx_lock(netdev_txq, smp_processor_id()); - - netif_wake_subqueue(nic_dev->netdev, qp->q_id); + if (!netif_testing(nic_dev->netdev)) + netif_wake_subqueue(nic_dev->netdev, qp->q_id); __netif_tx_unlock(netdev_txq); From 6eb158ec0a45dbfd98bc6971c461b7d4d5bf61b3 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Fri, 11 Sep 2020 17:01:39 +0200 Subject: [PATCH 580/648] i2c: mxs: use MXS_DMA_CTRL_WAIT4END instead of DMA_CTRL_ACK The driver-specific usage of the DMA_CTRL_ACK flag was replaced with a custom flag in commit ceeeb99cd821 ("dmaengine: mxs: rename custom flag"), but i2c-mxs was not updated to use the new flag, completely breaking I2C transactions using DMA. Fixes: ceeeb99cd821 ("dmaengine: mxs: rename custom flag") Signed-off-by: Matthias Schiffer Reviewed-by: Fabio Estevam Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mxs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index 9587347447f0..c4b08a924461 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -25,6 +25,7 @@ #include #include #include +#include #define DRIVER_NAME "mxs-i2c" @@ -200,7 +201,8 @@ static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, dma_map_sg(i2c->dev, &i2c->sg_io[0], 1, DMA_TO_DEVICE); desc = dmaengine_prep_slave_sg(i2c->dmach, &i2c->sg_io[0], 1, DMA_MEM_TO_DEV, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + DMA_PREP_INTERRUPT | + MXS_DMA_CTRL_WAIT4END); if (!desc) { dev_err(i2c->dev, "Failed to get DMA data write descriptor.\n"); @@ -228,7 +230,8 @@ static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, dma_map_sg(i2c->dev, &i2c->sg_io[1], 1, DMA_FROM_DEVICE); desc = dmaengine_prep_slave_sg(i2c->dmach, &i2c->sg_io[1], 1, DMA_DEV_TO_MEM, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + DMA_PREP_INTERRUPT | + MXS_DMA_CTRL_WAIT4END); if (!desc) { dev_err(i2c->dev, "Failed to get DMA data write descriptor.\n"); @@ -260,7 +263,8 @@ static int mxs_i2c_dma_setup_xfer(struct i2c_adapter *adap, dma_map_sg(i2c->dev, i2c->sg_io, 2, DMA_TO_DEVICE); desc = dmaengine_prep_slave_sg(i2c->dmach, i2c->sg_io, 2, DMA_MEM_TO_DEV, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + DMA_PREP_INTERRUPT | + MXS_DMA_CTRL_WAIT4END); if (!desc) { dev_err(i2c->dev, "Failed to get DMA data write descriptor.\n"); From a3a94156c15737513a8ddda2b8770d2659bbc617 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Fri, 18 Sep 2020 14:36:46 +0800 Subject: [PATCH 581/648] net: hns: kerneldoc fixes Fix some parameter description mistakes. Signed-off-by: Lu Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index ed3829ae4ef1..a769273b36f7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -334,7 +334,7 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev, * bit6-11 for ppe0-5 * bit12-17 for roce0-5 * bit18-19 for com/dfx - * @enable: false - request reset , true - drop reset + * @dereset: false - request reset , true - drop reset */ static void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) @@ -357,7 +357,7 @@ hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) * bit6-11 for ppe0-5 * bit12-17 for roce0-5 * bit18-19 for com/dfx - * @enable: false - request reset , true - drop reset + * @dereset: false - request reset , true - drop reset */ static void hns_dsaf_srst_chns_acpi(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) From a128592799b89d3985331d261fa4d41868ee4b04 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Fri, 18 Sep 2020 17:22:25 +0800 Subject: [PATCH 582/648] dpaa2-eth: fix a build warning in dpmac.c Fix below sparse warning in dpmac.c. warning: cast to restricted __le64 Signed-off-by: Yangbo Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h index 3ea51dd9374b..a24b20f76938 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac-cmd.h @@ -66,8 +66,8 @@ struct dpmac_cmd_get_counter { }; struct dpmac_rsp_get_counter { - u64 pad; - u64 counter; + __le64 pad; + __le64 counter; }; #endif /* _FSL_DPMAC_CMD_H */ From db7cd91a4be15e1485d6b58c6afc8761c59c4efb Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Thu, 17 Sep 2020 19:46:43 +0300 Subject: [PATCH 583/648] net: ipv6: fix kconfig dependency warning for IPV6_SEG6_HMAC When IPV6_SEG6_HMAC is enabled and CRYPTO is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for CRYPTO_HMAC Depends on [n]: CRYPTO [=n] Selected by [y]: - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y] WARNING: unmet direct dependencies detected for CRYPTO_SHA1 Depends on [n]: CRYPTO [=n] Selected by [y]: - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y] WARNING: unmet direct dependencies detected for CRYPTO_SHA256 Depends on [n]: CRYPTO [=n] Selected by [y]: - IPV6_SEG6_HMAC [=y] && NET [=y] && INET [=y] && IPV6 [=y] The reason is that IPV6_SEG6_HMAC selects CRYPTO_HMAC, CRYPTO_SHA1, and CRYPTO_SHA256 without depending on or selecting CRYPTO while those configs are subordinate to CRYPTO. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Signed-off-by: Necip Fazil Yildiran Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 76bff79d6fed..747f56e0c636 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -303,6 +303,7 @@ config IPV6_SEG6_LWTUNNEL config IPV6_SEG6_HMAC bool "IPv6: Segment Routing HMAC support" depends on IPV6 + select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 select CRYPTO_SHA256 From f13d783a184e4868c5fdbdf20c90a8e323f66dd7 Mon Sep 17 00:00:00 2001 From: Cristobal Forno Date: Fri, 18 Sep 2020 13:47:43 -0500 Subject: [PATCH 584/648] MAINTAINERS: Update ibmveth maintainer Removed Thomas Falcon. Added myself (Cristobal Forno) as the maintainer of ibmveth. Signed-off-by: Cristobal Forno Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 923c69ad4eec..ad9a35b91325 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8331,7 +8331,7 @@ F: arch/powerpc/platforms/powernv/copy-paste.h F: arch/powerpc/platforms/powernv/vas* IBM Power Virtual Ethernet Device Driver -M: Thomas Falcon +M: Cristobal Forno L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmveth.* From a27026e95b57acbb8df7182e020b099e7f3b5ac3 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 15 Sep 2020 15:03:24 +0800 Subject: [PATCH 585/648] bootconfig: init: make xbc_namebuf static This eliminates the following sparse warning: init/main.c:306:6: warning: symbol 'xbc_namebuf' was not declared. Should it be static? Link: https://lkml.kernel.org/r/20200915070324.2239473-1-yanaijie@huawei.com Reported-by: Hulk Robot Acked-by: Masami Hiramatsu Signed-off-by: Jason Yan Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 038128b2a755..e880b4ecb314 100644 --- a/init/main.c +++ b/init/main.c @@ -304,7 +304,7 @@ static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) #ifdef CONFIG_BOOT_CONFIG -char xbc_namebuf[XBC_KEYLEN_MAX] __initdata; +static char xbc_namebuf[XBC_KEYLEN_MAX] __initdata; #define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0) From b5bfe7dca3e00a7c7de900b2d671c06cd66d7dee Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Sep 2020 21:20:00 -0700 Subject: [PATCH 586/648] mailmap: add older email addresses for Kees Cook This adds explicit mailmap entries for my older/other email addresses. Reported-by: Joe Perches Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Jonathan Corbet Link: https://lkml.kernel.org/r/20200910193939.3798377-1-keescook@chromium.org Signed-off-by: Linus Torvalds --- .mailmap | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index 50096b96c85d..a780211468e4 100644 --- a/.mailmap +++ b/.mailmap @@ -169,6 +169,10 @@ Juha Yrjola Julien Thierry Kamil Konieczny Kay Sievers +Kees Cook +Kees Cook +Kees Cook +Kees Cook Kenneth W Chen Konstantin Khlebnikov Konstantin Khlebnikov From 62fdb1632bcbed30c40f6bd2b58297617e442658 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 18 Sep 2020 21:20:03 -0700 Subject: [PATCH 587/648] ksm: reinstate memcg charge on copied pages Patch series "mm: fixes to past from future testing". Here's a set of independent fixes against 5.9-rc2: prompted by testing Alex Shi's "warning on !memcg" and lru_lock series, but I think fit for 5.9 - though maybe only the first for stable. This patch (of 5): In 5.8 some instances of memcg charging in do_swap_page() and unuse_pte() were removed, on the understanding that swap cache is now already charged at those points; but a case was missed, when ksm_might_need_to_copy() has decided it must allocate a substitute page: such pages were never charged. Fix it inside ksm_might_need_to_copy(). This was discovered by Alex Shi's prospective commit "mm/memcg: warning on !memcg after readahead page charged". But there is a another surprise: this also fixes some rarer uncharged PageAnon cases, when KSM is configured in, but has never been activated. ksm_might_need_to_copy()'s anon_vma->root and linear_page_index() check sometimes catches a case which would need to have been copied if KSM were turned on. Or that's my optimistic interpretation (of my own old code), but it leaves some doubt as to whether everything is working as intended there - might it hint at rare anon ptes which rmap cannot find? A question not easily answered: put in the fix for missed memcg charges. Cc; Matthew Wilcox Fixes: 4c6355b25e8b ("mm: memcontrol: charge swapin pages on instantiation") Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Alex Shi Cc: Michal Hocko Cc: Mike Kravetz Cc: Qian Cai Cc: [5.8] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008301343270.5954@eggly.anvils Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008301358020.5954@eggly.anvils Signed-off-by: Linus Torvalds --- mm/ksm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/ksm.c b/mm/ksm.c index 235f55d01541..9afccc36dbd2 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2586,6 +2586,10 @@ struct page *ksm_might_need_to_copy(struct page *page, return page; /* let do_swap_page report the error */ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) { + put_page(new_page); + new_page = NULL; + } if (new_page) { copy_user_highpage(new_page, page, address, vma); From a333e3e73b6648d3bd3ef6b971a59a6363bfcfc5 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 18 Sep 2020 21:20:06 -0700 Subject: [PATCH 588/648] mm: migration of hugetlbfs page skip memcg hugetlbfs pages do not participate in memcg: so although they do find most of migrate_page_states() useful, it would be better if they did not call into mem_cgroup_migrate() - where Qian Cai reported that LTP's move_pages12 triggers the warning in Alex Shi's prospective commit "mm/memcg: warning on !memcg after readahead page charged". Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Alex Shi Cc: Michal Hocko Cc: Mike Kravetz Cc: Qian Cai Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008301359460.5954@eggly.anvils Signed-off-by: Linus Torvalds --- mm/migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 941b89383cf3..aecb1433cf3c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -668,7 +668,8 @@ void migrate_page_states(struct page *newpage, struct page *page) copy_page_owner(page, newpage); - mem_cgroup_migrate(page, newpage); + if (!PageHuge(page)) + mem_cgroup_migrate(page, newpage); } EXPORT_SYMBOL(migrate_page_states); From 8d8869ca5d2d9d86db96271ab063fdcfa9baf5b4 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 18 Sep 2020 21:20:12 -0700 Subject: [PATCH 589/648] mm: fix check_move_unevictable_pages() on THP check_move_unevictable_pages() is used in making unevictable shmem pages evictable: by shmem_unlock_mapping(), drm_gem_check_release_pagevec() and i915/gem check_release_pagevec(). Those may pass down subpages of a huge page, when /sys/kernel/mm/transparent_hugepage/shmem_enabled is "force". That does not crash or warn at present, but the accounting of vmstats unevictable_pgs_scanned and unevictable_pgs_rescued is inconsistent: scanned being incremented on each subpage, rescued only on the head (since tails already appear evictable once the head has been updated). 5.8 commit 5d91f31faf8e ("mm: swap: fix vmstats for huge page") has established that vm_events in general (and unevictable_pgs_rescued in particular) should count every subpage: so follow that precedent here. Do this in such a way that if mem_cgroup_page_lruvec() is made stricter (to check page->mem_cgroup is always set), no problem: skip the tails before calling it, and add thp_nr_pages() to vmstats on the head. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Yang Shi Cc: Johannes Weiner Cc: Michal Hocko Cc: Mike Kravetz Cc: Qian Cai Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008301405000.5954@eggly.anvils Signed-off-by: Linus Torvalds --- mm/vmscan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 9727dd8e2581..466fc3144fff 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4268,8 +4268,14 @@ void check_move_unevictable_pages(struct pagevec *pvec) for (i = 0; i < pvec->nr; i++) { struct page *page = pvec->pages[i]; struct pglist_data *pagepgdat = page_pgdat(page); + int nr_pages; + + if (PageTransTail(page)) + continue; + + nr_pages = thp_nr_pages(page); + pgscanned += nr_pages; - pgscanned++; if (pagepgdat != pgdat) { if (pgdat) spin_unlock_irq(&pgdat->lru_lock); @@ -4288,7 +4294,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) ClearPageUnevictable(page); del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); add_page_to_lru_list(page, lruvec, lru); - pgrescued++; + pgrescued += nr_pages; } } From 0964730bf46b4e271c5ecad5badbbd95737c087b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 18 Sep 2020 21:20:15 -0700 Subject: [PATCH 590/648] mlock: fix unevictable_pgs event counts on THP 5.8 commit 5d91f31faf8e ("mm: swap: fix vmstats for huge page") has established that vm_events should count every subpage of a THP, including unevictable_pgs_culled and unevictable_pgs_rescued; but lru_cache_add_inactive_or_unevictable() was not doing so for unevictable_pgs_mlocked, and mm/mlock.c was not doing so for unevictable_pgs mlocked, munlocked, cleared and stranded. Fix them; but THPs don't go the pagevec way in mlock.c, so no fixes needed on that path. Fixes: 5d91f31faf8e ("mm: swap: fix vmstats for huge page") Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Yang Shi Cc: Alex Shi Cc: Johannes Weiner Cc: Michal Hocko Cc: Mike Kravetz Cc: Qian Cai Link: http://lkml.kernel.org/r/alpine.LSU.2.11.2008301408230.5954@eggly.anvils Signed-off-by: Linus Torvalds --- mm/mlock.c | 24 +++++++++++++++--------- mm/swap.c | 6 +++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 93ca2bf30b4f..884b1216da6a 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -58,11 +58,14 @@ EXPORT_SYMBOL(can_do_mlock); */ void clear_page_mlock(struct page *page) { + int nr_pages; + if (!TestClearPageMlocked(page)) return; - mod_zone_page_state(page_zone(page), NR_MLOCK, -thp_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGCLEARED); + nr_pages = thp_nr_pages(page); + mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); + count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages); /* * The previous TestClearPageMlocked() corresponds to the smp_mb() * in __pagevec_lru_add_fn(). @@ -76,7 +79,7 @@ void clear_page_mlock(struct page *page) * We lost the race. the page already moved to evictable list. */ if (PageUnevictable(page)) - count_vm_event(UNEVICTABLE_PGSTRANDED); + count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); } } @@ -93,9 +96,10 @@ void mlock_vma_page(struct page *page) VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); if (!TestSetPageMlocked(page)) { - mod_zone_page_state(page_zone(page), NR_MLOCK, - thp_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGMLOCKED); + int nr_pages = thp_nr_pages(page); + + mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); + count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); if (!isolate_lru_page(page)) putback_lru_page(page); } @@ -138,7 +142,7 @@ static void __munlock_isolated_page(struct page *page) /* Did try_to_unlock() succeed or punt? */ if (!PageMlocked(page)) - count_vm_event(UNEVICTABLE_PGMUNLOCKED); + count_vm_events(UNEVICTABLE_PGMUNLOCKED, thp_nr_pages(page)); putback_lru_page(page); } @@ -154,10 +158,12 @@ static void __munlock_isolated_page(struct page *page) */ static void __munlock_isolation_failed(struct page *page) { + int nr_pages = thp_nr_pages(page); + if (PageUnevictable(page)) - __count_vm_event(UNEVICTABLE_PGSTRANDED); + __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); else - __count_vm_event(UNEVICTABLE_PGMUNLOCKED); + __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages); } /** diff --git a/mm/swap.c b/mm/swap.c index d16d65d9b4e0..e7bdf094f76a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -494,14 +494,14 @@ void lru_cache_add_inactive_or_unevictable(struct page *page, unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED; if (unlikely(unevictable) && !TestSetPageMlocked(page)) { + int nr_pages = thp_nr_pages(page); /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte * lock is held(spinlock), which implies preemption disabled. */ - __mod_zone_page_state(page_zone(page), NR_MLOCK, - thp_nr_pages(page)); - count_vm_event(UNEVICTABLE_PGMLOCKED); + __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); + count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); } lru_cache_add(page); } From bb3e96d63eb75a2f4ff790b089f6b93614c729a1 Mon Sep 17 00:00:00 2001 From: Byron Stanoszek Date: Fri, 18 Sep 2020 21:20:18 -0700 Subject: [PATCH 591/648] tmpfs: restore functionality of nr_inodes=0 Commit e809d5f0b5c9 ("tmpfs: per-superblock i_ino support") made changes to shmem_reserve_inode() in mm/shmem.c, however the original test for (sbinfo->max_inodes) got dropped. This causes mounting tmpfs with option nr_inodes=0 to fail: # mount -ttmpfs -onr_inodes=0 none /ext0 mount: /ext0: mount(2) system call failed: Cannot allocate memory. This patch restores the nr_inodes=0 functionality. Fixes: e809d5f0b5c9 ("tmpfs: per-superblock i_ino support") Signed-off-by: Byron Stanoszek Signed-off-by: Andrew Morton Acked-by: Hugh Dickins Acked-by: Chris Down Link: https://lkml.kernel.org/r/20200902035715.16414-1-gandalf@winds.org Signed-off-by: Linus Torvalds --- mm/shmem.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 271548ca20f3..8e2b35ba93ad 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -279,11 +279,13 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) if (!(sb->s_flags & SB_KERNMOUNT)) { spin_lock(&sbinfo->stat_lock); - if (!sbinfo->free_inodes) { - spin_unlock(&sbinfo->stat_lock); - return -ENOSPC; + if (sbinfo->max_inodes) { + if (!sbinfo->free_inodes) { + spin_unlock(&sbinfo->stat_lock); + return -ENOSPC; + } + sbinfo->free_inodes--; } - sbinfo->free_inodes--; if (inop) { ino = sbinfo->next_ino++; if (unlikely(is_zero_ino(ino))) From b0399092ccebd9feef68d4ceb8d6219a8c0caa05 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 18 Sep 2020 21:20:21 -0700 Subject: [PATCH 592/648] kprobes: fix kill kprobe which has been marked as gone If a kprobe is marked as gone, we should not kill it again. Otherwise, we can disarm the kprobe more than once. In that case, the statistics of kprobe_ftrace_enabled can unbalance which can lead to that kprobe do not work. Fixes: e8386a0cb22f ("kprobes: support probing module __exit function") Co-developed-by: Chengming Zhou Signed-off-by: Muchun Song Signed-off-by: Chengming Zhou Signed-off-by: Andrew Morton Acked-by: Masami Hiramatsu Cc: "Naveen N . Rao" Cc: Anil S Keshavamurthy Cc: David S. Miller Cc: Song Liu Cc: Steven Rostedt Cc: Link: https://lkml.kernel.org/r/20200822030055.32383-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 287b263c9cb9..049da84e1952 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2140,6 +2140,9 @@ static void kill_kprobe(struct kprobe *p) lockdep_assert_held(&kprobe_mutex); + if (WARN_ON_ONCE(kprobe_gone(p))) + return; + p->flags |= KPROBE_FLAG_GONE; if (kprobe_aggrprobe(p)) { /* @@ -2419,7 +2422,10 @@ static int kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry(p, head, hlist) + hlist_for_each_entry(p, head, hlist) { + if (kprobe_gone(p)) + continue; + if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2436,6 +2442,7 @@ static int kprobes_module_callback(struct notifier_block *nb, */ kill_kprobe(p); } + } } if (val == MODULE_STATE_GOING) remove_module_kprobe_blacklist(mod); From ec0abae6dcdf7ef88607c869bf35a4b63ce1b370 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Fri, 18 Sep 2020 21:20:24 -0700 Subject: [PATCH 593/648] mm/thp: fix __split_huge_pmd_locked() for migration PMD A migrating transparent huge page has to already be unmapped. Otherwise, the page could be modified while it is being copied to a new page and data could be lost. The function __split_huge_pmd() checks for a PMD migration entry before calling __split_huge_pmd_locked() leading one to think that __split_huge_pmd_locked() can handle splitting a migrating PMD. However, the code always increments the page->_mapcount and adjusts the memory control group accounting assuming the page is mapped. Also, if the PMD entry is a migration PMD entry, the call to is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead of migration_entry_to_pfn(pmd_to_swp_entry(pmd)). Fix these problems by checking for a PMD migration entry. Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path") Signed-off-by: Ralph Campbell Signed-off-by: Andrew Morton Reviewed-by: Yang Shi Reviewed-by: Zi Yan Cc: Jerome Glisse Cc: John Hubbard Cc: Alistair Popple Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Bharata B Rao Cc: Ben Skeggs Cc: Shuah Khan Cc: [4.14+] Link: https://lkml.kernel.org/r/20200903183140.19055-1-rcampbell@nvidia.com Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7ff29cc3d55c..faadc449cca5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2022,7 +2022,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, put_page(page); add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (is_huge_zero_pmd(*pmd)) { + } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside @@ -2116,30 +2116,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, pte = pte_offset_map(&_pmd, addr); BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); - atomic_inc(&page[i]._mapcount); + if (!pmd_migration) + atomic_inc(&page[i]._mapcount); pte_unmap(pte); } - /* - * Set PG_double_map before dropping compound_mapcount to avoid - * false-negative page_mapped(). - */ - if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) { - for (i = 0; i < HPAGE_PMD_NR; i++) - atomic_inc(&page[i]._mapcount); - } - - lock_page_memcg(page); - if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { - /* Last compound_mapcount is gone. */ - __dec_lruvec_page_state(page, NR_ANON_THPS); - if (TestClearPageDoubleMap(page)) { - /* No need in mapcount reference anymore */ + if (!pmd_migration) { + /* + * Set PG_double_map before dropping compound_mapcount to avoid + * false-negative page_mapped(). + */ + if (compound_mapcount(page) > 1 && + !TestSetPageDoubleMap(page)) { for (i = 0; i < HPAGE_PMD_NR; i++) - atomic_dec(&page[i]._mapcount); + atomic_inc(&page[i]._mapcount); } + + lock_page_memcg(page); + if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { + /* Last compound_mapcount is gone. */ + __dec_lruvec_page_state(page, NR_ANON_THPS); + if (TestClearPageDoubleMap(page)) { + /* No need in mapcount reference anymore */ + for (i = 0; i < HPAGE_PMD_NR; i++) + atomic_dec(&page[i]._mapcount); + } + } + unlock_page_memcg(page); } - unlock_page_memcg(page); smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); From 1ec882fc81e3177faf055877310dbdb0c68eb7db Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Sep 2020 21:20:28 -0700 Subject: [PATCH 594/648] selftests/vm: fix display of page size in map_hugetlb The displayed size is in bytes while the text says it is in kB. Shift it by 10 to really display kBytes. Fixes: fa7b9a805c79 ("tools/selftest/vm: allow choosing mem size and page size in map_hugetlb") Signed-off-by: Christophe Leroy Signed-off-by: Andrew Morton Cc: Link: https://lkml.kernel.org/r/e27481224564a93d14106e750de31189deaa8bc8.1598861977.git.christophe.leroy@csgroup.eu Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/map_hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 6af951900aa3..312889edb84a 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -83,7 +83,7 @@ int main(int argc, char **argv) } if (shift) - printf("%u kB hugepages\n", 1 << shift); + printf("%u kB hugepages\n", 1 << (shift - 10)); else printf("Default size hugepages\n"); printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20); From 9683182612214aa5f5e709fad49444b847cd866a Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 18 Sep 2020 21:20:31 -0700 Subject: [PATCH 595/648] mm/memory_hotplug: drain per-cpu pages again during memory offline There is a race during page offline that can lead to infinite loop: a page never ends up on a buddy list and __offline_pages() keeps retrying infinitely or until a termination signal is received. Thread#1 - a new process: load_elf_binary begin_new_exec exec_mmap mmput exit_mmap tlb_finish_mmu tlb_flush_mmu release_pages free_unref_page_list free_unref_page_prepare set_pcppage_migratetype(page, migratetype); // Set page->index migration type below MIGRATE_PCPTYPES Thread#2 - hot-removes memory __offline_pages start_isolate_page_range set_migratetype_isolate set_pageblock_migratetype(page, MIGRATE_ISOLATE); Set migration type to MIGRATE_ISOLATE-> set drain_all_pages(zone); // drain per-cpu page lists to buddy allocator. Thread#1 - continue free_unref_page_commit migratetype = get_pcppage_migratetype(page); // get old migration type list_add(&page->lru, &pcp->lists[migratetype]); // add new page to already drained pcp list Thread#2 Never drains pcp again, and therefore gets stuck in the loop. The fix is to try to drain per-cpu lists again after check_pages_isolated_cb() fails. Fixes: c52e75935f8d ("mm: remove extra drain pages on pcp list") Signed-off-by: Pavel Tatashin Signed-off-by: Andrew Morton Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: David Hildenbrand Cc: Oscar Salvador Cc: Wei Yang Cc: Link: https://lkml.kernel.org/r/20200903140032.380431-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20200904151448.100489-2-pasha.tatashin@soleen.com Link: http://lkml.kernel.org/r/20200904070235.GA15277@dhcp22.suse.cz Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 14 ++++++++++++++ mm/page_isolation.c | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e9d5ab5d3ca0..b11a269e2356 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1575,6 +1575,20 @@ static int __ref __offline_pages(unsigned long start_pfn, /* check again */ ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn, NULL, check_pages_isolated_cb); + /* + * per-cpu pages are drained in start_isolate_page_range, but if + * there are still pages that are not free, make sure that we + * drain again, because when we isolated range we might + * have raced with another thread that was adding pages to pcp + * list. + * + * Forward progress should be still guaranteed because + * pages on the pcp list can only belong to MOVABLE_ZONE + * because has_unmovable_pages explicitly checks for + * PageBuddy on freed pages on other zones. + */ + if (ret) + drain_all_pages(zone); } while (ret); /* Ok, all of our target is isolated. diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 242c03121d73..63a3db10a8c0 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -170,6 +170,14 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * pageblocks we may have modified and return -EBUSY to caller. This * prevents two threads from simultaneously working on overlapping ranges. * + * Please note that there is no strong synchronization with the page allocator + * either. Pages might be freed while their page blocks are marked ISOLATED. + * In some cases pages might still end up on pcp lists and that would allow + * for their allocation even when they are in fact isolated already. Depending + * on how strong of a guarantee the caller needs drain_all_pages might be needed + * (e.g. __offline_pages will need to call it after check for isolated range for + * a next retry). + * * Return: the number of isolated pageblocks on success and -EBUSY if any part * of range cannot be isolated. */ From 7bb82ac30c3dd4ecf1485685cbe84d2ba10dddf4 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 18 Sep 2020 21:20:34 -0700 Subject: [PATCH 596/648] ftrace: let ftrace_enable_sysctl take a kernel pointer buffer Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the signature of ftrace_enable_sysctl to match ctl_table.proc_handler which fixes the following sparse warning: kernel/trace/ftrace.c:7544:43: warning: incorrect type in argument 3 (different address spaces) kernel/trace/ftrace.c:7544:43: expected void * kernel/trace/ftrace.c:7544:43: got void [noderef] __user *buffer Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Cc: Christoph Hellwig Cc: Al Viro Link: https://lkml.kernel.org/r/20200907093207.13540-1-tklauser@distanz.ch Signed-off-by: Linus Torvalds --- include/linux/ftrace.h | 3 +-- kernel/trace/ftrace.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ce2c06f72e86..e5c2d5cc6e6a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -85,8 +85,7 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); struct ftrace_ops; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 275441254bb5..e9fa580f3083 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7531,8 +7531,7 @@ static bool is_permanent_ops_registered(void) int ftrace_enable_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = -ENODEV; From 4773ef33fc6e59bad2e5d19e334de2fa79c27b74 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 18 Sep 2020 21:20:37 -0700 Subject: [PATCH 597/648] stackleak: let stack_erasing_sysctl take a kernel pointer buffer Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the signature of stack_erasing_sysctl to match ctl_table.proc_handler which fixes the following sparse warning: kernel/stackleak.c:31:50: warning: incorrect type in argument 3 (different address spaces) kernel/stackleak.c:31:50: expected void * kernel/stackleak.c:31:50: got void [noderef] __user *buffer Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Cc: Christoph Hellwig Cc: Al Viro Link: https://lkml.kernel.org/r/20200907093253.13656-1-tklauser@distanz.ch Signed-off-by: Linus Torvalds --- include/linux/stackleak.h | 2 +- kernel/stackleak.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index 3d5c3271a9a8..a59db2f08e76 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -25,7 +25,7 @@ static inline void stackleak_task_init(struct task_struct *t) #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE int stack_erasing_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #endif #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ diff --git a/kernel/stackleak.c b/kernel/stackleak.c index a8fc9ae1d03d..ce161a8e8d97 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -20,7 +20,7 @@ static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); int stack_erasing_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; int state = !static_branch_unlikely(&stack_erasing_bypass); From 9ca48e20ec5cb3427ca57ffac9ed2b87090ab488 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 18 Sep 2020 21:20:39 -0700 Subject: [PATCH 598/648] fs/fs-writeback.c: adjust dirtytime_interval_handler definition to match prototype Commit 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") changed ctl_table.proc_handler to take a kernel pointer. Adjust the definition of dirtytime_interval_handler to match its prototype in linux/writeback.h which fixes the following sparse error/warning: fs/fs-writeback.c:2189:50: warning: incorrect type in argument 3 (different address spaces) fs/fs-writeback.c:2189:50: expected void * fs/fs-writeback.c:2189:50: got void [noderef] __user *buffer fs/fs-writeback.c:2184:5: error: symbol 'dirtytime_interval_handler' redeclared with different type (incompatible argument 3 (different address spaces)): fs/fs-writeback.c:2184:5: int extern [addressable] [signed] [toplevel] dirtytime_interval_handler( ... ) fs/fs-writeback.c: note: in included file: ./include/linux/writeback.h:374:5: note: previously declared as: ./include/linux/writeback.h:374:5: int extern [addressable] [signed] [toplevel] dirtytime_interval_handler( ... ) Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Reviewed-by: Jan Kara Cc: Christoph Hellwig Cc: Al Viro Link: https://lkml.kernel.org/r/20200907093140.13434-1-tklauser@distanz.ch Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 149227160ff0..58b27e4070a3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2184,7 +2184,7 @@ static int __init start_dirtytime_writeback(void) __initcall(start_dirtytime_writeback); int dirtytime_interval_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; From 2645d432051cd4e6c04ee8af23be07c92f1f52a2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 18 Sep 2020 21:20:42 -0700 Subject: [PATCH 599/648] kcsan: kconfig: move to menu 'Generic Kernel Debugging Instruments' This moves the KCSAN kconfig items under menu 'Generic Kernel Debugging Instruments' where UBSAN resides. Signed-off-by: Changbin Du Signed-off-by: Andrew Morton Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: Marco Elver Link: https://lkml.kernel.org/r/20200904152224.5570-1-changbin.du@gmail.com Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e068c3c7189a..0c781f912f9f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -520,8 +520,8 @@ config DEBUG_FS_ALLOW_NONE endchoice source "lib/Kconfig.kgdb" - source "lib/Kconfig.ubsan" +source "lib/Kconfig.kcsan" endmenu @@ -1620,8 +1620,6 @@ config PROVIDE_OHCI1394_DMA_INIT source "samples/Kconfig" -source "lib/Kconfig.kcsan" - config ARCH_HAS_DEVMEM_IS_ALLOWED bool From 21190b74bcf3a36ebab9a715088c29f59877e1f3 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Tue, 4 Aug 2020 11:02:05 +0800 Subject: [PATCH 600/648] riscv: Add sfence.vma after early page table changes This invalidates local TLB after modifying the page tables during early init as it's too early to handle suprious faults as we otherwise do. Fixes: f2c17aabc917 ("RISC-V: Implement compile-time fixed mappings") Reported-by: Syven Wang Signed-off-by: Syven Wang Signed-off-by: Greentime Hu Reviewed-by: Anup Patel [Palmer: Cleaned up the commit text] Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 787c75f751a5..ca03762a3733 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -226,12 +226,11 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) ptep = &fixmap_pte[pte_index(addr)]; - if (pgprot_val(prot)) { + if (pgprot_val(prot)) set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); - } else { + else pte_clear(&init_mm, addr, ptep); - local_flush_tlb_page(addr); - } + local_flush_tlb_page(addr); } static pte_t *__init get_pte_virt(phys_addr_t pa) From f025d9d9934b84cd03b7796072d10686029c408e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 16 Sep 2020 16:59:41 +0900 Subject: [PATCH 601/648] riscv: Fix Kendryte K210 device tree The Kendryte K210 SoC CLINT is compatible with Sifive clint v0 (sifive,clint0). Fix the Kendryte K210 device tree clint entry to be inline with the sifive timer definition documented in Documentation/devicetree/bindings/timer/sifive,clint.yaml. The device tree clint entry is renamed similarly to u-boot device tree definition to improve compatibility with u-boot defined device tree. To ensure correct initialization, the interrup-cells attribute is added and the interrupt-extended attribute definition fixed. This fixes boot failures with Kendryte K210 SoC boards. Note that the clock referenced is kept as K210_CLK_ACLK, which does not necessarilly match the clint MTIME increment rate. This however does not seem to cause any problem for now. Signed-off-by: Damien Le Moal Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/kendryte/k210.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/boot/dts/kendryte/k210.dtsi b/arch/riscv/boot/dts/kendryte/k210.dtsi index c1df56ccb8d5..d2d0ff645632 100644 --- a/arch/riscv/boot/dts/kendryte/k210.dtsi +++ b/arch/riscv/boot/dts/kendryte/k210.dtsi @@ -95,10 +95,12 @@ sysctl: sysctl@50440000 { #clock-cells = <1>; }; - clint0: interrupt-controller@2000000 { + clint0: clint@2000000 { + #interrupt-cells = <1>; compatible = "riscv,clint0"; reg = <0x2000000 0xC000>; - interrupts-extended = <&cpu0_intc 3>, <&cpu1_intc 3>; + interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 + &cpu1_intc 3 &cpu1_intc 7>; clocks = <&sysctl K210_CLK_ACLK>; }; From d5be89a8d118a8e8d09cd74a921a808f17fbdd09 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Mon, 14 Sep 2020 09:56:30 -0700 Subject: [PATCH 602/648] RISC-V: Resurrect the MMIO timer implementation for M-mode systems The K210 doesn't implement rdtime in M-mode, and since that's where Linux runs in the NOMMU systems that means we can't use rdtime. The K210 is the only system that anyone is currently running NOMMU or M-mode on, so here we're just inlining the timer read directly. This also adds the CLINT driver as an !MMU dependency, as it's currently the only timer driver availiable for these systems and without it we get a build failure for some configurations. Tested-by: Damien Le Moal Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/clint.h | 26 ++++++++++++++++++++++++++ arch/riscv/include/asm/timex.h | 27 +++++++++++++++++++++++++++ drivers/clocksource/timer-clint.c | 17 +++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 arch/riscv/include/asm/clint.h diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index df18372861d8..7766e1289468 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -32,6 +32,7 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select CLONE_BACKWARDS + select CLINT_TIMER if !MMU select COMMON_CLK select EDAC_SUPPORT select GENERIC_ARCH_TOPOLOGY if SMP diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h new file mode 100644 index 000000000000..0789fd37b40a --- /dev/null +++ b/arch/riscv/include/asm/clint.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Google, Inc + */ + +#ifndef _ASM_RISCV_CLINT_H +#define _ASM_RISCV_CLINT_H + +#include +#include + +#ifdef CONFIG_RISCV_M_MODE +/* + * This lives in the CLINT driver, but is accessed directly by timex.h to avoid + * any overhead when accessing the MMIO timer. + * + * The ISA defines mtime as a 64-bit memory-mapped register that increments at + * a constant frequency, but it doesn't define some other constraints we depend + * on (most notably ordering constraints, but also some simpler stuff like the + * memory layout). Thus, this is called "clint_time_val" instead of something + * like "riscv_mtime", to signify that these non-ISA assumptions must hold. + */ +extern u64 __iomem *clint_time_val; +#endif + +#endif diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index a3fb85d505d4..7f659dda0032 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -10,6 +10,31 @@ typedef unsigned long cycles_t; +#ifdef CONFIG_RISCV_M_MODE + +#include + +#ifdef CONFIG_64BIT +static inline cycles_t get_cycles(void) +{ + return readq_relaxed(clint_time_val); +} +#else /* !CONFIG_64BIT */ +static inline u32 get_cycles(void) +{ + return readl_relaxed(((u32 *)clint_time_val)); +} +#define get_cycles get_cycles + +static inline u32 get_cycles_hi(void) +{ + return readl_relaxed(((u32 *)clint_time_val) + 1); +} +#define get_cycles_hi get_cycles_hi +#endif /* CONFIG_64BIT */ + +#else /* CONFIG_RISCV_M_MODE */ + static inline cycles_t get_cycles(void) { return csr_read(CSR_TIME); @@ -41,6 +66,8 @@ static inline u64 get_cycles64(void) } #endif /* CONFIG_64BIT */ +#endif /* !CONFIG_RISCV_M_MODE */ + #define ARCH_HAS_READ_CURRENT_TIMER static inline int read_current_timer(unsigned long *timer_val) { diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c index 8eeafa82c03d..d17367dee02c 100644 --- a/drivers/clocksource/timer-clint.c +++ b/drivers/clocksource/timer-clint.c @@ -19,6 +19,11 @@ #include #include #include +#include + +#ifndef CONFIG_RISCV_M_MODE +#include +#endif #define CLINT_IPI_OFF 0 #define CLINT_TIMER_CMP_OFF 0x4000 @@ -31,6 +36,10 @@ static u64 __iomem *clint_timer_val; static unsigned long clint_timer_freq; static unsigned int clint_timer_irq; +#ifdef CONFIG_RISCV_M_MODE +u64 __iomem *clint_time_val; +#endif + static void clint_send_ipi(const struct cpumask *target) { unsigned int cpu; @@ -184,6 +193,14 @@ static int __init clint_timer_init_dt(struct device_node *np) clint_timer_val = base + CLINT_TIMER_VAL_OFF; clint_timer_freq = riscv_timebase; +#ifdef CONFIG_RISCV_M_MODE + /* + * Yes, that's an odd naming scheme. time_val is public, but hopefully + * will die in favor of something cleaner. + */ + clint_time_val = clint_timer_val; +#endif + pr_info("%pOFP: timer running at %ld Hz\n", np, clint_timer_freq); rc = clocksource_register_hz(&clint_clocksource, clint_timer_freq); From 19b835a5db93254a46cb90c95ddde6959d7e1e06 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 9 Sep 2020 07:16:38 +0900 Subject: [PATCH 603/648] kconfig: qconf: fix incomplete type 'struct gstr' warning "make HOSTCXX=clang++ xconfig" reports the following: HOSTCXX scripts/kconfig/qconf.o In file included from scripts/kconfig/qconf.cc:23: In file included from scripts/kconfig/lkc.h:15: scripts/kconfig/lkc_proto.h:26:13: warning: 'get_relations_str' has C-linkage specified, but returns incomplete type 'struct gstr' which could be incompatible with C [-Wreturn-type-c-linkage] struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); ^ Currently, get_relations_str() is declared before the struct gstr definition. Move all declarations of menu.c functions below. BTW, some are declared in lkc.h and some in lkc_proto.h, but the difference is unclear. I guess some refactoring is needed. Signed-off-by: Masahiro Yamada Acked-by: Boris Kolpackov --- scripts/kconfig/lkc.h | 47 +++++++++++++++++++++++-------------- scripts/kconfig/lkc_proto.h | 14 ----------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h index d4ca8297364f..8454649b17bd 100644 --- a/scripts/kconfig/lkc.h +++ b/scripts/kconfig/lkc.h @@ -66,23 +66,6 @@ static inline void xfwrite(const void *str, size_t len, size_t count, FILE *out) fprintf(stderr, "Error in writing or end of file.\n"); } -/* menu.c */ -void _menu_init(void); -void menu_warn(struct menu *menu, const char *fmt, ...); -struct menu *menu_add_menu(void); -void menu_end_menu(void); -void menu_add_entry(struct symbol *sym); -void menu_add_dep(struct expr *dep); -void menu_add_visibility(struct expr *dep); -struct property *menu_add_prompt(enum prop_type type, char *prompt, struct expr *dep); -void menu_add_expr(enum prop_type type, struct expr *expr, struct expr *dep); -void menu_add_symbol(enum prop_type type, struct symbol *sym, struct expr *dep); -void menu_add_option_modules(void); -void menu_add_option_defconfig_list(void); -void menu_add_option_allnoconfig_y(void); -void menu_finalize(struct menu *parent); -void menu_set_type(int type); - /* util.c */ struct file *file_lookup(const char *name); void *xmalloc(size_t size); @@ -109,6 +92,36 @@ void str_append(struct gstr *gs, const char *s); void str_printf(struct gstr *gs, const char *fmt, ...); const char *str_get(struct gstr *gs); +/* menu.c */ +void _menu_init(void); +void menu_warn(struct menu *menu, const char *fmt, ...); +struct menu *menu_add_menu(void); +void menu_end_menu(void); +void menu_add_entry(struct symbol *sym); +void menu_add_dep(struct expr *dep); +void menu_add_visibility(struct expr *dep); +struct property *menu_add_prompt(enum prop_type type, char *prompt, struct expr *dep); +void menu_add_expr(enum prop_type type, struct expr *expr, struct expr *dep); +void menu_add_symbol(enum prop_type type, struct symbol *sym, struct expr *dep); +void menu_add_option_modules(void); +void menu_add_option_defconfig_list(void); +void menu_add_option_allnoconfig_y(void); +void menu_finalize(struct menu *parent); +void menu_set_type(int type); + +extern struct menu rootmenu; + +bool menu_is_empty(struct menu *menu); +bool menu_is_visible(struct menu *menu); +bool menu_has_prompt(struct menu *menu); +const char *menu_get_prompt(struct menu *menu); +struct menu *menu_get_root_menu(struct menu *menu); +struct menu *menu_get_parent_menu(struct menu *menu); +bool menu_has_help(struct menu *menu); +const char *menu_get_help(struct menu *menu); +struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); +void menu_get_ext_help(struct menu *menu, struct gstr *help); + /* symbol.c */ void sym_clear_all_valid(void); struct symbol *sym_choice_default(struct symbol *sym); diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h index f9ab98238aef..9e81be33c40f 100644 --- a/scripts/kconfig/lkc_proto.h +++ b/scripts/kconfig/lkc_proto.h @@ -12,20 +12,6 @@ bool conf_get_changed(void); void conf_set_changed_callback(void (*fn)(void)); void conf_set_message_callback(void (*fn)(const char *s)); -/* menu.c */ -extern struct menu rootmenu; - -bool menu_is_empty(struct menu *menu); -bool menu_is_visible(struct menu *menu); -bool menu_has_prompt(struct menu *menu); -const char * menu_get_prompt(struct menu *menu); -struct menu * menu_get_root_menu(struct menu *menu); -struct menu * menu_get_parent_menu(struct menu *menu); -bool menu_has_help(struct menu *menu); -const char * menu_get_help(struct menu *menu); -struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head); -void menu_get_ext_help(struct menu *menu, struct gstr *help); - /* symbol.c */ extern struct symbol * symbol_hash[SYMBOL_HASHSIZE]; From a46afd11414758233f23798cc5be51e554d04efe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 14 Sep 2020 23:59:48 +0900 Subject: [PATCH 604/648] kconfig: qconf: revive help message in the info view Since commit 68fd110b3e7e ("kconfig: qconf: remove redundant help in the info view"), the help message is no longer displayed. I intended to drop duplicated "Symbol:", "Type:", but precious info about help and reverse dependencies was lost too. Revive it now. "defined at" is contained in menu_get_ext_help(), so I made sure to not display it twice. Fixes: 68fd110b3e7e ("kconfig: qconf: remove redundant help in the info view") Reported-by: Maxim Levitsky Signed-off-by: Masahiro Yamada --- scripts/kconfig/qconf.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index c7216b9110fc..8ce624a3b54b 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1108,6 +1108,11 @@ void ConfigInfoView::menuInfo(void) if (showDebug()) stream << debug_info(sym); + struct gstr help_gstr = str_new(); + + menu_get_ext_help(_menu, &help_gstr); + stream << print_filter(str_get(&help_gstr)); + str_free(&help_gstr); } else if (_menu->prompt) { stream << ""; stream << print_filter(_menu->prompt->text); @@ -1119,11 +1124,11 @@ void ConfigInfoView::menuInfo(void) expr_print_help, &stream, E_NONE); stream << "

"; } + + stream << "defined at " << _menu->file->name << ":" + << _menu->lineno << "

"; } } - if (showDebug()) - stream << "defined at " << _menu->file->name << ":" - << _menu->lineno << "

"; setText(info); } From 02186d8897d49b0afd3c80b6cf23437d91024065 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 18 Sep 2020 12:51:15 -0700 Subject: [PATCH 605/648] dm/dax: Fix table reference counts A recent fix to the dm_dax_supported() flow uncovered a latent bug. When dm_get_live_table() fails it is still required to drop the srcu_read_lock(). Without this change the lvm2 test-suite triggers this warning: # lvm2-testsuite --only pvmove-abort-all.sh WARNING: lock held when returning to user space! 5.9.0-rc5+ #251 Tainted: G OE ------------------------------------------------ lvm/1318 is leaving the kernel with locks still held! 1 lock held by lvm/1318: #0: ffff9372abb5a340 (&md->io_barrier){....}-{0:0}, at: dm_get_live_table+0x5/0xb0 [dm_mod] ...and later on this hang signature: INFO: task lvm:1344 blocked for more than 122 seconds. Tainted: G OE 5.9.0-rc5+ #251 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:lvm state:D stack: 0 pid: 1344 ppid: 1 flags:0x00004000 Call Trace: __schedule+0x45f/0xa80 ? finish_task_switch+0x249/0x2c0 ? wait_for_completion+0x86/0x110 schedule+0x5f/0xd0 schedule_timeout+0x212/0x2a0 ? __schedule+0x467/0xa80 ? wait_for_completion+0x86/0x110 wait_for_completion+0xb0/0x110 __synchronize_srcu+0xd1/0x160 ? __bpf_trace_rcu_utilization+0x10/0x10 __dm_suspend+0x6d/0x210 [dm_mod] dm_suspend+0xf6/0x140 [dm_mod] Fixes: 7bf7eac8d648 ("dax: Arrange for dax_supported check to span multiple devices") Cc: Cc: Jan Kara Cc: Alasdair Kergon Cc: Mike Snitzer Reported-by: Adrian Huang Reviewed-by: Ira Weiny Tested-by: Adrian Huang Link: https://lore.kernel.org/r/160045867590.25663.7548541079217827340.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/md/dm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fb0255d25e4b..4a40df8af7d3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1136,15 +1136,16 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd { struct mapped_device *md = dax_get_private(dax_dev); struct dm_table *map; + bool ret = false; int srcu_idx; - bool ret; map = dm_get_live_table(md, &srcu_idx); if (!map) - return false; + goto out; ret = dm_table_supports_dax(map, device_supports_dax, &blocksize); +out: dm_put_live_table(md, srcu_idx); return ret; From e2ec5128254518cae320d5dc631b71b94160f663 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 20 Sep 2020 08:54:42 -0700 Subject: [PATCH 606/648] dm: Call proper helper to determine dax support DM was calling generic_fsdax_supported() to determine whether a device referenced in the DM table supports DAX. However this is a helper for "leaf" device drivers so that they don't have to duplicate common generic checks. High level code should call dax_supported() helper which that calls into appropriate helper for the particular device. This problem manifested itself as kernel messages: dm-3: error: dax access failed (-95) when lvm2-testsuite run in cases where a DM device was stacked on top of another DM device. Fixes: 7bf7eac8d648 ("dax: Arrange for dax_supported check to span multiple devices") Cc: Tested-by: Adrian Huang Signed-off-by: Jan Kara Acked-by: Mike Snitzer Reported-by: kernel test robot Link: https://lore.kernel.org/r/160061715195.13131.5503173247632041975.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/dax/super.c | 4 ++++ drivers/md/dm-table.c | 10 +++++++--- include/linux/dax.h | 22 ++++++++++++++++++++-- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e5767c83ea23..b6284c5cae0a 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -325,11 +325,15 @@ EXPORT_SYMBOL_GPL(dax_direct_access); bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t len) { + if (!dax_dev) + return false; + if (!dax_alive(dax_dev)) return false; return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len); } +EXPORT_SYMBOL_GPL(dax_supported); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5edc3079e7c1..229f461e7def 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -860,10 +860,14 @@ EXPORT_SYMBOL_GPL(dm_table_set_type); int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - int blocksize = *(int *) data; + int blocksize = *(int *) data, id; + bool rc; - return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize, - start, len); + id = dax_read_lock(); + rc = dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); + dax_read_unlock(id); + + return rc; } /* Check devices support synchronous DAX */ diff --git a/include/linux/dax.h b/include/linux/dax.h index 6904d4e0b2e0..497031392e0a 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -130,6 +130,8 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, return __generic_fsdax_supported(dax_dev, bdev, blocksize, start, sectors); } +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len); static inline void fs_put_dax(struct dax_device *dax_dev) { @@ -157,6 +159,13 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } +static inline bool dax_supported(struct dax_device *dax_dev, + struct block_device *bdev, int blocksize, sector_t start, + sector_t len) +{ + return false; +} + static inline void fs_put_dax(struct dax_device *dax_dev) { } @@ -189,14 +198,23 @@ static inline void dax_unlock_page(struct page *page, dax_entry_t cookie) } #endif +#if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); +#else +static inline int dax_read_lock(void) +{ + return 0; +} + +static inline void dax_read_unlock(int id) +{ +} +#endif /* CONFIG_DAX */ bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, From d4c5da5049ac27c6ef8f6f98548c3a1ade352d25 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Thu, 17 Sep 2020 19:15:49 +0800 Subject: [PATCH 607/648] dax: Fix stack overflow when mounting fsdax pmem device When mounting fsdax pmem device, commit 6180bb446ab6 ("dax: fix detection of dax support for non-persistent memory block devices") introduces the stack overflow [1][2]. Here is the call path for mounting ext4 file system: ext4_fill_super bdev_dax_supported __bdev_dax_supported dax_supported generic_fsdax_supported __generic_fsdax_supported bdev_dax_supported The call path leads to the infinite calling loop, so we cannot call bdev_dax_supported() in __generic_fsdax_supported(). The sanity checking of the variable 'dax_dev' is moved prior to the two bdev_dax_pgoff() checks [3][4]. [1] https://lore.kernel.org/linux-nvdimm/1420999447.1004543.1600055488770.JavaMail.zimbra@redhat.com/ [2] https://lore.kernel.org/linux-nvdimm/alpine.LRH.2.02.2009141131220.30651@file01.intranet.prod.int.rdu2.redhat.com/ [3] https://lore.kernel.org/linux-nvdimm/CA+RJvhxBHriCuJhm-D8NvJRe3h2MLM+ZMFgjeJjrRPerMRLvdg@mail.gmail.com/ [4] https://lore.kernel.org/linux-nvdimm/20200903160608.GU878166@iweiny-DESK2.sc.intel.com/ Fixes: 6180bb446ab6 ("dax: fix detection of dax support for non-persistent memory block devices") Reported-by: Yi Zhang Reported-by: Mikulas Patocka Signed-off-by: Adrian Huang Reviewed-by: Jan Kara Tested-by: Ritesh Harjani Cc: Coly Li Cc: Ira Weiny Cc: John Pittman Link: https://lore.kernel.org/r/20200917111549.6367-1-adrianhuang0701@gmail.com Signed-off-by: Dan Williams --- drivers/dax/super.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index b6284c5cae0a..e84070b55463 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -85,6 +85,12 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, return false; } + if (!dax_dev) { + pr_debug("%s: error: dax unsupported by block device\n", + bdevname(bdev, buf)); + return false; + } + err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff); if (err) { pr_info("%s: error: unaligned partition for dax\n", @@ -100,12 +106,6 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, return false; } - if (!dax_dev || !bdev_dax_supported(bdev, blocksize)) { - pr_debug("%s: error: dax unsupported by block device\n", - bdevname(bdev, buf)); - return false; - } - id = dax_read_lock(); len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); From 5868ec267de5eade3ef80bd8716d6b7621a0c4c0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Sep 2020 10:38:47 -0700 Subject: [PATCH 608/648] mm: fix wake_page_function() comment typos Sedat Dilek pointed out some silly comment typo issues. Reported-by: Sedat Dilek Signed-off-by: Linus Torvalds --- mm/filemap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 6aa08e7714ce..5202e38ab79e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -990,7 +990,7 @@ void __init pagecache_init(void) /* * The page wait code treats the "wait->flags" somewhat unusually, because - * we have multiple different kinds of waits, not just he usual "exclusive" + * we have multiple different kinds of waits, not just the usual "exclusive" * one. * * We have: @@ -1011,7 +1011,7 @@ void __init pagecache_init(void) * * This is the traditional exclusive wait. * - * (b) WQ_FLAG_EXCLUSIVE | WQ_FLAG_CUSTOM: + * (c) WQ_FLAG_EXCLUSIVE | WQ_FLAG_CUSTOM: * * The waiter is waiting to get the bit, and additionally wants the * lock to be transferred to it for fair lock behavior. If the lock From 769f5083c5e2de371d9a451e3bb9d4aaa24f3346 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Sep 2020 22:51:26 +0100 Subject: [PATCH 609/648] rhashtable: fix indentation of a continue statement A continue statement is indented incorrectly, add in the missing tab. Signed-off-by: Colin Ian King Acked-by: Herbert Xu Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index c5a6fef7b45d..76c607ee6db5 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -434,7 +434,7 @@ static int __init test_rhltable(unsigned int entries) } else { if (WARN(err != -ENOENT, "removed non-existent element, error %d not %d", err, -ENOENT)) - continue; + continue; } } From b6e11785cf957f4f4dd7da7f5a1f41ca4f1ea913 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 18 Sep 2020 23:25:56 +0200 Subject: [PATCH 610/648] net: mvneta: recycle the page in case of out-of-order Recycle the received page into the page_pool cache if the dma descriptors arrived in a wrong order Fixes: ca0e014609f05 ("net: mvneta: move skb build after descriptors processing") Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 69a900081165..c4345e3d616f 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2383,8 +2383,12 @@ static int mvneta_rx_swbm(struct napi_struct *napi, mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf, &size, page, &ps); } else { - if (unlikely(!xdp_buf.data_hard_start)) + if (unlikely(!xdp_buf.data_hard_start)) { + rx_desc->buf_phys_addr = 0; + page_pool_put_full_page(rxq->page_pool, page, + true); continue; + } mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf, &size, page); From fe81d9f6182d1160e625894eecb3d7ff0222cac5 Mon Sep 17 00:00:00 2001 From: Henry Ptasinski Date: Sat, 19 Sep 2020 00:12:11 +0000 Subject: [PATCH 611/648] net: sctp: Fix IPv6 ancestor_size calc in sctp_copy_descendant When calculating ancestor_size with IPv6 enabled, simply using sizeof(struct ipv6_pinfo) doesn't account for extra bytes needed for alignment in the struct sctp6_sock. On x86, there aren't any extra bytes, but on ARM the ipv6_pinfo structure is aligned on an 8-byte boundary so there were 4 pad bytes that were omitted from the ancestor_size calculation. This would lead to corruption of the pd_lobby pointers, causing an oops when trying to free the sctp structure on socket close. Fixes: 636d25d557d1 ("sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant") Signed-off-by: Henry Ptasinski Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 8 +++++--- net/sctp/socket.c | 9 +++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index b33f1aefad09..0bdff38eb4bb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -226,12 +226,14 @@ struct sctp_sock { data_ready_signalled:1; atomic_t pd_mode; + + /* Fields after this point will be skipped on copies, like on accept + * and peeloff operations + */ + /* Receive to here while partial delivery is in effect. */ struct sk_buff_head pd_lobby; - /* These must be the last fields, as they will skipped on copies, - * like on accept and peeloff operations - */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 836615f71a7d..53d0a4161df3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9220,13 +9220,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, static inline void sctp_copy_descendant(struct sock *sk_to, const struct sock *sk_from) { - int ancestor_size = sizeof(struct inet_sock) + - sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, pd_lobby); - - if (sk_from->sk_family == PF_INET6) - ancestor_size += sizeof(struct ipv6_pinfo); + size_t ancestor_size = sizeof(struct inet_sock); + ancestor_size += sk_from->sk_prot->obj_size; + ancestor_size -= offsetof(struct sctp_sock, pd_lobby); __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } From 7d1f8691ccffe88cec70a6e4044adf1b9bbd8a7c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sun, 20 Sep 2020 09:10:12 -0400 Subject: [PATCH 612/648] Revert "KVM: Check the allocation of pv cpu mask" The commit 0f990222108d ("KVM: Check the allocation of pv cpu mask") we have in 5.9-rc5 has two issue: 1) Compilation fails for !CONFIG_SMP, see: https://bugzilla.kernel.org/show_bug.cgi?id=209285 2) This commit completely disables PV TLB flush, see https://lore.kernel.org/kvm/87y2lrnnyf.fsf@vitty.brq.redhat.com/ The allocation problem is likely a theoretical one, if we don't have memory that early in boot process we're likely doomed anyway. Let's solve it properly later. This reverts commit 0f990222108d214a0924d920e6095b58107d7b59. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 1b51b727b140..9663ba31347c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -652,6 +652,7 @@ static void __init kvm_guest_init(void) } if (pv_tlb_flush_supported()) { + pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.tlb_remove_table = tlb_remove_table; pr_info("KVM setup pv remote TLB flush\n"); } @@ -764,14 +765,6 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); -static void kvm_free_pv_cpu_mask(void) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) - free_cpumask_var(per_cpu(__pv_cpu_mask, cpu)); -} - static __init int kvm_alloc_cpumask(void) { int cpu; @@ -790,20 +783,11 @@ static __init int kvm_alloc_cpumask(void) if (alloc) for_each_possible_cpu(cpu) { - if (!zalloc_cpumask_var_node( - per_cpu_ptr(&__pv_cpu_mask, cpu), - GFP_KERNEL, cpu_to_node(cpu))) { - goto zalloc_cpumask_fail; - } + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu), + GFP_KERNEL, cpu_to_node(cpu)); } - apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself; - pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; return 0; - -zalloc_cpumask_fail: - kvm_free_pv_cpu_mask(); - return -ENOMEM; } arch_initcall(kvm_alloc_cpumask); From ba4f184e126b751d1bffad5897f263108befc780 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Sep 2020 16:33:55 -0700 Subject: [PATCH 613/648] Linux 5.9-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 19d012810fbb..2b66d3398878 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From 492adcf481292521ee8df1a482dc12acdb28aa15 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 20 Sep 2020 21:08:54 -0400 Subject: [PATCH 614/648] bnxt_en: Use memcpy to copy VPD field info. Using strlcpy() to copy from VPD is not correct because VPD strings are not necessarily NULL terminated. Use memcpy() to copy the VPD length up to the destination buffer size - 1. The destination is zeroed memory so it will always be NULL terminated. Fixes: a0d0fd70fed5 ("bnxt_en: Read partno and serialno of the board from VPD") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8eb73fe39e84..4af42b1da827 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12090,7 +12090,7 @@ static int bnxt_init_mac_addr(struct bnxt *bp) static void bnxt_vpd_read_info(struct bnxt *bp) { struct pci_dev *pdev = bp->pdev; - int i, len, pos, ro_size; + int i, len, pos, ro_size, size; ssize_t vpd_size; u8 *vpd_data; @@ -12125,7 +12125,8 @@ static void bnxt_vpd_read_info(struct bnxt *bp) if (len + pos > vpd_size) goto read_sn; - strlcpy(bp->board_partno, &vpd_data[pos], min(len, BNXT_VPD_FLD_LEN)); + size = min(len, BNXT_VPD_FLD_LEN - 1); + memcpy(bp->board_partno, &vpd_data[pos], size); read_sn: pos = pci_vpd_find_info_keyword(vpd_data, i, ro_size, @@ -12138,7 +12139,8 @@ static void bnxt_vpd_read_info(struct bnxt *bp) if (len + pos > vpd_size) goto exit; - strlcpy(bp->board_serialno, &vpd_data[pos], min(len, BNXT_VPD_FLD_LEN)); + size = min(len, BNXT_VPD_FLD_LEN - 1); + memcpy(bp->board_serialno, &vpd_data[pos], size); exit: kfree(vpd_data); } From d69753fa1ecb3218b56b022722f7a5822735b876 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 20 Sep 2020 21:08:55 -0400 Subject: [PATCH 615/648] bnxt_en: return proper error codes in bnxt_show_temp Returning "unknown" as a temperature value violates the hwmon interface rules. Appropriate error codes should be returned via device_attribute show instead. These will ultimately be propagated to the user via the file system interface. In addition to the corrected error handling, it is an even better idea to not present the sensor in sysfs at all if it is known that the read will definitely fail. Given that temp1_input is currently the only sensor reported, ensure no hwmon registration if TEMP_MONITOR_QUERY is not supported or if it will fail due to access permissions. Something smarter may be needed if and when other sensors are added. Fixes: 12cce90b934b ("bnxt_en: fix HWRM error when querying VF temperature") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4af42b1da827..2865e248e35f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9311,18 +9311,16 @@ static ssize_t bnxt_show_temp(struct device *dev, struct hwrm_temp_monitor_query_output *resp; struct bnxt *bp = dev_get_drvdata(dev); u32 len = 0; + int rc; resp = bp->hwrm_cmd_resp_addr; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); - if (!_hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT)) + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */ mutex_unlock(&bp->hwrm_cmd_lock); - - if (len) - return len; - - return sprintf(buf, "unknown\n"); + return rc ?: len; } static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0); @@ -9342,7 +9340,16 @@ static void bnxt_hwmon_close(struct bnxt *bp) static void bnxt_hwmon_open(struct bnxt *bp) { + struct hwrm_temp_monitor_query_input req = {0}; struct pci_dev *pdev = bp->pdev; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); + rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc == -EACCES || rc == -EOPNOTSUPP) { + bnxt_hwmon_close(bp); + return; + } if (bp->hwmon_dev) return; From a53906908148d64423398a62c4435efb0d09652c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Sep 2020 21:08:56 -0400 Subject: [PATCH 616/648] bnxt_en: Protect bnxt_set_eee() and bnxt_set_pauseparam() with mutex. All changes related to bp->link_info require the protection of the link_lock mutex. It's not sufficient to rely just on RTNL. Fixes: 163e9ef63641 ("bnxt_en: Fix race when modifying pause settings.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d0928334bdc8..6c751013798d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1788,9 +1788,12 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (!BNXT_PHY_CFG_ABLE(bp)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); if (epause->autoneg) { - if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) - return -EINVAL; + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { + rc = -EINVAL; + goto pause_exit; + } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; if (bp->hwrm_spec_code >= 0x10201) @@ -1811,11 +1814,11 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (epause->tx_pause) link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_TX; - if (netif_running(dev)) { - mutex_lock(&bp->link_lock); + if (netif_running(dev)) rc = bnxt_hwrm_set_pause(bp); - mutex_unlock(&bp->link_lock); - } + +pause_exit: + mutex_unlock(&bp->link_lock); return rc; } @@ -2552,8 +2555,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) struct bnxt *bp = netdev_priv(dev); struct ethtool_eee *eee = &bp->eee; struct bnxt_link_info *link_info = &bp->link_info; - u32 advertising = - _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); + u32 advertising; int rc = 0; if (!BNXT_PHY_CFG_ABLE(bp)) @@ -2562,19 +2564,23 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (!(bp->flags & BNXT_FLAG_EEE_CAP)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); + advertising = _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); if (!edata->eee_enabled) goto eee_ok; if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { netdev_warn(dev, "EEE requires autoneg\n"); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } if (edata->tx_lpi_enabled) { if (bp->lpi_tmr_hi && (edata->tx_lpi_timer > bp->lpi_tmr_hi || edata->tx_lpi_timer < bp->lpi_tmr_lo)) { netdev_warn(dev, "Valid LPI timer range is %d and %d microsecs\n", bp->lpi_tmr_lo, bp->lpi_tmr_hi); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } else if (!bp->lpi_tmr_hi) { edata->tx_lpi_timer = eee->tx_lpi_timer; } @@ -2584,7 +2590,8 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) } else if (edata->advertised & ~advertising) { netdev_warn(dev, "EEE advertised %x must be a subset of autoneg advertised speeds %x\n", edata->advertised, advertising); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } eee->advertised = edata->advertised; @@ -2596,6 +2603,8 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (netif_running(dev)) rc = bnxt_hwrm_set_link_setting(bp, false, true); +eee_exit: + mutex_unlock(&bp->link_lock); return rc; } From f0f47b2f8cbc22e6e611ef6cc988f5452e5aec3f Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 20 Sep 2020 21:08:57 -0400 Subject: [PATCH 617/648] bnxt_en: Return -EOPNOTSUPP for ETHTOOL_GREGS on VFs. Debug firmware commands are not supported on VFs to read registers. This patch avoids logging unnecessary access_denied error on VFs when user calls ETHTOOL_GREGS. By returning error in get_regs_len() method on the VF, the get_regs() method will not be called. Fixes: b5d600b027eb ("bnxt_en: Add support for 'ethtool -d'") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6c751013798d..fecdfd875af1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1322,6 +1322,9 @@ static int bnxt_get_regs_len(struct net_device *dev) struct bnxt *bp = netdev_priv(dev); int reg_len; + if (!BNXT_PF(bp)) + return -EOPNOTSUPP; + reg_len = BNXT_PXP_REG_LEN; if (bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED) From d2b42d010f2941c2a85d970500b9d4ba79765593 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Sep 2020 21:08:58 -0400 Subject: [PATCH 618/648] bnxt_en: Fix HWRM_FUNC_QSTATS_EXT firmware call. Fix it to set the required fid input parameter. The firmware call fails without this patch. Fixes: d752d0536c97 ("bnxt_en: Retrieve hardware counter masks from firmware if available.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2865e248e35f..d527c9c0fc9d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3782,6 +3782,7 @@ static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp, return -EOPNOTSUPP; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QSTATS_EXT, -1, -1); + req.fid = cpu_to_le16(0xffff); req.flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); From c07fa08f02f4053b51dae1a6ee08bc644dc7846d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 20 Sep 2020 21:08:59 -0400 Subject: [PATCH 619/648] bnxt_en: Fix wrong flag value passed to HWRM_PORT_QSTATS_EXT fw call. The wrong flag value caused the firmware call to return actual port counters instead of the counter masks. This messed up the counter overflow logic and caused erratic extended port counters to be displayed under ethtool -S. Fixes: 531d1d269c1d ("bnxt_en: Retrieve hardware masks for port counters.") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d527c9c0fc9d..7b7e8b7883c8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3853,7 +3853,7 @@ static void bnxt_init_stats(struct bnxt *bp) tx_masks = stats->hw_masks; tx_count = sizeof(struct tx_port_stats_ext) / 8; - flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; + flags = PORT_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK; rc = bnxt_hwrm_port_qstats_ext(bp, flags); if (rc) { mask = (1ULL << 40) - 1; From 8f3d749685e48c44dbe877ac9781079d85f914c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Sep 2020 09:28:14 -0600 Subject: [PATCH 620/648] io_uring: don't re-setup vecs/iter in io_resumit_prep() is already there If we already have mapped the necessary data for retry, then don't set it up again. It's a pointless operation, and we leak the iovec if it's a large (non-stack) vec. Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a29c8913b1f0..b93355ccceeb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2294,13 +2294,17 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) goto end_req; } - ret = io_import_iovec(rw, req, &iovec, &iter, false); - if (ret < 0) - goto end_req; - ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false); - if (!ret) + if (!req->io) { + ret = io_import_iovec(rw, req, &iovec, &iter, false); + if (ret < 0) + goto end_req; + ret = io_setup_async_rw(req, iovec, inline_vecs, &iter, false); + if (!ret) + return true; + kfree(iovec); + } else { return true; - kfree(iovec); + } end_req: req_set_fail_links(req); io_req_complete(req, ret); From f5cac8b156e8b7b67bb0fdfd19900855bf9569f3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Sep 2020 09:30:38 -0600 Subject: [PATCH 621/648] io_uring: don't use retry based buffered reads for non-async bdev Some block devices, like dm, bubble back -EAGAIN through the completion handler. We check for this in io_read(), but don't honor it for when we have copied the iov. Return -EAGAIN for this case before retrying, to force punt to io-wq. Fixes: bcf5a06304d6 ("io_uring: support true async buffered reads, if file provides it") Reported-by: Zorro Lang Tested-by: Zorro Lang Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b93355ccceeb..10d742391544 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3130,6 +3130,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, struct iov_iter __iter, *iter = &__iter; ssize_t io_size, ret, ret2; size_t iov_count; + bool no_async; if (req->io) iter = &req->io->rw.iter; @@ -3147,7 +3148,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, kiocb->ki_flags &= ~IOCB_NOWAIT; /* If the file doesn't support async, just async punt */ - if (force_nonblock && !io_file_supports_async(req->file, READ)) + no_async = force_nonblock && !io_file_supports_async(req->file, READ); + if (no_async) goto copy_iov; ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), iov_count); @@ -3191,6 +3193,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, ret = ret2; goto out_free; } + if (no_async) + return -EAGAIN; /* it's copied and will be cleaned with ->io */ iovec = NULL; /* now use our persistent iterator, if we aren't already */ From 72f04da48a9828ba3ae8ac77bea648bda8b7d0ff Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Mon, 14 Sep 2020 17:36:09 -0400 Subject: [PATCH 622/648] tools/io_uring: fix compile breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It would seem none of the kernel continuous integration does this: $ cd tools/io_uring $ make Otherwise it may have noticed: cc -Wall -Wextra -g -D_GNU_SOURCE -c -o io_uring-bench.o io_uring-bench.c io_uring-bench.c:133:12: error: static declaration of ‘gettid’ follows non-static declaration 133 | static int gettid(void) | ^~~~~~ In file included from /usr/include/unistd.h:1170, from io_uring-bench.c:27: /usr/include/x86_64-linux-gnu/bits/unistd_ext.h:34:16: note: previous declaration of ‘gettid’ was here 34 | extern __pid_t gettid (void) __THROW; | ^~~~~~ make: *** [: io_uring-bench.o] Error 1 The problem on Ubuntu 20.04 (with lk 5.9.0-rc5) is that unistd.h already defines gettid(). So prefix the local definition with "lk_". Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe --- tools/io_uring/io_uring-bench.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c index 0f257139b003..7703f0118385 100644 --- a/tools/io_uring/io_uring-bench.c +++ b/tools/io_uring/io_uring-bench.c @@ -130,7 +130,7 @@ static int io_uring_register_files(struct submitter *s) s->nr_files); } -static int gettid(void) +static int lk_gettid(void) { return syscall(__NR_gettid); } @@ -281,7 +281,7 @@ static void *submitter_fn(void *data) struct io_sq_ring *ring = &s->sq_ring; int ret, prepped; - printf("submitter=%d\n", gettid()); + printf("submitter=%d\n", lk_gettid()); srand48_r(pthread_self(), &s->rand); From 6ca56f845955e325033758f90a2cffe150f31bc8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 18 Sep 2020 16:51:19 -0600 Subject: [PATCH 623/648] io_uring: mark statx/files_update/epoll_ctl as non-SQPOLL These will naturally fail when attempted through SQPOLL, but either with -EFAULT or -EBADF. Make it explicit that these are not workable through SQPOLL and return -EINVAL, just like other ops that need to use ->files. Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 10d742391544..f2b180731676 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3786,7 +3786,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, #if defined(CONFIG_EPOLL) if (sqe->ioprio || sqe->buf_index) return -EINVAL; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) return -EINVAL; req->epoll.epfd = READ_ONCE(sqe->fd); @@ -3901,7 +3901,7 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL))) return -EINVAL; if (sqe->ioprio || sqe->buf_index) return -EINVAL; @@ -5418,6 +5418,8 @@ static int io_async_cancel(struct io_kiocb *req) static int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL)) + return -EINVAL; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; if (sqe->ioprio || sqe->rw_flags) From 4eb8dded6b82e184c09bb963bea0335fa3f30b55 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 18 Sep 2020 19:36:24 -0600 Subject: [PATCH 624/648] io_uring: fix openat/openat2 unified prep handling A previous commit unified how we handle prep for these two functions, but this means that we check the allowed context (SQPOLL, specifically) later than we should. Move the ring type checking into the two parent functions, instead of doing it after we've done some setup work. Fixes: ec65fea5a8d7 ("io_uring: deduplicate io_openat{,2}_prep()") Reported-by: Andy Lutomirski Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f2b180731676..e6004b92e553 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3527,8 +3527,6 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe const char __user *fname; int ret; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) - return -EINVAL; if (unlikely(sqe->ioprio || sqe->buf_index)) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) @@ -3555,6 +3553,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { u64 flags, mode; + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + return -EINVAL; if (req->flags & REQ_F_NEED_CLEANUP) return 0; mode = READ_ONCE(sqe->len); @@ -3569,6 +3569,8 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) size_t len; int ret; + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + return -EINVAL; if (req->flags & REQ_F_NEED_CLEANUP) return 0; how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); From 88b67edd7247466bc47f01e1dc539b0d0d4b931e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 21 Sep 2020 11:33:23 +0200 Subject: [PATCH 625/648] dax: Fix compilation for CONFIG_DAX && !CONFIG_FS_DAX dax_supported() is defined whenever CONFIG_DAX is enabled. So dummy implementation should be defined only in !CONFIG_DAX case, not in !CONFIG_FS_DAX case. Fixes: e2ec51282545 ("dm: Call proper helper to determine dax support") Cc: Reported-by: Geert Uytterhoeven Reported-by: Naresh Kamboju Reported-by: kernel test robot Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/dax.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/include/linux/dax.h b/include/linux/dax.h index 497031392e0a..43b39ab9de1a 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -58,6 +58,8 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev) { __set_dax_synchronous(dax_dev); } +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, + int blocksize, sector_t start, sector_t len); /* * Check if given mapping is supported by the file / underlying device. */ @@ -104,6 +106,12 @@ static inline bool dax_synchronous(struct dax_device *dax_dev) static inline void set_dax_synchronous(struct dax_device *dax_dev) { } +static inline bool dax_supported(struct dax_device *dax_dev, + struct block_device *bdev, int blocksize, sector_t start, + sector_t len) +{ + return false; +} static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, struct dax_device *dax_dev) { @@ -130,8 +138,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, return __generic_fsdax_supported(dax_dev, bdev, blocksize, start, sectors); } -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len); static inline void fs_put_dax(struct dax_device *dax_dev) { @@ -159,13 +165,6 @@ static inline bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } -static inline bool dax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t len) -{ - return false; -} - static inline void fs_put_dax(struct dax_device *dax_dev) { } From 58ed68b592377f7483bcbff937567d8cbaab38ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Sep 2020 17:33:11 +0300 Subject: [PATCH 626/648] sfc: Fix error code in probe This failure path should return a negative error code but it currently returns success. Fixes: 51b35a454efd ("sfc: skeleton EF100 PF driver") Signed-off-by: Dan Carpenter Acked-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/ef100.c b/drivers/net/ethernet/sfc/ef100.c index c54b7f8243f3..ffdb36715a49 100644 --- a/drivers/net/ethernet/sfc/ef100.c +++ b/drivers/net/ethernet/sfc/ef100.c @@ -490,6 +490,7 @@ static int ef100_pci_probe(struct pci_dev *pci_dev, if (fcw.offset > pci_resource_len(efx->pci_dev, fcw.bar) - ESE_GZ_FCW_LEN) { netif_err(efx, probe, efx->net_dev, "Func control window overruns BAR\n"); + rc = -EIO; goto fail; } From 91b2c9a0fdb56e3d7eb6bf06a39ad7a50f000916 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 21 Sep 2020 06:38:56 +0000 Subject: [PATCH 627/648] ipv6: route: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Xu Wang Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5e7e25e2523a..fb075d9545b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4202,7 +4202,7 @@ static struct fib6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, + cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; From cefc23554fc259114e78a7b0908aac4610ee18eb Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 31 Aug 2020 20:50:42 +0300 Subject: [PATCH 628/648] net/mlx5: Fix FTE cleanup Currently, when an FTE is allocated, its refcount is decreased to 0 with the purpose it will not be a stand alone steering object and every rule (destination) of the FTE would increase the refcount. When mlx5_cleanup_fs is called while not all rules were deleted by the steering users, it hit refcount underflow on the FTE once clean_tree calls to tree_remove_node after the deleted rules already decreased the refcount to 0. FTE is no longer destroyed implicitly when the last rule (destination) is deleted. mlx5_del_flow_rules avoids it by increasing the refcount on the FTE and destroy it explicitly after all rules were deleted. So we can avoid the refcount underflow by making FTE as stand alone object. In addition need to set del_hw_func to FTE so the HW object will be destroyed when the FTE is deleted from the cleanup_tree flow. refcount_t: underflow; use-after-free. WARNING: CPU: 2 PID: 15715 at lib/refcount.c:28 refcount_warn_saturate+0xd9/0xe0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: tree_put_node+0xf2/0x140 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x5f/0xf0 [mlx5_core] clean_tree+0x4e/0xf0 [mlx5_core] clean_tree+0x5f/0xf0 [mlx5_core] mlx5_cleanup_fs+0x26/0x270 [mlx5_core] mlx5_unload+0x2e/0xa0 [mlx5_core] mlx5_unload_one+0x51/0x120 [mlx5_core] mlx5_devlink_reload_down+0x51/0x90 [mlx5_core] devlink_reload+0x39/0x120 ? devlink_nl_cmd_reload+0x43/0x220 genl_rcv_msg+0x1e4/0x420 ? genl_family_rcv_msg_attrs_parse+0x100/0x100 netlink_rcv_skb+0x47/0x110 genl_rcv+0x24/0x40 netlink_unicast+0x217/0x2f0 netlink_sendmsg+0x30f/0x430 sock_sendmsg+0x30/0x40 __sys_sendto+0x10e/0x140 ? handle_mm_fault+0xc4/0x1f0 ? do_page_fault+0x33f/0x630 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x48/0x130 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 718ce4d601db ("net/mlx5: Consolidate update FTE for all removal changes") Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9ccec5f8b92a..75fa44eee434 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -654,7 +654,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, fte->action = *flow_act; fte->flow_context = spec->flow_context; - tree_init_node(&fte->node, NULL, del_sw_fte); + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); return fte; } @@ -1792,7 +1792,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); - tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1891,7 +1890,6 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); - tree_put_node(&fte->node, false); tree_put_node(&g->node, false); return rule; @@ -2001,7 +1999,9 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) up_write_ref_node(&fte->node, false); } else { del_hw_fte(&fte->node); - up_write(&fte->node.lock); + /* Avoid double call to del_hw_fte */ + fte->node.del_hw_func = NULL; + up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); } kfree(handle); From fe45386a208277cae4648106133c08246eecd012 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 11 Jun 2020 13:55:19 +0300 Subject: [PATCH 629/648] net/mlx5e: Use RCU to protect rq->xdp_prog Currently, the RQs are temporarily deactivated while hot-replacing the XDP program, and napi_synchronize is used to make sure rq->xdp_prog is not in use. However, napi_synchronize is not ideal: instead of waiting till the end of a NAPI cycle, it polls and waits until NAPI is not running, sleeping for 1ms between the periodic checks. Under heavy workloads, this loop will never end, which may even lead to a kernel panic if the kernel detects the hangup. Such workloads include XSK TX and possibly also heavy RX (XSK or normal). The fix is inspired by commit 326fe02d1ed6 ("net/mlx4_en: protect ring->xdp_prog with rcu_read_lock"). As mlx5e_xdp_handle is already protected by rcu_read_lock, and bpf_prog_put uses call_rcu to free the program, there is no need for additional synchronization if proper RCU functions are used to access the pointer. This patch converts all accesses to rq->xdp_prog to use RCU functions. Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Fixes: db05815b36cb ("net/mlx5e: Add XSK zero-copy support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 53 +++++++++---------- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0cc2080fd847..5d7b79518449 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -600,7 +600,7 @@ struct mlx5e_rq { struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ - struct bpf_prog *xdp_prog; + struct bpf_prog __rcu *xdp_prog; struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); struct page_pool *page_pool; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 0e6946fc121f..b28df21981a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -122,7 +122,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, u32 *len, struct xdp_buff *xdp) { - struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); + struct bpf_prog *prog = rcu_dereference(rq->xdp_prog); u32 act; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index aebcf73f8546..bde97b108db5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -399,7 +399,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); - rq->xdp_prog = params->xdp_prog; + RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); rq_xdp_ix = rq->ix; if (xsk) @@ -408,7 +408,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (err < 0) goto err_rq_wq_destroy; - rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); pool_size = 1 << params->log_rq_mtu_frames; @@ -564,8 +564,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } err_rq_wq_destroy: - if (rq->xdp_prog) - bpf_prog_put(rq->xdp_prog); + if (params->xdp_prog) + bpf_prog_put(params->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); @@ -575,10 +575,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, static void mlx5e_free_rq(struct mlx5e_rq *rq) { + struct mlx5e_channel *c = rq->channel; + struct bpf_prog *old_prog = NULL; int i; - if (rq->xdp_prog) - bpf_prog_put(rq->xdp_prog); + /* drop_rq has neither channel nor xdp_prog. */ + if (c) + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&c->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -4330,6 +4336,16 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return 0; } +static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + + old_prog = rcu_replace_pointer(rq->xdp_prog, prog, + lockdep_is_held(&rq->channel->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); +} + static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4388,29 +4404,10 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) */ for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; - bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); - clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); - if (xsk_open) - clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); - napi_synchronize(&c->napi); - /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ - - old_prog = xchg(&c->rq.xdp_prog, prog); - if (old_prog) - bpf_prog_put(old_prog); - - if (xsk_open) { - old_prog = xchg(&c->xskrq.xdp_prog, prog); - if (old_prog) - bpf_prog_put(old_prog); - } - - set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); - if (xsk_open) - set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); - /* napi_schedule in case we have missed anything */ - napi_schedule(&c->napi); + mlx5e_rq_replace_xdp_prog(&c->rq, prog); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); } unlock: From 9c25a22dfb00270372224721fed646965420323a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 11 Jun 2020 14:25:19 +0300 Subject: [PATCH 630/648] net/mlx5e: Use synchronize_rcu to sync with NAPI As described in the previous commit, napi_synchronize doesn't quite fit the purpose when we just need to wait until the currently running NAPI quits. Its implementation waits until NAPI is not running by polling and waiting for 1ms in between. In cases where we need to deactivate one queue (e.g., recovery flows) or where we deactivate them one-by-one (deactivate channel flow), we may get stuck in napi_synchronize forever if other queues keep NAPI active, causing a soft lockup. Depending on kernel configuration (CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC), it may result in a kernel panic. To fix the issue, use synchronize_rcu to wait for NAPI to quit, and wrap the whole NAPI in rcu_read_lock. Fixes: acc6c5953af1 ("net/mlx5e: Split open/close channels to stages") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 14 ++------------ .../ethernet/mellanox/mlx5/core/en/xsk/setup.c | 3 +-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++-------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++---------- .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 17 +++++++++++++---- 5 files changed, 22 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index a33a1f762c70..40db27bf790b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -31,7 +31,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, { struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk; u32 cqe_bcnt32 = cqe_bcnt; - bool consumed; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -51,10 +50,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, xsk_buff_dma_sync_for_cpu(xdp); prefetch(xdp->data); - rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp); - rcu_read_unlock(); - /* Possible flows: * - XDP_REDIRECT to XSKMAP: * The page is owned by the userspace from now. @@ -70,7 +65,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, * allocated first from the Reuse Ring, so it has enough space. */ - if (likely(consumed)) { + if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) { if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ @@ -88,7 +83,6 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, u32 cqe_bcnt) { struct xdp_buff *xdp = wi->di->xsk; - bool consumed; /* wi->offset is not used in this function, because xdp->data and the * DMA address point directly to the necessary place. Furthermore, the @@ -107,11 +101,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, return NULL; } - rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp); - rcu_read_unlock(); - - if (likely(consumed)) + if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp))) return NULL; /* page/packet was consumed by XDP */ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index dd9df519d383..55e65a438de7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -106,8 +106,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, void mlx5e_close_xsk(struct mlx5e_channel *c) { clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); - napi_synchronize(&c->napi); - synchronize_rcu(); /* Sync with the XSK wakeup. */ + synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */ mlx5e_close_rq(&c->xskrq); mlx5e_close_cq(&c->xskrq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bde97b108db5..917c28e7f29e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -873,7 +873,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq) void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ + synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ } void mlx5e_close_rq(struct mlx5e_rq *rq) @@ -1318,12 +1318,10 @@ void mlx5e_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) { - struct mlx5e_channel *c = sq->channel; struct mlx5_wq_cyc *wq = &sq->wq; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - /* prevent netif_tx_wake_queue */ - napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ mlx5e_tx_disable_queue(sq->txq); @@ -1398,10 +1396,8 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) { - struct mlx5e_channel *c = icosq->channel; - clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); - napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with NAPI. */ } void mlx5e_close_icosq(struct mlx5e_icosq *sq) @@ -1480,7 +1476,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) struct mlx5e_channel *c = sq->channel; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with NAPI. */ mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_xdpsq_descs(sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 65828af120b7..99d102c035b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1132,7 +1132,6 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct xdp_buff xdp; struct sk_buff *skb; void *va, *data; - bool consumed; u32 frag_size; va = page_address(di->page) + wi->offset; @@ -1144,11 +1143,8 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - rcu_read_lock(); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); - consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp); - rcu_read_unlock(); - if (consumed) + if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp)) return NULL; /* page/packet was consumed by XDP */ rx_headroom = xdp.data - xdp.data_hard_start; @@ -1438,7 +1434,6 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct sk_buff *skb; void *va, *data; u32 frag_size; - bool consumed; /* Check packet size. Note LRO doesn't use linear SKB */ if (unlikely(cqe_bcnt > rq->hw_mtu)) { @@ -1455,11 +1450,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prefetchw(va); /* xdp_frame data area */ prefetch(data); - rcu_read_lock(); mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp); - consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp); - rcu_read_unlock(); - if (consumed) { + if (mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index de10b06bade5..d5868670f8a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -121,13 +121,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_xdpsq *xsksq = &c->xsksq; struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; - bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); bool aff_change = false; bool busy_xsk = false; bool busy = false; int work_done = 0; + bool xsk_open; int i; + rcu_read_lock(); + + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + ch_stats->poll++; for (i = 0; i < c->num_tc; i++) @@ -167,8 +171,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) busy |= busy_xsk; if (busy) { - if (likely(mlx5e_channel_no_affinity_change(c))) - return budget; + if (likely(mlx5e_channel_no_affinity_change(c))) { + work_done = budget; + goto out; + } ch_stats->aff_change++; aff_change = true; if (budget && work_done == budget) @@ -176,7 +182,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) } if (unlikely(!napi_complete_done(napi, work_done))) - return work_done; + goto out; ch_stats->arm++; @@ -203,6 +209,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) ch_stats->force_irq++; } +out: + rcu_read_unlock(); + return work_done; } From 12a240a41427d37b5e70570700704e84c827452f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 7 Jul 2020 06:16:24 +0000 Subject: [PATCH 631/648] net/mlx5e: Fix memory leak of tunnel info when rule under multipath not ready When deleting vxlan flow rule under multipath, tun_info in parse_attr is not freed when the rule is not ready. Fixes: ef06c9ee8933 ("net/mlx5e: Allow one failure when offloading tc encap rules under multipath") Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd53d101d8fd..7be282d2ddde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1290,11 +1290,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_put_flow_tunnel_id(flow); - if (flow_flag_test(flow, NOT_READY)) { + if (flow_flag_test(flow, NOT_READY)) remove_unready_flow(flow); - kvfree(attr->parse_attr); - return; - } if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) From 4c8594adb9d9250120cffc57a7175a1a09f2b084 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 26 Jul 2020 16:37:47 +0300 Subject: [PATCH 632/648] net/mlx5e: CT: Fix freeing ct_label mapping Add missing mapping remove call when removing ct rule, as the mapping was allocated when ct rule was adding with ct_label. Also there is a missing mapping remove call in error flow. Fixes: 54b154ecfb8c ("net/mlx5e: CT: Map 128 bits labels to 32 bit map ID") Signed-off-by: Roi Dayan Reviewed-by: Eli Britstein Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 21 +++++++++++---- .../ethernet/mellanox/mlx5/core/en/tc_ct.h | 26 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++-- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index c6bc9224c3b1..bc5f72ec3623 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -699,6 +699,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, err_rule: mlx5e_mod_hdr_detach(ct_priv->esw->dev, &esw->offloads.mod_hdr, zone_rule->mh); + mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); err_mod_hdr: kfree(spec); return err; @@ -958,12 +959,22 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, return 0; } +void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) +{ + struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); + + if (!ct_priv || !ct_attr->ct_labels_id) + return; + + mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id); +} + int -mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct mlx5_ct_attr *ct_attr, - struct netlink_ext_ack *extack) +mlx5_tc_ct_match_add(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) { struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv); struct flow_rule *rule = flow_cls_offload_flow_rule(f); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 3baef917a677..708c216325d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -87,12 +87,15 @@ mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv); void mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv); +void +mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr); + int -mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct mlx5_ct_attr *ct_attr, - struct netlink_ext_ack *extack); +mlx5_tc_ct_match_add(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack); int mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec); @@ -130,12 +133,15 @@ mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv) { } +static inline void +mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr) {} + static inline int -mlx5_tc_ct_parse_match(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct flow_cls_offload *f, - struct mlx5_ct_attr *ct_attr, - struct netlink_ext_ack *extack) +mlx5_tc_ct_match_add(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + struct mlx5_ct_attr *ct_attr, + struct netlink_ext_ack *extack) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7be282d2ddde..bf0c6f063941 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1312,6 +1312,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } kvfree(attr->parse_attr); + mlx5_tc_ct_match_del(priv, &flow->esw_attr->ct_attr); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); @@ -4399,8 +4401,8 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, goto err_free; /* actions validation depends on parsing the ct matches first */ - err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, - &flow->esw_attr->ct_attr, extack); + err = mlx5_tc_ct_match_add(priv, &parse_attr->spec, f, + &flow->esw_attr->ct_attr, extack); if (err) goto err_free; From 6cec0229ab1959259e71e9a5bbe47c04577950b1 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 5 Aug 2020 17:56:04 +0300 Subject: [PATCH 633/648] net/mlx5e: Enable adding peer miss rules only if merged eswitch is supported The cited commit creates peer miss group during switchdev mode initialization in order to handle miss packets correctly while in VF LAG mode. This is done regardless of FW support of such groups which could cause rules setups failure later on. Fix by adding FW capability check before creating peer groups/rule. Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d2516922d867..1bcf2609dca8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1219,36 +1219,38 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.send_to_vport_grp = g; - /* create peer esw miss group */ - memset(flow_group_in, 0, inlen); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + /* create peer esw miss group */ + memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in); + esw_set_flow_group_source_port(esw, flow_group_in); - if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { - match_criteria = MLX5_ADDR_OF(create_flow_group_in, - flow_group_in, - match_criteria); + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + esw->total_vports - 1); + ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); + goto peer_miss_err; + } + esw->fdb_table.offloads.peer_miss_grp = g; } - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - ix + esw->total_vports - 1); - ix += esw->total_vports; - - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); - goto peer_miss_err; - } - esw->fdb_table.offloads.peer_miss_grp = g; - /* create miss group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -1281,7 +1283,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) miss_rule_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: - mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: @@ -1305,7 +1308,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); - mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_esw_chains_destroy(esw); From 82198d8bcdeff01d19215d712aa55031e21bccbc Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Wed, 2 Sep 2020 16:49:52 +0300 Subject: [PATCH 634/648] net/mlx5e: Fix endianness when calculating pedit mask first bit The field mask value is provided in network byte order and has to be converted to host byte order before calculating pedit mask first bit. Fixes: 88f30bbcbaaa ("net/mlx5e: Bit sized fields rewrite support") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index bf0c6f063941..1c93f92d9210 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2624,6 +2624,22 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport), }; +static unsigned long mask_to_le(unsigned long mask, int size) +{ + __be32 mask_be32; + __be16 mask_be16; + + if (size == 32) { + mask_be32 = (__force __be32)(mask); + mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); + } else if (size == 16) { + mask_be32 = (__force __be32)(mask); + mask_be16 = *(__be16 *)&mask_be32; + mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); + } + + return mask; +} static int offload_pedit_fields(struct mlx5e_priv *priv, int namespace, struct pedit_headers_action *hdrs, @@ -2637,9 +2653,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; struct mlx5_fields *f; - unsigned long mask; - __be32 mask_be32; - __be16 mask_be16; + unsigned long mask, field_mask; int err; u8 cmd; @@ -2705,14 +2719,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, if (skip) continue; - if (f->field_bsize == 32) { - mask_be32 = (__force __be32)(mask); - mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); - } else if (f->field_bsize == 16) { - mask_be32 = (__force __be32)(mask); - mask_be16 = *(__be16 *)&mask_be32; - mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); - } + mask = mask_to_le(mask, f->field_bsize); first = find_first_bit(&mask, f->field_bsize); next_z = find_next_zero_bit(&mask, f->field_bsize, first); @@ -2743,9 +2750,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, if (cmd == MLX5_ACTION_TYPE_SET) { int start; + field_mask = mask_to_le(f->field_mask, f->field_bsize); + /* if field is bit sized it can start not from first bit */ - start = find_first_bit((unsigned long *)&f->field_mask, - f->field_bsize); + start = find_first_bit(&field_mask, f->field_bsize); MLX5_SET(set_action_in, action, offset, first - start); /* length is num of bits to be written, zero means length of 32 */ From 47c97e6b10a1e3680cad539929da092bfa535446 Mon Sep 17 00:00:00 2001 From: Ron Diskin Date: Sun, 10 May 2020 14:39:51 +0300 Subject: [PATCH 635/648] net/mlx5e: Fix multicast counter not up-to-date in "ip -s" Currently the FW does not generate events for counters other than error counters. Unlike ".get_ethtool_stats", ".ndo_get_stats64" (which ip -s uses) might run in atomic context, while the FW interface is non atomic. Thus, 'ip' is not allowed to issue FW commands, so it will only display cached counters in the driver. Add a SW counter (mcast_packets) in the driver to count rx multicast packets. The counter also counts broadcast packets, as we consider it a special case of multicast. Use the counter value when calling "ip -s"/"ifconfig". Fixes: f62b8bb8f2d3 ("net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality") Signed-off-by: Ron Diskin Reviewed-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 2 ++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 9334c9c3e208..24336c60123a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -20,6 +20,11 @@ enum mlx5e_icosq_wqe_type { }; /* General */ +static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb) +{ + return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST; +} + void mlx5e_trigger_irq(struct mlx5e_icosq *sq); void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 917c28e7f29e..d46047235568 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3569,6 +3569,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) s->rx_packets += rq_stats->packets + xskrq_stats->packets; s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; + s->multicast += rq_stats->mcast_packets + xskrq_stats->mcast_packets; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -3584,7 +3585,6 @@ void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; /* In switchdev mode, monitor counters doesn't monitor @@ -3619,12 +3619,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + stats->rx_frame_errors; stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; - - /* vport multicast also counts packets that are dropped due to steering - * or rx out of buffer - */ - stats->multicast = - VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); } static void mlx5e_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 99d102c035b0..64c8ac5eabf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -53,6 +53,7 @@ #include "en/xsk/rx.h" #include "en/health.h" #include "en/params.h" +#include "en/txrx.h" static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, @@ -1080,6 +1081,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, mlx5e_enable_ecn(rq, skb); skb->protocol = eth_type_trans(skb, netdev); + + if (unlikely(mlx5e_skb_is_multicast(skb))) + stats->mcast_packets++; } static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 2e1cca1923b9..be7cda283781 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -119,6 +119,7 @@ struct mlx5e_sw_stats { u64 tx_nop; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_mcast_packets; u64 rx_ecn_mark; u64 rx_removed_vlan_packets; u64 rx_csum_unnecessary; @@ -298,6 +299,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 mcast_packets; u64 ecn_mark; u64 removed_vlan_packets; u64 xdp_drop; From b521105b68a2e3334d69202fb354d7b711b01aa3 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Tue, 25 Aug 2020 10:41:50 +0300 Subject: [PATCH 636/648] net/mlx5e: Fix using wrong stats_grps in mlx5e_update_ndo_stats() The cited commit started to reuse function mlx5e_update_ndo_stats() for the representors as well. However, the function is hard-coded to work on mlx5e_nic_stats_grps only. Due to this issue, the representors statistics were not updated in the output of "ip -s". Fix it to work with the correct group by extracting it from the caller's profile. Also, while at it and since this function became generic, move it to en_stats.c and rename it accordingly. Fixes: 8a236b15144b ("net/mlx5e: Convert rep stats to mlx5e_stats_grp-based infra") Signed-off-by: Alaa Hleihel Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - .../ethernet/mellanox/mlx5/core/en/monitor_stats.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 +----------- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 12 ++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 1 + 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5d7b79518449..90d5caabd6af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1005,7 +1005,6 @@ int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); -void mlx5e_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c index 8fe8b4d6ad1c..254c84739046 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -51,7 +51,7 @@ static void mlx5e_monitor_counters_work(struct work_struct *work) monitor_counters_work); mutex_lock(&priv->state_lock); - mlx5e_update_ndo_stats(priv); + mlx5e_stats_update_ndo_stats(priv); mutex_unlock(&priv->state_lock); mlx5e_monitor_counter_arm(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d46047235568..b3cda7b6e5e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -158,16 +158,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) -{ - int i; - - for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--) - if (mlx5e_nic_stats_grps[i]->update_stats_mask & - MLX5E_NDO_UPDATE_STATS) - mlx5e_nic_stats_grps[i]->update_stats(priv); -} - static void mlx5e_update_stats_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, @@ -5187,7 +5177,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, .update_rx = mlx5e_update_nic_rx, - .update_stats = mlx5e_update_ndo_stats, + .update_stats = mlx5e_stats_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_nic, .max_tc = MLX5E_MAX_NUM_TC, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e13e5d1b3eae..e979bff64c49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1171,7 +1171,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_rep_enable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_update_ndo_stats, + .update_stats = mlx5e_stats_update_ndo_stats, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = 1, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), @@ -1189,7 +1189,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, .update_rx = mlx5e_update_rep_rx, - .update_stats = mlx5e_update_ndo_stats, + .update_stats = mlx5e_stats_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = MLX5E_MAX_NUM_TC, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index e3b2f59408e6..f6383bc2bc3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -54,6 +54,18 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv) return total; } +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv) +{ + mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; + const unsigned int num_stats_grps = stats_grps_num(priv); + int i; + + for (i = num_stats_grps - 1; i >= 0; i--) + if (stats_grps[i]->update_stats && + stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS) + stats_grps[i]->update_stats(priv); +} + void mlx5e_stats_update(struct mlx5e_priv *priv) { mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index be7cda283781..562263d62141 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -103,6 +103,7 @@ unsigned int mlx5e_stats_total_num(struct mlx5e_priv *priv); void mlx5e_stats_update(struct mlx5e_priv *priv); void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx); void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data); +void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); /* Concrete NIC Stats */ From 8f0bcd19b1da3f264223abea985b9462e85a3718 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 28 Jun 2020 13:06:06 +0300 Subject: [PATCH 637/648] net/mlx5e: TLS, Do not expose FPGA TLS counter if not supported The set of TLS TX global SW counters in mlx5e_tls_sw_stats_desc is updated from all rings by using atomic ops. This set of stats is used only in the FPGA TLS use case, not in the Connect-X TLS one, where regular per-ring counters are used. Do not expose them in the Connect-X use case, as this would cause counter duplication. For example, tx_tls_drop_no_sync_data would appear twice in the ethtool stats. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/tls_stats.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c index 01468ec27446..b949b9a7538b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -35,7 +35,6 @@ #include #include "en.h" -#include "accel/tls.h" #include "fpga/sdk.h" #include "en_accel/tls.h" @@ -51,9 +50,14 @@ static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { #define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc) +static bool is_tls_atomic_stats(struct mlx5e_priv *priv) +{ + return priv->tls && !mlx5_accel_is_ktls_device(priv->mdev); +} + int mlx5e_tls_get_count(struct mlx5e_priv *priv) { - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; return NUM_TLS_SW_COUNTERS; @@ -63,7 +67,7 @@ int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { unsigned int i, idx = 0; - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) @@ -77,7 +81,7 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { int i, idx = 0; - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) From 6e8de0b6b4690ba7a685db33801bf7536bac674f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 10 Aug 2020 15:59:41 +0300 Subject: [PATCH 638/648] net/mlx5e: kTLS, Fix napi sync and possible use-after-free Using synchronize_rcu() is sufficient to wait until running NAPI quits. See similar upstream fix with detailed explanation: ("net/mlx5e: Use synchronize_rcu to sync with NAPI") This change also fixes a possible use-after-free as the NAPI might be already released at this stage. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index acf6d80a6bb7..f95aa50ab51a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -659,7 +659,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL); - napi_synchronize(&priv->channels.c[priv_rx->rxq]->napi); + synchronize_rcu(); /* Sync with NAPI */ if (!cancel_work_sync(&priv_rx->rule.work)) /* completion is needed, as the priv_rx in the add flow * is maintained on the wqe info (wi), not on the socket. From 66ce5fc05713e6b90f74c01439f2aaa137ffd9f4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 7 Sep 2020 22:58:50 -0700 Subject: [PATCH 639/648] net/mlx5e: kTLS, Add missing dma_unmap in RX resync Progress params dma address is never unmapped, unmap it when completion handling is over. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index f95aa50ab51a..fb4e4f2ebe02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -386,16 +386,17 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, struct mlx5e_ktls_offload_context_rx *priv_rx; struct mlx5e_ktls_rx_resync_ctx *resync; u8 tracker_state, auth_state, *ctx; + struct device *dev; u32 hw_seq; priv_rx = buf->priv_rx; resync = &priv_rx->resync; - + dev = resync->priv->mdev->device; if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; - dma_sync_single_for_cpu(resync->priv->mdev->device, buf->dma_addr, - PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, + DMA_FROM_DEVICE); ctx = buf->progress.ctx; tracker_state = MLX5_GET(tls_progress_params, ctx, record_tracker_state); @@ -411,6 +412,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx->stats->tls_resync_req_end++; out: refcount_dec(&resync->refcnt); + dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); kfree(buf); } From 581642f32f33017619355b7d91704c152df1fc2d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 10 Sep 2020 21:16:04 -0700 Subject: [PATCH 640/648] net/mlx5e: kTLS, Fix leak on resync error flow Resync progress params buffer and dma weren't released on error, Add missing error unwinding for resync_post_get_progress_params(). Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index fb4e4f2ebe02..e85411bd1fed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -258,7 +258,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { err = -ENOMEM; - goto err_out; + goto err_free; } buf->priv_rx = priv_rx; @@ -266,7 +266,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { err = -ENOSPC; - goto err_out; + goto err_dma_unmap; } pi = mlx5e_icosq_get_next_pi(sq, 1); @@ -297,6 +297,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, return cseg; +err_dma_unmap: + dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); +err_free: + kfree(buf); err_out: priv_rx->stats->tls_resync_req_skip++; return ERR_PTR(err); From 94c4fed710c372313157f20b5eced39d04a96f88 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 7 Sep 2020 22:54:43 -0700 Subject: [PATCH 641/648] net/mlx5e: kTLS, Avoid kzalloc(GFP_KERNEL) under spinlock The spinlock only needed when accessing the channel's icosq, grab the lock after the buf allocation in resync_post_get_progress_params() to avoid kzalloc(GFP_KERNEL) in atomic context. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Reported-by: YueHaibing Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index e85411bd1fed..6bbfcf18107d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -234,7 +234,7 @@ mlx5e_get_ktls_rx_priv_ctx(struct tls_context *tls_ctx) /* Re-sync */ /* Runs in work context */ -static struct mlx5_wqe_ctrl_seg * +static int resync_post_get_progress_params(struct mlx5e_icosq *sq, struct mlx5e_ktls_offload_context_rx *priv_rx) { @@ -264,7 +264,11 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, buf->priv_rx = priv_rx; BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); + + spin_lock(&sq->channel->async_icosq_lock); + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + spin_unlock(&sq->channel->async_icosq_lock); err = -ENOSPC; goto err_dma_unmap; } @@ -294,8 +298,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, }; icosq_fill_wi(sq, pi, &wi); sq->pc++; + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); + spin_unlock(&sq->channel->async_icosq_lock); - return cseg; + return 0; err_dma_unmap: dma_unmap_single(pdev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -303,7 +309,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, kfree(buf); err_out: priv_rx->stats->tls_resync_req_skip++; - return ERR_PTR(err); + return err; } /* Function is called with elevated refcount. @@ -313,10 +319,8 @@ static void resync_handle_work(struct work_struct *work) { struct mlx5e_ktls_offload_context_rx *priv_rx; struct mlx5e_ktls_rx_resync_ctx *resync; - struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_channel *c; struct mlx5e_icosq *sq; - struct mlx5_wq_cyc *wq; resync = container_of(work, struct mlx5e_ktls_rx_resync_ctx, work); priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync); @@ -328,18 +332,9 @@ static void resync_handle_work(struct work_struct *work) c = resync->priv->channels.c[priv_rx->rxq]; sq = &c->async_icosq; - wq = &sq->wq; - spin_lock(&c->async_icosq_lock); - - cseg = resync_post_get_progress_params(sq, priv_rx); - if (IS_ERR(cseg)) { + if (resync_post_get_progress_params(sq, priv_rx)) refcount_dec(&resync->refcnt); - goto unlock; - } - mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); -unlock: - spin_unlock(&c->async_icosq_lock); } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, From cb39ccc5cbe1011d8d21886b75e2468070ac672c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 11 Sep 2020 11:00:06 -0700 Subject: [PATCH 642/648] net/mlx5e: mlx5e_fec_in_caps() returns a boolean Returning errno is a bug, fix that. Also fixes smatch warnings: drivers/net/ethernet/mellanox/mlx5/core/en/port.c:453 mlx5e_fec_in_caps() warn: signedness bug returning '(-95)' Fixes: 2132b71f78d2 ("net/mlx5e: Advertise globaly supported FEC modes") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Saeed Mahameed Reviewed-by: Moshe Shemesh Reviewed-by: Aya Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 5de1cb9f5330..96608dbb9314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -490,11 +490,8 @@ bool mlx5e_fec_in_caps(struct mlx5_core_dev *dev, int fec_policy) int err; int i; - if (!MLX5_CAP_GEN(dev, pcam_reg)) - return -EOPNOTSUPP; - - if (!MLX5_CAP_PCAM_REG(dev, pplm)) - return -EOPNOTSUPP; + if (!MLX5_CAP_GEN(dev, pcam_reg) || !MLX5_CAP_PCAM_REG(dev, pplm)) + return false; MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); From 2b617c11d7c0791a18402407222ca9f2c343c47b Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Tue, 22 Sep 2020 07:09:23 +0800 Subject: [PATCH 643/648] net: Update MAINTAINERS for MediaTek switch driver Update maintainers for MediaTek switch driver with Landen Chao who is familiar with MediaTek MT753x switch devices and will help maintenance from the vendor side. Cc: Steven Liu Signed-off-by: Sean Wang Signed-off-by: Landen Chao Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad9a35b91325..8f04b81e19a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11029,6 +11029,7 @@ F: drivers/char/hw_random/mtk-rng.c MEDIATEK SWITCH DRIVER M: Sean Wang +M: Landen Chao L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530.* From 99f62a746066fa436aa15d4606a538569540db08 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Sep 2020 01:07:09 +0300 Subject: [PATCH 644/648] net: bridge: br_vlan_get_pvid_rcu() should dereference the VLAN group under RCU When calling the RCU brother of br_vlan_get_pvid(), lockdep warns: ============================= WARNING: suspicious RCU usage 5.9.0-rc3-01631-g13c17acb8e38-dirty #814 Not tainted ----------------------------- net/bridge/br_private.h:1054 suspicious rcu_dereference_protected() usage! Call trace: lockdep_rcu_suspicious+0xd4/0xf8 __br_vlan_get_pvid+0xc0/0x100 br_vlan_get_pvid_rcu+0x78/0x108 The warning is because br_vlan_get_pvid_rcu() calls nbp_vlan_group() which calls rtnl_dereference() instead of rcu_dereference(). In turn, rtnl_dereference() calls rcu_dereference_protected() which assumes operation under an RCU write-side critical section, which obviously is not the case here. So, when the incorrect primitive is used to access the RCU-protected VLAN group pointer, READ_ONCE() is not used, which may cause various unexpected problems. I'm sad to say that br_vlan_get_pvid() and br_vlan_get_pvid_rcu() cannot share the same implementation. So fix the bug by splitting the 2 functions, and making br_vlan_get_pvid_rcu() retrieve the VLAN groups under proper locking annotations. Fixes: 7582f5b70f9a ("bridge: add br_vlan_get_pvid_rcu()") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f9092c71225f..61c94cefa843 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1288,11 +1288,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, } } -static int __br_vlan_get_pvid(const struct net_device *dev, - struct net_bridge_port *p, u16 *p_pvid) +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) { struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + ASSERT_RTNL(); + p = br_port_get_check_rtnl(dev); if (p) vg = nbp_vlan_group(p); else if (netif_is_bridge_master(dev)) @@ -1303,18 +1305,23 @@ static int __br_vlan_get_pvid(const struct net_device *dev, *p_pvid = br_get_pvid(vg); return 0; } - -int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) -{ - ASSERT_RTNL(); - - return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid); -} EXPORT_SYMBOL_GPL(br_vlan_get_pvid); int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid) { - return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid); + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group_rcu(netdev_priv(dev)); + else + return -EINVAL; + + *p_pvid = br_get_pvid(vg); + return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu); From d5e4d0a5e692a942f0c212e37dc6aeac47ecbdea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Sep 2020 07:27:20 -0700 Subject: [PATCH 645/648] inet_diag: validate INET_DIAG_REQ_PROTOCOL attribute User space could send an invalid INET_DIAG_REQ_PROTOCOL attribute as caught by syzbot. BUG: KMSAN: uninit-value in inet_diag_lock_handler net/ipv4/inet_diag.c:55 [inline] BUG: KMSAN: uninit-value in __inet_diag_dump+0x58c/0x720 net/ipv4/inet_diag.c:1147 CPU: 0 PID: 8505 Comm: syz-executor174 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:122 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:219 inet_diag_lock_handler net/ipv4/inet_diag.c:55 [inline] __inet_diag_dump+0x58c/0x720 net/ipv4/inet_diag.c:1147 inet_diag_dump_compat+0x2a5/0x380 net/ipv4/inet_diag.c:1254 netlink_dump+0xb73/0x1cb0 net/netlink/af_netlink.c:2246 __netlink_dump_start+0xcf2/0xea0 net/netlink/af_netlink.c:2354 netlink_dump_start include/linux/netlink.h:246 [inline] inet_diag_rcv_msg_compat+0x5da/0x6c0 net/ipv4/inet_diag.c:1288 sock_diag_rcv_msg+0x24f/0x620 net/core/sock_diag.c:256 netlink_rcv_skb+0x6d7/0x7e0 net/netlink/af_netlink.c:2470 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:275 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x11c8/0x1490 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x173a/0x1840 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc82/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d1/0x820 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x441389 Code: e8 fc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 1b 09 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff3b02ce98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441389 RDX: 0000000000000000 RSI: 0000000020001500 RDI: 0000000000000003 RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000402130 R13: 00000000004021c0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:143 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:126 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:80 slab_alloc_node mm/slub.c:2907 [inline] __kmalloc_node_track_caller+0x9aa/0x12f0 mm/slub.c:4511 __kmalloc_reserve net/core/skbuff.c:142 [inline] __alloc_skb+0x35f/0xb30 net/core/skbuff.c:210 alloc_skb include/linux/skbuff.h:1094 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1176 [inline] netlink_sendmsg+0xdb9/0x1840 net/netlink/af_netlink.c:1894 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:671 [inline] ____sys_sendmsg+0xc82/0x1240 net/socket.c:2353 ___sys_sendmsg net/socket.c:2407 [inline] __sys_sendmsg+0x6d1/0x820 net/socket.c:2440 __do_sys_sendmsg net/socket.c:2449 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2447 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2447 do_syscall_64+0x9f/0x140 arch/x86/entry/common.c:48 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 3f935c75eb52 ("inet_diag: support for wider protocol numbers") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Cc: Christoph Paasch Cc: Mat Martineau Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4a98dd736270..f1bd95f243b3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -186,8 +186,8 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); -static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, - struct nlattr **req_nlas) +static int inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr **req_nlas) { struct nlattr *nla; int remaining; @@ -195,9 +195,13 @@ static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) { int type = nla_type(nla); + if (type == INET_DIAG_REQ_PROTOCOL && nla_len(nla) != sizeof(u32)) + return -EINVAL; + if (type < __INET_DIAG_REQ_MAX) req_nlas[type] = nla; } + return 0; } static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req, @@ -574,7 +578,10 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, int err, protocol; memset(&dump_data, 0, sizeof(dump_data)); - inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); + err = inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); + if (err) + return err; + protocol = inet_diag_get_protocol(req, &dump_data); handler = inet_diag_lock_handler(protocol); @@ -1180,8 +1187,11 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) if (!cb_data) return -ENOMEM; - inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); - + err = inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); + if (err) { + kfree(cb_data); + return err; + } nla = cb_data->inet_diag_nla_bc; if (nla) { err = inet_diag_bc_audit(nla, skb); From 8b9e03cd08250c60409099c791e858157838d9eb Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Tue, 22 Sep 2020 01:56:36 +0300 Subject: [PATCH 646/648] net: dsa: felix: fix some key offsets for IP4_TCP_UDP VCAP IS2 entries Some of the IS2 IP4_TCP_UDP keys are not correct, like L4_DPORT, L4_SPORT and other L4 keys. This prevents offloaded tc-flower rules from matching on src_port and dst_port for TCP and UDP packets. Signed-off-by: Xiaoliang Yang Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 9b720c8ddfc3..6855c94256f8 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -645,17 +645,17 @@ static struct vcap_field vsc9959_vcap_is2_keys[] = { [VCAP_IS2_HK_DIP_EQ_SIP] = {118, 1}, /* IP4_TCP_UDP (TYPE=100) */ [VCAP_IS2_HK_TCP] = {119, 1}, - [VCAP_IS2_HK_L4_SPORT] = {120, 16}, - [VCAP_IS2_HK_L4_DPORT] = {136, 16}, + [VCAP_IS2_HK_L4_DPORT] = {120, 16}, + [VCAP_IS2_HK_L4_SPORT] = {136, 16}, [VCAP_IS2_HK_L4_RNG] = {152, 8}, [VCAP_IS2_HK_L4_SPORT_EQ_DPORT] = {160, 1}, [VCAP_IS2_HK_L4_SEQUENCE_EQ0] = {161, 1}, - [VCAP_IS2_HK_L4_URG] = {162, 1}, - [VCAP_IS2_HK_L4_ACK] = {163, 1}, - [VCAP_IS2_HK_L4_PSH] = {164, 1}, - [VCAP_IS2_HK_L4_RST] = {165, 1}, - [VCAP_IS2_HK_L4_SYN] = {166, 1}, - [VCAP_IS2_HK_L4_FIN] = {167, 1}, + [VCAP_IS2_HK_L4_FIN] = {162, 1}, + [VCAP_IS2_HK_L4_SYN] = {163, 1}, + [VCAP_IS2_HK_L4_RST] = {164, 1}, + [VCAP_IS2_HK_L4_PSH] = {165, 1}, + [VCAP_IS2_HK_L4_ACK] = {166, 1}, + [VCAP_IS2_HK_L4_URG] = {167, 1}, [VCAP_IS2_HK_L4_1588_DOM] = {168, 8}, [VCAP_IS2_HK_L4_1588_VER] = {176, 4}, /* IP4_OTHER (TYPE=101) */ From 7a0230759ea6795af037f3fd9526d00dc896ce19 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Sep 2020 01:56:37 +0300 Subject: [PATCH 647/648] net: dsa: seville: fix some key offsets for IP4_TCP_UDP VCAP IS2 entries Since these were copied from the Felix VCAP IS2 code, and only the offsets were adjusted, the order of the bit fields is still wrong. Fix it. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/seville_vsc9953.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 0fdeff22a76c..29df0797ecf5 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -659,17 +659,17 @@ static struct vcap_field vsc9953_vcap_is2_keys[] = { [VCAP_IS2_HK_DIP_EQ_SIP] = {122, 1}, /* IP4_TCP_UDP (TYPE=100) */ [VCAP_IS2_HK_TCP] = {123, 1}, - [VCAP_IS2_HK_L4_SPORT] = {124, 16}, - [VCAP_IS2_HK_L4_DPORT] = {140, 16}, + [VCAP_IS2_HK_L4_DPORT] = {124, 16}, + [VCAP_IS2_HK_L4_SPORT] = {140, 16}, [VCAP_IS2_HK_L4_RNG] = {156, 8}, [VCAP_IS2_HK_L4_SPORT_EQ_DPORT] = {164, 1}, [VCAP_IS2_HK_L4_SEQUENCE_EQ0] = {165, 1}, - [VCAP_IS2_HK_L4_URG] = {166, 1}, - [VCAP_IS2_HK_L4_ACK] = {167, 1}, - [VCAP_IS2_HK_L4_PSH] = {168, 1}, - [VCAP_IS2_HK_L4_RST] = {169, 1}, - [VCAP_IS2_HK_L4_SYN] = {170, 1}, - [VCAP_IS2_HK_L4_FIN] = {171, 1}, + [VCAP_IS2_HK_L4_FIN] = {166, 1}, + [VCAP_IS2_HK_L4_SYN] = {167, 1}, + [VCAP_IS2_HK_L4_RST] = {168, 1}, + [VCAP_IS2_HK_L4_PSH] = {169, 1}, + [VCAP_IS2_HK_L4_ACK] = {170, 1}, + [VCAP_IS2_HK_L4_URG] = {171, 1}, /* IP4_OTHER (TYPE=101) */ [VCAP_IS2_HK_IP4_L3_PROTO] = {123, 8}, [VCAP_IS2_HK_L3_PAYLOAD] = {131, 56}, From 8194d8fa719f78007c0c4d5c721b6b0dfa290f81 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Sep 2020 01:56:38 +0300 Subject: [PATCH 648/648] net: mscc: ocelot: fix some key offsets for IP4_TCP_UDP VCAP IS2 entries The IS2 IP4_TCP_UDP key offsets do not correspond to the VSC7514 datasheet. Whether they work or not is unknown to me. On VSC9959 and VSC9953, with the same mistake and same discrepancy from the documentation, tc-flower src_port and dst_port rules did not work, so I am assuming the same is true here. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index e02fb8bfab63..dfb1535f26f2 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -806,17 +806,17 @@ static const struct vcap_field vsc7514_vcap_is2_keys[] = { [VCAP_IS2_HK_DIP_EQ_SIP] = {123, 1}, /* IP4_TCP_UDP (TYPE=100) */ [VCAP_IS2_HK_TCP] = {124, 1}, - [VCAP_IS2_HK_L4_SPORT] = {125, 16}, - [VCAP_IS2_HK_L4_DPORT] = {141, 16}, + [VCAP_IS2_HK_L4_DPORT] = {125, 16}, + [VCAP_IS2_HK_L4_SPORT] = {141, 16}, [VCAP_IS2_HK_L4_RNG] = {157, 8}, [VCAP_IS2_HK_L4_SPORT_EQ_DPORT] = {165, 1}, [VCAP_IS2_HK_L4_SEQUENCE_EQ0] = {166, 1}, - [VCAP_IS2_HK_L4_URG] = {167, 1}, - [VCAP_IS2_HK_L4_ACK] = {168, 1}, - [VCAP_IS2_HK_L4_PSH] = {169, 1}, - [VCAP_IS2_HK_L4_RST] = {170, 1}, - [VCAP_IS2_HK_L4_SYN] = {171, 1}, - [VCAP_IS2_HK_L4_FIN] = {172, 1}, + [VCAP_IS2_HK_L4_FIN] = {167, 1}, + [VCAP_IS2_HK_L4_SYN] = {168, 1}, + [VCAP_IS2_HK_L4_RST] = {169, 1}, + [VCAP_IS2_HK_L4_PSH] = {170, 1}, + [VCAP_IS2_HK_L4_ACK] = {171, 1}, + [VCAP_IS2_HK_L4_URG] = {172, 1}, [VCAP_IS2_HK_L4_1588_DOM] = {173, 8}, [VCAP_IS2_HK_L4_1588_VER] = {181, 4}, /* IP4_OTHER (TYPE=101) */