From 30b3f68715595dee7fe4d9bd91a2252c3becdf0a Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Sun, 7 Mar 2021 15:12:22 -0800 Subject: [PATCH 001/180] Input: s6sy761 - fix coordinate read bit shift The touch coordinate register contains the following: byte 3 byte 2 byte 1 +--------+--------+ +-----------------+ +-----------------+ | | | | | | | | X[3:0] | Y[3:0] | | Y[11:4] | | X[11:4] | | | | | | | | +--------+--------+ +-----------------+ +-----------------+ Bytes 2 and 1 need to be shifted left by 4 bits, the least significant nibble of each is stored in byte 3. Currently they are only being shifted by 3 causing the reported coordinates to be incorrect. This matches downstream examples, and has been confirmed on my device (OnePlus 7 Pro). Fixes: 0145a7141e59 ("Input: add support for the Samsung S6SY761 touchscreen") Signed-off-by: Caleb Connolly Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20210305185710.225168-1-caleb@connolly.tech Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/s6sy761.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/s6sy761.c b/drivers/input/touchscreen/s6sy761.c index b63d7fdf0cd2..85a1f465c097 100644 --- a/drivers/input/touchscreen/s6sy761.c +++ b/drivers/input/touchscreen/s6sy761.c @@ -145,8 +145,8 @@ static void s6sy761_report_coordinates(struct s6sy761_data *sdata, u8 major = event[4]; u8 minor = event[5]; u8 z = event[6] & S6SY761_MASK_Z; - u16 x = (event[1] << 3) | ((event[3] & S6SY761_MASK_X) >> 4); - u16 y = (event[2] << 3) | (event[3] & S6SY761_MASK_Y); + u16 x = (event[1] << 4) | ((event[3] & S6SY761_MASK_X) >> 4); + u16 y = (event[2] << 4) | (event[3] & S6SY761_MASK_Y); input_mt_slot(sdata->input, tid); From 36b87cf302a4f13f8b4344bcf98f67405a145e2f Mon Sep 17 00:00:00 2001 From: Shou-Chieh Hsu Date: Tue, 2 Mar 2021 11:58:01 +0800 Subject: [PATCH 002/180] HID: google: add don USB id Add 1 additional hammer-like device. Signed-off-by: Shou-Chieh Hsu Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index d9319622da44..e60c31dd05ff 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -573,6 +573,8 @@ static void hammer_remove(struct hid_device *hdev) } static const struct hid_device_id hammer_devices[] = { + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e42aaae3138f..0b4929258478 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -493,6 +493,7 @@ #define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 +#define USB_DEVICE_ID_GOOGLE_DON 0x5050 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From 9a0b44fbfea1932196a4879b44a37dd182e984c5 Mon Sep 17 00:00:00 2001 From: Luke D Jones Date: Fri, 19 Feb 2021 10:10:02 +1300 Subject: [PATCH 003/180] HID: asus: Add support for 2021 ASUS N-Key keyboard Some new 2021 version of ASUS gamer laptops are using an updated N-Key keyboard with the PID of 0x19b6. This version is using the same init sequence and brightness control as the 0x1866 keyboard. Signed-off-by: Luke D Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 3 +++ drivers/hid/hid-ids.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 1dfe184ebf5a..2ab22b925941 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1221,6 +1221,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100TA_KEYBOARD), QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0b4929258478..67fd8a2f5aba 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -194,6 +194,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2 0x1837 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD3 0x1822 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD 0x1866 +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD2 0x19b6 #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 #define USB_VENDOR_ID_ATEN 0x0557 From fa8ba6e5dc0e78e409e503ddcfceef5dd96527f4 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 4 Mar 2021 05:19:57 -0800 Subject: [PATCH 004/180] HID: alps: fix error return code in alps_input_configured() When input_register_device() fails, no error return code is assigned. To fix this bug, ret is assigned with -ENOENT as error return code. Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: Jiri Kosina --- drivers/hid/hid-alps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index 3feaece13ade..6b665931147d 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -761,6 +761,7 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) if (input_register_device(data->input2)) { input_free_device(input2); + ret = -ENOENT; goto exit; } } From 2a2b09c867fdac63f430a45051e7bd0c46edc381 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sat, 9 Jan 2021 17:36:58 -0500 Subject: [PATCH 005/180] HID cp2112: fix support for multiple gpiochips In lk 5.11.0-rc2 connecting a USB based Silicon Labs HID to I2C bridge evaluation board (CP2112EK) causes this warning: gpio gpiochip0: (cp2112_gpio): detected irqchip that is shared with multiple gpiochips: please fix the driver Simply copy what other gpio related drivers do to fix this particular warning: replicate the struct irq_chip object in each device instance rather than have a static object which makes that object (incorrectly) shared by each device. Signed-off-by: Douglas Gilbert Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 21e15627a461..477baa30889c 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -161,6 +161,7 @@ struct cp2112_device { atomic_t read_avail; atomic_t xfer_avail; struct gpio_chip gc; + struct irq_chip irq; u8 *in_out_buffer; struct mutex lock; @@ -1175,16 +1176,6 @@ static int cp2112_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip cp2112_gpio_irqchip = { - .name = "cp2112-gpio", - .irq_startup = cp2112_gpio_irq_startup, - .irq_shutdown = cp2112_gpio_irq_shutdown, - .irq_ack = cp2112_gpio_irq_ack, - .irq_mask = cp2112_gpio_irq_mask, - .irq_unmask = cp2112_gpio_irq_unmask, - .irq_set_type = cp2112_gpio_irq_type, -}; - static int __maybe_unused cp2112_allocate_irq(struct cp2112_device *dev, int pin) { @@ -1339,8 +1330,17 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) dev->gc.can_sleep = 1; dev->gc.parent = &hdev->dev; + dev->irq.name = "cp2112-gpio"; + dev->irq.irq_startup = cp2112_gpio_irq_startup; + dev->irq.irq_shutdown = cp2112_gpio_irq_shutdown; + dev->irq.irq_ack = cp2112_gpio_irq_ack; + dev->irq.irq_mask = cp2112_gpio_irq_mask; + dev->irq.irq_unmask = cp2112_gpio_irq_unmask; + dev->irq.irq_set_type = cp2112_gpio_irq_type; + dev->irq.flags = IRQCHIP_MASK_ON_SUSPEND; + girq = &dev->gc.irq; - girq->chip = &cp2112_gpio_irqchip; + girq->chip = &dev->irq; /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; From e29c62ffb008829dc8bcc0a2ec438adc25a8255e Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Wed, 20 Jan 2021 15:34:30 +0800 Subject: [PATCH 006/180] HID: wacom: Assign boolean values to a bool variable Fix the following coccicheck warnings: ./drivers/hid/wacom_wac.c:2536:2-6: WARNING: Assignment of 0/1 to bool variable. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 44d715c12f6a..bdd9ba577150 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2533,7 +2533,7 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, !wacom_wac->shared->is_touch_on) { if (!wacom_wac->shared->touch_down) return; - prox = 0; + prox = false; } wacom_wac->hid_data.num_received++; From a9e54f4b62dcfed4432a5a89b1cd5903737f6e83 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Jan 2021 13:12:17 +0100 Subject: [PATCH 007/180] AMD_SFH: Removed unused activecontrolstatus member from the amd_mp2_dev struct This value is only used once inside amd_mp2_get_sensor_num(), so there is no need to store this in the amd_mp2_dev struct, amd_mp2_get_sensor_num() can simple use a local variable for this. Fixes: 4f567b9f8141 ("SFH: PCIe driver to add support of AMD sensor fusion hub") Signed-off-by: Hans de Goede Acked-by: Sandeep Singh --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 6 ++++-- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index dbac16641662..f3cdb4ea33da 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -76,9 +76,11 @@ void amd_stop_all_sensors(struct amd_mp2_dev *privdata) int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) { int activestatus, num_of_sensors = 0; + u32 activecontrolstatus; + + activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); + activestatus = activecontrolstatus >> 4; - privdata->activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); - activestatus = privdata->activecontrolstatus >> 4; if (ACEL_EN & activestatus) sensor_id[num_of_sensors++] = accel_idx; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index 8f8d19b2cfe5..489415f7c22c 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -61,7 +61,6 @@ struct amd_mp2_dev { struct pci_dev *pdev; struct amdtp_cl_data *cl_data; void __iomem *mmio; - u32 activecontrolstatus; }; struct amd_mp2_sensor_info { From 952f7d10c6b1685c6700fb24cf4ecbcf26ede77e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Jan 2021 13:12:18 +0100 Subject: [PATCH 008/180] AMD_SFH: Add sensor_mask module parameter Add a sensor_mask module parameter which can be used to override the sensor-mask read from the activestatus bits of the AMD_P2C_MSG3 registers. Some BIOS-es do not program the activestatus bits, leading to the AMD-SFH driver not registering any HID devices even though the laptop in question does actually have sensors. While at it also fix the wrong indentation of the MAGNO_EN define. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199715 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1651886 Fixes: 4f567b9f8141 ("SFH: PCIe driver to add support of AMD sensor fusion hub") Suggested-by: Richard Neumann Signed-off-by: Hans de Goede Acked-by: Sandeep Singh --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index f3cdb4ea33da..ab0a9443e252 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -22,9 +22,13 @@ #define ACEL_EN BIT(0) #define GYRO_EN BIT(1) -#define MAGNO_EN BIT(2) +#define MAGNO_EN BIT(2) #define ALS_EN BIT(19) +static int sensor_mask_override = -1; +module_param_named(sensor_mask, sensor_mask_override, int, 0444); +MODULE_PARM_DESC(sensor_mask, "override the detected sensors mask"); + void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info) { union sfh_cmd_param cmd_param; @@ -78,8 +82,12 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) int activestatus, num_of_sensors = 0; u32 activecontrolstatus; - activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); - activestatus = activecontrolstatus >> 4; + if (sensor_mask_override >= 0) { + activestatus = sensor_mask_override; + } else { + activecontrolstatus = readl(privdata->mmio + AMD_P2C_MSG3); + activestatus = activecontrolstatus >> 4; + } if (ACEL_EN & activestatus) sensor_id[num_of_sensors++] = accel_idx; From 25615e454a0ec198254f17d2ed79b607cb755d0e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Jan 2021 13:12:19 +0100 Subject: [PATCH 009/180] AMD_SFH: Add DMI quirk table for BIOS-es which don't set the activestatus bits Some BIOS-es do not initialize the activestatus bits of the AMD_P2C_MSG3 register. This cause the AMD_SFH driver to not register any sensors even though the laptops in question do have sensors. Add a DMI quirk-table for specifying sensor-mask overrides based on DMI match, to make the sensors work OOTB on these laptop models. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199715 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1651886 Fixes: 4f567b9f8141 ("SFH: PCIe driver to add support of AMD sensor fusion hub") Signed-off-by: Hans de Goede Acked-by: Sandeep Singh --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index ab0a9443e252..ddecc84fd6f0 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -77,11 +78,34 @@ void amd_stop_all_sensors(struct amd_mp2_dev *privdata) writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } +static const struct dmi_system_id dmi_sensor_mask_overrides[] = { + { + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "HP ENVY x360 Convertible 13-ag0xxx"), + }, + .driver_data = (void *)(ACEL_EN | MAGNO_EN), + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "HP ENVY x360 Convertible 15-cp0xxx"), + }, + .driver_data = (void *)(ACEL_EN | MAGNO_EN), + }, + { } +}; + int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) { int activestatus, num_of_sensors = 0; + const struct dmi_system_id *dmi_id; u32 activecontrolstatus; + if (sensor_mask_override == -1) { + dmi_id = dmi_first_match(dmi_sensor_mask_overrides); + if (dmi_id) + sensor_mask_override = (long)dmi_id->driver_data; + } + if (sensor_mask_override >= 0) { activestatus = sensor_mask_override; } else { From 2d8aaa1720c6128ce263a2afcd3f8ee2e5551af8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Mar 2021 11:40:30 -0800 Subject: [PATCH 010/180] Input: n64joy - fix return value check in n64joy_probe() In case of error, the function devm_platform_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 3bdffa8ffb45 ("Input: Add N64 controller driver") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Reviewed-by: Lauri Kasanen Link: https://lore.kernel.org/r/20210308122856.2177071-1-weiyongjun1@huawei.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/n64joy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/n64joy.c b/drivers/input/joystick/n64joy.c index 8bcc529942bc..9dbca366613e 100644 --- a/drivers/input/joystick/n64joy.c +++ b/drivers/input/joystick/n64joy.c @@ -252,8 +252,8 @@ static int __init n64joy_probe(struct platform_device *pdev) mutex_init(&priv->n64joy_mutex); priv->reg_base = devm_platform_ioremap_resource(pdev, 0); - if (!priv->reg_base) { - err = -EINVAL; + if (IS_ERR(priv->reg_base)) { + err = PTR_ERR(priv->reg_base); goto fail; } From 2fb164f0ce95e504e2688b4f984893c29ebd19ab Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 9 Mar 2021 01:01:07 +0100 Subject: [PATCH 011/180] mtd: rawnand: mtk: Fix WAITRDY break condition and timeout This fixes NAND_OP_WAITRDY_INSTR operation in the driver. Without this change the driver waits till the system is busy, but we should wait till the busy flag is cleared. The readl_poll_timeout() function gets a break condition, not a wait condition. In addition fix the timeout. The timeout_ms is given in ms, but the readl_poll_timeout() function takes the timeout in us. Multiple the given timeout by 1000 to convert it. Without this change, the driver does not work at all, it doesn't even identify the NAND chip. Fixes: 5197360f9e09 ("mtd: rawnand: mtk: Convert the driver to exec_op()") Signed-off-by: Hauke Mehrtens Reviewed-by: Boris Brezillon Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210309000107.1368404-1-hauke@hauke-m.de --- drivers/mtd/nand/raw/mtk_nand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index 57f1f1708994..5c5c92132287 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -488,8 +488,8 @@ static int mtk_nfc_exec_instr(struct nand_chip *chip, return 0; case NAND_OP_WAITRDY_INSTR: return readl_poll_timeout(nfc->regs + NFI_STA, status, - status & STA_BUSY, 20, - instr->ctx.waitrdy.timeout_ms); + !(status & STA_BUSY), 20, + instr->ctx.waitrdy.timeout_ms * 1000); default: break; } From 1cbd44666216278bbb6a55bcb6b9283702171c77 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 7 Mar 2021 06:06:28 +0200 Subject: [PATCH 012/180] dmaengine: xilinx: dpdma: Fix descriptor issuing on video group When multiple channels are part of a video group, the transfer is triggered only when all channels in the group are ready. The logic to do so is incorrect, as it causes the descriptors for all channels but the last one in a group to not being pushed to the hardware. Fix it. Signed-off-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210307040629.29308-2-laurent.pinchart@ideasonboard.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dpdma.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 55df63dead8d..d504112c609e 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -839,6 +839,7 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) struct xilinx_dpdma_tx_desc *desc; struct virt_dma_desc *vdesc; u32 reg, channels; + bool first_frame; lockdep_assert_held(&chan->lock); @@ -852,14 +853,6 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) chan->running = true; } - if (chan->video_group) - channels = xilinx_dpdma_chan_video_group_ready(chan); - else - channels = BIT(chan->id); - - if (!channels) - return; - vdesc = vchan_next_desc(&chan->vchan); if (!vdesc) return; @@ -884,13 +877,26 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) FIELD_PREP(XILINX_DPDMA_CH_DESC_START_ADDRE_MASK, upper_32_bits(sw_desc->dma_addr))); - if (chan->first_frame) + first_frame = chan->first_frame; + chan->first_frame = false; + + if (chan->video_group) { + channels = xilinx_dpdma_chan_video_group_ready(chan); + /* + * Trigger the transfer only when all channels in the group are + * ready. + */ + if (!channels) + return; + } else { + channels = BIT(chan->id); + } + + if (first_frame) reg = XILINX_DPDMA_GBL_TRIG_MASK(channels); else reg = XILINX_DPDMA_GBL_RETRIG_MASK(channels); - chan->first_frame = false; - dpdma_write(xdev->reg, XILINX_DPDMA_GBL, reg); } From 868833fbffbe51c487df4f95d4de9194264a4b30 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 7 Mar 2021 06:06:29 +0200 Subject: [PATCH 013/180] dmaengine: xilinx: dpdma: Fix race condition in done IRQ The active descriptor pointer is accessed from different contexts, including different interrupt handlers, and its access must be protected by the channel's lock. This wasn't done in the done IRQ handler. Fix it. Signed-off-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210307040629.29308-3-laurent.pinchart@ideasonboard.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dpdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index d504112c609e..70b29bd079c9 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -1048,13 +1048,14 @@ static int xilinx_dpdma_chan_stop(struct xilinx_dpdma_chan *chan) */ static void xilinx_dpdma_chan_done_irq(struct xilinx_dpdma_chan *chan) { - struct xilinx_dpdma_tx_desc *active = chan->desc.active; + struct xilinx_dpdma_tx_desc *active; unsigned long flags; spin_lock_irqsave(&chan->lock, flags); xilinx_dpdma_debugfs_desc_done_irq(chan); + active = chan->desc.active; if (active) vchan_cyclic_callback(&active->vdesc); else From 276559d8d02c2709281578976ca2f53bc62063d4 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Thu, 11 Mar 2021 11:30:09 -0800 Subject: [PATCH 014/180] HID: wacom: set EV_KEY and EV_ABS only for non-HID_GENERIC type of devices Valid HID_GENERIC type of devices set EV_KEY and EV_ABS by wacom_map_usage. When *_input_capabilities are reached, those devices should already have their proper EV_* set. EV_KEY and EV_ABS only need to be set for non-HID_GENERIC type of devices in *_input_capabilities. Devices that don't support HID descitoprs will pass back to hid-input for registration without being accidentally rejected by the introduction of patch: "Input: refuse to register absolute devices without absinfo" Fixes: 6ecfe51b4082 ("Input: refuse to register absolute devices without absinfo") Signed-off-by: Ping Cheng Reviewed-by: Jason Gerecke Tested-by: Juan Garrido CC: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index bdd9ba577150..2d70dc4bea65 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -3574,8 +3574,6 @@ int wacom_setup_pen_input_capabilities(struct input_dev *input_dev, { struct wacom_features *features = &wacom_wac->features; - input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); - if (!(features->device_type & WACOM_DEVICETYPE_PEN)) return -ENODEV; @@ -3590,6 +3588,7 @@ int wacom_setup_pen_input_capabilities(struct input_dev *input_dev, return 0; } + input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); __set_bit(BTN_TOUCH, input_dev->keybit); __set_bit(ABS_MISC, input_dev->absbit); @@ -3742,8 +3741,6 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev, { struct wacom_features *features = &wacom_wac->features; - input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); - if (!(features->device_type & WACOM_DEVICETYPE_TOUCH)) return -ENODEV; @@ -3756,6 +3753,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev, /* setup has already been done */ return 0; + input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); __set_bit(BTN_TOUCH, input_dev->keybit); if (features->touch_max == 1) { From e409a6a3e0690efdef9b8a96197bc61ff117cfaf Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Mar 2021 12:50:08 +0100 Subject: [PATCH 015/180] i2c: turn recovery error on init to debug In some configurations, recovery is optional. So, don't throw an error when it is not used because e.g. pinctrl settings for recovery are not provided. Reword the message and make it debug output. Reported-by: Klaus Kudielka Tested-by: Klaus Kudielka Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core-base.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 63ebf722a424..f21362355973 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -378,7 +378,7 @@ static int i2c_gpio_init_recovery(struct i2c_adapter *adap) static int i2c_init_recovery(struct i2c_adapter *adap) { struct i2c_bus_recovery_info *bri = adap->bus_recovery_info; - char *err_str; + char *err_str, *err_level = KERN_ERR; if (!bri) return 0; @@ -387,7 +387,8 @@ static int i2c_init_recovery(struct i2c_adapter *adap) return -EPROBE_DEFER; if (!bri->recover_bus) { - err_str = "no recover_bus() found"; + err_str = "no suitable method provided"; + err_level = KERN_DEBUG; goto err; } @@ -414,7 +415,7 @@ static int i2c_init_recovery(struct i2c_adapter *adap) return 0; err: - dev_err(&adap->dev, "Not using recovery: %s\n", err_str); + dev_printk(err_level, &adap->dev, "Not using recovery: %s\n", err_str); adap->bus_recovery_info = NULL; return -EINVAL; From 3a85969e9d912d5dd85362ee37b5f81266e00e77 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 21 Mar 2021 15:49:13 +0900 Subject: [PATCH 016/180] lockdep: Add a missing initialization hint to the "INFO: Trying to register non-static key" message Since this message is printed when dynamically allocated spinlocks (e.g. kzalloc()) are used without initialization (e.g. spin_lock_init()), suggest to developers to check whether initialization functions for objects were called, before making developers wonder what annotation is missing. [ mingo: Minor tweaks to the message. ] Signed-off-by: Tetsuo Handa Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210321064913.4619-1-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c6d0c1dc6253..c30eb887ca7d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -930,7 +930,8 @@ static bool assign_lock_key(struct lockdep_map *lock) /* Debug-check: all keys must be persistent! */ debug_locks_off(); pr_err("INFO: trying to register non-static key.\n"); - pr_err("the code is fine but needs lockdep annotation.\n"); + pr_err("The code is fine but needs lockdep annotation, or maybe\n"); + pr_err("you didn't initialize this object before use?\n"); pr_err("turning off the locking correctness validator.\n"); dump_stack(); return false; From 9de82caad0282205d4c38a39456bce58e3219540 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Mar 2021 12:44:58 +0100 Subject: [PATCH 017/180] dax: avoid -Wempty-body warnings gcc warns about an empty body in an else statement: drivers/dax/bus.c: In function 'do_id_store': drivers/dax/bus.c:94:48: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] 94 | /* nothing to remove */; | ^ drivers/dax/bus.c:99:43: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] 99 | /* dax_id already added */; | ^ In both of these cases, the 'else' exists only to have a place to add a comment, but that comment doesn't really explain that much either, so the easiest way to shut up that warning is to just remove the else. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210322114514.3490752-1-arnd@kernel.org Signed-off-by: Dan Williams --- drivers/dax/bus.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 452e85ae87a8..5aee26e1bbd6 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -90,13 +90,11 @@ static ssize_t do_id_store(struct device_driver *drv, const char *buf, list_add(&dax_id->list, &dax_drv->ids); } else rc = -ENOMEM; - } else - /* nothing to remove */; + } } else if (action == ID_REMOVE) { list_del(&dax_id->list); kfree(dax_id); - } else - /* dax_id already added */; + } mutex_unlock(&dax_bus_lock); if (rc < 0) From 6d48b7912cc72275dc7c59ff961c8bac7ef66a92 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Mar 2021 12:55:25 +0100 Subject: [PATCH 018/180] lockdep: Address clang -Wformat warning printing for %hd Clang doesn't like format strings that truncate a 32-bit value to something shorter: kernel/locking/lockdep.c:709:4: error: format specifies type 'short' but the argument has type 'int' [-Werror,-Wformat] In this case, the warning is a slightly questionable, as it could realize that both class->wait_type_outer and class->wait_type_inner are in fact 8-bit struct members, even though the result of the ?: operator becomes an 'int'. However, there is really no point in printing the number as a 16-bit 'short' rather than either an 8-bit or 32-bit number, so just change it to a normal %d. Fixes: de8f5e4f2dc1 ("lockdep: Introduce wait-type checks") Signed-off-by: Arnd Bergmann Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210322115531.3987555-1-arnd@kernel.org --- kernel/locking/lockdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c30eb887ca7d..f160f1c97ca1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -705,7 +705,7 @@ static void print_lock_name(struct lock_class *class) printk(KERN_CONT " ("); __print_lock_name(class); - printk(KERN_CONT "){%s}-{%hd:%hd}", usage, + printk(KERN_CONT "){%s}-{%d:%d}", usage, class->wait_type_outer ?: class->wait_type_inner, class->wait_type_inner); } From daa58c8eec0a65ac8e2e77ff3ea8a233d8eec954 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Mar 2021 09:56:34 -0700 Subject: [PATCH 019/180] Input: i8042 - fix Pegatron C15B ID entry The Zenbook Flip entry that was added overwrites a previous one because of a typo: In file included from drivers/input/serio/i8042.h:23, from drivers/input/serio/i8042.c:131: drivers/input/serio/i8042-x86ia64io.h:591:28: error: initialized field overwritten [-Werror=override-init] 591 | .matches = { | ^ drivers/input/serio/i8042-x86ia64io.h:591:28: note: (near initialization for 'i8042_dmi_noselftest_table[0].matches') Add the missing separator between the two. Fixes: b5d6e7ab7fe7 ("Input: i8042 - add ASUS Zenbook Flip to noselftest list") Signed-off-by: Arnd Bergmann Reviewed-by: Hans de Goede Reviewed-by: Marcos Paulo de Souza Link: https://lore.kernel.org/r/20210323130623.2302402-1-arnd@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 9119e12a5778..a5a003553646 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -588,6 +588,7 @@ static const struct dmi_system_id i8042_dmi_noselftest_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */ }, + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /* Convertible Notebook */ From 69d5ff3e9e51e23d5d81bf48480aa5671be67a71 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Tue, 23 Mar 2021 10:45:55 -0700 Subject: [PATCH 020/180] Input: nspire-keypad - enable interrupts only when opened The driver registers an interrupt handler in _probe, but didn't configure them until later when the _open function is called. In between, the keypad can fire an IRQ due to touchpad activity, which the handler ignores. This causes the kernel to disable the interrupt, blocking the keypad from working. Fix this by disabling interrupts before registering the handler. Additionally, disable them in _close, so that they're only enabled while open. Fixes: fc4f31461892 ("Input: add TI-Nspire keypad support") Signed-off-by: Fabian Vogt Link: https://lore.kernel.org/r/3383725.iizBOSrK1V@linux-e202.suse.de Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/nspire-keypad.c | 56 ++++++++++++++------------ 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/input/keyboard/nspire-keypad.c b/drivers/input/keyboard/nspire-keypad.c index 63d5e488137d..e9fa1423f136 100644 --- a/drivers/input/keyboard/nspire-keypad.c +++ b/drivers/input/keyboard/nspire-keypad.c @@ -93,9 +93,15 @@ static irqreturn_t nspire_keypad_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static int nspire_keypad_chip_init(struct nspire_keypad *keypad) +static int nspire_keypad_open(struct input_dev *input) { + struct nspire_keypad *keypad = input_get_drvdata(input); unsigned long val = 0, cycles_per_us, delay_cycles, row_delay_cycles; + int error; + + error = clk_prepare_enable(keypad->clk); + if (error) + return error; cycles_per_us = (clk_get_rate(keypad->clk) / 1000000); if (cycles_per_us == 0) @@ -121,30 +127,6 @@ static int nspire_keypad_chip_init(struct nspire_keypad *keypad) keypad->int_mask = 1 << 1; writel(keypad->int_mask, keypad->reg_base + KEYPAD_INTMSK); - /* Disable GPIO interrupts to prevent hanging on touchpad */ - /* Possibly used to detect touchpad events */ - writel(0, keypad->reg_base + KEYPAD_UNKNOWN_INT); - /* Acknowledge existing interrupts */ - writel(~0, keypad->reg_base + KEYPAD_UNKNOWN_INT_STS); - - return 0; -} - -static int nspire_keypad_open(struct input_dev *input) -{ - struct nspire_keypad *keypad = input_get_drvdata(input); - int error; - - error = clk_prepare_enable(keypad->clk); - if (error) - return error; - - error = nspire_keypad_chip_init(keypad); - if (error) { - clk_disable_unprepare(keypad->clk); - return error; - } - return 0; } @@ -152,6 +134,11 @@ static void nspire_keypad_close(struct input_dev *input) { struct nspire_keypad *keypad = input_get_drvdata(input); + /* Disable interrupts */ + writel(0, keypad->reg_base + KEYPAD_INTMSK); + /* Acknowledge existing interrupts */ + writel(~0, keypad->reg_base + KEYPAD_INT); + clk_disable_unprepare(keypad->clk); } @@ -210,6 +197,25 @@ static int nspire_keypad_probe(struct platform_device *pdev) return -ENOMEM; } + error = clk_prepare_enable(keypad->clk); + if (error) { + dev_err(&pdev->dev, "failed to enable clock\n"); + return error; + } + + /* Disable interrupts */ + writel(0, keypad->reg_base + KEYPAD_INTMSK); + /* Acknowledge existing interrupts */ + writel(~0, keypad->reg_base + KEYPAD_INT); + + /* Disable GPIO interrupts to prevent hanging on touchpad */ + /* Possibly used to detect touchpad events */ + writel(0, keypad->reg_base + KEYPAD_UNKNOWN_INT); + /* Acknowledge existing GPIO interrupts */ + writel(~0, keypad->reg_base + KEYPAD_UNKNOWN_INT_STS); + + clk_disable_unprepare(keypad->clk); + input_set_drvdata(input, keypad); input->id.bustype = BUS_HOST; From cf97d7af246831ea23c216f17205f91319afc85f Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 22 Mar 2021 06:47:14 +0100 Subject: [PATCH 021/180] usb: cdnsp: Fixes issue with dequeuing requests after disabling endpoint Patch fixes the bug: BUG: kernel NULL pointer dereference, address: 0000000000000050 PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 0 PID: 4137 Comm: uvc-gadget Tainted: G OE 5.10.0-next-20201214+ #3 Hardware name: ASUS All Series/Q87T, BIOS 0908 07/22/2014 RIP: 0010:cdnsp_remove_request+0xe9/0x530 [cdnsp_udc_pci] Code: 01 00 00 31 f6 48 89 df e8 64 d4 ff ff 48 8b 43 08 48 8b 13 45 31 f6 48 89 42 08 48 89 10 b8 98 ff ff ff 48 89 1b 48 89 5b 08 <41> 83 6d 50 01 41 83 af d0 00 00 00 01 41 f6 84 24 78 20 00 00 08 RSP: 0018:ffffb68d00d07b60 EFLAGS: 00010046 RAX: 00000000ffffff98 RBX: ffff9d29c57fbf00 RCX: 0000000000001400 RDX: ffff9d29c57fbf00 RSI: 0000000000000000 RDI: ffff9d29c57fbf00 RBP: ffffb68d00d07bb0 R08: ffff9d2ad9510a00 R09: ffff9d2ac011c000 R10: ffff9d2a12b6e760 R11: 0000000000000000 R12: ffff9d29d3fb8000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff9d29d3fb88c0 FS: 0000000000000000(0000) GS:ffff9d2adba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000050 CR3: 0000000102164005 CR4: 00000000001706f0 Call Trace: cdnsp_ep_dequeue+0x3c/0x90 [cdnsp_udc_pci] cdnsp_gadget_ep_dequeue+0x3f/0x80 [cdnsp_udc_pci] usb_ep_dequeue+0x21/0x70 [udc_core] uvcg_video_enable+0x19d/0x220 [usb_f_uvc] uvc_v4l2_release+0x49/0x90 [usb_f_uvc] v4l2_release+0xa5/0x100 [videodev] __fput+0x99/0x250 ____fput+0xe/0x10 task_work_run+0x75/0xb0 do_exit+0x370/0xb80 do_group_exit+0x43/0xa0 get_signal+0x12d/0x820 arch_do_signal_or_restart+0xb2/0x870 ? __switch_to_asm+0x36/0x70 ? kern_select+0xc6/0x100 exit_to_user_mode_prepare+0xfc/0x170 syscall_exit_to_user_mode+0x2a/0x40 do_syscall_64+0x43/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fe969cf5dd7 Code: Unable to access opcode bytes at RIP 0x7fe969cf5dad. Problem occurs for UVC class. During disconnecting the UVC class disable endpoints and then start dequeuing all requests. This leads to situation where requests are removed twice. The first one in cdnsp_gadget_ep_disable and the second in cdnsp_gadget_ep_dequeue function. Patch adds condition in cdnsp_gadget_ep_dequeue function which allows dequeue requests only from enabled endpoint. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index f2ebbacd932e..d7d4bdd57f46 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1128,6 +1128,10 @@ static int cdnsp_gadget_ep_dequeue(struct usb_ep *ep, return -ESHUTDOWN; } + /* Requests has been dequeued during disabling endpoint. */ + if (!(pep->ep_state & EP_ENABLED)) + return 0; + spin_lock_irqsave(&pdev->lock, flags); ret = cdnsp_ep_dequeue(pep, to_cdnsp_request(request)); spin_unlock_irqrestore(&pdev->lock, flags); From 781bab3238c21c8cc6d1999a6ee43de76252fdfd Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 26 Mar 2021 15:19:27 -0700 Subject: [PATCH 022/180] Input: elants_i2c - fix division by zero if firmware reports zero phys size Touchscreen firmware of ASUS Transformer TF700T reports zeros for the phys size. Hence check whether the size is zero and don't set the resolution in this case. Reported-by: Jasper Korten Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210302100824.3423-1-digetx@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 4c2b579f6c8b..78172c31529a 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -1441,7 +1441,7 @@ static int elants_i2c_probe(struct i2c_client *client, touchscreen_parse_properties(ts->input, true, &ts->prop); - if (ts->chip_id == EKTF3624) { + if (ts->chip_id == EKTF3624 && ts->phy_x && ts->phy_y) { /* calculate resolution from size */ ts->x_res = DIV_ROUND_CLOSEST(ts->prop.max_x, ts->phy_x); ts->y_res = DIV_ROUND_CLOSEST(ts->prop.max_y, ts->phy_y); From 56cfe6f820a6315291eb5a1b82bb49633b993d3b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 28 Mar 2021 22:57:48 -0700 Subject: [PATCH 023/180] Input: elants_i2c - drop zero-checking of ABS_MT_TOUCH_MAJOR resolution Drop unnecessary zero-checking of ABS_MT_TOUCH_MAJOR resolution since there is no difference between setting resolution to 0 vs not setting it at all. This change makes code cleaner a tad. Suggested-by: Dmitry Torokhov Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210328235507.19240-1-digetx@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 78172c31529a..5f7706febcb0 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -1449,8 +1449,7 @@ static int elants_i2c_probe(struct i2c_client *client, input_abs_set_res(ts->input, ABS_MT_POSITION_X, ts->x_res); input_abs_set_res(ts->input, ABS_MT_POSITION_Y, ts->y_res); - if (ts->major_res > 0) - input_abs_set_res(ts->input, ABS_MT_TOUCH_MAJOR, ts->major_res); + input_abs_set_res(ts->input, ABS_MT_TOUCH_MAJOR, ts->major_res); error = input_mt_init_slots(ts->input, MAX_CONTACT_NUM, INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); From 2867b9746cef78745c594894aece6f8ef826e0b4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 14 Mar 2021 12:07:09 +0100 Subject: [PATCH 024/180] clk: socfpga: fix iomem pointer cast on 64-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pointers should be cast with uintptr_t instead of integer. This fixes warning when compile testing on ARM64: drivers/clk/socfpga/clk-gate.c: In function ‘socfpga_clk_recalc_rate’: drivers/clk/socfpga/clk-gate.c:102:7: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] Fixes: b7cec13f082f ("clk: socfpga: Look for the GPIO_DB_CLK by its offset") Signed-off-by: Krzysztof Kozlowski Acked-by: Dinh Nguyen Link: https://lore.kernel.org/r/20210314110709.32599-1-krzysztof.kozlowski@canonical.com Signed-off-by: Stephen Boyd --- drivers/clk/socfpga/clk-gate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/socfpga/clk-gate.c b/drivers/clk/socfpga/clk-gate.c index 43ecd507bf83..cf94a12459ea 100644 --- a/drivers/clk/socfpga/clk-gate.c +++ b/drivers/clk/socfpga/clk-gate.c @@ -99,7 +99,7 @@ static unsigned long socfpga_clk_recalc_rate(struct clk_hw *hwclk, val = readl(socfpgaclk->div_reg) >> socfpgaclk->shift; val &= GENMASK(socfpgaclk->width - 1, 0); /* Check for GPIO_DB_CLK by its offset */ - if ((int) socfpgaclk->div_reg & SOCFPGA_GPIO_DB_CLK_OFFSET) + if ((uintptr_t) socfpgaclk->div_reg & SOCFPGA_GPIO_DB_CLK_OFFSET) div = val + 1; else div = (1 << val); From e5c359f70e4b5e7b6c2bf4b0ca2d2686d543a37b Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Sat, 27 Mar 2021 07:11:05 +0530 Subject: [PATCH 025/180] clk: qcom: camcc: Update the clock ops for the SC7180 Some of the RCGs could be always ON from the XO source and could be used as the clock on signal for the GDSC to be operational. In the cases where the GDSCs are parked at different source with the source clock disabled, it could lead to the GDSC to be stuck at ON/OFF during gdsc disable/enable. Thus park the RCGs at XO during clock disable and update the rcg_ops to use the shared_ops. Fixes: 15d09e830bbc ("clk: qcom: camcc: Add camera clock controller driver for SC7180") Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1616809265-11912-1-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/camcc-sc7180.c | 50 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/clk/qcom/camcc-sc7180.c b/drivers/clk/qcom/camcc-sc7180.c index dbac5651ab85..9bcf2f8ed4de 100644 --- a/drivers/clk/qcom/camcc-sc7180.c +++ b/drivers/clk/qcom/camcc-sc7180.c @@ -304,7 +304,7 @@ static struct clk_rcg2 cam_cc_bps_clk_src = { .name = "cam_cc_bps_clk_src", .parent_data = cam_cc_parent_data_2, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -325,7 +325,7 @@ static struct clk_rcg2 cam_cc_cci_0_clk_src = { .name = "cam_cc_cci_0_clk_src", .parent_data = cam_cc_parent_data_5, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -339,7 +339,7 @@ static struct clk_rcg2 cam_cc_cci_1_clk_src = { .name = "cam_cc_cci_1_clk_src", .parent_data = cam_cc_parent_data_5, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -360,7 +360,7 @@ static struct clk_rcg2 cam_cc_cphy_rx_clk_src = { .name = "cam_cc_cphy_rx_clk_src", .parent_data = cam_cc_parent_data_3, .num_parents = 6, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -379,7 +379,7 @@ static struct clk_rcg2 cam_cc_csi0phytimer_clk_src = { .name = "cam_cc_csi0phytimer_clk_src", .parent_data = cam_cc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -393,7 +393,7 @@ static struct clk_rcg2 cam_cc_csi1phytimer_clk_src = { .name = "cam_cc_csi1phytimer_clk_src", .parent_data = cam_cc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -407,7 +407,7 @@ static struct clk_rcg2 cam_cc_csi2phytimer_clk_src = { .name = "cam_cc_csi2phytimer_clk_src", .parent_data = cam_cc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -421,7 +421,7 @@ static struct clk_rcg2 cam_cc_csi3phytimer_clk_src = { .name = "cam_cc_csi3phytimer_clk_src", .parent_data = cam_cc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -443,7 +443,7 @@ static struct clk_rcg2 cam_cc_fast_ahb_clk_src = { .name = "cam_cc_fast_ahb_clk_src", .parent_data = cam_cc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -466,7 +466,7 @@ static struct clk_rcg2 cam_cc_icp_clk_src = { .name = "cam_cc_icp_clk_src", .parent_data = cam_cc_parent_data_2, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -488,7 +488,7 @@ static struct clk_rcg2 cam_cc_ife_0_clk_src = { .name = "cam_cc_ife_0_clk_src", .parent_data = cam_cc_parent_data_4, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -510,7 +510,7 @@ static struct clk_rcg2 cam_cc_ife_0_csid_clk_src = { .name = "cam_cc_ife_0_csid_clk_src", .parent_data = cam_cc_parent_data_3, .num_parents = 6, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -524,7 +524,7 @@ static struct clk_rcg2 cam_cc_ife_1_clk_src = { .name = "cam_cc_ife_1_clk_src", .parent_data = cam_cc_parent_data_4, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -538,7 +538,7 @@ static struct clk_rcg2 cam_cc_ife_1_csid_clk_src = { .name = "cam_cc_ife_1_csid_clk_src", .parent_data = cam_cc_parent_data_3, .num_parents = 6, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -553,7 +553,7 @@ static struct clk_rcg2 cam_cc_ife_lite_clk_src = { .parent_data = cam_cc_parent_data_4, .num_parents = 4, .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -567,7 +567,7 @@ static struct clk_rcg2 cam_cc_ife_lite_csid_clk_src = { .name = "cam_cc_ife_lite_csid_clk_src", .parent_data = cam_cc_parent_data_3, .num_parents = 6, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -590,7 +590,7 @@ static struct clk_rcg2 cam_cc_ipe_0_clk_src = { .name = "cam_cc_ipe_0_clk_src", .parent_data = cam_cc_parent_data_2, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -613,7 +613,7 @@ static struct clk_rcg2 cam_cc_jpeg_clk_src = { .name = "cam_cc_jpeg_clk_src", .parent_data = cam_cc_parent_data_2, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -635,7 +635,7 @@ static struct clk_rcg2 cam_cc_lrme_clk_src = { .name = "cam_cc_lrme_clk_src", .parent_data = cam_cc_parent_data_6, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -656,7 +656,7 @@ static struct clk_rcg2 cam_cc_mclk0_clk_src = { .name = "cam_cc_mclk0_clk_src", .parent_data = cam_cc_parent_data_1, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -670,7 +670,7 @@ static struct clk_rcg2 cam_cc_mclk1_clk_src = { .name = "cam_cc_mclk1_clk_src", .parent_data = cam_cc_parent_data_1, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -684,7 +684,7 @@ static struct clk_rcg2 cam_cc_mclk2_clk_src = { .name = "cam_cc_mclk2_clk_src", .parent_data = cam_cc_parent_data_1, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -698,7 +698,7 @@ static struct clk_rcg2 cam_cc_mclk3_clk_src = { .name = "cam_cc_mclk3_clk_src", .parent_data = cam_cc_parent_data_1, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -712,7 +712,7 @@ static struct clk_rcg2 cam_cc_mclk4_clk_src = { .name = "cam_cc_mclk4_clk_src", .parent_data = cam_cc_parent_data_1, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; @@ -732,7 +732,7 @@ static struct clk_rcg2 cam_cc_slow_ahb_clk_src = { .parent_data = cam_cc_parent_data_0, .num_parents = 4, .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_shared_ops, }, }; From bec4d7c93afc07dd0454ae41c559513f858cfb83 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 29 Mar 2021 09:07:18 +0300 Subject: [PATCH 026/180] thunderbolt: Fix a leak in tb_retimer_add() After the device_register() succeeds, then the correct way to clean up is to call device_unregister(). The unregister calls both device_del() and device_put(). Since this code was only device_del() it results in a memory leak. Fixes: dacb12877d92 ("thunderbolt: Add support for on-board retimers") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Reviewed-by: Jason Gunthorpe Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 620bcf586ee2..7a5d61604c8b 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -347,7 +347,7 @@ static int tb_retimer_add(struct tb_port *port, u8 index, u32 auth_status) ret = tb_retimer_nvm_add(rt); if (ret) { dev_err(&rt->dev, "failed to add NVM devices: %d\n", ret); - device_del(&rt->dev); + device_unregister(&rt->dev); return ret; } From 08fe7ae1857080f5075df5ac7fef2ecd4e289117 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 29 Mar 2021 09:08:01 +0300 Subject: [PATCH 027/180] thunderbolt: Fix off by one in tb_port_find_retimer() This array uses 1-based indexing so it corrupts memory one element beyond of the array. Fix it by making the array one element larger. Fixes: dacb12877d92 ("thunderbolt: Add support for on-board retimers") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Mika Westerberg --- drivers/thunderbolt/retimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 7a5d61604c8b..c44fad2b9fbb 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -406,7 +406,7 @@ static struct tb_retimer *tb_port_find_retimer(struct tb_port *port, u8 index) */ int tb_retimer_scan(struct tb_port *port) { - u32 status[TB_MAX_RETIMER_INDEX] = {}; + u32 status[TB_MAX_RETIMER_INDEX + 1] = {}; int ret, i, last_idx = 0; if (!port->cap_usb4) From 0e07e25b481aa021e4b48085ecb8a049e9614510 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Mar 2021 16:24:11 +0200 Subject: [PATCH 028/180] netfilter: flowtable: fix NAT IPv6 offload mangling Fix out-of-bound access in the address array. Fixes: 5c27d8d76ce8 ("netfilter: nf_flow_table_offload: add IPv6 support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2a6993fa40d7..1c5460e7bce8 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -305,12 +305,12 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule, const __be32 *addr, const __be32 *mask) { struct flow_action_entry *entry; - int i; + int i, j; - for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) { + for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) { entry = flow_action_entry_next(flow_rule); flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6, - offset + i, &addr[i], mask); + offset + i, &addr[j], mask); } } From fbea31808ca124dd73ff6bb1e67c9af4607c3e32 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 31 Mar 2021 01:04:45 +0200 Subject: [PATCH 029/180] netfilter: conntrack: do not print icmpv6 as unknown via /proc /proc/net/nf_conntrack shows icmpv6 as unknown. Fixes: 09ec82f5af99 ("netfilter: conntrack: remove protocol name from l4proto struct") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0ee702d374b0..c6c0cb465664 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -266,6 +266,7 @@ static const char* l4proto_name(u16 proto) case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; case IPPROTO_UDPLITE: return "udplite"; + case IPPROTO_ICMPV6: return "icmpv6"; } return "unknown"; From 942bfbecc0281c75db84f744b9b77b0f2396f484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?= Date: Fri, 19 Mar 2021 18:12:13 +0800 Subject: [PATCH 030/180] I2C: JZ4780: Fix bug for Ingenic X1000. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only send "X1000_I2C_DC_STOP" when last byte, or it will cause error when I2C write operation which should look like this: device_addr + w, reg_addr, data; But without this patch, it looks like this: device_addr + w, reg_addr, device_addr + w, data; Fixes: 21575a7a8d4c ("I2C: JZ4780: Add support for the X1000.") Reported-by: 杨文龙 (Yang Wenlong) Tested-by: 杨文龙 (Yang Wenlong) Signed-off-by: 周琰杰 (Zhou Yanjie) Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 8509c5f11356..55177eb21d7b 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -525,8 +525,8 @@ static irqreturn_t jz4780_i2c_irq(int irqno, void *dev_id) i2c_sta = jz4780_i2c_readw(i2c, JZ4780_I2C_STA); data = *i2c->wbuf; data &= ~JZ4780_I2C_DC_READ; - if ((!i2c->stop_hold) && (i2c->cdata->version >= - ID_X1000)) + if ((i2c->wt_len == 1) && (!i2c->stop_hold) && + (i2c->cdata->version >= ID_X1000)) data |= X1000_I2C_DC_STOP; jz4780_i2c_writew(i2c, JZ4780_I2C_DC, data); i2c->wbuf++; From 67ff1d98652ac141f46b3871ebc34350ddffa2ef Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Wed, 24 Mar 2021 19:36:10 +0530 Subject: [PATCH 031/180] i2c: stm32f4: Mundane typo fix s/postion/position/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Reviewed-by: Alain Volmat Reviewed-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c index 937c2c8fd349..4933fc8ce3fd 100644 --- a/drivers/i2c/busses/i2c-stm32f4.c +++ b/drivers/i2c/busses/i2c-stm32f4.c @@ -534,7 +534,7 @@ static void stm32f4_i2c_handle_rx_addr(struct stm32f4_i2c_dev *i2c_dev) default: /* * N-byte reception: - * Enable ACK, reset POS (ACK postion) and clear ADDR flag. + * Enable ACK, reset POS (ACK position) and clear ADDR flag. * In that way, ACK will be sent as soon as the current byte * will be received in the shift register */ From 1ae6b3780848c6b1efc6b994963082cfd04ac114 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 29 Mar 2021 21:24:09 +0200 Subject: [PATCH 032/180] i2c: imx: drop me as maintainer of binding docs I dunno why I got added here, but I haven't been using this driver for years. Remove me to make space for interested parties. Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-imx.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml index f23966b0d6c6..f33c6b29966b 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml @@ -6,9 +6,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Inter IC (I2C) and High Speed Inter IC (HS-I2C) for i.MX -maintainers: - - Wolfram Sang - allOf: - $ref: /schemas/i2c/i2c-controller.yaml# From 98a479991dc5b986d9d48a1b73f568e58e3d82b6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 29 Mar 2021 21:25:41 +0200 Subject: [PATCH 033/180] i2c: gpio: update email address in binding docs Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-gpio.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml index ff99344788ab..fd040284561f 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Bindings for GPIO bitbanged I2C maintainers: - - Wolfram Sang + - Wolfram Sang allOf: - $ref: /schemas/i2c/i2c-controller.yaml# From 629a411f7e71afeee34edd4c1418c4e7f7d5575a Mon Sep 17 00:00:00 2001 From: Hao Fang Date: Tue, 30 Mar 2021 14:37:14 +0800 Subject: [PATCH 034/180] i2c: hix5hd2: use the correct HiSilicon copyright s/Hisilicon/HiSilicon/g. It should use capital S, according to https://www.hisilicon.com/en/terms-of-use. Signed-off-by: Hao Fang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-hix5hd2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c index c45f226c2b85..aa00ba8bcb70 100644 --- a/drivers/i2c/busses/i2c-hix5hd2.c +++ b/drivers/i2c/busses/i2c-hix5hd2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2014 Linaro Ltd. - * Copyright (c) 2014 Hisilicon Limited. + * Copyright (c) 2014 HiSilicon Limited. * * Now only support 7 bit address. */ From 23cf00ddd2e1aacf1873e43f5e0c519c120daf7a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 29 Mar 2021 14:41:12 +0300 Subject: [PATCH 035/180] gpio: sysfs: Obey valid_mask Do not allow exporting GPIOs which are set invalid by the driver's valid mask. Fixes: 726cb3ba4969 ("gpiolib: Support 'gpio-reserved-ranges' property") Signed-off-by: Matti Vaittinen Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-sysfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 26c5466b8179..ae49bb23c6ed 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -458,6 +458,8 @@ static ssize_t export_store(struct class *class, long gpio; struct gpio_desc *desc; int status; + struct gpio_chip *gc; + int offset; status = kstrtol(buf, 0, &gpio); if (status < 0) @@ -469,6 +471,12 @@ static ssize_t export_store(struct class *class, pr_warn("%s: invalid GPIO %ld\n", __func__, gpio); return -EINVAL; } + gc = desc->gdev->chip; + offset = gpio_chip_hwgpio(desc); + if (!gpiochip_line_is_valid(gc, offset)) { + pr_warn("%s: GPIO %ld masked\n", __func__, gpio); + return -EINVAL; + } /* No extra locking here; FLAG_SYSFS just signifies that the * request and export were done by on behalf of userspace, so From 3618250c8399cb36f4a0fbc48610a178307e1c64 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Mar 2021 14:14:58 +0000 Subject: [PATCH 036/180] powerpc/ptrace: Don't return error when getting/setting FP regs without CONFIG_PPC_FPU_REGS An #ifdef CONFIG_PPC_FPU_REGS is missing in arch_ptrace() leading to the following Oops because [REGSET_FPR] entry is not initialised in native_regsets[]. [ 41.917608] BUG: Unable to handle kernel instruction fetch [ 41.922849] Faulting instruction address: 0xff8fd228 [ 41.927760] Oops: Kernel access of bad area, sig: 11 [#1] [ 41.933089] BE PAGE_SIZE=4K PREEMPT CMPC885 [ 41.940753] Modules linked in: [ 41.943768] CPU: 0 PID: 366 Comm: gdb Not tainted 5.12.0-rc5-s3k-dev-01666-g7aac86a0f057-dirty #4835 [ 41.952800] NIP: ff8fd228 LR: c004d9e0 CTR: ff8fd228 [ 41.957790] REGS: caae9df0 TRAP: 0400 Not tainted (5.12.0-rc5-s3k-dev-01666-g7aac86a0f057-dirty) [ 41.966741] MSR: 40009032 CR: 82004248 XER: 20000000 [ 41.973540] [ 41.973540] GPR00: c004d9b4 caae9eb0 c1b64f60 c1b64520 c0713cd4 caae9eb8 c1bacdfc 00000004 [ 41.973540] GPR08: 00000200 ff8fd228 c1bac700 00001032 28004242 1061aaf4 00000001 106d64a0 [ 41.973540] GPR16: 00000000 00000000 7fa0a774 10610000 7fa0aef9 00000000 10610000 7fa0a538 [ 41.973540] GPR24: 7fa0a580 7fa0a570 c1bacc00 c1b64520 c1bacc00 caae9ee8 00000108 c0713cd4 [ 42.009685] NIP [ff8fd228] 0xff8fd228 [ 42.013300] LR [c004d9e0] __regset_get+0x100/0x124 [ 42.018036] Call Trace: [ 42.020443] [caae9eb0] [c004d9b4] __regset_get+0xd4/0x124 (unreliable) [ 42.026899] [caae9ee0] [c004da94] copy_regset_to_user+0x5c/0xb0 [ 42.032751] [caae9f10] [c002f640] sys_ptrace+0xe4/0x588 [ 42.037915] [caae9f30] [c0011010] ret_from_syscall+0x0/0x28 [ 42.043422] --- interrupt: c00 at 0xfd1f8e4 [ 42.047553] NIP: 0fd1f8e4 LR: 1004a688 CTR: 00000000 [ 42.052544] REGS: caae9f40 TRAP: 0c00 Not tainted (5.12.0-rc5-s3k-dev-01666-g7aac86a0f057-dirty) [ 42.061494] MSR: 0000d032 CR: 48004442 XER: 00000000 [ 42.068551] [ 42.068551] GPR00: 0000001a 7fa0a040 77dad7e0 0000000e 00000170 00000000 7fa0a078 00000004 [ 42.068551] GPR08: 00000000 108deb88 108dda40 106d6010 44004442 1061aaf4 00000001 106d64a0 [ 42.068551] GPR16: 00000000 00000000 7fa0a774 10610000 7fa0aef9 00000000 10610000 7fa0a538 [ 42.068551] GPR24: 7fa0a580 7fa0a570 1078fe00 1078fd70 1078fd70 00000170 0fdd3244 0000000d [ 42.104696] NIP [0fd1f8e4] 0xfd1f8e4 [ 42.108225] LR [1004a688] 0x1004a688 [ 42.111753] --- interrupt: c00 [ 42.114768] Instruction dump: [ 42.117698] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX [ 42.125443] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX [ 42.133195] ---[ end trace d35616f22ab2100c ]--- Adding the missing #ifdef is not good because gdb doesn't like getting an error when getting registers. Instead, make ptrace return 0s when CONFIG_PPC_FPU_REGS is not set. Fixes: b6254ced4da6 ("powerpc/signal: Don't manage floating point regs when no FPU") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9121a44a2d50ba1af18d8aa5ada06c9a3bea8afd.1617200085.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/ptrace/Makefile | 4 ++-- arch/powerpc/kernel/ptrace/ptrace-decl.h | 14 -------------- arch/powerpc/kernel/ptrace/ptrace-fpu.c | 10 ++++++++++ arch/powerpc/kernel/ptrace/ptrace-novsx.c | 8 ++++++++ arch/powerpc/kernel/ptrace/ptrace-view.c | 2 -- 5 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index 8ebc11d1168d..77abd1a5a508 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,11 +6,11 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o -obj-$(CONFIG_PPC_FPU_REGS) += ptrace-fpu.o +obj-y += ptrace-fpu.o obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) -obj-$(CONFIG_PPC_FPU_REGS) += ptrace-novsx.o +obj-y += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 3487f2c9735c..eafe5f0f6289 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -165,22 +165,8 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); extern const struct user_regset_view user_ppc_native_view; /* ptrace-fpu */ -#ifdef CONFIG_PPC_FPU_REGS int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data); int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data); -#else -static inline int -ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) -{ - return -EIO; -} - -static inline int -ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) -{ - return -EIO; -} -#endif /* ptrace-(no)adv */ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c index 8301cb52dd99..5dca19361316 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-fpu.c +++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c @@ -8,32 +8,42 @@ int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) { +#ifdef CONFIG_PPC_FPU_REGS unsigned int fpidx = index - PT_FPR0; +#endif if (index > PT_FPSCR) return -EIO; +#ifdef CONFIG_PPC_FPU_REGS flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long)); else *data = child->thread.fp_state.fpscr; +#else + *data = 0; +#endif return 0; } int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) { +#ifdef CONFIG_PPC_FPU_REGS unsigned int fpidx = index - PT_FPR0; +#endif if (index > PT_FPSCR) return -EIO; +#ifdef CONFIG_PPC_FPU_REGS flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long)); else child->thread.fp_state.fpscr = data; +#endif return 0; } diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c index b3b36835658a..7433f3db979a 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-novsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c @@ -21,12 +21,16 @@ int fpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { +#ifdef CONFIG_PPC_FPU_REGS BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); flush_fp_to_thread(target); return membuf_write(&to, &target->thread.fp_state, 33 * sizeof(u64)); +#else + return membuf_write(&to, &empty_zero_page, 33 * sizeof(u64)); +#endif } /* @@ -46,6 +50,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { +#ifdef CONFIG_PPC_FPU_REGS BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); @@ -53,4 +58,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); +#else + return 0; +#endif } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 2bad8068f598..6ccffc65ac97 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -522,13 +522,11 @@ static const struct user_regset native_regsets[] = { .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, -#ifdef CONFIG_PPC_FPU_REGS [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, -#endif #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, From acca57217c688c5bbbd5140974533d81e8757cc9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Mar 2021 14:07:04 +0000 Subject: [PATCH 037/180] powerpc/signal32: Fix Oops on sigreturn with unmapped VDSO PPC32 encounters a KUAP fault when trying to handle a signal with VDSO unmapped. Kernel attempted to read user page (7fc07ec0) - exploit attempt? (uid: 0) BUG: Unable to handle kernel data access on read at 0x7fc07ec0 Faulting instruction address: 0xc00111d4 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=16K PREEMPT CMPC885 CPU: 0 PID: 353 Comm: sigreturn_vdso Not tainted 5.12.0-rc4-s3k-dev-01553-gb30c310ea220 #4814 NIP: c00111d4 LR: c0005a28 CTR: 00000000 REGS: cadb3dd0 TRAP: 0300 Not tainted (5.12.0-rc4-s3k-dev-01553-gb30c310ea220) MSR: 00009032 CR: 48000884 XER: 20000000 DAR: 7fc07ec0 DSISR: 88000000 GPR00: c0007788 cadb3e90 c28d4a40 7fc07ec0 7fc07ed0 000004e0 7fc07ce0 00000000 GPR08: 00000001 00000001 7fc07ec0 00000000 28000282 1001b828 100a0920 00000000 GPR16: 100cac0c 100b0000 105c43a4 105c5685 100d0000 100d0000 100d0000 100b2e9e GPR24: ffffffff 105c43c8 00000000 7fc07ec8 cadb3f40 cadb3ec8 c28d4a40 00000000 NIP [c00111d4] flush_icache_range+0x90/0xb4 LR [c0005a28] handle_signal32+0x1bc/0x1c4 Call Trace: [cadb3e90] [100d0000] 0x100d0000 (unreliable) [cadb3ec0] [c0007788] do_notify_resume+0x260/0x314 [cadb3f20] [c000c764] syscall_exit_prepare+0x120/0x184 [cadb3f30] [c00100b4] ret_from_syscall+0xc/0x28 --- interrupt: c00 at 0xfe807f8 NIP: 0fe807f8 LR: 10001060 CTR: c0139378 REGS: cadb3f40 TRAP: 0c00 Not tainted (5.12.0-rc4-s3k-dev-01553-gb30c310ea220) MSR: 0000d032 CR: 28000482 XER: 20000000 GPR00: 00000025 7fc081c0 77bb1690 00000000 0000000a 28000482 00000001 0ff03a38 GPR08: 0000d032 00006de5 c28d4a40 00000009 88000482 1001b828 100a0920 00000000 GPR16: 100cac0c 100b0000 105c43a4 105c5685 100d0000 100d0000 100d0000 100b2e9e GPR24: ffffffff 105c43c8 00000000 77ba7628 10002398 10010000 10002124 00024000 NIP [0fe807f8] 0xfe807f8 LR [10001060] 0x10001060 --- interrupt: c00 Instruction dump: 38630010 7c001fac 38630010 4200fff0 7c0004ac 4c00012c 4e800020 7c001fac 2c0a0000 38630010 4082ffcc 4bffffe4 <7c00186c> 2c070000 39430010 4082ff8c ---[ end trace 3973fb72b049cb06 ]--- This is because flush_icache_range() is called on user addresses. The same problem was detected some time ago on PPC64. It was fixed by enabling KUAP in commit 59bee45b9712 ("powerpc/mm: Fix missing KUAP disable in flush_coherent_icache()"). PPC32 doesn't use flush_coherent_icache() and fallbacks on clean_dcache_range() and invalidate_icache_range(). We could fix it similarly by enabling user access in those functions, but this is overkill for just flushing two instructions. The two instructions are 8 bytes aligned, so a single dcbst/icbi is enough to flush them. Do like __patch_instruction() and inline a dcbst followed by an icbi just after the write of the instructions, while user access is still allowed. The isync is not required because rfi will be used to return to user. icbi() is handled as a read so read-write user access is needed. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bde9154e5351a5ac7bca3d59cdb5a5e8edacbb79.1617199569.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal_32.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 75ee918a120a..f651b992fe01 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -775,7 +775,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, else prepare_save_user_regs(1); - if (!user_write_access_begin(frame, sizeof(*frame))) + if (!user_access_begin(frame, sizeof(*frame))) goto badframe; /* Put the siginfo & fill in most of the ucontext */ @@ -809,17 +809,15 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], failed); unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); - user_write_access_end(); + user_access_end(); if (copy_siginfo_to_user(&frame->info, &ksig->info)) goto badframe; - if (tramp == (unsigned long)mctx->mc_pad) - flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); - regs->link = tramp; #ifdef CONFIG_PPC_FPU_REGS @@ -844,7 +842,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, return 0; failed: - user_write_access_end(); + user_access_end(); badframe: signal_fault(tsk, regs, "handle_rt_signal32", frame); @@ -879,7 +877,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, else prepare_save_user_regs(1); - if (!user_write_access_begin(frame, sizeof(*frame))) + if (!user_access_begin(frame, sizeof(*frame))) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; @@ -908,11 +906,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, /* Set up the sigreturn trampoline: li r0,sigret; sc */ unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } - user_write_access_end(); - - if (tramp == (unsigned long)mctx->mc_pad) - flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + user_access_end(); regs->link = tramp; @@ -935,7 +931,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, return 0; failed: - user_write_access_end(); + user_access_end(); badframe: signal_fault(tsk, regs, "handle_signal32", frame); From 791f9e36599d94af5a76d3f74d04e16326761aae Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Mar 2021 09:12:19 +0000 Subject: [PATCH 038/180] powerpc/vdso: Make sure vdso_wrapper.o is rebuilt everytime vdso.so is rebuilt Commit bce74491c300 ("powerpc/vdso: fix unnecessary rebuilds of vgettimeofday.o") moved vdso32_wrapper.o and vdso64_wrapper.o out of arch/powerpc/kernel/vdso[32/64]/ and removed the dependencies in the Makefile. This leads to the wrappers not being re-build hence the kernel embedding the old vdso library. Add back missing dependencies to ensure vdso32_wrapper.o and vdso64_wrapper.o are rebuilt when vdso32.so.dbg and vdso64.so.dbg are changed. Fixes: bce74491c300 ("powerpc/vdso: fix unnecessary rebuilds of vgettimeofday.o") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8bb015bc98c51d8ced581415b7e3d157e18da7c9.1617181918.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 6084fa499aa3..f66b63e81c3b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -191,3 +191,7 @@ $(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE targets += prom_init_check clean-files := vmlinux.lds + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg +$(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg From 185f2e5f51c2029efd9dd26cceb968a44fe053c6 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 1 Apr 2021 09:51:10 -0700 Subject: [PATCH 039/180] arm64: fix inline asm in load_unaligned_zeropad() The inline asm's addr operand is marked as input-only, however in the case where an exception is taken it may be modified by the BIC instruction on the exception path. Fix the problem by using a temporary register as the destination register for the BIC instruction. Signed-off-by: Peter Collingbourne Cc: stable@vger.kernel.org Link: https://linux-review.googlesource.com/id/I84538c8a2307d567b4f45bb20b715451005f9617 Link: https://lore.kernel.org/r/20210401165110.3952103-1-pcc@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/word-at-a-time.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 3333950b5909..ea487218db79 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,7 +53,7 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, offset; + unsigned long ret, tmp; /* Load word from unaligned pointer addr */ asm( @@ -61,9 +61,9 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) "2:\n" " .pushsection .fixup,\"ax\"\n" " .align 2\n" - "3: and %1, %2, #0x7\n" - " bic %2, %2, #0x7\n" - " ldr %0, [%2]\n" + "3: bic %1, %2, #0x7\n" + " ldr %0, [%1]\n" + " and %1, %2, #0x7\n" " lsl %1, %1, #0x3\n" #ifndef __AARCH64EB__ " lsr %0, %0, %1\n" @@ -73,7 +73,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) " b 2b\n" " .popsection\n" _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (offset) + : "=&r" (ret), "=&r" (tmp) : "r" (addr), "Q" (*(unsigned long *)addr)); return ret; From 5482a9a1a8fd23fbb57afc6d409e12713aa93fa5 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Tue, 30 Mar 2021 08:19:56 +0100 Subject: [PATCH 040/180] scsi: hpsa: Use __packed on individual structs, not header-wide The hpsa driver uses data structures which contain a combination of driver internals and commands sent directly to the hardware. To manage alignment for the hardware portions the driver used #pragma pack(1). Commit f749d8b7a989 ("scsi: hpsa: Correct dev cmds outstanding for retried cmds") switched an existing variable from int to bool. Due to the pragma an atomic_t in the same data structure ended up being misaligned and broke boot on ia64. Add __packed to every struct and union in the header file. Subsequent commits will address the actual atomic_t misalignment regression. The commit is a no-op at least on ia64: $ diff -u <(objdump -d -r old.o) <(objdump -d -r new.o) Link: https://lore.kernel.org/r/20210330071958.3788214-1-slyfox@gentoo.org Fixes: f749d8b7a989 ("scsi: hpsa: Correct dev cmds outstanding for retried cmds") CC: linux-ia64@vger.kernel.org CC: storagedev@microchip.com CC: linux-scsi@vger.kernel.org CC: Joe Szczypek CC: Scott Benesh CC: Scott Teel CC: Tomas Henzl CC: "Martin K. Petersen" CC: Don Brace Reported-by: John Paul Adrian Glaubitz Suggested-by: Don Brace Reviewed-by: Arnd Bergmann Signed-off-by: Sergei Trofimovich Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa_cmd.h | 68 ++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index d126bb877250..280e933d27e7 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -20,6 +20,8 @@ #ifndef HPSA_CMD_H #define HPSA_CMD_H +#include + /* general boundary defintions */ #define SENSEINFOBYTES 32 /* may vary between hbas */ #define SG_ENTRIES_IN_CMD 32 /* Max SG entries excluding chain blocks */ @@ -200,12 +202,10 @@ union u64bit { MAX_EXT_TARGETS + 1) /* + 1 is for the controller itself */ /* SCSI-3 Commands */ -#pragma pack(1) - #define HPSA_INQUIRY 0x12 struct InquiryData { u8 data_byte[36]; -}; +} __packed; #define HPSA_REPORT_LOG 0xc2 /* Report Logical LUNs */ #define HPSA_REPORT_PHYS 0xc3 /* Report Physical LUNs */ @@ -221,7 +221,7 @@ struct raid_map_disk_data { u8 xor_mult[2]; /**< XOR multipliers for this position, * valid for data disks only */ u8 reserved[2]; -}; +} __packed; struct raid_map_data { __le32 structure_size; /* Size of entire structure in bytes */ @@ -247,14 +247,14 @@ struct raid_map_data { __le16 dekindex; /* Data encryption key index. */ u8 reserved[16]; struct raid_map_disk_data data[RAID_MAP_MAX_ENTRIES]; -}; +} __packed; struct ReportLUNdata { u8 LUNListLength[4]; u8 extended_response_flag; u8 reserved[3]; u8 LUN[HPSA_MAX_LUN][8]; -}; +} __packed; struct ext_report_lun_entry { u8 lunid[8]; @@ -269,20 +269,20 @@ struct ext_report_lun_entry { u8 lun_count; /* multi-lun device, how many luns */ u8 redundant_paths; u32 ioaccel_handle; /* ioaccel1 only uses lower 16 bits */ -}; +} __packed; struct ReportExtendedLUNdata { u8 LUNListLength[4]; u8 extended_response_flag; u8 reserved[3]; struct ext_report_lun_entry LUN[HPSA_MAX_PHYS_LUN]; -}; +} __packed; struct SenseSubsystem_info { u8 reserved[36]; u8 portname[8]; u8 reserved1[1108]; -}; +} __packed; /* BMIC commands */ #define BMIC_READ 0x26 @@ -317,7 +317,7 @@ union SCSI3Addr { u8 Targ:6; u8 Mode:2; /* b10 */ } LogUnit; -}; +} __packed; struct PhysDevAddr { u32 TargetId:24; @@ -325,20 +325,20 @@ struct PhysDevAddr { u32 Mode:2; /* 2 level target device addr */ union SCSI3Addr Target[2]; -}; +} __packed; struct LogDevAddr { u32 VolId:30; u32 Mode:2; u8 reserved[4]; -}; +} __packed; union LUNAddr { u8 LunAddrBytes[8]; union SCSI3Addr SCSI3Lun[4]; struct PhysDevAddr PhysDev; struct LogDevAddr LogDev; -}; +} __packed; struct CommandListHeader { u8 ReplyQueue; @@ -346,7 +346,7 @@ struct CommandListHeader { __le16 SGTotal; __le64 tag; union LUNAddr LUN; -}; +} __packed; struct RequestBlock { u8 CDBLen; @@ -365,18 +365,18 @@ struct RequestBlock { #define GET_DIR(tad) (((tad) >> 6) & 0x03) u16 Timeout; u8 CDB[16]; -}; +} __packed; struct ErrDescriptor { __le64 Addr; __le32 Len; -}; +} __packed; struct SGDescriptor { __le64 Addr; __le32 Len; __le32 Ext; -}; +} __packed; union MoreErrInfo { struct { @@ -390,7 +390,8 @@ union MoreErrInfo { u8 offense_num; /* byte # of offense 0-base */ u32 offense_value; } Invalid_Cmd; -}; +} __packed; + struct ErrorInfo { u8 ScsiStatus; u8 SenseLen; @@ -398,7 +399,7 @@ struct ErrorInfo { u32 ResidualCnt; union MoreErrInfo MoreErrInfo; u8 SenseInfo[SENSEINFOBYTES]; -}; +} __packed; /* Command types */ #define CMD_IOCTL_PEND 0x01 #define CMD_SCSI 0x03 @@ -451,7 +452,7 @@ struct CommandList { bool retry_pending; struct hpsa_scsi_dev_t *device; atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */ -} __aligned(COMMANDLIST_ALIGNMENT); +} __packed __aligned(COMMANDLIST_ALIGNMENT); /* Max S/G elements in I/O accelerator command */ #define IOACCEL1_MAXSGENTRIES 24 @@ -489,7 +490,7 @@ struct io_accel1_cmd { __le64 host_addr; /* 0x70 - 0x77 */ u8 CISS_LUN[8]; /* 0x78 - 0x7F */ struct SGDescriptor SG[IOACCEL1_MAXSGENTRIES]; -} __aligned(IOACCEL1_COMMANDLIST_ALIGNMENT); +} __packed __aligned(IOACCEL1_COMMANDLIST_ALIGNMENT); #define IOACCEL1_FUNCTION_SCSIIO 0x00 #define IOACCEL1_SGLOFFSET 32 @@ -519,7 +520,7 @@ struct ioaccel2_sg_element { u8 chain_indicator; #define IOACCEL2_CHAIN 0x80 #define IOACCEL2_LAST_SG 0x40 -}; +} __packed; /* * SCSI Response Format structure for IO Accelerator Mode 2 @@ -559,7 +560,7 @@ struct io_accel2_scsi_response { u8 sense_data_len; /* sense/response data length */ u8 resid_cnt[4]; /* residual count */ u8 sense_data_buff[32]; /* sense/response data buffer */ -}; +} __packed; /* * Structure for I/O accelerator (mode 2 or m2) commands. @@ -592,7 +593,7 @@ struct io_accel2_cmd { __le32 tweak_upper; /* Encryption tweak, upper 4 bytes */ struct ioaccel2_sg_element sg[IOACCEL2_MAXSGENTRIES]; struct io_accel2_scsi_response error_data; -} __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT); +} __packed __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT); /* * defines for Mode 2 command struct @@ -618,7 +619,7 @@ struct hpsa_tmf_struct { __le64 abort_tag; /* cciss tag of SCSI cmd or TMF to abort */ __le64 error_ptr; /* Error Pointer */ __le32 error_len; /* Error Length */ -} __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT); +} __packed __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT); /* Configuration Table Structure */ struct HostWrite { @@ -626,7 +627,7 @@ struct HostWrite { __le32 command_pool_addr_hi; __le32 CoalIntDelay; __le32 CoalIntCount; -}; +} __packed; #define SIMPLE_MODE 0x02 #define PERFORMANT_MODE 0x04 @@ -675,7 +676,7 @@ struct CfgTable { #define HPSA_EVENT_NOTIFY_ACCEL_IO_PATH_STATE_CHANGE (1 << 30) #define HPSA_EVENT_NOTIFY_ACCEL_IO_PATH_CONFIG_CHANGE (1 << 31) __le32 clear_event_notify; -}; +} __packed; #define NUM_BLOCKFETCH_ENTRIES 8 struct TransTable_struct { @@ -686,14 +687,14 @@ struct TransTable_struct { __le32 RepQCtrAddrHigh32; #define MAX_REPLY_QUEUES 64 struct vals32 RepQAddr[MAX_REPLY_QUEUES]; -}; +} __packed; struct hpsa_pci_info { unsigned char bus; unsigned char dev_fn; unsigned short domain; u32 board_id; -}; +} __packed; struct bmic_identify_controller { u8 configured_logical_drive_count; /* offset 0 */ @@ -702,7 +703,7 @@ struct bmic_identify_controller { u8 pad2[136]; u8 controller_mode; /* offset 292 */ u8 pad3[32]; -}; +} __packed; struct bmic_identify_physical_device { @@ -845,7 +846,7 @@ struct bmic_identify_physical_device { u8 max_link_rate[256]; u8 neg_phys_link_rate[256]; u8 box_conn_name[8]; -} __attribute((aligned(512))); +} __packed __attribute((aligned(512))); struct bmic_sense_subsystem_info { u8 primary_slot_number; @@ -858,7 +859,7 @@ struct bmic_sense_subsystem_info { u8 secondary_array_serial_number[32]; u8 secondary_cache_serial_number[32]; u8 pad[332]; -}; +} __packed; struct bmic_sense_storage_box_params { u8 reserved[36]; @@ -870,7 +871,6 @@ struct bmic_sense_storage_box_params { u8 reserver_3[84]; u8 phys_connector[2]; u8 reserved_4[296]; -}; +} __packed; -#pragma pack() #endif /* HPSA_CMD_H */ From 02ec144292bc424a5800d45d4cb472c66e97c520 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Tue, 30 Mar 2021 08:19:57 +0100 Subject: [PATCH 041/180] scsi: hpsa: Fix boot on ia64 (atomic_t alignment) Boot failure was observed on an HP rx3600 ia64 machine with RAID bus controller: Hewlett-Packard Company Smart Array P600: kernel unaligned access to 0xe000000105dd8b95, ip=0xa000000100b87551 kernel unaligned access to 0xe000000105dd8e95, ip=0xa000000100b87551 hpsa 0000:14:01.0: Controller reports max supported commands of 0 Using 16 instead. Ensure that firmware is up to date. swapper/0[1]: error during unaligned kernel access The unaligned access comes from 'struct CommandList' that happens to be packed. Commit f749d8b7a989 ("scsi: hpsa: Correct dev cmds outstanding for retried cmds") introduced unexpected padding and unaligned atomic_t from natural alignment to something else. This change removes packing annotation from a struct not intended to be sent to controller as is. This restores natural `atomic_t` alignment. The change was tested on the same rx3600 machine. Link: https://lore.kernel.org/r/20210330071958.3788214-2-slyfox@gentoo.org Fixes: f749d8b7a989 ("scsi: hpsa: Correct dev cmds outstanding for retried cmds") CC: linux-ia64@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: storagedev@microchip.com CC: linux-scsi@vger.kernel.org CC: Joe Szczypek CC: Scott Benesh CC: Scott Teel CC: Tomas Henzl CC: "Martin K. Petersen" CC: Don Brace Reported-by: John Paul Adrian Glaubitz Suggested-by: Don Brace Reviewed-by: Arnd Bergmann Signed-off-by: Sergei Trofimovich Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index 280e933d27e7..885b1f1fb20a 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -452,7 +452,7 @@ struct CommandList { bool retry_pending; struct hpsa_scsi_dev_t *device; atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */ -} __packed __aligned(COMMANDLIST_ALIGNMENT); +} __aligned(COMMANDLIST_ALIGNMENT); /* Max S/G elements in I/O accelerator command */ #define IOACCEL1_MAXSGENTRIES 24 From e01a00ff62adca8ec464f3c8d82cfa0e8d8728dd Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Tue, 30 Mar 2021 08:19:58 +0100 Subject: [PATCH 042/180] scsi: hpsa: Add an assert to prevent __packed reintroduction Link: https://lore.kernel.org/r/20210330071958.3788214-3-slyfox@gentoo.org Fixes: f749d8b7a989 ("scsi: hpsa: Correct dev cmds outstanding for retried cmds") CC: linux-ia64@vger.kernel.org CC: storagedev@microchip.com CC: linux-scsi@vger.kernel.org CC: Joe Szczypek CC: Scott Benesh CC: Scott Teel CC: Tomas Henzl CC: "Martin K. Petersen" CC: Don Brace Reported-by: John Paul Adrian Glaubitz Suggested-by: Don Brace Reviewed-by: Arnd Bergmann Signed-off-by: Sergei Trofimovich Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa_cmd.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index 885b1f1fb20a..ba6a3aa8d954 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -22,6 +22,9 @@ #include +#include /* static_assert */ +#include /* offsetof */ + /* general boundary defintions */ #define SENSEINFOBYTES 32 /* may vary between hbas */ #define SG_ENTRIES_IN_CMD 32 /* Max SG entries excluding chain blocks */ @@ -454,6 +457,15 @@ struct CommandList { atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */ } __aligned(COMMANDLIST_ALIGNMENT); +/* + * Make sure our embedded atomic variable is aligned. Otherwise we break atomic + * operations on architectures that don't support unaligned atomics like IA64. + * + * The assert guards against reintroductin against unwanted __packed to + * the struct CommandList. + */ +static_assert(offsetof(struct CommandList, refcount) % __alignof__(atomic_t) == 0); + /* Max S/G elements in I/O accelerator command */ #define IOACCEL1_MAXSGENTRIES 24 #define IOACCEL2_MAXSGENTRIES 28 From 1235fc569e0bf541ddda0a1224d4c6fa6d914890 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Thu, 1 Apr 2021 00:39:08 -0700 Subject: [PATCH 043/180] scsi: ufs: core: Fix task management request completion timeout ufshcd_tmc_handler() calls blk_mq_tagset_busy_iter(fn = ufshcd_compl_tm()), but since blk_mq_tagset_busy_iter() only iterates over all reserved tags and requests which are not in IDLE state, ufshcd_compl_tm() never gets a chance to run. Thus, TMR always ends up with completion timeout. Fix it by calling blk_mq_start_request() in __ufshcd_issue_tm_cmd(). Link: https://lore.kernel.org/r/1617262750-4864-2-git-send-email-cang@codeaurora.org Fixes: 69a6c269c097 ("scsi: ufs: Use blk_{get,put}_request() to allocate and free TMFs") Reviewed-by: Bart Van Assche Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c86760788c72..564918ffc026 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6404,6 +6404,7 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, spin_lock_irqsave(host->host_lock, flags); task_tag = hba->nutrs + free_slot; + blk_mq_start_request(req); treq->req_header.dword_0 |= cpu_to_be32(task_tag); From 4b42d557a8add52b9a9924fb31e40a218aab7801 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Thu, 1 Apr 2021 00:39:09 -0700 Subject: [PATCH 044/180] scsi: ufs: core: Fix wrong Task Tag used in task management request UPIUs In __ufshcd_issue_tm_cmd(), it is not correct to use hba->nutrs + req->tag as the Task Tag in a TMR UPIU. Directly use req->tag as the Task Tag. Fixes: e293313262d3 ("scsi: ufs: Fix broken task management command implementation") Link: https://lore.kernel.org/r/1617262750-4864-3-git-send-email-cang@codeaurora.org Reviewed-by: Bart Van Assche Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 564918ffc026..d3d05e997c13 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6386,38 +6386,34 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, DECLARE_COMPLETION_ONSTACK(wait); struct request *req; unsigned long flags; - int free_slot, task_tag, err; + int task_tag, err; /* - * Get free slot, sleep if slots are unavailable. - * Even though we use wait_event() which sleeps indefinitely, - * the maximum wait time is bounded by %TM_CMD_TIMEOUT. + * blk_get_request() is used here only to get a free tag. */ req = blk_get_request(q, REQ_OP_DRV_OUT, 0); if (IS_ERR(req)) return PTR_ERR(req); req->end_io_data = &wait; - free_slot = req->tag; - WARN_ON_ONCE(free_slot < 0 || free_slot >= hba->nutmrs); ufshcd_hold(hba, false); spin_lock_irqsave(host->host_lock, flags); - task_tag = hba->nutrs + free_slot; blk_mq_start_request(req); + task_tag = req->tag; treq->req_header.dword_0 |= cpu_to_be32(task_tag); - memcpy(hba->utmrdl_base_addr + free_slot, treq, sizeof(*treq)); - ufshcd_vops_setup_task_mgmt(hba, free_slot, tm_function); + memcpy(hba->utmrdl_base_addr + task_tag, treq, sizeof(*treq)); + ufshcd_vops_setup_task_mgmt(hba, task_tag, tm_function); /* send command to the controller */ - __set_bit(free_slot, &hba->outstanding_tasks); + __set_bit(task_tag, &hba->outstanding_tasks); /* Make sure descriptors are ready before ringing the task doorbell */ wmb(); - ufshcd_writel(hba, 1 << free_slot, REG_UTP_TASK_REQ_DOOR_BELL); + ufshcd_writel(hba, 1 << task_tag, REG_UTP_TASK_REQ_DOOR_BELL); /* Make sure that doorbell is committed immediately */ wmb(); @@ -6437,24 +6433,24 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_ERR); dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n", __func__, tm_function); - if (ufshcd_clear_tm_cmd(hba, free_slot)) - dev_WARN(hba->dev, "%s: unable clear tm cmd (slot %d) after timeout\n", - __func__, free_slot); + if (ufshcd_clear_tm_cmd(hba, task_tag)) + dev_WARN(hba->dev, "%s: unable to clear tm cmd (slot %d) after timeout\n", + __func__, task_tag); err = -ETIMEDOUT; } else { err = 0; - memcpy(treq, hba->utmrdl_base_addr + free_slot, sizeof(*treq)); + memcpy(treq, hba->utmrdl_base_addr + task_tag, sizeof(*treq)); ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_COMP); } spin_lock_irqsave(hba->host->host_lock, flags); - __clear_bit(free_slot, &hba->outstanding_tasks); + __clear_bit(task_tag, &hba->outstanding_tasks); spin_unlock_irqrestore(hba->host->host_lock, flags); + ufshcd_release(hba); blk_put_request(req); - ufshcd_release(hba); return err; } From 8d3c0c01cb2e36b2bf3c06a82b18b228d0c8f5d0 Mon Sep 17 00:00:00 2001 From: Lukasz Bartosik Date: Fri, 2 Apr 2021 00:51:48 +0200 Subject: [PATCH 045/180] clk: fix invalid usage of list cursor in register Fix invalid usage of a list_for_each_entry cursor in clk_notifier_register(). When list is empty or if the list is completely traversed (without breaking from the loop on one of the entries) then the list cursor does not point to a valid entry and therefore should not be used. The issue was dicovered when running 5.12-rc1 kernel on x86_64 with KASAN enabled: BUG: KASAN: global-out-of-bounds in clk_notifier_register+0xab/0x230 Read of size 8 at addr ffffffffa0d10588 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1 #1 Hardware name: Google Caroline/Caroline, BIOS Google_Caroline.7820.430.0 07/20/2018 Call Trace: dump_stack+0xee/0x15c print_address_description+0x1e/0x2dc kasan_report+0x188/0x1ce ? clk_notifier_register+0xab/0x230 ? clk_prepare_lock+0x15/0x7b ? clk_notifier_register+0xab/0x230 clk_notifier_register+0xab/0x230 dw8250_probe+0xc01/0x10d4 ... Memory state around the buggy address: ffffffffa0d10480: 00 00 00 00 00 03 f9 f9 f9 f9 f9 f9 00 00 00 00 ffffffffa0d10500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9 >ffffffffa0d10580: f9 f9 f9 f9 00 00 00 00 00 00 00 00 00 00 00 00 ^ ffffffffa0d10600: 00 00 00 00 00 00 f9 f9 f9 f9 f9 f9 00 00 00 00 ffffffffa0d10680: 00 00 00 00 00 00 00 00 f9 f9 f9 f9 00 00 00 00 ================================================================== Fixes: b2476490ef11 ("clk: introduce the common clock framework") Reported-by: Lukasz Majczak Signed-off-by: Lukasz Bartosik Link: https://lore.kernel.org/r/20210401225149.18826-1-lb@semihalf.com Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 5052541a0986..16634d5912be 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4357,20 +4357,19 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb) /* search the list of notifiers for this clk */ list_for_each_entry(cn, &clk_notifier_list, node) if (cn->clk == clk) - break; + goto found; /* if clk wasn't in the notifier list, allocate new clk_notifier */ - if (cn->clk != clk) { - cn = kzalloc(sizeof(*cn), GFP_KERNEL); - if (!cn) - goto out; + cn = kzalloc(sizeof(*cn), GFP_KERNEL); + if (!cn) + goto out; - cn->clk = clk; - srcu_init_notifier_head(&cn->notifier_head); + cn->clk = clk; + srcu_init_notifier_head(&cn->notifier_head); - list_add(&cn->node, &clk_notifier_list); - } + list_add(&cn->node, &clk_notifier_list); +found: ret = srcu_notifier_chain_register(&cn->notifier_head, nb); clk->core->notifier_count++; From 7045465500e465b09f09d6e5bdc260a9f1aab97b Mon Sep 17 00:00:00 2001 From: Lukasz Bartosik Date: Fri, 2 Apr 2021 00:51:49 +0200 Subject: [PATCH 046/180] clk: fix invalid usage of list cursor in unregister Fix invalid usage of a list_for_each_entry cursor in clk_notifier_unregister(). When list is empty or if the list is completely traversed (without breaking from the loop on one of the entries) then the list cursor does not point to a valid entry and therefore should not be used. The patch fixes a logical bug that hasn't been seen in pratice however it is analogus to the bug fixed in clk_notifier_register(). The issue was dicovered when running 5.12-rc1 kernel on x86_64 with KASAN enabled: BUG: KASAN: global-out-of-bounds in clk_notifier_register+0xab/0x230 Read of size 8 at addr ffffffffa0d10588 by task swapper/0/1 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.12.0-rc1 #1 Hardware name: Google Caroline/Caroline, BIOS Google_Caroline.7820.430.0 07/20/2018 Call Trace: dump_stack+0xee/0x15c print_address_description+0x1e/0x2dc kasan_report+0x188/0x1ce ? clk_notifier_register+0xab/0x230 ? clk_prepare_lock+0x15/0x7b ? clk_notifier_register+0xab/0x230 clk_notifier_register+0xab/0x230 dw8250_probe+0xc01/0x10d4 ... Memory state around the buggy address: ffffffffa0d10480: 00 00 00 00 00 03 f9 f9 f9 f9 f9 f9 00 00 00 00 ffffffffa0d10500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9 >ffffffffa0d10580: f9 f9 f9 f9 00 00 00 00 00 00 00 00 00 00 00 00 ^ ffffffffa0d10600: 00 00 00 00 00 00 f9 f9 f9 f9 f9 f9 00 00 00 00 ffffffffa0d10680: 00 00 00 00 00 00 00 00 f9 f9 f9 f9 00 00 00 00 ================================================================== Fixes: b2476490ef11 ("clk: introduce the common clock framework") Reported-by: Lukasz Majczak Signed-off-by: Lukasz Bartosik Link: https://lore.kernel.org/r/20210401225149.18826-2-lb@semihalf.com Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 16634d5912be..39cfc6c6a8d2 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4394,32 +4394,28 @@ EXPORT_SYMBOL_GPL(clk_notifier_register); */ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb) { - struct clk_notifier *cn = NULL; - int ret = -EINVAL; + struct clk_notifier *cn; + int ret = -ENOENT; if (!clk || !nb) return -EINVAL; clk_prepare_lock(); - list_for_each_entry(cn, &clk_notifier_list, node) - if (cn->clk == clk) + list_for_each_entry(cn, &clk_notifier_list, node) { + if (cn->clk == clk) { + ret = srcu_notifier_chain_unregister(&cn->notifier_head, nb); + + clk->core->notifier_count--; + + /* XXX the notifier code should handle this better */ + if (!cn->notifier_head.head) { + srcu_cleanup_notifier_head(&cn->notifier_head); + list_del(&cn->node); + kfree(cn); + } break; - - if (cn->clk == clk) { - ret = srcu_notifier_chain_unregister(&cn->notifier_head, nb); - - clk->core->notifier_count--; - - /* XXX the notifier code should handle this better */ - if (!cn->notifier_head.head) { - srcu_cleanup_notifier_head(&cn->notifier_head); - list_del(&cn->node); - kfree(cn); } - - } else { - ret = -ENOENT; } clk_prepare_unlock(); From 4e9c93af7279b059faf5bb1897ee90512b258a12 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 29 Mar 2021 19:36:48 -0600 Subject: [PATCH 047/180] usbip: add sysfs_lock to synchronize sysfs code paths Fuzzing uncovered race condition between sysfs code paths in usbip drivers. Device connect/disconnect code paths initiated through sysfs interface are prone to races if disconnect happens during connect and vice versa. This problem is common to all drivers while it can be reproduced easily in vhci_hcd. Add a sysfs_lock to usbip_device struct to protect the paths. Use this in vhci_hcd to protect sysfs paths. For a complete fix, usip_host and usip-vudc drivers and the event handler will have to use this lock to protect the paths. These changes will be done in subsequent patches. Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+a93fba6d384346a761e3@syzkaller.appspotmail.com Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/b6568f7beae702bbc236a545d3c020106ca75eac.1616807117.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.h | 3 +++ drivers/usb/usbip/vhci_hcd.c | 1 + drivers/usb/usbip/vhci_sysfs.c | 30 +++++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index d60ce17d3dd2..ea2a20e6d27d 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -263,6 +263,9 @@ struct usbip_device { /* lock for status */ spinlock_t lock; + /* mutex for synchronizing sysfs store paths */ + struct mutex sysfs_lock; + int sockfd; struct socket *tcp_socket; diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index a20a8380ca0c..4ba6bcdaa8e9 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -1101,6 +1101,7 @@ static void vhci_device_init(struct vhci_device *vdev) vdev->ud.side = USBIP_VHCI; vdev->ud.status = VDEV_ST_NULL; spin_lock_init(&vdev->ud.lock); + mutex_init(&vdev->ud.sysfs_lock); INIT_LIST_HEAD(&vdev->priv_rx); INIT_LIST_HEAD(&vdev->priv_tx); diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index c4b4256e5dad..e2847cd3e6e3 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -185,6 +185,8 @@ static int vhci_port_disconnect(struct vhci_hcd *vhci_hcd, __u32 rhport) usbip_dbg_vhci_sysfs("enter\n"); + mutex_lock(&vdev->ud.sysfs_lock); + /* lock */ spin_lock_irqsave(&vhci->lock, flags); spin_lock(&vdev->ud.lock); @@ -195,6 +197,7 @@ static int vhci_port_disconnect(struct vhci_hcd *vhci_hcd, __u32 rhport) /* unlock */ spin_unlock(&vdev->ud.lock); spin_unlock_irqrestore(&vhci->lock, flags); + mutex_unlock(&vdev->ud.sysfs_lock); return -EINVAL; } @@ -205,6 +208,8 @@ static int vhci_port_disconnect(struct vhci_hcd *vhci_hcd, __u32 rhport) usbip_event_add(&vdev->ud, VDEV_EVENT_DOWN); + mutex_unlock(&vdev->ud.sysfs_lock); + return 0; } @@ -349,30 +354,36 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, else vdev = &vhci->vhci_hcd_hs->vdev[rhport]; + mutex_lock(&vdev->ud.sysfs_lock); + /* Extract socket from fd. */ socket = sockfd_lookup(sockfd, &err); if (!socket) { dev_err(dev, "failed to lookup sock"); - return -EINVAL; + err = -EINVAL; + goto unlock_mutex; } if (socket->type != SOCK_STREAM) { dev_err(dev, "Expecting SOCK_STREAM - found %d", socket->type); sockfd_put(socket); - return -EINVAL; + err = -EINVAL; + goto unlock_mutex; } /* create threads before locking */ tcp_rx = kthread_create(vhci_rx_loop, &vdev->ud, "vhci_rx"); if (IS_ERR(tcp_rx)) { sockfd_put(socket); - return -EINVAL; + err = -EINVAL; + goto unlock_mutex; } tcp_tx = kthread_create(vhci_tx_loop, &vdev->ud, "vhci_tx"); if (IS_ERR(tcp_tx)) { kthread_stop(tcp_rx); sockfd_put(socket); - return -EINVAL; + err = -EINVAL; + goto unlock_mutex; } /* get task structs now */ @@ -397,7 +408,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, * Will be retried from userspace * if there's another free port. */ - return -EBUSY; + err = -EBUSY; + goto unlock_mutex; } dev_info(dev, "pdev(%u) rhport(%u) sockfd(%d)\n", @@ -423,7 +435,15 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, rh_port_connect(vdev, speed); + dev_info(dev, "Device attached\n"); + + mutex_unlock(&vdev->ud.sysfs_lock); + return count; + +unlock_mutex: + mutex_unlock(&vdev->ud.sysfs_lock); + return err; } static DEVICE_ATTR_WO(attach); From 9dbf34a834563dada91366c2ac266f32ff34641a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 29 Mar 2021 19:36:49 -0600 Subject: [PATCH 048/180] usbip: stub-dev synchronize sysfs code paths Fuzzing uncovered race condition between sysfs code paths in usbip drivers. Device connect/disconnect code paths initiated through sysfs interface are prone to races if disconnect happens during connect and vice versa. Use sysfs_lock to protect sysfs paths in stub-dev. Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+a93fba6d384346a761e3@syzkaller.appspotmail.com Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/2b182f3561b4a065bf3bf6dce3b0e9944ba17b3f.1616807117.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 8f1de1fbbeed..d8d3892e5a69 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -63,6 +63,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a dev_info(dev, "stub up\n"); + mutex_lock(&sdev->ud.sysfs_lock); spin_lock_irq(&sdev->ud.lock); if (sdev->ud.status != SDEV_ST_AVAILABLE) { @@ -87,13 +88,13 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a tcp_rx = kthread_create(stub_rx_loop, &sdev->ud, "stub_rx"); if (IS_ERR(tcp_rx)) { sockfd_put(socket); - return -EINVAL; + goto unlock_mutex; } tcp_tx = kthread_create(stub_tx_loop, &sdev->ud, "stub_tx"); if (IS_ERR(tcp_tx)) { kthread_stop(tcp_rx); sockfd_put(socket); - return -EINVAL; + goto unlock_mutex; } /* get task structs now */ @@ -112,6 +113,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a wake_up_process(sdev->ud.tcp_rx); wake_up_process(sdev->ud.tcp_tx); + mutex_unlock(&sdev->ud.sysfs_lock); + } else { dev_info(dev, "stub down\n"); @@ -122,6 +125,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a spin_unlock_irq(&sdev->ud.lock); usbip_event_add(&sdev->ud, SDEV_EVENT_DOWN); + mutex_unlock(&sdev->ud.sysfs_lock); } return count; @@ -130,6 +134,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a sockfd_put(socket); err: spin_unlock_irq(&sdev->ud.lock); +unlock_mutex: + mutex_unlock(&sdev->ud.sysfs_lock); return -EINVAL; } static DEVICE_ATTR_WO(usbip_sockfd); @@ -270,6 +276,7 @@ static struct stub_device *stub_device_alloc(struct usb_device *udev) sdev->ud.side = USBIP_STUB; sdev->ud.status = SDEV_ST_AVAILABLE; spin_lock_init(&sdev->ud.lock); + mutex_init(&sdev->ud.sysfs_lock); sdev->ud.tcp_socket = NULL; sdev->ud.sockfd = -1; From bd8b82042269a95db48074b8bb400678dbac1815 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 29 Mar 2021 19:36:50 -0600 Subject: [PATCH 049/180] usbip: vudc synchronize sysfs code paths Fuzzing uncovered race condition between sysfs code paths in usbip drivers. Device connect/disconnect code paths initiated through sysfs interface are prone to races if disconnect happens during connect and vice versa. Use sysfs_lock to protect sysfs paths in vudc. Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+a93fba6d384346a761e3@syzkaller.appspotmail.com Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/caabcf3fc87bdae970509b5ff32d05bb7ce2fb15.1616807117.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_dev.c | 1 + drivers/usb/usbip/vudc_sysfs.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/usb/usbip/vudc_dev.c b/drivers/usb/usbip/vudc_dev.c index c8eeabdd9b56..2bc428f2e261 100644 --- a/drivers/usb/usbip/vudc_dev.c +++ b/drivers/usb/usbip/vudc_dev.c @@ -572,6 +572,7 @@ static int init_vudc_hw(struct vudc *udc) init_waitqueue_head(&udc->tx_waitq); spin_lock_init(&ud->lock); + mutex_init(&ud->sysfs_lock); ud->status = SDEV_ST_AVAILABLE; ud->side = USBIP_VUDC; diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index 7383a543c6d1..f7633ee655a1 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -112,6 +112,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, dev_err(dev, "no device"); return -ENODEV; } + mutex_lock(&udc->ud.sysfs_lock); spin_lock_irqsave(&udc->lock, flags); /* Don't export what we don't have */ if (!udc->driver || !udc->pullup) { @@ -187,6 +188,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, wake_up_process(udc->ud.tcp_rx); wake_up_process(udc->ud.tcp_tx); + + mutex_unlock(&udc->ud.sysfs_lock); return count; } else { @@ -207,6 +210,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, } spin_unlock_irqrestore(&udc->lock, flags); + mutex_unlock(&udc->ud.sysfs_lock); return count; @@ -216,6 +220,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, spin_unlock_irq(&udc->ud.lock); unlock: spin_unlock_irqrestore(&udc->lock, flags); + mutex_unlock(&udc->ud.sysfs_lock); return ret; } From 363eaa3a450abb4e63bd6e3ad79d1f7a0f717814 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 29 Mar 2021 19:36:51 -0600 Subject: [PATCH 050/180] usbip: synchronize event handler with sysfs code paths Fuzzing uncovered race condition between sysfs code paths in usbip drivers. Device connect/disconnect code paths initiated through sysfs interface are prone to races if disconnect happens during connect and vice versa. Use sysfs_lock to synchronize event handler with sysfs paths in usbip drivers. Cc: stable@vger.kernel.org Reported-and-tested-by: syzbot+a93fba6d384346a761e3@syzkaller.appspotmail.com Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/c5c8723d3f29dfe3d759cfaafa7dd16b0dfe2918.1616807117.git.skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_event.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/usbip/usbip_event.c b/drivers/usb/usbip/usbip_event.c index 5d88917c9631..086ca76dd053 100644 --- a/drivers/usb/usbip/usbip_event.c +++ b/drivers/usb/usbip/usbip_event.c @@ -70,6 +70,7 @@ static void event_handler(struct work_struct *work) while ((ud = get_event()) != NULL) { usbip_dbg_eh("pending event %lx\n", ud->event); + mutex_lock(&ud->sysfs_lock); /* * NOTE: shutdown must come first. * Shutdown the device. @@ -90,6 +91,7 @@ static void event_handler(struct work_struct *work) ud->eh_ops.unusable(ud); unset_event(ud, USBIP_EH_UNUSABLE); } + mutex_unlock(&ud->sysfs_lock); wake_up(&ud->eh_waitq); } From eed6e41813deb9ee622cd9242341f21430d7789f Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Thu, 1 Apr 2021 21:03:40 -0700 Subject: [PATCH 051/180] driver core: Fix locking bug in deferred_probe_timeout_work_func() list_for_each_entry_safe() is only useful if we are deleting nodes in a linked list within the loop. It doesn't protect against other threads adding/deleting nodes to the list in parallel. We need to grab deferred_probe_mutex when traversing the deferred_probe_pending_list. Cc: stable@vger.kernel.org Fixes: 25b4e70dcce9 ("driver core: allow stopping deferred probe after init") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210402040342.2944858-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index e2cf3b29123e..37a5e5f8b221 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -292,14 +292,16 @@ int driver_deferred_probe_check_state(struct device *dev) static void deferred_probe_timeout_work_func(struct work_struct *work) { - struct device_private *private, *p; + struct device_private *p; driver_deferred_probe_timeout = 0; driver_deferred_probe_trigger(); flush_work(&deferred_probe_work); - list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe) - dev_info(private->device, "deferred probe pending\n"); + mutex_lock(&deferred_probe_mutex); + list_for_each_entry(p, &deferred_probe_pending_list, deferred_probe) + dev_info(p->device, "deferred probe pending\n"); + mutex_unlock(&deferred_probe_mutex); wake_up_all(&probe_timeout_waitqueue); } static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func); From 026334a3bb6a3919b42aba9fc11843db2b77fd41 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 1 Apr 2021 13:36:05 +0300 Subject: [PATCH 052/180] perf inject: Fix repipe usage Since commit 14d3d54052539a1e ("perf session: Try to read pipe data from file") 'perf inject' has started printing "PERFILE2h" when not processing pipes. The commit exposed perf to the possiblity that the input is not a pipe but the 'repipe' parameter gets used. That causes the printing because perf inject sets 'repipe' to true always. The 'repipe' parameter of perf_session__new() is used by 2 functions: - perf_file_header__read_pipe() - trace_report() In both cases, the functions copy data to STDOUT_FILENO when 'repipe' is true. Fix by setting 'repipe' to true only if the output is a pipe. Fixes: e558a5bd8b74aff4 ("perf inject: Work with files") Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Andrew Vagin Link: http://lore.kernel.org/lkml/20210401103605.9000-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6fe44d97fde5..ddccc0eb7390 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -906,7 +906,7 @@ int cmd_inject(int argc, const char **argv) } data.path = inject.input_name; - inject.session = perf_session__new(&data, true, &inject.tool); + inject.session = perf_session__new(&data, inject.output.is_pipe, &inject.tool); if (IS_ERR(inject.session)) return PTR_ERR(inject.session); From 5e729bc54bda705f64941008b018b4e41a4322bf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 31 Mar 2021 14:05:10 +0300 Subject: [PATCH 053/180] i2c: designware: Adjust bus_freq_hz when refuse high speed mode set When hardware doesn't support High Speed Mode, we forget bus_freq_hz timing adjustment. This makes the timings and real registers being unsynchronized. Adjust bus_freq_hz when refuse high speed mode set. Fixes: b6e67145f149 ("i2c: designware: Enable high speed mode") Reported-by: "Song Bao Hua (Barry Song)" Signed-off-by: Andy Shevchenko Reviewed-by: Barry Song Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-master.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index dd27b9dbe931..873ef38eb1c8 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -129,6 +129,7 @@ static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev) if ((comp_param1 & DW_IC_COMP_PARAM_1_SPEED_MODE_MASK) != DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH) { dev_err(dev->dev, "High Speed not supported!\n"); + t->bus_freq_hz = I2C_MAX_FAST_MODE_FREQ; dev->master_cfg &= ~DW_IC_CON_SPEED_MASK; dev->master_cfg |= DW_IC_CON_SPEED_FAST; dev->hs_hcnt = 0; From 65df7d1986a1909a0869419919e7d9c78d70407e Mon Sep 17 00:00:00 2001 From: Viswas G Date: Fri, 2 Apr 2021 11:12:12 +0530 Subject: [PATCH 054/180] scsi: pm80xx: Fix chip initialization failure Inbound and outbound queues were not properly configured and that lead to MPI configuration failure. Fixes: 05c6c029a44d ("scsi: pm80xx: Increase number of supported queues") Cc: stable@vger.kernel.org # 5.10+ Link: https://lore.kernel.org/r/20210402054212.17834-1-Viswas.G@microchip.com.com Reported-and-tested-by: Ash Izat Signed-off-by: Viswas G Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_hwi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 49bf2f70a470..31e5455d280c 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -223,7 +223,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) PM8001_EVENT_LOG_SIZE; pm8001_ha->main_cfg_tbl.pm8001_tbl.iop_event_log_option = 0x01; pm8001_ha->main_cfg_tbl.pm8001_tbl.fatal_err_interrupt = 0x01; - for (i = 0; i < PM8001_MAX_INB_NUM; i++) { + for (i = 0; i < pm8001_ha->max_q_num; i++) { pm8001_ha->inbnd_q_tbl[i].element_pri_size_cnt = PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x00<<30); pm8001_ha->inbnd_q_tbl[i].upper_base_addr = @@ -249,7 +249,7 @@ static void init_default_table_values(struct pm8001_hba_info *pm8001_ha) pm8001_ha->inbnd_q_tbl[i].producer_idx = 0; pm8001_ha->inbnd_q_tbl[i].consumer_index = 0; } - for (i = 0; i < PM8001_MAX_OUTB_NUM; i++) { + for (i = 0; i < pm8001_ha->max_q_num; i++) { pm8001_ha->outbnd_q_tbl[i].element_size_cnt = PM8001_MPI_QUEUE | (pm8001_ha->iomb_size << 16) | (0x01<<30); pm8001_ha->outbnd_q_tbl[i].upper_base_addr = @@ -671,9 +671,9 @@ static int pm8001_chip_init(struct pm8001_hba_info *pm8001_ha) read_outbnd_queue_table(pm8001_ha); /* update main config table ,inbound table and outbound table */ update_main_config_table(pm8001_ha); - for (i = 0; i < PM8001_MAX_INB_NUM; i++) + for (i = 0; i < pm8001_ha->max_q_num; i++) update_inbnd_queue_table(pm8001_ha, i); - for (i = 0; i < PM8001_MAX_OUTB_NUM; i++) + for (i = 0; i < pm8001_ha->max_q_num; i++) update_outbnd_queue_table(pm8001_ha, i); /* 8081 controller donot require these operations */ if (deviceid != 0x8081 && deviceid != 0x0042) { From 0352c3d3959a6cf543075b88c7e662fd3546f12e Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Sun, 4 Apr 2021 00:54:15 +0300 Subject: [PATCH 055/180] scsi: target: iscsi: Fix zero tag inside a trace event target_sequencer_start event is triggered inside target_cmd_init_cdb(). se_cmd.tag is not initialized with ITT at the moment so the event always prints zero tag. Link: https://lore.kernel.org/r/20210403215415.95077-1-r.bolshakov@yadro.com Cc: stable@vger.kernel.org # 5.10+ Reviewed-by: Mike Christie Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d0e7ed8f28cc..e5c443bfbdf9 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1166,6 +1166,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, target_get_sess_cmd(&cmd->se_cmd, true); + cmd->se_cmd.tag = (__force u32)cmd->init_task_tag; cmd->sense_reason = target_cmd_init_cdb(&cmd->se_cmd, hdr->cdb); if (cmd->sense_reason) { if (cmd->sense_reason == TCM_OUT_OF_RESOURCES) { @@ -1180,8 +1181,6 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (cmd->sense_reason) goto attach_cmd; - /* only used for printks or comparing with ->ref_task_tag */ - cmd->se_cmd.tag = (__force u32)cmd->init_task_tag; cmd->sense_reason = target_cmd_parse_cdb(&cmd->se_cmd); if (cmd->sense_reason) goto attach_cmd; From 5cd0f6f57639c5afbb36100c69281fee82c95ee7 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 1 Apr 2021 11:11:05 +0200 Subject: [PATCH 056/180] scsi: scsi_transport_srp: Don't block target in SRP_PORT_LOST state rport_dev_loss_timedout() sets the rport state to SRP_PORT_LOST and the SCSI target state to SDEV_TRANSPORT_OFFLINE. If this races with srp_reconnect_work(), a warning is printed: Mar 27 18:48:07 ictm1604s01h4 kernel: dev_loss_tmo expired for SRP port-18:1 / host18. Mar 27 18:48:07 ictm1604s01h4 kernel: ------------[ cut here ]------------ Mar 27 18:48:07 ictm1604s01h4 kernel: scsi_internal_device_block(18:0:0:100) failed: ret = -22 Mar 27 18:48:07 ictm1604s01h4 kernel: Call Trace: Mar 27 18:48:07 ictm1604s01h4 kernel: ? scsi_target_unblock+0x50/0x50 [scsi_mod] Mar 27 18:48:07 ictm1604s01h4 kernel: starget_for_each_device+0x80/0xb0 [scsi_mod] Mar 27 18:48:07 ictm1604s01h4 kernel: target_block+0x24/0x30 [scsi_mod] Mar 27 18:48:07 ictm1604s01h4 kernel: device_for_each_child+0x57/0x90 Mar 27 18:48:07 ictm1604s01h4 kernel: srp_reconnect_rport+0xe4/0x230 [scsi_transport_srp] Mar 27 18:48:07 ictm1604s01h4 kernel: srp_reconnect_work+0x40/0xc0 [scsi_transport_srp] Avoid this by not trying to block targets for rports in SRP_PORT_LOST state. Link: https://lore.kernel.org/r/20210401091105.8046-1-mwilck@suse.com Reviewed-by: Bart Van Assche Signed-off-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_srp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 1e939a2a387f..98a34ed10f1a 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -541,7 +541,7 @@ int srp_reconnect_rport(struct srp_rport *rport) res = mutex_lock_interruptible(&rport->mutex); if (res) goto out; - if (rport->state != SRP_RPORT_FAIL_FAST) + if (rport->state != SRP_RPORT_FAIL_FAST && rport->state != SRP_RPORT_LOST) /* * sdev state must be SDEV_TRANSPORT_OFFLINE, transition * to SDEV_BLOCK is illegal. Calling scsi_target_unblock() From 6eff5721933c08c3b76d6126aee24d8f134518ef Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:14 -0700 Subject: [PATCH 057/180] cxl/mem: Use sysfs_emit() for attribute show routines While none the CXL sysfs attributes are threatening to overrun a PAGE_SIZE of output, it is good form to use the recommended helpers. Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices") Reported-by: Jason Gunthorpe Reviewed-by: Ben Widawsky Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/161728759424.2474381.11231441014951343463.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 244cb7d89678..832582033683 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -1066,7 +1066,7 @@ static ssize_t firmware_version_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_mem *cxlm = cxlmd->cxlm; - return sprintf(buf, "%.16s\n", cxlm->firmware_version); + return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version); } static DEVICE_ATTR_RO(firmware_version); @@ -1076,7 +1076,7 @@ static ssize_t payload_max_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_mem *cxlm = cxlmd->cxlm; - return sprintf(buf, "%zu\n", cxlm->payload_size); + return sysfs_emit(buf, "%zu\n", cxlm->payload_size); } static DEVICE_ATTR_RO(payload_max); @@ -1087,7 +1087,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, struct cxl_mem *cxlm = cxlmd->cxlm; unsigned long long len = range_len(&cxlm->ram_range); - return sprintf(buf, "%#llx\n", len); + return sysfs_emit(buf, "%#llx\n", len); } static struct device_attribute dev_attr_ram_size = @@ -1100,7 +1100,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, struct cxl_mem *cxlm = cxlmd->cxlm; unsigned long long len = range_len(&cxlm->pmem_range); - return sprintf(buf, "%#llx\n", len); + return sysfs_emit(buf, "%#llx\n", len); } static struct device_attribute dev_attr_pmem_size = From 5877515912cc4f0d67071b7cee15076ebef24708 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:19 -0700 Subject: [PATCH 058/180] cxl/mem: Fix synchronization mechanism for device removal vs ioctl operations The percpu_ref to gate whether cxl_memdev_ioctl() is free to use the driver context (@cxlm) to issue I/O is overkill, implemented incorrectly (missing a device reference before accessing the percpu_ref), and the complexities of shutting down a percpu_ref contributed to a bug in the error unwind in cxl_mem_add_memdev() (missing put_device() to be fixed separately). Use an rwsem to explicitly synchronize the usage of cxlmd->cxlm, and add the missing reference counting for cxlmd in cxl_memdev_open() and cxl_memdev_release_file(). Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices") Reported-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/161728759948.2474381.17481500816783671817.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 101 ++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 832582033683..438f0861c46c 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -96,21 +96,18 @@ struct mbox_cmd { * @dev: driver core device object * @cdev: char dev core object for ioctl operations * @cxlm: pointer to the parent device driver data - * @ops_active: active user of @cxlm in ops handlers - * @ops_dead: completion when all @cxlm ops users have exited * @id: id number of this memdev instance. */ struct cxl_memdev { struct device dev; struct cdev cdev; struct cxl_mem *cxlm; - struct percpu_ref ops_active; - struct completion ops_dead; int id; }; static int cxl_mem_major; static DEFINE_IDA(cxl_memdev_ida); +static DECLARE_RWSEM(cxl_memdev_rwsem); static struct dentry *cxl_debugfs; static bool cxl_raw_allow_all; @@ -776,26 +773,43 @@ static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd, static long cxl_memdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct cxl_memdev *cxlmd; - struct inode *inode; - int rc = -ENOTTY; + struct cxl_memdev *cxlmd = file->private_data; + int rc = -ENXIO; - inode = file_inode(file); - cxlmd = container_of(inode->i_cdev, typeof(*cxlmd), cdev); - - if (!percpu_ref_tryget_live(&cxlmd->ops_active)) - return -ENXIO; - - rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); - - percpu_ref_put(&cxlmd->ops_active); + down_read(&cxl_memdev_rwsem); + if (cxlmd->cxlm) + rc = __cxl_memdev_ioctl(cxlmd, cmd, arg); + up_read(&cxl_memdev_rwsem); return rc; } +static int cxl_memdev_open(struct inode *inode, struct file *file) +{ + struct cxl_memdev *cxlmd = + container_of(inode->i_cdev, typeof(*cxlmd), cdev); + + get_device(&cxlmd->dev); + file->private_data = cxlmd; + + return 0; +} + +static int cxl_memdev_release_file(struct inode *inode, struct file *file) +{ + struct cxl_memdev *cxlmd = + container_of(inode->i_cdev, typeof(*cxlmd), cdev); + + put_device(&cxlmd->dev); + + return 0; +} + static const struct file_operations cxl_memdev_fops = { .owner = THIS_MODULE, .unlocked_ioctl = cxl_memdev_ioctl, + .open = cxl_memdev_open, + .release = cxl_memdev_release_file, .compat_ioctl = compat_ptr_ioctl, .llseek = noop_llseek, }; @@ -1049,7 +1063,6 @@ static void cxl_memdev_release(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - percpu_ref_exit(&cxlmd->ops_active); ida_free(&cxl_memdev_ida, cxlmd->id); kfree(cxlmd); } @@ -1150,26 +1163,23 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; -static void cxlmdev_unregister(void *_cxlmd) +static void cxl_memdev_shutdown(struct cxl_memdev *cxlmd) +{ + down_write(&cxl_memdev_rwsem); + cxlmd->cxlm = NULL; + up_write(&cxl_memdev_rwsem); +} + +static void cxl_memdev_unregister(void *_cxlmd) { struct cxl_memdev *cxlmd = _cxlmd; struct device *dev = &cxlmd->dev; - percpu_ref_kill(&cxlmd->ops_active); cdev_device_del(&cxlmd->cdev, dev); - wait_for_completion(&cxlmd->ops_dead); - cxlmd->cxlm = NULL; + cxl_memdev_shutdown(cxlmd); put_device(dev); } -static void cxlmdev_ops_active_release(struct percpu_ref *ref) -{ - struct cxl_memdev *cxlmd = - container_of(ref, typeof(*cxlmd), ops_active); - - complete(&cxlmd->ops_dead); -} - static int cxl_mem_add_memdev(struct cxl_mem *cxlm) { struct pci_dev *pdev = cxlm->pdev; @@ -1181,17 +1191,6 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); if (!cxlmd) return -ENOMEM; - init_completion(&cxlmd->ops_dead); - - /* - * @cxlm is deallocated when the driver unbinds so operations - * that are using it need to hold a live reference. - */ - cxlmd->cxlm = cxlm; - rc = percpu_ref_init(&cxlmd->ops_active, cxlmdev_ops_active_release, 0, - GFP_KERNEL); - if (rc) - goto err_ref; rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL); if (rc < 0) @@ -1209,23 +1208,27 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) cdev = &cxlmd->cdev; cdev_init(cdev, &cxl_memdev_fops); + /* + * Activate ioctl operations, no cxl_memdev_rwsem manipulation + * needed as this is ordered with cdev_add() publishing the device. + */ + cxlmd->cxlm = cxlm; + rc = cdev_device_add(cdev, dev); if (rc) goto err_add; - return devm_add_action_or_reset(dev->parent, cxlmdev_unregister, cxlmd); + return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister, + cxlmd); err_add: + /* + * The cdev was briefly live, shutdown any ioctl operations that + * saw that state. + */ + cxl_memdev_shutdown(cxlmd); ida_free(&cxl_memdev_ida, cxlmd->id); err_id: - /* - * Theoretically userspace could have already entered the fops, - * so flush ops_active. - */ - percpu_ref_kill(&cxlmd->ops_active); - wait_for_completion(&cxlmd->ops_dead); - percpu_ref_exit(&cxlmd->ops_active); -err_ref: kfree(cxlmd); return rc; From 1c3333a28d4532cfc37d4d25bfc76654a0c76643 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:25 -0700 Subject: [PATCH 059/180] cxl/mem: Do not rely on device_add() side effects for dev_set_name() failures While device_add() will happen to catch dev_set_name() failures it is a broken pattern to follow given that the core may try to fall back to a different name. Add explicit checking for dev_set_name() failures to be cleaned up by put_device(). Skip cdev_device_add() and proceed directly to put_device() if the name set fails. This type of bug is easier to see if 'alloc' is split from 'add' operations that require put_device() on failure. So cxl_memdev_alloc() is split out as a result. Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices") Reported-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/161728760514.2474381.1163928273337158134.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 438f0861c46c..da93b633531f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -1180,7 +1180,7 @@ static void cxl_memdev_unregister(void *_cxlmd) put_device(dev); } -static int cxl_mem_add_memdev(struct cxl_mem *cxlm) +static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm) { struct pci_dev *pdev = cxlm->pdev; struct cxl_memdev *cxlmd; @@ -1190,11 +1190,11 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); if (!cxlmd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL); if (rc < 0) - goto err_id; + goto err; cxlmd->id = rc; dev = &cxlmd->dev; @@ -1203,10 +1203,31 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); dev->type = &cxl_memdev_type; - dev_set_name(dev, "mem%d", cxlmd->id); cdev = &cxlmd->cdev; cdev_init(cdev, &cxl_memdev_fops); + return cxlmd; + +err: + kfree(cxlmd); + return ERR_PTR(rc); +} + +static int cxl_mem_add_memdev(struct cxl_mem *cxlm) +{ + struct cxl_memdev *cxlmd; + struct device *dev; + struct cdev *cdev; + int rc; + + cxlmd = cxl_memdev_alloc(cxlm); + if (IS_ERR(cxlmd)) + return PTR_ERR(cxlmd); + + dev = &cxlmd->dev; + rc = dev_set_name(dev, "mem%d", cxlmd->id); + if (rc) + goto err; /* * Activate ioctl operations, no cxl_memdev_rwsem manipulation @@ -1214,23 +1235,21 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) */ cxlmd->cxlm = cxlm; + cdev = &cxlmd->cdev; rc = cdev_device_add(cdev, dev); if (rc) - goto err_add; + goto err; return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister, cxlmd); -err_add: +err: /* * The cdev was briefly live, shutdown any ioctl operations that * saw that state. */ cxl_memdev_shutdown(cxlmd); - ida_free(&cxl_memdev_ida, cxlmd->id); -err_id: - kfree(cxlmd); - + put_device(dev); return rc; } From 7eda6457a9ca4dc9754e1158c3794e4487ea4392 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Apr 2021 07:33:30 -0700 Subject: [PATCH 060/180] cxl/mem: Disable cxl device power management There is no power management of cxl virtual devices, disable device-power-management and runtime-power-management to prevent userspace from growing expectations of those attributes appearing. They can be added back in the future if needed. Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/161728761025.2474381.808344500111924819.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index da93b633531f..52f7da49c560 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -1203,6 +1203,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm) dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); dev->type = &cxl_memdev_type; + device_set_pm_not_required(dev); cdev = &cxlmd->cdev; cdev_init(cdev, &cxl_memdev_fops); From 392be0bda730df3c71241b2a16bbecac78ee627d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 24 Mar 2021 15:16:35 +0100 Subject: [PATCH 061/180] cxl/mem: Force array size of mem_commands[] to CXL_MEM_COMMAND_ID_MAX Typically the mem_commands[] array is in sync with 'enum { CXL_CMDS }'. Current code works well. However, the array size of mem_commands[] may not strictly be the same as CXL_MEM_COMMAND_ID_MAX. E.g. if a new CXL_CMD() is added that is guarded by #ifdefs, the array could be shorter. This could lead then further to an out-of-bounds array access in cxl_validate_cmd_from_user(). Fix this by forcing the array size to CXL_MEM_COMMAND_ID_MAX. This also adds range checks for array items in mem_commands[] at compile time. Signed-off-by: Robert Richter Link: https://lore.kernel.org/r/20210324141635.22335-1-rrichter@amd.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 52f7da49c560..e3003f49b329 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -166,7 +166,7 @@ struct cxl_mem_command { * table will be validated against the user's input. For example, if size_in is * 0, and the user passed in 1, it is an error. */ -static struct cxl_mem_command mem_commands[] = { +static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE), #ifdef CONFIG_CXL_MEM_RAW_COMMANDS CXL_CMD(RAW, ~0, ~0, 0), From 56b4c6515a367718ced8595debf1567c052ea0b2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 5 Apr 2021 20:14:05 +0200 Subject: [PATCH 062/180] i2c: exynos5: correct top kerneldoc The top comment is not a kerneldoc, as W=1 build reports: drivers/i2c/busses/i2c-exynos5.c:39: warning: expecting prototype for i2c(). Prototype was for HSI2C_CTL() instead Signed-off-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-exynos5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index 5ac30d95650c..97d4f3ac0abd 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * i2c-exynos5.c - Samsung Exynos5 I2C Controller Driver * * Copyright (C) 2013 Samsung Electronics Co., Ltd. From 3a62583c2853b0ab37a57dde79decea210b5fb89 Mon Sep 17 00:00:00 2001 From: William Roche Date: Tue, 6 Apr 2021 11:28:59 -0400 Subject: [PATCH 063/180] RAS/CEC: Correct ce_add_elem()'s returned values ce_add_elem() uses different return values to signal a result from adding an element to the collector. Commit in Fixes: broke the case where the element being added is not found in the array. Correct that. [ bp: Rewrite commit message, add kernel-doc comments. ] Fixes: de0e0624d86f ("RAS/CEC: Check count_threshold unconditionally") Signed-off-by: William Roche Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/1617722939-29670-1-git-send-email-william.roche@oracle.com --- drivers/ras/cec.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index ddecf25b5dd4..d7894f178bd4 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -309,11 +309,20 @@ static bool sanity_check(struct ce_array *ca) return ret; } +/** + * cec_add_elem - Add an element to the CEC array. + * @pfn: page frame number to insert + * + * Return values: + * - <0: on error + * - 0: on success + * - >0: when the inserted pfn was offlined + */ static int cec_add_elem(u64 pfn) { struct ce_array *ca = &ce_arr; + int count, err, ret = 0; unsigned int to = 0; - int count, ret = 0; /* * We can be called very early on the identify_cpu() path where we are @@ -330,8 +339,8 @@ static int cec_add_elem(u64 pfn) if (ca->n == MAX_ELEMS) WARN_ON(!del_lru_elem_unlocked(ca)); - ret = find_elem(ca, pfn, &to); - if (ret < 0) { + err = find_elem(ca, pfn, &to); + if (err < 0) { /* * Shift range [to-end] to make room for one more element. */ From f2013278ae40b89cc27916366c407ce5261815ef Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 7 Apr 2021 10:44:52 +0800 Subject: [PATCH 064/180] perf report: Fix wrong LBR block sorting When '--total-cycles' is specified, it supports sorting for all blocks by 'Sampled Cycles%'. This is useful to concentrate on the globally hottest blocks. 'Sampled Cycles%' - block sampled cycles aggregation / total sampled cycles But in current code, it doesn't use the cycles aggregation. Part of 'cycles' counting is possibly dropped for some overlap jumps. But for identifying the hot block, we always need the full cycles. # perf record -b ./triad_loop # perf report --total-cycles --stdio Before: # # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... ............................................................. ................. # 0.81% 793 4.32% 793 [setup-vdso.h:34 -> setup-vdso.h:40] ld-2.27.so 0.49% 480 0.87% 160 [native_write_msr+0 -> native_write_msr+16] [kernel.kallsyms] 0.48% 476 0.52% 95 [native_read_msr+0 -> native_read_msr+29] [kernel.kallsyms] 0.31% 303 1.65% 303 [nmi_restore+0 -> nmi_restore+37] [kernel.kallsyms] 0.26% 255 1.39% 255 [nohz_balance_exit_idle+75 -> nohz_balance_exit_idle+162] [kernel.kallsyms] 0.24% 234 1.28% 234 [end_repeat_nmi+67 -> end_repeat_nmi+83] [kernel.kallsyms] 0.23% 227 1.24% 227 [__irqentry_text_end+96 -> __irqentry_text_end+126] [kernel.kallsyms] 0.20% 194 1.06% 194 [native_set_debugreg+52 -> native_set_debugreg+56] [kernel.kallsyms] 0.11% 106 0.14% 26 [native_sched_clock+0 -> native_sched_clock+98] [kernel.kallsyms] 0.10% 97 0.53% 97 [trigger_load_balance+0 -> trigger_load_balance+67] [kernel.kallsyms] 0.09% 85 0.46% 85 [get-dynamic-info.h:102 -> get-dynamic-info.h:111] ld-2.27.so ... 0.00% 92.7K 0.02% 4 [triad_loop.c:64 -> triad_loop.c:65] triad_loop The hottest block '[triad_loop.c:64 -> triad_loop.c:65]' is not at the top of output. After: # Sampled Cycles% Sampled Cycles Avg Cycles% Avg Cycles [Program Block Range] Shared Object # ............... .............. ........... .......... .............................................................. ................. # 94.35% 92.7K 0.02% 4 [triad_loop.c:64 -> triad_loop.c:65] triad_loop 0.81% 793 4.32% 793 [setup-vdso.h:34 -> setup-vdso.h:40] ld-2.27.so 0.49% 480 0.87% 160 [native_write_msr+0 -> native_write_msr+16] [kernel.kallsyms] 0.48% 476 0.52% 95 [native_read_msr+0 -> native_read_msr+29] [kernel.kallsyms] 0.31% 303 1.65% 303 [nmi_restore+0 -> nmi_restore+37] [kernel.kallsyms] 0.26% 255 1.39% 255 [nohz_balance_exit_idle+75 -> nohz_balance_exit_idle+162] [kernel.kallsyms] 0.24% 234 1.28% 234 [end_repeat_nmi+67 -> end_repeat_nmi+83] [kernel.kallsyms] 0.23% 227 1.24% 227 [__irqentry_text_end+96 -> __irqentry_text_end+126] [kernel.kallsyms] 0.20% 194 1.06% 194 [native_set_debugreg+52 -> native_set_debugreg+56] [kernel.kallsyms] 0.11% 106 0.14% 26 [native_sched_clock+0 -> native_sched_clock+98] [kernel.kallsyms] 0.10% 97 0.53% 97 [trigger_load_balance+0 -> trigger_load_balance+67] [kernel.kallsyms] 0.09% 85 0.46% 85 [get-dynamic-info.h:102 -> get-dynamic-info.h:111] ld-2.27.so 0.08% 82 0.06% 11 [intel_pmu_drain_pebs_nhm+580 -> intel_pmu_drain_pebs_nhm+627] [kernel.kallsyms] 0.08% 77 0.42% 77 [lru_add_drain_cpu+0 -> lru_add_drain_cpu+133] [kernel.kallsyms] 0.08% 74 0.10% 18 [handle_pmi_common+271 -> handle_pmi_common+310] [kernel.kallsyms] 0.08% 74 0.40% 74 [get-dynamic-info.h:131 -> get-dynamic-info.h:157] ld-2.27.so 0.07% 69 0.09% 17 [intel_pmu_drain_pebs_nhm+432 -> intel_pmu_drain_pebs_nhm+468] [kernel.kallsyms] Now the hottest block is reported at the top of output. Fixes: b65a7d372b1a55db ("perf hist: Support block formats with compare/sort/display") Signed-off-by: Jin Yao Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210407024452.29988-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/block-info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 423ec69bda6c..5ecd4f401f32 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -201,7 +201,7 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, double ratio = 0.0; if (block_fmt->total_cycles) - ratio = (double)bi->cycles / (double)block_fmt->total_cycles; + ratio = (double)bi->cycles_aggr / (double)block_fmt->total_cycles; return color_pct(hpp, block_fmt->width, 100.0 * ratio); } @@ -216,9 +216,9 @@ static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt, double l, r; if (block_fmt->total_cycles) { - l = ((double)bi_l->cycles / + l = ((double)bi_l->cycles_aggr / (double)block_fmt->total_cycles) * 100000.0; - r = ((double)bi_r->cycles / + r = ((double)bi_r->cycles_aggr / (double)block_fmt->total_cycles) * 100000.0; return (int64_t)l - (int64_t)r; } From 92f1e8adf7db2ef9b90e5662182810c0cf8ac22e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 7 Apr 2021 08:39:55 -0700 Subject: [PATCH 065/180] perf arm-spe: Avoid potential buffer overrun SPE extended headers are > 1 byte so ensure the buffer contains at least this before reading. This issue was detected by fuzzing. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andre Przywara Cc: Dave Martin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20210407153955.317215-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index f3ac9d40cebf..2e5eff4f8f03 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -210,8 +210,10 @@ static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) { /* 16-bit extended format header */ - ext_hdr = 1; + if (len == 1) + return ARM_SPE_BAD_PACKET; + ext_hdr = 1; hdr = buf[1]; if (hdr == SPE_HEADER1_ALIGNMENT) return arm_spe_get_alignment(buf, len, packet); From 50ce6826a48f119baf2794fa384a64efe9bd84a5 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 7 Apr 2021 02:06:06 +0300 Subject: [PATCH 066/180] clk: fixed: fix double free in resource managed fixed-factor clock devm_clk_hw_register_fixed_factor_release(), the release function for the devm_clk_hw_register_fixed_factor(), calls clk_hw_unregister_fixed_factor(), which will kfree() the clock. However after that the devres functions will also kfree the allocated data, resulting in double free/memory corruption. Just call clk_hw_unregister() instead, leaving kfree() to devres code. Reported-by: Rob Clark Cc: Daniel Palmer Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20210406230606.3007138-1-dmitry.baryshkov@linaro.org Fixes: 0b9266d295ce ("clk: fixed: add devm helper for clk_hw_register_fixed_factor()") [sboyd@kernel.org: Remove ugly cast] Signed-off-by: Stephen Boyd --- drivers/clk/clk-fixed-factor.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c index 4f7bf3929d6d..4e4b6d367612 100644 --- a/drivers/clk/clk-fixed-factor.c +++ b/drivers/clk/clk-fixed-factor.c @@ -66,7 +66,14 @@ EXPORT_SYMBOL_GPL(clk_fixed_factor_ops); static void devm_clk_hw_register_fixed_factor_release(struct device *dev, void *res) { - clk_hw_unregister_fixed_factor(&((struct clk_fixed_factor *)res)->hw); + struct clk_fixed_factor *fix = res; + + /* + * We can not use clk_hw_unregister_fixed_factor, since it will kfree() + * the hw, resulting in double free. Just unregister the hw and let + * devres code kfree() it. + */ + clk_hw_unregister(&fix->hw); } static struct clk_hw * From df8a39f2911a4c7769e0f760509f556a9e9d37af Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 7 Apr 2021 20:35:32 +0200 Subject: [PATCH 067/180] i2c: imx: mention Oleksij as maintainer of the binding docs When I removed myself as a maintainer of the yaml file, I missed that some maintainer is required. Oleksij is already listed in MAINTAINERS for this file, so add him here as well. Fixes: 1ae6b3780848 ("i2c: imx: drop me as maintainer of binding docs") Reviewed-by: Oleksij Rempel Reviewed-by: Rob Herring Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-imx.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml index f33c6b29966b..3592d49235e0 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.yaml @@ -6,6 +6,9 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Inter IC (I2C) and High Speed Inter IC (HS-I2C) for i.MX +maintainers: + - Oleksij Rempel + allOf: - $ref: /schemas/i2c/i2c-controller.yaml# From afd0be7299533bb2e2b09104399d8a467ecbd2c5 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Thu, 8 Apr 2021 05:20:09 +0000 Subject: [PATCH 068/180] libbpf: Fix potential NULL pointer dereference Wait until after the UMEM is checked for null to dereference it. Fixes: 43f1bc1efff1 ("libbpf: Restore umem state after socket create failure") Signed-off-by: Ciara Loftus Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210408052009.7844-1-ciara.loftus@intel.com --- tools/lib/bpf/xsk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index d24b5cc720ec..007fe5d59438 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -852,18 +852,19 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, struct xsk_ring_cons *comp, const struct xsk_socket_config *usr_config) { + bool unmap, rx_setup_done = false, tx_setup_done = false; void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; struct xsk_ctx *ctx; int err, ifindex; - bool unmap = umem->fill_save != fill; - bool rx_setup_done = false, tx_setup_done = false; if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; + unmap = umem->fill_save != fill; + xsk = calloc(1, sizeof(*xsk)); if (!xsk) return -ENOMEM; From 2361db89aaadfb671db6911b0063e01ec8922c28 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 9 Mar 2021 17:43:38 -0800 Subject: [PATCH 069/180] libnvdimm: Notify disk drivers to revalidate region read-only Previous kernels allowed the BLKROSET to override the disk's read-only status. With that situation fixed the pmem driver needs to rely on notification events to reevaluate the disk read-only status after the host region has been marked read-write. Recall that when libnvdimm determines that the persistent memory has lost persistence (for example lack of energy to flush from DRAM to FLASH on an NVDIMM-N device) it marks the region read-only, but that state can be overridden by the user via: echo 0 > /sys/bus/nd/devices/regionX/read_only ...to date there is no notification that the region has restored persistence, so the user override is the only recovery. Fixes: 52f019d43c22 ("block: add a hard-readonly flag to struct gendisk") Reported-by: kernel test robot Reported-by: Vishal Verma Tested-by: Vishal Verma Reviewed-by: Christoph Hellwig Cc: Christoph Hellwig Cc: Ming Lei Cc: Martin K. Petersen Cc: Hannes Reinecke Cc: Jens Axboe Link: https://lore.kernel.org/r/161534060720.528671.2341213328968989192.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 14 ++++++-------- drivers/nvdimm/pmem.c | 37 ++++++++++++++++++++++++++++++++---- drivers/nvdimm/region_devs.c | 7 +++++++ include/linux/nd.h | 1 + 4 files changed, 47 insertions(+), 12 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 48f0985ca8a0..3a777d0073b7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -631,16 +631,14 @@ void nvdimm_check_and_set_ro(struct gendisk *disk) struct nd_region *nd_region = to_nd_region(dev->parent); int disk_ro = get_disk_ro(disk); - /* - * Upgrade to read-only if the region is read-only preserve as - * read-only if the disk is already read-only. - */ - if (disk_ro || nd_region->ro == disk_ro) + /* catch the disk up with the region ro state */ + if (disk_ro == nd_region->ro) return; - dev_info(dev, "%s read-only, marking %s read-only\n", - dev_name(&nd_region->dev), disk->disk_name); - set_disk_ro(disk, 1); + dev_info(dev, "%s read-%s, marking %s read-%s\n", + dev_name(&nd_region->dev), nd_region->ro ? "only" : "write", + disk->disk_name, nd_region->ro ? "only" : "write"); + set_disk_ro(disk, nd_region->ro); } EXPORT_SYMBOL(nvdimm_check_and_set_ro); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b8a85bfb2e95..7daac795db39 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -26,6 +26,7 @@ #include #include #include "pmem.h" +#include "btt.h" #include "pfn.h" #include "nd.h" @@ -585,7 +586,7 @@ static void nd_pmem_shutdown(struct device *dev) nvdimm_flush(to_nd_region(dev->parent), NULL); } -static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +static void pmem_revalidate_poison(struct device *dev) { struct nd_region *nd_region; resource_size_t offset = 0, end_trunc = 0; @@ -595,9 +596,6 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) struct range range; struct kernfs_node *bb_state; - if (event != NVDIMM_REVALIDATE_POISON) - return; - if (is_nd_btt(dev)) { struct nd_btt *nd_btt = to_nd_btt(dev); @@ -635,6 +633,37 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) sysfs_notify_dirent(bb_state); } +static void pmem_revalidate_region(struct device *dev) +{ + struct pmem_device *pmem; + + if (is_nd_btt(dev)) { + struct nd_btt *nd_btt = to_nd_btt(dev); + struct btt *btt = nd_btt->btt; + + nvdimm_check_and_set_ro(btt->btt_disk); + return; + } + + pmem = dev_get_drvdata(dev); + nvdimm_check_and_set_ro(pmem->disk); +} + +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + switch (event) { + case NVDIMM_REVALIDATE_POISON: + pmem_revalidate_poison(dev); + break; + case NVDIMM_REVALIDATE_REGION: + pmem_revalidate_region(dev); + break; + default: + dev_WARN_ONCE(dev, 1, "notify: unknown event: %d\n", event); + break; + } +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index ef23119db574..51870eb51da6 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -518,6 +518,12 @@ static ssize_t read_only_show(struct device *dev, return sprintf(buf, "%d\n", nd_region->ro); } +static int revalidate_read_only(struct device *dev, void *data) +{ + nd_device_notify(dev, NVDIMM_REVALIDATE_REGION); + return 0; +} + static ssize_t read_only_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -529,6 +535,7 @@ static ssize_t read_only_store(struct device *dev, return rc; nd_region->ro = ro; + device_for_each_child(dev, NULL, revalidate_read_only); return len; } static DEVICE_ATTR_RW(read_only); diff --git a/include/linux/nd.h b/include/linux/nd.h index cec526c8043d..ee9ad76afbba 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -11,6 +11,7 @@ enum nvdimm_event { NVDIMM_REVALIDATE_POISON, + NVDIMM_REVALIDATE_REGION, }; enum nvdimm_claim_class { From 632a1c209b8773cb0119fe3aada9f1db14fa357c Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Thu, 8 Apr 2021 13:28:33 -0400 Subject: [PATCH 070/180] x86/traps: Correct exc_general_protection() and math_error() return paths Commit 334872a09198 ("x86/traps: Attempt to fixup exceptions in vDSO before signaling") added return statements which bypass calling cond_local_irq_disable(). According to ca4c6a9858c2 ("x86/traps: Make interrupt enable/disable symmetric in C code"), cond_local_irq_disable() is needed because the asm return code no longer disables interrupts. Follow the existing code as an example to use "goto exit" instead of "return" statement. [ bp: Massage commit message. ] Fixes: 334872a09198 ("x86/traps: Attempt to fixup exceptions in vDSO before signaling") Signed-off-by: Thomas Tai Signed-off-by: Borislav Petkov Reviewed-by: Alexandre Chartre Link: https://lkml.kernel.org/r/1617902914-83245-1-git-send-email-thomas.tai@oracle.com --- arch/x86/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ac1874a2a70e..651e3e508959 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -556,7 +556,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) tsk->thread.trap_nr = X86_TRAP_GP; if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0)) - return; + goto exit; show_signal(tsk, SIGSEGV, "", desc, regs, error_code); force_sig(SIGSEGV); @@ -1057,7 +1057,7 @@ static void math_error(struct pt_regs *regs, int trapnr) goto exit; if (fixup_vdso_exception(regs, trapnr, 0, 0)) - return; + goto exit; force_sig_fault(SIGFPE, si_code, (void __user *)uprobe_get_trap_addr(regs)); From 0760fa3d8f7fceeea508b98899f1c826e10ffe78 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 7 Apr 2021 20:57:33 -0700 Subject: [PATCH 071/180] percpu: make pcpu_nr_empty_pop_pages per chunk type nr_empty_pop_pages is used to guarantee that there are some free populated pages to satisfy atomic allocations. Accounted and non-accounted allocations are using separate sets of chunks, so both need to have a surplus of empty pages. This commit makes pcpu_nr_empty_pop_pages and the corresponding logic per chunk type. [Dennis] This issue came up as I was reviewing [1] and realized I missed this. Simultaneously, it was reported btrfs was seeing failed atomic allocations in fsstress tests [2] and [3]. [1] https://lore.kernel.org/linux-mm/20210324190626.564297-1-guro@fb.com/ [2] https://lore.kernel.org/linux-mm/20210401185158.3275.409509F4@e16-tech.com/ [3] https://lore.kernel.org/linux-mm/CAL3q7H5RNBjCi708GH7jnczAOe0BLnacT9C+OBgA-Dx9jhB6SQ@mail.gmail.com/ Fixes: 3c7be18ac9a0 ("mm: memcg/percpu: account percpu memory to memory cgroups") Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Roman Gushchin Tested-by: Filipe Manana Signed-off-by: Dennis Zhou --- mm/percpu-internal.h | 2 +- mm/percpu-stats.c | 9 +++++++-- mm/percpu.c | 14 +++++++------- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 18b768ac7dca..095d7eaa0db4 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -87,7 +87,7 @@ extern spinlock_t pcpu_lock; extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; -extern int pcpu_nr_empty_pop_pages; +extern int pcpu_nr_empty_pop_pages[]; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index c8400a2adbc2..f6026dbcdf6b 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -145,6 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) int slot, max_nr_alloc; int *buffer; enum pcpu_chunk_type type; + int nr_empty_pop_pages; alloc_buffer: spin_lock_irq(&pcpu_lock); @@ -165,7 +166,11 @@ static int percpu_stats_show(struct seq_file *m, void *v) goto alloc_buffer; } -#define PL(X) \ + nr_empty_pop_pages = 0; + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type]; + +#define PL(X) \ seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X) seq_printf(m, @@ -196,7 +201,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) PU(nr_max_chunks); PU(min_alloc_size); PU(max_alloc_size); - P("empty_pop_pages", pcpu_nr_empty_pop_pages); + P("empty_pop_pages", nr_empty_pop_pages); seq_putc(m, '\n'); #undef PU diff --git a/mm/percpu.c b/mm/percpu.c index 6596a0a4286e..23308113a5ff 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -173,10 +173,10 @@ struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ static LIST_HEAD(pcpu_map_extend_chunks); /* - * The number of empty populated pages, protected by pcpu_lock. The - * reserved chunk doesn't contribute to the count. + * The number of empty populated pages by chunk type, protected by pcpu_lock. + * The reserved chunk doesn't contribute to the count. */ -int pcpu_nr_empty_pop_pages; +int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES]; /* * The number of populated pages in use by the allocator, protected by @@ -556,7 +556,7 @@ static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr) { chunk->nr_empty_pop_pages += nr; if (chunk != pcpu_reserved_chunk) - pcpu_nr_empty_pop_pages += nr; + pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr; } /* @@ -1832,7 +1832,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, mutex_unlock(&pcpu_alloc_mutex); } - if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) + if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); /* clear the areas and return address relative to base address */ @@ -2000,7 +2000,7 @@ static void __pcpu_balance_workfn(enum pcpu_chunk_type type) pcpu_atomic_alloc_failed = false; } else { nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - - pcpu_nr_empty_pop_pages, + pcpu_nr_empty_pop_pages[type], 0, PCPU_EMPTY_POP_PAGES_HIGH); } @@ -2580,7 +2580,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* link the first chunk in */ pcpu_first_chunk = chunk; - pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; + pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] = pcpu_first_chunk->nr_empty_pop_pages; pcpu_chunk_relocate(pcpu_first_chunk, -1); /* include all regions of the first chunk */ From 4873d8cc141e76aa5f3e7ef6e98dfb3823fcbe6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 9 Apr 2021 13:27:01 -0700 Subject: [PATCH 072/180] MAINTAINERS: update CZ.NIC's Turris information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add all the files maintained by Turris team, not only for MOX, but also for Omnia. Change website. Link: https://lkml.kernel.org/r/20210325171123.28093-1-kabel@kernel.org Signed-off-by: Marek Behún Cc: Pavel Machek Cc: Jassi Brar Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c80ad735b384..2a6f7f5edd5a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1790,19 +1790,26 @@ F: drivers/net/ethernet/cortina/ F: drivers/pinctrl/pinctrl-gemini.c F: drivers/rtc/rtc-ftrtc010.c -ARM/CZ.NIC TURRIS MOX SUPPORT +ARM/CZ.NIC TURRIS SUPPORT M: Marek Behun S: Maintained -W: http://mox.turris.cz +W: https://www.turris.cz/ F: Documentation/ABI/testing/debugfs-moxtet F: Documentation/ABI/testing/sysfs-bus-moxtet-devices F: Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm F: Documentation/devicetree/bindings/bus/moxtet.txt F: Documentation/devicetree/bindings/firmware/cznic,turris-mox-rwtm.txt F: Documentation/devicetree/bindings/gpio/gpio-moxtet.txt +F: Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml +F: Documentation/devicetree/bindings/watchdog/armada-37xx-wdt.txt F: drivers/bus/moxtet.c F: drivers/firmware/turris-mox-rwtm.c +F: drivers/leds/leds-turris-omnia.c +F: drivers/mailbox/armada-37xx-rwtm-mailbox.c F: drivers/gpio/gpio-moxtet.c +F: drivers/watchdog/armada_37xx_wdt.c +F: include/dt-bindings/bus/moxtet.h +F: include/linux/armada-37xx-rwtm-mailbox.h F: include/linux/moxtet.h ARM/EZX SMARTPHONES (A780, A910, A1200, E680, ROKR E2 and ROKR E6) From b37c38484375f5a204500e0b10b40da268090995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Fri, 9 Apr 2021 13:27:04 -0700 Subject: [PATCH 073/180] treewide: change my e-mail address, fix my name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change my e-mail address to kabel@kernel.org, and fix my name in non-code parts (add diacritical mark). Link: https://lkml.kernel.org/r/20210325171123.28093-2-kabel@kernel.org Signed-off-by: Marek Behún Cc: Bartosz Golaszewski Cc: Greg Kroah-Hartman Cc: Jassi Brar Cc: Linus Walleij Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/debugfs-moxtet | 4 ++-- Documentation/ABI/testing/debugfs-turris-mox-rwtm | 2 +- Documentation/ABI/testing/sysfs-bus-moxtet-devices | 6 +++--- .../ABI/testing/sysfs-class-led-driver-turris-omnia | 2 +- .../ABI/testing/sysfs-firmware-turris-mox-rwtm | 10 +++++----- .../bindings/leds/cznic,turris-omnia-leds.yaml | 2 +- MAINTAINERS | 2 +- arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 2 +- drivers/bus/moxtet.c | 4 ++-- drivers/firmware/turris-mox-rwtm.c | 4 ++-- drivers/gpio/gpio-moxtet.c | 4 ++-- drivers/leds/leds-turris-omnia.c | 4 ++-- drivers/mailbox/armada-37xx-rwtm-mailbox.c | 4 ++-- drivers/watchdog/armada_37xx_wdt.c | 4 ++-- include/dt-bindings/bus/moxtet.h | 2 +- include/linux/armada-37xx-rwtm-mailbox.h | 2 +- include/linux/moxtet.h | 2 +- 17 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-moxtet b/Documentation/ABI/testing/debugfs-moxtet index 6eee10c3d5a1..637d8587d03d 100644 --- a/Documentation/ABI/testing/debugfs-moxtet +++ b/Documentation/ABI/testing/debugfs-moxtet @@ -1,7 +1,7 @@ What: /sys/kernel/debug/moxtet/input Date: March 2019 KernelVersion: 5.3 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) Read input from the shift registers, in hexadecimal. Returns N+1 bytes, where N is the number of Moxtet connected modules. The first byte is from the CPU board itself. @@ -19,7 +19,7 @@ Description: (Read) Read input from the shift registers, in hexadecimal. What: /sys/kernel/debug/moxtet/output Date: March 2019 KernelVersion: 5.3 -Contact: Marek Behún +Contact: Marek Behún Description: (RW) Read last written value to the shift registers, in hexadecimal, or write values to the shift registers, also in hexadecimal. diff --git a/Documentation/ABI/testing/debugfs-turris-mox-rwtm b/Documentation/ABI/testing/debugfs-turris-mox-rwtm index 326df1b74707..813987d5de4e 100644 --- a/Documentation/ABI/testing/debugfs-turris-mox-rwtm +++ b/Documentation/ABI/testing/debugfs-turris-mox-rwtm @@ -1,7 +1,7 @@ What: /sys/kernel/debug/turris-mox-rwtm/do_sign Date: Jun 2020 KernelVersion: 5.8 -Contact: Marek Behún +Contact: Marek Behún Description: ======= =========================================================== diff --git a/Documentation/ABI/testing/sysfs-bus-moxtet-devices b/Documentation/ABI/testing/sysfs-bus-moxtet-devices index 4a6d61b44f3f..32dccc00d57d 100644 --- a/Documentation/ABI/testing/sysfs-bus-moxtet-devices +++ b/Documentation/ABI/testing/sysfs-bus-moxtet-devices @@ -1,17 +1,17 @@ What: /sys/bus/moxtet/devices/moxtet-./module_description Date: March 2019 KernelVersion: 5.3 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) Moxtet module description. Format: string What: /sys/bus/moxtet/devices/moxtet-./module_id Date: March 2019 KernelVersion: 5.3 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) Moxtet module ID. Format: %x What: /sys/bus/moxtet/devices/moxtet-./module_name Date: March 2019 KernelVersion: 5.3 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) Moxtet module name. Format: string diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia b/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia index 795a5de12fc1..c4d46970c1cf 100644 --- a/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia +++ b/Documentation/ABI/testing/sysfs-class-led-driver-turris-omnia @@ -1,7 +1,7 @@ What: /sys/class/leds//device/brightness Date: July 2020 KernelVersion: 5.9 -Contact: Marek Behún +Contact: Marek Behún Description: (RW) On the front panel of the Turris Omnia router there is also a button which can be used to control the intensity of all the LEDs at once, so that if they are too bright, user can dim them. diff --git a/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm b/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm index b8631f5a29c4..ea5e5b489bc7 100644 --- a/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm +++ b/Documentation/ABI/testing/sysfs-firmware-turris-mox-rwtm @@ -1,21 +1,21 @@ What: /sys/firmware/turris-mox-rwtm/board_version Date: August 2019 KernelVersion: 5.4 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) Board version burned into eFuses of this Turris Mox board. Format: %i What: /sys/firmware/turris-mox-rwtm/mac_address* Date: August 2019 KernelVersion: 5.4 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) MAC addresses burned into eFuses of this Turris Mox board. Format: %pM What: /sys/firmware/turris-mox-rwtm/pubkey Date: August 2019 KernelVersion: 5.4 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) ECDSA public key (in pubkey hex compressed form) computed as pair to the ECDSA private key burned into eFuses of this Turris Mox Board. @@ -24,7 +24,7 @@ Description: (Read) ECDSA public key (in pubkey hex compressed form) computed What: /sys/firmware/turris-mox-rwtm/ram_size Date: August 2019 KernelVersion: 5.4 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) RAM size in MiB of this Turris Mox board as was detected during manufacturing and burned into eFuses. Can be 512 or 1024. Format: %i @@ -32,6 +32,6 @@ Description: (Read) RAM size in MiB of this Turris Mox board as was detected What: /sys/firmware/turris-mox-rwtm/serial_number Date: August 2019 KernelVersion: 5.4 -Contact: Marek Behún +Contact: Marek Behún Description: (Read) Serial number burned into eFuses of this Turris Mox device. Format: %016X diff --git a/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml b/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml index fe7fa25877fd..c7ed2871da06 100644 --- a/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml +++ b/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: CZ.NIC's Turris Omnia LEDs driver maintainers: - - Marek Behún + - Marek Behún description: This module adds support for the RGB LEDs found on the front panel of the diff --git a/MAINTAINERS b/MAINTAINERS index 2a6f7f5edd5a..ba5cd29d2a48 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1791,7 +1791,7 @@ F: drivers/pinctrl/pinctrl-gemini.c F: drivers/rtc/rtc-ftrtc010.c ARM/CZ.NIC TURRIS SUPPORT -M: Marek Behun +M: Marek Behun S: Maintained W: https://www.turris.cz/ F: Documentation/ABI/testing/debugfs-moxtet diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index d239ab70ed99..53e817c5f6f3 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0+ OR MIT) /* * Device Tree file for CZ.NIC Turris Mox Board - * 2019 by Marek Behun + * 2019 by Marek Behún */ /dts-v1/; diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index b20fdcbd035b..fd87a59837fa 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -2,7 +2,7 @@ /* * Turris Mox module configuration bus driver * - * Copyright (C) 2019 Marek Behun + * Copyright (C) 2019 Marek Behún */ #include @@ -879,6 +879,6 @@ static void __exit moxtet_exit(void) } module_exit(moxtet_exit); -MODULE_AUTHOR("Marek Behun "); +MODULE_AUTHOR("Marek Behun "); MODULE_DESCRIPTION("CZ.NIC's Turris Mox module configuration bus"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/firmware/turris-mox-rwtm.c b/drivers/firmware/turris-mox-rwtm.c index 50bb2a6d6ccf..62f0d1a5dd32 100644 --- a/drivers/firmware/turris-mox-rwtm.c +++ b/drivers/firmware/turris-mox-rwtm.c @@ -2,7 +2,7 @@ /* * Turris Mox rWTM firmware driver * - * Copyright (C) 2019 Marek Behun + * Copyright (C) 2019 Marek Behún */ #include @@ -547,4 +547,4 @@ module_platform_driver(turris_mox_rwtm_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Turris Mox rWTM firmware driver"); -MODULE_AUTHOR("Marek Behun "); +MODULE_AUTHOR("Marek Behun "); diff --git a/drivers/gpio/gpio-moxtet.c b/drivers/gpio/gpio-moxtet.c index 8299909318f4..61f9efd6c64f 100644 --- a/drivers/gpio/gpio-moxtet.c +++ b/drivers/gpio/gpio-moxtet.c @@ -2,7 +2,7 @@ /* * Turris Mox Moxtet GPIO expander * - * Copyright (C) 2018 Marek Behun + * Copyright (C) 2018 Marek Behún */ #include @@ -174,6 +174,6 @@ static struct moxtet_driver moxtet_gpio_driver = { }; module_moxtet_driver(moxtet_gpio_driver); -MODULE_AUTHOR("Marek Behun "); +MODULE_AUTHOR("Marek Behun "); MODULE_DESCRIPTION("Turris Mox Moxtet GPIO expander"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/leds/leds-turris-omnia.c b/drivers/leds/leds-turris-omnia.c index 7b2f4d0ae3fe..2f9a289ab245 100644 --- a/drivers/leds/leds-turris-omnia.c +++ b/drivers/leds/leds-turris-omnia.c @@ -2,7 +2,7 @@ /* * CZ.NIC's Turris Omnia LEDs driver * - * 2020 by Marek Behun + * 2020 by Marek Behún */ #include @@ -287,6 +287,6 @@ static struct i2c_driver omnia_leds_driver = { module_i2c_driver(omnia_leds_driver); -MODULE_AUTHOR("Marek Behun "); +MODULE_AUTHOR("Marek Behun "); MODULE_DESCRIPTION("CZ.NIC's Turris Omnia LEDs"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/mailbox/armada-37xx-rwtm-mailbox.c b/drivers/mailbox/armada-37xx-rwtm-mailbox.c index 9f2ce7f03c67..456a117a65fd 100644 --- a/drivers/mailbox/armada-37xx-rwtm-mailbox.c +++ b/drivers/mailbox/armada-37xx-rwtm-mailbox.c @@ -2,7 +2,7 @@ /* * rWTM BIU Mailbox driver for Armada 37xx * - * Author: Marek Behun + * Author: Marek Behún */ #include @@ -203,4 +203,4 @@ module_platform_driver(armada_37xx_mbox_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("rWTM BIU Mailbox driver for Armada 37xx"); -MODULE_AUTHOR("Marek Behun "); +MODULE_AUTHOR("Marek Behun "); diff --git a/drivers/watchdog/armada_37xx_wdt.c b/drivers/watchdog/armada_37xx_wdt.c index e5dcb26d85f0..1635f421ef2c 100644 --- a/drivers/watchdog/armada_37xx_wdt.c +++ b/drivers/watchdog/armada_37xx_wdt.c @@ -2,7 +2,7 @@ /* * Watchdog driver for Marvell Armada 37xx SoCs * - * Author: Marek Behun + * Author: Marek Behún */ #include @@ -366,7 +366,7 @@ static struct platform_driver armada_37xx_wdt_driver = { module_platform_driver(armada_37xx_wdt_driver); -MODULE_AUTHOR("Marek Behun "); +MODULE_AUTHOR("Marek Behun "); MODULE_DESCRIPTION("Armada 37xx CPU Watchdog"); MODULE_LICENSE("GPL v2"); diff --git a/include/dt-bindings/bus/moxtet.h b/include/dt-bindings/bus/moxtet.h index dc9345440ebe..10528de7b3ef 100644 --- a/include/dt-bindings/bus/moxtet.h +++ b/include/dt-bindings/bus/moxtet.h @@ -2,7 +2,7 @@ /* * Constant for device tree bindings for Turris Mox module configuration bus * - * Copyright (C) 2019 Marek Behun + * Copyright (C) 2019 Marek Behún */ #ifndef _DT_BINDINGS_BUS_MOXTET_H diff --git a/include/linux/armada-37xx-rwtm-mailbox.h b/include/linux/armada-37xx-rwtm-mailbox.h index 57bb54f6767a..ef4bd705eb65 100644 --- a/include/linux/armada-37xx-rwtm-mailbox.h +++ b/include/linux/armada-37xx-rwtm-mailbox.h @@ -2,7 +2,7 @@ /* * rWTM BIU Mailbox driver for Armada 37xx * - * Author: Marek Behun + * Author: Marek Behún */ #ifndef _LINUX_ARMADA_37XX_RWTM_MAILBOX_H_ diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h index 490db6886dcc..79184948fab4 100644 --- a/include/linux/moxtet.h +++ b/include/linux/moxtet.h @@ -2,7 +2,7 @@ /* * Turris Mox module configuration bus driver * - * Copyright (C) 2019 Marek Behun + * Copyright (C) 2019 Marek Behún */ #ifndef __LINUX_MOXTET_H From 620ff418ef440f2d585d8c8b2e9396ccfacaacd7 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Fri, 9 Apr 2021 13:27:07 -0700 Subject: [PATCH 074/180] mailmap: update email address for Jordan Crouse jcrouse at codeaurora.org has started bouncing. Redirect to a more permanent address. Link: https://lkml.kernel.org/r/20210325143700.1490518-1-jordan@cosmicpenguin.net Signed-off-by: Jordan Crouse Cc: Alexander Lobakin Cc: Jonathan Corbet Cc: Kees Cook Cc: Miguel Ojeda Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 541635d2e02e..c5642d66ce72 100644 --- a/.mailmap +++ b/.mailmap @@ -168,6 +168,7 @@ Johan Hovold Johan Hovold John Paul Adrian Glaubitz John Stultz +Jordan Crouse From a5c5e441518f1f39da7ed3ef8e61361a9ea90c8b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 9 Apr 2021 13:27:10 -0700 Subject: [PATCH 075/180] .mailmap: fix old email addresses Update Nick & Nadia's old addresses. Link: https://lkml.kernel.org/r/20210406134036.GQ2531743@casper.infradead.org Signed-off-by: Matthew Wilcox Cc: Nicholas Piggin Cc: Nadia Yvette Chambers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.mailmap b/.mailmap index c5642d66ce72..2d93232ed72b 100644 --- a/.mailmap +++ b/.mailmap @@ -254,8 +254,14 @@ Morten Welinder Morten Welinder Morten Welinder Mythri P K +Nadia Yvette Chambers William Lee Irwin III Nathan Chancellor Nguyen Anh Quynh +Nicholas Piggin +Nicholas Piggin +Nicholas Piggin +Nicholas Piggin +Nicholas Piggin Nicolas Ferre Nicolas Pitre Nicolas Pitre From d3378e86d1822b6d0bebfbc18a8348691c05dfa0 Mon Sep 17 00:00:00 2001 From: Aili Yao Date: Fri, 9 Apr 2021 13:27:19 -0700 Subject: [PATCH 076/180] mm/gup: check page posion status for coredump. When we do coredump for user process signal, this may be an SIGBUS signal with BUS_MCEERR_AR or BUS_MCEERR_AO code, which means this signal is resulted from ECC memory fail like SRAR or SRAO, we expect the memory recovery work is finished correctly, then the get_dump_page() will not return the error page as its process pte is set invalid by memory_failure(). But memory_failure() may fail, and the process's related pte may not be correctly set invalid, for current code, we will return the poison page, get it dumped, and then lead to system panic as its in kernel code. So check the poison status in get_dump_page(), and if TRUE, return NULL. There maybe other scenario that is also better to check the posion status and not to panic, so make a wrapper for this check, Thanks to David's suggestion(). [akpm@linux-foundation.org: s/0/false/] [yaoaili@kingsoft.com: is_page_poisoned() arg cannot be null, per Matthew] Link: https://lkml.kernel.org/r/20210322115233.05e4e82a@alex-virtual-machine Link: https://lkml.kernel.org/r/20210319104437.6f30e80d@alex-virtual-machine Signed-off-by: Aili Yao Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Mike Kravetz Cc: Aili Yao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 4 ++++ mm/internal.h | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/mm/gup.c b/mm/gup.c index e40579624f10..ef7d2da9f03f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1535,6 +1535,10 @@ struct page *get_dump_page(unsigned long addr) FOLL_FORCE | FOLL_DUMP | FOLL_GET); if (locked) mmap_read_unlock(mm); + + if (ret == 1 && is_page_poisoned(page)) + return NULL; + return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ diff --git a/mm/internal.h b/mm/internal.h index 1432feec62df..cb3c5e0a7799 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -97,6 +97,26 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } +/* + * When kernel touch the user page, the user page may be have been marked + * poison but still mapped in user space, if without this page, the kernel + * can guarantee the data integrity and operation success, the kernel is + * better to check the posion status and avoid touching it, be good not to + * panic, coredump for process fatal signal is a sample case matching this + * scenario. Or if kernel can't guarantee the data integrity, it's better + * not to call this function, let kernel touch the poison page and get to + * panic. + */ +static inline bool is_page_poisoned(struct page *page) +{ + if (PageHWPoison(page)) + return true; + else if (PageHuge(page) && PageHWPoison(compound_head(page))) + return true; + + return false; +} + extern unsigned long highest_memmap_pfn; /* From a3a8833dffb7e7329c2586b8bfc531adb503f123 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 9 Apr 2021 13:27:23 -0700 Subject: [PATCH 077/180] nds32: flush_dcache_page: use page_mapping_file to avoid races with swapoff Commit cb9f753a3731 ("mm: fix races between swapoff and flush dcache") updated flush_dcache_page implementations on several architectures to use page_mapping_file() in order to avoid races between page_mapping() and swapoff(). This update missed arch/nds32 and there is a possibility of a race there. Replace page_mapping() with page_mapping_file() in nds32 implementation of flush_dcache_page(). Link: https://lkml.kernel.org/r/20210330175126.26500-1-rppt@kernel.org Fixes: cb9f753a3731 ("mm: fix races between swapoff and flush dcache") Signed-off-by: Mike Rapoport Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Greentime Hu Cc: Huang Ying Cc: Nick Hu Cc: Vincent Chen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nds32/mm/cacheflush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index 6eb98a7ad27d..ad5344ef5d33 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -238,7 +238,7 @@ void flush_dcache_page(struct page *page) { struct address_space *mapping; - mapping = page_mapping(page); + mapping = page_mapping_file(page); if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else { From 9562fd132985ea9185388a112e50f2a51557827d Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 9 Apr 2021 13:27:26 -0700 Subject: [PATCH 078/180] gcov: re-fix clang-11+ support LLVM changed the expected function signature for llvm_gcda_emit_function() in the clang-11 release. Users of clang-11 or newer may have noticed their kernels producing invalid coverage information: $ llvm-cov gcov -a -c -u -f -b .gcda -- gcno=.gcno 1 : checksum mismatch, \ (, ) != (, ) 2 Invalid .gcda File! ... Fix up the function signatures so calling this function interprets its parameters correctly and computes the correct cfg checksum. In particular, in clang-11, the additional checksum is no longer optional. Link: https://reviews.llvm.org/rG25544ce2df0daa4304c07e64b9c8b0f7df60c11d Link: https://lkml.kernel.org/r/20210408184631.1156669-1-ndesaulniers@google.com Reported-by: Prasad Sodagudi Tested-by: Prasad Sodagudi Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Cc: [5.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/clang.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index 8743150db2ac..c466c7fbdece 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -70,7 +70,9 @@ struct gcov_fn_info { u32 ident; u32 checksum; +#if CONFIG_CLANG_VERSION < 110000 u8 use_extra_checksum; +#endif u32 cfg_checksum; u32 num_counters; @@ -145,10 +147,8 @@ void llvm_gcda_emit_function(u32 ident, const char *function_name, list_add_tail(&info->head, ¤t_info->functions); } -EXPORT_SYMBOL(llvm_gcda_emit_function); #else -void llvm_gcda_emit_function(u32 ident, u32 func_checksum, - u8 use_extra_checksum, u32 cfg_checksum) +void llvm_gcda_emit_function(u32 ident, u32 func_checksum, u32 cfg_checksum) { struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -158,12 +158,11 @@ void llvm_gcda_emit_function(u32 ident, u32 func_checksum, INIT_LIST_HEAD(&info->head); info->ident = ident; info->checksum = func_checksum; - info->use_extra_checksum = use_extra_checksum; info->cfg_checksum = cfg_checksum; list_add_tail(&info->head, ¤t_info->functions); } -EXPORT_SYMBOL(llvm_gcda_emit_function); #endif +EXPORT_SYMBOL(llvm_gcda_emit_function); void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters) { @@ -293,11 +292,16 @@ int gcov_info_is_compatible(struct gcov_info *info1, struct gcov_info *info2) !list_is_last(&fn_ptr2->head, &info2->functions)) { if (fn_ptr1->checksum != fn_ptr2->checksum) return false; +#if CONFIG_CLANG_VERSION < 110000 if (fn_ptr1->use_extra_checksum != fn_ptr2->use_extra_checksum) return false; if (fn_ptr1->use_extra_checksum && fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum) return false; +#else + if (fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum) + return false; +#endif fn_ptr1 = list_next_entry(fn_ptr1, head); fn_ptr2 = list_next_entry(fn_ptr2, head); } @@ -529,17 +533,22 @@ static size_t convert_to_gcda(char *buffer, struct gcov_info *info) list_for_each_entry(fi_ptr, &info->functions, head) { u32 i; - u32 len = 2; - - if (fi_ptr->use_extra_checksum) - len++; pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION); - pos += store_gcov_u32(buffer, pos, len); +#if CONFIG_CLANG_VERSION < 110000 + pos += store_gcov_u32(buffer, pos, + fi_ptr->use_extra_checksum ? 3 : 2); +#else + pos += store_gcov_u32(buffer, pos, 3); +#endif pos += store_gcov_u32(buffer, pos, fi_ptr->ident); pos += store_gcov_u32(buffer, pos, fi_ptr->checksum); +#if CONFIG_CLANG_VERSION < 110000 if (fi_ptr->use_extra_checksum) pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); +#else + pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum); +#endif pos += store_gcov_u32(buffer, pos, GCOV_TAG_COUNTER_BASE); pos += store_gcov_u32(buffer, pos, fi_ptr->num_counters * 2); From 90bd070aae6c4fb5d302f9c4b9c88be60c8197ec Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 9 Apr 2021 13:27:29 -0700 Subject: [PATCH 079/180] ocfs2: fix deadlock between setattr and dio_end_io_write The following deadlock is detected: truncate -> setattr path is waiting for pending direct IO to be done (inode->i_dio_count become zero) with inode->i_rwsem held (down_write). PID: 14827 TASK: ffff881686a9af80 CPU: 20 COMMAND: "ora_p005_hrltd9" #0 __schedule at ffffffff818667cc #1 schedule at ffffffff81866de6 #2 inode_dio_wait at ffffffff812a2d04 #3 ocfs2_setattr at ffffffffc05f322e [ocfs2] #4 notify_change at ffffffff812a5a09 #5 do_truncate at ffffffff812808f5 #6 do_sys_ftruncate.constprop.18 at ffffffff81280cf2 #7 sys_ftruncate at ffffffff81280d8e #8 do_syscall_64 at ffffffff81003949 #9 entry_SYSCALL_64_after_hwframe at ffffffff81a001ad dio completion path is going to complete one direct IO (decrement inode->i_dio_count), but before that it hung at locking inode->i_rwsem: #0 __schedule+700 at ffffffff818667cc #1 schedule+54 at ffffffff81866de6 #2 rwsem_down_write_failed+536 at ffffffff8186aa28 #3 call_rwsem_down_write_failed+23 at ffffffff8185a1b7 #4 down_write+45 at ffffffff81869c9d #5 ocfs2_dio_end_io_write+180 at ffffffffc05d5444 [ocfs2] #6 ocfs2_dio_end_io+85 at ffffffffc05d5a85 [ocfs2] #7 dio_complete+140 at ffffffff812c873c #8 dio_aio_complete_work+25 at ffffffff812c89f9 #9 process_one_work+361 at ffffffff810b1889 #10 worker_thread+77 at ffffffff810b233d #11 kthread+261 at ffffffff810b7fd5 #12 ret_from_fork+62 at ffffffff81a0035e Thus above forms ABBA deadlock. The same deadlock was mentioned in upstream commit 28f5a8a7c033 ("ocfs2: should wait dio before inode lock in ocfs2_setattr()"). It seems that that commit only removed the cluster lock (the victim of above dead lock) from the ABBA deadlock party. End-user visible effects: Process hang in truncate -> ocfs2_setattr path and other processes hang at ocfs2_dio_end_io_write path. This is to fix the deadlock itself. It removes inode_lock() call from dio completion path to remove the deadlock and add ip_alloc_sem lock in setattr path to synchronize the inode modifications. [wen.gang.wang@oracle.com: remove the "had_alloc_lock" as suggested] Link: https://lkml.kernel.org/r/20210402171344.1605-1-wen.gang.wang@oracle.com Link: https://lkml.kernel.org/r/20210331203654.3911-1-wen.gang.wang@oracle.com Signed-off-by: Wengang Wang Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 11 +---------- fs/ocfs2/file.c | 8 ++++++-- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 3bfb4147895a..ad20403b383f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2295,7 +2295,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle = NULL; loff_t end = offset + bytes; - int ret = 0, credits = 0, locked = 0; + int ret = 0, credits = 0; ocfs2_init_dealloc_ctxt(&dealloc); @@ -2306,13 +2306,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode, !dwc->dw_orphaned) goto out; - /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we - * are in that context. */ - if (dwc->dw_writer_pid != task_pid_nr(current)) { - inode_lock(inode); - locked = 1; - } - ret = ocfs2_inode_lock(inode, &di_bh, 1); if (ret < 0) { mlog_errno(ret); @@ -2393,8 +2386,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode, if (meta_ac) ocfs2_free_alloc_context(meta_ac); ocfs2_run_deallocs(osb, &dealloc); - if (locked) - inode_unlock(inode); ocfs2_dio_free_write_ctx(inode, dwc); return ret; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6611c64ca0be..5edc1d0cf115 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1245,22 +1245,24 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto bail_unlock; } } + down_write(&OCFS2_I(inode)->ip_alloc_sem); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS + 2 * ocfs2_quota_trans_credits(sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); - goto bail_unlock; + goto bail_unlock_alloc; } status = __dquot_transfer(inode, transfer_to); if (status < 0) goto bail_commit; } else { + down_write(&OCFS2_I(inode)->ip_alloc_sem); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); - goto bail_unlock; + goto bail_unlock_alloc; } } @@ -1273,6 +1275,8 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, bail_commit: ocfs2_commit_trans(osb, handle); +bail_unlock_alloc: + up_write(&OCFS2_I(inode)->ip_alloc_sem); bail_unlock: if (status && inode_locked) { ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); From 7ad1e366167837daeb93d0bacb57dee820b0b898 Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 9 Apr 2021 13:27:32 -0700 Subject: [PATCH 080/180] ia64: fix user_stack_pointer() for ptrace() ia64 has two stacks: - memory stack (or stack), pointed at by by r12 - register backing store (register stack), pointed at by ar.bsp/ar.bspstore with complications around dirty register frame on CPU. In [1] Dmitry noticed that PTRACE_GET_SYSCALL_INFO returns the register stack instead memory stack. The bug comes from the fact that user_stack_pointer() and current_user_stack_pointer() don't return the same register: ulong user_stack_pointer(struct pt_regs *regs) { return regs->ar_bspstore; } #define current_user_stack_pointer() (current_pt_regs()->r12) The change gets both back in sync. I think ptrace(PTRACE_GET_SYSCALL_INFO) is the only affected user by this bug on ia64. The change fixes 'rt_sigreturn.gen.test' strace test where it was observed initially. Link: https://bugs.gentoo.org/769614 [1] Link: https://lkml.kernel.org/r/20210331084447.2561532-1-slyfox@gentoo.org Signed-off-by: Sergei Trofimovich Reported-by: Dmitry V. Levin Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/ptrace.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index b3aa46090101..08179135905c 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -54,8 +54,7 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) { - /* FIXME: should this be bspstore + nr_dirty regs? */ - return regs->ar_bspstore; + return regs->r12; } static inline int is_syscall_success(struct pt_regs *regs) @@ -79,11 +78,6 @@ static inline long regs_return_value(struct pt_regs *regs) unsigned long __ip = instruction_pointer(regs); \ (__ip & ~3UL) + ((__ip & 3UL) << 2); \ }) -/* - * Why not default? Because user_stack_pointer() on ia64 gives register - * stack backing store instead... - */ -#define current_user_stack_pointer() (current_pt_regs()->r12) /* given a pointer to a task_struct, return the user's pt_regs */ # define task_pt_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) From df41872b68601059dd4a84858952dcae58acd331 Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Fri, 9 Apr 2021 13:27:35 -0700 Subject: [PATCH 081/180] fs: direct-io: fix missing sdio->boundary I encountered a hung task issue, but not a performance one. I run DIO on a device (need lba continuous, for example open channel ssd), maybe hungtask in below case: DIO: Checkpoint: get addr A(at boundary), merge into BIO, no submit because boundary missing flush dirty data(get addr A+1), wait IO(A+1) writeback timeout, because DIO(A) didn't submit get addr A+2 fail, because checkpoint is doing dio_send_cur_page() may clear sdio->boundary, so prevent it from missing a boundary. Link: https://lkml.kernel.org/r/20210322042253.38312-1-jack.qiu@huawei.com Fixes: b1058b981272 ("direct-io: submit bio after boundary buffer is added to it") Signed-off-by: Jack Qiu Reviewed-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/direct-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index b61491bf3166..b2e86e739d7a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -812,6 +812,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, struct buffer_head *map_bh) { int ret = 0; + int boundary = sdio->boundary; /* dio_send_cur_page may clear it */ if (dio->op == REQ_OP_WRITE) { /* @@ -850,10 +851,10 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits; out: /* - * If sdio->boundary then we want to schedule the IO now to + * If boundary then we want to schedule the IO now to * avoid metadata seeks. */ - if (sdio->boundary) { + if (boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); if (sdio->bio) dio_bio_submit(dio, sdio); From 06b1f85588948bd4c772845e5d6891b8f6082248 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 9 Apr 2021 13:27:38 -0700 Subject: [PATCH 082/180] kasan: fix conflict with page poisoning When page poisoning is enabled, it accesses memory that is marked as poisoned by KASAN, which leas to false-positive KASAN reports. Suppress the reports by adding KASAN annotations to unpoison_page() (poison_page() already has them). Link: https://lkml.kernel.org/r/2dc799014d31ac13fd97bd906bad33e16376fc67.1617118501.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Marco Elver Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Andrey Konovalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_poison.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_poison.c b/mm/page_poison.c index 65cdf844c8ad..655dc5895604 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -77,12 +77,14 @@ static void unpoison_page(struct page *page) void *addr; addr = kmap_atomic(page); + kasan_disable_current(); /* * Page poisoning when enabled poisons each and every page * that is freed to buddy. Thus no extra check is done to * see if a page was poisoned. */ - check_poison_mem(addr, PAGE_SIZE); + check_poison_mem(kasan_reset_tag(addr), PAGE_SIZE); + kasan_enable_current(); kunmap_atomic(addr); } From e156656717b810202914b77557de8112df4dad0d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Apr 2021 13:27:41 -0700 Subject: [PATCH 083/180] lib/test_kasan_module.c: suppress unused var warning Local `unused' is intentionally unused - it is there to suppress __must_check warnings. Reported-by: kernel test robot Link: https://lkml.kernel.org/r/202104050216.HflRxfJm-lkp@intel.com Cc: Marco Elver Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan_module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c index eee017ff8980..f1017f345d6c 100644 --- a/lib/test_kasan_module.c +++ b/lib/test_kasan_module.c @@ -22,7 +22,7 @@ static noinline void __init copy_user_test(void) char *kmem; char __user *usermem; size_t size = 10; - int unused; + int __maybe_unused unused; kmem = kmalloc(size, GFP_KERNEL); if (!kmem) From 6a77d38efcda40f555a920909eab22ee0917fd0d Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 9 Apr 2021 13:27:44 -0700 Subject: [PATCH 084/180] kfence, x86: fix preemptible warning on KPTI-enabled systems On systems with KPTI enabled, we can currently observe the following warning: BUG: using smp_processor_id() in preemptible caller is invalidate_user_asid+0x13/0x50 CPU: 6 PID: 1075 Comm: dmesg Not tainted 5.12.0-rc4-gda4a2b1a5479-kfence_1+ #1 Hardware name: Hewlett-Packard HP Pro 3500 Series/2ABF, BIOS 8.11 10/24/2012 Call Trace: dump_stack+0x7f/0xad check_preemption_disabled+0xc8/0xd0 invalidate_user_asid+0x13/0x50 flush_tlb_one_kernel+0x5/0x20 kfence_protect+0x56/0x80 ... While it normally makes sense to require preemption to be off, so that the expected CPU's TLB is flushed and not another, in our case it really is best-effort (see comments in kfence_protect_page()). Avoid the warning by disabling preemption around flush_tlb_one_kernel(). Link: https://lore.kernel.org/lkml/YGIDBAboELGgMgXy@elver.google.com/ Link: https://lkml.kernel.org/r/20210330065737.652669-1-elver@google.com Signed-off-by: Marco Elver Reported-by: Tomi Sarvela Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/kfence.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h index 97bbb4a9083a..05b48b33baf0 100644 --- a/arch/x86/include/asm/kfence.h +++ b/arch/x86/include/asm/kfence.h @@ -56,8 +56,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) else set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - /* Flush this CPU's TLB. */ + /* + * Flush this CPU's TLB, assuming whoever did the allocation/free is + * likely to continue running on this CPU. + */ + preempt_disable(); flush_tlb_one_kernel(addr); + preempt_enable(); return true; } From 7d37cb2c912dc5c25ffac784a4f9b98c06c6bd08 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Fri, 9 Apr 2021 13:27:47 -0700 Subject: [PATCH 085/180] lib: fix kconfig dependency on ARCH_WANT_FRAME_POINTERS When LATENCYTOP, LOCKDEP, or FAULT_INJECTION_STACKTRACE_FILTER is enabled and ARCH_WANT_FRAME_POINTERS is disabled, Kbuild gives a warning such as: WARNING: unmet direct dependencies detected for FRAME_POINTER Depends on [n]: DEBUG_KERNEL [=y] && (M68K || UML || SUPERH) || ARCH_WANT_FRAME_POINTERS [=n] || MCOUNT [=n] Selected by [y]: - LATENCYTOP [=y] && DEBUG_KERNEL [=y] && STACKTRACE_SUPPORT [=y] && PROC_FS [=y] && !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86 Depending on ARCH_WANT_FRAME_POINTERS causes a recursive dependency error. ARCH_WANT_FRAME_POINTERS is to be selected by the architecture, and is not supposed to be overridden by other config options. Link: https://lkml.kernel.org/r/20210329165329.27994-1-julianbraha@gmail.com Signed-off-by: Julian Braha Cc: Andreas Schwab Cc: Geert Uytterhoeven Cc: Necip Fazil Yildiran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2779c29d9981..417c3d3e521b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1363,7 +1363,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM && !S390 && !MICROBLAZE && !ARC && !X86 + depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86 select KALLSYMS select KALLSYMS_ALL @@ -1665,7 +1665,7 @@ config LATENCYTOP depends on DEBUG_KERNEL depends on STACKTRACE_SUPPORT depends on PROC_FS - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86 + depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86 select KALLSYMS select KALLSYMS_ALL select STACKTRACE @@ -1918,7 +1918,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86 + depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86 help Provide stacktrace filter for fault-injection capabilities From a2948b17f6b936fc52f86c0f92c46d2f91928b79 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 2 Apr 2021 14:55:55 +0530 Subject: [PATCH 086/180] libnvdimm/region: Fix nvdimm_has_flush() to handle ND_REGION_ASYNC In case a platform doesn't provide explicit flush-hints but provides an explicit flush callback via ND_REGION_ASYNC region flag, then nvdimm_has_flush() still returns '0' indicating that writes do not require flushing. This happens on PPC64 with patch at [1] applied, where 'deep_flush' of a region was denied even though an explicit flush function was provided. Fix this by adding a condition to nvdimm_has_flush() to test for the ND_REGION_ASYNC flag on the region and see if a 'region->flush' callback is assigned. Link: http://lore.kernel.org/r/161703936121.36.7260632399582101498.stgit@e1fbed493c87 [1] Fixes: c5d4355d10d4 ("libnvdimm: nd_region flush callback support") Reported-by: Shivaprasad G Bhat Signed-off-by: Vaibhav Jain Link: https://lore.kernel.org/r/20210402092555.208590-1-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 51870eb51da6..9ccf3d608799 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1246,6 +1246,11 @@ int nvdimm_has_flush(struct nd_region *nd_region) || !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) return -ENXIO; + /* Test if an explicit flush function is defined */ + if (test_bit(ND_REGION_ASYNC, &nd_region->flags) && nd_region->flush) + return 1; + + /* Test if any flush hints for the region are available */ for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm *nvdimm = nd_mapping->nvdimm; @@ -1256,8 +1261,8 @@ int nvdimm_has_flush(struct nd_region *nd_region) } /* - * The platform defines dimm devices without hints, assume - * platform persistence mechanism like ADR + * The platform defines dimm devices without hints nor explicit flush, + * assume platform persistence mechanism like ADR */ return 0; } From 53b74fa990bf76f290aa5930abfcf37424a1a865 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 8 Apr 2021 17:25:28 +0900 Subject: [PATCH 087/180] btrfs: zoned: move superblock logging zone location Moves the location of the superblock logging zones. The new locations of the logging zones are now determined based on fixed block addresses instead of on fixed zone numbers. The old placement method based on fixed zone numbers causes problems when one needs to inspect a file system image without access to the drive zone information. In such case, the super block locations cannot be reliably determined as the zone size is unknown. By locating the superblock logging zones using fixed addresses, we can scan a dumped file system image without the zone information since a super block copy will always be present at or after the fixed known locations. Introduce the following three pairs of zones containing fixed offset locations, regardless of the device zone size. - primary superblock: offset 0B (and the following zone) - first copy: offset 512G (and the following zone) - Second copy: offset 4T (4096G, and the following zone) If a logging zone is outside of the disk capacity, we do not record the superblock copy. The first copy position is much larger than for a non-zoned filesystem, which is at 64M. This is to avoid overlapping with the log zones for the primary superblock. This higher location is arbitrary but allows supporting devices with very large zone sizes, plus some space around in between. Such large zone size is unrealistic and very unlikely to ever be seen in real devices. Currently, SMR disks have a zone size of 256MB, and we are expecting ZNS drives to be in the 1-4GB range, so this limit gives us room to breathe. For now, we only allow zone sizes up to 8GB. The maximum zone size that would still fit in the space is 256G. The fixed location addresses are somewhat arbitrary, with the intent of maintaining superblock reliability for smaller and larger devices, with the preference for the latter. For this reason, there are two superblocks under the first 1T. This should cover use cases for physical devices and for emulated/device-mapper devices. The superblock logging zones are reserved for superblock logging and never used for data or metadata blocks. Note that we only reserve the two zones per primary/copy actually used for superblock logging. We do not reserve the ranges of zones possibly containing superblocks with the largest supported zone size (0-16GB, 512G-528GB, 4096G-4112G). The zones containing the fixed location offsets used to store superblocks on a non-zoned volume are also reserved to avoid confusion. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 53 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 43948bd40e02..ba7a303300a3 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -21,9 +21,30 @@ /* Pseudo write pointer value for conventional zone */ #define WP_CONVENTIONAL ((u64)-2) +/* + * Location of the first zone of superblock logging zone pairs. + * + * - primary superblock: 0B (zone 0) + * - first copy: 512G (zone starting at that offset) + * - second copy: 4T (zone starting at that offset) + */ +#define BTRFS_SB_LOG_PRIMARY_OFFSET (0ULL) +#define BTRFS_SB_LOG_FIRST_OFFSET (512ULL * SZ_1G) +#define BTRFS_SB_LOG_SECOND_OFFSET (4096ULL * SZ_1G) + +#define BTRFS_SB_LOG_FIRST_SHIFT const_ilog2(BTRFS_SB_LOG_FIRST_OFFSET) +#define BTRFS_SB_LOG_SECOND_SHIFT const_ilog2(BTRFS_SB_LOG_SECOND_OFFSET) + /* Number of superblock log zones */ #define BTRFS_NR_SB_LOG_ZONES 2 +/* + * Maximum supported zone size. Currently, SMR disks have a zone size of + * 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not + * expect the zone size to become larger than 8GiB in the near future. + */ +#define BTRFS_MAX_ZONE_SIZE SZ_8G + static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct blk_zone *zones = data; @@ -111,23 +132,22 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, } /* - * The following zones are reserved as the circular buffer on ZONED btrfs. - * - The primary superblock: zones 0 and 1 - * - The first copy: zones 16 and 17 - * - The second copy: zones 1024 or zone at 256GB which is minimum, and - * the following one + * Get the first zone number of the superblock mirror */ static inline u32 sb_zone_number(int shift, int mirror) { - ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX); + u64 zone; + ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX); switch (mirror) { - case 0: return 0; - case 1: return 16; - case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024); + case 0: zone = 0; break; + case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break; + case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break; } - return 0; + ASSERT(zone <= U32_MAX); + + return (u32)zone; } /* @@ -300,10 +320,21 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) zone_sectors = bdev_zone_sectors(bdev); } - nr_sectors = bdev_nr_sectors(bdev); /* Check if it's power of 2 (see is_power_of_2) */ ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); zone_info->zone_size = zone_sectors << SECTOR_SHIFT; + + /* We reject devices with a zone size larger than 8GB */ + if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { + btrfs_err_in_rcu(fs_info, + "zoned: %s: zone size %llu larger than supported maximum %llu", + rcu_str_deref(device->name), + zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); + ret = -EINVAL; + goto out; + } + + nr_sectors = bdev_nr_sectors(bdev); zone_info->zone_size_shift = ilog2(zone_info->zone_size); zone_info->max_zone_append_size = (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT; From b895bdf5d643b6feb7c60856326dd4feb6981560 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Apr 2021 08:49:39 -0700 Subject: [PATCH 088/180] netfilter: nft_limit: avoid possible divide error in nft_limit_init div_u64() divides u64 by u32. nft_limit_init() wants to divide u64 by u64, use the appropriate math function (div64_u64) divide error: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8390 Comm: syz-executor188 Not tainted 5.12.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:div_u64_rem include/linux/math64.h:28 [inline] RIP: 0010:div_u64 include/linux/math64.h:127 [inline] RIP: 0010:nft_limit_init+0x2a2/0x5e0 net/netfilter/nft_limit.c:85 Code: ef 4c 01 eb 41 0f 92 c7 48 89 de e8 38 a5 22 fa 4d 85 ff 0f 85 97 02 00 00 e8 ea 9e 22 fa 4c 0f af f3 45 89 ed 31 d2 4c 89 f0 <49> f7 f5 49 89 c6 e8 d3 9e 22 fa 48 8d 7d 48 48 b8 00 00 00 00 00 RSP: 0018:ffffc90009447198 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000200000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff875152e6 RDI: 0000000000000003 RBP: ffff888020f80908 R08: 0000200000000000 R09: 0000000000000000 R10: ffffffff875152d8 R11: 0000000000000000 R12: ffffc90009447270 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 000000000097a300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200001c4 CR3: 0000000026a52000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: nf_tables_newexpr net/netfilter/nf_tables_api.c:2675 [inline] nft_expr_init+0x145/0x2d0 net/netfilter/nf_tables_api.c:2713 nft_set_elem_expr_alloc+0x27/0x280 net/netfilter/nf_tables_api.c:5160 nf_tables_newset+0x1997/0x3150 net/netfilter/nf_tables_api.c:4321 nfnetlink_rcv_batch+0x85a/0x21b0 net/netfilter/nfnetlink.c:456 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:580 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:598 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: c26844eda9d4 ("netfilter: nf_tables: Fix nft limit burst handling") Fixes: 3e0f64b7dd31 ("netfilter: nft_limit: fix packet ratelimiting") Signed-off-by: Eric Dumazet Diagnosed-by: Luigi Rizzo Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 0e2c315c3b5e..82ec27bdf941 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -76,13 +76,13 @@ static int nft_limit_init(struct nft_limit *limit, return -EOVERFLOW; if (pkts) { - tokens = div_u64(limit->nsecs, limit->rate) * limit->burst; + tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst; } else { /* The token bucket size limits the number of tokens can be * accumulated. tokens_max specifies the bucket size. * tokens_max = unit * (rate + burst) / rate. */ - tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst), limit->rate); } From 7ee3c61dcd28bf6e290e06ad382f13511dc790e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:43:39 +0200 Subject: [PATCH 089/180] netfilter: bridge: add pre_exit hooks for ebtable unregistration Just like ip/ip6/arptables, the hooks have to be removed, then synchronize_rcu() has to be called to make sure no more packets are being processed before the ruleset data is released. Place the hook unregistration in the pre_exit hook, then call the new ebtables pre_exit function from there. Years ago, when first netns support got added for netfilter+ebtables, this used an older (now removed) netfilter hook unregister API, that did a unconditional synchronize_rcu(). Now that all is done with call_rcu, ebtable_{filter,nat,broute} pernet exit handlers may free the ebtable ruleset while packets are still in flight. This can only happens on module removal, not during netns exit. The new function expects the table name, not the table struct. This is because upcoming patch set (targeting -next) will remove all net->xt.{nat,filter,broute}_table instances, this makes it necessary to avoid external references to those member variables. The existing APIs will be converted, so follow the upcoming scheme of passing name + hook type instead. Fixes: aee12a0a3727e ("ebtables: remove nf_hook_register usage") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 5 ++-- net/bridge/netfilter/ebtable_broute.c | 8 +++++- net/bridge/netfilter/ebtable_filter.c | 8 +++++- net/bridge/netfilter/ebtable_nat.c | 8 +++++- net/bridge/netfilter/ebtables.c | 30 ++++++++++++++++++++--- 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2f5c4e6ecd8a..3a956145a25c 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -110,8 +110,9 @@ extern int ebt_register_table(struct net *net, const struct ebt_table *table, const struct nf_hook_ops *ops, struct ebt_table **res); -extern void ebt_unregister_table(struct net *net, struct ebt_table *table, - const struct nf_hook_ops *); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table); +void ebt_unregister_table_pre_exit(struct net *net, const char *tablename, + const struct nf_hook_ops *ops); extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 66e7af165494..32bc2821027f 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -105,14 +105,20 @@ static int __net_init broute_net_init(struct net *net) &net->xt.broute_table); } +static void __net_exit broute_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "broute", &ebt_ops_broute); +} + static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table, &ebt_ops_broute); + ebt_unregister_table(net, net->xt.broute_table); } static struct pernet_operations broute_net_ops = { .init = broute_net_init, .exit = broute_net_exit, + .pre_exit = broute_net_pre_exit, }; static int __init ebtable_broute_init(void) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 78cb9b21022d..bcf982e12f16 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -99,14 +99,20 @@ static int __net_init frame_filter_net_init(struct net *net) &net->xt.frame_filter); } +static void __net_exit frame_filter_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "filter", ebt_ops_filter); +} + static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_filter, ebt_ops_filter); + ebt_unregister_table(net, net->xt.frame_filter); } static struct pernet_operations frame_filter_net_ops = { .init = frame_filter_net_init, .exit = frame_filter_net_exit, + .pre_exit = frame_filter_net_pre_exit, }; static int __init ebtable_filter_init(void) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0888936ef853..0d092773f816 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -99,14 +99,20 @@ static int __net_init frame_nat_net_init(struct net *net) &net->xt.frame_nat); } +static void __net_exit frame_nat_net_pre_exit(struct net *net) +{ + ebt_unregister_table_pre_exit(net, "nat", ebt_ops_nat); +} + static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_nat, ebt_ops_nat); + ebt_unregister_table(net, net->xt.frame_nat); } static struct pernet_operations frame_nat_net_ops = { .init = frame_nat_net_init, .exit = frame_nat_net_exit, + .pre_exit = frame_nat_net_pre_exit, }; static int __init ebtable_nat_init(void) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ebe33b60efd6..d481ff24a150 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1232,10 +1232,34 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, return ret; } -void ebt_unregister_table(struct net *net, struct ebt_table *table, - const struct nf_hook_ops *ops) +static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +{ + struct ebt_table *t; + + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { + if (strcmp(t->name, name) == 0) { + mutex_unlock(&ebt_mutex); + return t; + } + } + + mutex_unlock(&ebt_mutex); + return NULL; +} + +void ebt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops) +{ + struct ebt_table *table = __ebt_find_table(net, name); + + if (table) + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(ebt_unregister_table_pre_exit); + +void ebt_unregister_table(struct net *net, struct ebt_table *table) { - nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); __ebt_unregister_table(net, table); } From d163a925ebbc6eb5b562b0f1d72c7e817aa75c40 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:43:40 +0200 Subject: [PATCH 090/180] netfilter: arp_tables: add pre_exit hook for table unregister Same problem that also existed in iptables/ip(6)tables, when arptable_filter is removed there is no longer a wait period before the table/ruleset is free'd. Unregister the hook in pre_exit, then remove the table in the exit function. This used to work correctly because the old nf_hook_unregister API did unconditional synchronize_net. The per-net hook unregister function uses call_rcu instead. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 5 +++-- net/ipv4/netfilter/arp_tables.c | 9 +++++++-- net/ipv4/netfilter/arptable_filter.c | 10 +++++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 7d3537c40ec9..26a13294318c 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -52,8 +52,9 @@ extern void *arpt_alloc_initial_table(const struct xt_table *); int arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); -void arpt_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops); +void arpt_unregister_table(struct net *net, struct xt_table *table); +void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); extern unsigned int arpt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1e04d2b5170..6c26533480dd 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1539,10 +1539,15 @@ int arpt_register_table(struct net *net, return ret; } -void arpt_unregister_table(struct net *net, struct xt_table *table, - const struct nf_hook_ops *ops) +void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops) { nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); +} +EXPORT_SYMBOL(arpt_unregister_table_pre_exit); + +void arpt_unregister_table(struct net *net, struct xt_table *table) +{ __arpt_unregister_table(net, table); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index c216b9ad3bb2..6c300ba5634e 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -56,16 +56,24 @@ static int __net_init arptable_filter_table_init(struct net *net) return err; } +static void __net_exit arptable_filter_net_pre_exit(struct net *net) +{ + if (net->ipv4.arptable_filter) + arpt_unregister_table_pre_exit(net, net->ipv4.arptable_filter, + arpfilter_ops); +} + static void __net_exit arptable_filter_net_exit(struct net *net) { if (!net->ipv4.arptable_filter) return; - arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops); + arpt_unregister_table(net, net->ipv4.arptable_filter); net->ipv4.arptable_filter = NULL; } static struct pernet_operations arptable_filter_net_ops = { .exit = arptable_filter_net_exit, + .pre_exit = arptable_filter_net_pre_exit, }; static int __init arptable_filter_init(void) From d434405aaab7d0ebc516b68a8fc4100922d7f5ef Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Apr 2021 15:16:13 -0700 Subject: [PATCH 091/180] Linux 5.12-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cc77fd45ca64..4730cf156f6b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Frozen Wasteland # *DOCUMENTATION* From d2bd44c4c05d043fb65cfdf26c54e6d8b94a4b41 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Sun, 28 Feb 2021 20:08:28 +0100 Subject: [PATCH 092/180] m68k: fix flatmem memory model setup Detected a broken boot on mcf54415, likely introduced from commit 4bfc848e0981 ("m68k/mm: enable use of generic memory_model.h for !DISCONTIGMEM") Fix ARCH_PFN_OFFSET to be a pfn. Signed-off-by: Angelo Dureghello Acked-by: Mike Rapoport Signed-off-by: Greg Ungerer --- arch/m68k/include/asm/page_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 9e8f0cc30a2c..2411ea9ef578 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -167,7 +167,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void) ((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn; \ }) #else -#define ARCH_PFN_OFFSET (m68k_memory[0].addr) +#define ARCH_PFN_OFFSET (m68k_memory[0].addr >> PAGE_SHIFT) #include #endif From 4af2178ac605faf32ebe638f7ac17d841d40ea9b Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 9 Apr 2021 17:11:45 +0800 Subject: [PATCH 093/180] MAINTAINERS: update maintainer entry for freescale fec driver Update maintainer entry for freescale fec driver. Suggested-by: Heiner Kallweit Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ccd9228350cf..163264c282eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7089,7 +7089,7 @@ S: Maintained F: drivers/i2c/busses/i2c-cpm.c FREESCALE IMX / MXC FEC DRIVER -M: Fugang Duan +M: Joakim Zhang L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/fsl-fec.txt From 31457db3750c0b0ed229d836f2609fdb8a5b790e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Apr 2021 11:02:08 +0200 Subject: [PATCH 094/180] net: davicom: Fix regulator not turned off on failed probe When the probe fails, we must disable the regulator that was previously enabled. This patch is a follow-up to commit ac88c531a5b3 ("net: davicom: Fix regulator not turned off on failed probe") which missed one case. Fixes: 7994fe55a4a2 ("dm9000: Add regulator and reset support to dm9000") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/davicom/dm9000.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 252adfa5d837..8a9096aa85cd 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1471,8 +1471,10 @@ dm9000_probe(struct platform_device *pdev) /* Init network device */ ndev = alloc_etherdev(sizeof(struct board_info)); - if (!ndev) - return -ENOMEM; + if (!ndev) { + ret = -ENOMEM; + goto out_regulator_disable; + } SET_NETDEV_DEV(ndev, &pdev->dev); From 6628ddfec7580882f11fdc5c194a8ea781fdadfa Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Sun, 11 Apr 2021 12:28:24 +0100 Subject: [PATCH 095/180] net: geneve: check skb is large enough for IPv4/IPv6 header Check within geneve_xmit_skb/geneve6_xmit_skb that sk_buff structure is large enough to include IPv4 or IPv6 header, and reject if not. The geneve_xmit_skb portion and overall idea was contributed by Eric Dumazet. Fixes a KMSAN-found uninit-value bug reported by syzbot at: https://syzkaller.appspot.com/bug?id=abe95dc3e3e9667fc23b8d81f29ecad95c6f106f Suggested-by: Eric Dumazet Reported-by: syzbot+2e406a9ac75bb71d4b7a@syzkaller.appspotmail.com Signed-off-by: Phillip Potter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/geneve.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d5b1e48e0c09..42f31c681846 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -891,6 +891,9 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return -EINVAL; + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, geneve->cfg.info.key.tp_dst, sport); @@ -985,6 +988,9 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + return -EINVAL; + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, geneve->cfg.info.key.tp_dst, sport); From ea941ac294d75d0ace50797aebf0056f6f8f7a7f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 16 Feb 2021 17:13:42 -0700 Subject: [PATCH 096/180] dmaengine: idxd: Fix clobbering of SWERR overflow bit on writeback Current code blindly writes over the SWERR and the OVERFLOW bits. Write back the bits actually read instead so the driver avoids clobbering the OVERFLOW bit that comes after the register is read. Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Reported-by: Sanjay Kumar Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161352082229.3511254.1002151220537623503.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index a60ca11a5784..f1463fc58112 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -124,7 +124,9 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) for (i = 0; i < 4; i++) idxd->sw_err.bits[i] = ioread64(idxd->reg_base + IDXD_SWERR_OFFSET + i * sizeof(u64)); - iowrite64(IDXD_SWERR_ACK, idxd->reg_base + IDXD_SWERR_OFFSET); + + iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK, + idxd->reg_base + IDXD_SWERR_OFFSET); if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) { int id = idxd->sw_err.wq_idx; From 4ac823e9cd85f66da274c951d21bf9f6b714b729 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 22 Mar 2021 16:36:25 -0700 Subject: [PATCH 097/180] dmaengine: idxd: fix delta_rec and crc size field for completion record The delta_rec_size and crc_val in the completion record should be 32bits and not 16bits. Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Reported-by: Nikhil Rao Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161645618572.2003490.14466173451736323035.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 236d437947bc..e33997b4d750 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -247,8 +247,8 @@ struct dsa_completion_record { uint32_t rsvd2:8; }; - uint16_t delta_rec_size; - uint16_t crc_val; + uint32_t delta_rec_size; + uint32_t crc_val; /* DIF check & strip */ struct { From ea6a5735d2a61b938a302eb3629272342a9e7c46 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 22 Mar 2021 16:37:29 -0700 Subject: [PATCH 098/180] dmaengine: idxd: fix opcap sysfs attribute output The operation capability register is 256bits. The current output only prints out the first 64bits. Fix to output the entire 256bits. The current code omits operation caps from IAX devices. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Reported-by: Lucas Van Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161645624963.2003736.829798666998490151.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 4dbb03c545e4..c27ca01cf8b2 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1449,8 +1449,14 @@ static ssize_t op_cap_show(struct device *dev, { struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev); + int i, rc = 0; - return sprintf(buf, "%#llx\n", idxd->hw.opcap.bits[0]); + for (i = 0; i < 4; i++) + rc += sysfs_emit_at(buf, rc, "%#llx ", idxd->hw.opcap.bits[i]); + + rc--; + rc += sysfs_emit_at(buf, rc, "\n"); + return rc; } static DEVICE_ATTR_RO(op_cap); From 0fff71c5a311e1264988179f7dcc217fda15fadd Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 7 Apr 2021 12:59:47 -0700 Subject: [PATCH 099/180] dmaengine: idxd: fix wq size store permission state WQ size can only be changed when the device is disabled. Current code allows change when device is enabled but wq is disabled. Change the check to detect device state. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161782558755.107710.18138252584838406025.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index c27ca01cf8b2..5f7bc4b1621a 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -989,7 +989,7 @@ static ssize_t wq_size_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (wq->state != IDXD_WQ_DISABLED) + if (idxd->state == IDXD_DEV_ENABLED) return -EPERM; if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size) From 88cd1d6191b13689094310c2405394e4ce36d061 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 24 Mar 2021 16:17:57 +0200 Subject: [PATCH 100/180] dmaengine: dw: Make it dependent to HAS_IOMEM Some architectures do not provide devm_*() APIs. Hence make the driver dependent on HAVE_IOMEM. Fixes: dbde5c2934d1 ("dw_dmac: use devm_* functions to simplify code") Reported-by: kernel test robot Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20210324141757.24710-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dw/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig index e5162690de8f..db25f9b7778c 100644 --- a/drivers/dma/dw/Kconfig +++ b/drivers/dma/dw/Kconfig @@ -10,6 +10,7 @@ config DW_DMAC_CORE config DW_DMAC tristate "Synopsys DesignWare AHB DMA platform driver" + depends on HAS_IOMEM select DW_DMAC_CORE help Support the Synopsys DesignWare AHB DMA controller. This @@ -18,6 +19,7 @@ config DW_DMAC config DW_DMAC_PCI tristate "Synopsys DesignWare AHB DMA PCI driver" depends on PCI + depends on HAS_IOMEM select DW_DMAC_CORE help Support the Synopsys DesignWare AHB DMA controller on the From ea45b6008f8095db0cc09ad6e03c7785c2986197 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Tue, 30 Mar 2021 18:44:58 -0700 Subject: [PATCH 101/180] dmaengine: Fix a double free in dma_async_device_register In the first list_for_each_entry() macro of dma_async_device_register, it gets the chan from list and calls __dma_async_device_channel_register (..,chan). We can see that chan->local is allocated by alloc_percpu() and it is freed chan->local by free_percpu(chan->local) when __dma_async_device_channel_register() failed. But after __dma_async_device_channel_register() failed, the caller will goto err_out and freed the chan->local in the second time by free_percpu(). The cause of this problem is forget to set chan->local to NULL when chan->local was freed in __dma_async_device_channel_register(). My patch sets chan->local to NULL when the callee failed to avoid double free. Fixes: d2fb0a0438384 ("dmaengine: break out channel registration") Signed-off-by: Lv Yunlong Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20210331014458.3944-1-lyl2019@mail.ustc.edu.cn Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index fe6a460c4373..af3ee288bc11 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1086,6 +1086,7 @@ static int __dma_async_device_channel_register(struct dma_device *device, kfree(chan->dev); err_free_local: free_percpu(chan->local); + chan->local = NULL; return rc; } From 917a3200b9f467a154999c7572af345f2470aaf4 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 9 Apr 2021 16:28:05 +0800 Subject: [PATCH 102/180] dmaengine: tegra20: Fix runtime PM imbalance on error pm_runtime_get_sync() will increase the runtime PM counter even it returns an error. Thus a pairing decrement is needed to prevent refcount leak. Fix this by replacing this API with pm_runtime_resume_and_get(), which will not change the runtime PM counter on error. Signed-off-by: Dinghao Liu Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20210409082805.23643-1-dinghao.liu@zju.edu.cn Signed-off-by: Vinod Koul --- drivers/dma/tegra20-apb-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 71827d9b0aa1..b7260749e8ee 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -723,7 +723,7 @@ static void tegra_dma_issue_pending(struct dma_chan *dc) goto end; } if (!tdc->busy) { - err = pm_runtime_get_sync(tdc->tdma->dev); + err = pm_runtime_resume_and_get(tdc->tdma->dev); if (err < 0) { dev_err(tdc2dev(tdc), "Failed to enable DMA\n"); goto end; @@ -818,7 +818,7 @@ static void tegra_dma_synchronize(struct dma_chan *dc) struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); int err; - err = pm_runtime_get_sync(tdc->tdma->dev); + err = pm_runtime_resume_and_get(tdc->tdma->dev); if (err < 0) { dev_err(tdc2dev(tdc), "Failed to synchronize DMA: %d\n", err); return; From 07503e6aefe4a6efd777062191944a14f03b3a18 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Mar 2021 16:19:59 +0300 Subject: [PATCH 103/180] dmaengine: plx_dma: add a missing put_device() on error path Add a missing put_device(&pdev->dev) if the call to dma_async_device_register(dma); fails. Fixes: 905ca51e63be ("dmaengine: plx-dma: Introduce PLX DMA engine PCI driver skeleton") Signed-off-by: Dan Carpenter Reviewed-by: Logan Gunthorpe Link: https://lore.kernel.org/r/YFnq/0IQzixtAbC1@mwanda Signed-off-by: Vinod Koul --- drivers/dma/plx_dma.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c index f387c5bbc170..166934544161 100644 --- a/drivers/dma/plx_dma.c +++ b/drivers/dma/plx_dma.c @@ -507,10 +507,8 @@ static int plx_dma_create(struct pci_dev *pdev) rc = request_irq(pci_irq_vector(pdev, 0), plx_dma_isr, 0, KBUILD_MODNAME, plxdev); - if (rc) { - kfree(plxdev); - return rc; - } + if (rc) + goto free_plx; spin_lock_init(&plxdev->ring_lock); tasklet_setup(&plxdev->desc_task, plx_dma_desc_task); @@ -540,14 +538,20 @@ static int plx_dma_create(struct pci_dev *pdev) rc = dma_async_device_register(dma); if (rc) { pci_err(pdev, "Failed to register dma device: %d\n", rc); - free_irq(pci_irq_vector(pdev, 0), plxdev); - kfree(plxdev); - return rc; + goto put_device; } pci_set_drvdata(pdev, plxdev); return 0; + +put_device: + put_device(&pdev->dev); + free_irq(pci_irq_vector(pdev, 0), plxdev); +free_plx: + kfree(plxdev); + + return rc; } static int plx_dma_probe(struct pci_dev *pdev, From b74e409ea1b18128b877a50883d92a12eba83c33 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Apr 2021 00:13:18 +0200 Subject: [PATCH 104/180] s390/entry: avoid setting up backchain in ext|io handlers Currently when interrupt arrives to cpu while in kernel context INT_HANDLER macro (used for ext_int_handler and io_int_handler) allocates new stack frame and pt_regs on the kernel stack and sets up the backchain to jump over the pt_regs to the frame which has been interrupted. This is not ideal to two reasons: 1. This hides the fact that kernel stack contains interrupt frame in it and hence breaks arch_stack_walk_reliable(), which needs to know that to guarantee "reliability" and checks that there are no pt_regs on the way. 2. It breaks the backchain unwinder logic, which assumes that the next stack frame after an interrupt frame is reliable, while it is not. In some cases (when r14 contains garbage) this leads to early unwinding termination with an error, instead of marking frame as unreliable and continuing. To address that, only set backchain to 0. Fixes: 56e62a737028 ("s390: convert to generic entry") Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c10b9f31eef7..235bf2ac3359 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -401,15 +401,13 @@ ENTRY(\name) brasl %r14,.Lcleanup_sie_int #endif 0: CHECK_STACK __LC_SAVE_AREA_ASYNC - lgr %r11,%r15 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - stg %r11,__SF_BACKCHAIN(%r15) j 2f 1: BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lctlg %c1,%c1,__LC_KERNEL_ASCE lg %r15,__LC_KERNEL_STACK - xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -2: la %r11,STACK_FRAME_OVERHEAD(%r15) +2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use xgr %r0,%r0 From a994eddb947ea9ebb7b14d9a1267001699f0a136 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Apr 2021 00:15:21 +0200 Subject: [PATCH 105/180] s390/entry: save the caller of psw_idle Currently psw_idle does not allocate a stack frame and does not save its r14 and r15 into the save area. Even though this is valid from call ABI point of view, because psw_idle does not make any calls explicitly, in reality psw_idle is an entry point for controlled transition into serving interrupts. So, in practice, psw_idle stack frame is analyzed during stack unwinding. Depending on build options that r14 slot in the save area of psw_idle might either contain a value saved by previous sibling call or complete garbage. [task 0000038000003c28] do_ext_irq+0xd6/0x160 [task 0000038000003c78] ext_int_handler+0xba/0xe8 [task *0000038000003dd8] psw_idle_exit+0x0/0x8 <-- pt_regs ([task 0000038000003dd8] 0x0) [task 0000038000003e10] default_idle_call+0x42/0x148 [task 0000038000003e30] do_idle+0xce/0x160 [task 0000038000003e70] cpu_startup_entry+0x36/0x40 [task 0000038000003ea0] arch_call_rest_init+0x76/0x80 So, to make a stacktrace nicer and actually point for the real caller of psw_idle in this frequently occurring case, make psw_idle save its r14. [task 0000038000003c28] do_ext_irq+0xd6/0x160 [task 0000038000003c78] ext_int_handler+0xba/0xe8 [task *0000038000003dd8] psw_idle_exit+0x0/0x6 <-- pt_regs ([task 0000038000003dd8] arch_cpu_idle+0x3c/0xd0) [task 0000038000003e10] default_idle_call+0x42/0x148 [task 0000038000003e30] do_idle+0xce/0x160 [task 0000038000003e70] cpu_startup_entry+0x36/0x40 [task 0000038000003ea0] arch_call_rest_init+0x76/0x80 Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 235bf2ac3359..12de7a9c85b3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -443,6 +443,7 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq * Load idle PSW. */ ENTRY(psw_idle) + stg %r14,(__SF_GPRS+8*8)(%r15) stg %r3,__SF_EMPTY(%r15) larl %r1,psw_idle_exit stg %r1,__SF_EMPTY+8(%r15) From 11664169981a025b7f6072d136ac724294b7b65c Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 18 Mar 2021 13:02:02 -0400 Subject: [PATCH 106/180] drm/i915/dpcd_bl: Don't try vesa interface unless specified by VBT Looks like that there actually are another subset of laptops on the market that don't support the Intel HDR backlight interface, but do advertise support for the VESA DPCD backlight interface despite the fact it doesn't seem to work. Note though I'm not entirely clear on this - on one of the machines where this issue was observed, I also noticed that we appeared to be rejecting the VBT defined backlight frequency in intel_dp_aux_vesa_calc_max_backlight(). It's noted in this function that: /* Use highest possible value of Pn for more granularity of brightness * adjustment while satifying the conditions below. * ... * - FxP is within 25% of desired value. * Note: 25% is arbitrary value and may need some tweak. */ So it's possible that this value might just need to be tweaked, but for now let's just disable the VESA backlight interface unless it's specified in the VBT just to be safe. We might be able to try enabling this again by default in the future. Fixes: 2227816e647a ("drm/i915/dp: Allow forcing specific interfaces through enable_dpcd_backlight") Cc: Jani Nikula Cc: Rodrigo Vivi Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/3169 Signed-off-by: Lyude Paul Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210318170204.513000-1-lyude@redhat.com (cherry picked from commit 9e2eb6d5380e9dadcd2baecb51f238e5eba94bee) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 651884390137..4f8337c7fd2e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -646,7 +646,6 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) break; case INTEL_BACKLIGHT_DISPLAY_DDI: try_intel_interface = true; - try_vesa_interface = true; break; default: return -ENODEV; From bf52dc49ba0101f648b4c3ea26b812061406b0d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 27 Mar 2021 02:59:45 +0200 Subject: [PATCH 107/180] drm/i915: Don't zero out the Y plane's watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't zero out the watermarks for the Y plane since we've already computed them when computing the UV plane's watermarks (since the UV plane always appears before ethe Y plane when iterating through the planes). This leads to allocating no DDB for the Y plane since .min_ddb_alloc also gets zeroed. And that of course leads to underruns when scanning out planar formats. Cc: stable@vger.kernel.org Cc: Stanislav Lisovskiy Fixes: dbf71381d733 ("drm/i915: Nuke intel_atomic_crtc_state_for_each_plane_state() from skl+ wm code") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210327005945.4929-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit f99b805fb9413ff007ca0b6add871737664117dd) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 97b57acc02e2..4b4d8d034782 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5471,12 +5471,12 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, struct skl_plane_wm *wm = &crtc_state->wm.skl.raw.planes[plane_id]; int ret; - memset(wm, 0, sizeof(*wm)); - /* Watermarks calculated in master */ if (plane_state->planar_slave) return 0; + memset(wm, 0, sizeof(*wm)); + if (plane_state->planar_linked_plane) { const struct drm_framebuffer *fb = plane_state->hw.fb; enum plane_id y_plane_id = plane_state->planar_linked_plane->id; From aee6f25e9c911323aa89a200e1bb160c1613ed3d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 25 Mar 2021 12:48:22 +0100 Subject: [PATCH 108/180] drm/i915/display/vlv_dsi: Do not skip panel_pwr_cycle_delay when disabling the panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the recently added commit fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot"), the DSI panel on a Cherry Trail based Predia Basic tablet would no longer properly light up after reboot. I've managed to reproduce this without rebooting by doing: chvt 3; echo 1 > /sys/class/graphics/fb0/blank;\ echo 0 > /sys/class/graphics/fb0/blank Which rapidly turns the panel off and back on again. The vlv_dsi.c code uses an intel_dsi_msleep() helper for the various delays used for panel on/off, since starting with MIPI-sequences version >= 3 the delays are already included inside the MIPI-sequences. The problems exposed by the "Shut down displays gracefully on reboot" change, show that using this helper for the panel_pwr_cycle_delay is not the right thing to do. This has not been noticed until now because normally the panel never is cycled off and directly on again in quick succession. Change the msleep for the panel_pwr_cycle_delay to a normal msleep() call to avoid the panel staying black after a quick off + on cycle. Cc: Ville Syrjälä Fixes: fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot") Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210325114823.44922-1-hdegoede@redhat.com (cherry picked from commit 2878b29fc25a0dac0e1c6c94177f07c7f94240f0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/vlv_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index f94025ec603a..a9a8ba1d3aba 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -992,14 +992,14 @@ static void intel_dsi_post_disable(struct intel_atomic_state *state, * FIXME As we do with eDP, just make a note of the time here * and perform the wait before the next panel power on. */ - intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); + msleep(intel_dsi->panel_pwr_cycle_delay); } static void intel_dsi_shutdown(struct intel_encoder *encoder) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); + msleep(intel_dsi->panel_pwr_cycle_delay); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, From 2decad92f4731fac9755a083fcfefa66edb7d67d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 9 Apr 2021 18:37:10 +0100 Subject: [PATCH 109/180] arm64: mte: Ensure TIF_MTE_ASYNC_FAULT is set atomically The entry from EL0 code checks the TFSRE0_EL1 register for any asynchronous tag check faults in user space and sets the TIF_MTE_ASYNC_FAULT flag. This is not done atomically, potentially racing with another CPU calling set_tsk_thread_flag(). Replace the non-atomic ORR+STR with an STSET instruction. While STSET requires ARMv8.1 and an assembler that understands LSE atomics, the MTE feature is part of ARMv8.5 and already requires an updated assembler. Signed-off-by: Catalin Marinas Fixes: 637ec831ea4f ("arm64: mte: Handle synchronous and asynchronous tag check faults") Cc: # 5.10.x Reported-by: Will Deacon Cc: Will Deacon Cc: Vincenzo Frascino Cc: Mark Rutland Link: https://lore.kernel.org/r/20210409173710.18582-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 6 +++++- arch/arm64/kernel/entry.S | 10 ++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e4e1b6550115..dfdc3e0af5e1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1406,10 +1406,13 @@ config ARM64_PAN config AS_HAS_LDAPR def_bool $(as-instr,.arch_extension rcpc) +config AS_HAS_LSE_ATOMICS + def_bool $(as-instr,.arch_extension lse) + config ARM64_LSE_ATOMICS bool default ARM64_USE_LSE_ATOMICS - depends on $(as-instr,.arch_extension lse) + depends on AS_HAS_LSE_ATOMICS config ARM64_USE_LSE_ATOMICS bool "Atomic instructions" @@ -1666,6 +1669,7 @@ config ARM64_MTE default y depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI depends on AS_HAS_ARMV8_5 + depends on AS_HAS_LSE_ATOMICS # Required for tag checking in the uaccess routines depends on ARM64_PAN select ARCH_USES_HIGH_VMA_FLAGS diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a31a0a713c85..6acfc5e6b5e0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -148,16 +148,18 @@ alternative_cb_end .endm /* Check for MTE asynchronous tag check faults */ - .macro check_mte_async_tcf, flgs, tmp + .macro check_mte_async_tcf, tmp, ti_flags #ifdef CONFIG_ARM64_MTE + .arch_extension lse alternative_if_not ARM64_MTE b 1f alternative_else_nop_endif mrs_s \tmp, SYS_TFSRE0_EL1 tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */ - orr \flgs, \flgs, #_TIF_MTE_ASYNC_FAULT - str \flgs, [tsk, #TSK_TI_FLAGS] + mov \tmp, #_TIF_MTE_ASYNC_FAULT + add \ti_flags, tsk, #TSK_TI_FLAGS + stset \tmp, [\ti_flags] msr_s SYS_TFSRE0_EL1, xzr 1: #endif @@ -244,7 +246,7 @@ alternative_else_nop_endif disable_step_tsk x19, x20 /* Check for asynchronous tag check faults in user space */ - check_mte_async_tcf x19, x22 + check_mte_async_tcf x22, x23 apply_ssbd 1, x22, x23 ptrauth_keys_install_kernel tsk, x20, x22, x23 From 6df0e6c57dfc064af330071f372f11aa8c584997 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 12 Apr 2021 09:23:27 -0700 Subject: [PATCH 110/180] dmaengine: idxd: clear MSIX permission entry on shutdown Add disabling/clearing of MSIX permission entries on device shutdown to mirror the enabling of the MSIX entries on probe. Current code left the MSIX enabled and the pasid entries still programmed at device shutdown. Fixes: 8e50d392652f ("dmaengine: idxd: Add shared workqueue support") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161824457969.882533.6020239898682672311.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 30 ++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 2 ++ drivers/dma/idxd/init.c | 11 ++--------- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 84a6ea60ecf0..c09687013d29 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -574,6 +574,36 @@ void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid) } /* Device configuration bits */ +void idxd_msix_perm_setup(struct idxd_device *idxd) +{ + union msix_perm mperm; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(idxd->pdev); + if (msixcnt < 0) + return; + + mperm.bits = 0; + mperm.pasid = idxd->pasid; + mperm.pasid_en = device_pasid_enabled(idxd); + for (i = 1; i < msixcnt; i++) + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); +} + +void idxd_msix_perm_clear(struct idxd_device *idxd) +{ + union msix_perm mperm; + int i, msixcnt; + + msixcnt = pci_msix_vec_count(idxd->pdev); + if (msixcnt < 0) + return; + + mperm.bits = 0; + for (i = 1; i < msixcnt; i++) + iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); +} + static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 81a0e65fd316..eda2ee10501f 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -316,6 +316,8 @@ void idxd_unregister_driver(void); struct bus_type *idxd_get_bus_type(struct idxd_device *idxd); /* device interrupt control */ +void idxd_msix_perm_setup(struct idxd_device *idxd); +void idxd_msix_perm_clear(struct idxd_device *idxd); irqreturn_t idxd_irq_handler(int vec, void *data); irqreturn_t idxd_misc_thread(int vec, void *data); irqreturn_t idxd_wq_thread(int irq, void *data); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 085a0c3b62c6..6584b0ec07d5 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -65,7 +65,6 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) struct idxd_irq_entry *irq_entry; int i, msixcnt; int rc = 0; - union msix_perm mperm; msixcnt = pci_msix_vec_count(pdev); if (msixcnt < 0) { @@ -144,14 +143,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd) } idxd_unmask_error_interrupts(idxd); - - /* Setup MSIX permission table */ - mperm.bits = 0; - mperm.pasid = idxd->pasid; - mperm.pasid_en = device_pasid_enabled(idxd); - for (i = 1; i < msixcnt; i++) - iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8); - + idxd_msix_perm_setup(idxd); return 0; err_no_irq: @@ -510,6 +502,7 @@ static void idxd_shutdown(struct pci_dev *pdev) idxd_flush_work_list(irq_entry); } + idxd_msix_perm_clear(idxd); destroy_workqueue(idxd->wq); } From ea9aadc06a9f10ad20a90edc0a484f1147d88a7a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 12 Apr 2021 09:02:36 -0700 Subject: [PATCH 111/180] dmaengine: idxd: fix wq cleanup of WQCFG registers A pre-release silicon erratum workaround where wq reset does not clear WQCFG registers was leaked into upstream code. Use wq reset command instead of blasting the MMIO region. This also address an issue where we clobber registers in future devices. Fixes: da32b28c95a7 ("dmaengine: idxd: cleanup workqueue config after disabling") Reported-by: Shreenivaas Devarajan Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161824330020.881560.16375921906426627033.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 35 ++++++++++++++++++++++++----------- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/sysfs.c | 9 ++------- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index c09687013d29..31c819544a22 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -282,6 +282,22 @@ void idxd_wq_drain(struct idxd_wq *wq) idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL); } +void idxd_wq_reset(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + u32 operand; + + if (wq->state != IDXD_WQ_ENABLED) { + dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state); + return; + } + + operand = BIT(wq->id % 16) | ((wq->id / 16) << 16); + idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL); + wq->state = IDXD_WQ_DISABLED; +} + int idxd_wq_map_portal(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; @@ -363,8 +379,6 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq) void idxd_wq_disable_cleanup(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; - struct device *dev = &idxd->pdev->dev; - int i, wq_offset; lockdep_assert_held(&idxd->dev_lock); memset(wq->wqcfg, 0, idxd->wqcfg_size); @@ -376,14 +390,6 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq) wq->ats_dis = 0; clear_bit(WQ_FLAG_DEDICATED, &wq->flags); memset(wq->name, 0, WQ_NAME_SIZE); - - for (i = 0; i < WQCFG_STRIDES(idxd); i++) { - wq_offset = WQCFG_OFFSET(idxd, wq->id, i); - iowrite32(0, idxd->reg_base + wq_offset); - dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", - wq->id, i, wq_offset, - ioread32(idxd->reg_base + wq_offset)); - } } /* Device control bits */ @@ -672,7 +678,14 @@ static int idxd_wq_config_write(struct idxd_wq *wq) if (!wq->group) return 0; - memset(wq->wqcfg, 0, idxd->wqcfg_size); + /* + * Instead of memset the entire shadow copy of WQCFG, copy from the hardware after + * wq reset. This will copy back the sticky values that are present on some devices. + */ + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); + wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); + } /* byte 0-3 */ wq->wqcfg->wq_size = wq->size; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index eda2ee10501f..76014c14f473 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -343,6 +343,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq); int idxd_wq_enable(struct idxd_wq *wq); int idxd_wq_disable(struct idxd_wq *wq); void idxd_wq_drain(struct idxd_wq *wq); +void idxd_wq_reset(struct idxd_wq *wq); int idxd_wq_map_portal(struct idxd_wq *wq); void idxd_wq_unmap_portal(struct idxd_wq *wq); void idxd_wq_disable_cleanup(struct idxd_wq *wq); diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 5f7bc4b1621a..18bf4d148989 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -275,7 +275,6 @@ static void disable_wq(struct idxd_wq *wq) { struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; - int rc; mutex_lock(&wq->wq_lock); dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev)); @@ -296,17 +295,13 @@ static void disable_wq(struct idxd_wq *wq) idxd_wq_unmap_portal(wq); idxd_wq_drain(wq); - rc = idxd_wq_disable(wq); + idxd_wq_reset(wq); idxd_wq_free_resources(wq); wq->client_count = 0; mutex_unlock(&wq->wq_lock); - if (rc < 0) - dev_warn(dev, "Failed to disable %s: %d\n", - dev_name(&wq->conf_dev), rc); - else - dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); + dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev)); } static int idxd_config_bus_remove(struct device *dev) From 1fe976d308acb6374c899a4ee8025a0a016e453e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 12 Apr 2021 18:57:39 +0200 Subject: [PATCH 112/180] net: phy: marvell: fix detection of PHY on Topaz switches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading"), Linux reports the temperature of Topaz hwmon as constant -75°C. This is because switches from the Topaz family (88E6141 / 88E6341) have the address of the temperature sensor register different from Peridot. This address is instead compatible with 88E1510 PHYs, as was used for Topaz before the above mentioned commit. Create a new mapping table between switch family and PHY ID for families which don't have a model number. And define PHY IDs for Topaz and Peridot families. Create a new PHY ID and a new PHY driver for Topaz's internal PHY. The only difference from Peridot's PHY driver is the HWMON probing method. Prior this change Topaz's internal PHY is detected by kernel as: PHY [...] driver [Marvell 88E6390] (irq=63) And afterwards as: PHY [...] driver [Marvell 88E6341 Family] (irq=63) Signed-off-by: Pali Rohár BugLink: https://github.com/globalscaletechnologies/linux/issues/1 Fixes: fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading") Reviewed-by: Marek Behún Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 30 +++++++++++++----------------- drivers/net/phy/marvell.c | 32 +++++++++++++++++++++++++++++--- include/linux/marvell_phy.h | 5 +++-- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 903d619e08ed..e08bf9377140 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3026,10 +3026,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) return err; } +/* prod_id for switch families which do not have a PHY model number */ +static const u16 family_prod_id_table[] = { + [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, + [MV88E6XXX_FAMILY_6390] = MV88E6XXX_PORT_SWITCH_ID_PROD_6390, +}; + static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) { struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv; struct mv88e6xxx_chip *chip = mdio_bus->chip; + u16 prod_id; u16 val; int err; @@ -3040,23 +3047,12 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) err = chip->info->ops->phy_read(chip, bus, phy, reg, &val); mv88e6xxx_reg_unlock(chip); - if (reg == MII_PHYSID2) { - /* Some internal PHYs don't have a model number. */ - if (chip->info->family != MV88E6XXX_FAMILY_6165) - /* Then there is the 6165 family. It gets is - * PHYs correct. But it can also have two - * SERDES interfaces in the PHY address - * space. And these don't have a model - * number. But they are not PHYs, so we don't - * want to give them something a PHY driver - * will recognise. - * - * Use the mv88e6390 family model number - * instead, for anything which really could be - * a PHY, - */ - if (!(val & 0x3f0)) - val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4; + /* Some internal PHYs don't have a model number. */ + if (reg == MII_PHYSID2 && !(val & 0x3f0) && + chip->info->family < ARRAY_SIZE(family_prod_id_table)) { + prod_id = family_prod_id_table[chip->info->family]; + if (prod_id) + val |= prod_id >> 4; } return err ? err : val; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index e26a5d663f8a..8018ddf7f316 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -3021,9 +3021,34 @@ static struct phy_driver marvell_drivers[] = { .get_stats = marvell_get_stats, }, { - .phy_id = MARVELL_PHY_ID_88E6390, + .phy_id = MARVELL_PHY_ID_88E6341_FAMILY, .phy_id_mask = MARVELL_PHY_ID_MASK, - .name = "Marvell 88E6390", + .name = "Marvell 88E6341 Family", + /* PHY_GBIT_FEATURES */ + .flags = PHY_POLL_CABLE_TEST, + .probe = m88e1510_probe, + .config_init = marvell_config_init, + .config_aneg = m88e6390_config_aneg, + .read_status = marvell_read_status, + .config_intr = marvell_config_intr, + .handle_interrupt = marvell_handle_interrupt, + .resume = genphy_resume, + .suspend = genphy_suspend, + .read_page = marvell_read_page, + .write_page = marvell_write_page, + .get_sset_count = marvell_get_sset_count, + .get_strings = marvell_get_strings, + .get_stats = marvell_get_stats, + .get_tunable = m88e1540_get_tunable, + .set_tunable = m88e1540_set_tunable, + .cable_test_start = marvell_vct7_cable_test_start, + .cable_test_tdr_start = marvell_vct5_cable_test_tdr_start, + .cable_test_get_status = marvell_vct7_cable_test_get_status, + }, + { + .phy_id = MARVELL_PHY_ID_88E6390_FAMILY, + .phy_id_mask = MARVELL_PHY_ID_MASK, + .name = "Marvell 88E6390 Family", /* PHY_GBIT_FEATURES */ .flags = PHY_POLL_CABLE_TEST, .probe = m88e6390_probe, @@ -3107,7 +3132,8 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = { { MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1545, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK }, - { MARVELL_PHY_ID_88E6390, MARVELL_PHY_ID_MASK }, + { MARVELL_PHY_ID_88E6341_FAMILY, MARVELL_PHY_ID_MASK }, + { MARVELL_PHY_ID_88E6390_FAMILY, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1340S, MARVELL_PHY_ID_MASK }, { MARVELL_PHY_ID_88E1548P, MARVELL_PHY_ID_MASK }, { } diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 52b1610eae68..c544b70dfbd2 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -28,11 +28,12 @@ /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 -/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do +/* These Ethernet switch families contain embedded PHYs, but they do * not have a model ID. So the switch driver traps reads to the ID2 * register and returns the switch family ID */ -#define MARVELL_PHY_ID_88E6390 0x01410f90 +#define MARVELL_PHY_ID_88E6341_FAMILY 0x01410f41 +#define MARVELL_PHY_ID_88E6390_FAMILY 0x01410f90 #define MARVELL_PHY_FAMILY_ID(id) ((id) >> 4) From f33b0e196ed7aa3dc285b26db7768c1db1eb3a41 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 12 Apr 2021 11:47:07 -0700 Subject: [PATCH 113/180] ethtool: fix kdoc attr name Add missing 't' in attrtype. Signed-off-by: Jakub Kicinski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/netlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 6eabd58d81bf..cde9f3169ae5 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -36,9 +36,9 @@ static inline int ethnl_strz_size(const char *s) /** * ethnl_put_strz() - put string attribute with fixed size string - * @skb: skb with the message - * @attrype: attribute type - * @s: ETH_GSTRING_LEN sized string (may not be null terminated) + * @skb: skb with the message + * @attrtype: attribute type + * @s: ETH_GSTRING_LEN sized string (may not be null terminated) * * Puts an attribute with null terminated string from @s into the message. * From b29c457a6511435960115c0f548c4360d5f4801d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Apr 2021 21:38:57 +0200 Subject: [PATCH 114/180] netfilter: x_tables: fix compat match/target pad out-of-bound write xt_compat_match/target_from_user doesn't check that zeroing the area to start of next rule won't write past end of allocated ruleset blob. Remove this code and zero the entire blob beforehand. Reported-by: syzbot+cfc0247ac173f597aaaa@syzkaller.appspotmail.com Reported-by: Andy Nguyen Fixes: 9fa492cdc160c ("[NETFILTER]: x_tables: simplify compat API") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 2 ++ net/ipv4/netfilter/ip_tables.c | 2 ++ net/ipv6/netfilter/ip6_tables.c | 2 ++ net/netfilter/x_tables.c | 10 ++-------- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6c26533480dd..d6d45d820d79 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1193,6 +1193,8 @@ static int translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_ARP_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f15bc21d7301..f77ea0dbe656 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1428,6 +1428,8 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..eb2b5404806c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1443,6 +1443,8 @@ translate_compat_table(struct net *net, if (!newinfo) goto out_unlock; + memset(newinfo->entries, 0, size); + newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 6bd31a7a27fc..92e9d4ebc5e8 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -733,7 +733,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; - int pad, off = xt_compat_match_offset(match); + int off = xt_compat_match_offset(match); u_int16_t msize = cm->u.user.match_size; char name[sizeof(m->u.user.name)]; @@ -743,9 +743,6 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, match->compat_from_user(m->data, cm->data); else memcpy(m->data, cm->data, msize - sizeof(*cm)); - pad = XT_ALIGN(match->matchsize) - match->matchsize; - if (pad > 0) - memset(m->data + match->matchsize, 0, pad); msize += off; m->u.user.match_size = msize; @@ -1116,7 +1113,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; - int pad, off = xt_compat_target_offset(target); + int off = xt_compat_target_offset(target); u_int16_t tsize = ct->u.user.target_size; char name[sizeof(t->u.user.name)]; @@ -1126,9 +1123,6 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, target->compat_from_user(t->data, ct->data); else memcpy(t->data, ct->data, tsize - sizeof(*ct)); - pad = XT_ALIGN(target->targetsize) - target->targetsize; - if (pad > 0) - memset(t->data + target->targetsize, 0, pad); tsize += off; t->u.user.target_size = tsize; From 4d8f9065830e526c83199186c5f56a6514f457d2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 Apr 2021 21:29:38 +0200 Subject: [PATCH 115/180] netfilter: nftables: clone set element expression template memcpy() breaks when using connlimit in set elements. Use nft_expr_clone() to initialize the connlimit expression list, otherwise connlimit garbage collector crashes when walking on the list head copy. [ 493.064656] Workqueue: events_power_efficient nft_rhash_gc [nf_tables] [ 493.064685] RIP: 0010:find_or_evict+0x5a/0x90 [nf_conncount] [ 493.064694] Code: 2b 43 40 83 f8 01 77 0d 48 c7 c0 f5 ff ff ff 44 39 63 3c 75 df 83 6d 18 01 48 8b 43 08 48 89 de 48 8b 13 48 8b 3d ee 2f 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 03 48 83 [ 493.064699] RSP: 0018:ffffc90000417dc0 EFLAGS: 00010297 [ 493.064704] RAX: 0000000000000000 RBX: ffff888134f38410 RCX: 0000000000000000 [ 493.064708] RDX: 0000000000000000 RSI: ffff888134f38410 RDI: ffff888100060cc0 [ 493.064711] RBP: ffff88812ce594a8 R08: ffff888134f38438 R09: 00000000ebb9025c [ 493.064714] R10: ffffffff8219f838 R11: 0000000000000017 R12: 0000000000000001 [ 493.064718] R13: ffffffff82146740 R14: ffff888134f38410 R15: 0000000000000000 [ 493.064721] FS: 0000000000000000(0000) GS:ffff88840e440000(0000) knlGS:0000000000000000 [ 493.064725] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 493.064729] CR2: 0000000000000008 CR3: 00000001330aa002 CR4: 00000000001706e0 [ 493.064733] Call Trace: [ 493.064737] nf_conncount_gc_list+0x8f/0x150 [nf_conncount] [ 493.064746] nft_rhash_gc+0x106/0x390 [nf_tables] Reported-by: Laura Garcia Liebana Fixes: 409444522976 ("netfilter: nf_tables: add elements with stateful expressions") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 46 ++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f57f1a6ba96f..589d2f6978d3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5295,16 +5295,35 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, return -ENOMEM; } -static void nft_set_elem_expr_setup(const struct nft_set_ext *ext, int i, - struct nft_expr *expr_array[]) +static int nft_set_elem_expr_setup(struct nft_ctx *ctx, + const struct nft_set_ext *ext, + struct nft_expr *expr_array[], + u32 num_exprs) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); - struct nft_expr *expr = nft_setelem_expr_at(elem_expr, elem_expr->size); + struct nft_expr *expr; + int i, err; - memcpy(expr, expr_array[i], expr_array[i]->ops->size); - elem_expr->size += expr_array[i]->ops->size; - kfree(expr_array[i]); - expr_array[i] = NULL; + for (i = 0; i < num_exprs; i++) { + expr = nft_setelem_expr_at(elem_expr, elem_expr->size); + err = nft_expr_clone(expr, expr_array[i]); + if (err < 0) + goto err_elem_expr_setup; + + elem_expr->size += expr_array[i]->ops->size; + nft_expr_destroy(ctx, expr_array[i]); + expr_array[i] = NULL; + } + + return 0; + +err_elem_expr_setup: + for (; i < num_exprs; i++) { + nft_expr_destroy(ctx, expr_array[i]); + expr_array[i] = NULL; + } + + return -ENOMEM; } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, @@ -5556,12 +5575,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, *nft_set_ext_obj(ext) = obj; obj->use++; } - for (i = 0; i < num_exprs; i++) - nft_set_elem_expr_setup(ext, i, expr_array); + err = nft_set_elem_expr_setup(ctx, ext, expr_array, num_exprs); + if (err < 0) + goto err_elem_expr; trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); - if (trans == NULL) - goto err_trans; + if (trans == NULL) { + err = -ENOMEM; + goto err_elem_expr; + } ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; err = set->ops->insert(ctx->net, set, &elem, &ext2); @@ -5605,7 +5627,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, set->ops->remove(ctx->net, set, &elem); err_element_clash: kfree(trans); -err_trans: +err_elem_expr: if (obj) obj->use--; From 738fa58ee1328481d1d7889e7c430b3401c571b9 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 12 Apr 2021 17:41:01 +0800 Subject: [PATCH 116/180] arm64: kprobes: Restore local irqflag if kprobes is cancelled If instruction being single stepped caused a page fault, the kprobes is cancelled to let the page fault handler continue as a normal page fault. But the local irqflags are disabled so cpu will restore pstate with DAIF masked. After pagefault is serviced, the kprobes is triggerred again, we overwrite the saved_irqflag by calling kprobes_save_local_irqflag(). NOTE, DAIF is masked in this new saved irqflag. After kprobes is serviced, the cpu pstate is retored with DAIF masked. This patch is inspired by one patch for riscv from Liao Chang. Signed-off-by: Jisheng Zhang Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210412174101.6bfb0594@xhacker.debian Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 66aac2881ba8..85645b2b0c7a 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -267,10 +267,12 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - if (kcb->kprobe_status == KPROBE_REENTER) + if (kcb->kprobe_status == KPROBE_REENTER) { restore_previous_kprobe(kcb); - else + } else { + kprobes_restore_local_irqflag(kcb, regs); reset_current_kprobe(); + } break; case KPROBE_HIT_ACTIVE: From 6998a8800d73116187aad542391ce3b2dd0f9e30 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 13 Apr 2021 16:01:00 +0200 Subject: [PATCH 117/180] ACPI: x86: Call acpi_boot_table_init() after acpi_table_upgrade() Commit 1a1c130ab757 ("ACPI: tables: x86: Reserve memory occupied by ACPI tables") attempted to address an issue with reserving the memory occupied by ACPI tables, but it broke the initrd-based table override mechanism relied on by multiple users. To restore the initrd-based ACPI table override functionality, move the acpi_boot_table_init() invocation in setup_arch() on x86 after the acpi_table_upgrade() one. Fixes: 1a1c130ab757 ("ACPI: tables: x86: Reserve memory occupied by ACPI tables") Reported-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/setup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5ecd69a48393..ccab6cf91283 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1045,9 +1045,6 @@ void __init setup_arch(char **cmdline_p) cleanup_highmap(); - /* Look for ACPI tables and reserve memory occupied by them. */ - acpi_boot_table_init(); - memblock_set_current_limit(ISA_END_ADDRESS); e820__memblock_setup(); @@ -1132,6 +1129,8 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); acpi_table_upgrade(); + /* Look for ACPI tables and reserve memory occupied by them. */ + acpi_boot_table_init(); vsmp_init(); From 909290786ea335366e21d7f1ed5812b90f2f0a92 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 12 Apr 2021 23:41:24 +0200 Subject: [PATCH 118/180] vfio/pci: Add missing range check in vfio_pci_mmap When mmaping an extra device region verify that the region index derived from the mmap offset is valid. Fixes: a15b1883fee1 ("vfio_pci: Allow mapping extra regions") Cc: stable@vger.kernel.org Signed-off-by: Christian A. Ehrhardt Message-Id: <20210412214124.GA241759@lisa.in-ulm.de> Reviewed-by: David Gibson Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 65e7e6b44578..5023e23db3bc 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1656,6 +1656,8 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + return -EINVAL; if (vma->vm_end < vma->vm_start) return -EINVAL; if ((vma->vm_flags & VM_SHARED) == 0) @@ -1664,7 +1666,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) int regnum = index - VFIO_PCI_NUM_REGIONS; struct vfio_pci_region *region = vdev->region + regnum; - if (region && region->ops && region->ops->mmap && + if (region->ops && region->ops->mmap && (region->flags & VFIO_REGION_INFO_FLAG_MMAP)) return region->ops->mmap(vdev, region, vma); return -EINVAL; From 8db403b9631331ef1d5e302cdf353c48849ca9d5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Apr 2021 12:21:54 +0200 Subject: [PATCH 119/180] tracing/dynevent: Fix a memory leak in an error handling path We must free 'argv' before returning, as already done in all the other paths of this function. Link: https://lkml.kernel.org/r/21e3594ccd7fc88c5c162c98450409190f304327.1618136448.git.christophe.jaillet@wanadoo.fr Fixes: d262271d0483 ("tracing/dynevent: Delegate parsing to create function") Acked-by: Masami Hiramatsu Signed-off-by: Christophe JAILLET Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index dc971a68dda4..e57cc0870892 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -63,8 +63,10 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type event = p + 1; *p = '\0'; } - if (event[0] == '\0') - return -EINVAL; + if (event[0] == '\0') { + ret = -EINVAL; + goto out; + } mutex_lock(&event_mutex); for_each_dyn_event_safe(pos, n) { From 31166efb1cee348eb6314e9c0095d84cbeb66b9d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 8 Mar 2021 12:41:56 -0800 Subject: [PATCH 120/180] ixgbe: Fix NULL pointer dereference in ethtool loopback test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ixgbe driver currently generates a NULL pointer dereference when performing the ethtool loopback test. This is due to the fact that there isn't a q_vector associated with the test ring when it is setup as interrupts are not normally added to the test rings. To address this I have added code that will check for a q_vector before returning a napi_id value. If a q_vector is not present it will return a value of 0. Fixes: b02e5a0ebb17 ("xsk: Propagate napi_id to XDP socket Rx path") Signed-off-by: Alexander Duyck Acked-by: Björn Töpel Tested-by: Dave Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 03d9aad516d4..45d2c8f37c01 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6536,6 +6536,13 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) return err; } +static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring) +{ + struct ixgbe_q_vector *q_vector = rx_ring->q_vector; + + return q_vector ? q_vector->napi.napi_id : 0; +} + /** * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: pointer to ixgbe_adapter @@ -6583,7 +6590,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, /* XDP RX-queue info */ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, rx_ring->q_vector->napi.napi_id) < 0) + rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0) goto err; rx_ring->xdp_prog = adapter->xdp_prog; From debb9df311582c83fe369baa35fa4b92e8a9c58a Mon Sep 17 00:00:00 2001 From: Yongxin Liu Date: Mon, 22 Mar 2021 15:14:48 +0800 Subject: [PATCH 121/180] ixgbe: fix unbalanced device enable/disable in suspend/resume pci_disable_device() called in __ixgbe_shutdown() decreases dev->enable_cnt by 1. pci_enable_device_mem() which increases dev->enable_cnt by 1, was removed from ixgbe_resume() in commit 6f82b2558735 ("ixgbe: use generic power management"). This caused unbalanced increase/decrease. So add pci_enable_device_mem() back. Fix the following call trace. ixgbe 0000:17:00.1: disabling already-disabled device Call Trace: __ixgbe_shutdown+0x10a/0x1e0 [ixgbe] ixgbe_suspend+0x32/0x70 [ixgbe] pci_pm_suspend+0x87/0x160 ? pci_pm_freeze+0xd0/0xd0 dpm_run_callback+0x42/0x170 __device_suspend+0x114/0x460 async_suspend+0x1f/0xa0 async_run_entry_fn+0x3c/0xf0 process_one_work+0x1dd/0x410 worker_thread+0x34/0x3f0 ? cancel_delayed_work+0x90/0x90 kthread+0x14c/0x170 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 Fixes: 6f82b2558735 ("ixgbe: use generic power management") Signed-off-by: Yongxin Liu Tested-by: Dave Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 45d2c8f37c01..cffb95f8f632 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6899,6 +6899,11 @@ static int __maybe_unused ixgbe_resume(struct device *dev_d) adapter->hw.hw_addr = adapter->io_addr; + err = pci_enable_device_mem(pdev); + if (err) { + e_dev_err("Cannot enable PCI device from suspend\n"); + return err; + } smp_mb__before_atomic(); clear_bit(__IXGBE_DISABLED, &adapter->state); pci_set_master(pdev); From ef963ae427aa4669905e0a96b3bd9d44dc85db32 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 31 Mar 2021 15:46:28 +0100 Subject: [PATCH 122/180] ice: Fix potential infinite loop when using u8 loop counter A for-loop is using a u8 loop counter that is being compared to a u32 cmp_dcbcfg->numapp to check for the end of the loop. If cmp_dcbcfg->numapp is larger than 255 then the counter j will wrap around to zero and hence an infinite loop occurs. Fix this by making counter j the same type as cmp_dcbcfg->numapp. Addresses-Coverity: ("Infinite loop") Fixes: aeac8ce864d9 ("ice: Recognize 860 as iSCSI port in CEE mode") Signed-off-by: Colin Ian King Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dcb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 211ac6f907ad..28e834a128c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -747,8 +747,8 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, struct ice_port_info *pi) { u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status); - u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift; - u8 i, j, err, sync, oper, app_index, ice_app_sel_type; + u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift, j; + u8 i, err, sync, oper, app_index, ice_app_sel_type; u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift; struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg; From 610f8c0fc8d46e0933955ce13af3d64484a4630a Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Mon, 12 Apr 2021 20:41:16 +0300 Subject: [PATCH 123/180] net: sit: Unregister catch-all devices A sit interface created without a local or a remote address is linked into the `sit_net::tunnels_wc` list of its original namespace. When deleting a network namespace, delete the devices that have been moved. The following script triggers a null pointer dereference if devices linked in a deleted `sit_net` remain: for i in `seq 1 30`; do ip netns add ns-test ip netns exec ns-test ip link add dev veth0 type veth peer veth1 ip netns exec ns-test ip link add dev sit$i type sit dev veth0 ip netns exec ns-test ip link set dev sit$i netns $$ ip netns del ns-test done for i in `seq 1 30`; do ip link del dev sit$i done Fixes: 5e6700b3bf98f ("sit: add support of x-netns") Signed-off-by: Hristo Venev Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 63ccd9f2dccc..9fdccf0718b5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1867,9 +1867,9 @@ static void __net_exit sit_destroy_tunnels(struct net *net, if (dev->rtnl_link_ops == &sit_link_ops) unregister_netdevice_queue(dev, head); - for (prio = 1; prio < 4; prio++) { + for (prio = 0; prio < 4; prio++) { int h; - for (h = 0; h < IP6_SIT_HASH_SIZE; h++) { + for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); From 941ea91e87a6e879ed82dad4949f6234f2702bec Mon Sep 17 00:00:00 2001 From: Hristo Venev Date: Mon, 12 Apr 2021 20:41:17 +0300 Subject: [PATCH 124/180] net: ip6_tunnel: Unregister catch-all devices Similarly to the sit case, we need to remove the tunnels with no addresses that have been moved to another network namespace. Fixes: 0bd8762824e73 ("ip6tnl: add x-netns support") Signed-off-by: Hristo Venev Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3fa0eca5a06f..42fe7db6bbb3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2244,6 +2244,16 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head t = rtnl_dereference(t->next); } } + + t = rtnl_dereference(ip6n->tnls_wc[0]); + while (t) { + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, list); + t = rtnl_dereference(t->next); + } } static int __net_init ip6_tnl_init_net(struct net *net) From 97684f0970f6e112926de631fdd98d9693c7e5c1 Mon Sep 17 00:00:00 2001 From: Jonathon Reinhart Date: Tue, 13 Apr 2021 03:08:48 -0400 Subject: [PATCH 125/180] net: Make tcp_allowed_congestion_control readonly in non-init netns Currently, tcp_allowed_congestion_control is global and writable; writing to it in any net namespace will leak into all other net namespaces. tcp_available_congestion_control and tcp_allowed_congestion_control are the only sysctls in ipv4_net_table (the per-netns sysctl table) with a NULL data pointer; their handlers (proc_tcp_available_congestion_control and proc_allowed_congestion_control) have no other way of referencing a struct net. Thus, they operate globally. Because ipv4_net_table does not use designated initializers, there is no easy way to fix up this one "bad" table entry. However, the data pointer updating logic shouldn't be applied to NULL pointers anyway, so we instead force these entries to be read-only. These sysctls used to exist in ipv4_table (init-net only), but they were moved to the per-net ipv4_net_table, presumably without realizing that tcp_allowed_congestion_control was writable and thus introduced a leak. Because the intent of that commit was only to know (i.e. read) "which congestion algorithms are available or allowed", this read-only solution should be sufficient. The logic added in recent commit 31c4d2f160eb: ("net: Ensure net namespace isolation of sysctls") does not and cannot check for NULL data pointers, because other table entries (e.g. /proc/sys/net/netfilter/nf_log/) have .data=NULL but use other methods (.extra2) to access the struct net. Fixes: 9cb8e048e5d9 ("net/ipv4/sysctl: show tcp_{allowed, available}_congestion_control in non-initial netns") Signed-off-by: Jonathon Reinhart Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f55095d3ed16..60465f077497 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1378,9 +1378,19 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - /* Update the variables to point into the current struct net */ - for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) - table[i].data += (void *)net - (void *)&init_net; + for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + if (table[i].data) { + /* Update the variables to point into + * the current struct net + */ + table[i].data += (void *)net - (void *)&init_net; + } else { + /* Entries without data pointer are global; + * Make them read-only in non-init_net ns + */ + table[i].mode &= ~0222; + } + } } net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); From ca09bf7bb109a37a7ff05f230bb3fa3627e6625f Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Tue, 13 Apr 2021 03:33:25 -0500 Subject: [PATCH 126/180] ibmvnic: correctly use dev_consume/free_skb_irq It is more correct to use dev_kfree_skb_irq when packets are dropped, and to use dev_consume_skb_irq when packets are consumed. Fixes: 0d973388185d ("ibmvnic: Introduce xmit_more support using batched subCRQ hcalls") Suggested-by: Thomas Falcon Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9c6438d3b3a5..110a0d0eaabb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3204,9 +3204,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) - dev_err(dev, "tx error %x\n", - next->tx_comp.rcs[i]); index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; @@ -3220,7 +3217,13 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, num_entries += txbuff->num_entries; if (txbuff->skb) { total_bytes += txbuff->skb->len; - dev_consume_skb_irq(txbuff->skb); + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } txbuff->skb = NULL; } else { netdev_warn(adapter->netdev, From b166a20b07382b8bc1dcee2a448715c9c2c81b5b Mon Sep 17 00:00:00 2001 From: Or Cohen Date: Tue, 13 Apr 2021 21:10:31 +0300 Subject: [PATCH 127/180] net/sctp: fix race condition in sctp_destroy_sock If sctp_destroy_sock is called without sock_net(sk)->sctp.addr_wq_lock held and sp->do_auto_asconf is true, then an element is removed from the auto_asconf_splist without any proper locking. This can happen in the following functions: 1. In sctp_accept, if sctp_sock_migrate fails. 2. In inet_create or inet6_create, if there is a bpf program attached to BPF_CGROUP_INET_SOCK_CREATE which denies creation of the sctp socket. The bug is fixed by acquiring addr_wq_lock in sctp_destroy_sock instead of sctp_close. This addresses CVE-2021-23133. Reported-by: Or Cohen Reviewed-by: Xin Long Fixes: 610236587600 ("bpf: Add new cgroup attach type to enable sock modifications") Signed-off-by: Or Cohen Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a710917c5ac7..b9b3d899a611 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1520,11 +1520,9 @@ static void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. - * Also, sctp_destroy_sock() needs to be called with addr_wq_lock - * held and that should be grabbed before socket lock. */ - spin_lock_bh(&net->sctp.addr_wq_lock); - bh_lock_sock_nested(sk); + local_bh_disable(); + bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. @@ -1533,7 +1531,7 @@ static void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); bh_unlock_sock(sk); - spin_unlock_bh(&net->sctp.addr_wq_lock); + local_bh_enable(); sock_put(sk); @@ -4993,9 +4991,6 @@ static int sctp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); - /* Nothing can fail after this block, otherwise - * sctp_destroy_sock() will be called without addr_wq_lock held - */ if (net->sctp.default_auto_asconf) { spin_lock(&sock_net(sk)->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, @@ -5030,7 +5025,9 @@ static void sctp_destroy_sock(struct sock *sk) if (sp->do_auto_asconf) { sp->do_auto_asconf = 0; + spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); list_del(&sp->auto_asconf_list); + spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock); } sctp_endpoint_free(sp->ep); local_bh_disable(); From 38ec4944b593fd90c5ef42aaaa53e66ae5769d04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Apr 2021 05:41:35 -0700 Subject: [PATCH 128/180] gro: ensure frag0 meets IP header alignment After commit 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head") Guenter Roeck reported one failure in his tests using sh architecture. After much debugging, we have been able to spot silent unaligned accesses in inet_gro_receive() The issue at hand is that upper networking stacks assume their header is word-aligned. Low level drivers are supposed to reserve NET_IP_ALIGN bytes before the Ethernet header to make that happen. This patch hardens skb_gro_reset_offset() to not allow frag0 fast-path if the fragment is not properly aligned. Some arches like x86, arm64 and powerpc do not care and define NET_IP_ALIGN as 0, this extra check will be a NOP for them. Note that if frag0 is not used, GRO will call pskb_may_pull() as many times as needed to pull network and transport headers. Fixes: 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head") Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address") Signed-off-by: Eric Dumazet Reported-by: Guenter Roeck Cc: Xuan Zhuo Cc: "Michael S. Tsirkin" Cc: Jason Wang Acked-by: Michael S. Tsirkin Tested-by: Guenter Roeck Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index af8c1ea040b9..1f79b9aa9a3f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5924,7 +5924,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (!skb_headlen(skb) && pinfo->nr_frags && - !PageHighMem(skb_frag_page(frag0))) { + !PageHighMem(skb_frag_page(frag0)) && + (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, skb_frag_size(frag0), From 04c4f2ee3f68c9a4bf1653d15f1a9a435ae33f7a Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Tue, 13 Apr 2021 15:47:40 +0000 Subject: [PATCH 129/180] KVM: VMX: Don't use vcpu->run->internal.ndata as an array index __vmx_handle_exit() uses vcpu->run->internal.ndata as an index for an array access. Since vcpu->run is (can be) mapped to a user address space with a writer permission, the 'ndata' could be updated by the user process at anytime (the user process can set it to outside the bounds of the array). So, it is not safe that __vmx_handle_exit() uses the 'ndata' that way. Fixes: 1aa561b1a4c0 ("kvm: x86: Add "last CPU" to some KVM_EXIT information") Signed-off-by: Reiji Watanabe Reviewed-by: Jim Mattson Message-Id: <20210413154739.490299-1-reijiw@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 32cf8287d4a7..29b40e092d13 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6027,19 +6027,19 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) exit_reason.basic != EXIT_REASON_PML_FULL && exit_reason.basic != EXIT_REASON_APIC_ACCESS && exit_reason.basic != EXIT_REASON_TASK_SWITCH)) { + int ndata = 3; + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV; - vcpu->run->internal.ndata = 3; vcpu->run->internal.data[0] = vectoring_info; vcpu->run->internal.data[1] = exit_reason.full; vcpu->run->internal.data[2] = vcpu->arch.exit_qualification; if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) { - vcpu->run->internal.ndata++; - vcpu->run->internal.data[3] = + vcpu->run->internal.data[ndata++] = vmcs_read64(GUEST_PHYSICAL_ADDRESS); } - vcpu->run->internal.data[vcpu->run->internal.ndata++] = - vcpu->arch.last_vmentry_cpu; + vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu; + vcpu->run->internal.ndata = ndata; return 0; } From 2afeec08ab5c86ae21952151f726bfe184f6b23d Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 13 Apr 2021 16:25:12 +0100 Subject: [PATCH 130/180] xen-netback: Check for hotplug-status existence before watching The logic in connect() is currently written with the assumption that xenbus_watch_pathfmt() will return an error for a node that does not exist. This assumption is incorrect: xenstore does allow a watch to be registered for a nonexistent node (and will send notifications should the node be subsequently created). As of commit 1f2565780 ("xen-netback: remove 'hotplug-status' once it has served its purpose"), this leads to a failure when a domU transitions into XenbusStateConnected more than once. On the first domU transition into Connected state, the "hotplug-status" node will be deleted by the hotplug_status_changed() callback in dom0. On the second or subsequent domU transition into Connected state, the hotplug_status_changed() callback will therefore never be invoked, and so the backend will remain stuck in InitWait. This failure prevents scenarios such as reloading the xen-netfront module within a domU, or booting a domU via iPXE. There is unfortunately no way for the domU to work around this dom0 bug. Fix by explicitly checking for existence of the "hotplug-status" node, thereby creating the behaviour that was previously assumed to exist. Signed-off-by: Michael Brown Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index a5439c130130..d24b7a7993aa 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -824,11 +824,15 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, - hotplug_status_changed, - "%s/%s", dev->nodename, "hotplug-status"); - if (!err) + if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) { + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, + NULL, hotplug_status_changed, + "%s/%s", dev->nodename, + "hotplug-status"); + if (err) + goto err; be->have_hotplug_status_watch = 1; + } netif_tx_wake_all_queues(be->vif->dev); From 8ca7cab82bda4eb0b8064befeeeaa38106cac637 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Apr 2021 08:28:28 -0700 Subject: [PATCH 131/180] dm verity fec: fix misaligned RS roots IO commit df7b59ba9245 ("dm verity: fix FEC for RS roots unaligned to block size") introduced the possibility for misaligned roots IO relative to the underlying device's logical block size. E.g. Android's default RS roots=2 results in dm_bufio->block_size=1024, which causes the following EIO if the logical block size of the device is 4096, given v->data_dev_block_bits=12: E sd 0 : 0:0:0: [sda] tag#30 request not aligned to the logical block size E blk_update_request: I/O error, dev sda, sector 10368424 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0 E device-mapper: verity-fec: 254:8: FEC 9244672: parity read failed (block 18056): -5 Fix this by onlu using f->roots for dm_bufio blocksize IFF it is aligned to v->data_dev_block_bits. Fixes: df7b59ba9245 ("dm verity: fix FEC for RS roots unaligned to block size") Cc: stable@vger.kernel.org Signed-off-by: Jaegeuk Kim Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-fec.c | 11 ++++++++--- drivers/md/dm-verity-fec.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 66f4c6398f67..cea2b3789736 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -65,7 +65,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, u8 *res; position = (index + rsb) * v->fec->roots; - block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem); + block = div64_u64_rem(position, v->fec->io_size, &rem); *offset = (unsigned)rem; res = dm_bufio_read(v->fec->bufio, block, buf); @@ -154,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, /* read the next block when we run out of parity bytes */ offset += v->fec->roots; - if (offset >= v->fec->roots << SECTOR_SHIFT) { + if (offset >= v->fec->io_size) { dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); @@ -742,8 +742,13 @@ int verity_fec_ctr(struct dm_verity *v) return -E2BIG; } + if ((f->roots << SECTOR_SHIFT) & ((1 << v->data_dev_block_bits) - 1)) + f->io_size = 1 << v->data_dev_block_bits; + else + f->io_size = v->fec->roots << SECTOR_SHIFT; + f->bufio = dm_bufio_client_create(f->dev->bdev, - f->roots << SECTOR_SHIFT, + f->io_size, 1, 0, NULL, NULL); if (IS_ERR(f->bufio)) { ti->error = "Cannot initialize FEC bufio client"; diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 42fbd3a7fc9f..3c46c8d61883 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -36,6 +36,7 @@ struct dm_verity_fec { struct dm_dev *dev; /* parity data device */ struct dm_bufio_client *data_bufio; /* for data dev access */ struct dm_bufio_client *bufio; /* for parity data access */ + size_t io_size; /* IO size for roots */ sector_t start; /* parity data start in blocks */ sector_t blocks; /* number of blocks covered */ sector_t rounds; /* number of interleaving rounds */ From c7d95613c7d6e003969722a290397b8271bdad17 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 13 Apr 2021 11:43:00 +0100 Subject: [PATCH 132/180] io_uring: fix early sqd_list removal sqpoll hangs [ 245.463317] INFO: task iou-sqp-1374:1377 blocked for more than 122 seconds. [ 245.463334] task:iou-sqp-1374 state:D flags:0x00004000 [ 245.463345] Call Trace: [ 245.463352] __schedule+0x36b/0x950 [ 245.463376] schedule+0x68/0xe0 [ 245.463385] __io_uring_cancel+0xfb/0x1a0 [ 245.463407] do_exit+0xc0/0xb40 [ 245.463423] io_sq_thread+0x49b/0x710 [ 245.463445] ret_from_fork+0x22/0x30 It happens when sqpoll forgot to run park_task_work and goes to exit, then exiting user may remove ctx from sqd_list, and so corresponding io_sq_thread() -> io_uring_cancel_sqpoll() won't be executed. Hopefully it just stucks in do_exit() in this case. Fixes: dbe1bdbb39db ("io_uring: handle signals for IO threads like a normal thread") Reported-by: Joakim Hassila Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bd14327c8e7e..dff34975d86b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6754,6 +6754,9 @@ static int io_sq_thread(void *data) current->flags |= PF_NO_SETAFFINITY; mutex_lock(&sqd->lock); + /* a user may had exited before the thread started */ + io_run_task_work_head(&sqd->park_task_work); + while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) { int ret; bool cap_entries, sqt_spin, needs_sched; @@ -6770,10 +6773,10 @@ static int io_sq_thread(void *data) } cond_resched(); mutex_lock(&sqd->lock); - if (did_sig) - break; io_run_task_work(); io_run_task_work_head(&sqd->park_task_work); + if (did_sig) + break; timeout = jiffies + sqd->sq_thread_idle; continue; } From 16756d3e77ad58cd07e36cbed724aa13ae5a0278 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 13 Apr 2021 20:46:14 -0700 Subject: [PATCH 133/180] ethtool: pause: make sure we init driver stats The intention was for pause statistics to not be reported when driver does not have the relevant callback (only report an empty netlink nest). What happens currently we report all 0s instead. Make sure statistics are initialized to "not set" (which is -1) so the dumping code skips them. Fixes: 9a27a33027f2 ("ethtool: add standard pause stats") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ethtool/pause.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 09998dc5c185..d4ac02718b72 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -38,16 +38,16 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base, if (!dev->ethtool_ops->get_pauseparam) return -EOPNOTSUPP; + ethtool_stats_init((u64 *)&data->pausestat, + sizeof(data->pausestat) / 8); + ret = ethnl_ops_begin(dev); if (ret < 0) return ret; dev->ethtool_ops->get_pauseparam(dev, &data->pauseparam); if (req_base->flags & ETHTOOL_FLAG_STATS && - dev->ethtool_ops->get_pause_stats) { - ethtool_stats_init((u64 *)&data->pausestat, - sizeof(data->pausestat) / 8); + dev->ethtool_ops->get_pause_stats) dev->ethtool_ops->get_pause_stats(dev, &data->pausestat); - } ethnl_ops_complete(dev); return 0; From 453a77894efa4d9b6ef9644d74b9419c47ac427c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 14 Apr 2021 10:47:10 +0200 Subject: [PATCH 134/180] r8169: don't advertise pause in jumbo mode It has been reported [0] that using pause frames in jumbo mode impacts performance. There's no available chip documentation, but vendor drivers r8168 and r8125 don't advertise pause in jumbo mode. So let's do the same, according to Roman it fixes the issue. [0] https://bugzilla.kernel.org/show_bug.cgi?id=212617 Fixes: 9cf9b84cc701 ("r8169: make use of phy_set_asym_pause") Reported-by: Roman Mamedov Tested-by: Roman Mamedov Signed-off-by: Heiner Kallweit Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 581a92fc3292..1df2c002c9f6 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2350,6 +2350,13 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) pcie_set_readrq(tp->pci_dev, readrq); + + /* Chip doesn't support pause in jumbo mode */ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, + tp->phydev->advertising, !jumbo); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + tp->phydev->advertising, !jumbo); + phy_start_aneg(tp->phydev); } DECLARE_RTL_COND(rtl_chipcmd_cond) @@ -4630,8 +4637,6 @@ static int r8169_phy_connect(struct rtl8169_private *tp) if (!tp->supports_gmii) phy_set_max_speed(phydev, SPEED_100); - phy_support_asym_pause(phydev); - phy_attached_info(phydev); return 0; From 0775ebc4cf8554bdcd2c212669a0868ab68df5c0 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 14 Apr 2021 02:46:14 -0500 Subject: [PATCH 135/180] ibmvnic: avoid calling napi_disable() twice __ibmvnic_open calls napi_disable without checking whether NAPI polling has already been disabled or not. This could cause napi_disable being called twice, which could generate deadlock. For example, the first napi_disable will spin until NAPI_STATE_SCHED is cleared by napi_complete_done, then set it again. When napi_disable is called the second time, it will loop infinitely because no dev->poll will be running to clear NAPI_STATE_SCHED. To prevent above scenario from happening, call ibmvnic_napi_disable() which checks if napi is disabled or not before calling napi_disable. Fixes: bfc32f297337 ("ibmvnic: Move resource initialization to its own routine") Suggested-by: Thomas Falcon Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 110a0d0eaabb..2d27f8aa0d4b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1149,8 +1149,7 @@ static int __ibmvnic_open(struct net_device *netdev) rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); if (rc) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_disable(&adapter->napi[i]); + ibmvnic_napi_disable(adapter); release_resources(adapter); return rc; } From d3a6abccbd272aea7dc2c6f984bb5a2c11278e44 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 14 Apr 2021 02:46:15 -0500 Subject: [PATCH 136/180] ibmvnic: remove duplicate napi_schedule call in do_reset function During adapter reset, do_reset/do_hard_reset calls ibmvnic_open(), which will calls napi_schedule if previous state is VNIC_CLOSED (i.e, the reset case, and "ifconfig down" case). So there is no need for do_reset to call napi_schedule again at the end of the function though napi_schedule will neglect the request if napi is already scheduled. Fixes: ed651a10875f ("ibmvnic: Updated reset handling") Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2d27f8aa0d4b..f4bd63216672 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1921,7 +1921,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, u64 old_num_rx_queues, old_num_tx_queues; u64 old_num_rx_slots, old_num_tx_slots; struct net_device *netdev = adapter->netdev; - int i, rc; + int rc; netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset reason %d, reset_state %d\n", @@ -2110,10 +2110,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, /* refresh device's multicast list */ ibmvnic_set_multi(netdev); - /* kick napi */ - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason == VNIC_RESET_FAILOVER || adapter->reset_reason == VNIC_RESET_MOBILITY) __netdev_notify_peers(netdev); From 7c451f3ef676c805a4b77a743a01a5c21a250a73 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 14 Apr 2021 02:46:16 -0500 Subject: [PATCH 137/180] ibmvnic: remove duplicate napi_schedule call in open function Remove the unnecessary napi_schedule() call in __ibmvnic_open() since interrupt_rx() calls napi_schedule_prep/__napi_schedule during every receive interrupt. Fixes: ed651a10875f ("ibmvnic: Updated reset handling") Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f4bd63216672..ffb2a91750c7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1156,11 +1156,6 @@ static int __ibmvnic_open(struct net_device *netdev) netif_tx_start_all_queues(netdev); - if (prev_state == VNIC_CLOSED) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - } - adapter->state = VNIC_OPEN; return rc; } From 292ecd9f5a94dd29d09fe03b5b669cb20b44f19e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Apr 2021 12:00:27 +0200 Subject: [PATCH 138/180] doc: move seg6_flowlabel to seg6-sysctl.rst Let's have all seg6 sysctl at the same place. Fixes: a6dc6670cd7e ("ipv6: sr: Add documentation for seg_flowlabel sysctl") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 15 --------------- Documentation/networking/seg6-sysctl.rst | 13 +++++++++++++ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c7952ac5bd2f..3feb5e565b1a 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1849,21 +1849,6 @@ ip6frag_low_thresh - INTEGER ip6frag_time - INTEGER Time in seconds to keep an IPv6 fragment in memory. -IPv6 Segment Routing: - -seg6_flowlabel - INTEGER - Controls the behaviour of computing the flowlabel of outer - IPv6 header in case of SR T.encaps - - == ======================================================= - -1 set flowlabel to zero. - 0 copy flowlabel from Inner packet in case of Inner IPv6 - (Set flowlabel to 0 in case IPv4/L2) - 1 Compute the flowlabel using seg6_make_flowlabel() - == ======================================================= - - Default is 0. - ``conf/default/*``: Change the interface-specific default settings. diff --git a/Documentation/networking/seg6-sysctl.rst b/Documentation/networking/seg6-sysctl.rst index ec73e1445030..07c20e470baf 100644 --- a/Documentation/networking/seg6-sysctl.rst +++ b/Documentation/networking/seg6-sysctl.rst @@ -24,3 +24,16 @@ seg6_require_hmac - INTEGER * 1 - Drop SR packets without HMAC, validate SR packets with HMAC Default is 0. + +seg6_flowlabel - INTEGER + Controls the behaviour of computing the flowlabel of outer + IPv6 header in case of SR T.encaps + + == ======================================================= + -1 set flowlabel to zero. + 0 copy flowlabel from Inner packet in case of Inner IPv6 + (Set flowlabel to 0 in case IPv4/L2) + 1 Compute the flowlabel using seg6_make_flowlabel() + == ======================================================= + + Default is 0. From 2e1534f395e73152e2051332034bff61a56a8368 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Apr 2021 12:03:25 +0200 Subject: [PATCH 139/180] vrf: fix a comment about loopback device This is a leftover of the below commit. Fixes: 4f04256c983a ("net: vrf: Drop local rtable and rt6_info") Signed-off-by: Nicolas Dichtel Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6d9130859c55..503e2fd7ce51 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -471,9 +471,8 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, skb_dst_drop(skb); - /* if dst.dev is loopback or the VRF device again this is locally - * originated traffic destined to a local address. Short circuit - * to Rx path + /* if dst.dev is the VRF device again this is locally originated traffic + * destined to a local address. Short circuit to Rx path. */ if (dst->dev == dev) return vrf_local_xmit(skb, dev, dst); @@ -547,9 +546,8 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, skb_dst_drop(skb); - /* if dst.dev is loopback or the VRF device again this is locally - * originated traffic destined to a local address. Short circuit - * to Rx path + /* if dst.dev is the VRF device again this is locally originated traffic + * destined to a local address. Short circuit to Rx path. */ if (rt->dst.dev == vrf_dev) return vrf_local_xmit(skb, vrf_dev, &rt->dst); From a714e27ea8bdee2b238748029d31472d0a65b611 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 14 Apr 2021 14:20:29 +0300 Subject: [PATCH 140/180] net: macb: fix the restore of cmp registers Commit a14d273ba159 ("net: macb: restore cmp registers on resume path") introduces the restore of CMP registers on resume path. In case the IP doesn't support type 2 screeners (zero on DCFG8 register) the struct macb::rx_fs_list::list is not initialized and thus the list_for_each_entry(item, &bp->rx_fs_list.list, list) loop introduced in commit a14d273ba159 ("net: macb: restore cmp registers on resume path") will access an uninitialized list leading to crash. Thus, initialize the struct macb::rx_fs_list::list without taking into account if the IP supports type 2 screeners or not. Fixes: a14d273ba159 ("net: macb: restore cmp registers on resume path") Signed-off-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6e5cf490c01d..0f6a6cb7e98d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3918,6 +3918,7 @@ static int macb_init(struct platform_device *pdev) reg = gem_readl(bp, DCFG8); bp->max_tuples = min((GEM_BFEXT(SCR2CMP, reg) / 3), GEM_BFEXT(T2SCR, reg)); + INIT_LIST_HEAD(&bp->rx_fs_list.list); if (bp->max_tuples > 0) { /* also needs one ethtype match to check IPv4 */ if (GEM_BFEXT(SCR2ETH, reg) > 0) { @@ -3928,7 +3929,6 @@ static int macb_init(struct platform_device *pdev) /* Filtering is supported in hw but don't enable it in kernel now */ dev->hw_features |= NETIF_F_NTUPLE; /* init Rx flow definitions */ - INIT_LIST_HEAD(&bp->rx_fs_list.list); bp->rx_fs_list.count = 0; spin_lock_init(&bp->rx_fs_lock); } else From 416dcc5ce9d2a810477171c62ffa061a98f87367 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Wed, 14 Apr 2021 19:31:48 +0800 Subject: [PATCH 141/180] cavium/liquidio: Fix duplicate argument Fix the following coccicheck warning: ./drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h:413:6-28: duplicated argument to & or | The CN6XXX_INTR_M1UPB0_ERR here is duplicate. Here should be CN6XXX_INTR_M1UNB0_ERR. Signed-off-by: Wan Jiabing Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h index b248966837b4..7aad40b2aa73 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h @@ -412,7 +412,7 @@ | CN6XXX_INTR_M0UNWI_ERR \ | CN6XXX_INTR_M1UPB0_ERR \ | CN6XXX_INTR_M1UPWI_ERR \ - | CN6XXX_INTR_M1UPB0_ERR \ + | CN6XXX_INTR_M1UNB0_ERR \ | CN6XXX_INTR_M1UNWI_ERR \ | CN6XXX_INTR_INSTR_DB_OF_ERR \ | CN6XXX_INTR_SLIST_DB_OF_ERR \ From 00423969d806d7169d16fa6314c570a472ca26c9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 14 Apr 2021 17:10:07 +0200 Subject: [PATCH 142/180] Revert "net: stmmac: re-init rx buffers when mac resume back" This reverts commit 9c63faaa931e443e7abbbee9de0169f1d4710546, which introduces a suspend/resume regression on Jetson TX2 boards that can be reproduced every time. Given that the issue that this was supposed to fix only occurs very sporadically the safest course of action is to revert before v5.12 and then we can have another go at fixing the more rare issue in the next release (and perhaps backport it if necessary). The root cause of the observed problem seems to be that when the system is suspended, some packets are still in transit. When the descriptors for these buffers are cleared on resume, the descriptors become invalid and cause a fatal bus error. Link: https://lore.kernel.org/r/708edb92-a5df-ecc4-3126-5ab36707e275@nvidia.com/ Reported-by: Jonathan Hunter Signed-off-by: Thierry Reding Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 84 +------------------ 1 file changed, 1 insertion(+), 83 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 208cae344ffa..4749bd0af160 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1379,88 +1379,6 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) } } -/** - * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer. - * @priv: driver private structure - * Description: this function is called to re-allocate a receive buffer, perform - * the DMA mapping and init the descriptor. - */ -static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv) -{ - u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; - int i; - - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - - if (buf->page) { - page_pool_recycle_direct(rx_q->page_pool, buf->page); - buf->page = NULL; - } - - if (priv->sph && buf->sec_page) { - page_pool_recycle_direct(rx_q->page_pool, buf->sec_page); - buf->sec_page = NULL; - } - } - } - - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - struct dma_desc *p; - - if (priv->extend_desc) - p = &((rx_q->dma_erx + i)->basic); - else - p = rx_q->dma_rx + i; - - if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->page) - goto err_reinit_rx_buffers; - - buf->addr = page_pool_get_dma_addr(buf->page); - } - - if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->sec_page) - goto err_reinit_rx_buffers; - - buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); - } - - stmmac_set_desc_addr(priv, p, buf->addr); - if (priv->sph) - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); - else - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); - if (priv->dma_buf_sz == BUF_SIZE_16KiB) - stmmac_init_desc3(priv, p); - } - } - - return; - -err_reinit_rx_buffers: - do { - while (--i >= 0) - stmmac_free_rx_buffer(priv, queue, i); - - if (queue == 0) - break; - - i = priv->dma_rx_size; - } while (queue-- > 0); -} - /** * init_dma_rx_desc_rings - init the RX descriptor rings * @dev: net device structure @@ -5428,7 +5346,7 @@ int stmmac_resume(struct device *dev) mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); - stmmac_reinit_rx_buffers(priv); + stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv); From 41bafb31dcd58d834bdffa5db703f94fd2cec727 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 12 Apr 2021 17:50:08 +0300 Subject: [PATCH 143/180] net/mlx5: Fix setting of devlink traps in switchdev mode Prevent setting of devlink traps on the uplink while in switchdev mode. In this mode, it is the SW switch responsibility to handle both packets with a mismatch in destination MAC or VLAN ID. Therefore, there are no flow steering tables to trap undesirable packets and driver crashes upon setting a trap. Fixes: 241dc159391f ("net/mlx5: Notify on trap action by blocking event") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Roi Dayan Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d7d8a68ef23d..d0f9d3cee97d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -246,6 +246,11 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, struct mlx5_devlink_trap *dl_trap; int err = 0; + if (is_mdev_switchdev_mode(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); + return -EOPNOTSUPP; + } + dl_trap = mlx5_find_trap_by_id(dev, trap->id); if (!dl_trap) { mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); From 7a320c9db3e73fb6c4f9a331087df9df18767221 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 11 Apr 2021 09:33:12 +0300 Subject: [PATCH 144/180] net/mlx5e: Fix setting of RS FEC mode Change register setting from bit number to bit mask. Fixes: b5ede32d3329 ("net/mlx5e: Add support for FEC modes based on 50G per lane links") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/port.c | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 308fd279669e..89510cac46c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -387,21 +387,6 @@ enum mlx5e_fec_supported_link_mode { *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ } while (0) -#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \ - do { \ - unsigned long policy_long; \ - u16 *__policy = &(policy); \ - bool _write = (write); \ - \ - policy_long = *__policy; \ - if (_write && *__policy) \ - *__policy = find_first_bit(&policy_long, \ - sizeof(policy_long) * BITS_PER_BYTE);\ - MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \ - if (!_write && *__policy) \ - *__policy = 1 << *__policy; \ - } while (0) - /* get/set FEC admin field for a given speed */ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, enum mlx5e_fec_supported_link_mode link_mode) @@ -423,16 +408,16 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 50g_1x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 100g_2x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 200g_4x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 400g_8x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x); break; default: return -EINVAL; From e3e0f9b279705154b951d579dc3d8b7041710e24 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 9 Apr 2021 13:33:48 +0800 Subject: [PATCH 145/180] net/mlx5e: fix ingress_ifindex check in mlx5e_flower_parse_meta In the nft_offload there is the mate flow_dissector with no ingress_ifindex but with ingress_iftype that only be used in the software. So if the mask of ingress_ifindex in meta is 0, this meta check should be bypass. Fixes: 6d65bc64e232 ("net/mlx5e: Add mlx5e_flower_parse_meta support") Signed-off-by: wenxu Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index df2a0af854bb..d675107d9eca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1895,6 +1895,9 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; flow_rule_match_meta(rule, &match); + if (!match.mask->ingress_ifindex) + return 0; + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); return -EOPNOTSUPP; From 22315a2296f4c251fa92aec45fbbae37e9301b6c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 13 Apr 2021 17:08:04 -0700 Subject: [PATCH 146/180] arm64: alternatives: Move length validation in alternative_{insn, endif} After commit 2decad92f473 ("arm64: mte: Ensure TIF_MTE_ASYNC_FAULT is set atomically"), LLVM's integrated assembler fails to build entry.S: :5:7: error: expected assembly-time absolute expression .org . - (664b-663b) + (662b-661b) ^ :6:7: error: expected assembly-time absolute expression .org . - (662b-661b) + (664b-663b) ^ The root cause is LLVM's assembler has a one-pass design, meaning it cannot figure out these instruction lengths when the .org directive is outside of the subsection that they are in, which was changed by the .arch_extension directive added in the above commit. Apply the same fix from commit 966a0acce2fc ("arm64/alternatives: move length validation inside the subsection") to the alternative_endif macro, shuffling the .org directives so that the length validation happen will always happen in the same subsections. alternative_insn has not shown any issue yet but it appears that it could have the same issue in the future so just preemptively change it. Fixes: f7b93d42945c ("arm64/alternatives: use subsections for replacement sequences") Cc: # 5.8.x Link: https://github.com/ClangBuiltLinux/linux/issues/1347 Signed-off-by: Nathan Chancellor Reviewed-by: Sami Tolvanen Tested-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Link: https://lore.kernel.org/r/20210414000803.662534-1-nathan@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative-macros.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 5df500dcc627..8a078fc662ac 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -97,9 +97,9 @@ .popsection .subsection 1 663: \insn2 -664: .previous - .org . - (664b-663b) + (662b-661b) +664: .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) + .previous .endif .endm @@ -169,11 +169,11 @@ */ .macro alternative_endif 664: + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) .if .Lasm_alt_mode==0 .previous .endif - .org . - (664b-663b) + (662b-661b) - .org . - (662b-661b) + (664b-663b) .endm /* From 4e39a072a6a0fc422ba7da5e4336bdc295d70211 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 14 Apr 2021 10:34:28 +0800 Subject: [PATCH 147/180] i40e: fix the panic when running bpf in xdpdrv mode Fix this panic by adding more rules to calculate the value of @rss_size_max which could be used in allocating the queues when bpf is loaded, which, however, could cause the failure and then trigger the NULL pointer of vsi->rx_rings. Prio to this fix, the machine doesn't care about how many cpus are online and then allocates 256 queues on the machine with 32 cpus online actually. Once the load of bpf begins, the log will go like this "failed to get tracking for 256 queues for VSI 0 err -12" and this "setup of MAIN VSI failed". Thus, I attach the key information of the crash-log here. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: 0010:i40e_xdp+0xdd/0x1b0 [i40e] Call Trace: [2160294.717292] ? i40e_reconfig_rss_queues+0x170/0x170 [i40e] [2160294.717666] dev_xdp_install+0x4f/0x70 [2160294.718036] dev_change_xdp_fd+0x11f/0x230 [2160294.718380] ? dev_disable_lro+0xe0/0xe0 [2160294.718705] do_setlink+0xac7/0xe70 [2160294.719035] ? __nla_parse+0xed/0x120 [2160294.719365] rtnl_newlink+0x73b/0x860 Fixes: 41c445ff0f48 ("i40e: main driver core") Co-developed-by: Shujin Li Signed-off-by: Shujin Li Signed-off-by: Jason Xing Reviewed-by: Jesse Brandeburg Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 30ad7c08d0fb..527023ee4c07 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12357,6 +12357,7 @@ static int i40e_sw_init(struct i40e_pf *pf) { int err = 0; int size; + u16 pow; /* Set default capability flags */ pf->flags = I40E_FLAG_RX_CSUM_ENABLED | @@ -12375,6 +12376,11 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); + + /* find the next higher power-of-2 of num cpus */ + pow = roundup_pow_of_two(num_online_cpus()); + pf->rss_size_max = min_t(int, pf->rss_size_max, pow); + if (pf->hw.func_caps.rss) { pf->flags |= I40E_FLAG_RSS_ENABLED; pf->alloc_rss_size = min_t(int, pf->rss_size_max, From 1a73e427b824133940c2dd95ebe26b6dce1cbf10 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:45 +0530 Subject: [PATCH 148/180] ch_ktls: Fix kernel panic Taking page refcount is not ideal and causes kernel panic sometimes. It's better to take tx_ctx lock for the complete skb transmit, to avoid page cleanup if ACK received in middle. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 1115b8f9ea4e..e39fa0940367 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -2010,12 +2010,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) * we will send the complete record again. */ + spin_lock_irqsave(&tx_ctx->base.lock, flags); + do { - int i; cxgb4_reclaim_completed_tx(adap, &q->q, true); - /* lock taken */ - spin_lock_irqsave(&tx_ctx->base.lock, flags); /* fetch the tls record */ record = tls_get_record(&tx_ctx->base, tcp_seq, &tx_info->record_no); @@ -2074,11 +2073,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tls_end_offset, skb_offset, 0); - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (ret) { /* free the refcount taken earlier */ if (tls_end_offset < data_len) dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); goto out; } @@ -2088,16 +2087,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) continue; } - /* increase page reference count of the record, so that there - * won't be any chance of page free in middle if in case stack - * receives ACK and try to delete the record. - */ - for (i = 0; i < record->num_frags; i++) - __skb_frag_ref(&record->frags[i]); - /* lock cleared */ - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); - - /* if a tls record is finishing in this SKB */ if (tls_end_offset <= data_len) { ret = chcr_end_part_handler(tx_info, skb, record, @@ -2122,13 +2111,9 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) data_len = 0; } - /* clear the frag ref count which increased locally before */ - for (i = 0; i < record->num_frags; i++) { - /* clear the frag ref count */ - __skb_frag_unref(&record->frags[i]); - } /* if any failure, come out from the loop. */ if (ret) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (th->fin) dev_kfree_skb_any(skb); @@ -2143,6 +2128,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) } while (data_len > 0); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); atomic64_inc(&port_stats->ktls_tx_encrypted_packets); atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); From bc16efd2430652f894ae34b1de5eccc3bf0d2810 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:46 +0530 Subject: [PATCH 149/180] ch_ktls: fix device connection close When sge queue is full and chcr_ktls_xmit_wr_complete() returns failure, skb is not freed if it is not the last tls record in this skb, causes refcount never gets freed and tls_dev_del() never gets called on this connection. Fixes: 5a4b9fe7fece ("cxgb4/chcr: complete record tx handling") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index e39fa0940367..a626560f8365 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1735,7 +1735,9 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, struct sge_eth_txq *q, u32 skb_offset, u32 tls_end_offset, bool last_wr) { + bool free_skb_if_tx_fails = false; struct sk_buff *nskb = NULL; + /* check if it is a complete record */ if (tls_end_offset == record->len) { nskb = skb; @@ -1758,6 +1760,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, if (last_wr) dev_kfree_skb_any(skb); + else + free_skb_if_tx_fails = true; last_wr = true; @@ -1769,6 +1773,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, record->num_frags, (last_wr && tcp_push_no_fin), mss)) { + if (free_skb_if_tx_fails) + dev_kfree_skb_any(skb); goto out; } tx_info->prev_seq = record->end_seq; From 21d8c25e3f4b9052a471ced8f47b531956eb9963 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:47 +0530 Subject: [PATCH 150/180] ch_ktls: tcb close causes tls connection failure HW doesn't need marking TCB closed. This TCB state change sometimes causes problem to the new connection which gets the same tid. Fixes: 34aba2c45024 ("cxgb4/chcr : Register to tls add and del callback") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index a626560f8365..8559eec161f0 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -349,18 +349,6 @@ static int chcr_set_tcb_field(struct chcr_ktls_info *tx_info, u16 word, return cxgb4_ofld_send(tx_info->netdev, skb); } -/* - * chcr_ktls_mark_tcb_close: mark tcb state to CLOSE - * @tx_info - driver specific tls info. - * return: NET_TX_OK/NET_XMIT_DROP. - */ -static int chcr_ktls_mark_tcb_close(struct chcr_ktls_info *tx_info) -{ - return chcr_set_tcb_field(tx_info, TCB_T_STATE_W, - TCB_T_STATE_V(TCB_T_STATE_M), - CHCR_TCB_STATE_CLOSED, 1); -} - /* * chcr_ktls_dev_del: call back for tls_dev_del. * Remove the tid and l2t entry and close the connection. @@ -395,8 +383,6 @@ static void chcr_ktls_dev_del(struct net_device *netdev, /* clear tid */ if (tx_info->tid != -1) { - /* clear tcb state and then release tid */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tx_info->tid, tx_info->ip_family); } @@ -574,7 +560,6 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, return 0; free_tid: - chcr_ktls_mark_tcb_close(tx_info); #if IS_ENABLED(CONFIG_IPV6) /* clear clip entry */ if (tx_info->ip_family == AF_INET6) @@ -672,10 +657,6 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, if (tx_info->pending_close) { spin_unlock(&tx_info->lock); if (!status) { - /* it's a late success, tcb status is established, - * mark it close. - */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tid, tx_info->ip_family); } From e8a4155567b3c903f49cbf89b8017e9cc22c4fe4 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Thu, 15 Apr 2021 13:17:48 +0530 Subject: [PATCH 151/180] ch_ktls: do not send snd_una update to TCB in middle snd_una update should not be done when the same skb is being sent out.chcr_short_record_handler() sends it again even though SND_UNA update is already sent for the skb in chcr_ktls_xmit(), which causes mismatch in un-acked TCP seq number, later causes problem in sending out complete record. Fixes: 429765a149f1 ("chcr: handle partial end part of a record") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Rohit Maheshwari Signed-off-by: David S. Miller --- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 8559eec161f0..a3f5b80888e5 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1644,54 +1644,6 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, refcount_add(nskb->truesize, &nskb->sk->sk_wmem_alloc); } -/* - * chcr_ktls_update_snd_una: Reset the SEND_UNA. It will be done to avoid - * sending the same segment again. It will discard the segment which is before - * the current tx max. - * @tx_info - driver specific tls info. - * @q - TX queue. - * return: NET_TX_OK/NET_XMIT_DROP. - */ -static int chcr_ktls_update_snd_una(struct chcr_ktls_info *tx_info, - struct sge_eth_txq *q) -{ - struct fw_ulptx_wr *wr; - unsigned int ndesc; - int credits; - void *pos; - u32 len; - - len = sizeof(*wr) + roundup(CHCR_SET_TCB_FIELD_LEN, 16); - ndesc = DIV_ROUND_UP(len, 64); - - credits = chcr_txq_avail(&q->q) - ndesc; - if (unlikely(credits < 0)) { - chcr_eth_txq_stop(q); - return NETDEV_TX_BUSY; - } - - pos = &q->q.desc[q->q.pidx]; - - wr = pos; - /* ULPTX wr */ - wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); - wr->cookie = 0; - /* fill len in wr field */ - wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); - - pos += sizeof(*wr); - - pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, - TCB_SND_UNA_RAW_W, - TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), - TCB_SND_UNA_RAW_V(0), 0); - - chcr_txq_advance(&q->q, ndesc); - cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); - - return 0; -} - /* * chcr_end_part_handler: This handler will handle the record which * is complete or if record's end part is received. T6 adapter has a issue that @@ -1892,11 +1844,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, /* reset tcp_seq as per the prior_data_required len */ tcp_seq -= prior_data_len; } - /* reset snd una, so the middle record won't send the already - * sent part. - */ - if (chcr_ktls_update_snd_una(tx_info, q)) - goto out; atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts); } else { atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts); From b21bb4cd1102dd9e24a169d09cf4e6f3c8a46bcf Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 15 Apr 2021 16:26:08 -0700 Subject: [PATCH 152/180] cxl/mem: Fix register block offset calculation The "Register Offset Low" register of a "DVSEC Register Locator" contains the 64K aligned offset for the registers along with the BAR indicator and an id. The implementation was treating the "Register Block Offset Low" field a value rather than as a pre-aligned component of the 64-bit offset. So, just mask, don't mask and shift (FIELD_GET). The user visible result of this bug is that the driver fails to bind to the device after none of the required blocks are found. This was missed earlier because the primary development done in the QEMU environment only uses 0 offsets, i.e. 0 shifted is still 0. Fixes: 8adaf747c9f0 ("cxl/mem: Find device capabilities") Reported-by: Vishal Verma Signed-off-by: Ben Widawsky Link: https://lore.kernel.org/r/20210415232610.603273-1-ben.widawsky@intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index e3003f49b329..1b5078311f7d 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -998,7 +998,7 @@ static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, return NULL; } - offset = ((u64)reg_hi << 32) | FIELD_GET(CXL_REGLOC_ADDR_MASK, reg_lo); + offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); /* Basic sanity check that BAR is big enough */ From 199fc6b8dee7d6d50467a57e0dc7e3e1b7d59966 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 29 Mar 2021 11:13:07 +0800 Subject: [PATCH 153/180] riscv: Fix spelling mistake "SPARSEMEM" to "SPARSMEM" There is a spelling mistake when SPARSEMEM Kconfig copy. Fixes: a5406a7ff56e ("riscv: Correct SPARSEMEM configuration") Cc: stable@vger.kernel.org Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0d0cf67359cb..4515a10c5d22 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -153,7 +153,7 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y depends on MMU - select SPARSEMEM_STATIC if 32BIT && SPARSMEM + select SPARSEMEM_STATIC if 32BIT && SPARSEMEM select SPARSEMEM_VMEMMAP_ENABLE if 64BIT config ARCH_SELECT_MEMORY_MODEL From 2349a3b26e29b8d860466bafda2e02b4b87a9e40 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:12:26 +0800 Subject: [PATCH 154/180] riscv: add do_page_fault and do_trap_break into the kprobes blacklist These two functions are used to implement the kprobes feature so they can't be kprobed. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Cc: stable@vger.kernel.org Signed-off-by: Jisheng Zhang Reviewed-by: Masami Hiramatsu Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 1 + arch/riscv/mm/fault.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0879b5df11b9..1357abf79570 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -178,6 +178,7 @@ asmlinkage __visible void do_trap_break(struct pt_regs *regs) else die(regs, "Kernel BUG"); } +NOKPROBE_SYMBOL(do_trap_break); #ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long pc) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 8f17519208c7..c5dbd55cbf7c 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -328,3 +328,4 @@ asmlinkage void do_page_fault(struct pt_regs *regs) } return; } +NOKPROBE_SYMBOL(do_page_fault); From e31be8d343e64e7ab17aef55c1d1b36dc504da67 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:14:40 +0800 Subject: [PATCH 155/180] riscv: kprobes/ftrace: Add recursion protection to the ftrace callback Currently, the riscv's kprobes(powerred by ftrace) handler is preemptible. Futher check indicates we miss something similar as the commit c536aa1c5b17 ("kprobes/ftrace: Add recursion protection to the ftrace callback"), so do similar modifications as the commit does. Fixes: 829adda597fe ("riscv: Add KPROBES_ON_FTRACE supported") Cc: stable@vger.kernel.org Signed-off-by: Jisheng Zhang Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/ftrace.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index 17ca5e923bb0..aab85a82f419 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -9,10 +9,16 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; struct pt_regs *regs; struct kprobe_ctlblk *kcb; + int bit; + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) - return; + goto out; regs = ftrace_get_regs(fregs); kcb = get_kprobe_ctlblk(); @@ -45,6 +51,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, */ __this_cpu_write(current_kprobe, NULL); } +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); From 7ae11635ec90072083503c6b6485cdffe46203b3 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:16:24 +0800 Subject: [PATCH 156/180] riscv: keep interrupts disabled for BREAKPOINT exception Current riscv's kprobe handlers are run with both preemption and interrupt enabled, this violates kprobe requirements. Fix this issue by keeping interrupts disabled for BREAKPOINT exception. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Cc: stable@vger.kernel.org Signed-off-by: Jisheng Zhang Reviewed-by: Masami Hiramatsu [Palmer: add a comment] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 76274a4a1d8e..83095faa680e 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -130,6 +130,9 @@ skip_context_tracking: */ andi t0, s1, SR_PIE beqz t0, 1f + /* kprobes, entered via ebreak, must have interrupts disabled. */ + li t0, EXC_BREAKPOINT + beq s4, t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS call trace_hardirqs_on #endif From 9601148392520e2e134936e76788fc2a6371e7be Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 08:32:59 +0100 Subject: [PATCH 157/180] bpf: Use correct permission flag for mixed signed bounds arithmetic We forbid adding unknown scalars with mixed signed bounds due to the spectre v1 masking mitigation. Hence this also needs bypass_spec_v1 flag instead of allow_ptr_leaks. Fixes: 2c78ee898d8f ("bpf: Implement CAP_BPF") Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a738724a380..2ede4b850230 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6085,7 +6085,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst, reg_type_str[ptr_reg->type]); return -EACCES; case PTR_TO_MAP_VALUE: - if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { + if (!env->env->bypass_spec_v1 && !known && (smin_val < 0) != (smax_val < 0)) { verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", off_reg == dst_reg ? dst : src); return -EACCES; From 6f55b2f2a1178856c19bbce2f71449926e731914 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 22 Mar 2021 15:45:52 +0100 Subject: [PATCH 158/180] bpf: Move off_reg into sanitize_ptr_alu Small refactor to drag off_reg into sanitize_ptr_alu(), so we later on can use off_reg for generalizing some of the checks for all pointer types. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2ede4b850230..4ee014dadac7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5929,11 +5929,12 @@ static int sanitize_val_alu(struct bpf_verifier_env *env, static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, - struct bpf_reg_state *dst_reg, - bool off_is_neg) + const struct bpf_reg_state *off_reg, + struct bpf_reg_state *dst_reg) { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_insn_aux_data *aux = cur_aux(env); + bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); u32 alu_state, alu_limit; @@ -6110,7 +6111,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: - ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); if (ret < 0) { verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); return ret; @@ -6165,7 +6166,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: - ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); if (ret < 0) { verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); return ret; From 24c109bb1537c12c02aeed2d51a347b4d6a9b76e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 08:51:02 +0100 Subject: [PATCH 159/180] bpf: Ensure off_reg has no mixed signed bounds for all types The mixed signed bounds check really belongs into retrieve_ptr_limit() instead of outside of it in adjust_ptr_min_max_vals(). The reason is that this check is not tied to PTR_TO_MAP_VALUE only, but to all pointer types that we handle in retrieve_ptr_limit() and given errors from the latter propagate back to adjust_ptr_min_max_vals() and lead to rejection of the program, it's a better place to reside to avoid anything slipping through for future types. The reason why we must reject such off_reg is that we otherwise would not be able to derive a mask, see details in 9d7eceede769 ("bpf: restrict unknown scalars of mixed signed bounds for unprivileged"). Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4ee014dadac7..a21d7f1a0ba8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5857,12 +5857,18 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) } static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, - u32 *ptr_limit, u8 opcode, bool off_is_neg) + const struct bpf_reg_state *off_reg, + u32 *ptr_limit, u8 opcode) { + bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); u32 off, max; + if (!tnum_is_const(off_reg->var_off) && + (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) + return -EACCES; + switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the @@ -5956,7 +5962,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg); + err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); if (err < 0) return err; @@ -6036,8 +6042,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; - u32 dst = insn->dst_reg, src = insn->src_reg; u8 opcode = BPF_OP(insn->code); + u32 dst = insn->dst_reg; int ret; dst_reg = ®s[dst]; @@ -6085,13 +6091,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; - case PTR_TO_MAP_VALUE: - if (!env->env->bypass_spec_v1 && !known && (smin_val < 0) != (smax_val < 0)) { - verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", - off_reg == dst_reg ? dst : src); - return -EACCES; - } - fallthrough; default: break; } From b658bbb844e28f1862867f37e8ca11a8e2aa94a3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 09:04:10 +0100 Subject: [PATCH 160/180] bpf: Rework ptr_limit into alu_limit and add common error path Small refactor with no semantic changes in order to consolidate the max ptr_limit boundary check. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a21d7f1a0ba8..b8e0171d9591 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5858,12 +5858,12 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, - u32 *ptr_limit, u8 opcode) + u32 *alu_limit, u8 opcode) { bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off, max; + u32 off, max = 0, ptr_limit = 0; if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) @@ -5880,22 +5880,27 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, */ off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) - *ptr_limit = MAX_BPF_STACK + off; + ptr_limit = MAX_BPF_STACK + off; else - *ptr_limit = -off - 1; - return *ptr_limit >= max ? -ERANGE : 0; + ptr_limit = -off - 1; + break; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; if (mask_to_left) { - *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; + ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } - return *ptr_limit >= max ? -ERANGE : 0; + break; default: return -EINVAL; } + + if (ptr_limit >= max) + return -ERANGE; + *alu_limit = ptr_limit; + return 0; } static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, From a6aaece00a57fa6f22575364b3903dfbccf5345d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 09:30:01 +0100 Subject: [PATCH 161/180] bpf: Improve verifier error messages for users Consolidate all error handling and provide more user-friendly error messages from sanitize_ptr_alu() and sanitize_val_alu(). Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 86 +++++++++++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b8e0171d9591..f378d4ae405f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5856,6 +5856,14 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) return &env->insn_aux_data[env->insn_idx]; } +enum { + REASON_BOUNDS = -1, + REASON_TYPE = -2, + REASON_PATHS = -3, + REASON_LIMIT = -4, + REASON_STACK = -5, +}; + static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, u32 *alu_limit, u8 opcode) @@ -5867,7 +5875,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) - return -EACCES; + return REASON_BOUNDS; switch (ptr_reg->type) { case PTR_TO_STACK: @@ -5894,11 +5902,11 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, } break; default: - return -EINVAL; + return REASON_TYPE; } if (ptr_limit >= max) - return -ERANGE; + return REASON_LIMIT; *alu_limit = ptr_limit; return 0; } @@ -5918,7 +5926,7 @@ static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, if (aux->alu_state && (aux->alu_state != alu_state || aux->alu_limit != alu_limit)) - return -EACCES; + return REASON_PATHS; /* Corresponding fixup done in fixup_bpf_calls(). */ aux->alu_state = alu_state; @@ -5991,7 +5999,46 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); if (!ptr_is_dst_reg && ret) *dst_reg = tmp; - return !ret ? -EFAULT : 0; + return !ret ? REASON_STACK : 0; +} + +static int sanitize_err(struct bpf_verifier_env *env, + const struct bpf_insn *insn, int reason, + const struct bpf_reg_state *off_reg, + const struct bpf_reg_state *dst_reg) +{ + static const char *err = "pointer arithmetic with it prohibited for !root"; + const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub"; + u32 dst = insn->dst_reg, src = insn->src_reg; + + switch (reason) { + case REASON_BOUNDS: + verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n", + off_reg == dst_reg ? dst : src, err); + break; + case REASON_TYPE: + verbose(env, "R%d has pointer with unsupported alu operation, %s\n", + off_reg == dst_reg ? src : dst, err); + break; + case REASON_PATHS: + verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n", + dst, op, err); + break; + case REASON_LIMIT: + verbose(env, "R%d tried to %s beyond pointer bounds, %s\n", + dst, op, err); + break; + case REASON_STACK: + verbose(env, "R%d could not be pushed for speculative verification, %s\n", + dst, err); + break; + default: + verbose(env, "verifier internal error: unknown reason (%d)\n", + reason); + break; + } + + return -EACCES; } /* check that stack access falls within stack limits and that 'reg' doesn't @@ -6116,10 +6163,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) { - verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -6171,10 +6217,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, break; case BPF_SUB: ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) { - verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -6863,9 +6908,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, s32 s32_min_val, s32_max_val; u32 u32_min_val, u32_max_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; - u32 dst = insn->dst_reg; - int ret; bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); + int ret; smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; @@ -6924,20 +6968,16 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: ret = sanitize_val_alu(env, insn); - if (ret < 0) { - verbose(env, "R%d tried to add from different pointers or scalars\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_add(dst_reg, &src_reg); scalar_min_max_add(dst_reg, &src_reg); dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; case BPF_SUB: ret = sanitize_val_alu(env, insn); - if (ret < 0) { - verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); - return ret; - } + if (ret < 0) + return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_sub(dst_reg, &src_reg); scalar_min_max_sub(dst_reg, &src_reg); dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); From 073815b756c51ba9d8384d924c5d1c03ca3d1ae4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 15:05:48 +0100 Subject: [PATCH 162/180] bpf: Refactor and streamline bounds check into helper Move the bounds check in adjust_ptr_min_max_vals() into a small helper named sanitize_check_bounds() in order to simplify the former a bit. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f378d4ae405f..db77e2c670b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6075,6 +6075,37 @@ static int check_stack_access_for_ptr_arithmetic( return 0; } +static int sanitize_check_bounds(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + const struct bpf_reg_state *dst_reg) +{ + u32 dst = insn->dst_reg; + + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ + if (env->bypass_spec_v1) + return 0; + + switch (dst_reg->type) { + case PTR_TO_STACK: + if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, + dst_reg->off + dst_reg->var_off.value)) + return -EACCES; + break; + case PTR_TO_MAP_VALUE: + if (check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } + break; + default: + break; + } + + return 0; +} /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. @@ -6300,22 +6331,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); - /* For unprivileged we require that resulting offset must be in bounds - * in order to be able to sanitize access later on. - */ - if (!env->bypass_spec_v1) { - if (dst_reg->type == PTR_TO_MAP_VALUE && - check_map_access(env, dst, dst_reg->off, 1, false)) { - verbose(env, "R%d pointer arithmetic of map value goes out of range, " - "prohibited for !root\n", dst); - return -EACCES; - } else if (dst_reg->type == PTR_TO_STACK && - check_stack_access_for_ptr_arithmetic( - env, dst, dst_reg, dst_reg->off + - dst_reg->var_off.value)) { - return -EACCES; - } - } + if (sanitize_check_bounds(env, insn, dst_reg) < 0) + return -EACCES; return 0; } From f528819334881fd622fdadeddb3f7edaed8b7c9b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Mar 2021 11:25:39 +0100 Subject: [PATCH 163/180] bpf: Move sanitize_val_alu out of op switch Add a small sanitize_needed() helper function and move sanitize_val_alu() out of the main opcode switch. In upcoming work, we'll move sanitize_ptr_alu() as well out of its opcode switch so this helps to streamline both. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db77e2c670b9..e41b6326e3e6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5945,6 +5945,11 @@ static int sanitize_val_alu(struct bpf_verifier_env *env, return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); } +static bool sanitize_needed(u8 opcode) +{ + return opcode == BPF_ADD || opcode == BPF_SUB; +} + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, @@ -6968,6 +6973,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, return 0; } + if (sanitize_needed(opcode)) { + ret = sanitize_val_alu(env, insn); + if (ret < 0) + return sanitize_err(env, insn, ret, NULL, NULL); + } + /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops. * There are two classes of instructions: The first class we track both * alu32 and alu64 sign/unsigned bounds independently this provides the @@ -6984,17 +6995,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, */ switch (opcode) { case BPF_ADD: - ret = sanitize_val_alu(env, insn); - if (ret < 0) - return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_add(dst_reg, &src_reg); scalar_min_max_add(dst_reg, &src_reg); dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; case BPF_SUB: - ret = sanitize_val_alu(env, insn); - if (ret < 0) - return sanitize_err(env, insn, ret, NULL, NULL); scalar32_min_max_sub(dst_reg, &src_reg); scalar_min_max_sub(dst_reg, &src_reg); dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); From 7fedb63a8307dda0ec3b8969a3b233a1dd7ea8e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Mar 2021 10:38:26 +0100 Subject: [PATCH 164/180] bpf: Tighten speculative pointer arithmetic mask This work tightens the offset mask we use for unprivileged pointer arithmetic in order to mitigate a corner case reported by Piotr and Benedict where in the speculative domain it is possible to advance, for example, the map value pointer by up to value_size-1 out-of-bounds in order to leak kernel memory via side-channel to user space. Before this change, the computed ptr_limit for retrieve_ptr_limit() helper represents largest valid distance when moving pointer to the right or left which is then fed as aux->alu_limit to generate masking instructions against the offset register. After the change, the derived aux->alu_limit represents the largest potential value of the offset register which we mask against which is just a narrower subset of the former limit. For minimal complexity, we call sanitize_ptr_alu() from 2 observation points in adjust_ptr_min_max_vals(), that is, before and after the simulated alu operation. In the first step, we retieve the alu_state and alu_limit before the operation as well as we branch-off a verifier path and push it to the verification stack as we did before which checks the dst_reg under truncation, in other words, when the speculative domain would attempt to move the pointer out-of-bounds. In the second step, we retrieve the new alu_limit and calculate the absolute distance between both. Moreover, we commit the alu_state and final alu_limit via update_alu_sanitation_state() to the env's instruction aux data, and bail out from there if there is a mismatch due to coming from different verification paths with different states. Reported-by: Piotr Krysiuk Reported-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov Tested-by: Benedict Schlueter --- kernel/bpf/verifier.c | 73 ++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e41b6326e3e6..0399ac092b36 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5871,7 +5871,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off, max = 0, ptr_limit = 0; + u32 max = 0, ptr_limit = 0; if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) @@ -5880,26 +5880,18 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the - * left direction, see BPF_REG_FP. + * left direction, see BPF_REG_FP. Also, unknown scalar + * offset where we would need to deal with min/max bounds is + * currently prohibited for unprivileged. */ max = MAX_BPF_STACK + mask_to_left; - /* Indirect variable offset stack access is prohibited in - * unprivileged mode so it's not handled here. - */ - off = ptr_reg->off + ptr_reg->var_off.value; - if (mask_to_left) - ptr_limit = MAX_BPF_STACK + off; - else - ptr_limit = -off - 1; + ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off); break; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; - if (mask_to_left) { - ptr_limit = ptr_reg->umax_value + ptr_reg->off; - } else { - off = ptr_reg->smin_value + ptr_reg->off; - ptr_limit = ptr_reg->map_ptr->value_size - off - 1; - } + ptr_limit = (mask_to_left ? + ptr_reg->smin_value : + ptr_reg->umax_value) + ptr_reg->off; break; default: return REASON_TYPE; @@ -5954,10 +5946,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + struct bpf_insn_aux_data *tmp_aux, + const bool commit_window) { + struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; struct bpf_verifier_state *vstate = env->cur_state; - struct bpf_insn_aux_data *aux = cur_aux(env); bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); @@ -5976,18 +5970,33 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (vstate->speculative) goto do_sim; - alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; - alu_state |= ptr_is_dst_reg ? - BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); if (err < 0) return err; + if (commit_window) { + /* In commit phase we narrow the masking window based on + * the observed pointer move after the simulated operation. + */ + alu_state = tmp_aux->alu_state; + alu_limit = abs(tmp_aux->alu_limit - alu_limit); + } else { + alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= ptr_is_dst_reg ? + BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + } + err = update_alu_sanitation_state(aux, alu_state, alu_limit); if (err < 0) return err; do_sim: + /* If we're in commit phase, we're done here given we already + * pushed the truncated dst_reg into the speculative verification + * stack. + */ + if (commit_window) + return 0; + /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of * masking when off was not within expected range. If off @@ -6130,6 +6139,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + struct bpf_insn_aux_data tmp_aux = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; int ret; @@ -6196,12 +6206,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, /* pointer types do not carry 32-bit bounds at the moment. */ __mark_reg32_unbounded(dst_reg); - switch (opcode) { - case BPF_ADD: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, + &tmp_aux, false); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); + } + switch (opcode) { + case BPF_ADD: /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -6252,10 +6265,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) - return sanitize_err(env, insn, ret, off_reg, dst_reg); - if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -6338,6 +6347,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (sanitize_check_bounds(env, insn, dst_reg) < 0) return -EACCES; + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, + &tmp_aux, true); + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + } return 0; } From d7a5091351756d0ae8e63134313c455624e36a13 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Mar 2021 14:52:31 +0100 Subject: [PATCH 165/180] bpf: Update selftests to reflect new error states Update various selftest error messages: * The 'Rx tried to sub from different maps, paths, or prohibited types' is reworked into more specific/differentiated error messages for better guidance. * The change into 'value -4294967168 makes map_value pointer be out of bounds' is due to moving the mixed bounds check into the speculation handling and thus occuring slightly later than above mentioned sanity check. * The change into 'math between map_value pointer and register with unbounded min value' is similarly due to register sanity check coming before the mixed bounds check. * The case of 'map access: known scalar += value_ptr from different maps' now loads fine given masks are the same from the different paths (despite max map value size being different). Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/bounds.c | 5 ----- .../selftests/bpf/verifier/bounds_deduction.c | 21 ++++++++++--------- .../bpf/verifier/bounds_mix_sign_unsign.c | 13 ------------ .../testing/selftests/bpf/verifier/map_ptr.c | 4 ++-- tools/testing/selftests/bpf/verifier/unpriv.c | 2 +- .../selftests/bpf/verifier/value_ptr_arith.c | 6 ++---- 6 files changed, 16 insertions(+), 35 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 57ed67b86074..8a1caf46ffbc 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -261,8 +261,6 @@ }, .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", - .result_unpriv = REJECT, .errstr = "value -4294967168 makes map_value pointer be out of bounds", .result = REJECT, }, @@ -298,9 +296,6 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - /* not actually fully unbounded, but the bound is very high */ - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", - .result_unpriv = REJECT, .errstr = "value -4294967168 makes map_value pointer be out of bounds", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c index c162498a64fc..91869aea6d64 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c +++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c @@ -6,7 +6,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, @@ -21,7 +21,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .result_unpriv = REJECT, .result = ACCEPT, .retval = 1, @@ -34,22 +34,23 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, { "check deducing bounds from const, 4", .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R6 has pointer with unsupported alu operation", .result_unpriv = REJECT, .result = ACCEPT, }, @@ -61,7 +62,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, @@ -74,7 +75,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, @@ -88,7 +89,7 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -103,7 +104,7 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "dereference of modified ctx ptr", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -116,7 +117,7 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .errstr = "R0 tried to subtract pointer from scalar", .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c index 9baca7a75c42..c2aa6f26738b 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c +++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c @@ -19,7 +19,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -43,7 +42,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -69,7 +67,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -94,7 +91,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -141,7 +137,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -210,7 +205,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -260,7 +254,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -287,7 +280,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -313,7 +305,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -342,7 +333,6 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -372,7 +362,6 @@ }, .fixup_map_hash_8b = { 4 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -400,7 +389,5 @@ }, .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, - .result_unpriv = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index 6f610cfddae5..1f82021429bf 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -76,7 +76,7 @@ }, .fixup_map_hash_16b = { 4 }, .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 has pointer with unsupported alu operation", .result = ACCEPT, }, { @@ -94,6 +94,6 @@ }, .fixup_map_hash_16b = { 4 }, .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R0 has pointer with unsupported alu operation", .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 3e32400c4b44..bd436df5cc32 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -505,7 +505,7 @@ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index feb91266db39..e5913fd3b903 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -21,8 +21,6 @@ .fixup_map_hash_16b = { 5 }, .fixup_map_array_48b = { 8 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps", .retval = 1, }, { @@ -122,7 +120,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different pointers or scalars", + .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", .retval = 0, }, { @@ -169,7 +167,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps, paths, or prohibited types", + .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", .retval = 0, }, { From 6b389c16378a03fe71f3b1365b593ba41d2dd8ec Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Thu, 15 Apr 2021 23:18:13 -0500 Subject: [PATCH 166/180] MAINTAINERS: update my email Update my email and change myself to Reviewer. Signed-off-by: Lijun Pan Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 163264c282eb..a31f76c41aec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8517,9 +8517,9 @@ F: drivers/pci/hotplug/rpaphp* IBM Power SRIOV Virtual NIC Device Driver M: Dany Madden -M: Lijun Pan M: Sukadev Bhattiprolu R: Thomas Falcon +R: Lijun Pan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* From 845be1cd34464620861b457b808e5fb2115d06b0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:45:54 -0700 Subject: [PATCH 167/180] mm: eliminate "expecting prototype" kernel-doc warnings Fix stray kernel-doc warnings in mm/ due to mis-typed or missing function names. Quietens these kernel-doc warnings: mm/mmu_gather.c:264: warning: expecting prototype for tlb_gather_mmu(). Prototype was for __tlb_gather_mmu() instead mm/oom_kill.c:180: warning: expecting prototype for Check whether unreclaimable slab amount is greater than(). Prototype was for should_dump_unreclaim_slab() instead mm/shuffle.c:155: warning: expecting prototype for shuffle_free_memory(). Prototype was for __shuffle_free_memory() instead Link: https://lkml.kernel.org/r/20210411210642.11362-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmu_gather.c | 29 +++++++++++++++++++---------- mm/oom_kill.c | 2 +- mm/shuffle.c | 4 ++-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 0dc7149b0c61..1b9837419bf9 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -249,16 +249,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/** - * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down - * @tlb: the mmu_gather structure to initialize - * @mm: the mm_struct of the target address space - * @fullmm: @mm is without users and we're going to destroy the full address - * space (exit/execve) - * - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. - */ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) { @@ -283,11 +273,30 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, inc_tlb_flush_pending(tlb->mm); } +/** + * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. + */ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) { __tlb_gather_mmu(tlb, mm, false); } +/** + * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * + * In this case, @mm is without users and we're going to destroy the + * full address space (exit/execve). + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. + */ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm) { __tlb_gather_mmu(tlb, mm, true); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 9efaf430cfd3..fa1cf18bac97 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -170,7 +170,7 @@ static bool oom_unkillable_task(struct task_struct *p) return false; } -/** +/* * Check whether unreclaimable slab amount is greater than * all user memory(LRU pages). * dump_unreclaimable_slab() could help in the case that diff --git a/mm/shuffle.c b/mm/shuffle.c index 9c2e145a747a..c13c33b247e8 100644 --- a/mm/shuffle.c +++ b/mm/shuffle.c @@ -147,8 +147,8 @@ void __meminit __shuffle_zone(struct zone *z) spin_unlock_irqrestore(&z->lock, flags); } -/** - * shuffle_free_memory - reduce the predictability of the page allocator +/* + * __shuffle_free_memory - reduce the predictability of the page allocator * @pgdat: node page data */ void __meminit __shuffle_free_memory(pg_data_t *pgdat) From 5c595ac4c776c44b5c59de22ab43b3fe256d9fbb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Apr 2021 15:45:57 -0700 Subject: [PATCH 168/180] kasan: fix hwasan build for gcc gcc-11 adds support for -fsanitize=kernel-hwaddress, so it becomes possible to enable CONFIG_KASAN_SW_TAGS. Unfortunately this fails to build at the moment, because the corresponding command line arguments use llvm specific syntax. Change it to use the cc-param macro instead, which works on both clang and gcc. [elver@google.com: fixup for "kasan: fix hwasan build for gcc"] Link: https://lkml.kernel.org/r/YHQZVfVVLE/LDK2v@elver.google.com Link: https://lkml.kernel.org/r/20210323124112.1229772-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Marco Elver Reviewed-by: Marco Elver Acked-by: Andrey Konovalov Cc: Masahiro Yamada Cc: Michal Marek Cc: Andrey Ryabinin Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/Makefile.kasan | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 1e000cc2e7b4..127012f45166 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -2,6 +2,8 @@ CFLAGS_KASAN_NOSANITIZE := -fno-builtin KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) +cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) + ifdef CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_INLINE @@ -12,8 +14,6 @@ endif CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address -cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) - # -fasan-shadow-offset fails without -fsanitize CFLAGS_KASAN_SHADOW := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET), \ @@ -36,14 +36,14 @@ endif # CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_SW_TAGS ifdef CONFIG_KASAN_INLINE - instrumentation_flags := -mllvm -hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET) + instrumentation_flags := $(call cc-param,hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET)) else - instrumentation_flags := -mllvm -hwasan-instrument-with-calls=1 + instrumentation_flags := $(call cc-param,hwasan-instrument-with-calls=1) endif CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ - -mllvm -hwasan-instrument-stack=$(CONFIG_KASAN_STACK) \ - -mllvm -hwasan-use-short-granules=0 \ + $(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \ + $(call cc-param,hwasan-use-short-granules=0) \ $(instrumentation_flags) endif # CONFIG_KASAN_SW_TAGS From 02c587733c8161355a43e6e110c2e29bd0acff72 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Fri, 16 Apr 2021 15:46:00 -0700 Subject: [PATCH 169/180] kasan: remove redundant config option CONFIG_KASAN_STACK and CONFIG_KASAN_STACK_ENABLE both enable KASAN stack instrumentation, but we should only need one config, so that we remove CONFIG_KASAN_STACK_ENABLE and make CONFIG_KASAN_STACK workable. see [1]. When enable KASAN stack instrumentation, then for gcc we could do no prompt and default value y, and for clang prompt and default value n. This patch fixes the following compilation warning: include/linux/kasan.h:333:30: warning: 'CONFIG_KASAN_STACK' is not defined, evaluates to 0 [-Wundef] [akpm@linux-foundation.org: fix merge snafu] Link: https://bugzilla.kernel.org/show_bug.cgi?id=210221 [1] Link: https://lkml.kernel.org/r/20210226012531.29231-1-walter-zh.wu@mediatek.com Fixes: d9b571c885a8 ("kasan: fix KASAN_STACK dependency for HW_TAGS") Signed-off-by: Walter Wu Suggested-by: Dmitry Vyukov Reviewed-by: Nathan Chancellor Acked-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/sleep.S | 2 +- arch/x86/kernel/acpi/wakeup_64.S | 2 +- include/linux/kasan.h | 2 +- lib/Kconfig.kasan | 9 ++------- mm/kasan/common.c | 2 +- mm/kasan/kasan.h | 2 +- mm/kasan/report_generic.c | 2 +- scripts/Makefile.kasan | 10 ++++++++-- security/Kconfig.hardening | 4 ++-- 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 5bfd9b87f85d..4ea9392f86e0 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -134,7 +134,7 @@ SYM_FUNC_START(_cpu_resume) */ bl cpu_do_resume -#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) mov x0, sp bl kasan_unpoison_task_stack_below #endif diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 56b6865afb2a..d5d8a352eafa 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -115,7 +115,7 @@ SYM_FUNC_START(do_suspend_lowlevel) movq pt_regs_r14(%rax), %r14 movq pt_regs_r15(%rax), %r15 -#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) /* * The suspend path may have poisoned some areas deeper in the stack, * which we now need to unpoison. diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b91732bd05d7..14f72ec96492 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -330,7 +330,7 @@ static inline bool kasan_check_byte(const void *address) #endif /* CONFIG_KASAN */ -#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) void kasan_unpoison_task_stack(struct task_struct *task); #else static inline void kasan_unpoison_task_stack(struct task_struct *task) {} diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index fba9909e31b7..cffc2ebbf185 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -138,9 +138,10 @@ config KASAN_INLINE endchoice -config KASAN_STACK_ENABLE +config KASAN_STACK bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST depends on KASAN_GENERIC || KASAN_SW_TAGS + default y if CC_IS_GCC help The LLVM stack address sanitizer has a know problem that causes excessive stack usage in a lot of functions, see @@ -154,12 +155,6 @@ config KASAN_STACK_ENABLE CONFIG_COMPILE_TEST. On gcc it is assumed to always be safe to use and enabled by default. -config KASAN_STACK - int - depends on KASAN_GENERIC || KASAN_SW_TAGS - default 1 if KASAN_STACK_ENABLE || CC_IS_GCC - default 0 - config KASAN_SW_TAGS_IDENTIFY bool "Enable memory corruption identification" depends on KASAN_SW_TAGS diff --git a/mm/kasan/common.c b/mm/kasan/common.c index b5e08d4cefec..7b53291dafa1 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -63,7 +63,7 @@ void __kasan_unpoison_range(const void *address, size_t size) kasan_unpoison(address, size); } -#if CONFIG_KASAN_STACK +#ifdef CONFIG_KASAN_STACK /* Unpoison the entire stack for a task. */ void kasan_unpoison_task_stack(struct task_struct *task) { diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 8c55634d6edd..3436c6bf7c0c 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -231,7 +231,7 @@ void *kasan_find_first_bad_addr(void *addr, size_t size); const char *kasan_get_bug_type(struct kasan_access_info *info); void kasan_metadata_fetch_row(char *buffer, void *row); -#if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN_GENERIC) && defined(CONFIG_KASAN_STACK) void kasan_print_address_stack_frame(const void *addr); #else static inline void kasan_print_address_stack_frame(const void *addr) { } diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c index 41f374585144..de732bc341c5 100644 --- a/mm/kasan/report_generic.c +++ b/mm/kasan/report_generic.c @@ -128,7 +128,7 @@ void kasan_metadata_fetch_row(char *buffer, void *row) memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); } -#if CONFIG_KASAN_STACK +#ifdef CONFIG_KASAN_STACK static bool __must_check tokenize_frame_descr(const char **frame_descr, char *token, size_t max_tok_len, unsigned long *value) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 127012f45166..3d791908ed36 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -4,6 +4,12 @@ KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) +ifdef CONFIG_KASAN_STACK + stack_enable := 1 +else + stack_enable := 0 +endif + ifdef CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_INLINE @@ -27,7 +33,7 @@ else CFLAGS_KASAN := $(CFLAGS_KASAN_SHADOW) \ $(call cc-param,asan-globals=1) \ $(call cc-param,asan-instrumentation-with-call-threshold=$(call_threshold)) \ - $(call cc-param,asan-stack=$(CONFIG_KASAN_STACK)) \ + $(call cc-param,asan-stack=$(stack_enable)) \ $(call cc-param,asan-instrument-allocas=1) endif @@ -42,7 +48,7 @@ else endif CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ - $(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \ + $(call cc-param,hwasan-instrument-stack=$(stack_enable)) \ $(call cc-param,hwasan-use-short-granules=0) \ $(instrumentation_flags) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 269967c4fc1b..a56c36470cb1 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -64,7 +64,7 @@ choice config GCC_PLUGIN_STRUCTLEAK_BYREF bool "zero-init structs passed by reference (strong)" depends on GCC_PLUGINS - depends on !(KASAN && KASAN_STACK=1) + depends on !(KASAN && KASAN_STACK) select GCC_PLUGIN_STRUCTLEAK help Zero-initialize any structures on the stack that may @@ -82,7 +82,7 @@ choice config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL bool "zero-init anything passed by reference (very strong)" depends on GCC_PLUGINS - depends on !(KASAN && KASAN_STACK=1) + depends on !(KASAN && KASAN_STACK) select GCC_PLUGIN_STRUCTLEAK help Zero-initialize any stack variables that may be passed From d199161653d612b8fb96ac51bfd5b2d2782ecef3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:03 -0700 Subject: [PATCH 170/180] csky: change a Kconfig symbol name to fix e1000 build error e1000's #define of CONFIG_RAM_BASE conflicts with a Kconfig symbol in arch/csky/Kconfig. The symbol in e1000 has been around longer, so change arch/csky/ to use DRAM_BASE instead of RAM_BASE to remove the conflict. (although e1000 is also a 2-line change) Link: https://lkml.kernel.org/r/20210411055335.7111-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reported-by: kernel test robot Acked-by: Guo Ren Cc: Jesse Brandeburg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/csky/Kconfig | 2 +- arch/csky/include/asm/page.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 34e91224adc3..8de5b987edb9 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -314,7 +314,7 @@ config FORCE_MAX_ZONEORDER int "Maximum zone order" default "11" -config RAM_BASE +config DRAM_BASE hex "DRAM start addr (the same with memory-section in dts)" default 0x0 diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 3b91fc3cf36f..ed7451478b1b 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -28,7 +28,7 @@ #define SSEG_SIZE 0x20000000 #define LOWMEM_LIMIT (SSEG_SIZE * 2) -#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (SSEG_SIZE - 1)) +#define PHYS_OFFSET_OFFSET (CONFIG_DRAM_BASE & (SSEG_SIZE - 1)) #ifndef __ASSEMBLY__ From 19d000d93303e05bd7b1326e3de9df05a41b25b5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:06 -0700 Subject: [PATCH 171/180] ia64: remove duplicate entries in generic_defconfig Fix ia64 generic_defconfig duplicate entries, as warned by: arch/ia64/configs/generic_defconfig: warning: override: reassigning to symbol ATA: => 58 arch/ia64/configs/generic_defconfig: warning: override: reassigning to symbol ATA_PIIX: => 59 These 2 symbols still have the same value as in the removed lines. Link: https://lkml.kernel.org/r/20210411020255.18052-1-rdunlap@infradead.org Fixes: c331649e6371 ("ia64: Use libata instead of the legacy ide driver in defconfigs") Signed-off-by: Randy Dunlap Reported-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/configs/generic_defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index ca0d596c800d..8916a2850c48 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -55,8 +55,6 @@ CONFIG_CHR_DEV_SG=m CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_QLOGIC_1280=y -CONFIG_ATA=y -CONFIG_ATA_PIIX=y CONFIG_SATA_VITESSE=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m From e2af9da4f867a1a54f1252bf3abc1a5c63951778 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:09 -0700 Subject: [PATCH 172/180] ia64: fix discontig.c section mismatches Fix IA64 discontig.c Section mismatch warnings. When CONFIG_SPARSEMEM=y and CONFIG_MEMORY_HOTPLUG=y, the functions computer_pernodesize() and scatter_node_data() should not be marked as __meminit because they are needed after init, on any memory hotplug event. Also, early_nr_cpus_node() is called by compute_pernodesize(), so early_nr_cpus_node() cannot be __meminit either. WARNING: modpost: vmlinux.o(.text.unlikely+0x1612): Section mismatch in reference from the function arch_alloc_nodedata() to the function .meminit.text:compute_pernodesize() The function arch_alloc_nodedata() references the function __meminit compute_pernodesize(). This is often because arch_alloc_nodedata lacks a __meminit annotation or the annotation of compute_pernodesize is wrong. WARNING: modpost: vmlinux.o(.text.unlikely+0x1692): Section mismatch in reference from the function arch_refresh_nodedata() to the function .meminit.text:scatter_node_data() The function arch_refresh_nodedata() references the function __meminit scatter_node_data(). This is often because arch_refresh_nodedata lacks a __meminit annotation or the annotation of scatter_node_data is wrong. WARNING: modpost: vmlinux.o(.text.unlikely+0x1502): Section mismatch in reference from the function compute_pernodesize() to the function .meminit.text:early_nr_cpus_node() The function compute_pernodesize() references the function __meminit early_nr_cpus_node(). This is often because compute_pernodesize lacks a __meminit annotation or the annotation of early_nr_cpus_node is wrong. Link: https://lkml.kernel.org/r/20210411001201.3069-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/discontig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 03b3a02375ff..c310b4c99fb3 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -95,7 +95,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len, * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been * called yet. Note that node 0 will also count all non-existent cpus. */ -static int __meminit early_nr_cpus_node(int node) +static int early_nr_cpus_node(int node) { int cpu, n = 0; @@ -110,7 +110,7 @@ static int __meminit early_nr_cpus_node(int node) * compute_pernodesize - compute size of pernode data * @node: the node id. */ -static unsigned long __meminit compute_pernodesize(int node) +static unsigned long compute_pernodesize(int node) { unsigned long pernodesize = 0, cpus; @@ -367,7 +367,7 @@ static void __init reserve_pernode_space(void) } } -static void __meminit scatter_node_data(void) +static void scatter_node_data(void) { pg_data_t **dst; int node; From 17786fea414393813b56e33a1a01b2dfa03c0915 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Fri, 16 Apr 2021 15:46:12 -0700 Subject: [PATCH 173/180] ia64: tools: remove inclusion of ia64-specific version of errno.h header There is no longer an ia64-specific version of the errno.h header below arch/ia64/include/uapi/asm/, so trying to build tools/bpf fails with: CC /usr/src/linux/tools/bpf/bpftool/btf_dumper.o In file included from /usr/src/linux/tools/include/linux/err.h:8, from btf_dumper.c:11: /usr/src/linux/tools/include/uapi/asm/errno.h:13:10: fatal error: ../../../arch/ia64/include/uapi/asm/errno.h: No such file or directory 13 | #include "../../../arch/ia64/include/uapi/asm/errno.h" | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compilation terminated. Thus, just remove the inclusion of the ia64-specific errno.h so that the build will use the generic errno.h header on this target which was used there anyway as the ia64-specific errno.h was just a wrapper for the generic header. Fixes: c25f867ddd00 ("ia64: remove unneeded uapi asm-generic wrappers") Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/uapi/asm/errno.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h index 637189ec1ab9..d30439b4b8ab 100644 --- a/tools/include/uapi/asm/errno.h +++ b/tools/include/uapi/asm/errno.h @@ -9,8 +9,6 @@ #include "../../../arch/alpha/include/uapi/asm/errno.h" #elif defined(__mips__) #include "../../../arch/mips/include/uapi/asm/errno.h" -#elif defined(__ia64__) -#include "../../../arch/ia64/include/uapi/asm/errno.h" #elif defined(__xtensa__) #include "../../../arch/xtensa/include/uapi/asm/errno.h" #else From f4bf09dc3aaa4b07cd15630f2023f68cb2668809 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Fri, 16 Apr 2021 15:46:15 -0700 Subject: [PATCH 174/180] ia64: tools: remove duplicate definition of ia64_mf() on ia64 The ia64_mf() macro defined in tools/arch/ia64/include/asm/barrier.h is already defined in on ia64 which causes libbpf failing to build: CC /usr/src/linux/tools/bpf/bpftool//libbpf/staticobjs/libbpf.o In file included from /usr/src/linux/tools/include/asm/barrier.h:24, from /usr/src/linux/tools/include/linux/ring_buffer.h:4, from libbpf.c:37: /usr/src/linux/tools/include/asm/../../arch/ia64/include/asm/barrier.h:43: error: "ia64_mf" redefined [-Werror] 43 | #define ia64_mf() asm volatile ("mf" ::: "memory") | In file included from /usr/include/ia64-linux-gnu/asm/intrinsics.h:20, from /usr/include/ia64-linux-gnu/asm/swab.h:11, from /usr/include/linux/swab.h:8, from /usr/include/linux/byteorder/little_endian.h:13, from /usr/include/ia64-linux-gnu/asm/byteorder.h:5, from /usr/src/linux/tools/include/uapi/linux/perf_event.h:20, from libbpf.c:36: /usr/include/ia64-linux-gnu/asm/gcc_intrin.h:382: note: this is the location of the previous definition 382 | #define ia64_mf() __asm__ volatile ("mf" ::: "memory") | cc1: all warnings being treated as errors Thus, remove the definition from tools/arch/ia64/include/asm/barrier.h. Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/arch/ia64/include/asm/barrier.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h index 4d471d9511a5..6fffe5682713 100644 --- a/tools/arch/ia64/include/asm/barrier.h +++ b/tools/arch/ia64/include/asm/barrier.h @@ -39,9 +39,6 @@ * sequential memory pages only. */ -/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */ -#define ia64_mf() asm volatile ("mf" ::: "memory") - #define mb() ia64_mf() #define rmb() mb() #define wmb() mb() From 94036f4c884377bdf2da1ba7666c9599d6df0191 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 Apr 2021 15:46:18 -0700 Subject: [PATCH 175/180] mm/mapping_dirty_helpers: guard hugepage pud's usage Mapping dirty helpers have, so far, been only used on X86, but a port of vmwgfx to ARM64 exposed a problem which results in a compilation error on ARM64 systems: mm/mapping_dirty_helpers.c: In function `wp_clean_pud_entry': mm/mapping_dirty_helpers.c:172:32: error: implicit declaration of function `pud_dirty'; did you mean `pmd_dirty'? [-Werror=implicit-function-declaration] This is due to the fact that mapping_dirty_helpers code assumes that pud_dirty is always defined, which is not the case for architectures that don't define CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD. ARM64 arch is a little inconsistent when it comes to PUD hugepage helpers, e.g. it defines pud_young but not pud_dirty but regardless of that the core kernel code shouldn't assume that any of the PUD hugepage helpers are available unless CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is defined. This prevents compilation errors whenever one of the drivers is ported to new architectures. Link: https://lkml.kernel.org/r/20210409165151.694574-1-zackr@vmware.com Signed-off-by: Zack Rusin Reviewed-by: Thomas Hellstrm (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mapping_dirty_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c index b59054ef2e10..b890854ec761 100644 --- a/mm/mapping_dirty_helpers.c +++ b/mm/mapping_dirty_helpers.c @@ -165,10 +165,12 @@ static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end, return 0; } +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD /* Huge pud */ walk->action = ACTION_CONTINUE; if (pud_trans_huge(pudval) || pud_devmap(pudval)) WARN_ON(pud_write(pudval) || pud_dirty(pudval)); +#endif return 0; } From 458376913d86bed2fb781b4952eb6861675ef3be Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Apr 2021 15:46:20 -0700 Subject: [PATCH 176/180] mm: ptdump: fix build failure READ_ONCE() cannot be used for reading PTEs. Use ptep_get() instead, to avoid the following errors: CC mm/ptdump.o In file included from : mm/ptdump.c: In function 'ptdump_pte_entry': include/linux/compiler_types.h:320:38: error: call to '__compiletime_assert_207' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^ include/linux/compiler_types.h:301:4: note: in definition of macro '__compiletime_assert' 301 | prefix ## suffix(); \ | ^~~~~~ include/linux/compiler_types.h:320:2: note: in expansion of macro '_compiletime_assert' 320 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:2: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:49:2: note: in expansion of macro 'compiletime_assert_rwonce_type' 49 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ mm/ptdump.c:114:14: note: in expansion of macro 'READ_ONCE' 114 | pte_t val = READ_ONCE(*pte); | ^~~~~~~~~ make[2]: *** [mm/ptdump.o] Error 1 See commit 481e980a7c19 ("mm: Allow arches to provide ptep_get()") and commit c0e1c8c22beb ("powerpc/8xx: Provide ptep_get() with 16k pages") for details. Link: https://lkml.kernel.org/r/912b349e2bcaa88939904815ca0af945740c6bd4.1618478922.git.christophe.leroy@csgroup.eu Fixes: 30d621f6723b ("mm: add generic ptdump") Signed-off-by: Christophe Leroy Cc: Steven Price Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/ptdump.c b/mm/ptdump.c index 4354c1422d57..da751448d0e4 100644 --- a/mm/ptdump.c +++ b/mm/ptdump.c @@ -111,7 +111,7 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - pte_t val = READ_ONCE(*pte); + pte_t val = ptep_get(pte); if (st->effective_prot) st->effective_prot(st, 4, pte_val(val)); From 04c53de57cb6435738961dace8b1b71d3ecd3c39 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Apr 2021 15:46:23 -0700 Subject: [PATCH 177/180] gcov: clang: fix clang-11+ build With clang-11+, the code is broken due to my kvmalloc() conversion (which predated the clang-11 support code) leaving one vmalloc() in place. Fix that. Link: https://lkml.kernel.org/r/20210412214210.6e1ecca9cdc5.I24459763acf0591d5e6b31c7e3a59890d802f79c@changeid Signed-off-by: Johannes Berg Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/gcov/clang.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index c466c7fbdece..b81f2823630d 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -369,7 +369,7 @@ static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) INIT_LIST_HEAD(&fn_dup->head); cv_size = fn->num_counters * sizeof(fn->counters[0]); - fn_dup->counters = vmalloc(cv_size); + fn_dup->counters = kvmalloc(cv_size, GFP_KERNEL); if (!fn_dup->counters) { kfree(fn_dup); return NULL; From c95c2d328cd051484bea161e66dfa715c02a7d7e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:26 -0700 Subject: [PATCH 178/180] lib: remove "expecting prototype" kernel-doc warnings Fix various kernel-doc warnings in lib/ due to missing or erroneous function names. Add kernel-doc for some function parameters that was missing. Use kernel-doc "Return:" notation in earlycpio.c. Quietens the following warnings: lib/earlycpio.c:61: warning: expecting prototype for cpio_data find_cpio_data(). Prototype was for find_cpio_data() instead lib/lru_cache.c:640: warning: expecting prototype for lc_dump(). Prototype was for lc_seq_dump_details() instead lru_cache.c:90: warning: Function parameter or member 'cache' not described in 'lc_create' lib/parman.c:368: warning: expecting prototype for parman_item_del(). Prototype was for parman_item_remove() instead parman.c:309: warning: Excess function parameter 'prority' description in 'parman_prio_init' lib/radix-tree.c:703: warning: expecting prototype for __radix_tree_insert(). Prototype was for radix_tree_insert() instead radix-tree.c:180: warning: Excess function parameter 'addr' description in 'radix_tree_find_next_bit' radix-tree.c:180: warning: Excess function parameter 'size' description in 'radix_tree_find_next_bit' radix-tree.c:931: warning: Function parameter or member 'iter' not described in 'radix_tree_iter_replace' Link: https://lkml.kernel.org/r/20210411221756.15461-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Philipp Reisner Cc: Lars Ellenberg Cc: Jiri Pirko Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/earlycpio.c | 4 ++-- lib/lru_cache.c | 3 ++- lib/parman.c | 4 ++-- lib/radix-tree.c | 11 ++++++----- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/earlycpio.c b/lib/earlycpio.c index e83628882001..7921193f0424 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -40,7 +40,7 @@ enum cpio_fields { }; /** - * cpio_data find_cpio_data - Search for files in an uncompressed cpio + * find_cpio_data - Search for files in an uncompressed cpio * @path: The directory to search for, including a slash at the end * @data: Pointer to the cpio archive or a header inside * @len: Remaining length of the cpio based on data pointer @@ -49,7 +49,7 @@ enum cpio_fields { * matching file itself. It can be used to iterate through the cpio * to find all files inside of a directory path. * - * @return: struct cpio_data containing the address, length and + * Return: &struct cpio_data containing the address, length and * filename (with the directory path cut off) of the found file. * If you search for a filename and not for files in a directory, * pass the absolute path of the filename in the cpio and make sure diff --git a/lib/lru_cache.c b/lib/lru_cache.c index c69ee53d8dde..52313acbfa62 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -76,6 +76,7 @@ int lc_try_lock(struct lru_cache *lc) /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @cache: cache root pointer * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects @@ -627,7 +628,7 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index) } /** - * lc_dump - Dump a complete LRU cache to seq in textual form. + * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form. * @lc: the lru cache to operate on * @seq: the &struct seq_file pointer to seq_printf into * @utext: user supplied additional "heading" or other info diff --git a/lib/parman.c b/lib/parman.c index a11f2f667639..3f8f8d422e62 100644 --- a/lib/parman.c +++ b/lib/parman.c @@ -297,7 +297,7 @@ EXPORT_SYMBOL(parman_destroy); * parman_prio_init - initializes a parman priority chunk * @parman: parman instance * @prio: parman prio structure to be initialized - * @prority: desired priority of the chunk + * @priority: desired priority of the chunk * * Note: all locking must be provided by the caller. * @@ -356,7 +356,7 @@ int parman_item_add(struct parman *parman, struct parman_prio *prio, EXPORT_SYMBOL(parman_item_add); /** - * parman_item_del - deletes parman item + * parman_item_remove - deletes parman item * @parman: parman instance * @prio: parman prio instance to delete the item from * @item: parman item instance diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3a4da11b804d..b3afafe46fff 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -166,9 +166,9 @@ static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) /** * radix_tree_find_next_bit - find the next set bit in a memory region * - * @addr: The address to base the search on - * @size: The bitmap size in bits - * @offset: The bitnumber to start searching at + * @node: where to begin the search + * @tag: the tag index + * @offset: the bitnumber to start searching at * * Unrollable variant of find_next_bit() for constant size arrays. * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero. @@ -461,7 +461,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, /** * radix_tree_shrink - shrink radix tree to minimum height - * @root radix tree root + * @root: radix tree root */ static inline bool radix_tree_shrink(struct radix_tree_root *root) { @@ -691,7 +691,7 @@ static inline int insert_entries(struct radix_tree_node *node, } /** - * __radix_tree_insert - insert into a radix tree + * radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key * @item: item to insert @@ -919,6 +919,7 @@ EXPORT_SYMBOL(radix_tree_replace_slot); /** * radix_tree_iter_replace - replace item in a slot * @root: radix tree root + * @iter: iterator state * @slot: pointer to slot * @item: new item to store in the slot. * From f2764bd4f6a8dffaec3e220728385d9756b3c2cb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 Apr 2021 21:29:13 +0200 Subject: [PATCH 179/180] netlink: don't call ->netlink_bind with table lock held When I added support to allow generic netlink multicast groups to be restricted to subscribers with CAP_NET_ADMIN I was unaware that a genl_bind implementation already existed in the past. It was reverted due to ABBA deadlock: 1. ->netlink_bind gets called with the table lock held. 2. genetlink bind callback is invoked, it grabs the genl lock. But when a new genl subsystem is (un)registered, these two locks are taken in reverse order. One solution would be to revert again and add a comment in genl referring 1e82a62fec613, "genetlink: remove genl_bind"). This would need a second change in mptcp to not expose the raw token value anymore, e.g. by hashing the token with a secret key so userspace can still associate subflow events with the correct mptcp connection. However, Paolo Abeni reminded me to double-check why the netlink table is locked in the first place. I can't find one. netlink_bind() is already called without this lock when userspace joins a group via NETLINK_ADD_MEMBERSHIP setsockopt. Same holds for the netlink_unbind operation. Digging through the history, commit f773608026ee1 ("netlink: access nlk groups safely in netlink bind and getname") expanded the lock scope. commit 3a20773beeeeade ("net: netlink: cap max groups which will be considered in netlink_bind()") ... removed the nlk->ngroups access that the lock scope extension was all about. Reduce the lock scope again and always call ->netlink_bind without the table lock. The Fixes tag should be vs. the patch mentioned in the link below, but that one got squash-merged into the patch that came earlier in the series. Fixes: 4d54cc32112d8d ("mptcp: avoid lock_fast usage in accept path") Link: https://lore.kernel.org/mptcp/20210213000001.379332-8-mathew.j.martineau@linux.intel.com/T/#u Cc: Cong Wang Cc: Xin Long Cc: Johannes Berg Cc: Sean Tranchetti Cc: Paolo Abeni Cc: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dd488938447f..3a62f97acf39 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1019,7 +1019,6 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - netlink_lock_table(); if (nlk->netlink_bind && groups) { int group; @@ -1031,13 +1030,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!err) continue; netlink_undo_bind(group, groups, sk); - goto unlock; + return err; } } /* No need for barriers here as we return to user-space without * using any of the bound attributes. */ + netlink_lock_table(); if (!bound) { err = nladdr->nl_pid ? netlink_insert(sk, nladdr->nl_pid) : From fae8817ae804a682c6823ad1672438f39fc46c28 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Apr 2021 17:43:30 -0700 Subject: [PATCH 180/180] cxl/mem: Fix memory device capacity probing The CXL Identify Memory Device output payload emits capacity in 256MB units. The driver is treating the capacity field as bytes. This was missed because QEMU reports bytes when it should report bytes / 256MB. Fixes: 8adaf747c9f0 ("cxl/mem: Find device capabilities") Reviewed-by: Vishal Verma Cc: Ben Widawsky Link: https://lore.kernel.org/r/161862021044.3259705.7008520073059739760.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 1b5078311f7d..2acc6173da36 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -1419,6 +1420,7 @@ static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm) */ static int cxl_mem_identify(struct cxl_mem *cxlm) { + /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ struct cxl_mbox_identify { char fw_revision[0x10]; __le64 total_capacity; @@ -1447,10 +1449,11 @@ static int cxl_mem_identify(struct cxl_mem *cxlm) * For now, only the capacity is exported in sysfs */ cxlm->ram_range.start = 0; - cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) - 1; + cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1; cxlm->pmem_range.start = 0; - cxlm->pmem_range.end = le64_to_cpu(id.persistent_capacity) - 1; + cxlm->pmem_range.end = + le64_to_cpu(id.persistent_capacity) * SZ_256M - 1; memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));