From 0de0e198bc7191a0e46cf71f66fec4d07ca91396 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 25 Apr 2017 12:58:31 -0700 Subject: [PATCH 001/206] ACPI / sysfs: fix acpi_get_table() leak / acpi-sysfs denial of service Reading an ACPI table through the /sys/firmware/acpi/tables interface more than 65,536 times leads to the following log message: ACPI Error: Table ffff88033595eaa8, Validation count is zero after increment (20170119/tbutils-423) ...and the table being unavailable until the next reboot. Add the missing acpi_put_table() so the table ->validation_count is decremented after each read. Reported-by: Anush Seetharaman Fixes: 174cc7187e6f "ACPICA: Tables: Back port acpi_get_table_with_size() ..." Signed-off-by: Dan Williams Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index cf05ae973381..5180fef9eb49 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -333,14 +333,17 @@ static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj, container_of(bin_attr, struct acpi_table_attr, attr); struct acpi_table_header *table_header = NULL; acpi_status status; + ssize_t rc; status = acpi_get_table(table_attr->name, table_attr->instance, &table_header); if (ACPI_FAILURE(status)) return -ENODEV; - return memory_read_from_buffer(buf, count, &offset, - table_header, table_header->length); + rc = memory_read_from_buffer(buf, count, &offset, table_header, + table_header->length); + acpi_put_table(table_header); + return rc; } static int acpi_table_attr_init(struct kobject *tables_obj, From 2ac97f0f6654da14312d125005c77a6010e0ea38 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 25 Apr 2017 11:29:56 -0700 Subject: [PATCH 002/206] HID: wacom: Have wacom_tpc_irq guard against possible NULL dereference The following Smatch complaint was generated in response to commit 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device"): drivers/hid/wacom_wac.c:1586 wacom_tpc_irq() error: we previously assumed 'wacom->touch_input' could be null (see line 1577) The 'touch_input' and 'pen_input' variables point to the 'struct input_dev' used for relaying touch and pen events to userspace, respectively. If a device does not have a touch interface or pen interface, the associated input variable is NULL. The 'wacom_tpc_irq()' function is responsible for forwarding input reports to a more-specific IRQ handler function. An unknown report could theoretically be mistaken as e.g. a touch report on a device which does not have a touch interface. This can be prevented by only calling the pen/touch functions are called when the pen/touch pointers are valid. Fixes: 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device") Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Cc: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 47 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4b225fb19a16..e274c9dc32f3 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1571,37 +1571,38 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len) { unsigned char *data = wacom->data; - if (wacom->pen_input) + if (wacom->pen_input) { dev_dbg(wacom->pen_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - else if (wacom->touch_input) + + if (len == WACOM_PKGLEN_PENABLED || + data[0] == WACOM_REPORT_PENABLED) + return wacom_tpc_pen(wacom); + } + else if (wacom->touch_input) { dev_dbg(wacom->touch_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - switch (len) { - case WACOM_PKGLEN_TPC1FG: - return wacom_tpc_single_touch(wacom, len); - - case WACOM_PKGLEN_TPC2FG: - return wacom_tpc_mt_touch(wacom); - - case WACOM_PKGLEN_PENABLED: - return wacom_tpc_pen(wacom); - - default: - switch (data[0]) { - case WACOM_REPORT_TPC1FG: - case WACOM_REPORT_TPCHID: - case WACOM_REPORT_TPCST: - case WACOM_REPORT_TPC1FGE: + switch (len) { + case WACOM_PKGLEN_TPC1FG: return wacom_tpc_single_touch(wacom, len); - case WACOM_REPORT_TPCMT: - case WACOM_REPORT_TPCMT2: - return wacom_mt_touch(wacom); + case WACOM_PKGLEN_TPC2FG: + return wacom_tpc_mt_touch(wacom); - case WACOM_REPORT_PENABLED: - return wacom_tpc_pen(wacom); + default: + switch (data[0]) { + case WACOM_REPORT_TPC1FG: + case WACOM_REPORT_TPCHID: + case WACOM_REPORT_TPCST: + case WACOM_REPORT_TPC1FGE: + return wacom_tpc_single_touch(wacom, len); + + case WACOM_REPORT_TPCMT: + case WACOM_REPORT_TPCMT2: + return wacom_mt_touch(wacom); + + } } } From f4b65b9563216b3e01a5cc844c3ba68901d9b195 Mon Sep 17 00:00:00 2001 From: Che-Liang Chiou Date: Fri, 7 Apr 2017 10:12:36 +0200 Subject: [PATCH 003/206] HID: magicmouse: Set multi-touch keybits for Magic Mouse The driver emits multi-touch events for Magic Trackpad as well as Magic Mouse, but it does not set keybits that are related to multi-touch event for Magic Mouse; so set these keybits. The keybits that are not set cause trouble because user programs often probe these keybits for self-configuration and thus they cannot operate properly if the keybits are not set. One of such troubles is that libevdev will not be able to emit correct touch count, causing gestures library failed to do fling stop. Signed-off-by: Che-Liang Chiou Signed-off-by: Thierry Escande Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 20b40ad26325..1d6c997b3001 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -349,6 +349,7 @@ static int magicmouse_raw_event(struct hid_device *hdev, if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { magicmouse_emit_buttons(msc, clicks & 3); + input_mt_report_pointer_emulation(input, true); input_report_rel(input, REL_X, x); input_report_rel(input, REL_Y, y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ @@ -388,16 +389,16 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd __clear_bit(BTN_RIGHT, input->keybit); __clear_bit(BTN_MIDDLE, input->keybit); __set_bit(BTN_MOUSE, input->keybit); - __set_bit(BTN_TOOL_FINGER, input->keybit); - __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); - __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); - __set_bit(BTN_TOOL_QUADTAP, input->keybit); - __set_bit(BTN_TOOL_QUINTTAP, input->keybit); - __set_bit(BTN_TOUCH, input->keybit); - __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); } + __set_bit(BTN_TOOL_FINGER, input->keybit); + __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); + __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); + __set_bit(BTN_TOOL_QUADTAP, input->keybit); + __set_bit(BTN_TOOL_QUINTTAP, input->keybit); + __set_bit(BTN_TOUCH, input->keybit); + __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(EV_ABS, input->evbit); From 0bb7a37f8d15e5fb5d21776875f9fbc74e10753a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Elio=20Petten=C3=B2?= Date: Wed, 26 Apr 2017 17:37:04 +0100 Subject: [PATCH 004/206] HID: elecom: extend to fix the descriptor for DEFT trackballs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ELECOM DEFT trackballs report only five buttons, when the device actually has 8. Change the descriptor so that the HID driver can see all of them. For completeness and future reference, I included a side-by-side diff of the part of the descriptor that is being edited. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Yuxuan Shui Signed-off-by: Diego Elio Pettenò Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 6 ++-- drivers/hid/hid-core.c | 2 ++ drivers/hid/hid-elecom.c | 62 ++++++++++++++++++++++++++++++++++------ drivers/hid/hid-ids.h | 2 ++ 4 files changed, 61 insertions(+), 11 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index fe40e5e499dd..687705c50794 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -275,10 +275,12 @@ config HID_EMS_FF - Trio Linker Plus II config HID_ELECOM - tristate "ELECOM BM084 bluetooth mouse" + tristate "ELECOM HID devices" depends on HID ---help--- - Support for the ELECOM BM084 (bluetooth mouse). + Support for ELECOM devices: + - BM084 Bluetooth Mouse + - DEFT Trackball (Wired and wireless) config HID_ELO tristate "ELO USB 4000/4500 touchscreen" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 37084b645785..38d041510e1d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1891,6 +1891,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) }, { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) }, diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index 6e3848a8d8dd..e2c7465df69f 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -1,10 +1,8 @@ /* - * HID driver for Elecom BM084 (bluetooth mouse). - * Removes a non-existing horizontal wheel from - * the HID descriptor. - * (This module is based on "hid-ortek".) - * + * HID driver for ELECOM devices. * Copyright (c) 2010 Richard Nauber + * Copyright (c) 2016 Yuxuan Shui + * Copyright (c) 2017 Diego Elio Pettenò */ /* @@ -23,15 +21,61 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { - hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n"); - rdesc[47] = 0x00; + switch (hdev->product) { + case USB_DEVICE_ID_ELECOM_BM084: + /* The BM084 Bluetooth mouse includes a non-existing horizontal + * wheel in the HID descriptor. */ + if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { + hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n"); + rdesc[47] = 0x00; + } + break; + case USB_DEVICE_ID_ELECOM_DEFT_WIRED: + case USB_DEVICE_ID_ELECOM_DEFT_WIRELESS: + /* The DEFT trackball has eight buttons, but its descriptor only + * reports five, disabling the three Fn buttons on the top of + * the mouse. + * + * Apply the following diff to the descriptor: + * + * Collection (Physical), Collection (Physical), + * Report ID (1), Report ID (1), + * Report Count (5), -> Report Count (8), + * Report Size (1), Report Size (1), + * Usage Page (Button), Usage Page (Button), + * Usage Minimum (01h), Usage Minimum (01h), + * Usage Maximum (05h), -> Usage Maximum (08h), + * Logical Minimum (0), Logical Minimum (0), + * Logical Maximum (1), Logical Maximum (1), + * Input (Variable), Input (Variable), + * Report Count (1), -> Report Count (0), + * Report Size (3), Report Size (3), + * Input (Constant), Input (Constant), + * Report Size (16), Report Size (16), + * Report Count (2), Report Count (2), + * Usage Page (Desktop), Usage Page (Desktop), + * Usage (X), Usage (X), + * Usage (Y), Usage (Y), + * Logical Minimum (-32768), Logical Minimum (-32768), + * Logical Maximum (32767), Logical Maximum (32767), + * Input (Variable, Relative), Input (Variable, Relative), + * End Collection, End Collection, + */ + if (*rsize == 213 && rdesc[13] == 5 && rdesc[21] == 5) { + hid_info(hdev, "Fixing up Elecom DEFT Fn buttons\n"); + rdesc[13] = 8; /* Button/Variable Report Count */ + rdesc[21] = 8; /* Button/Variable Usage Maximum */ + rdesc[29] = 0; /* Button/Constant Report Count */ + } + break; } return rdesc; } static const struct hid_device_id elecom_devices[] = { - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084)}, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, { } }; MODULE_DEVICE_TABLE(hid, elecom_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 643390ba749d..8e8a1baee090 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -358,6 +358,8 @@ #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 +#define USB_DEVICE_ID_ELECOM_DEFT_WIRED 0x00fe +#define USB_DEVICE_ID_ELECOM_DEFT_WIRELESS 0x00ff #define USB_VENDOR_ID_DREAM_CHEEKY 0x1d34 #define USB_DEVICE_ID_DREAM_CHEEKY_WN 0x0004 From eb8df543e444492328f506adffc7dfe94111f1bd Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:44 +0200 Subject: [PATCH 005/206] dmaengine: mv_xor_v2: handle mv_xor_v2_prep_sw_desc() error properly The mv_xor_v2_prep_sw_desc() is called from a few different places in the driver, but we never take into account the fact that it might return NULL. This commit fixes that, ensuring that we don't panic if there are no more descriptors available. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index a28a01fcba67..e9280207ac19 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -389,6 +389,8 @@ mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, __func__, len, &src, &dest, flags); sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; sw_desc->async_tx.flags = flags; @@ -443,6 +445,8 @@ mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, __func__, src_cnt, len, &dest, flags); sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; sw_desc->async_tx.flags = flags; @@ -491,6 +495,8 @@ mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) container_of(chan, struct mv_xor_v2_device, dmachan); sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; /* set the HW descriptor */ hw_descriptor = &sw_desc->hw_desc; From 2aab4e18152cd30cb5d2f4c27629fc8a04aed979 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:45 +0200 Subject: [PATCH 006/206] dmaengine: mv_xor_v2: properly handle wrapping in the array of HW descriptors mv_xor_v2_tasklet() is looping over completed HW descriptors. Before the loop, it initializes 'next_pending_hw_desc' to the first HW descriptor to handle, and then the loop simply increments this point, without taking care of wrapping when we reach the last HW descriptor. The 'pending_ptr' index was being wrapped back to 0 at the end, but it wasn't used in each iteration of the loop to calculate next_pending_hw_desc. This commit fixes that, and makes next_pending_hw_desc a variable local to the loop itself. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index e9280207ac19..bdfb36ecff81 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -560,7 +560,6 @@ static void mv_xor_v2_tasklet(unsigned long data) { struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data; int pending_ptr, num_of_pending, i; - struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL; struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL; dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__); @@ -568,17 +567,10 @@ static void mv_xor_v2_tasklet(unsigned long data) /* get the pending descriptors parameters */ num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr); - /* next HW descriptor */ - next_pending_hw_desc = xor_dev->hw_desq_virt + pending_ptr; - /* loop over free descriptors */ for (i = 0; i < num_of_pending; i++) { - - if (pending_ptr > MV_XOR_V2_DESC_NUM) - pending_ptr = 0; - - if (next_pending_sw_desc != NULL) - next_pending_hw_desc++; + struct mv_xor_v2_descriptor *next_pending_hw_desc = + xor_dev->hw_desq_virt + pending_ptr; /* get the SW descriptor related to the HW descriptor */ next_pending_sw_desc = @@ -614,6 +606,8 @@ static void mv_xor_v2_tasklet(unsigned long data) /* increment the next descriptor */ pending_ptr++; + if (pending_ptr >= MV_XOR_V2_DESC_NUM) + pending_ptr = 0; } if (num_of_pending != 0) { From bc473da1ed726c975ad47f8d7d27631de11356d8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:46 +0200 Subject: [PATCH 007/206] dmaengine: mv_xor_v2: do not use descriptors not acked by async_tx Descriptors that have not been acknowledged by the async_tx layer should not be re-used, so this commit adjusts the implementation of mv_xor_v2_prep_sw_desc() to skip descriptors for which async_tx_test_ack() is false. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index bdfb36ecff81..cb60e7c4aa16 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -344,6 +344,7 @@ static struct mv_xor_v2_sw_desc * mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev) { struct mv_xor_v2_sw_desc *sw_desc; + bool found = false; /* Lock the channel */ spin_lock_bh(&xor_dev->lock); @@ -355,19 +356,23 @@ mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev) return NULL; } - /* get a free SW descriptor from the SW DESQ */ - sw_desc = list_first_entry(&xor_dev->free_sw_desc, - struct mv_xor_v2_sw_desc, free_list); + list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) { + if (async_tx_test_ack(&sw_desc->async_tx)) { + found = true; + break; + } + } + + if (!found) { + spin_unlock_bh(&xor_dev->lock); + return NULL; + } + list_del(&sw_desc->free_list); /* Release the channel */ spin_unlock_bh(&xor_dev->lock); - /* set the async tx descriptor */ - dma_async_tx_descriptor_init(&sw_desc->async_tx, &xor_dev->dmachan); - sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit; - async_tx_ack(&sw_desc->async_tx); - return sw_desc; } @@ -785,8 +790,15 @@ static int mv_xor_v2_probe(struct platform_device *pdev) /* add all SW descriptors to the free list */ for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) { - xor_dev->sw_desq[i].idx = i; - list_add(&xor_dev->sw_desq[i].free_list, + struct mv_xor_v2_sw_desc *sw_desc = + xor_dev->sw_desq + i; + sw_desc->idx = i; + dma_async_tx_descriptor_init(&sw_desc->async_tx, + &xor_dev->dmachan); + sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit; + async_tx_ack(&sw_desc->async_tx); + + list_add(&sw_desc->free_list, &xor_dev->free_sw_desc); } From ab2c5f0a77fe49bdb6e307b397496373cb47d2c2 Mon Sep 17 00:00:00 2001 From: Hanna Hawa Date: Fri, 5 May 2017 11:57:47 +0200 Subject: [PATCH 008/206] dmaengine: mv_xor_v2: enable XOR engine after its configuration The engine was enabled prior to its configuration, which isn't correct. This patch relocates the activation of the XOR engine, to be after the configuration of the XOR engine. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Hanna Hawa Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index cb60e7c4aa16..211b8c0e3cfb 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -653,9 +653,6 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev) writel((xor_dev->hw_desq & 0xFFFF00000000) >> 32, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF); - /* enable the DMA engine */ - writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); - /* * This is a temporary solution, until we activate the * SMMU. Set the attributes for reading & writing data buffers @@ -699,6 +696,9 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev) reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL; writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE); + /* enable the DMA engine */ + writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); + return 0; } From 44d5887a8bf1e86915c8ff647337cb138149da82 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:48 +0200 Subject: [PATCH 009/206] dmaengine: mv_xor_v2: fix tx_submit() implementation The mv_xor_v2_tx_submit() gets the next available HW descriptor by calling mv_xor_v2_get_desq_write_ptr(), which reads a HW register telling the next available HW descriptor. This was working fine when HW descriptors were issued for processing directly in tx_submit(). However, as part of the review process of the driver, a change was requested to move the actual kick-off of HW descriptors processing to ->issue_pending(). Due to this, reading the HW register to know the next available HW descriptor no longer works. So instead of using this HW register, we implemented a software index pointing to the next available HW descriptor. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 211b8c0e3cfb..4684eceea759 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -161,6 +161,7 @@ struct mv_xor_v2_device { struct mv_xor_v2_sw_desc *sw_desq; int desc_size; unsigned int npendings; + unsigned int hw_queue_idx; }; /** @@ -213,18 +214,6 @@ static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev, } } -/* - * Return the next available index in the DESQ. - */ -static int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev) -{ - /* read the index for the next available descriptor in the DESQ */ - u32 reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ALLOC_OFF); - - return ((reg >> MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT) - & MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK); -} - /* * notify the engine of new descriptors, and update the available index. */ @@ -306,7 +295,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) static dma_cookie_t mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx) { - int desq_ptr; void *dest_hw_desc; dma_cookie_t cookie; struct mv_xor_v2_sw_desc *sw_desc = @@ -322,15 +310,15 @@ mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_bh(&xor_dev->lock); cookie = dma_cookie_assign(tx); - /* get the next available slot in the DESQ */ - desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev); - /* copy the HW descriptor from the SW descriptor to the DESQ */ - dest_hw_desc = xor_dev->hw_desq_virt + desq_ptr; + dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx; memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size); xor_dev->npendings++; + xor_dev->hw_queue_idx++; + if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM) + xor_dev->hw_queue_idx = 0; spin_unlock_bh(&xor_dev->lock); From 9dd4f319bac25334a869d9276b19eac9e478fd33 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:49 +0200 Subject: [PATCH 010/206] dmaengine: mv_xor_v2: remove interrupt coalescing The current implementation of interrupt coalescing doesn't work, because it doesn't configure the coalescing timer, which is needed to make sure we get an interrupt at some point. As a fix for stable, we simply remove the interrupt coalescing functionality. It will be re-introduced properly in a future commit. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 4684eceea759..b133fe29d788 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -246,22 +246,6 @@ static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev) return MV_XOR_V2_EXT_DESC_SIZE; } -/* - * Set the IMSG threshold - */ -static inline -void mv_xor_v2_set_imsg_thrd(struct mv_xor_v2_device *xor_dev, int thrd_val) -{ - u32 reg; - - reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); - - reg &= (~MV_XOR_V2_DMA_IMSG_THRD_MASK << MV_XOR_V2_DMA_IMSG_THRD_SHIFT); - reg |= (thrd_val << MV_XOR_V2_DMA_IMSG_THRD_SHIFT); - - writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); -} - static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) { struct mv_xor_v2_device *xor_dev = data; @@ -277,12 +261,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) if (!ndescs) return IRQ_NONE; - /* - * Update IMSG threshold, to disable new IMSG interrupts until - * end of the tasklet - */ - mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_DESC_NUM); - /* schedule a tasklet to handle descriptors callbacks */ tasklet_schedule(&xor_dev->irq_tasklet); @@ -607,9 +585,6 @@ static void mv_xor_v2_tasklet(unsigned long data) /* free the descriptores */ mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending); } - - /* Update IMSG threshold, to enable new IMSG interrupts */ - mv_xor_v2_set_imsg_thrd(xor_dev, 0); } /* From b2d3c270f9f2fb82518ac500a9849c3aaf503852 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:50 +0200 Subject: [PATCH 011/206] dmaengine: mv_xor_v2: set DMA mask to 40 bits The XORv2 engine on Armada 7K/8K can only access the first 40 bits of the physical address space, so the DMA mask must be set accordingly. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index b133fe29d788..f3e211f8f6c5 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -693,6 +693,10 @@ static int mv_xor_v2_probe(struct platform_device *pdev) platform_set_drvdata(pdev, xor_dev); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; + xor_dev->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) return -EPROBE_DEFER; From 72d42504bd7faa6de1c1644b5e46652933e040a6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 13:34:29 +0200 Subject: [PATCH 012/206] ovl: select EXPORTFS We get a link error when EXPORTFS is not enabled: ERROR: "exportfs_encode_fh" [fs/overlayfs/overlay.ko] undefined! ERROR: "exportfs_decode_fh" [fs/overlayfs/overlay.ko] undefined! This adds a Kconfig 'select' statement for overlayfs, the same way that it is done for the other users of exportfs. Fixes: 3a1e819b4e80 ("ovl: store file handle of lower inode on copy up") Signed-off-by: Arnd Bergmann Signed-off-by: Miklos Szeredi --- fs/overlayfs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 0daac5112f7a..c0c9683934b7 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,5 +1,6 @@ config OVERLAY_FS tristate "Overlay filesystem support" + select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem and a 'lower' filesystem. When a name exists in both filesystems, the From 98883f1b5415ea9dce60d5178877d15f4faa10b8 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Tue, 9 May 2017 11:50:26 -0500 Subject: [PATCH 013/206] ibmvscsis: Clear left-over abort_cmd pointers With the addition of ibmvscsis->abort_cmd pointer within commit 25e78531268e ("ibmvscsis: Do not send aborted task response"), make sure to explicitly NULL these pointers when clearing DELAY_SEND flag. Do this for two cases, when getting the new new ibmvscsis descriptor in ibmvscsis_get_free_cmd() and before posting the response completion in ibmvscsis_send_messages(). Signed-off-by: Bryant G. Ly Reviewed-by: Michael Cyr Cc: # v4.8+ Signed-off-by: Nicholas Bellinger --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index d390325c99ec..ee64241865e6 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1170,6 +1170,8 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi) cmd = list_first_entry_or_null(&vscsi->free_cmd, struct ibmvscsis_cmd, list); if (cmd) { + if (cmd->abort_cmd) + cmd->abort_cmd = NULL; cmd->flags &= ~(DELAY_SEND); list_del(&cmd->list); cmd->iue = iue; @@ -1774,6 +1776,7 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi) if (cmd->abort_cmd) { retry = true; cmd->abort_cmd->flags &= ~(DELAY_SEND); + cmd->abort_cmd = NULL; } /* From 75dbf2d36f6b122ad3c1070fe4bf95f71bbff321 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Wed, 10 May 2017 14:35:47 -0500 Subject: [PATCH 014/206] ibmvscsis: Fix the incorrect req_lim_delta The current code is not correctly calculating the req_lim_delta. We want to make sure vscsi->credit is always incremented when we do not send a response for the scsi op. Thus for the case where there is a successfully aborted task we need to make sure the vscsi->credit is incremented. v2 - Moves the original location of the vscsi->credit increment to a better spot. Since if we increment credit, the next command we send back will have increased req_lim_delta. But we probably shouldn't be doing that until the aborted cmd is actually released. Otherwise the client will think that it can send a new command, and we could find ourselves short of command elements. Not likely, but could happen. This patch depends on both: commit 25e78531268e ("ibmvscsis: Do not send aborted task response") commit 98883f1b5415 ("ibmvscsis: Clear left-over abort_cmd pointers") Signed-off-by: Bryant G. Ly Reviewed-by: Michael Cyr Cc: # v4.8+ Signed-off-by: Nicholas Bellinger --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index ee64241865e6..abf6026645dd 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1791,6 +1791,25 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi) list_del(&cmd->list); ibmvscsis_free_cmd_resources(vscsi, cmd); + /* + * With a successfully aborted op + * through LIO we want to increment the + * the vscsi credit so that when we dont + * send a rsp to the original scsi abort + * op (h_send_crq), but the tm rsp to + * the abort is sent, the credit is + * correctly sent with the abort tm rsp. + * We would need 1 for the abort tm rsp + * and 1 credit for the aborted scsi op. + * Thus we need to increment here. + * Also we want to increment the credit + * here because we want to make sure + * cmd is actually released first + * otherwise the client will think it + * it can send a new cmd, and we could + * find ourselves short of cmd elements. + */ + vscsi->credit += 1; } else { iue = cmd->iue; @@ -2965,10 +2984,7 @@ static long srp_build_response(struct scsi_info *vscsi, rsp->opcode = SRP_RSP; - if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING) - rsp->req_lim_delta = cpu_to_be32(vscsi->credit); - else - rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); + rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); rsp->tag = cmd->rsp.tag; rsp->flags = 0; From 4ff83daa0200affe1894bd33d17bac404e3d78d4 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 11 May 2017 01:07:24 -0700 Subject: [PATCH 015/206] target: Re-add check to reject control WRITEs with overflow data During v4.3 when the overflow/underflow check was relaxed by commit c72c525022: commit c72c5250224d475614a00c1d7e54a67f77cd3410 Author: Roland Dreier Date: Wed Jul 22 15:08:18 2015 -0700 target: allow underflow/overflow for PR OUT etc. commands to allow underflow/overflow for Windows compliance + FCP, a consequence was to allow control CDBs to process overflow data for iscsi-target with immediate data as well. As per Roland's original change, continue to allow underflow cases for control CDBs to make Windows compliance + FCP happy, but until overflow for control CDBs is supported tree-wide, explicitly reject all control WRITEs with overflow following pre v4.3.y logic. Reported-by: Bart Van Assche Cc: Roland Dreier Cc: # v4.3+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 37f57357d4a0..6025935036c9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1160,15 +1160,28 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size) if (cmd->unknown_data_length) { cmd->data_length = size; } else if (size != cmd->data_length) { - pr_warn("TARGET_CORE[%s]: Expected Transfer Length:" + pr_warn_ratelimited("TARGET_CORE[%s]: Expected Transfer Length:" " %u does not match SCSI CDB Length: %u for SAM Opcode:" " 0x%02x\n", cmd->se_tfo->get_fabric_name(), cmd->data_length, size, cmd->t_task_cdb[0]); - if (cmd->data_direction == DMA_TO_DEVICE && - cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { - pr_err("Rejecting underflow/overflow WRITE data\n"); - return TCM_INVALID_CDB_FIELD; + if (cmd->data_direction == DMA_TO_DEVICE) { + if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { + pr_err_ratelimited("Rejecting underflow/overflow" + " for WRITE data CDB\n"); + return TCM_INVALID_CDB_FIELD; + } + /* + * Some fabric drivers like iscsi-target still expect to + * always reject overflow writes. Reject this case until + * full fabric driver level support for overflow writes + * is introduced tree-wide. + */ + if (size > cmd->data_length) { + pr_err_ratelimited("Rejecting overflow for" + " WRITE control CDB\n"); + return TCM_INVALID_CDB_FIELD; + } } /* * Reject READ_* or WRITE_* with overflow/underflow for From 9a445bbb1607d9f14556a532453dd86d1b7e381e Mon Sep 17 00:00:00 2001 From: Hiroyuki Yokoyama Date: Mon, 15 May 2017 17:49:52 +0900 Subject: [PATCH 016/206] dmaengine: usb-dmac: Fix DMAOR AE bit definition This patch fixes the register definition of AE (Address Error flag) bit. Fixes: 0c1c8ff32fa2 ("dmaengine: usb-dmac: Add Renesas USB DMA Controller (USB-DMAC) driver") Cc: # v4.1+ Signed-off-by: Hiroyuki Yokoyama [Shimoda: add Fixes and Cc tags in the commit log] Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Signed-off-by: Vinod Koul --- drivers/dma/sh/usb-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 72c649713ace..31a145154e9f 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -117,7 +117,7 @@ struct usb_dmac { #define USB_DMASWR 0x0008 #define USB_DMASWR_SWR (1 << 0) #define USB_DMAOR 0x0060 -#define USB_DMAOR_AE (1 << 2) +#define USB_DMAOR_AE (1 << 1) #define USB_DMAOR_DME (1 << 0) #define USB_DMASAR 0x0000 From 6ceec6953efe7f371218082ddee1fe022a13e8ab Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 16 May 2017 16:54:53 +0200 Subject: [PATCH 017/206] MAINTAINERS: update RTC mailing list The RTC subsystem mailing list is moving to vger. Signed-off-by: Alexandre Belloni --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f7d568b8f133..b3863ec142d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10447,7 +10447,7 @@ S: Orphan PXA RTC DRIVER M: Robert Jarzmik -L: rtc-linux@googlegroups.com +L: linux-rtc@vger.kernel.org S: Maintained QAT DRIVER @@ -10754,7 +10754,7 @@ X: kernel/torture.c REAL TIME CLOCK (RTC) SUBSYSTEM M: Alessandro Zummo M: Alexandre Belloni -L: rtc-linux@googlegroups.com +L: linux-rtc@vger.kernel.org Q: http://patchwork.ozlabs.org/project/rtc-linux/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git S: Maintained From 9512a16b0e1217bbef73d276a67c28b5fbb46512 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 16 May 2017 15:57:42 -0400 Subject: [PATCH 018/206] nfsd: Revert "nfsd: check for oversized NFSv2/v3 arguments" This reverts commit 51f567777799 "nfsd: check for oversized NFSv2/v3 arguments", which breaks support for NFSv3 ACLs. That patch was actually an earlier draft of a fix for the problem that was eventually fixed by e6838a29ecb "nfsd: check for oversized NFSv2/v3 arguments". But somehow I accidentally left this earlier draft in the branch that was part of my 2.12 pull request. Reported-by: Eryu Guan Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3xdr.c | 23 ++++++----------------- fs/nfsd/nfsxdr.c | 13 +++---------- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 11 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 12feac6ee2fd..452334694a5d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -334,11 +334,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); + args->count = ntohl(*p++); - - if (!xdr_argsize_check(rqstp, p)) - return 0; - len = min(args->count, max_blocksize); /* set up the kvec */ @@ -352,7 +349,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, v++; } args->vlen = v; - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -544,11 +541,9 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, p = decode_fh(p, &args->fh); if (!p) return 0; - if (!xdr_argsize_check(rqstp, p)) - return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -574,14 +569,10 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); - - if (!xdr_argsize_check(rqstp, p)) - return 0; - args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -599,9 +590,6 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->dircount = ntohl(*p++); args->count = ntohl(*p++); - if (!xdr_argsize_check(rqstp, p)) - return 0; - len = args->count = min(args->count, max_blocksize); while (len > 0) { struct page *p = *(rqstp->rq_next_page++); @@ -609,7 +597,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->buffer = page_address(p); len -= PAGE_SIZE; } - return 1; + + return xdr_argsize_check(rqstp, p); } int diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6a4947a3f4fa..de07ff625777 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -257,9 +257,6 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, len = args->count = ntohl(*p++); p++; /* totalcount - unused */ - if (!xdr_argsize_check(rqstp, p)) - return 0; - len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); /* set up somewhere to store response. @@ -275,7 +272,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, v++; } args->vlen = v; - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -365,11 +362,9 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli p = decode_fh(p, &args->fh); if (!p) return 0; - if (!xdr_argsize_check(rqstp, p)) - return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -407,11 +402,9 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, args->cookie = ntohl(*p++); args->count = ntohl(*p++); args->count = min_t(u32, args->count, PAGE_SIZE); - if (!xdr_argsize_check(rqstp, p)) - return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 94631026f79c..11cef5a7bc87 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -336,7 +336,8 @@ xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) { char *cp = (char *)p; struct kvec *vec = &rqstp->rq_arg.head[0]; - return cp == (char *)vec->iov_base + vec->iov_len; + return cp >= (char*)vec->iov_base + && cp <= (char*)vec->iov_base + vec->iov_len; } static inline int From 4681ee21d62cfed4364e09ec50ee8e88185dd628 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 18 May 2017 11:49:39 +0300 Subject: [PATCH 019/206] drm/i915: Do not sync RCU during shrinking Due to the complex dependencies between workqueues and RCU, which are not easily detected by lockdep, do not synchronize RCU during shrinking. On low-on-memory systems (mem=1G for example), the RCU sync leads to all system workqueus freezing and unrelated lockdep splats are displayed according to reports. GIT bisecting done by J. R. Okajima points to the commit where RCU syncing was extended. RCU sync gains us very little benefit in real life scenarios where the amount of memory used by object backing storage is dominant over the metadata under RCU, so drop it altogether. " Yeeeaah, if core could just, go ahead and reclaim RCU queues, that'd be great. " - Chris Wilson, 2016 (0eafec6d3244) v2: More information to commit message. v3: Remove "grep _rcu_" escapee from i915_gem_shrink_all (Andrea) Fixes: c053b5a506d3 ("drm/i915: Don't call synchronize_rcu_expedited under struct_mutex") Suggested-by: Chris Wilson Reported-by: J. R. Okajima Signed-off-by: Joonas Lahtinen Reviewed-by: Chris Wilson Tested-by: Hugh Dickins Tested-by: Andrea Arcangeli Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: J. R. Okajima Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jani Nikula Cc: # v4.11+ (cherry picked from commit 73cc0b9aa9afa5ba65d92e46ded61d29430d72a4) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1495097379-573-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 129ed303a6c4..57d9f7f4ef15 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -59,9 +59,6 @@ static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) return; mutex_unlock(&dev->struct_mutex); - - /* expedite the RCU grace period to free some request slabs */ - synchronize_rcu_expedited(); } static bool any_vma_pinned(struct drm_i915_gem_object *obj) @@ -274,8 +271,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) I915_SHRINK_ACTIVE); intel_runtime_pm_put(dev_priv); - synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */ - return freed; } From 171d8b9363725e122b164e6b9ef2acf2f751e387 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 16 May 2017 09:55:14 +0100 Subject: [PATCH 020/206] drm/i915: use vma->size for appgtt allocate_va_range For the aliasing ppgtt we clear the va range up to vma->size, but seem to allocate up to vma->node.size, which is a little inconsistent given that vma->node.size >= vma->size. Not that is really matters all that much since we preallocate anyway, but for consistency just use vma->size. Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT") Signed-off-by: Matthew Auld Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170516085514.5853-1-matthew.auld@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit d567232cbd9ec2a289ddffea4013b7265bbcc3d5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a0563e18d753..50b8f1139ff9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2313,7 +2313,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, appgtt->base.allocate_va_range) { ret = appgtt->base.allocate_va_range(&appgtt->base, vma->node.start, - vma->node.size); + vma->size); if (ret) goto err_pages; } From d8db7ae4eeebb278aa68a231a003e7102f635a4d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 18 May 2017 13:06:44 +0200 Subject: [PATCH 021/206] drm/i915: Fix new -Wint-in-bool-context gcc compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes the following compiler warning: drivers/gpu/drm/i915/intel_dsi.c: In function ‘intel_dsi_prepare’: drivers/gpu/drm/i915/intel_dsi.c:1487:23: warning: ?: using integer constants in boolean context [-Wint-in-bool-context] PORT_A ? PORT_C : PORT_A), Fixes: f4c3a88e5f04 ("drm/i915: Tighten mmio arrays for MIPI_PORT") Signed-off-by: Hans de Goede Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170518110644.9902-1-hdegoede@redhat.com (cherry picked from commit 0ad4dc887d4168448e8c801aa4edd8fe1e0bd534) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5a7c63e64381..65b837e96fe6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8280,7 +8280,7 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) (((port) == PORT_A) ? a : c) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) #define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) From 8137ae26d25303e7b5cfb418fd28b976461e5b6e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 16 May 2017 08:45:46 +0300 Subject: [PATCH 022/206] ovl: fix creds leak in copy up error path Fixes: 42f269b92540 ("ovl: rearrange code in ovl_copy_up_locked()") Cc: # v4.11 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9008ab9fbd2e..061a8448e6c4 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -343,12 +343,13 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, temp = ovl_do_tmpfile(upperdir, stat->mode); else temp = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(temp); - if (IS_ERR(temp)) - goto out1; - err = 0; - if (!tmpfile) + if (IS_ERR(temp)) { + err = PTR_ERR(temp); + temp = NULL; + } + + if (!err && !tmpfile) err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { From 82b749b2c65e9d108c1c5598dc0a5f436b525f42 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 17 May 2017 00:12:40 +0300 Subject: [PATCH 023/206] ovl: check on mount time if upper fs supports setting xattr xattr are needed by overlayfs for setting opaque dir, redirect dir and copy up origin. Check at mount time by trying to set the overlay.opaque xattr on the workdir and if that fails issue a warning message. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 3 +++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/super.c | 13 +++++++++++++ fs/overlayfs/util.c | 21 +++++++++++++++++++++ 4 files changed, 38 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index caa36cb9c46d..ce7c3aba61e4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -279,3 +279,6 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index b2023ddb8532..ad86c0a302eb 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -28,6 +28,7 @@ struct ovl_fs { /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; bool tmpfile; + bool noxattr; wait_queue_head_t copyup_wq; /* sb common to all layers */ struct super_block *same_sb; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 9828b7de8999..f1647626a882 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -891,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) dput(temp); else pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + + /* + * Check if upper/work fs supports trusted.overlay.* + * xattr + */ + err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, + "0", 1, 0); + if (err) { + ufs->noxattr = true; + pr_warn("overlayfs: upper fs does not support xattr.\n"); + } else { + vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); + } } } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index cfdea47313a1..b5a0dc36ee96 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -303,3 +303,24 @@ void ovl_copy_up_end(struct dentry *dentry) wake_up_locked(&ofs->copyup_wq); spin_unlock(&ofs->copyup_wq.lock); } + +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr) +{ + int err; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + + if (ofs->noxattr) + return xerr; + + err = ovl_do_setxattr(upperdentry, name, value, size, 0); + + if (err == -EOPNOTSUPP) { + pr_warn("overlayfs: cannot set %s xattr on upper\n", name); + ofs->noxattr = true; + return xerr; + } + + return err; +} From 6266d465bde044a105f6c2d4e244680f951a2d70 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 18 May 2017 16:11:24 +0200 Subject: [PATCH 024/206] ovl: don't fail copy-up if upper doesn't support xattr Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 061a8448e6c4..f92ab35d43a6 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -300,7 +300,11 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, return PTR_ERR(fh); } - err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0); + /* + * Do not fail when upper doesn't support xattrs. + */ + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh, + fh ? fh->len : 0, 0); kfree(fh); return err; From 21a228781104ae6fed7e720137ab024575071feb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 17 May 2017 00:12:41 +0300 Subject: [PATCH 025/206] ovl: handle rename when upper doesn't support xattr On failure to set opaque/redirect xattr on rename, skip setting xattr and return -EXDEV. On failure to set opaque xattr when creating a new directory, -EIO is returned instead of -EOPNOTSUPP. Any failure to set those xattr will be recorded in super block and then setting any xattr on upper won't be attempted again. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 29 +++++++++++++++++++---------- fs/overlayfs/overlayfs.h | 1 - fs/overlayfs/util.c | 9 +-------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 723b98b90698..80e0e202a346 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -127,17 +127,28 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, return err; } -static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) +static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, + int xerr) { int err; - err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); return err; } +static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) +{ + /* + * Fail with -EIO when trying to create opaque dir and upper doesn't + * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to + * return a specific error for noxattr case. + */ + return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); +} + /* Common operations required to be done after creation of file on upper */ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) @@ -846,18 +857,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, - redirect, strlen(redirect), 0); + err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + OVL_XATTR_REDIRECT, + redirect, strlen(redirect), -EXDEV); if (!err) { spin_lock(&dentry->d_lock); ovl_dentry_set_redirect(dentry, redirect); spin_unlock(&dentry->d_lock); } else { kfree(redirect); - if (err == -EOPNOTSUPP) - ovl_clear_redirect_dir(dentry->d_sb); - else - pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); + pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); /* Fall back to userspace copy-up */ err = -EXDEV; } @@ -992,7 +1001,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(old)) err = ovl_set_redirect(old, samedir); else if (!old_opaque && ovl_type_merge(new->d_parent)) - err = ovl_set_opaque(old, olddentry); + err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); if (err) goto out_dput; } @@ -1000,7 +1009,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(new)) err = ovl_set_redirect(new, samedir); else if (!new_opaque && ovl_type_merge(old->d_parent)) - err = ovl_set_opaque(new, newdentry); + err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); if (err) goto out_dput; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ce7c3aba61e4..505b18b56330 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -206,7 +206,6 @@ bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); -void ovl_clear_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index b5a0dc36ee96..4d541a8d0834 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -191,14 +191,7 @@ bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; - return ofs->config.redirect_dir; -} - -void ovl_clear_redirect_dir(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - ofs->config.redirect_dir = false; + return ofs->config.redirect_dir && !ofs->noxattr; } const char *ovl_dentry_get_redirect(struct dentry *dentry) From 3d27573ce32b47ba54e6680c77c26a700d67cc16 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 May 2017 09:33:49 +0200 Subject: [PATCH 026/206] ovl: remove unused arg from ovl_lookup_temp() Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/dir.c | 8 ++++---- fs/overlayfs/overlayfs.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f92ab35d43a6..843ed2a2d7db 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -346,7 +346,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (tmpfile) temp = ovl_do_tmpfile(upperdir, stat->mode); else - temp = ovl_lookup_temp(workdir, dentry); + temp = ovl_lookup_temp(workdir); err = 0; if (IS_ERR(temp)) { err = PTR_ERR(temp); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 80e0e202a346..369ee7c4cdc0 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) } } -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) +struct dentry *ovl_lookup_temp(struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, struct dentry *whiteout; struct inode *wdir = workdir->d_inode; - whiteout = ovl_lookup_temp(workdir, dentry); + whiteout = ovl_lookup_temp(workdir); if (IS_ERR(whiteout)) return whiteout; @@ -261,7 +261,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (upper->d_parent->d_inode != udir) goto out_unlock; - opaquedir = ovl_lookup_temp(workdir, dentry); + opaquedir = ovl_lookup_temp(workdir); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out_unlock; @@ -393,7 +393,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out; - newdentry = ovl_lookup_temp(workdir, dentry); + newdentry = ovl_lookup_temp(workdir); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_unlock; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 505b18b56330..7c56932bfc2f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -262,7 +262,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct dentry *workdir); struct cattr { dev_t rdev; umode_t mode; From ee1d6d37b6b884383b501089be93ce94f2153028 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 May 2017 16:42:26 +0300 Subject: [PATCH 027/206] ovl: mark upper dir with type origin entries "impure" When moving a merge dir or non-dir with copy up origin into a non-merge upper dir (a.k.a pure upper dir), we are marking the target parent dir "impure". ovl_iterate() iterates pure upper dirs directly, because there is no need to filter out whiteouts and merge dir content with lower dir. But for the case of an "impure" upper dir, ovl_iterate() will not be able to iterate the real upper dir directly, because it will need to lookup the origin inode and use it to fill d_ino. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 45 ++++++++++++++++++++++++++++++++++++++++ fs/overlayfs/namei.c | 18 ++++++++++++++-- fs/overlayfs/overlayfs.h | 3 +++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/util.c | 14 +++++++++++++ 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 369ee7c4cdc0..f2a118ba00e4 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -149,6 +149,22 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); } +static int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) +{ + int err; + + /* + * Do not fail when upper doesn't support xattrs. + * Upper inodes won't have origin nor redirect xattr anyway. + */ + err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, + "y", 1, 0); + if (!err) + ovl_dentry_set_impure(dentry); + + return err; +} + /* Common operations required to be done after creation of file on upper */ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) @@ -173,6 +189,11 @@ static bool ovl_type_merge(struct dentry *dentry) return OVL_TYPE_MERGE(ovl_path_type(dentry)); } +static bool ovl_type_origin(struct dentry *dentry) +{ + return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); +} + static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -952,6 +973,30 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); + if (!samedir) { + /* + * When moving a merge dir or non-dir with copy up origin into + * a non-merge upper dir (a.k.a pure upper dir), we are making + * the target parent dir "impure". ovl_iterate() iterates pure + * upper dirs directly, because there is no need to filter out + * whiteouts and merge dir content with lower dir. But for the + * case of an "impure" upper dir, ovl_iterate() cannot iterate + * the real directory directly, because it looks for the inode + * numbers to fill d_ino in the entries origin inode. + */ + if (ovl_type_origin(old) && !ovl_type_merge(new->d_parent)) { + err = ovl_set_impure(new->d_parent, new_upperdir); + if (err) + goto out_revert_creds; + } + if (!overwrite && ovl_type_origin(new) && + !ovl_type_merge(old->d_parent)) { + err = ovl_set_impure(old->d_parent, old_upperdir); + if (err) + goto out_revert_creds; + } + } + trap = lock_rename(new_upperdir, old_upperdir); olddentry = lookup_one_len(old->d_name.name, old_upperdir, diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index bad0f665a635..0c72a5909db2 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -167,7 +167,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, goto out; } -static bool ovl_is_opaquedir(struct dentry *dentry) +static bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) { int res; char val; @@ -175,13 +175,23 @@ static bool ovl_is_opaquedir(struct dentry *dentry) if (!d_is_dir(dentry)) return false; - res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); + res = vfs_getxattr(dentry, name, &val, 1); if (res == 1 && val == 'y') return true; return false; } +static bool ovl_is_opaquedir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); +} + +static bool ovl_is_impuredir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); +} + static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, const char *name, unsigned int namelen, size_t prelen, const char *post, @@ -351,6 +361,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int ctr = 0; struct inode *inode = NULL; bool upperopaque = false; + bool upperimpure = false; char *upperredirect = NULL; struct dentry *this; unsigned int i; @@ -395,6 +406,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, poe = roe; } upperopaque = d.opaque; + if (upperdentry && d.is_dir) + upperimpure = ovl_is_impuredir(upperdentry); } if (!d.stop && poe->numlower) { @@ -463,6 +476,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, revert_creds(old_cred); oe->opaque = upperopaque; + oe->impure = upperimpure; oe->redirect = upperredirect; oe->__upperdentry = upperdentry; memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 7c56932bfc2f..a9fb958fd5d4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -24,6 +24,7 @@ enum ovl_path_type { #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" +#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, @@ -203,8 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry); struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); bool ovl_dentry_is_opaque(struct dentry *dentry); +bool ovl_dentry_is_impure(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); +void ovl_dentry_set_impure(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index ad86c0a302eb..34bc4a9f5c61 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -43,6 +43,7 @@ struct ovl_entry { u64 version; const char *redirect; bool opaque; + bool impure; bool copying; }; struct rcu_head rcu; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 4d541a8d0834..e0dfb07d5457 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -175,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry) return oe->opaque; } +bool ovl_dentry_is_impure(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->impure; +} + bool ovl_dentry_is_whiteout(struct dentry *dentry) { return !dentry->d_inode && ovl_dentry_is_opaque(dentry); @@ -187,6 +194,13 @@ void ovl_dentry_set_opaque(struct dentry *dentry) oe->opaque = true; } +void ovl_dentry_set_impure(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + oe->impure = true; +} + bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; From 46cd902f6d9a746f0da816cbdfb04b9baf6d7967 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 11 May 2017 18:07:42 +0800 Subject: [PATCH 028/206] drm/i915: set initialised only when init_context callback is NULL During execlist_context_deferred_alloc() we presumed that the context is uninitialised (we only just allocated the state object for it!) and chose to optimise away the later call to engine->init_context() if engine->init_context were NULL. This breaks with GVT's contexts that are marked as pre-initialised to avoid us annoyingly calling engine->init_context(). The fix is to not override ce->initialised if it is already true. Cc: Chris Wilson Signed-off-by: Chuanxiao Dong Link: http://patchwork.freedesktop.org/patch/msgid/1494497262-24855-1-git-send-email-chuanxiao.dong@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit 0d402a24df8c8160727af934d83293f3d44d31a3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8f7c631fc1f..dac4e003c1f3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1989,7 +1989,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised = engine->init_context == NULL; + ce->initialised |= engine->init_context == NULL; return 0; From 2dffdc0724004f38f5e39907747b53e4b0d80e59 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 16 May 2017 14:01:25 +0800 Subject: [PATCH 029/206] md-cluster: fix potential lock issue in add_new_disk The add_new_disk returns with communication locked if __sendmsg returns failure, fix it with call unlock_comm before return. Reported-by: Dan Carpenter CC: Goldwyn Rodrigues Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 7299ce2f08a8..03082e17c65c 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1311,8 +1311,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev) cmsg.raid_slot = cpu_to_le32(rdev->desc_nr); lock_comm(cinfo, 1); ret = __sendmsg(cinfo, &cmsg); - if (ret) + if (ret) { + unlock_comm(cinfo); return ret; + } cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX); cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE; From 44d4182e23c555cbfa8b8a0ad2d94664d23850d3 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:50 -0700 Subject: [PATCH 030/206] mtd: nand: don't leak buffers when ->scan_bbt() fails This bug seems to have been here forever, although we came close to fixing all of them in [1]! [1] 11eaf6df1cce ("mtd: nand: Remove BUG() abuse in nand_scan_tail") Signed-off-by: Brian Norris Acked-by: Ezequiel Garcia Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index d474378ed810..66782291a762 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4842,7 +4842,11 @@ int nand_scan_tail(struct mtd_info *mtd) return 0; /* Build bad block table */ - return chip->scan_bbt(mtd); + ret = chip->scan_bbt(mtd); + if (ret) + goto err_free; + return 0; + err_free: if (nbuf) { kfree(nbuf->databuf); From 0545c1720277dd246bd682b23aee425f3830a14f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:52 -0700 Subject: [PATCH 031/206] mtd: nand: drop unneeded module.h include nand_ids isn't a separate module anymore and doesn't need this header. Signed-off-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_ids.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index 9d5ca0e540b5..92e2cf8e9ff9 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -6,7 +6,6 @@ * published by the Free Software Foundation. * */ -#include #include #include From 787710492911e21148975e1d1914c7409fb32c7e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:53 -0700 Subject: [PATCH 032/206] mtd: nand: free vendor-specific resources in init failure paths If we fail any time after calling nand_detect(), then we don't call the vendor-specific ->cleanup() callback, and we'll leak any resources the vendor-specific code might have allocated. Mark the "fix" against the first commit that started allocating anything in ->init(). Fixes: 626994e07480 ("mtd: nand: hynix: Add read-retry support for 1x nm MLC NANDs") Signed-off-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 38 +++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 66782291a762..81f77f9cd784 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4361,7 +4361,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, /* Initialize the ->data_interface field. */ ret = nand_init_data_interface(chip); if (ret) - return ret; + goto err_nand_init; /* * Setup the data interface correctly on the chip and controller side. @@ -4373,7 +4373,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, */ ret = nand_setup_data_interface(chip); if (ret) - return ret; + goto err_nand_init; nand_maf_id = chip->id.data[0]; nand_dev_id = chip->id.data[1]; @@ -4404,6 +4404,12 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, mtd->size = i * chip->chipsize; return 0; + +err_nand_init: + /* Free manufacturer priv data. */ + nand_manufacturer_cleanup(chip); + + return ret; } EXPORT_SYMBOL(nand_scan_ident); @@ -4574,18 +4580,23 @@ int nand_scan_tail(struct mtd_info *mtd) /* New bad blocks should be marked in OOB, flash-based BBT, or both */ if (WARN_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) && - !(chip->bbt_options & NAND_BBT_USE_FLASH))) - return -EINVAL; + !(chip->bbt_options & NAND_BBT_USE_FLASH))) { + ret = -EINVAL; + goto err_ident; + } if (invalid_ecc_page_accessors(chip)) { pr_err("Invalid ECC page accessors setup\n"); - return -EINVAL; + ret = -EINVAL; + goto err_ident; } if (!(chip->options & NAND_OWN_BUFFERS)) { nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL); - if (!nbuf) - return -ENOMEM; + if (!nbuf) { + ret = -ENOMEM; + goto err_ident; + } nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL); if (!nbuf->ecccalc) { @@ -4608,8 +4619,10 @@ int nand_scan_tail(struct mtd_info *mtd) chip->buffers = nbuf; } else { - if (!chip->buffers) - return -ENOMEM; + if (!chip->buffers) { + ret = -ENOMEM; + goto err_ident; + } } /* Set the internal oob buffer location, just after the page data */ @@ -4854,6 +4867,13 @@ int nand_scan_tail(struct mtd_info *mtd) kfree(nbuf->ecccalc); kfree(nbuf); } + +err_ident: + /* Clean up nand_scan_ident(). */ + + /* Free manufacturer priv data. */ + nand_manufacturer_cleanup(chip); + return ret; } EXPORT_SYMBOL(nand_scan_tail); From d24197907454b902908e025bce4b8bc677d3ba6b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:55 -0700 Subject: [PATCH 033/206] mtd: nand: samsung: warn about un-parseable ECC info We don't handle cases larger than 7. We probably shouldn't pretend we know the ECC step size in this case, and it's probably also good to WARN() like we do in many other similar cases. Fixes: 8fc82d456e40 ("mtd: nand: samsung: Retrieve ECC requirements from extended ID") Signed-off-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_samsung.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c index 9cfc4035a420..1e0755997762 100644 --- a/drivers/mtd/nand/nand_samsung.c +++ b/drivers/mtd/nand/nand_samsung.c @@ -84,6 +84,9 @@ static void samsung_nand_decode_id(struct nand_chip *chip) case 7: chip->ecc_strength_ds = 60; break; + default: + WARN(1, "Could not decode ECC info"); + chip->ecc_step_ds = 0; } } } else { From 2761b4f12b017f6d3e5add386733a700a490df47 Mon Sep 17 00:00:00 2001 From: Andres Galacho Date: Mon, 1 May 2017 16:30:15 -0400 Subject: [PATCH 034/206] mtd: nand: tango: Export OF device ID table as module aliases The device table is required to load modules based on modaliases. After adding MODULE_DEVICE_TABLE, below entries for example will be added to module.alias: alias: of:N*T*Csigma,smp8758-nandC* alias: of:N*T*Csigma,smp8758-nand Fixes: 6956e2385a16 ("mtd: nand: add tango NAND flash controller support") Cc: stable@vger.kernel.org Signed-off-by: Andres Galacho Acked-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/tango_nand.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 05b6e1065203..82fea9b4c358 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -663,6 +663,7 @@ static const struct of_device_id tango_nand_ids[] = { { .compatible = "sigma,smp8758-nand" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, tango_nand_ids); static struct platform_driver tango_nand_driver = { .probe = tango_nand_probe, From 60cf0ce14b09b54e7ee79dc3ef498de6ef0e41e9 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Fri, 12 May 2017 17:34:01 +0200 Subject: [PATCH 035/206] mtd: nand: tango: Update ecc_stats.corrected According to Boris, some user-space tools expect MTD drivers to update ecc_stats.corrected, and it's better to provide a lower bound than to provide no information at all. Fixes: 6956e2385a16 ("mtd: nand: add tango NAND flash controller support") Cc: stable@vger.kernel.org Reported-by: Pavel Machek Signed-off-by: Marc Gonzalez Signed-off-by: Boris Brezillon --- drivers/mtd/nand/tango_nand.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 82fea9b4c358..49b286c6c10f 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -55,10 +55,10 @@ * byte 1 for other packets in the page (PKT_N, for N > 0) * ERR_COUNT_PKT_N is the max error count over all but the first packet. */ -#define DECODE_OK_PKT_0(v) ((v) & BIT(7)) -#define DECODE_OK_PKT_N(v) ((v) & BIT(15)) #define ERR_COUNT_PKT_0(v) (((v) >> 0) & 0x3f) #define ERR_COUNT_PKT_N(v) (((v) >> 8) & 0x3f) +#define DECODE_FAIL_PKT_0(v) (((v) & BIT(7)) == 0) +#define DECODE_FAIL_PKT_N(v) (((v) & BIT(15)) == 0) /* Offsets relative to pbus_base */ #define PBUS_CS_CTRL 0x83c @@ -193,6 +193,8 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf) chip->ecc.strength); if (res < 0) mtd->ecc_stats.failed++; + else + mtd->ecc_stats.corrected += res; bitflips = max(res, bitflips); buf += pkt_size; @@ -202,9 +204,11 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf) return bitflips; } -static int decode_error_report(struct tango_nfc *nfc) +static int decode_error_report(struct nand_chip *chip) { u32 status, res; + struct mtd_info *mtd = nand_to_mtd(chip); + struct tango_nfc *nfc = to_tango_nfc(chip->controller); status = readl_relaxed(nfc->reg_base + NFC_XFER_STATUS); if (status & PAGE_IS_EMPTY) @@ -212,10 +216,14 @@ static int decode_error_report(struct tango_nfc *nfc) res = readl_relaxed(nfc->mem_base + ERROR_REPORT); - if (DECODE_OK_PKT_0(res) && DECODE_OK_PKT_N(res)) - return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res)); + if (DECODE_FAIL_PKT_0(res) || DECODE_FAIL_PKT_N(res)) + return -EBADMSG; - return -EBADMSG; + /* ERR_COUNT_PKT_N is max, not sum, but that's all we have */ + mtd->ecc_stats.corrected += + ERR_COUNT_PKT_0(res) + ERR_COUNT_PKT_N(res); + + return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res)); } static void tango_dma_callback(void *arg) @@ -282,7 +290,7 @@ static int tango_read_page(struct mtd_info *mtd, struct nand_chip *chip, if (err) return err; - res = decode_error_report(nfc); + res = decode_error_report(chip); if (res < 0) { chip->ecc.read_oob_raw(mtd, chip, page); res = check_erased_page(chip, buf); From d4ed3b9015b5eebc90d629579d9e7944607cbae5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 May 2017 13:11:00 +0100 Subject: [PATCH 036/206] mtd: nand: make nand_ooblayout_lp_hamming_ops static nand_ooblayout_lp_hamming_ops can be made static as it does not need to be in global scope. Signed-off-by: Colin Ian King Acked-by: Boris Brezillon Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 81f77f9cd784..b1dd12729f19 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -202,7 +202,7 @@ static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section, return 0; } -const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = { +static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = { .ecc = nand_ooblayout_ecc_lp_hamming, .free = nand_ooblayout_free_lp_hamming, }; From a9de080bbcd5c4e213a3d7bbb1e314d60980e943 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 25 Apr 2017 06:22:05 +0000 Subject: [PATCH 037/206] pinctrl: cherryview: Add terminate entry for dmi_system_id tables Make sure dmi_system_id tables are NULL terminated. Fixes: 703650278372 ("pinctrl: cherryview: Add a quirk to make Acer Chromebook keyboard work again") Signed-off-by: Wei Yongjun Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 2debba62fac9..e35d0fe4c737 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1547,7 +1547,8 @@ static const struct dmi_system_id chv_no_valid_mask[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"), DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"), }, - } + }, + {} }; static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) From b4d2ea2af95cb77e2f320e24da526280d4aa2f6b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 8 May 2017 10:48:21 +0200 Subject: [PATCH 038/206] Revert "pinctrl: generic: Add bi-directional and output-enable" This reverts commit 8c58f1a7a4b6d1d723bf25fef9d842d5a11200d0. It turns out that applying these generic properties was premature: the properties used in the driver using this are of unclear electrical nature and the subject need to be discussed. Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt | 2 -- drivers/pinctrl/pinconf-generic.c | 3 --- include/linux/pinctrl/pinconf-generic.h | 3 --- 3 files changed, 8 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt index 71a3c134af1b..f01d154090da 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt @@ -247,7 +247,6 @@ bias-bus-hold - latch weakly bias-pull-up - pull up the pin bias-pull-down - pull down the pin bias-pull-pin-default - use pin-default pull state -bi-directional - pin supports simultaneous input/output operations drive-push-pull - drive actively high and low drive-open-drain - drive with open drain drive-open-source - drive with open source @@ -260,7 +259,6 @@ input-debounce - debounce mode with debound time X power-source - select between different power supplies low-power-enable - enable low power mode low-power-disable - disable low power mode -output-enable - enable output on pin regardless of output value output-low - set the pin to output mode with low level output-high - set the pin to output mode with high level slew-rate - set the slew rate diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 0d6b7f4b82af..720a19fd38d2 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -35,7 +35,6 @@ static const struct pin_config_item conf_items[] = { PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, "input bias pull to pin specific state", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false), - PCONFDUMP(PIN_CONFIG_BIDIRECTIONAL, "bi-directional pin operations", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), @@ -161,7 +160,6 @@ static const struct pinconf_generic_params dt_params[] = { { "bias-pull-up", PIN_CONFIG_BIAS_PULL_UP, 1 }, { "bias-pull-pin-default", PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, 1 }, { "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 }, - { "bi-directional", PIN_CONFIG_BIDIRECTIONAL, 1 }, { "drive-open-drain", PIN_CONFIG_DRIVE_OPEN_DRAIN, 0 }, { "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 }, { "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 }, @@ -174,7 +172,6 @@ static const struct pinconf_generic_params dt_params[] = { { "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 }, { "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 }, { "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 }, - { "output-enable", PIN_CONFIG_OUTPUT, 1, }, { "output-high", PIN_CONFIG_OUTPUT, 1, }, { "output-low", PIN_CONFIG_OUTPUT, 0, }, { "power-source", PIN_CONFIG_POWER_SOURCE, 0 }, diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 279e3c5326e3..7620eb127cff 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -42,8 +42,6 @@ * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high * impedance to VDD). If the argument is != 0 pull-up is enabled, * if it is 0, pull-up is total, i.e. the pin is connected to VDD. - * @PIN_CONFIG_BIDIRECTIONAL: the pin will be configured to allow simultaneous - * input and output operations. * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open * collector) which means it is usually wired with other output ports * which are then pulled up with an external resistor. Setting this @@ -98,7 +96,6 @@ enum pin_config_param { PIN_CONFIG_BIAS_PULL_DOWN, PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, PIN_CONFIG_BIAS_PULL_UP, - PIN_CONFIG_BIDIRECTIONAL, PIN_CONFIG_DRIVE_OPEN_DRAIN, PIN_CONFIG_DRIVE_OPEN_SOURCE, PIN_CONFIG_DRIVE_PUSH_PULL, From 020e0b1c8f19f1fc3bce23beeccd80c574ca0e49 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 11 May 2017 20:24:31 +0200 Subject: [PATCH 039/206] gpiolib: Add stubs for gpiod lookup table interface Add stubs for gpiod_add_lookup_table() and gpiod_remove_lookup_table() for the !GPIOLIB case to prevent build errors. Signed-off-by: Anatolij Gustschin Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/gpio/machine.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index c0d712d22b07..f738d50cc17d 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -56,7 +56,14 @@ struct gpiod_lookup_table { .flags = _flags, \ } +#ifdef CONFIG_GPIOLIB void gpiod_add_lookup_table(struct gpiod_lookup_table *table); void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); +#else +static inline +void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {} +static inline +void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {} +#endif #endif /* __LINUX_GPIO_MACHINE_H */ From 76dd1fbebbaebab294dc09230960238746b883b1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 15 May 2017 09:31:41 +0200 Subject: [PATCH 040/206] HID: asus: Add support for T100 keyboard The keyboard dock used with the Asus Transformer T100 series, uses the same vendor-defined 0xff31 usage-page as some other Asus keyboards. But with a small twist, it has a small descriptor bug which needs to be fixed up for things to work. This commit adds the USB-ID for this keyboard to the hid-asus driver and makes asus_report_fixup fix the descriptor issue, fixing various special function keys on this keyboard not working. Signed-off-by: Hans de Goede Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 10 ++++++++++ drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 16df6cc90235..101ab2e63d18 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -69,6 +69,7 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad"); #define QUIRK_IS_MULTITOUCH BIT(3) #define QUIRK_NO_CONSUMER_USAGES BIT(4) #define QUIRK_USE_KBD_BACKLIGHT BIT(5) +#define QUIRK_T100_KEYBOARD BIT(6) #define I2C_KEYBOARD_QUIRKS (QUIRK_FIX_NOTEBOOK_REPORT | \ QUIRK_NO_INIT_REPORTS | \ @@ -548,6 +549,12 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, hid_info(hdev, "Fixing up Asus notebook report descriptor\n"); rdesc[55] = 0xdd; } + if (drvdata->quirks & QUIRK_T100_KEYBOARD && + *rsize == 76 && rdesc[73] == 0x81 && rdesc[74] == 0x01) { + hid_info(hdev, "Fixing up Asus T100 keyb report descriptor\n"); + rdesc[74] &= ~HID_MAIN_ITEM_CONSTANT; + } + return rdesc; } @@ -560,6 +567,9 @@ static const struct hid_device_id asus_devices[] = { USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2), QUIRK_USE_KBD_BACKLIGHT }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD), + QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES }, { } }; MODULE_DEVICE_TABLE(hid, asus_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 38d041510e1d..04cee65531d7 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1855,6 +1855,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD) }, { HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) }, { HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 8e8a1baee090..8ca1e8ce0af2 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -173,6 +173,7 @@ #define USB_VENDOR_ID_ASUSTEK 0x0b05 #define USB_DEVICE_ID_ASUSTEK_LCM 0x1726 #define USB_DEVICE_ID_ASUSTEK_LCM2 0x175b +#define USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD 0x17e0 #define USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD 0x8585 #define USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD 0x0101 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1 0x1854 From 664b7c4728821767e0228ee161bab87db2be58f1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 12 May 2017 08:47:57 -0700 Subject: [PATCH 041/206] pinctrl: core: Fix warning by removing bogus code Andre Przywara noticed that we can get the following warning with -EPROBE_DEFER: "WARNING: CPU: 1 PID: 89 at drivers/base/dd.c:349 driver_probe_device+0x2ac/0x2e8" Let's fix the issue by removing the indices as suggested by Tejun Heo . All we have to do here is kill the radix tree. I probably ended up with the indices after grepping for removal of all entries using radix_tree_for_each_slot() and the first match found was gmap_radix_tree_free(). Anyways, no need for indices here, and we can just do remove all the entries using radix_tree_for_each_slot() along how the item_kill_tree() test case does. Fixes: c7059c5ac70a ("pinctrl: core: Add generic pinctrl functions for managing groups") Fixes: a76edc89b100 ("pinctrl: core: Add generic pinctrl functions for managing groups") Reported-by: Andre Przywara Signed-off-by: Tony Lindgren Reviewed-by: Andre Przywara Tested-by: Andre Przywara Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 20 +++----------------- drivers/pinctrl/pinmux.c | 21 ++++----------------- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 1653cbda6a82..bd459a93b0e7 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -680,30 +680,16 @@ EXPORT_SYMBOL_GPL(pinctrl_generic_remove_group); * pinctrl_generic_free_groups() - removes all pin groups * @pctldev: pin controller device * - * Note that the caller must take care of locking. + * Note that the caller must take care of locking. The pinctrl groups + * are allocated with devm_kzalloc() so no need to free them here. */ static void pinctrl_generic_free_groups(struct pinctrl_dev *pctldev) { struct radix_tree_iter iter; - struct group_desc *group; - unsigned long *indices; void **slot; - int i = 0; - - indices = devm_kzalloc(pctldev->dev, sizeof(*indices) * - pctldev->num_groups, GFP_KERNEL); - if (!indices) - return; radix_tree_for_each_slot(slot, &pctldev->pin_group_tree, &iter, 0) - indices[i++] = iter.index; - - for (i = 0; i < pctldev->num_groups; i++) { - group = radix_tree_lookup(&pctldev->pin_group_tree, - indices[i]); - radix_tree_delete(&pctldev->pin_group_tree, indices[i]); - devm_kfree(pctldev->dev, group); - } + radix_tree_delete(&pctldev->pin_group_tree, iter.index); pctldev->num_groups = 0; } diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 9fd6d9087dc5..16b3ae5e4f44 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -826,30 +826,17 @@ EXPORT_SYMBOL_GPL(pinmux_generic_remove_function); * pinmux_generic_free_functions() - removes all functions * @pctldev: pin controller device * - * Note that the caller must take care of locking. + * Note that the caller must take care of locking. The pinctrl + * functions are allocated with devm_kzalloc() so no need to free + * them here. */ void pinmux_generic_free_functions(struct pinctrl_dev *pctldev) { struct radix_tree_iter iter; - struct function_desc *function; - unsigned long *indices; void **slot; - int i = 0; - - indices = devm_kzalloc(pctldev->dev, sizeof(*indices) * - pctldev->num_functions, GFP_KERNEL); - if (!indices) - return; radix_tree_for_each_slot(slot, &pctldev->pin_function_tree, &iter, 0) - indices[i++] = iter.index; - - for (i = 0; i < pctldev->num_functions; i++) { - function = radix_tree_lookup(&pctldev->pin_function_tree, - indices[i]); - radix_tree_delete(&pctldev->pin_function_tree, indices[i]); - devm_kfree(pctldev->dev, function); - } + radix_tree_delete(&pctldev->pin_function_tree, iter.index); pctldev->num_functions = 0; } From f52236e0b0a0820e938e16a776309e76b7bd6c43 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 18 May 2017 13:47:25 +0300 Subject: [PATCH 042/206] dm verity: fix no salt use case DM-Verity has an (undocumented) mode where no salt is used. This was never handled directly by the DM-Verity code, instead working due to the fact that calling crypto_shash_update() with a zero length data is an implicit noop. This is no longer the case now that we have switched to crypto_ahash_update(). Fix the issue by introducing explicit handling of the no salt use case to DM-Verity. Signed-off-by: Gilad Ben-Yossef Reported-by: Marian Csontos Fixes: d1ac3ff ("dm verity: switch to using asynchronous hash crypto API") Tested-by: Milan Broz Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 97de961a3bfc..1ec9b2c51c07 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -166,7 +166,7 @@ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req, return r; } - if (likely(v->version >= 1)) + if (likely(v->salt_size && (v->version >= 1))) r = verity_hash_update(v, req, v->salt, v->salt_size, res); return r; @@ -177,7 +177,7 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req, { int r; - if (unlikely(!v->version)) { + if (unlikely(v->salt_size && (!v->version))) { r = verity_hash_update(v, req, v->salt, v->salt_size, res); if (r < 0) { From 702a6204f804bad946c455e7cd8d50d79c9d1629 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 20 May 2017 14:56:21 -0400 Subject: [PATCH 043/206] dm integrity: use kvmalloc() instead of dm_integrity_kvmalloc() Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c7f7c8d76576..1feeb2ccf5a1 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2374,21 +2374,6 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) blk_queue_max_integrity_segments(disk->queue, UINT_MAX); } -/* FIXME: use new kvmalloc */ -static void *dm_integrity_kvmalloc(size_t size, gfp_t gfp) -{ - void *ptr = NULL; - - if (size <= PAGE_SIZE) - ptr = kmalloc(size, GFP_KERNEL | gfp); - if (!ptr && size <= KMALLOC_MAX_SIZE) - ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | gfp); - if (!ptr) - ptr = __vmalloc(size, GFP_KERNEL | gfp, PAGE_KERNEL); - - return ptr; -} - static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl) { unsigned i; @@ -2407,7 +2392,7 @@ static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic) struct page_list *pl; unsigned i; - pl = dm_integrity_kvmalloc(page_list_desc_size, __GFP_ZERO); + pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO); if (!pl) return NULL; @@ -2437,7 +2422,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int struct scatterlist **sl; unsigned i; - sl = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), __GFP_ZERO); + sl = kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO); if (!sl) return NULL; @@ -2453,7 +2438,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int n_pages = (end_index - start_index + 1); - s = dm_integrity_kvmalloc(n_pages * sizeof(struct scatterlist), 0); + s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL); if (!s) { dm_integrity_free_journal_scatterlist(ic, sl); return NULL; @@ -2617,7 +2602,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) goto bad; } - sg = dm_integrity_kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), 0); + sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL); if (!sg) { *error = "Unable to allocate sg list"; r = -ENOMEM; @@ -2673,7 +2658,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) r = -ENOMEM; goto bad; } - ic->sk_requests = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), __GFP_ZERO); + ic->sk_requests = kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO); if (!ic->sk_requests) { *error = "Unable to allocate sk requests"; r = -ENOMEM; @@ -2740,7 +2725,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) r = -ENOMEM; goto bad; } - ic->journal_tree = dm_integrity_kvmalloc(journal_tree_size, 0); + ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL); if (!ic->journal_tree) { *error = "Could not allocate memory for journal tree"; r = -ENOMEM; From 8c1e2162f27b319da913683143c0c6c09b083ebb Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 18 May 2017 12:00:51 -0700 Subject: [PATCH 044/206] dm ioctl: restore __GFP_HIGH in copy_params() Commit d224e9381897 ("drivers/md/dm-ioctl.c: use kvmalloc rather than opencoded variant") left out the __GFP_HIGH flag when converting from __vmalloc to kvmalloc. This can cause the DM ioctl to fail in some low memory situations where it wouldn't have failed earlier. Add __GFP_HIGH back to avoid any potential regression. Fixes: d224e9381897 ("drivers/md/dm-ioctl.c: use kvmalloc rather than opencoded variant") Signed-off-by: Junaid Shahid Signed-off-by: Mikulas Patocka Acked-by: David Rientjes Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 0555b4410e05..41852ae287a5 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1710,12 +1710,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern } /* - * Try to avoid low memory issues when a device is suspended. + * Use __GFP_HIGH to avoid low memory issues when a device is + * suspended and the ioctl is needed to resume it. * Use kmalloc() rather than vmalloc() when we can. */ dmi = NULL; noio_flag = memalloc_noio_save(); - dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL); + dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL | __GFP_HIGH); memalloc_noio_restore(noio_flag); if (!dmi) { From 1999f108c983a7287a847b09e29dac25b9301dee Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Wed, 17 May 2017 15:49:01 +0800 Subject: [PATCH 045/206] drm/i915/gvt: Disable compression workaround for Gen9 With enabling this workaround, can observe GPU hang issue on Gen9. As currently host side doesn't have this workaround, disable it from GVT side. v2: - Fix indent error.(Zhenyu) Cc: Zhenyu Wang Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 30 ++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c995e540ff96..0ffd69654592 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1366,18 +1366,28 @@ static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t reg = {.reg = offset}; + u32 v = *(u32 *)p_data; + + if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) + return intel_vgpu_default_mmio_write(vgpu, + offset, p_data, bytes); switch (offset) { case 0x4ddc: - vgpu_vreg(vgpu, offset) = 0x8000003c; - /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl */ - I915_WRITE(reg, vgpu_vreg(vgpu, offset)); + /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 31); break; case 0x42080: - vgpu_vreg(vgpu, offset) = 0x8000; - /* WaCompressedResourceDisplayNewHashMode:skl */ - I915_WRITE(reg, vgpu_vreg(vgpu, offset)); + /* bypass WaCompressedResourceDisplayNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 15); + break; + case 0xe194: + /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 8); + break; + case 0x7014: + /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 13); break; default: return -EINVAL; @@ -1634,7 +1644,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + skl_misc_ctl_write); MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2568,7 +2579,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x6e570, D_BDW_PLUS); MMIO_D(0x65f10, D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, + skl_misc_ctl_write); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); From c61872c9833d17d3807fb999096917c1f9eaada0 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 17 May 2017 13:25:12 +0300 Subject: [PATCH 046/206] firmware: dmi: Add DMI_PRODUCT_FAMILY identification string Sometimes it is more convenient to be able to match a whole family of products, like in case of bunch of Chromebooks based on Intel_Strago to apply a driver quirk instead of quirking each machine one-by-one. This adds support for DMI_PRODUCT_FAMILY identification string and also exports it to the userspace through sysfs attribute just like the existing ones. Suggested-by: Dmitry Torokhov Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/firmware/dmi-id.c | 2 ++ drivers/firmware/dmi_scan.c | 1 + include/linux/mod_devicetable.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c index 44c01390d035..dc269cb288c2 100644 --- a/drivers/firmware/dmi-id.c +++ b/drivers/firmware/dmi-id.c @@ -47,6 +47,7 @@ DEFINE_DMI_ATTR_WITH_SHOW(product_name, 0444, DMI_PRODUCT_NAME); DEFINE_DMI_ATTR_WITH_SHOW(product_version, 0444, DMI_PRODUCT_VERSION); DEFINE_DMI_ATTR_WITH_SHOW(product_serial, 0400, DMI_PRODUCT_SERIAL); DEFINE_DMI_ATTR_WITH_SHOW(product_uuid, 0400, DMI_PRODUCT_UUID); +DEFINE_DMI_ATTR_WITH_SHOW(product_family, 0400, DMI_PRODUCT_FAMILY); DEFINE_DMI_ATTR_WITH_SHOW(board_vendor, 0444, DMI_BOARD_VENDOR); DEFINE_DMI_ATTR_WITH_SHOW(board_name, 0444, DMI_BOARD_NAME); DEFINE_DMI_ATTR_WITH_SHOW(board_version, 0444, DMI_BOARD_VERSION); @@ -191,6 +192,7 @@ static void __init dmi_id_init_attr_table(void) ADD_DMI_ATTR(product_version, DMI_PRODUCT_VERSION); ADD_DMI_ATTR(product_serial, DMI_PRODUCT_SERIAL); ADD_DMI_ATTR(product_uuid, DMI_PRODUCT_UUID); + ADD_DMI_ATTR(product_family, DMI_PRODUCT_FAMILY); ADD_DMI_ATTR(board_vendor, DMI_BOARD_VENDOR); ADD_DMI_ATTR(board_name, DMI_BOARD_NAME); ADD_DMI_ATTR(board_version, DMI_BOARD_VERSION); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 54be60ead08f..93f7acdaac7a 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -430,6 +430,7 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy) dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); dmi_save_uuid(dm, DMI_PRODUCT_UUID, 8); + dmi_save_ident(dm, DMI_PRODUCT_FAMILY, 26); break; case 2: /* Base Board Information */ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 566fda587fcf..3f74ef2281e8 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -467,6 +467,7 @@ enum dmi_field { DMI_PRODUCT_VERSION, DMI_PRODUCT_SERIAL, DMI_PRODUCT_UUID, + DMI_PRODUCT_FAMILY, DMI_BOARD_VENDOR, DMI_BOARD_NAME, DMI_BOARD_VERSION, From 2a8209fa68236ad65363dba03db5dbced520268a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 17 May 2017 13:25:14 +0300 Subject: [PATCH 047/206] pinctrl: cherryview: Extend the Chromebook DMI quirk to Intel_Strago systems It turns out there are quite many Chromebooks out there that have the same keyboard issue than Acer Chromebook. All of them are based on Intel_Strago reference and report their DMI_PRODUCT_FAMILY as "Intel_Strago" (Samsung Chromebook 3 and Cyan Chromebooks are exceptions for which we add separate entries). Instead of adding each machine to the quirk table, we use DMI_PRODUCT_FAMILY of "Intel_Strago" that hopefully covers most of the machines out there currently. Link: https://bugzilla.kernel.org/show_bug.cgi?id=194945 Suggested: Dmitry Torokhov Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index e35d0fe4c737..20f1b4493994 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1539,13 +1539,26 @@ static void chv_gpio_irq_handler(struct irq_desc *desc) * is not listed below. */ static const struct dmi_system_id chv_no_valid_mask[] = { + /* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */ { - /* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */ - .ident = "Acer Chromebook (CYAN)", + .ident = "Intel_Strago based Chromebooks (All models)", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), - DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"), - DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"), + }, + }, + { + .ident = "Acer Chromebook R11 (Cyan)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"), + }, + }, + { + .ident = "Samsung Chromebook 3 (Celes)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Celes"), }, }, {} From da6c2addf66d7ff7d0b090d6267d4292f951e4e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 18 May 2017 11:23:55 +0200 Subject: [PATCH 048/206] pinctrl: mxs: atomically switch mux and drive strength config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To set the mux mode of a pin two bits must be set. Up to now this is implemented using the following idiom: writel(mask, reg + CLR); writel(value, reg + SET); . This however results in the mux mode being 0 between the two writes. On my machine there is an IC's reset pin connected to LCD_D20. The bootloader configures this pin as GPIO output-high (i.e. not holding the IC in reset). When Linux reconfigures the pin to GPIO the short time LCD_D20 is muxed as LCD_D20 instead of GPIO_1_20 is enough to confuse the connected IC. The same problem is present for the pin's drive strength setting which is reset to low drive strength before using the right value. So instead of relying on the hardware to modify the register setting using two writes implement the bit toggling using read-modify-write. Fixes: 17723111e64f ("pinctrl: add pinctrl-mxs support") Signed-off-by: Uwe Kleine-König Acked-by: Shawn Guo Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-mxs.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-mxs.c b/drivers/pinctrl/freescale/pinctrl-mxs.c index 41b5b07d5a2b..6852010a6d70 100644 --- a/drivers/pinctrl/freescale/pinctrl-mxs.c +++ b/drivers/pinctrl/freescale/pinctrl-mxs.c @@ -194,6 +194,16 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } +static void mxs_pinctrl_rmwl(u32 value, u32 mask, u8 shift, void __iomem *reg) +{ + u32 tmp; + + tmp = readl(reg); + tmp &= ~(mask << shift); + tmp |= value << shift; + writel(tmp, reg); +} + static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, unsigned group) { @@ -211,8 +221,7 @@ static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, reg += bank * 0x20 + pin / 16 * 0x10; shift = pin % 16 * 2; - writel(0x3 << shift, reg + CLR); - writel(g->muxsel[i] << shift, reg + SET); + mxs_pinctrl_rmwl(g->muxsel[i], 0x3, shift, reg); } return 0; @@ -279,8 +288,7 @@ static int mxs_pinconf_group_set(struct pinctrl_dev *pctldev, /* mA */ if (config & MA_PRESENT) { shift = pin % 8 * 4; - writel(0x3 << shift, reg + CLR); - writel(ma << shift, reg + SET); + mxs_pinctrl_rmwl(ma, 0x3, shift, reg); } /* vol */ From 7903d4f5e1dec53963cba9b1bc472a76a3532e07 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 May 2017 14:25:49 +0800 Subject: [PATCH 049/206] pinctrl: sunxi: Fix SPDIF function name for A83T We use well known standard names for functions that have name, such as I2C, SPI, SPDIF, etc.. Fix the function name of SPDIF, which was named OWA (One Wire Audio) based on Allwinner datasheets. Fixes: 4730f33f0d82 ("pinctrl: sunxi: add allwinner A83T PIO controller support") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 9aec1d2232dd..6624499eae72 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -394,7 +394,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 18), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x3, "owa")), /* DOUT */ + SUNXI_FUNCTION(0x3, "spdif")), /* DOUT */ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 19), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out")), From 9a307403d374b993061f5992a6e260c944920d0b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 23 May 2017 12:24:40 -0400 Subject: [PATCH 050/206] nfsd4: fix null dereference on replay if we receive a compound such that: - the sessionid, slot, and sequence number in the SEQUENCE op match a cached succesful reply with N ops, and - the Nth operation of the compound is a PUTFH, PUTPUBFH, PUTROOTFH, or RESTOREFH, then nfsd4_sequence will return 0 and set cstate->status to nfserr_replay_cache. The current filehandle will not be set. This will cause us to call check_nfsd_access with first argument NULL. To nfsd4_compound it looks like we just succesfully executed an operation that set a filehandle, but the current filehandle is not set. Fix this by moving the nfserr_replay_cache earlier. There was never any reason to have it after the encode_op label, since the only case where he hit that is when opdesc->op_func sets it. Note that there are two ways we could hit this case: - a client is resending a previously sent compound that ended with one of the four PUTFH-like operations, or - a client is sending a *new* compound that (incorrectly) shares sessionid, slot, and sequence number with a previously sent compound, and the length of the previously sent compound happens to match the position of a PUTFH-like operation in the new compound. The second is obviously incorrect client behavior. The first is also very strange--the only purpose of a PUTFH-like operation is to set the current filehandle to be used by the following operation, so there's no point in having it as the last in a compound. So it's likely this requires a buggy or malicious client to reproduce. Reported-by: Scott Mayhew Cc: stable@kernel.vger.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c453a1998e00..dadb3bf305b2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1769,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, opdesc->op_get_currentstateid(cstate, &op->u); op->status = opdesc->op_func(rqstp, cstate, &op->u); + /* Only from SEQUENCE */ + if (cstate->status == nfserr_replay_cache) { + dprintk("%s NFS4.1 replay from cache\n", __func__); + status = op->status; + goto out; + } if (!op->status) { if (opdesc->op_set_currentstateid) opdesc->op_set_currentstateid(cstate, &op->u); @@ -1779,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (need_wrongsec_check(rqstp)) op->status = check_nfsd_access(current_fh->fh_export, rqstp); } - encode_op: - /* Only from SEQUENCE */ - if (cstate->status == nfserr_replay_cache) { - dprintk("%s NFS4.1 replay from cache\n", __func__); - status = op->status; - goto out; - } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; nfsd4_encode_replay(&resp->xdr, op); From 0648a07c9b22acc33ead0645cf8f607b0c9c7e32 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Sat, 20 May 2017 09:58:10 +0200 Subject: [PATCH 051/206] scsi: scsi_dh_rdac: Use ctlr directly in rdac_failover_get() rdac_failover_get references struct rdac_controller as ctlr->ms_sdev->handler_data->ctlr for no apparent reason. Besides being inefficient this also introduces a null-pointer dereference as send_mode_select() sets ctlr->ms_sdev to NULL before calling rdac_failover_get(): [ 18.432550] device-mapper: multipath service-time: version 0.3.0 loaded [ 18.436124] BUG: unable to handle kernel NULL pointer dereference at 0000000000000790 [ 18.436129] IP: send_mode_select+0xca/0x560 [ 18.436129] PGD 0 [ 18.436130] P4D 0 [ 18.436130] [ 18.436132] Oops: 0000 [#1] SMP [ 18.436133] Modules linked in: dm_service_time sd_mod dm_multipath amdkfd amd_iommu_v2 radeon(+) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qla2xxx drm serio_raw scsi_transport_fc bnx2 i2c_core dm_mirror dm_region_hash dm_log dm_mod [ 18.436143] CPU: 4 PID: 443 Comm: kworker/u16:2 Not tainted 4.12.0-rc1.1.el7.test.x86_64 #1 [ 18.436144] Hardware name: IBM BladeCenter LS22 -[79013SG]-/Server Blade, BIOS -[L8E164AUS-1.07]- 05/25/2011 [ 18.436145] Workqueue: kmpath_rdacd send_mode_select [ 18.436146] task: ffff880225116a40 task.stack: ffffc90002bd8000 [ 18.436148] RIP: 0010:send_mode_select+0xca/0x560 [ 18.436148] RSP: 0018:ffffc90002bdbda8 EFLAGS: 00010246 [ 18.436149] RAX: 0000000000000000 RBX: ffffc90002bdbe08 RCX: ffff88017ef04a80 [ 18.436150] RDX: ffffc90002bdbe08 RSI: ffff88017ef04a80 RDI: ffff8802248e4388 [ 18.436151] RBP: ffffc90002bdbe48 R08: 0000000000000000 R09: ffffffff81c104c0 [ 18.436151] R10: 00000000000001ff R11: 000000000000035a R12: ffffc90002bdbdd8 [ 18.436152] R13: ffff8802248e4390 R14: ffff880225152800 R15: ffff8802248e4400 [ 18.436153] FS: 0000000000000000(0000) GS:ffff880227d00000(0000) knlGS:0000000000000000 [ 18.436154] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.436154] CR2: 0000000000000790 CR3: 000000042535b000 CR4: 00000000000006e0 [ 18.436155] Call Trace: [ 18.436159] ? rdac_activate+0x14e/0x150 [ 18.436161] ? refcount_dec_and_test+0x11/0x20 [ 18.436162] ? kobject_put+0x1c/0x50 [ 18.436165] ? scsi_dh_activate+0x6f/0xd0 [ 18.436168] process_one_work+0x149/0x360 [ 18.436170] worker_thread+0x4d/0x3c0 [ 18.436172] kthread+0x109/0x140 [ 18.436173] ? rescuer_thread+0x380/0x380 [ 18.436174] ? kthread_park+0x60/0x60 [ 18.436176] ret_from_fork+0x2c/0x40 [ 18.436177] Code: 49 c7 46 20 00 00 00 00 4c 89 ef c6 07 00 0f 1f 40 00 45 31 ed c7 45 b0 05 00 00 00 44 89 6d b4 4d 89 f5 4c 8b 75 a8 49 8b 45 20 <48> 8b b0 90 07 00 00 48 8b 56 10 8b 42 10 48 8d 7a 28 85 c0 0f [ 18.436192] RIP: send_mode_select+0xca/0x560 RSP: ffffc90002bdbda8 [ 18.436192] CR2: 0000000000000790 [ 18.436198] ---[ end trace 40f3e4dca1ffabdd ]--- [ 18.436199] Kernel panic - not syncing: Fatal exception [ 18.436222] Kernel Offset: disabled [-- MARK -- Thu May 18 11:45:00 2017] Fixes: 327825574132 scsi_dh_rdac: switch to scsi_execute_req_flags() Cc: stable@vger.kernel.org Signed-off-by: Artem Savkov Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_rdac.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 3cbab8710e58..2ceff585f189 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -265,18 +265,16 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, struct list_head *list, unsigned char *cdb) { - struct scsi_device *sdev = ctlr->ms_sdev; - struct rdac_dh_data *h = sdev->handler_data; struct rdac_mode_common *common; unsigned data_size; struct rdac_queue_data *qdata; u8 *lun_table; - if (h->ctlr->use_ms10) { + if (ctlr->use_ms10) { struct rdac_pg_expanded *rdac_pg; data_size = sizeof(struct rdac_pg_expanded); - rdac_pg = &h->ctlr->mode_select.expanded; + rdac_pg = &ctlr->mode_select.expanded; memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40; @@ -288,7 +286,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, struct rdac_pg_legacy *rdac_pg; data_size = sizeof(struct rdac_pg_legacy); - rdac_pg = &h->ctlr->mode_select.legacy; + rdac_pg = &ctlr->mode_select.legacy; memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER; @@ -304,7 +302,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, } /* Prepare the command. */ - if (h->ctlr->use_ms10) { + if (ctlr->use_ms10) { cdb[0] = MODE_SELECT_10; cdb[7] = data_size >> 8; cdb[8] = data_size & 0xff; From 5e901d0b15c0cba8c5ba55e4be46fc5a0e2f3cb9 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Fri, 19 May 2017 01:33:15 -0700 Subject: [PATCH 052/206] scsi: qedi: Fix bad pte call trace when iscsiuio is stopped. munmap done by iscsiuio during a stop of the service triggers a "bad pte" warning sometimes. munmap kernel path goes through the mmapped pages and has a validation check for mapcount (in struct page) to be zero or above. kzalloc, which we had used to allocate udev->ctrl, uses slab allocations, which re-uses mapcount (union) for other purposes that can make the mapcount look negative. Avoid all these trouble by invoking one of the __get_free_pages wrappers to be used instead of kzalloc for udev->ctrl. BUG: Bad page map in process iscsiuio pte:80000000aa624067 pmd:3e6777067 page:ffffea0002a98900 count:2 mapcount:-2143289280 mapping: (null) index:0xffff8800aa624e00 page flags: 0x10075d00000090(dirty|slab) page dumped because: bad pte addr:00007fcba70a3000 vm_flags:0c0400fb anon_vma: (null) mapping:ffff8803edf66e90 index:0 Call Trace: dump_stack+0x19/0x1b print_bad_pte+0x1af/0x250 unmap_page_range+0x7a7/0x8a0 unmap_single_vma+0x81/0xf0 unmap_vmas+0x49/0x90 unmap_region+0xbe/0x140 ? vma_rb_erase+0x121/0x220 do_munmap+0x245/0x420 vm_munmap+0x41/0x60 SyS_munmap+0x22/0x30 tracesys+0xdd/0xe2 Signed-off-by: Arun Easi Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 92775a8b74b1..997e3052a706 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -151,6 +151,11 @@ static int qedi_uio_close(struct uio_info *uinfo, struct inode *inode) static void __qedi_free_uio_rings(struct qedi_uio_dev *udev) { + if (udev->uctrl) { + free_page((unsigned long)udev->uctrl); + udev->uctrl = NULL; + } + if (udev->ll2_ring) { free_page((unsigned long)udev->ll2_ring); udev->ll2_ring = NULL; @@ -169,7 +174,6 @@ static void __qedi_free_uio(struct qedi_uio_dev *udev) __qedi_free_uio_rings(udev); pci_dev_put(udev->pdev); - kfree(udev->uctrl); kfree(udev); } @@ -208,6 +212,11 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev) if (udev->ll2_ring || udev->ll2_buf) return rc; + /* Memory for control area. */ + udev->uctrl = (void *)get_zeroed_page(GFP_KERNEL); + if (!udev->uctrl) + return -ENOMEM; + /* Allocating memory for LL2 ring */ udev->ll2_ring_size = QEDI_PAGE_SIZE; udev->ll2_ring = (void *)get_zeroed_page(GFP_KERNEL | __GFP_COMP); @@ -237,7 +246,6 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev) static int qedi_alloc_uio_rings(struct qedi_ctx *qedi) { struct qedi_uio_dev *udev = NULL; - struct qedi_uio_ctrl *uctrl = NULL; int rc = 0; list_for_each_entry(udev, &qedi_udev_list, list) { @@ -258,21 +266,14 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi) goto err_udev; } - uctrl = kzalloc(sizeof(*uctrl), GFP_KERNEL); - if (!uctrl) { - rc = -ENOMEM; - goto err_uctrl; - } - udev->uio_dev = -1; udev->qedi = qedi; udev->pdev = qedi->pdev; - udev->uctrl = uctrl; rc = __qedi_alloc_uio_rings(udev); if (rc) - goto err_uio_rings; + goto err_uctrl; list_add(&udev->list, &qedi_udev_list); @@ -283,8 +284,6 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi) udev->rx_pkt = udev->ll2_buf + LL2_SINGLE_BUF_SIZE; return 0; - err_uio_rings: - kfree(uctrl); err_uctrl: kfree(udev); err_udev: From fc2fbf0d422b54b487c5e7413acd54cbac6d4151 Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:16 -0700 Subject: [PATCH 053/206] scsi: qedi: Correctly set firmware max supported BDs. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index 5ca3e8c28a3f..269dac620cf8 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -38,7 +38,7 @@ struct qedi_endpoint; #define QEDI_MAX_ISCSI_TASK 4096 #define QEDI_MAX_TASK_NUM 0x0FFF #define QEDI_MAX_ISCSI_CONNS_PER_HBA 1024 -#define QEDI_ISCSI_MAX_BDS_PER_CMD 256 /* Firmware max BDs is 256 */ +#define QEDI_ISCSI_MAX_BDS_PER_CMD 255 /* Firmware max BDs is 255 */ #define MAX_OUSTANDING_TASKS_PER_CON 1024 #define QEDI_MAX_BD_LEN 0xffff From d0788a528d9e09d7c17f05dd61c4cc492181f817 Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:17 -0700 Subject: [PATCH 054/206] scsi: qedi: Set dma_boundary to 0xfff. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi.h | 1 + drivers/scsi/qedi/qedi_iscsi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index 269dac620cf8..32632c9b2276 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -63,6 +63,7 @@ struct qedi_endpoint; #define QEDI_PAGE_MASK (~((QEDI_PAGE_SIZE) - 1)) #define QEDI_PAGE_SIZE 4096 +#define QEDI_HW_DMA_BOUNDARY 0xfff #define QEDI_PATH_HANDLE 0xFE0000000UL struct qedi_uio_ctrl { diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 3548d46f9b27..19177931b84c 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -59,6 +59,7 @@ struct scsi_host_template qedi_host_template = { .this_id = -1, .sg_tablesize = QEDI_ISCSI_MAX_BDS_PER_CMD, .max_sectors = 0xffff, + .dma_boundary = QEDI_HW_DMA_BOUNDARY, .cmd_per_lun = 128, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = qedi_shost_attrs, From 0ea9314f4e6c69f8d732e0a9310114c2de35ada8 Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:18 -0700 Subject: [PATCH 055/206] scsi: qedi: Fix endpoint NULL panic in qedi_set_path. RIP: 0010:qedi_set_path+0x114/0x570 [qedi] Call Trace: [] iscsi_if_recv_msg+0x623/0x14a0 [] ? rhashtable_lookup_compare+0x36/0x70 [] iscsi_if_rx+0x8e/0x1f0 [] netlink_unicast+0xed/0x1b0 [] netlink_sendmsg+0x330/0x770 [] sock_sendmsg+0xb0/0xf0 [] ? __switch_to+0x17b/0x4b0 [] ? __schedule+0x2d8/0x900 [] ___sys_sendmsg+0x3a9/0x3c0 [] ? get_futex_key+0x1c8/0x2b0 [] ? futex_wake+0x80/0x160 Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_iscsi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 19177931b84c..87f0af358b33 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -1224,8 +1224,12 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) iscsi_cid = (u32)path_data->handle; qedi_ep = qedi->ep_tbl[iscsi_cid]; - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_CONN, + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "iscsi_cid=0x%x, qedi_ep=%p\n", iscsi_cid, qedi_ep); + if (!qedi_ep) { + ret = -EINVAL; + goto set_path_exit; + } if (!is_valid_ether_addr(&path_data->mac_addr[0])) { QEDI_NOTICE(&qedi->dbg_ctx, "dst mac NOT VALID\n"); From 962ea1c0df6c5a36f4477aa4a10f4acc0f5f56bd Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:19 -0700 Subject: [PATCH 056/206] scsi: qedi: Set firmware tcp msl timer value. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 997e3052a706..62ba0550b68c 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -827,6 +827,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi) qedi->pf_params.iscsi_pf_params.num_uhq_pages_in_ring = num_sq_pages; qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues; qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug; + qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000; for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) { if ((1 << log_page_size) == PAGE_SIZE) From 3d61a3132212d6b1c8c6914700d5f6456712ac08 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Fri, 19 May 2017 01:33:20 -0700 Subject: [PATCH 057/206] scsi: qedi: set max_fin_rt default value max_fin_rt is the maximum re-transmission of FIN packets as part of the termination flow. After reaching this value the FW will send a single RESET. Signed-off-by: Nilesh Javali Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 62ba0550b68c..09a294634bc7 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -828,6 +828,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi) qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues; qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug; qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000; + qedi->pf_params.iscsi_pf_params.max_fin_rt = 2; for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) { if ((1 << log_page_size) == PAGE_SIZE) From b19775e4785996503b106e59d9d3a8839e677afd Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:21 -0700 Subject: [PATCH 058/206] scsi: qedi: Fix endpoint NULL panic during recovery. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_fw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index d6978cbc56f0..8bc7ee1a8ca8 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -1494,6 +1494,8 @@ static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn, tmf_hdr = (struct iscsi_tm *)mtask->hdr; qedi_cmd = (struct qedi_cmd *)mtask->dd_data; ep = qedi_conn->ep; + if (!ep) + return -ENODEV; tid = qedi_get_task_idx(qedi); if (tid == -1) From e274086e473c0cbea18051ae0a78a05f8d658f47 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 22 May 2017 17:46:58 +0800 Subject: [PATCH 059/206] drm/i915/gvt: clean up unsubmited workloads before destroying kmem cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to fix a memory leak issue caused by unfreed gvtg workload objects. Walk through the workload list and free all of the remained workloads before destroying kmem cache. [179.885211] INFO: Object 0xffff9cef10003b80 @offset=7040 [179.885657] kmem_cache_destroy gvt-g_vgpu_workload: Slab cache still has objects [179.886146] CPU: 2 PID: 2318 Comm: win_lucas Tainted: G    B   W       4.11.0+ #1 [179.887223] Call Trace: [179.887394] dump_stack+0x63/0x90 [179.887617] kmem_cache_destroy+0x1cf/0x1e0 [179.887960] intel_vgpu_clean_execlist+0x15/0x20 [i915] [179.888365] intel_gvt_destroy_vgpu+0x4c/0xd0 [i915] [179.888688] intel_vgpu_remove+0x2a/0x30 [kvmgt] [179.888988] mdev_device_remove_ops+0x23/0x50 [mdev] [179.889309] mdev_device_remove+0xe4/0x190 [mdev] [179.889615] remove_store+0x7d/0xb0 [mdev] [179.889885] dev_attr_store+0x18/0x30 [179.890129] sysfs_kf_write+0x37/0x40 [179.890371] kernfs_fop_write+0x107/0x180 [179.890632] __vfs_write+0x37/0x160 [179.890865] ? kmem_cache_alloc+0xd7/0x1b0 [179.891116] ? apparmor_file_permission+0x1a/0x20 [179.891372] ? security_file_permission+0x3b/0xc0 [179.891628] vfs_write+0xb8/0x1b0 [179.891812] SyS_write+0x55/0xc0 [179.891992] entry_SYSCALL_64_fastpath+0x1e/0xad Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 30 +++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index dca989eb2d42..24fe04d6307b 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -779,8 +779,26 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } +static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_engine_cs *engine; + struct intel_vgpu_workload *pos, *n; + unsigned int tmp; + + /* free the unsubmited workloads in the queues. */ + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + list_for_each_entry_safe(pos, n, + &vgpu->workload_q_head[engine->id], list) { + list_del_init(&pos->list); + free_workload(pos); + } + } +} + void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) { + clean_workloads(vgpu, ALL_ENGINES); kmem_cache_destroy(vgpu->workloads); } @@ -811,17 +829,9 @@ void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; - struct intel_vgpu_workload *pos, *n; unsigned int tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - /* free the unsubmited workload in the queue */ - list_for_each_entry_safe(pos, n, - &vgpu->workload_q_head[engine->id], list) { - list_del_init(&pos->list); - free_workload(pos); - } - + clean_workloads(vgpu, engine_mask); + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); - } } From 75b61250bf687c686ba6850c34eccc1303b0b827 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 16 May 2017 19:23:44 +0530 Subject: [PATCH 060/206] scsi: libcxgbi: fix skb use after free skb->data is assigned to task->hdr in cxgbi_conn_alloc_pdu(), skb gets freed after tx but task->hdr is still dereferenced in iscsi_tcp_task_xmit() to avoid this call skb_get() after allocating skb and free the skb in cxgbi_cleanup_task() or before allocating new skb in cxgbi_conn_alloc_pdu(). Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/libcxgbi.c | 25 ++++++++++++++++++------- drivers/scsi/cxgbi/libcxgbi.h | 16 ++++++++-------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index bd7d39ecbd24..fb06974c88c1 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -1873,6 +1873,11 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode) tcp_task->dd_data = tdata; task->hdr = NULL; + if (tdata->skb) { + kfree_skb(tdata->skb); + tdata->skb = NULL; + } + if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS) && (opcode == ISCSI_OP_SCSI_DATA_OUT || (opcode == ISCSI_OP_SCSI_CMD && @@ -1890,6 +1895,7 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode) return -ENOMEM; } + skb_get(tdata->skb); skb_reserve(tdata->skb, cdev->skb_tx_rsvd); task->hdr = (struct iscsi_hdr *)tdata->skb->data; task->hdr_max = SKB_TX_ISCSI_PDU_HEADER_MAX; /* BHS + AHS */ @@ -2035,9 +2041,9 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) unsigned int datalen; int err; - if (!skb) { + if (!skb || cxgbi_skcb_test_flag(skb, SKCBF_TX_DONE)) { log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX, - "task 0x%p, skb NULL.\n", task); + "task 0x%p, skb 0x%p\n", task, skb); return 0; } @@ -2050,7 +2056,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) } datalen = skb->data_len; - tdata->skb = NULL; /* write ppod first if using ofldq to write ppod */ if (ttinfo->flags & CXGBI_PPOD_INFO_FLAG_VALID) { @@ -2078,6 +2083,7 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) pdulen += ISCSI_DIGEST_SIZE; task->conn->txdata_octets += pdulen; + cxgbi_skcb_set_flag(skb, SKCBF_TX_DONE); return 0; } @@ -2086,7 +2092,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) "task 0x%p, skb 0x%p, len %u/%u, %d EAGAIN.\n", task, skb, skb->len, skb->data_len, err); /* reset skb to send when we are called again */ - tdata->skb = skb; return err; } @@ -2094,7 +2099,8 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) "itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n", task->itt, skb, skb->len, skb->data_len, err); - kfree_skb(skb); + __kfree_skb(tdata->skb); + tdata->skb = NULL; iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err); iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED); @@ -2113,8 +2119,10 @@ void cxgbi_cleanup_task(struct iscsi_task *task) tcp_task->dd_data = NULL; /* never reached the xmit task callout */ - if (tdata->skb) - __kfree_skb(tdata->skb); + if (tdata->skb) { + kfree_skb(tdata->skb); + tdata->skb = NULL; + } task_release_itt(task, task->hdr_itt); memset(tdata, 0, sizeof(*tdata)); @@ -2714,6 +2722,9 @@ EXPORT_SYMBOL_GPL(cxgbi_attr_is_visible); static int __init libcxgbi_init_module(void) { pr_info("%s", version); + + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) < + sizeof(struct cxgbi_skb_cb)); return 0; } diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h index 18e0ea83d361..239462a75760 100644 --- a/drivers/scsi/cxgbi/libcxgbi.h +++ b/drivers/scsi/cxgbi/libcxgbi.h @@ -195,7 +195,8 @@ struct cxgbi_skb_rx_cb { }; struct cxgbi_skb_tx_cb { - void *l2t; + void *handle; + void *arp_err_handler; struct sk_buff *wr_next; }; @@ -203,6 +204,7 @@ enum cxgbi_skcb_flags { SKCBF_TX_NEED_HDR, /* packet needs a header */ SKCBF_TX_MEM_WRITE, /* memory write */ SKCBF_TX_FLAG_COMPL, /* wr completion flag */ + SKCBF_TX_DONE, /* skb tx done */ SKCBF_RX_COALESCED, /* received whole pdu */ SKCBF_RX_HDR, /* received pdu header */ SKCBF_RX_DATA, /* received pdu payload */ @@ -215,13 +217,13 @@ enum cxgbi_skcb_flags { }; struct cxgbi_skb_cb { - unsigned char ulp_mode; - unsigned long flags; - unsigned int seq; union { struct cxgbi_skb_rx_cb rx; struct cxgbi_skb_tx_cb tx; }; + unsigned char ulp_mode; + unsigned long flags; + unsigned int seq; }; #define CXGBI_SKB_CB(skb) ((struct cxgbi_skb_cb *)&((skb)->cb[0])) @@ -374,11 +376,9 @@ static inline void cxgbi_sock_enqueue_wr(struct cxgbi_sock *csk, cxgbi_skcb_tx_wr_next(skb) = NULL; /* * We want to take an extra reference since both us and the driver - * need to free the packet before it's really freed. We know there's - * just one user currently so we use atomic_set rather than skb_get - * to avoid the atomic op. + * need to free the packet before it's really freed. */ - atomic_set(&skb->users, 2); + skb_get(skb); if (!csk->wr_pending_head) csk->wr_pending_head = skb; From f3cdbe39b2ab0636dec0d5d43b54f1061ce7566c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 17 May 2017 04:34:37 -0500 Subject: [PATCH 061/206] tcmu: fix crash during device removal We currently do tcmu_free_device ->tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE) -> uio_unregister_device -> kfree(tcmu_dev). The problem is that the kernel does not wait for userspace to do the close() on the uio device before freeing the tcmu_dev. We can then hit a race where the kernel frees the tcmu_dev before userspace does close() and so when close() -> release -> tcmu_release is done, we try to access a freed tcmu_dev. This patch made over the target-pending master branch moves the freeing of the tcmu_dev to when the last reference has been dropped. This also fixes a leak where if tcmu_configure_device was not called on a device we did not free udev->name which was allocated at tcmu_alloc_device time. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 46 ++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9045837f748b..beb5f098f32d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -97,7 +97,7 @@ struct tcmu_hba { struct tcmu_dev { struct list_head node; - + struct kref kref; struct se_device se_dev; char *name; @@ -969,6 +969,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL); if (!udev) return NULL; + kref_init(&udev->kref); udev->name = kstrdup(name, GFP_KERNEL); if (!udev->name) { @@ -1145,6 +1146,24 @@ static int tcmu_open(struct uio_info *info, struct inode *inode) return 0; } +static void tcmu_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct tcmu_dev *udev = TCMU_DEV(dev); + + kfree(udev->uio_info.name); + kfree(udev->name); + kfree(udev); +} + +static void tcmu_dev_kref_release(struct kref *kref) +{ + struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref); + struct se_device *dev = &udev->se_dev; + + call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); +} + static int tcmu_release(struct uio_info *info, struct inode *inode) { struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); @@ -1152,7 +1171,8 @@ static int tcmu_release(struct uio_info *info, struct inode *inode) clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags); pr_debug("close\n"); - + /* release ref from configure */ + kref_put(&udev->kref, tcmu_dev_kref_release); return 0; } @@ -1272,6 +1292,12 @@ static int tcmu_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = 128; dev->dev_attrib.hw_queue_depth = 128; + /* + * Get a ref incase userspace does a close on the uio device before + * LIO has initiated tcmu_free_device. + */ + kref_get(&udev->kref); + ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name, udev->uio_info.uio_dev->minor); if (ret) @@ -1284,11 +1310,13 @@ static int tcmu_configure_device(struct se_device *dev) return 0; err_netlink: + kref_put(&udev->kref, tcmu_dev_kref_release); uio_unregister_device(&udev->uio_info); err_register: vfree(udev->mb_addr); err_vzalloc: kfree(info->name); + info->name = NULL; return ret; } @@ -1302,14 +1330,6 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) return -EINVAL; } -static void tcmu_dev_call_rcu(struct rcu_head *p) -{ - struct se_device *dev = container_of(p, struct se_device, rcu_head); - struct tcmu_dev *udev = TCMU_DEV(dev); - - kfree(udev); -} - static bool tcmu_dev_configured(struct tcmu_dev *udev) { return udev->uio_info.uio_dev ? true : false; @@ -1364,10 +1384,10 @@ static void tcmu_free_device(struct se_device *dev) udev->uio_info.uio_dev->minor); uio_unregister_device(&udev->uio_info); - kfree(udev->uio_info.name); - kfree(udev->name); } - call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); + + /* release ref from init */ + kref_put(&udev->kref, tcmu_dev_kref_release); } enum { From 0037ae47812b1f431cc602100d1d51f37d77b61e Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 22 May 2017 16:05:22 +0200 Subject: [PATCH 062/206] dmaengine: ep93xx: Always start from BASE0 The current buffer is being reset to zero on device_free_chan_resources() but not on device_terminate_all(). It could happen that HW is restarted and expects BASE0 to be used, but the driver is not synchronized and will start from BASE1. One solution is to reset the buffer explicitly in m2p_hw_setup(). Signed-off-by: Alexander Sverdlin Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ep93xx_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index d37e8dda8079..deb009c3121f 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -323,6 +323,8 @@ static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) | M2P_CONTROL_ENABLE; m2p_set_control(edmac, control); + edmac->buffer = 0; + return 0; } From 98f9de366fccee7572c646af226b2d4b4841e3b5 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 22 May 2017 16:05:23 +0200 Subject: [PATCH 063/206] dmaengine: ep93xx: Don't drain the transfers in terminate_all() Draining the transfers in terminate_all callback happens with IRQs disabled, therefore induces huge latency: irqsoff latency trace v1.1.5 on 4.11.0 -------------------------------------------------------------------- latency: 39770 us, #57/57, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0) ----------------- | task: process-129 (uid:0 nice:0 policy:2 rt_prio:50) ----------------- => started at: _snd_pcm_stream_lock_irqsave => ended at: snd_pcm_stream_unlock_irqrestore _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / delay cmd pid ||||| time | caller \ / ||||| \ | / process-129 0d.s. 3us : _snd_pcm_stream_lock_irqsave process-129 0d.s1 9us : snd_pcm_stream_lock <-_snd_pcm_stream_lock_irqsave process-129 0d.s1 15us : preempt_count_add <-snd_pcm_stream_lock process-129 0d.s2 22us : preempt_count_add <-snd_pcm_stream_lock process-129 0d.s3 32us : snd_pcm_update_hw_ptr0 <-snd_pcm_period_elapsed process-129 0d.s3 41us : soc_pcm_pointer <-snd_pcm_update_hw_ptr0 process-129 0d.s3 50us : dmaengine_pcm_pointer <-soc_pcm_pointer process-129 0d.s3 58us+: snd_dmaengine_pcm_pointer_no_residue <-dmaengine_pcm_pointer process-129 0d.s3 96us : update_audio_tstamp <-snd_pcm_update_hw_ptr0 process-129 0d.s3 103us : snd_pcm_update_state <-snd_pcm_update_hw_ptr0 process-129 0d.s3 112us : xrun <-snd_pcm_update_state process-129 0d.s3 119us : snd_pcm_stop <-xrun process-129 0d.s3 126us : snd_pcm_action <-snd_pcm_stop process-129 0d.s3 134us : snd_pcm_action_single <-snd_pcm_action process-129 0d.s3 141us : snd_pcm_pre_stop <-snd_pcm_action_single process-129 0d.s3 150us : snd_pcm_do_stop <-snd_pcm_action_single process-129 0d.s3 157us : soc_pcm_trigger <-snd_pcm_do_stop process-129 0d.s3 166us : snd_dmaengine_pcm_trigger <-soc_pcm_trigger process-129 0d.s3 175us : ep93xx_dma_terminate_all <-snd_dmaengine_pcm_trigger process-129 0d.s3 182us : preempt_count_add <-ep93xx_dma_terminate_all process-129 0d.s4 189us*: m2p_hw_shutdown <-ep93xx_dma_terminate_all process-129 0d.s4 39472us : m2p_hw_setup <-ep93xx_dma_terminate_all ... rest skipped... process-129 0d.s. 40080us : => ep93xx_dma_tasklet => tasklet_action => __do_softirq => irq_exit => __handle_domain_irq => vic_handle_irq => __irq_usr => 0xb66c6668 Just abort the transfers and warn if the HW state is not what we expect. Move draining into device_synchronize callback. Signed-off-by: Alexander Sverdlin Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ep93xx_dma.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index deb009c3121f..ec240592f5c8 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -201,6 +201,7 @@ struct ep93xx_dma_engine { struct dma_device dma_dev; bool m2m; int (*hw_setup)(struct ep93xx_dma_chan *); + void (*hw_synchronize)(struct ep93xx_dma_chan *); void (*hw_shutdown)(struct ep93xx_dma_chan *); void (*hw_submit)(struct ep93xx_dma_chan *); int (*hw_interrupt)(struct ep93xx_dma_chan *); @@ -333,21 +334,27 @@ static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac) return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3; } -static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac) { + unsigned long flags; u32 control; + spin_lock_irqsave(&edmac->lock, flags); control = readl(edmac->regs + M2P_CONTROL); control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); m2p_set_control(edmac, control); + spin_unlock_irqrestore(&edmac->lock, flags); while (m2p_channel_state(edmac) >= M2P_STATE_ON) - cpu_relax(); + schedule(); +} +static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ m2p_set_control(edmac, 0); - while (m2p_channel_state(edmac) == M2P_STATE_STALL) - cpu_relax(); + while (m2p_channel_state(edmac) != M2P_STATE_IDLE) + dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n"); } static void m2p_fill_desc(struct ep93xx_dma_chan *edmac) @@ -1162,6 +1169,26 @@ ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, return NULL; } +/** + * ep93xx_dma_synchronize - Synchronizes the termination of transfers to the + * current context. + * @chan: channel + * + * Synchronizes the DMA channel termination to the current context. When this + * function returns it is guaranteed that all transfers for previously issued + * descriptors have stopped and and it is safe to free the memory associated + * with them. Furthermore it is guaranteed that all complete callback functions + * for a previously submitted descriptor have finished running and it is safe to + * free resources accessed from within the complete callbacks. + */ +static void ep93xx_dma_synchronize(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + + if (edmac->edma->hw_synchronize) + edmac->edma->hw_synchronize(edmac); +} + /** * ep93xx_dma_terminate_all - terminate all transactions * @chan: channel @@ -1325,6 +1352,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg; dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic; dma_dev->device_config = ep93xx_dma_slave_config; + dma_dev->device_synchronize = ep93xx_dma_synchronize; dma_dev->device_terminate_all = ep93xx_dma_terminate_all; dma_dev->device_issue_pending = ep93xx_dma_issue_pending; dma_dev->device_tx_status = ep93xx_dma_tx_status; @@ -1342,6 +1370,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) } else { dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + edma->hw_synchronize = m2p_hw_synchronize; edma->hw_setup = m2p_hw_setup; edma->hw_shutdown = m2p_hw_shutdown; edma->hw_submit = m2p_hw_submit; From 0f0b9b63e14fc3f66e4d342df016c9b071c5abed Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 May 2017 13:14:13 +0200 Subject: [PATCH 064/206] gfs2: Make flush bios explicitely sync Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 CC: Steven Whitehouse CC: cluster-devel@redhat.com CC: stable@vger.kernel.org Acked-by: Bob Peterson Signed-off-by: Jan Kara --- fs/gfs2/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f865b96374df..d2955daf17a4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -659,7 +659,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) struct gfs2_log_header *lh; unsigned int tail; u32 hash; - int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META; + int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); From d8747d642ec4ce96adf17ae35652a5e4015cfe02 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 May 2017 13:16:18 +0200 Subject: [PATCH 065/206] reiserfs: Make flush bios explicitely sync Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 CC: reiserfs-devel@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/reiserfs/journal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index da01f497180a..39bb1e838d8d 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1112,7 +1112,7 @@ static int flush_commit_list(struct super_block *s, depth = reiserfs_write_unlock_nested(s); if (reiserfs_barrier_flush(s)) __sync_dirty_buffer(jl->j_commit_bh, - REQ_PREFLUSH | REQ_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(jl->j_commit_bh); reiserfs_write_lock_nested(s, depth); @@ -1271,7 +1271,7 @@ static int _update_journal_header_block(struct super_block *sb, if (reiserfs_barrier_flush(sb)) __sync_dirty_buffer(journal->j_header_bh, - REQ_PREFLUSH | REQ_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(journal->j_header_bh); From 6d3b5d8d8dd1c14f991ccab84b40f8425f1ae91b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 16 May 2017 12:00:15 -0400 Subject: [PATCH 066/206] NFS fix COMMIT after COPY Fix a typo in the commit e092693443b995c8e3a565a73b5fdb05f1260f9b "NFS append COMMIT after synchronous COPY" Reported-by: Eryu Guan Fixes: e092693443b ("NFS append COMMIT after synchronous COPY") Signed-off-by: Olga Kornievskaia Tested-by: Eryu Guan Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 929d09a5310a..319a47db218d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -177,7 +177,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, if (status) goto out; - if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier, + if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier, &res->commit_res.verf->verifier)) { status = -EAGAIN; goto out; From 662f9a105b4322b8559d448f86110e6ec24b8738 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 May 2017 00:31:12 +0300 Subject: [PATCH 067/206] pNFS/flexfiles: missing error code in ff_layout_alloc_lseg() If xdr_inline_decode() fails then we end up returning ERR_PTR(0). The caller treats NULL returns as -ENOMEM so it doesn't really hurt runtime, but obviously we intended to set an error code here. Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver") Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f5714ee01000..23542dc44a25 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -454,6 +454,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; /* fh */ + rc = -EIO; p = xdr_inline_decode(&stream, 4); if (!p) goto out_err_free; From d2c23c0075d7091bf749411bd2ee757cf4ec356c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 22 May 2017 22:18:28 +0200 Subject: [PATCH 068/206] xprtrdma: Delete an error message for a failed memory allocation in xprt_rdma_bc_setup() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf Signed-off-by: Markus Elfring Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/backchannel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 24fedd4b117e..03f6b5840764 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -119,11 +119,9 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) for (i = 0; i < (reqs << 1); i++) { rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); - if (!rqst) { - pr_err("RPC: %s: Failed to create bc rpc_rqst\n", - __func__); + if (!rqst) goto out_free; - } + dprintk("RPC: %s: new rqst %p\n", __func__, rqst); rqst->rq_xprt = &r_xprt->rx_xprt; From 08cb5b0f058a325fcb5305e33f572ff6d6dfa289 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 22 May 2017 20:20:23 -0400 Subject: [PATCH 069/206] pnfs: Fix the check for requests in range of layout segment It's possible and acceptable for NFS to attempt to add requests beyond the range of the current pgio->pg_lseg, a case which should be caught and limited by the pg_test operation. However, the current handling of this case replaces pgio->pg_lseg with a new layout segment (after a WARN) within that pg_test operation. That will cause all the previously added requests to be submitted with this new layout segment, which may not be valid for those requests. Fix this problem by only returning zero for the number of bytes to coalesce from pg_test for this case which allows any previously added requests to complete on the current layout segment. The check for requests starting out of range of the layout segment moves to pg_init, so that the replacement of pgio->pg_lseg will be done when the next request is added. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 25 +++++++++++++++++-------- fs/nfs/pnfs.h | 10 ++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index adc6ec28d4b5..c383d0913b54 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2094,12 +2094,26 @@ pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); +/* + * Check for any intersection between the request and the pgio->pg_lseg, + * and if none, put this pgio->pg_lseg away. + */ +static void +pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + } +} + void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { u64 rd_size = req->wb_bytes; pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (pgio->pg_lseg == NULL) { if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); @@ -2131,6 +2145,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, @@ -2191,16 +2206,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); - WARN_ON_ONCE(req_start >= seg_end); + /* start of request is past the last byte of this segment */ - if (req_start >= seg_end) { - /* reference the new lseg */ - if (pgio->pg_ops->pg_cleanup) - pgio->pg_ops->pg_cleanup(pgio); - if (pgio->pg_ops->pg_init) - pgio->pg_ops->pg_init(pgio, req); + if (req_start >= seg_end) return 0; - } /* adjust 'size' iff there are fewer bytes left in the * segment than what nfs_generic_pg_test returned */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2d05b756a8d6..99731e3e332f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -593,6 +593,16 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2); } +static inline bool +pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page *req) +{ + u64 seg_last = pnfs_end_offset(lseg->pls_range.offset, lseg->pls_range.length); + u64 req_last = req_offset(req) + req->wb_bytes; + + return pnfs_is_range_intersecting(lseg->pls_range.offset, seg_last, + req_offset(req), req_last); +} + extern unsigned int layoutstats_timer; #ifdef NFS_DEBUG From b49c15f97c936ef5a536821f97e4dd8568369802 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 May 2017 07:55:44 -0400 Subject: [PATCH 070/206] NFSv4.0: Fix a lock leak in nfs40_walk_client_list Xiaolong Ye's kernel test robot detected the following Oops: [ 299.158991] BUG: scheduling while atomic: mount.nfs/9387/0x00000002 [ 299.169587] 2 locks held by mount.nfs/9387: [ 299.176165] #0: (nfs_clid_init_mutex){......}, at: [] nfs4_discover_server_trunking+0x47/0x1fc [ 299.201802] #1: (&(&nn->nfs_client_lock)->rlock){......}, at: [] nfs40_walk_client_list+0x2e9/0x338 [ 299.221979] CPU: 0 PID: 9387 Comm: mount.nfs Not tainted 4.11.0-rc7-00021-g14d1bbb #45 [ 299.235584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20161025_171302-gandalf 04/01/2014 [ 299.251176] Call Trace: [ 299.255192] dump_stack+0x61/0x7e [ 299.260416] __schedule_bug+0x65/0x74 [ 299.266208] __schedule+0x5d/0x87c [ 299.271883] schedule+0x89/0x9a [ 299.276937] schedule_timeout+0x232/0x289 [ 299.283223] ? detach_if_pending+0x10b/0x10b [ 299.289935] schedule_timeout_uninterruptible+0x2a/0x2c [ 299.298266] ? put_rpccred+0x3e/0x115 [ 299.304327] ? schedule_timeout_uninterruptible+0x2a/0x2c [ 299.312851] msleep+0x1e/0x22 [ 299.317612] nfs4_discover_server_trunking+0x102/0x1fc [ 299.325644] nfs4_init_client+0x13f/0x194 It looks as if we recently added a spin_lock() leak to nfs40_walk_client_list() when cleaning up the code. Reported-by: kernel test robot Fixes: 14d1bbb0ca42 ("NFS: Create a common nfs4_match_client() function") Cc: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 692a7a8bfc7a..66776f022111 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -582,7 +582,6 @@ int nfs40_walk_client_list(struct nfs_client *new, */ nfs4_schedule_path_down_recovery(pos); default: - spin_lock(&nn->nfs_client_lock); goto out; } From d38162e4b5c643733792f32be4ea107c831827b4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 17 May 2017 15:15:57 +0200 Subject: [PATCH 071/206] Revert "drm/i915: Restore lost "Initialized i915" welcome message" This reverts commit bc5ca47c0af4f949ba889e666b7da65569e36093. Gabriel put this back into generic code with commit 75f6dfe3e652e1adef8cc1b073c89f3e22103a8f Author: Gabriel Krisman Bertazi Date: Wed Dec 28 12:32:11 2016 -0200 drm: Deduplicate driver initialization message but somehow he missed Chris' patch to add the message meanwhile. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101025 Fixes: 75f6dfe3e652 ("drm: Deduplicate driver initialization message") Cc: Gabriel Krisman Bertazi Cc: Daniel Vetter Cc: Jani Nikula Cc: Chris Wilson Cc: # v4.11+ Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170517131557.7836-1-daniel.vetter@ffwll.ch (cherry picked from commit 6bdba81979b2c3c8fed0be62ca31c32c3129d85f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3036d4835b0f..c994fe6e65b2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1272,10 +1272,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) dev_priv->ipc_enabled = false; - /* Everything is in place, we can now relax! */ - DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", - driver.name, driver.major, driver.minor, driver.patchlevel, - driver.date, pci_name(pdev), dev_priv->drm.primary->index); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) DRM_INFO("DRM_I915_DEBUG enabled\n"); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) From 2e0bb5b38f32eb21fe25e845721a205370adeffe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 May 2017 20:44:12 +0100 Subject: [PATCH 072/206] drm/i915/selftests: Silence compiler warning in igt_ctx_exec The compiler doesn't always spot the guard that object is allocated on the first pass, leading to: drivers/gpu/drm/i915/selftests/i915_gem_context.c: warning: 'obj' may be used uninitialized in this function [-Wuninitialized]: => 370:8 v2: Make it more obvious by setting obj to NULL on the first pass and any later pass where we need to reallocate. Reported-by: Geert Uytterhoeven Fixes: 791ff39ae32a ("drm/i915: Live testing for context execution") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matthew Auld c: # v4.12-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170523194412.1195-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit ca83d5840cb641b2efb04db0b70fa56955dd1453) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 1afb8b06e3e1..12b85b3278cd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj = NULL; struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); @@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg) } for_each_engine(engine, i915, id) { - if (dw == 0) { + if (!obj) { obj = create_test_object(ctx, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg) goto out_unlock; } - if (++dw == max_dwords(obj)) + if (++dw == max_dwords(obj)) { + obj = NULL; dw = 0; + } ndwords++; } ncontexts++; From 9dfa7bba35ac08a63565d58c454dccb7e1bb0a08 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Sun, 30 Apr 2017 19:49:21 +1200 Subject: [PATCH 073/206] fix race in drivers/char/random.c:get_reg() get_reg() can be reentered on architectures with prioritized interrupts (m68k in this case), causing f->reg_index to be incremented after the range check. Out of bounds memory access past the pt_regs struct results. This will go mostly undetected unless access is beyond end of memory. Prevent the race by disabling interrupts in get_reg(). Tested on m68k (Atari Falcon, and ARAnyM emulator). Kudos to Geert Uytterhoeven for helping to trace this race. Signed-off-by: Michael Schmitz Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 0ab024918907..a561f0c2f428 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1097,12 +1097,16 @@ static void add_interrupt_bench(cycles_t start) static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { __u32 *ptr = (__u32 *) regs; + unsigned long flags; if (regs == NULL) return 0; + local_irq_save(flags); if (f->reg_idx >= sizeof(struct pt_regs) / sizeof(__u32)) f->reg_idx = 0; - return *(ptr + f->reg_idx++); + ptr += f->reg_idx++; + local_irq_restore(flags); + return *ptr; } void add_interrupt_randomness(int irq, int irq_flags) From 4179bc30b2fe85f827d93e0ae7ae8f49ad3afc02 Mon Sep 17 00:00:00 2001 From: Kyungchan Koh Date: Wed, 24 May 2017 10:16:27 -0700 Subject: [PATCH 074/206] md: uuid debug statement now in processor byte order. Previously, the uuid debug statements were printed in little-endian format, which wasn't consistent in machines that might not be in little-endian byte order. With this change, the output will be consistent for all machines with different byte-ordering. Signed-off-by: Kyungchan Koh Signed-off-by: Shaohua Li --- drivers/md/bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index bf7419a56454..f4eace5ea184 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -485,10 +485,10 @@ void bitmap_print_sb(struct bitmap *bitmap) pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); pr_debug(" version: %d\n", le32_to_cpu(sb->version)); pr_debug(" uuid: %08x.%08x.%08x.%08x\n", - *(__u32 *)(sb->uuid+0), - *(__u32 *)(sb->uuid+4), - *(__u32 *)(sb->uuid+8), - *(__u32 *)(sb->uuid+12)); + le32_to_cpu(*(__u32 *)(sb->uuid+0)), + le32_to_cpu(*(__u32 *)(sb->uuid+4)), + le32_to_cpu(*(__u32 *)(sb->uuid+8)), + le32_to_cpu(*(__u32 *)(sb->uuid+12))); pr_debug(" events: %llu\n", (unsigned long long) le64_to_cpu(sb->events)); pr_debug("events cleared: %llu\n", From e153903686deac9ea8c499b49516126f1a844fdb Mon Sep 17 00:00:00 2001 From: Nix Date: Tue, 16 May 2017 10:13:31 +0100 Subject: [PATCH 075/206] md: report sector of stripes with check mismatches This makes it possible, with appropriate filesystem support, for a sysadmin to tell what is affected by the mismatch, and whether it should be ignored (if it's inside a swap partition, for instance). We ratelimit to prevent log flooding: if there are so many mismatches that ratelimiting is necessary, the individual messages are relatively unlikely to be important (either the machine is swapping like crazy or something is very wrong with the disk). Signed-off-by: Nick Alcock Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9c4f7659f8b1..722064689e82 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4085,10 +4085,15 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, set_bit(STRIPE_INSYNC, &sh->state); else { atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { /* don't try to repair!! */ set_bit(STRIPE_INSYNC, &sh->state); - else { + pr_warn_ratelimited("%s: mismatch sector in range " + "%llu-%llu\n", mdname(conf->mddev), + (unsigned long long) sh->sector, + (unsigned long long) sh->sector + + STRIPE_SECTORS); + } else { sh->check_state = check_state_compute_run; set_bit(STRIPE_COMPUTE_RUN, &sh->state); set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); @@ -4237,10 +4242,15 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, } } else { atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { /* don't try to repair!! */ set_bit(STRIPE_INSYNC, &sh->state); - else { + pr_warn_ratelimited("%s: mismatch sector in range " + "%llu-%llu\n", mdname(conf->mddev), + (unsigned long long) sh->sector, + (unsigned long long) sh->sector + + STRIPE_SECTORS); + } else { int *target = &sh->ops.target; sh->ops.target = -1; From 717902cc93118119a6fce7765da6cf2786987418 Mon Sep 17 00:00:00 2001 From: Timmy Li Date: Mon, 22 May 2017 16:48:28 +0100 Subject: [PATCH 076/206] ARM64: PCI: Fix struct acpi_pci_root_ops allocation failure path Commit 093d24a20442 ("arm64: PCI: Manage controller-specific data on per-controller basis") added code to allocate ACPI PCI root_ops dynamically on a per host bridge basis but failed to update the corresponding memory allocation failure path in pci_acpi_scan_root() leading to a potential memory leakage. Fix it by adding the required kfree call. Fixes: 093d24a20442 ("arm64: PCI: Manage controller-specific data on per-controller basis") Reviewed-by: Tomasz Nowicki Signed-off-by: Timmy Li [lorenzo.pieralisi@arm.com: refactored code, rewrote commit log] Signed-off-by: Lorenzo Pieralisi CC: Will Deacon CC: Bjorn Helgaas Signed-off-by: Catalin Marinas --- arch/arm64/kernel/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..c7e3e6387a49 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); - if (!root_ops) + if (!root_ops) { + kfree(ri); return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); if (!ri->cfg) { From 9bd9590997b92fbd79fd028f704f6c584b4439d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:22 +0300 Subject: [PATCH 077/206] drm/i915: Stop pretending to mask/unmask LPE audio interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vlv_display_irq_postinstall() enables the LPE audio interrupts regardless of whether the LPE audio irq chip has masked/unmasked them. Also the irqchip masking/unmasking doesn't consider the state of the display power well or the device, and hence just leads to dmesg spew when it tries to access the hardware while it's powered down. If the current way works, then we don't need to do anything in the mask/unmask hooks. If it doesn't work, well, then we'd need to properly track whether the irqchip has masked/unmasked the interrupts when we enable display interrupts. And the mask/unmask hooks would need to check whether display interrupts are even enabled before frobbing with he registers. So let's just assume the current way works and neuter the mask/unmask hooks. Also clean up vlv_display_irq_postinstall() a bit and stop it from trying to unmask/enable the LPE C interrupt on VLV since it doesn't exist. Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-4-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai (cherry picked from commit ebf5f921478b9b55ed4e634b994571dd23a8fca3) Reference: http://mid.mail-archive.com/874cf6d3-4e45-d4cf-e662-eb972490d2ce@redhat.com Tested-by: Hans de Goede Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 15 +++++------ drivers/gpu/drm/i915/intel_lpe_audio.c | 36 -------------------------- 2 files changed, 6 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fd97fe00cd0d..190f6aa5d15e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2953,7 +2953,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) u32 pipestat_mask; u32 enable_mask; enum pipe pipe; - u32 val; pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | PIPE_CRC_DONE_INTERRUPT_STATUS; @@ -2964,18 +2963,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) enable_mask = I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_LPE_PIPE_A_INTERRUPT | + I915_LPE_PIPE_B_INTERRUPT; + if (IS_CHERRYVIEW(dev_priv)) - enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT; + enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | + I915_LPE_PIPE_C_INTERRUPT; WARN_ON(dev_priv->irq_mask != ~0); - val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT | - I915_LPE_PIPE_C_INTERRUPT); - - enable_mask |= val; - dev_priv->irq_mask = ~enable_mask; GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 668f00480d97..292fedf30b00 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -149,44 +149,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) static void lpe_audio_irq_unmask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask &= ~val; - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - POSTING_READ(VLV_IMR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static void lpe_audio_irq_mask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask |= val; - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - POSTING_READ(VLV_IIR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static struct irq_chip lpe_audio_irqchip = { @@ -330,8 +296,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) desc = irq_to_desc(dev_priv->lpe_audio.irq); - lpe_audio_irq_mask(&desc->irq_data); - lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); From 5acc1ca4fb15f00bfa3d4046e35ca381bc25d580 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sat, 20 May 2017 20:32:32 -0700 Subject: [PATCH 078/206] KVM: X86: Fix preempt the preemption timer cancel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preemption can occur during cancel preemption timer, and there will be inconsistent status in lapic, vmx and vmcs field. CPU0 CPU1 preemption timer vmexit handle_preemption_timer(vCPU0) kvm_lapic_expired_hv_timer vmx_cancel_hv_timer vmx->hv_deadline_tsc = -1 vmcs_clear_bits /* hv_timer_in_use still true */ sched_out sched_in kvm_arch_vcpu_load vmx_set_hv_timer write vmx->hv_deadline_tsc vmcs_set_bits /* back in kvm_lapic_expired_hv_timer */ hv_timer_in_use = false ... vmx_vcpu_run vmx_arm_hv_run write preemption timer deadline spurious preemption timer vmexit handle_preemption_timer(vCPU0) kvm_lapic_expired_hv_timer WARN_ON(!apic->lapic_timer.hv_timer_in_use); This can be reproduced sporadically during boot of L2 on a preemptible L1, causing a splat on L1. WARNING: CPU: 3 PID: 1952 at arch/x86/kvm/lapic.c:1529 kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] CPU: 3 PID: 1952 Comm: qemu-system-x86 Not tainted 4.12.0-rc1+ #24 RIP: 0010:kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] Call Trace: handle_preemption_timer+0xe/0x20 [kvm_intel] vmx_handle_exit+0xc9/0x15f0 [kvm_intel] ? lock_acquire+0xdb/0x250 ? lock_acquire+0xdb/0x250 ? kvm_arch_vcpu_ioctl_run+0xdf3/0x1ce0 [kvm] kvm_arch_vcpu_ioctl_run+0xe55/0x1ce0 [kvm] kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? __fget+0xf3/0x210 do_vfs_ioctl+0xa4/0x700 ? __fget+0x114/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x8f/0x750 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL64_slow_path+0x25/0x25 This patch fixes it by disabling preemption while cancelling preemption timer. This way cancel_hv_timer is atomic with respect to kvm_arch_vcpu_load. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c329d2894905..6e6f345adfe6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; + preempt_enable(); } static bool start_hv_timer(struct kvm_lapic *apic) From e1d39b17e044e8ae819827810d87d809ba5f58c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Sat, 20 May 2017 13:22:56 +0200 Subject: [PATCH 079/206] KVM: nVMX: Fix handling of lmsw instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The decision whether or not to exit from L2 to L1 on an lmsw instruction is based on bogus values: instead of using the information encoded within the exit qualification, it uses the data also used for the mov-to-cr instruction, which boils down to using whatever is in %eax at that point. Use the correct values instead. Without this fix, an L1 may not get notified when a 32-bit Linux L2 switches its secondary CPUs to protected mode; the L1 is only notified on the next modification of CR0. This short time window poses a problem, when there is some other reason to exit to L1 in between. Then, L2 will be resumed in real mode and chaos ensues. Signed-off-by: Jan H. Schönherr Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 72f78396bc09..880f371705bc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7913,11 +7913,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; - int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_readl(vcpu, reg); + int reg; + unsigned long val; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + reg = (exit_qualification >> 8) & 15; + val = kvm_register_readl(vcpu, reg); switch (cr) { case 0: if (vmcs12->cr0_guest_host_mask & @@ -7972,6 +7974,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * lmsw can change bits 1..3 of cr0, and only set bit 0 of * cr0. Other attempted changes are ignored, with no exit. */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) return true; From 52b5419016997f2960e9c8b6584c4acb3875d126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Sat, 20 May 2017 13:24:32 +0200 Subject: [PATCH 080/206] KVM: x86: Fix virtual wire mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel SDM says, that at most one LAPIC should be configured with ExtINT delivery. KVM configures all LAPICs this way. This causes pic_unlock() to kick the first available vCPU from the internal KVM data structures. If this vCPU is not the BSP, but some not-yet-booted AP, the BSP may never realize that there is an interrupt. Fix that by enabling ExtINT delivery only for the BSP. This allows booting a Linux guest without a TSC in the above situation. Otherwise the BSP gets stuck in calibrate_delay_converge(). Signed-off-by: Jan H. Schönherr Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6e6f345adfe6..d24c8742d9b0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1936,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) for (i = 0; i < KVM_APIC_LVT_NUM; i++) kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); apic_update_lvtt(apic); - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) + if (kvm_vcpu_is_reset_bsp(vcpu) && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) kvm_lapic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); From 5720acf4bfc142ba568d5b6782fceaf62ed15e0b Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Fri, 26 May 2017 14:45:21 +0200 Subject: [PATCH 081/206] livepatch: Make livepatch dependent on !TRIM_UNUSED_KSYMS If TRIM_UNUSED_KSYMS is enabled, all unneeded exported symbols are made unexported. Two-pass build of the kernel is done to find out which symbols are needed based on a configuration. This effectively complicates things for out-of-tree modules. Livepatch exports functions to (un)register and enable/disable a live patch. The only in-tree module which uses these functions is a sample in samples/livepatch/. If the sample is disabled, the functions are trimmed and out-of-tree live patches cannot be built. Note that live patches are intended to be built out-of-tree. Suggested-by: Michal Marek Acked-by: Josh Poimboeuf Acked-by: Jessica Yu Signed-off-by: Miroslav Benes Signed-off-by: Jiri Kosina --- kernel/livepatch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 045022557936..ec4565122e65 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -10,6 +10,7 @@ config LIVEPATCH depends on SYSFS depends on KALLSYMS_ALL depends on HAVE_LIVEPATCH + depends on !TRIM_UNUSED_KSYMS help Say Y here if you want to support kernel live patching. This option has no runtime impact until a kernel "patch" From ac20fa0a96c32ff40c8a127dfd58140f1df44a9b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 2 May 2017 12:39:53 -0400 Subject: [PATCH 082/206] drm/msm: select PM_OPP Otherwise, if nothing else enabled selects it, dev_pm_opp_of_add_table() will return -ENOTSUPP. Fixes: e2af8b6 ("drm/msm: gpu: Use OPP tables if we can") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 5b8e23d051f2..0a31cd6d01ce 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -13,6 +13,7 @@ config DRM_MSM select QCOM_SCM select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE + select PM_OPP default y help DRM/KMS driver for MSM/snapdragon. From 786813c343cb619d23cb0990e152e350b826d810 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 3 May 2017 10:04:48 -0400 Subject: [PATCH 083/206] drm/msm/mdp5: use __drm_atomic_helper_plane_duplicate_state() Somehow the helper was never retrofitted for mdp5. Which meant when plane_state->fence was added, it could get copied into new state in mdp5_plane_duplicate_state(). If an update to disable the plane (for example on rmfb) managed to sneak in after an nonblock update had swapped state, but before it was committed, we'd get a splat: WARNING: CPU: 1 PID: 69 at ../drivers/gpu/drm/drm_atomic_helper.c:1061 drm_atomic_helper_wait_for_fences+0xe0/0xf8 Modules linked in: CPU: 1 PID: 69 Comm: kworker/1:1 Tainted: G W 4.11.0-rc8+ #1187 Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) Workqueue: events drm_mode_rmfb_work_fn task: ffffffc036560d00 task.stack: ffffffc036550000 PC is at drm_atomic_helper_wait_for_fences+0xe0/0xf8 LR is at complete_commit.isra.1+0x44/0x1c0 pc : [] lr : [] pstate: 20000145 sp : ffffffc036553b60 x29: ffffffc036553b60 x28: ffffffc0264e6a00 x27: ffffffc035659000 x26: 0000000000000000 x25: ffffffc0240e8000 x24: 0000000000000038 x23: 0000000000000000 x22: ffffff800858f200 x21: ffffffc0240e8000 x20: ffffffc02f56a800 x19: 0000000000000000 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: ffffffc00a192700 x13: 0000000000000004 x12: 0000000000000000 x11: ffffff80089a1690 x10: 00000000000008f0 x9 : ffffffc036553b20 x8 : ffffffc036561650 x7 : ffffffc03fe6cb40 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000002 x3 : ffffffc035659000 x2 : ffffffc0240e8c80 x1 : 0000000000000000 x0 : ffffffc02adbe588 ---[ end trace 13aeec77c3fb55e2 ]--- Call trace: Exception stack(0xffffffc036553990 to 0xffffffc036553ac0) 3980: 0000000000000000 0000008000000000 39a0: ffffffc036553b60 ffffff80084f6040 0000000000004ff0 0000000000000038 39c0: ffffffc0365539d0 ffffff800857e098 ffffffc036553a00 ffffff800857e1b0 39e0: ffffffc036553a10 ffffff800857c554 ffffffc0365e8400 ffffffc0365e8400 3a00: ffffffc036553a20 ffffff8008103358 000000000001aad7 ffffff800851b72c 3a20: ffffffc036553a50 ffffff80080e9228 ffffffc02adbe588 0000000000000000 3a40: ffffffc0240e8c80 ffffffc035659000 0000000000000002 0000000000000001 3a60: 0000000000000000 ffffffc03fe6cb40 ffffffc036561650 ffffffc036553b20 3a80: 00000000000008f0 ffffff80089a1690 0000000000000000 0000000000000004 3aa0: ffffffc00a192700 0000000000000000 0000000000000000 0000000000000000 [] drm_atomic_helper_wait_for_fences+0xe0/0xf8 [] complete_commit.isra.1+0x44/0x1c0 [] msm_atomic_commit+0x32c/0x350 [] drm_atomic_commit+0x50/0x60 [] drm_atomic_remove_fb+0x158/0x250 [] drm_framebuffer_remove+0x50/0x158 [] drm_mode_rmfb_work_fn+0x40/0x58 [] process_one_work+0x1d0/0x378 [] worker_thread+0x244/0x488 [] kthread+0xfc/0x128 [] ret_from_fork+0x10/0x50 Fixes: 9626014 ("drm/fence: add in-fences support") Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Reported-by: Stanimir Varbanov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index a38c5fe6cc19..30b4691f7b0d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -225,9 +225,10 @@ mdp5_plane_duplicate_state(struct drm_plane *plane) mdp5_state = kmemdup(to_mdp5_plane_state(plane->state), sizeof(*mdp5_state), GFP_KERNEL); + if (!mdp5_state) + return NULL; - if (mdp5_state && mdp5_state->base.fb) - drm_framebuffer_reference(mdp5_state->base.fb); + __drm_atomic_helper_plane_duplicate_state(plane, &mdp5_state->base); return &mdp5_state->base; } From 134ccada7ac59156761ce05afd1c0b1d02ebd928 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 3 May 2017 10:43:14 -0400 Subject: [PATCH 084/206] drm/msm/gpu: check legacy clk names in get_clocks() Otherwise if someone was using old bindings with "core_clk" instead of "core" as the clock name, we'd never find it and gpu would be stuck at 27MHz (or whatever it's slowest rate is). Fixes: 98db803 ("msm/drm: gpu: Dynamically locate the clocks from the device tree") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 97b9c38c6b3f..0fdc88d79ca8 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -549,9 +549,9 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) gpu->grp_clks[i] = get_clock(dev, name); /* Remember the key clocks that we need to control later */ - if (!strcmp(name, "core")) + if (!strcmp(name, "core") || !strcmp(name, "core_clk")) gpu->core_clk = gpu->grp_clks[i]; - else if (!strcmp(name, "rbbmtimer")) + else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk")) gpu->rbbmtimer_clk = gpu->grp_clks[i]; ++i; From 43523eba79bda8f5b4c27f8ffe20ea078d20113a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Apr 2017 12:11:58 -0700 Subject: [PATCH 085/206] drm/msm: Expose our reservation object when exporting a dmabuf. Without this, polling on the dma-buf (and presumably other devices synchronizing against our rendering) would return immediately, even while the BO was busy. Signed-off-by: Eric Anholt Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Rob Clark Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Reviewed-by: Rob Clark Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 1 + drivers/gpu/drm/msm/msm_drv.h | 1 + drivers/gpu/drm/msm/msm_gem_prime.c | 7 +++++++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 87b5695d4034..9d498eb81906 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -830,6 +830,7 @@ static struct drm_driver msm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, + .gem_prime_res_obj = msm_gem_prime_res_obj, .gem_prime_pin = msm_gem_prime_pin, .gem_prime_unpin = msm_gem_prime_unpin, .gem_prime_get_sg_table = msm_gem_prime_get_sg_table, diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 28b6f9ba5066..1b26ca626528 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -224,6 +224,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); void *msm_gem_prime_vmap(struct drm_gem_object *obj); void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj); struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 60bb290700ce..13403c6da6c7 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -70,3 +70,10 @@ void msm_gem_prime_unpin(struct drm_gem_object *obj) if (!obj->import_attach) msm_gem_put_pages(obj); } + +struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + return msm_obj->resv; +} From 3c30cc41a87880db86c343b8c9cc8bc2d3d36055 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Apr 2017 12:12:00 -0700 Subject: [PATCH 086/206] drm/msm: Reuse dma_fence_release. If we follow the typical pattern of the base class being the first member, we can use the default dma_fence_free function. Signed-off-by: Eric Anholt Cc: Rob Clark Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Reviewed-by: Rob Clark Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_fence.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index 3f299c537b77..a2f89bac9c16 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -99,8 +99,8 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) } struct msm_fence { - struct msm_fence_context *fctx; struct dma_fence base; + struct msm_fence_context *fctx; }; static inline struct msm_fence *to_msm_fence(struct dma_fence *fence) @@ -130,19 +130,13 @@ static bool msm_fence_signaled(struct dma_fence *fence) return fence_completed(f->fctx, f->base.seqno); } -static void msm_fence_release(struct dma_fence *fence) -{ - struct msm_fence *f = to_msm_fence(fence); - kfree_rcu(f, base.rcu); -} - static const struct dma_fence_ops msm_fence_ops = { .get_driver_name = msm_fence_get_driver_name, .get_timeline_name = msm_fence_get_timeline_name, .enable_signaling = msm_fence_enable_signaling, .signaled = msm_fence_signaled, .wait = dma_fence_default_wait, - .release = msm_fence_release, + .release = dma_fence_free, }; struct dma_fence * From adcbae310f9ed84ef139e3fa6d7a7743d96c44c8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 21 May 2017 12:05:07 -0400 Subject: [PATCH 087/206] drm/msm/mdp5: release hwpipe(s) for unused planes Otherwise, if userspace doesn't re-use a given plane, it's hwpipe(s) could stay permanently assigned. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 30b4691f7b0d..7d3741215387 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -445,6 +445,10 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, mdp5_pipe_release(state->state, old_hwpipe); mdp5_pipe_release(state->state, old_right_hwpipe); } + } else { + mdp5_pipe_release(state->state, mdp5_state->hwpipe); + mdp5_pipe_release(state->state, mdp5_state->r_hwpipe); + mdp5_state->hwpipe = mdp5_state->r_hwpipe = NULL; } return 0; From c43dd227f411359802427da7aebe5da5b0e48e0e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 24 May 2017 18:12:19 +0200 Subject: [PATCH 088/206] drm/msm: constify irq_domain_ops struct irq_domain_ops is not modified, so it can be made const. Signed-off-by: Tobias Klauser Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c index f8f48d014978..9c34d7824988 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c @@ -116,7 +116,7 @@ static int mdss_hw_irqdomain_map(struct irq_domain *d, unsigned int irq, return 0; } -static struct irq_domain_ops mdss_hw_irqdomain_ops = { +static const struct irq_domain_ops mdss_hw_irqdomain_ops = { .map = mdss_hw_irqdomain_map, .xlate = irq_domain_xlate_onecell, }; From 3cfac69cbd56e1a57a70bfbdb07560b788030404 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 17 Mar 2017 19:38:40 +0100 Subject: [PATCH 089/206] drm/msm: for array in-fences, check if all backing fences are from our own context before waiting Use the dma_fence_match_context helper to check if all backing fences are from our own context, in which case we don't have to wait. Signed-off-by: Philipp Zabel Cc: Rob Clark Cc: Gustavo Padovan [rebased on code-motion] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 1c545ebe6a5a..8ac4ca443914 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -410,12 +410,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (!in_fence) return -EINVAL; - /* TODO if we get an array-fence due to userspace merging multiple - * fences, we need a way to determine if all the backing fences - * are from our own context.. + /* + * Wait if the fence is from a foreign context, or if the fence + * array contains any fence from a foreign context. */ - - if (in_fence->context != gpu->fctx->context) { + if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { ret = dma_fence_wait(in_fence, true); if (ret) return ret; From 90dd57de4a043f642179b1323a31ca3ced826611 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:34:57 -0600 Subject: [PATCH 090/206] drm/msm: Take the mutex before calling msm_gem_new_impl Amongst its other duties, msm_gem_new_impl adds the newly created GEM object to the shared inactive list which may also be actively modifiying the list during submission. All the paths to modify the list are protected by the mutex except for the one through msm_gem_import which can end up causing list corruption. Signed-off-by: Jordan Crouse [add extra WARN_ON(!mutex_is_locked(&dev->struct_mutex))] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 68e509b3b9e4..50289a23baf8 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -758,6 +758,8 @@ static int msm_gem_new_impl(struct drm_device *dev, struct msm_gem_object *msm_obj; bool use_vram = false; + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + switch (flags & MSM_BO_CACHE_MASK) { case MSM_BO_UNCACHED: case MSM_BO_CACHED: @@ -853,7 +855,11 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, size = PAGE_ALIGN(dmabuf->size); + /* Take mutex so we can modify the inactive list in msm_gem_new_impl */ + mutex_lock(&dev->struct_mutex); ret = msm_gem_new_impl(dev, size, MSM_BO_WC, dmabuf->resv, &obj); + mutex_unlock(&dev->struct_mutex); + if (ret) goto fail; From d72fea538fe6d783c1e63a2fc304019abf4be93a Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:34:58 -0600 Subject: [PATCH 091/206] drm/msm: Fix the check for the command size The overrun check for the size of submitted commands is off by one. It should allow the offset plus the size to be equal to the size of the memory object when the command stream is very tightly constructed. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 8ac4ca443914..7832e6421d25 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -495,8 +495,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; } - if ((submit_cmd.size + submit_cmd.submit_offset) >= - msm_obj->base.size) { + if (!submit_cmd.size || + ((submit_cmd.size + submit_cmd.submit_offset) > + msm_obj->base.size)) { DRM_ERROR("invalid cmdstream size: %u\n", submit_cmd.size); ret = -EINVAL; goto out; From 1ea34adb87c969b89dfd83f1905a79161e9ada26 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 26 May 2017 12:36:47 +0100 Subject: [PATCH 092/206] efi: Don't issue error message when booted under Xen When booted as Xen dom0 there won't be an EFI memmap allocated. Avoid issuing an error message in this case: [ 0.144079] efi: Failed to allocate new EFI memmap Signed-off-by: Juergen Gross Signed-off-by: Matt Fleming Cc: # v4.9+ Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170526113652.21339-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 26615991d69c..e0cf95a83f3f 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -360,6 +360,9 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + if (!num_entries) + return; + new_size = efi.memmap.desc_size * num_entries; new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { From 684e3f965d0be8c26fedefe94f637374242aba08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 May 2017 12:36:48 +0100 Subject: [PATCH 093/206] efi: Remove duplicate 'const' specifiers gcc-7 shows these harmless warnings: drivers/firmware/efi/libstub/secureboot.c:19:27: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] static const efi_char16_t const efi_SecureBoot_name[] = { drivers/firmware/efi/libstub/secureboot.c:22:27: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] Removing one of the specifiers gives us the expected behavior. Signed-off-by: Arnd Bergmann Signed-off-by: Matt Fleming Reviewed-by: David Howells Acked-by: Ard Biesheuvel Cc: Josh Boyer Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: de8cb458625c ("efi: Get and store the secure boot status") Link: http://lkml.kernel.org/r/20170526113652.21339-3-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/secureboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 8c34d50a4d80..959777ec8a77 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -16,10 +16,10 @@ /* BIOS variables */ static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; -static const efi_char16_t const efi_SecureBoot_name[] = { +static const efi_char16_t efi_SecureBoot_name[] = { 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 }; -static const efi_char16_t const efi_SetupMode_name[] = { +static const efi_char16_t efi_SetupMode_name[] = { 'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 }; From 4e52797d2efefac3271abdc54439a3435abd77b9 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Fri, 26 May 2017 12:36:49 +0100 Subject: [PATCH 094/206] x86/efi: Disable runtime services on kexec kernel if booted with efi=old_map Booting kexec kernel with "efi=old_map" in kernel command line hits kernel panic as shown below. BUG: unable to handle kernel paging request at ffff88007fe78070 IP: virt_efi_set_variable.part.7+0x63/0x1b0 PGD 7ea28067 PUD 7ea2b067 PMD 7ea2d067 PTE 0 [...] Call Trace: virt_efi_set_variable() efi_delete_dummy_variable() efi_enter_virtual_mode() start_kernel() x86_64_start_reservations() x86_64_start_kernel() start_cpu() [ efi=old_map was never intended to work with kexec. The problem with using efi=old_map is that the virtual addresses are assigned from the memory region used by other kernel mappings; vmalloc() space. Potentially there could be collisions when booting kexec if something else is mapped at the virtual address we allocated for runtime service regions in the initial boot - Matt Fleming ] Since kexec was never intended to work with efi=old_map, disable runtime services in kexec if booted with efi=old_map, so that we don't panic. Tested-by: Lee Chun-Yi Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Acked-by: Dave Young Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170526113652.21339-4-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7e76a4d8304b..43b96f5f78ba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI + * non-native EFI. With efi=old_map, we don't do runtime services in + * kexec kernel because in the initial boot something else might + * have been mapped at these virtual addresses. */ - if (!efi_is_native()) { + if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; From 94133e46a0f5ca3f138479806104ab4a8cb0455e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 26 May 2017 12:36:50 +0100 Subject: [PATCH 095/206] x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled For EFI with the 'efi=old_map' kernel option specified, the kernel will panic when KASLR is enabled: BUG: unable to handle kernel paging request at 000000007febd57e IP: 0x7febd57e PGD 1025a067 PUD 0 Oops: 0010 [#1] SMP Call Trace: efi_enter_virtual_mode() start_kernel() x86_64_start_reservations() x86_64_start_kernel() start_cpu() The root cause is that the identity mapping is not built correctly in the 'efi=old_map' case. On 'nokaslr' kernels, PAGE_OFFSET is 0xffff880000000000 which is PGDIR_SIZE aligned. We can borrow the PUD table from the direct mappings safely. Given a physical address X, we have pud_index(X) == pud_index(__va(X)). However, on KASLR kernels, PAGE_OFFSET is PUD_SIZE aligned. For a given physical address X, pud_index(X) != pud_index(__va(X)). We can't just copy the PGD entry from direct mapping to build identity mapping, instead we need to copy the PUD entries one by one from the direct mapping. Fix it. Signed-off-by: Baoquan He Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Young Cc: Frank Ramsay Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Garnier Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170526113652.21339-5-matt@codeblueprint.co.uk [ Fixed and reworded the changelog and code comments to be more readable. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 79 ++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c488625c9712..eb8dff15a7f6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -71,11 +71,13 @@ static void __init early_code_mapping_set_exec(int executable) pgd_t * __init efi_call_phys_prolog(void) { - unsigned long vaddress; - pgd_t *save_pgd; + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; int pgd; - int n_pgds; + int n_pgds, i, j; if (!efi_enabled(EFI_OLD_MEMMAP)) { save_pgd = (pgd_t *)read_cr3(); @@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void) n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + /* + * Build 1:1 identity mapping for efi=old_map usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ for (pgd = 0; pgd < n_pgds; pgd++) { - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } } out: __flush_tlb_all(); @@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) /* * After the lock is released, the original page table is restored. */ - int pgd_idx; + int pgd_idx, i; int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; if (!efi_enabled(EFI_OLD_MEMMAP)) { write_cr3((unsigned long)save_pgd); @@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + if (!(pgd_val(*pgd) & _PAGE_PRESENT)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!(p4d_val(*p4d) & _PAGE_PRESENT)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + kfree(save_pgd); __flush_tlb_all(); From 7425826f4f7ac60f2538b06a7f0a5d1006405159 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 26 May 2017 12:36:51 +0100 Subject: [PATCH 096/206] efi/bgrt: Skip efi_bgrt_init() in case of non-EFI boot Sabrina Dubroca reported an early panic: BUG: unable to handle kernel paging request at ffffffffff240001 IP: efi_bgrt_init+0xdc/0x134 [...] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ... which was introduced by: 7b0a911478c7 ("efi/x86: Move the EFI BGRT init code to early init code") The cause is that on this machine the firmware provides the EFI ACPI BGRT table even on legacy non-EFI bootups - which table should be EFI only. The garbage BGRT data causes the efi_bgrt_init() panic. Add a check to skip efi_bgrt_init() in case non-EFI bootup to work around this firmware bug. Tested-by: Sabrina Dubroca Signed-off-by: Dave Young Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: # v4.11+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 7b0a911478c7 ("efi/x86: Move the EFI BGRT init code to early init code") Link: http://lkml.kernel.org/r/20170526113652.21339-6-matt@codeblueprint.co.uk [ Rewrote the changelog to be more readable. ] Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi-bgrt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c index 04ca8764f0c0..8bf27323f7a3 100644 --- a/drivers/firmware/efi/efi-bgrt.c +++ b/drivers/firmware/efi/efi-bgrt.c @@ -36,6 +36,9 @@ void __init efi_bgrt_init(struct acpi_table_header *table) if (acpi_disabled) return; + if (!efi_enabled(EFI_BOOT)) + return; + if (table->length < sizeof(bgrt_tab)) { pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n", table->length, sizeof(bgrt_tab)); From fe3b81b446d4ecb954f1b9dd191164a78fd278ad Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 4 Apr 2017 17:08:42 -0700 Subject: [PATCH 097/206] NFS: Use ERR_CAST() to avoid cross-structure cast When the call to nfs_devname() fails, the error path attempts to retain the error via the mnt variable, but this requires a cast across very different types (char * to struct vfsmount *), which the upcoming structure layout randomization plugin flags as being potentially dangerous in the face of randomization. This is a false positive, but what this code actually wants to do is retain the error value, so this patch explicitly sets it, instead of using what seems to be an unexpected cast. Signed-off-by: Kees Cook Acked-by: Trond Myklebust Reviewed-by: Christoph Hellwig --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1a224a33a6c2..e5686be67be8 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -246,7 +246,7 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, devname = nfs_devname(dentry, page, PAGE_SIZE); if (IS_ERR(devname)) - mnt = (struct vfsmount *)devname; + mnt = ERR_CAST(devname); else mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); From fee2aa753823860f2b8dfe58d98cafe8e4840855 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 May 2017 14:45:26 -0700 Subject: [PATCH 098/206] ntfs: Use ERR_CAST() to avoid cross-structure cast When trying to propagate an error result, the error return path attempts to retain the error, but does this with an open cast across very different types, which the upcoming structure layout randomization plugin flags as being potentially dangerous in the face of randomization. This is a false positive, but what this code actually wants to do is use ERR_CAST() to retain the error value. Cc: Anton Altaparmakov Cc: Andrew Morton Signed-off-by: Kees Cook --- fs/ntfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 358258364616..4690cd75d8d7 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -159,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, PTR_ERR(dent_inode)); kfree(name); /* Return the error code. */ - return (struct dentry *)dent_inode; + return ERR_CAST(dent_inode); } /* It is guaranteed that @name is no longer allocated at this point. */ if (MREF_ERR(mref) == -ENOENT) { From 7585d12f6555fdf4faaefec34ac58b28555b27d0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 May 2017 14:49:27 -0700 Subject: [PATCH 099/206] ocfs2: Use ERR_CAST() to avoid cross-structure cast When trying to propagate an error result, the error return path attempts to retain the error, but does this with an open cast across very different types, which the upcoming structure layout randomization plugin flags as being potentially dangerous in the face of randomization. This is a false positive, but what this code actually wants to do is use ERR_CAST() to retain the error value. Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Kees Cook --- fs/ocfs2/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 827fc9809bc2..9f88188060db 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -119,7 +119,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, if (IS_ERR(inode)) { mlog_errno(PTR_ERR(inode)); - result = (void *)inode; + result = ERR_CAST(inode); goto bail; } From 234041dfe5ca83d5c8122ec1999eaf3f00335d7b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 4 Apr 2017 22:07:10 -0700 Subject: [PATCH 100/206] sgi-xp: Use designated initializers Prepare to mark sensitive kernel structures for randomization by making sure they're using designated initializers. In this case, no initializers are needed (they can be NULL initialized and callers adjusted to check for NULL, which is more efficient than an indirect call). Cc: Robin Holt Signed-off-by: Kees Cook Reviewed-by: Christoph Hellwig --- drivers/misc/sgi-xp/xp.h | 12 +++++++++++- drivers/misc/sgi-xp/xp_main.c | 36 +++++++---------------------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index c862cd4583cc..b8069eec18cb 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -309,6 +309,9 @@ static inline enum xp_retval xpc_send(short partid, int ch_number, u32 flags, void *payload, u16 payload_size) { + if (!xpc_interface.send) + return xpNotLoaded; + return xpc_interface.send(partid, ch_number, flags, payload, payload_size); } @@ -317,6 +320,9 @@ static inline enum xp_retval xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, u16 payload_size, xpc_notify_func func, void *key) { + if (!xpc_interface.send_notify) + return xpNotLoaded; + return xpc_interface.send_notify(partid, ch_number, flags, payload, payload_size, func, key); } @@ -324,12 +330,16 @@ xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, static inline void xpc_received(short partid, int ch_number, void *payload) { - return xpc_interface.received(partid, ch_number, payload); + if (xpc_interface.received) + xpc_interface.received(partid, ch_number, payload); } static inline enum xp_retval xpc_partid_to_nasids(short partid, void *nasids) { + if (!xpc_interface.partid_to_nasids) + return xpNotLoaded; + return xpc_interface.partid_to_nasids(partid, nasids); } diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c index 01be66d02ca8..6d7f557fd1c1 100644 --- a/drivers/misc/sgi-xp/xp_main.c +++ b/drivers/misc/sgi-xp/xp_main.c @@ -69,23 +69,9 @@ struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS]; EXPORT_SYMBOL_GPL(xpc_registrations); /* - * Initialize the XPC interface to indicate that XPC isn't loaded. + * Initialize the XPC interface to NULL to indicate that XPC isn't loaded. */ -static enum xp_retval -xpc_notloaded(void) -{ - return xpNotLoaded; -} - -struct xpc_interface xpc_interface = { - (void (*)(int))xpc_notloaded, - (void (*)(int))xpc_notloaded, - (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded, - (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func, - void *))xpc_notloaded, - (void (*)(short, int, void *))xpc_notloaded, - (enum xp_retval(*)(short, void *))xpc_notloaded -}; +struct xpc_interface xpc_interface = { }; EXPORT_SYMBOL_GPL(xpc_interface); /* @@ -115,17 +101,7 @@ EXPORT_SYMBOL_GPL(xpc_set_interface); void xpc_clear_interface(void) { - xpc_interface.connect = (void (*)(int))xpc_notloaded; - xpc_interface.disconnect = (void (*)(int))xpc_notloaded; - xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16)) - xpc_notloaded; - xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *, - u16, xpc_notify_func, - void *))xpc_notloaded; - xpc_interface.received = (void (*)(short, int, void *)) - xpc_notloaded; - xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *)) - xpc_notloaded; + memset(&xpc_interface, 0, sizeof(xpc_interface)); } EXPORT_SYMBOL_GPL(xpc_clear_interface); @@ -188,7 +164,8 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, mutex_unlock(®istration->mutex); - xpc_interface.connect(ch_number); + if (xpc_interface.connect) + xpc_interface.connect(ch_number); return xpSuccess; } @@ -237,7 +214,8 @@ xpc_disconnect(int ch_number) registration->assigned_limit = 0; registration->idle_limit = 0; - xpc_interface.disconnect(ch_number); + if (xpc_interface.disconnect) + xpc_interface.disconnect(ch_number); mutex_unlock(®istration->mutex); From 2a9d6d26e2b76eee1064d221b49f3ec527546c53 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 6 May 2017 00:54:07 -0700 Subject: [PATCH 101/206] drm/amdgpu: Use designated initializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The randstruct plugin requires structures that are entirely function pointers be initialized using designated initializers. Cc: Alex Deucher Cc: Christian König Signed-off-by: Kees Cook --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a4831fe0223b..a2c59a08b2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -220,9 +220,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, } const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { - amdgpu_vram_mgr_init, - amdgpu_vram_mgr_fini, - amdgpu_vram_mgr_new, - amdgpu_vram_mgr_del, - amdgpu_vram_mgr_debug + .init = amdgpu_vram_mgr_init, + .takedown = amdgpu_vram_mgr_fini, + .get_node = amdgpu_vram_mgr_new, + .put_node = amdgpu_vram_mgr_del, + .debug = amdgpu_vram_mgr_debug }; From 3ddd396f6b57cbd5cb034498b5c4cd3dd920cf15 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 6 May 2017 01:09:00 -0700 Subject: [PATCH 102/206] drm/amd/powerplay: Use designated initializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The randstruct plugin requires designated initializers for structures that are entirely function pointers. Cc: Christian König Cc: Eric Huang Cc: Alex Deucher Signed-off-by: Kees Cook --- .../drm/amd/powerplay/hwmgr/vega10_thermal.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index d5f53d04fa08..83e40fe51b62 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -709,17 +709,17 @@ static int tf_vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr, static struct phm_master_table_item vega10_thermal_start_thermal_controller_master_list[] = { - {NULL, tf_vega10_thermal_initialize}, - {NULL, tf_vega10_thermal_set_temperature_range}, - {NULL, tf_vega10_thermal_enable_alert}, + { .tableFunction = tf_vega10_thermal_initialize }, + { .tableFunction = tf_vega10_thermal_set_temperature_range }, + { .tableFunction = tf_vega10_thermal_enable_alert }, /* We should restrict performance levels to low before we halt the SMC. * On the other hand we are still in boot state when we do this * so it would be pointless. * If this assumption changes we have to revisit this table. */ - {NULL, tf_vega10_thermal_setup_fan_table}, - {NULL, tf_vega10_thermal_start_smc_fan_control}, - {NULL, NULL} + { .tableFunction = tf_vega10_thermal_setup_fan_table }, + { .tableFunction = tf_vega10_thermal_start_smc_fan_control }, + { } }; static struct phm_master_table_header @@ -731,10 +731,10 @@ vega10_thermal_start_thermal_controller_master = { static struct phm_master_table_item vega10_thermal_set_temperature_range_master_list[] = { - {NULL, tf_vega10_thermal_disable_alert}, - {NULL, tf_vega10_thermal_set_temperature_range}, - {NULL, tf_vega10_thermal_enable_alert}, - {NULL, NULL} + { .tableFunction = tf_vega10_thermal_disable_alert }, + { .tableFunction = tf_vega10_thermal_set_temperature_range }, + { .tableFunction = tf_vega10_thermal_enable_alert }, + { } }; struct phm_master_table_header From 243dd05d39aa14fac2ffde75cc66dee3270896f8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 6 May 2017 01:10:06 -0700 Subject: [PATCH 103/206] mtk-vcodec: Use designated initializers The randstruct plugin requires designated initializers for structures that are entirely function pointers. Cc: Wu-Cheng Li Cc: Tiffany Lin Cc: Hans Verkuil Cc: Mauro Carvalho Chehab Signed-off-by: Kees Cook --- drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c | 8 ++++---- drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c | 8 ++++---- drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c index 57a842ff3097..b7731b18ecae 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c @@ -493,10 +493,10 @@ static int vdec_h264_get_param(unsigned long h_vdec, } static struct vdec_common_if vdec_h264_if = { - vdec_h264_init, - vdec_h264_decode, - vdec_h264_get_param, - vdec_h264_deinit, + .init = vdec_h264_init, + .decode = vdec_h264_decode, + .get_param = vdec_h264_get_param, + .deinit = vdec_h264_deinit, }; struct vdec_common_if *get_h264_dec_comm_if(void); diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c index 6e7a62ae0842..b9fad6a48879 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c @@ -620,10 +620,10 @@ static void vdec_vp8_deinit(unsigned long h_vdec) } static struct vdec_common_if vdec_vp8_if = { - vdec_vp8_init, - vdec_vp8_decode, - vdec_vp8_get_param, - vdec_vp8_deinit, + .init = vdec_vp8_init, + .decode = vdec_vp8_decode, + .get_param = vdec_vp8_get_param, + .deinit = vdec_vp8_deinit, }; struct vdec_common_if *get_vp8_dec_comm_if(void); diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c index 5539b1853f16..1daee1207469 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c @@ -979,10 +979,10 @@ static int vdec_vp9_get_param(unsigned long h_vdec, } static struct vdec_common_if vdec_vp9_if = { - vdec_vp9_init, - vdec_vp9_decode, - vdec_vp9_get_param, - vdec_vp9_deinit, + .init = vdec_vp9_init, + .decode = vdec_vp9_decode, + .get_param = vdec_vp9_get_param, + .deinit = vdec_vp9_deinit, }; struct vdec_common_if *get_vp9_dec_comm_if(void); From 4013ef48798ae954530bdb8971d3794333e46526 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 8 Mar 2017 15:12:53 +0100 Subject: [PATCH 104/206] drm/exynos: Merge pre/postclose hooks Again no apparent explanation for the split except hysterical raisins. Cc: Inki Dae Cc: Joonyoung Shim Cc: Seung-Woo Kim Cc: Kyungmin Park Signed-off-by: Daniel Vetter Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 09d3c4c3c858..50294a7bd29d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -82,14 +82,9 @@ static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) return ret; } -static void exynos_drm_preclose(struct drm_device *dev, - struct drm_file *file) -{ - exynos_drm_subdrv_close(dev, file); -} - static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file) { + exynos_drm_subdrv_close(dev, file); kfree(file->driver_priv); file->driver_priv = NULL; } @@ -145,7 +140,6 @@ static struct drm_driver exynos_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC | DRIVER_RENDER, .open = exynos_drm_open, - .preclose = exynos_drm_preclose, .lastclose = exynos_drm_lastclose, .postclose = exynos_drm_postclose, .gem_free_object_unlocked = exynos_drm_gem_free_object, From f2921d8c4899a3d9d7c6326315c24efea2c9efd9 Mon Sep 17 00:00:00 2001 From: Hoegeun Kwon Date: Thu, 13 Apr 2017 15:05:26 +0900 Subject: [PATCH 105/206] drm/exynos: dsi: Fix the parse_dt function The dsi + panel is a parental relationship, so OF grpah is not needed. Therefore, the current dsi_parse_dt function will throw an error, because there is no linked OF graph for the case fimd + dsi + panel. Parse the Pll burst and esc clock frequency properties in dsi_parse_dt() and create a bridge_node only if there is an OF graph associated with dsi. Signed-off-by: Hoegeun Kwon Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index fc4fda738906..24ab77c674ec 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1633,7 +1633,6 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi) { struct device *dev = dsi->dev; struct device_node *node = dev->of_node; - struct device_node *ep; int ret; ret = exynos_dsi_of_read_u32(node, "samsung,pll-clock-frequency", @@ -1641,32 +1640,21 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi) if (ret < 0) return ret; - ep = of_graph_get_endpoint_by_regs(node, DSI_PORT_OUT, 0); - if (!ep) { - dev_err(dev, "no output port with endpoint specified\n"); - return -EINVAL; - } - - ret = exynos_dsi_of_read_u32(ep, "samsung,burst-clock-frequency", + ret = exynos_dsi_of_read_u32(node, "samsung,burst-clock-frequency", &dsi->burst_clk_rate); if (ret < 0) - goto end; + return ret; - ret = exynos_dsi_of_read_u32(ep, "samsung,esc-clock-frequency", + ret = exynos_dsi_of_read_u32(node, "samsung,esc-clock-frequency", &dsi->esc_clk_rate); if (ret < 0) - goto end; - - of_node_put(ep); + return ret; dsi->bridge_node = of_graph_get_remote_node(node, DSI_PORT_OUT, 0); if (!dsi->bridge_node) return -EINVAL; -end: - of_node_put(ep); - - return ret; + return 0; } static int exynos_dsi_bind(struct device *dev, struct device *master, From 70505c2ef94b7584dc24d6eaaeb0fc335b99813a Mon Sep 17 00:00:00 2001 From: Hoegeun Kwon Date: Fri, 26 May 2017 10:02:01 +0900 Subject: [PATCH 106/206] drm/exynos: dsi: Remove bridge node reference in removal Since bridge node is referenced during in the probe, it should be released on removal. Suggested-by: Andrzej Hajda Signed-off-by: Hoegeun Kwon Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 24ab77c674ec..d404de86d5f9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1805,6 +1805,10 @@ static int exynos_dsi_probe(struct platform_device *pdev) static int exynos_dsi_remove(struct platform_device *pdev) { + struct exynos_dsi *dsi = platform_get_drvdata(pdev); + + of_node_put(dsi->bridge_node); + pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &exynos_dsi_component_ops); From e379cbee79a8abb8e9b4da63187884939852ddb4 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Mon, 29 May 2017 09:59:05 +0900 Subject: [PATCH 107/206] drm/exynos: clean up description of exynos_drm_crtc This patch removes unnecessary descriptions on exynos_drm_crtc structure and adds one description which specifies what pipe_clk member does. pipe_clk support had been added by below patch without any description, drm/exynos: add support for pipeline clock to the framework Commit-id : f26b9343f582f44ec920474d71b4b2220b1ed9a8 Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index cb3176930596..39c740572034 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -160,12 +160,9 @@ struct exynos_drm_clk { * drm framework doesn't support multiple irq yet. * we can refer to the crtc to current hardware interrupt occurred through * this pipe value. - * @enabled: if the crtc is enabled or not - * @event: vblank event that is currently queued for flip - * @wait_update: wait all pending planes updates to finish - * @pending_update: number of pending plane updates in this crtc * @ops: pointer to callbacks for exynos drm specific functionality * @ctx: A pointer to the crtc's implementation specific context + * @pipe_clk: A pointer to the crtc's pipeline clock. */ struct exynos_drm_crtc { struct drm_crtc base; From dac6ca243c4c49a9ca7507d3d66140ebfac8b04b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 28 May 2017 22:04:14 +0200 Subject: [PATCH 108/206] x86/microcode/AMD: Change load_microcode_amd()'s param to bool to fix preemptibility bug With CONFIG_DEBUG_PREEMPT enabled, I get: BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is debug_smp_processor_id CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.12.0-rc2+ #2 Call Trace: dump_stack check_preemption_disabled debug_smp_processor_id save_microcode_in_initrd_amd ? microcode_init save_microcode_in_initrd ... because, well, it says it above, we're using smp_processor_id() in preemptible code. But passing the CPU number is not really needed. It is only used to determine whether we're on the BSP, and, if so, to save the microcode patch for early loading. [ We don't absolutely need to do it on the BSP but we do that customarily there. ] Instead, convert that function parameter to a boolean which denotes whether the patch should be saved or not, thereby avoiding the use of smp_processor_id() in preemptible code. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170528200414.31305-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/amd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 45db4d2ebd01..e9f4d762aa5b 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size); int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { @@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) if (!desc.mc) return -EINVAL; - ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), - desc.data, desc.size); + ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); if (ret != UCODE_OK) return -EINVAL; @@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { enum ucode_state ret; @@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) #ifdef CONFIG_X86_32 /* save BSP's matching patch for early load */ - if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { - struct ucode_patch *p = find_patch(cpu); + if (save) { + struct ucode_patch *p = find_patch(0); if (p) { memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), @@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, { char fw_name[36] = "amd-ucode/microcode_amd.bin"; struct cpuinfo_x86 *c = &cpu_data(cpu); + bool bsp = c->cpu_index == boot_cpu_data.cpu_index; enum ucode_state ret = UCODE_NFOUND; const struct firmware *fw; /* reload ucode container only on the boot cpu */ - if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + if (!refresh_fw || !bsp) return UCODE_OK; if (c->x86 >= 0x15) @@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); + ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); fw_release: release_firmware(fw); From 5d9070b1f0fc9a159a9a3240c43004828408444b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 28 May 2017 11:03:42 +0200 Subject: [PATCH 109/206] x86/debug/32: Convert a smp_processor_id() call to raw to avoid DEBUG_PREEMPT warning ... to raw_smp_processor_id() to not trip the BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 check. The reasoning behind it is that __warn() already uses the raw_ variants but the show_regs() path on 32-bit doesn't. Signed-off-by: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170528092212.fiod7kygpjm23m3o@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index ff40e74c9181..ffeae818aa7a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip); printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags, - smp_processor_id()); + raw_smp_processor_id()); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); From 6ac56951dc10232e24419f6972fc8131dd0166e0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 22 May 2017 19:59:24 +0200 Subject: [PATCH 110/206] rbd: implement REQ_OP_WRITE_ZEROES Commit 93c1defedcae ("rbd: remove the discard_zeroes_data flag") explicitly didn't implement REQ_OP_WRITE_ZEROES for rbd, while the following commit 48920ff2a5a9 ("block: remove the discard_zeroes_data flag") dropped ->discard_zeroes_data in favor of REQ_OP_WRITE_ZEROES. rbd does support efficient zeroing via CEPH_OSD_OP_ZERO opcode and will release either some or all blocks depending on whether the zeroing request is rbd_obj_bytes() aligned. This is how we currently implement discards, so REQ_OP_WRITE_ZEROES can be identical to REQ_OP_DISCARD for now. Caveats: - REQ_NOUNMAP is ignored, but AFAICT that's true of at least two other current implementations - nvme and loop - there is no ->write_zeroes_alignment and blk_bio_write_zeroes_split() is hence less helpful than blk_bio_discard_split(), but this can (and should) be fixed on the rbd side In the future we will split these into two code paths to respect REQ_NOUNMAP on zeroout and save on zeroing blocks that couldn't be released on discard. Fixes: 93c1defedcae ("rbd: remove the discard_zeroes_data flag") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Christoph Hellwig --- drivers/block/rbd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 454bf9c34882..c16f74547804 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4023,6 +4023,7 @@ static void rbd_queue_workfn(struct work_struct *work) switch (req_op(rq)) { case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: op_type = OBJ_OP_DISCARD; break; case REQ_OP_WRITE: @@ -4420,6 +4421,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_granularity = segment_size; q->limits.discard_alignment = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; From f3a1568582cc207663a4d5e37da790334372855b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 24 May 2017 15:29:33 +0300 Subject: [PATCH 111/206] ovl: mark upper merge dir with type origin entries "impure" An upper dir is marked "impure" to let ovl_iterate() know that this directory may contain non pure upper entries whose d_ino may need to be read from the origin inode. We already mark a non-merge dir "impure" when moving a non-pure child entry inside it, to let ovl_iterate() know not to iterate the non-merge dir directly. Mark also a merge dir "impure" when moving a non-pure child entry inside it and when copying up a child entry inside it. This can be used to optimize ovl_iterate() to perform a "pure merge" of upper and lower directories, merging the content of the directories, without having to read d_ino from origin inodes. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 5 +++++ fs/overlayfs/dir.c | 31 +++++------------------------ fs/overlayfs/namei.c | 20 ------------------- fs/overlayfs/overlayfs.h | 15 ++++++++++---- fs/overlayfs/super.c | 5 ++++- fs/overlayfs/util.c | 42 +++++++++++++++++++++++++++++++++------- 6 files changed, 60 insertions(+), 58 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 843ed2a2d7db..7a44533f4bbf 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -459,6 +459,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; + /* Mark parent "impure" because it may now contain non-pure upper */ + err = ovl_set_impure(parent, upperdir); + if (err) + return err; + err = vfs_getattr(&parentpath, &pstat, STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f2a118ba00e4..a63a71656e9b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -149,22 +149,6 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); } -static int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) -{ - int err; - - /* - * Do not fail when upper doesn't support xattrs. - * Upper inodes won't have origin nor redirect xattr anyway. - */ - err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, - "y", 1, 0); - if (!err) - ovl_dentry_set_impure(dentry); - - return err; -} - /* Common operations required to be done after creation of file on upper */ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) @@ -976,21 +960,16 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (!samedir) { /* * When moving a merge dir or non-dir with copy up origin into - * a non-merge upper dir (a.k.a pure upper dir), we are making - * the target parent dir "impure". ovl_iterate() iterates pure - * upper dirs directly, because there is no need to filter out - * whiteouts and merge dir content with lower dir. But for the - * case of an "impure" upper dir, ovl_iterate() cannot iterate - * the real directory directly, because it looks for the inode - * numbers to fill d_ino in the entries origin inode. + * a new parent, we are marking the new parent dir "impure". + * When ovl_iterate() iterates an "impure" upper dir, it will + * lookup the origin inodes of the entries to fill d_ino. */ - if (ovl_type_origin(old) && !ovl_type_merge(new->d_parent)) { + if (ovl_type_origin(old)) { err = ovl_set_impure(new->d_parent, new_upperdir); if (err) goto out_revert_creds; } - if (!overwrite && ovl_type_origin(new) && - !ovl_type_merge(old->d_parent)) { + if (!overwrite && ovl_type_origin(new)) { err = ovl_set_impure(old->d_parent, old_upperdir); if (err) goto out_revert_creds; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 0c72a5909db2..f3136c31e72a 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -167,31 +167,11 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, goto out; } -static bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) -{ - int res; - char val; - - if (!d_is_dir(dentry)) - return false; - - res = vfs_getxattr(dentry, name, &val, 1); - if (res == 1 && val == 'y') - return true; - - return false; -} - static bool ovl_is_opaquedir(struct dentry *dentry) { return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); } -static bool ovl_is_impuredir(struct dentry *dentry) -{ - return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); -} - static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, const char *name, unsigned int namelen, size_t prelen, const char *post, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index a9fb958fd5d4..0623cebeefff 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -207,7 +207,6 @@ bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_impure(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); -void ovl_dentry_set_impure(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); @@ -221,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry); void ovl_copy_up_end(struct dentry *dentry); +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr); +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); + +static inline bool ovl_is_impuredir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); +} + /* namei.c */ int ovl_path_next(int idx, struct dentry *dentry, struct path *path); @@ -281,6 +291,3 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, - const char *name, const void *value, size_t size, - int xerr); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index f1647626a882..4882ffb37bae 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -974,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) path_put(&workpath); kfree(lowertmp); - oe->__upperdentry = upperpath.dentry; + if (upperpath.dentry) { + oe->__upperdentry = upperpath.dentry; + oe->impure = ovl_is_impuredir(upperpath.dentry); + } for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = stack[i].dentry; oe->lowerstack[i].mnt = ufs->lower_mnt[i]; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index e0dfb07d5457..809048913889 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -194,13 +194,6 @@ void ovl_dentry_set_opaque(struct dentry *dentry) oe->opaque = true; } -void ovl_dentry_set_impure(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - oe->impure = true; -} - bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; @@ -311,6 +304,21 @@ void ovl_copy_up_end(struct dentry *dentry) spin_unlock(&ofs->copyup_wq.lock); } +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) +{ + int res; + char val; + + if (!d_is_dir(dentry)) + return false; + + res = vfs_getxattr(dentry, name, &val, 1); + if (res == 1 && val == 'y') + return true; + + return false; +} + int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, const char *name, const void *value, size_t size, int xerr) @@ -331,3 +339,23 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, return err; } + +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) +{ + int err; + struct ovl_entry *oe = dentry->d_fsdata; + + if (oe->impure) + return 0; + + /* + * Do not fail when upper doesn't support xattrs. + * Upper inodes won't have origin nor redirect xattr anyway. + */ + err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, + "y", 1, 0); + if (!err) + oe->impure = true; + + return err; +} From 118b90f3f18e733c99f0e8b98ea31a815ffc4d14 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:22 +0300 Subject: [PATCH 112/206] drm/dp: add helper for reading DP sink/branch device desc from DPCD Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/acba54da7d80eafea9e59a893e27e3c31028c0ba.1495105635.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_dp_helper.c | 35 +++++++++++++++++++++++++++++++++ include/drm/drm_dp_helper.h | 19 ++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 3e5f52110ea1..52e0ca9a5bb1 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1208,3 +1208,38 @@ int drm_dp_stop_crc(struct drm_dp_aux *aux) return 0; } EXPORT_SYMBOL(drm_dp_stop_crc); + +/** + * drm_dp_read_desc - read sink/branch descriptor from DPCD + * @aux: DisplayPort AUX channel + * @desc: Device decriptor to fill from DPCD + * @is_branch: true for branch devices, false for sink devices + * + * Read DPCD 0x400 (sink) or 0x500 (branch) into @desc. Also debug log the + * identification. + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, + bool is_branch) +{ + struct drm_dp_dpcd_ident *ident = &desc->ident; + unsigned int offset = is_branch ? DP_BRANCH_OUI : DP_SINK_OUI; + int ret, dev_id_len; + + ret = drm_dp_dpcd_read(aux, offset, ident, sizeof(*ident)); + if (ret < 0) + return ret; + + dev_id_len = strnlen(ident->device_id, sizeof(ident->device_id)); + + DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n", + is_branch ? "branch" : "sink", + (int)sizeof(ident->oui), ident->oui, + dev_id_len, ident->device_id, + ident->hw_rev >> 4, ident->hw_rev & 0xf, + ident->sw_major_rev, ident->sw_minor_rev); + + return 0; +} +EXPORT_SYMBOL(drm_dp_read_desc); diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c0bd0d7651a9..84502da177a1 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -913,4 +913,23 @@ void drm_dp_aux_unregister(struct drm_dp_aux *aux); int drm_dp_start_crc(struct drm_dp_aux *aux, struct drm_crtc *crtc); int drm_dp_stop_crc(struct drm_dp_aux *aux); +struct drm_dp_dpcd_ident { + u8 oui[3]; + u8 device_id[6]; + u8 hw_rev; + u8 sw_major_rev; + u8 sw_minor_rev; +} __packed; + +/** + * struct drm_dp_desc - DP branch/sink device descriptor + * @ident: DP device identification from DPCD 0x400 (sink) or 0x500 (branch). + */ +struct drm_dp_desc { + struct drm_dp_dpcd_ident ident; +}; + +int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, + bool is_branch); + #endif /* _DRM_DP_HELPER_H_ */ From 84c367533bd24108e5392b355280647a4a2893ac Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:23 +0300 Subject: [PATCH 113/206] drm/i915: use drm DP helper to read DPCD desc Switch to using the common DP helpers instead of using our own. v2: also remove leftover struct intel_dp_desc (Daniel) Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 37 ++++------------------------- drivers/gpu/drm/i915/intel_drv.h | 13 +--------- drivers/gpu/drm/i915/intel_lspcon.c | 2 +- 3 files changed, 6 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ee77b519835c..5ce45d98da78 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1507,37 +1507,6 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) DRM_DEBUG_KMS("common rates: %s\n", str); } -bool -__intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc) -{ - u32 base = drm_dp_is_branch(intel_dp->dpcd) ? DP_BRANCH_OUI : - DP_SINK_OUI; - - return drm_dp_dpcd_read(&intel_dp->aux, base, desc, sizeof(*desc)) == - sizeof(*desc); -} - -bool intel_dp_read_desc(struct intel_dp *intel_dp) -{ - struct intel_dp_desc *desc = &intel_dp->desc; - bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & - DP_OUI_SUPPORT; - int dev_id_len; - - if (!__intel_dp_read_desc(intel_dp, desc)) - return false; - - dev_id_len = strnlen(desc->device_id, sizeof(desc->device_id)); - DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n", - drm_dp_is_branch(intel_dp->dpcd) ? "branch" : "sink", - (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)", - dev_id_len, desc->device_id, - desc->hw_rev >> 4, desc->hw_rev & 0xf, - desc->sw_major_rev, desc->sw_minor_rev); - - return true; -} - static int rate_to_index(int find, const int *rates) { int i = 0; @@ -3622,7 +3591,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) if (!intel_dp_read_dpcd(intel_dp)) return false; - intel_dp_read_desc(intel_dp); + drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, + drm_dp_is_branch(intel_dp->dpcd)); if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & @@ -4624,7 +4594,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp_print_rates(intel_dp); - intel_dp_read_desc(intel_dp); + drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, + drm_dp_is_branch(intel_dp->dpcd)); intel_dp_configure_mst(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index aaee3949a422..f630c7af5020 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -906,14 +906,6 @@ enum link_m_n_set { M2_N2 }; -struct intel_dp_desc { - u8 oui[3]; - u8 device_id[6]; - u8 hw_rev; - u8 sw_major_rev; - u8 sw_minor_rev; -} __packed; - struct intel_dp_compliance_data { unsigned long edid; uint8_t video_pattern; @@ -957,7 +949,7 @@ struct intel_dp { /* Max link BW for the sink as per DPCD registers */ int max_sink_link_bw; /* sink or branch descriptor */ - struct intel_dp_desc desc; + struct drm_dp_desc desc; struct drm_dp_aux aux; enum intel_display_power_domain aux_power_domain; uint8_t train_set[4]; @@ -1532,9 +1524,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) } bool intel_dp_read_dpcd(struct intel_dp *intel_dp); -bool __intel_dp_read_desc(struct intel_dp *intel_dp, - struct intel_dp_desc *desc); -bool intel_dp_read_desc(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); bool intel_digital_port_connected(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 71cbe9c08932..5abef482eacf 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -240,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - intel_dp_read_desc(dp); + drm_dp_read_desc(&dp->aux, &dp->desc, drm_dp_is_branch(dp->dpcd)); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; From 76fa998acd86b6b40d0217e12af39c2406bdcd2b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:24 +0300 Subject: [PATCH 114/206] drm/dp: start a DPCD based DP sink/branch device quirk database MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Face the fact, there are Display Port sink and branch devices out there in the wild that don't follow the Display Port specifications, or they have bugs, or just otherwise require special treatment. Start a common quirk database the drivers can query based on the DP device identification. At least for now, we leave the workarounds for the drivers to implement as they see fit. For starters, add a branch device that can't handle full 24-bit main link Mdiv and Ndiv main link attributes properly. Naturally, the workaround of reducing main link attributes for all devices ended up in regressions for other devices. So here we are. v2: Rebase on DRM DP desc read helpers v3: Fix the OUI memcmp blunder (Clint) Cc: Ville Syrjälä Cc: Dhinakaran Pandiyan Cc: Clint Taylor Cc: Adam Jackson Cc: Harry Wentland Tested-by: Clinton Taylor Reviewed-by: Clinton Taylor Reviewed-by: Daniel Vetter # v2 Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/91ec198dd95258dbf3bee2f6be739e0da73b4fdd.1495105635.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_dp_helper.c | 52 +++++++++++++++++++++++++++++++-- include/drm/drm_dp_helper.h | 32 ++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 52e0ca9a5bb1..213fb837e1c4 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1209,6 +1209,51 @@ int drm_dp_stop_crc(struct drm_dp_aux *aux) } EXPORT_SYMBOL(drm_dp_stop_crc); +struct dpcd_quirk { + u8 oui[3]; + bool is_branch; + u32 quirks; +}; + +#define OUI(first, second, third) { (first), (second), (third) } + +static const struct dpcd_quirk dpcd_quirk_list[] = { + /* Analogix 7737 needs reduced M and N at HBR2 link rates */ + { OUI(0x00, 0x22, 0xb9), true, BIT(DP_DPCD_QUIRK_LIMITED_M_N) }, +}; + +#undef OUI + +/* + * Get a bit mask of DPCD quirks for the sink/branch device identified by + * ident. The quirk data is shared but it's up to the drivers to act on the + * data. + * + * For now, only the OUI (first three bytes) is used, but this may be extended + * to device identification string and hardware/firmware revisions later. + */ +static u32 +drm_dp_get_quirks(const struct drm_dp_dpcd_ident *ident, bool is_branch) +{ + const struct dpcd_quirk *quirk; + u32 quirks = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(dpcd_quirk_list); i++) { + quirk = &dpcd_quirk_list[i]; + + if (quirk->is_branch != is_branch) + continue; + + if (memcmp(quirk->oui, ident->oui, sizeof(ident->oui)) != 0) + continue; + + quirks |= quirk->quirks; + } + + return quirks; +} + /** * drm_dp_read_desc - read sink/branch descriptor from DPCD * @aux: DisplayPort AUX channel @@ -1231,14 +1276,17 @@ int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, if (ret < 0) return ret; + desc->quirks = drm_dp_get_quirks(ident, is_branch); + dev_id_len = strnlen(ident->device_id, sizeof(ident->device_id)); - DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n", + DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d quirks 0x%04x\n", is_branch ? "branch" : "sink", (int)sizeof(ident->oui), ident->oui, dev_id_len, ident->device_id, ident->hw_rev >> 4, ident->hw_rev & 0xf, - ident->sw_major_rev, ident->sw_minor_rev); + ident->sw_major_rev, ident->sw_minor_rev, + desc->quirks); return 0; } diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 84502da177a1..bb837310c07e 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -924,12 +924,44 @@ struct drm_dp_dpcd_ident { /** * struct drm_dp_desc - DP branch/sink device descriptor * @ident: DP device identification from DPCD 0x400 (sink) or 0x500 (branch). + * @quirks: Quirks; use drm_dp_has_quirk() to query for the quirks. */ struct drm_dp_desc { struct drm_dp_dpcd_ident ident; + u32 quirks; }; int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, bool is_branch); +/** + * enum drm_dp_quirk - Display Port sink/branch device specific quirks + * + * Display Port sink and branch devices in the wild have a variety of bugs, try + * to collect them here. The quirks are shared, but it's up to the drivers to + * implement workarounds for them. + */ +enum drm_dp_quirk { + /** + * @DP_DPCD_QUIRK_LIMITED_M_N: + * + * The device requires main link attributes Mvid and Nvid to be limited + * to 16 bits. + */ + DP_DPCD_QUIRK_LIMITED_M_N, +}; + +/** + * drm_dp_has_quirk() - does the DP device have a specific quirk + * @desc: Device decriptor filled by drm_dp_read_desc() + * @quirk: Quirk to query for + * + * Return true if DP device identified by @desc has @quirk. + */ +static inline bool +drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) +{ + return desc->quirks & BIT(quirk); +} + #endif /* _DRM_DP_HELPER_H_ */ From b31e85eda38c58cae986162ae2c462b53b0a2065 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:25 +0300 Subject: [PATCH 115/206] drm/i915: Detect USB-C specific dongles before reducing M and N MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Analogix 7737 DP to HDMI converter requires reduced M and N values when to operate correctly at HBR2. We tried to reduce the M/N values for all devices in commit 9a86cda07af2 ("drm/i915/dp: reduce link M/N parameters"), but that regressed some other sinks. Detect this IC by its OUI value of 0x0022B9 via the DPCD quirk list, and only reduce the M/N values for that. v2 by Jani: Rebased on the DP quirk database v3 by Jani: Rebased on the reworked DP quirk database v4 by Jani: Improve commit message (Daniel) Fixes: 9a86cda07af2 ("drm/i915/dp: reduce link M/N parameters") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93578 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100755 Cc: Jani Nikula Cc: Dhinakaran Pandiyan Cc: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Clint Taylor Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/2d2e30f8f47d3f28c9b74ca2612336a54585c3ec.1495105635.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++-------- drivers/gpu/drm/i915/intel_dp.c | 8 ++++++-- drivers/gpu/drm/i915/intel_dp_mst.c | 5 ++++- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c9b0949f6c1a..963f6d4481f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -562,7 +562,8 @@ struct intel_link_m_n { void intel_link_compute_m_n(int bpp, int nlanes, int pixel_clock, int link_clock, - struct intel_link_m_n *m_n); + struct intel_link_m_n *m_n, + bool reduce_m_n); /* Interface history: * diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3617927af269..3cabe52a4e3b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6101,7 +6101,7 @@ static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc, pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n); + link_bw, &pipe_config->fdi_m_n, false); ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EINVAL && pipe_config->pipe_bpp > 6*3) { @@ -6277,7 +6277,8 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) } static void compute_m_n(unsigned int m, unsigned int n, - uint32_t *ret_m, uint32_t *ret_n) + uint32_t *ret_m, uint32_t *ret_n, + bool reduce_m_n) { /* * Reduce M/N as much as possible without loss in precision. Several DP @@ -6285,9 +6286,11 @@ static void compute_m_n(unsigned int m, unsigned int n, * values. The passed in values are more likely to have the least * significant bits zero than M after rounding below, so do this first. */ - while ((m & 1) == 0 && (n & 1) == 0) { - m >>= 1; - n >>= 1; + if (reduce_m_n) { + while ((m & 1) == 0 && (n & 1) == 0) { + m >>= 1; + n >>= 1; + } } *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); @@ -6298,16 +6301,19 @@ static void compute_m_n(unsigned int m, unsigned int n, void intel_link_compute_m_n(int bits_per_pixel, int nlanes, int pixel_clock, int link_clock, - struct intel_link_m_n *m_n) + struct intel_link_m_n *m_n, + bool reduce_m_n) { m_n->tu = 64; compute_m_n(bits_per_pixel * pixel_clock, link_clock * nlanes * 8, - &m_n->gmch_m, &m_n->gmch_n); + &m_n->gmch_m, &m_n->gmch_n, + reduce_m_n); compute_m_n(pixel_clock, link_clock, - &m_n->link_m, &m_n->link_n); + &m_n->link_m, &m_n->link_n, + reduce_m_n); } static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 5ce45d98da78..fc691b8b317c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1593,6 +1593,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, int common_rates[DP_MAX_SUPPORTED_RATES] = {}; int common_len; uint8_t link_bw, rate_select; + bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, + DP_DPCD_QUIRK_LIMITED_M_N); common_len = intel_dp_common_rates(intel_dp, common_rates); @@ -1722,7 +1724,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_link_compute_m_n(bpp, lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, - &pipe_config->dp_m_n); + &pipe_config->dp_m_n, + reduce_m_n); if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { @@ -1730,7 +1733,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_link_compute_m_n(bpp, lane_count, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, - &pipe_config->dp_m2_n2); + &pipe_config->dp_m2_n2, + reduce_m_n); } /* diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c1f62eb07c07..989e25577ac0 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -44,6 +44,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, int lane_count, slots; const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; int mst_pbn; + bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, + DP_DPCD_QUIRK_LIMITED_M_N); pipe_config->has_pch_encoder = false; bpp = 24; @@ -75,7 +77,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, intel_link_compute_m_n(bpp, lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, - &pipe_config->dp_m_n); + &pipe_config->dp_m_n, + reduce_m_n); pipe_config->dp_m_n.tu = slots; From f3d3eab667de62572376abb1aa26316191c39929 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 9 May 2017 10:04:36 +0200 Subject: [PATCH 116/206] HID: i2c: Call acpi_device_fix_up_power for ACPI-enumerated devices For ACPI devices which do not have a _PSC method, the ACPI subsys cannot query their initial state at boot, so these devices are assumed to have been put in D0 by the BIOS, but for touchscreens that is not always true. This commit adds a call to acpi_device_fix_up_power to explicitly put devices without a _PSC method into D0 state (for devices with a _PSC method it is a nop). Note we only need to do this on probe, after a resume the ACPI subsys knows the device is in D3 and will properly put it in D0. This fixes the SIS0817 i2c-hid touchscreen on a Peaq C1010 2-in-1 device failing to probe with a "hid_descr_cmd failed" error. Acked-by: Benjamin Tissoires Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 8daa8ce64ebb..fb55fb4c39fc 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -897,6 +897,15 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client, return 0; } +static void i2c_hid_acpi_fix_up_power(struct device *dev) +{ + acpi_handle handle = ACPI_HANDLE(dev); + struct acpi_device *adev; + + if (handle && acpi_bus_get_device(handle, &adev) == 0) + acpi_device_fix_up_power(adev); +} + static const struct acpi_device_id i2c_hid_acpi_match[] = { {"ACPI0C50", 0 }, {"PNP0C50", 0 }, @@ -909,6 +918,8 @@ static inline int i2c_hid_acpi_pdata(struct i2c_client *client, { return -ENODEV; } + +static inline void i2c_hid_acpi_fix_up_power(struct device *dev) {} #endif #ifdef CONFIG_OF @@ -1030,6 +1041,8 @@ static int i2c_hid_probe(struct i2c_client *client, if (ret < 0) goto err_regulator; + i2c_hid_acpi_fix_up_power(&client->dev); + pm_runtime_get_noresume(&client->dev); pm_runtime_set_active(&client->dev); pm_runtime_enable(&client->dev); From a082c6f680da298cf075886ff032f32ccb7c5e1a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 29 May 2017 15:15:27 +0200 Subject: [PATCH 117/206] ovl: filter trusted xattr for non-admin Filesystems filter out extended attributes in the "trusted." domain for unprivlieged callers. Overlay calls underlying filesystem's method with elevated privs, so need to do the filtering in overlayfs too. Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ad9547f82da5..d613e2c41242 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -240,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name, return res; } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xatts */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); @@ -263,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { From b0f5a8f32e8bbdaae1abb8abe2d3cbafaba57e08 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 29 May 2017 09:22:07 +0200 Subject: [PATCH 118/206] kthread: fix boot hang (regression) on MIPS/OpenRISC This fixes a regression in commit 4d6501dce079 where I didn't notice that MIPS and OpenRISC were reinitialising p->{set,clear}_child_tid to NULL after our initialisation in copy_process(). We can simply get rid of the arch-specific initialisation here since it is now always done in copy_process() before hitting copy_thread{,_tls}(). Review notes: - As far as I can tell, copy_process() is the only user of copy_thread_tls(), which is the only caller of copy_thread() for architectures that don't implement copy_thread_tls(). - After this patch, there is no arch-specific code touching p->set_child_tid or p->clear_child_tid whatsoever. - It may look like MIPS/OpenRISC wanted to always have these fields be NULL, but that's not true, as copy_process() would unconditionally set them again _after_ calling copy_thread_tls() before commit 4d6501dce079. Fixes: 4d6501dce079c1eb6bf0b1d8f528a5e81770109e ("kthread: Fix use-after-free if kthread fork fails") Reported-by: Guenter Roeck Tested-by: Guenter Roeck # MIPS only Acked-by: Stafford Horne Acked-by: Oleg Nesterov Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: openrisc@lists.librecores.org Cc: Jamie Iles Cc: Thomas Gleixner Signed-off-by: Vegard Nossum Signed-off-by: Linus Torvalds --- arch/mips/kernel/process.c | 1 - arch/openrisc/kernel/process.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 918d4c73e951..5351e1f3950d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -120,7 +120,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); unsigned long childksp; - p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index f8da545854f9..106859ae27ff 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -167,8 +167,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, top_of_kernel_stack = sp; - p->set_child_tid = p->clear_child_tid = NULL; - /* Locate userspace context on stack... */ sp -= STACK_FRAME_OVERHEAD; /* redzone */ sp -= sizeof(struct pt_regs); From 878d8db039daac0938238e9a40a5bd6e50ee3c9b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 10 May 2017 18:12:40 +0200 Subject: [PATCH 119/206] Revert "ACPI / button: Change default behavior to lid_init_state=open" Revert commit 77e9a4aa9de1 (ACPI / button: Change default behavior to lid_init_state=open) which changed the kernel's behavior on laptops that boot with closed lids and expect the lid switch state to be reported accurately by the kernel. If you boot or resume your laptop with the lid closed on a docking station while using an external monitor connected to it, both internal and external displays will light on, while only the external should. There is a design choice in gdm to only provide the greeter on the internal display when lit on, so users only see a gray area on the external monitor. Also, the cursor will not show up as it's by default on the internal display too. To "fix" that, users have to open the laptop once and close it once again to sync the state of the switch with the hardware state. Even if the "method" operation mode implementation can be buggy on some platforms, the "open" choice is worse. It breaks docking stations basically and there is no way to have a user-space hwdb to fix that. On the contrary, it's rather easy in user-space to have a hwdb with the problematic platforms. Then, libinput (1.7.0+) can fix the state of the lid switch for us: you need to set the udev property LIBINPUT_ATTR_LID_SWITCH_RELIABILITY to 'write_open'. When libinput detects internal keyboard events, it will overwrite the state of the switch to open, making it reliable again. Given that logind only checks the lid switch value after a timeout, we can assume the user will use the internal keyboard before this timeout expires. For example, such a hwdb entry is: libinput:name:*Lid Switch*:dmi:*svnMicrosoftCorporation:pnSurface3:* LIBINPUT_ATTR_LID_SWITCH_RELIABILITY=write_open Link: https://bugzilla.gnome.org/show_bug.cgi?id=782380 Cc: 4.11+ # 4.11+ Signed-off-by: Benjamin Tissoires Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 6d5a8c1d3132..e19f530f1083 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -113,7 +113,7 @@ struct acpi_button { static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); static struct acpi_device *lid_device; -static u8 lid_init_state = ACPI_BUTTON_LID_INIT_OPEN; +static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; static unsigned long lid_report_interval __read_mostly = 500; module_param(lid_report_interval, ulong, 0644); From 2ea65321b83539afc1d45c1bea39c55ab42af62b Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 9 May 2017 13:57:31 +0800 Subject: [PATCH 120/206] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling In the Linux kernel, acpi_get_table() "clones" haven't been fully balanced by acpi_put_table() invocations. In upstream ACPICA, due to the design change, there are also unbalanced acpi_get_table_by_index() invocations requiring special care. acpi_get_table() reference counting mismatches may occor due to that and printing error messages related to them is not useful at this point. The strict balanced validation count check should only be enabled after confirming that all invocations are safe and aligned with their designed purposes. Thus this patch removes the error value returned by acpi_tb_get_table() in that case along with the accompanying error message to fix the issue. Fixes: 174cc7187e6f (ACPICA: Tables: Back port acpi_get_table_with_size() and early_acpi_os_unmap_memory() from Linux kernel) Cc: 4.10+ # 4.10+ Reported-by: Anush Seetharaman Reported-by: Dan Williams Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbutils.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 5a968a78652b..7abe66505739 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -418,11 +418,7 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc, table_desc->validation_count++; if (table_desc->validation_count == 0) { - ACPI_ERROR((AE_INFO, - "Table %p, Validation count is zero after increment\n", - table_desc)); table_desc->validation_count--; - return_ACPI_STATUS(AE_LIMIT); } *out_table = table_desc->pointer; From 6c77003677d5f1ce15f26d24360cb66c0bc07bb3 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Fri, 26 May 2017 11:37:31 -0400 Subject: [PATCH 121/206] cpufreq: cpufreq_register_driver() should return -ENODEV if init fails For a driver that does not set the CPUFREQ_STICKY flag, if all of the ->init() calls fail, cpufreq_register_driver() should return an error. This will prevent the driver from loading. Fixes: ce1bcfe94db8 (cpufreq: check cpufreq_policy_list instead of scanning policies for all CPUs) Cc: 4.0+ # 4.0+ Signed-off-by: David Arcari Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0e3f6496524d..26b643d57847 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2468,6 +2468,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (!(cpufreq_driver->flags & CPUFREQ_STICKY) && list_empty(&cpufreq_policy_list)) { /* if all ->init() calls failed, unregister */ + ret = -ENODEV; pr_debug("%s: No CPU initialized for driver %s\n", __func__, driver_data->name); goto err_if_unreg; From 7575f8257279f9dd24b3fc950aa2c7e98564a103 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 17 May 2017 12:20:35 +0530 Subject: [PATCH 122/206] cpufreq: kirkwood-cpufreq:- Handle return value of clk_prepare_enable() clk_prepare_enable() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/kirkwood-cpufreq.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index 1b9bcd76c60e..c2dd43f3f5d8 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -127,7 +127,12 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) return PTR_ERR(priv.cpu_clk); } - clk_prepare_enable(priv.cpu_clk); + err = clk_prepare_enable(priv.cpu_clk); + if (err) { + dev_err(priv.dev, "Unable to prepare cpuclk\n"); + return err; + } + kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; priv.ddr_clk = of_clk_get_by_name(np, "ddrclk"); @@ -137,7 +142,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) goto out_cpu; } - clk_prepare_enable(priv.ddr_clk); + err = clk_prepare_enable(priv.ddr_clk); + if (err) { + dev_err(priv.dev, "Unable to prepare ddrclk\n"); + goto out_cpu; + } kirkwood_freq_table[1].frequency = clk_get_rate(priv.ddr_clk) / 1000; priv.powersave_clk = of_clk_get_by_name(np, "powersave"); @@ -146,7 +155,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) err = PTR_ERR(priv.powersave_clk); goto out_ddr; } - clk_prepare_enable(priv.powersave_clk); + err = clk_prepare_enable(priv.powersave_clk); + if (err) { + dev_err(priv.dev, "Unable to prepare powersave clk\n"); + goto out_ddr; + } of_node_put(np); np = NULL; From 1943d1723e7a7ee1a7b9b0ba4878dde0dc100671 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 May 2017 16:09:48 -0700 Subject: [PATCH 123/206] Input: silead - disable interrupt during suspend When we put the touchscreen controller in low-power mode the irq pin may trigger (float) and if we then try to read a data packet we get the following error in dmesg: [ 478.801017] silead_ts i2c-MSSL1680:00: Data read error -121 This commit disables the irq during suspend/resume fixing this error. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/silead.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 813dd68a5c82..0dbcf105f7db 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -526,6 +526,7 @@ static int __maybe_unused silead_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + disable_irq(client->irq); silead_ts_set_power(client, SILEAD_POWER_OFF); return 0; } @@ -551,6 +552,8 @@ static int __maybe_unused silead_ts_resume(struct device *dev) return -ENODEV; } + enable_irq(client->irq); + return 0; } From 2755551188d240f0098cdc6f1a2984f8a1785689 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 29 May 2017 19:57:19 -0700 Subject: [PATCH 124/206] Input: synaptics - clear device info before filling in synaptics_query_hardware() was being passed a 'struct synaptics_device_info' in uninitialized stack memory, then not always initializing all fields. This caused garbage to show up in certain fields, making the touchpad unusable. Fix by zeroing the device info, so all fields default to 0. Fixes: 6c53694fb222 ("Input: synaptics - split device info into a separate structure") Signed-off-by: Eric Biggers Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 131df9d3660f..4f97970abc94 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -397,6 +397,8 @@ static int synaptics_query_hardware(struct psmouse *psmouse, { int error; + memset(info, 0, sizeof(*info)); + error = synaptics_identify(psmouse, info); if (error) return error; From f4947d79a7080b25829997eeee38d4d65137c161 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 23 May 2017 14:15:24 -0700 Subject: [PATCH 125/206] Input: synaptics - keep PS/2 around when RMI4_SMB is not enabled Or the user might have the touchpad unbound from PS/2 but never picked up by rmi-smbus.ko Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 4f97970abc94..9b27a6c710b2 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1689,7 +1689,8 @@ enum { SYNAPTICS_INTERTOUCH_ON, }; -static int synaptics_intertouch = SYNAPTICS_INTERTOUCH_NOT_SET; +static int synaptics_intertouch = IS_ENABLED(CONFIG_RMI4_SMB) ? + SYNAPTICS_INTERTOUCH_NOT_SET : SYNAPTICS_INTERTOUCH_OFF; module_param_named(synaptics_intertouch, synaptics_intertouch, int, 0644); MODULE_PARM_DESC(synaptics_intertouch, "Use a secondary bus for the Synaptics device."); From f4101ff87dafd22fbcc3547fd3a3a3717d3d72d3 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 26 May 2017 16:21:36 -0700 Subject: [PATCH 126/206] Input: synaptics - warn the users when there is a better mode The Synaptics touchpads are now either using i2c-hid or rmi-smbus. Warn the users if they are missing the rmi-smbus modules and have no chance of reporting correct data. Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 9b27a6c710b2..604aa3aa7284 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -176,6 +176,12 @@ static const char * const smbus_pnp_ids[] = { NULL }; +static const char * const forcepad_pnp_ids[] = { + "SYN300D", + "SYN3014", + NULL +}; + /* * Send a command to the synpatics touchpad by special commands */ @@ -482,13 +488,6 @@ static const struct min_max_quirk min_max_pnpid_table[] = { { } }; -/* This list has been kindly provided by Synaptics. */ -static const char * const forcepad_pnp_ids[] = { - "SYN300D", - "SYN3014", - NULL -}; - /***************************************************************************** * Synaptics communications functions ****************************************************************************/ @@ -1813,6 +1812,15 @@ int synaptics_init(struct psmouse *psmouse) } if (SYN_CAP_INTERTOUCH(info.ext_cap_0c)) { + if ((!IS_ENABLED(CONFIG_RMI4_SMB) || + !IS_ENABLED(CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS)) && + /* Forcepads need F21, which is not ready */ + !psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids)) { + psmouse_warn(psmouse, + "The touchpad can support a better bus than the too old PS/2 protocol. " + "Make sure MOUSE_PS2_SYNAPTICS_SMBUS and RMI4_SMB are enabled to get a better touchpad experience.\n"); + } + error = synaptics_setup_intertouch(psmouse, &info, true); if (!error) return PSMOUSE_SYNAPTICS_SMBUS; From 2fef826e45c6a1e63f55ab72546f7d795300d9a8 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 26 May 2017 16:51:19 -0700 Subject: [PATCH 127/206] Input: synaptics - tell users to report when they should be using rmi-smbus Users should really consider switching to rmi-smbus instead of plain PS/2. Notify them that they should report a missing pnpID in the file. Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 604aa3aa7284..16c30460ef04 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1739,8 +1739,16 @@ static int synaptics_setup_intertouch(struct psmouse *psmouse, if (synaptics_intertouch == SYNAPTICS_INTERTOUCH_NOT_SET) { if (!psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) && - !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids)) + !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids)) { + + if (!psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids)) + psmouse_info(psmouse, + "Your touchpad (%s) says it can support a different bus. " + "If i2c-hid and hid-rmi are not used, you might want to try setting psmouse.synaptics_intertouch to 1 and report this to linux-input@vger.kernel.org.\n", + psmouse->ps2dev.serio->firmware_id); + return -ENXIO; + } } psmouse_info(psmouse, "Trying to set up SMBus access\n"); From 56b177055adb246cdeca174331dbf92fc49bfccd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 23 May 2017 07:08:43 +0000 Subject: [PATCH 128/206] rcar-dmac: fixup descriptor pointer for descriptor mode In descriptor mode, the descriptor running pointer is not maintained by the interrupt handler, thus, driver finds the running descriptor from the descriptor pointer field in the CHCRB register. But, CHCRB::DPTR indicates *next* descriptor pointer, not current. Thus, The residue calculation will be missed. This patch fixup it. Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index db41795fe42a..bd261c9e9664 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1287,6 +1287,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, if (desc->hwdescs.use) { dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT; + if (dptr == 0) + dptr = desc->nchunks; + dptr--; WARN_ON(dptr >= desc->nchunks); } else { running = desc->running; From 477c50e8dc1c7b5004e099b8b74ca8b70be2f4fd Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 30 May 2017 11:56:22 +0100 Subject: [PATCH 129/206] drivers/perf: arm_pmu_acpi: avoid perf IRQ init when guest PMU is off We saw perf IRQ init failures when running Linux kernel in an ACPI guest without PMU (i.e. pmu=off). This is because perf IRQ is not present when pmu=off, but arm_pmu_acpi still tries to register or unregister GSI. This patch addresses the problem by checking gicc->performance_interrupt. If it is 0, which is the value set by qemu when pmu=off, we skip the IRQ register/unregister process. [ 4.069470] bc00: 0000000000040b00 ffff0000089db190 [ 4.070267] [] enable_percpu_irq+0xdc/0xe4 [ 4.071192] [] arm_perf_starting_cpu+0x108/0x10c [ 4.072200] [] cpuhp_invoke_callback+0x14c/0x4ac [ 4.073210] [] cpuhp_thread_fun+0xd4/0x11c [ 4.074132] [] smpboot_thread_fn+0x1b4/0x1c4 [ 4.075081] [] kthread+0x10c/0x138 [ 4.075921] [] ret_from_fork+0x10/0x50 [ 4.076947] genirq: Setting trigger mode 4 for irq 43 failed (gic_set_type+0x0/0x74) Signed-off-by: Wei Huang [will: add comment justifying deviation from ACPI spec, removed redundant hunk] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu_acpi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 34c862f213c7..0a9b78705ee8 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -29,6 +29,17 @@ static int arm_pmu_acpi_register_irq(int cpu) return -EINVAL; gsi = gicc->performance_interrupt; + + /* + * Per the ACPI spec, the MADT cannot describe a PMU that doesn't + * have an interrupt. QEMU advertises this by using a GSI of zero, + * which is not known to be valid on any hardware despite being + * valid per the spec. Take the pragmatic approach and reject a + * GSI of zero for now. + */ + if (!gsi) + return 0; + if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE) trigger = ACPI_EDGE_SENSITIVE; else From af622b86665881ffa96bc77fb89c94fae02cfa6b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 23 May 2017 17:49:54 +0200 Subject: [PATCH 130/206] nbd: nbd_reset() call in nbd_dev_add() is redundant There is nothing to clear -- nbd_device has just been allocated. Fold nbd_reset() into its other caller, nbd_config_put(). Signed-off-by: Ilya Dryomov Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9a7bb2c29447..e725d8d5ab0b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -937,14 +937,6 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) return -ENOSPC; } -/* Reset all properties of an NBD device */ -static void nbd_reset(struct nbd_device *nbd) -{ - nbd->config = NULL; - nbd->tag_set.timeout = 0; - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); -} - static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) @@ -1029,7 +1021,10 @@ static void nbd_config_put(struct nbd_device *nbd) } kfree(config->socks); } - nbd_reset(nbd); + nbd->config = NULL; + + nbd->tag_set.timeout = 0; + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); nbd_put(nbd); @@ -1483,7 +1478,6 @@ static int nbd_dev_add(int index) disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); - nbd_reset(nbd); add_disk(disk); nbd_total_devices++; return index; From fa9765323a93473d3853d04c9903958453c92ad4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 23 May 2017 17:49:55 +0200 Subject: [PATCH 131/206] nbd: don't leak nbd_config nbd_config is allocated in nbd_alloc_config(), but never freed. Fixes: 5ea8d10802ec ("nbd: separate out the config information") Signed-off-by: Ilya Dryomov Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e725d8d5ab0b..f3f191ba8ca4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1021,6 +1021,7 @@ static void nbd_config_put(struct nbd_device *nbd) } kfree(config->socks); } + kfree(nbd->config); nbd->config = NULL; nbd->tag_set.timeout = 0; From cbf712792b6e61317b93dd56dd5c0784363c9ac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 19 May 2017 15:48:51 +0200 Subject: [PATCH 132/206] KVM: nVMX: fix nested_vmx_check_vmptr failure paths under debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_skip_emulated_instruction() will return 0 if userspace is single-stepping the guest. kvm_skip_emulated_instruction() uses return status convention of exit handler: 0 means "exit to userspace" and 1 means "continue vm entries". The problem is that nested_vmx_check_vmptr() return status means something else: 0 is ok, 1 is error. This means we would continue executing after a failure. Static checker noticed it because vmptr was not initialized. Reported-by: Dan Carpenter Fixes: 6affcbedcac7 ("KVM: x86: Add kvm_skip_emulated_instruction and use it.") Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 140 ++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 83 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 880f371705bc..9b4b5d6dcd34 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6914,97 +6914,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, return 0; } -/* - * This function performs the various checks including - * - if it's 4KB aligned - * - No bits beyond the physical address width are set - * - Returns 0 on success or else 1 - * (Intel SDM Section 30.3) - */ -static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, - gpa_t *vmpointer) +static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) { gva_t gva; - gpa_t vmptr; struct x86_exception e; - struct page *page; - struct vcpu_vmx *vmx = to_vmx(vcpu); - int maxphyaddr = cpuid_maxphyaddr(vcpu); if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) return 1; - if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, - sizeof(vmptr), &e)) { + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer, + sizeof(*vmpointer), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } - switch (exit_reason) { - case EXIT_REASON_VMON: - /* - * SDM 3: 24.11.5 - * The first 4 bytes of VMXON region contain the supported - * VMCS revision identifier - * - * Note - IA32_VMX_BASIC[48] will never be 1 - * for the nested case; - * which replaces physical address width with 32 - * - */ - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - if (*(u32 *)kmap(page) != VMCS12_REVISION) { - kunmap(page); - nested_release_page_clean(page); - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - kunmap(page); - nested_release_page_clean(page); - vmx->nested.vmxon_ptr = vmptr; - break; - case EXIT_REASON_VMCLEAR: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - case EXIT_REASON_VMPTRLD: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - default: - return 1; /* shouldn't happen */ - } - - if (vmpointer) - *vmpointer = vmptr; return 0; } @@ -7066,6 +6990,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) static int handle_vmon(struct kvm_vcpu *vcpu) { int ret; + gpa_t vmptr; + struct page *page; struct vcpu_vmx *vmx = to_vmx(vcpu); const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -7095,9 +7021,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - + + /* + * SDM 3: 24.11.5 + * The first 4 bytes of VMXON region contain the supported + * VMCS revision identifier + * + * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; + * which replaces physical address width with 32 + */ + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + + page = nested_get_page(vcpu, vmptr); + if (page == NULL) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + if (*(u32 *)kmap(page) != VMCS12_REVISION) { + kunmap(page); + nested_release_page_clean(page); + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + kunmap(page); + nested_release_page_clean(page); + + vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); if (ret) return ret; @@ -7213,9 +7167,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); @@ -7545,9 +7509,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmx->nested.current_vmptr != vmptr) { struct vmcs12 *new_vmcs12; struct page *page; From 8eae9570d1d3887487be0b355d12656b46fac226 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Tue, 30 May 2017 15:24:45 +0200 Subject: [PATCH 133/206] KVM: SVM: ignore type when setting segment registers Commit 19bca6ab75d8 ("KVM: SVM: Fix cross vendor migration issue with unusable bit") added checking type when setting unusable. So unusable can be set if present is 0 OR type is 0. According to the AMD processor manual, long mode ignores the type value in segment descriptor. And type can be 0 if it is read-only data segment. Therefore type value is not related to unusable flag. This patch is based on linux-next v4.12.0-rc3. Signed-off-by: Gioh Kim Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 183ddb235fb4..a654372efea1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1807,7 +1807,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, * AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present" */ - var->unusable = !var->present || (var->type == 0); + var->unusable = !var->present; switch (seg) { case VCPU_SREG_TR: From e4dc2b32df5573b077f6723e01cf761d236d5113 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 30 May 2017 14:39:11 -0400 Subject: [PATCH 134/206] blk-mq: Take tagset lock when updating hw queues The tagset lock needs to be held when iterating the tag_list, so a lockdep assert was added when updating number of hardware queues. The drivers calling this API, however, were unaware of the new requirement, so are failing the assertion. This patch takes the lock within the blk-mq function so the drivers do not have to be modified in order to be safe. Fixes: 705cda97e ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list") Reported-by: Gabriel Krisman Bertazi Reviewed-by: Bart Van Assche Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f2224ffd225d..1bcccedcc74f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2641,7 +2641,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return ret; } -void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, + int nr_hw_queues) { struct request_queue *q; @@ -2665,6 +2666,13 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); } + +void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +{ + mutex_lock(&set->tag_list_lock); + __blk_mq_update_nr_hw_queues(set, nr_hw_queues); + mutex_unlock(&set->tag_list_lock); +} EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); /* Enable polling stats and return whether they were already enabled. */ From f511c0b17b081562dca8ac5061dfa86db4c66cc2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 May 2017 12:38:59 -0700 Subject: [PATCH 135/206] "Yes, people use FOLL_FORCE ;)" This effectively reverts commit 8ee74a91ac30 ("proc: try to remove use of FOLL_FORCE entirely") It turns out that people do depend on FOLL_FORCE for the /proc//mem case, and we're talking not just debuggers. Talking to the affected people, the use-cases are: Keno Fischer: "We used these semantics as a hardening mechanism in the julia JIT. By opening /proc/self/mem and using these semantics, we could avoid needing RWX pages, or a dual mapping approach. We do have fallbacks to these other methods (though getting EIO here actually causes an assert in released versions - we'll updated that to make sure to take the fall back in that case). Nevertheless the /proc/self/mem approach was our favored approach because it a) Required an attacker to be able to execute syscalls which is a taller order than getting memory write and b) didn't double the virtual address space requirements (as a dual mapping approach would). I think in general this feature is very useful for anybody who needs to precisely control the execution of some other process. Various debuggers (gdb/lldb/rr) certainly fall into that category, but there's another class of such processes (wine, various emulators) which may want to do that kind of thing. Now, I suspect most of these will have the other process under ptrace control, so maybe allowing (same_mm || ptraced) would be ok, but at least for the sandbox/remote-jit use case, it would be perfectly reasonable to not have the jit server be a ptracer" Robert O'Callahan: "We write to readonly code and data mappings via /proc/.../mem in lots of different situations, particularly when we're adjusting program state during replay to match the recorded execution. Like Julia, we can add workarounds, but they could be expensive." so not only do people use FOLL_FORCE for both reads and writes, but they use it for both the local mm and remote mm. With these comments in mind, we likely also cannot add the "are we actively ptracing" check either, so this keeps the new code organization and does not do a real revert that would add back the original comment about "Maybe we should limit FOLL_FORCE to actual ptrace users?" Reported-by: Keno Fischer Reported-by: Robert O'Callahan Cc: Kees Cook Cc: Andy Lutomirski Cc: Eric Biederman Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 45f6bf68fff3..f1e1927ccd48 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -821,7 +821,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!mmget_not_zero(mm)) goto free; - flags = write ? FOLL_WRITE : 0; + flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); From 08fd5e76c268b7a5353302825b6b63a6027ddd0a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 27 May 2017 06:50:05 -0700 Subject: [PATCH 136/206] hwmon: (aspeed-pwm-tacho) Select REGMAP The driver uses regmap and thus has to select it to avoid build errors such as the following. drivers/hwmon/aspeed-pwm-tacho.c:337:21: error: variable 'aspeed_pwm_tacho_regmap_config' has initializer but incomplete type Reported-by: kbuild test robot Acked-by: Joel Stanley Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 22d5eafd6815..5ef2814345ef 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -343,6 +343,7 @@ config SENSORS_ASB100 config SENSORS_ASPEED tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver" + select REGMAP help This driver provides support for ASPEED AST2400/AST2500 PWM and Fan Tacho controllers. From 7ed1c5e5ddbb87aef9df7f8a8a714bcffe90f0eb Mon Sep 17 00:00:00 2001 From: Patrick Venture Date: Tue, 30 May 2017 12:42:01 -0700 Subject: [PATCH 137/206] hwmon: (aspeed-pwm-tacho) On read failure return -ETIMEDOUT When the controller fails to provide an RPM reading within the alloted time; the driver returns -ETIMEDOUT and no file contents. Signed-off-by: Patrick Venture Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index 48403a2115be..12b716b70ead 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -494,7 +495,7 @@ static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit); } -static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, +static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, u8 fan_tach_ch) { u32 raw_data, tach_div, clk_source, sec, val; @@ -510,6 +511,9 @@ static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, msleep(sec); regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val); + if (!(val & RESULT_STATUS_MASK)) + return -ETIMEDOUT; + raw_data = val & RESULT_VALUE_MASK; tach_div = priv->type_fan_tach_clock_division[type]; tach_div = 0x4 << (tach_div * 2); @@ -561,12 +565,14 @@ static ssize_t show_rpm(struct device *dev, struct device_attribute *attr, { struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); int index = sensor_attr->index; - u32 rpm; + int rpm; struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); rpm = aspeed_get_fan_tach_ch_rpm(priv, index); + if (rpm < 0) + return rpm; - return sprintf(buf, "%u\n", rpm); + return sprintf(buf, "%d\n", rpm); } static umode_t pwm_is_visible(struct kobject *kobj, From 64188cfbe5245d412de2139a3864e4e00b4136f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 09:23:41 +0200 Subject: [PATCH 138/206] Revert "ALSA: usb-audio: purge needless variable length array" This reverts commit 89b593c30e83 ("ALSA: usb-audio: purge needless variable length array"). The patch turned out to cause a severe regression, triggering an Oops at snd_usb_ctl_msg(). It was overseen that snd_usb_ctl_msg() writes back the response to the given buffer, while the patch changed it to a read-only const buffer. (One should always double-check when an extra pointer cast is present...) As a simple fix, just revert the affected commit. It was merely a cleanup. Although it brings VLA again, it's clearer as a fix. We'll address the VLA later in another patch. Fixes: 89b593c30e83 ("ALSA: usb-audio: purge needless variable length array") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195875 Cc: # v4.11+ Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index dc48eedea92e..29d2c9282987 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -698,12 +698,12 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, struct snd_usb_audio *chip = elem->head.mixer->chip; struct snd_us16x08_meter_store *store = elem->private_data; u8 meter_urb[64]; - char tmp[sizeof(mix_init_msg2)] = {0}; + char tmp[max(sizeof(mix_init_msg1), sizeof(mix_init_msg2))]; switch (kcontrol->private_value) { case 0: - snd_us16x08_send_urb(chip, (char *)mix_init_msg1, - sizeof(mix_init_msg1)); + memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1)); + snd_us16x08_send_urb(chip, tmp, 4); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value++; @@ -721,7 +721,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, case 3: memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2)); tmp[2] = snd_get_meter_comp_index(store); - snd_us16x08_send_urb(chip, tmp, sizeof(mix_init_msg2)); + snd_us16x08_send_urb(chip, tmp, 10); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value = 0; From 617163fc2580da3d489b6c1bacb6312e0e2aac02 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 23:21:07 +0200 Subject: [PATCH 139/206] ALSA: usb: Fix a typo in Tascam US-16x08 mixer element A mixer element created in a quirk for Tascam US-16x08 contains a typo: it should be "EQ MidLow Q" instead of "EQ MidQLow Q". Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195875 Fixes: d2bb390a2081 ("ALSA: usb-audio: Tascam US-16x08 DSP mixer quirk") Cc: # v4.11+ Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 29d2c9282987..442d8f7998e3 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -1135,7 +1135,7 @@ static const struct snd_us16x08_control_params eq_controls[] = { .control_id = SND_US16X08_ID_EQLOWMIDWIDTH, .type = USB_MIXER_U8, .num_channels = 16, - .name = "EQ MidQLow Q", + .name = "EQ MidLow Q", }, { /* EQ mid high gain */ .kcontrol_new = &snd_us16x08_eq_gain_ctl, From e49a14fa36aeb72476e038fcde96c151b7e4ecc8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 23:24:45 +0200 Subject: [PATCH 140/206] ALSA: usb: Avoid VLA in mixer_us16x08.c This is another attempt to work around the VLA used in mixer_us16x08.c. Basically the temporary array is used individually for two cases, and we can declare locally in each block, instead of hackish max() usage. Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 442d8f7998e3..26ed23b18b77 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -698,16 +698,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, struct snd_usb_audio *chip = elem->head.mixer->chip; struct snd_us16x08_meter_store *store = elem->private_data; u8 meter_urb[64]; - char tmp[max(sizeof(mix_init_msg1), sizeof(mix_init_msg2))]; switch (kcontrol->private_value) { - case 0: + case 0: { + char tmp[sizeof(mix_init_msg1)]; + memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1)); snd_us16x08_send_urb(chip, tmp, 4); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value++; break; + } case 1: snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); @@ -718,7 +720,9 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, sizeof(meter_urb)); kcontrol->private_value++; break; - case 3: + case 3: { + char tmp[sizeof(mix_init_msg2)]; + memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2)); tmp[2] = snd_get_meter_comp_index(store); snd_us16x08_send_urb(chip, tmp, 10); @@ -727,6 +731,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, kcontrol->private_value = 0; break; } + } for (set = 0; set < 6; set++) get_meter_levels_from_urb(set, store, meter_urb); From ff0361b34ac63ef80c785c32d62e0e9d89a2cf89 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 31 May 2017 09:44:32 +0200 Subject: [PATCH 141/206] dm: make flush bios explicitly sync Commit b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as synchronous") removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions. Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as synchronous") Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-integrity.c | 3 ++- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-snap-persistent.c | 3 ++- drivers/md/dm.c | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cd8139593ccd..840c1496b2b1 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1334,7 +1334,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH, + .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 1feeb2ccf5a1..7910bfe50da4 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -783,7 +783,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi for (i = 0; i < commit_sections; i++) rw_section_mac(ic, commit_start + i, true); } - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, commit_sections, &io_comp); + rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start, + commit_sections, &io_comp); } else { unsigned to_end; io_comp.in_flight = (atomic_t)ATOMIC_INIT(2); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index a95cbb80fb34..e61c45047c25 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -260,7 +260,7 @@ static int mirror_flush(struct dm_target *ti) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH, + .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index b93476c3ba3f..c5534d294773 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -741,7 +741,8 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA)) + if (ps->valid && area_io(ps, REQ_OP_WRITE, + REQ_PREFLUSH | REQ_FUA | REQ_SYNC)) ps->valid = 0; /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6ef9500226c0..37ccd73c79ec 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1657,7 +1657,7 @@ static struct mapped_device *alloc_dev(int minor) bio_init(&md->flush_bio, NULL, 0); md->flush_bio.bi_bdev = md->bdev; - md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; dm_stats_init(&md->stats); From 63db7c815bc0997c29e484d2409684fdd9fcd93b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 31 May 2017 08:22:52 -0700 Subject: [PATCH 142/206] xfs: use ->b_state to fix buffer I/O accounting release race We've had user reports of unmount hangs in xfs_wait_buftarg() that analysis shows is due to btp->bt_io_count == -1. bt_io_count represents the count of in-flight asynchronous buffers and thus should always be >= 0. xfs_wait_buftarg() waits for this value to stabilize to zero in order to ensure that all untracked (with respect to the lru) buffers have completed I/O processing before unmount proceeds to tear down in-core data structures. The value of -1 implies an I/O accounting decrement race. Indeed, the fact that xfs_buf_ioacct_dec() is called from xfs_buf_rele() (where the buffer lock is no longer held) means that bp->b_flags can be updated from an unsafe context. While a user-level reproducer is currently not available, some intrusive hacks to run racing buffer lookups/ioacct/releases from multiple threads was used to successfully manufacture this problem. Existing callers do not expect to acquire the buffer lock from xfs_buf_rele(). Therefore, we can not safely update ->b_flags from this context. It turns out that we already have separate buffer state bits and associated serialization for dealing with buffer LRU state in the form of ->b_state and ->b_lock. Therefore, replace the _XBF_IN_FLIGHT flag with a ->b_state variant, update the I/O accounting wrappers appropriately and make sure they are used with the correct locking. This ensures that buffer in-flight state can be modified at buffer release time without racing with modifications from a buffer lock holder. Fixes: 9c7504aa72b6 ("xfs: track and serialize in-flight async buffers against unmount") Cc: # v4.8+ Signed-off-by: Brian Foster Reviewed-by: Nikolay Borisov Tested-by: Libor Pechacek Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 38 ++++++++++++++++++++++++++------------ fs/xfs/xfs_buf.h | 5 ++--- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 62fa39276a24..07b77b73b024 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -97,27 +97,41 @@ static inline void xfs_buf_ioacct_inc( struct xfs_buf *bp) { - if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT)) + if (bp->b_flags & XBF_NO_IOACCT) return; ASSERT(bp->b_flags & XBF_ASYNC); - bp->b_flags |= _XBF_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); + spin_lock(&bp->b_lock); + if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { + bp->b_state |= XFS_BSTATE_IN_FLIGHT; + percpu_counter_inc(&bp->b_target->bt_io_count); + } + spin_unlock(&bp->b_lock); } /* * Clear the in-flight state on a buffer about to be released to the LRU or * freed and unaccount from the buftarg. */ +static inline void +__xfs_buf_ioacct_dec( + struct xfs_buf *bp) +{ + ASSERT(spin_is_locked(&bp->b_lock)); + + if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { + bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; + percpu_counter_dec(&bp->b_target->bt_io_count); + } +} + static inline void xfs_buf_ioacct_dec( struct xfs_buf *bp) { - if (!(bp->b_flags & _XBF_IN_FLIGHT)) - return; - - bp->b_flags &= ~_XBF_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); + spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + spin_unlock(&bp->b_lock); } /* @@ -149,9 +163,9 @@ xfs_buf_stale( * unaccounted (released to LRU) before that occurs. Drop in-flight * status now to preserve accounting consistency. */ - xfs_buf_ioacct_dec(bp); - spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) @@ -979,12 +993,12 @@ xfs_buf_rele( * ensures the decrement occurs only once per-buf. */ if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); goto out_unlock; } /* the last reference has been dropped ... */ - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU take a new reference to the diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8d1d44f87ce9..1508121f29f2 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -63,7 +63,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */ typedef unsigned int xfs_buf_flags_t; @@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_IN_FLIGHT, "IN_FLIGHT" } + { _XBF_COMPOUND, "COMPOUND" } /* * Internal state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ /* * The xfs_buftarg contains 2 notions of "sector size" - From 5be6b75610cefd1e21b98a218211922c2feb6e08 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Mar 2017 09:02:33 +0800 Subject: [PATCH 143/206] cfq-iosched: fix the delay of cfq_group's vdisktime under iops mode When adding a cfq_group into the cfq service tree, we use CFQ_IDLE_DELAY as the delay of cfq_group's vdisktime if there have been other cfq_groups already. When cfq is under iops mode, commit 9a7f38c42c2b ("cfq-iosched: Convert from jiffies to nanoseconds") could result in a large iops delay and lead to an abnormal io schedule delay for the added cfq_group. To fix it, we just need to revert to the old CFQ_IDLE_DELAY value: HZ / 5 when iops mode is enabled. Despite having the same value, the delay of a cfq_queue in idle class and the delay of cfq_group are different things, so I define two new macros for the delay of a cfq_group under time-slice mode and iops mode. Fixes: 9a7f38c42c2b ("cfq-iosched: Convert from jiffies to nanoseconds") Cc: # 4.8+ Signed-off-by: Hou Tao Acked-by: Jan Kara Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index da69b079725f..b7e9c7feeab2 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -38,9 +38,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* - * offset from end of service tree + * offset from end of queue service tree for idle class */ #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service tree under time slice mode */ +#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service under IOPS mode */ +#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5) /* * below this threshold, we consider thinktime immediate @@ -1362,6 +1366,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) cfqg->vfraction = max_t(unsigned, vfr, 1); } +static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd) +{ + if (!iops_mode(cfqd)) + return CFQ_SLICE_MODE_GROUP_DELAY; + else + return CFQ_IOPS_MODE_GROUP_DELAY; +} + static void cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) { @@ -1381,7 +1393,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) n = rb_last(&st->rb); if (n) { __cfqg = rb_entry_cfqg(n); - cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; + cfqg->vdisktime = __cfqg->vdisktime + + cfq_get_cfqg_vdisktime_delay(cfqd); } else cfqg->vdisktime = st->min_vdisktime; cfq_group_service_tree_add(st, cfqg); From 5a8948f8a32ba56c17b3fb75d318ac98157f3ba5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 31 May 2017 09:44:33 +0200 Subject: [PATCH 144/206] md: Make flush bios explicitely sync Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. CC: linux-raid@vger.kernel.org CC: Shaohua Li Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- drivers/md/raid5-cache.c | 4 ++-- drivers/md/raid5-ppl.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 10367ffe92e3..212a6777ff31 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -765,7 +765,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, test_bit(FailFast, &rdev->flags) && !test_bit(LastDev, &rdev->flags)) ff = MD_FAILFAST; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff; atomic_inc(&mddev->pending_writes); submit_bio(bio); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 4c00bc248287..0a7af8b0a80a 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1782,7 +1782,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, mb, PAGE_SIZE)); if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE, - REQ_FUA, false)) { + REQ_SYNC | REQ_FUA, false)) { __free_page(page); return -EIO; } @@ -2388,7 +2388,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, mb, PAGE_SIZE)); sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, - REQ_OP_WRITE, REQ_FUA, false); + REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false); sh->log_start = ctx->pos; list_add_tail(&sh->r5c, &log->stripe_in_journal_list); atomic_inc(&log->stripe_in_journal_count); diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 5d25bebf3328..ccce92e68d7f 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -907,8 +907,8 @@ static int ppl_write_empty_header(struct ppl_log *log) pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE)); if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, - PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_FUA, 0, - false)) { + PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC | + REQ_FUA, 0, false)) { md_error(rdev->mddev, rdev); ret = -EIO; } From 6ea44adce91526700535b3150f77f8639ae8c82d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 May 2017 17:00:32 +1000 Subject: [PATCH 145/206] SUNRPC: ensure correct error is reported by xs_tcp_setup_socket() If you attempt a TCP mount from an host that is unreachable in a way that triggers an immediate error from kernel_connect(), that error does not propagate up, instead EAGAIN is reported. This results in call_connect_status receiving the wrong error. A case that it easy to demonstrate is to attempt to mount from an address that results in ENETUNREACH, but first deleting any default route. Without this patch, the mount.nfs process is persistently runnable and is hard to kill. With this patch it exits as it should. The problem is caused by the fact that xs_tcp_force_close() eventually calls xprt_wake_pending_tasks(xprt, -EAGAIN); which causes an error return of -EAGAIN. so when xs_tcp_setup_sock() calls xprt_wake_pending_tasks(xprt, status); the status is ignored. Fixes: 4efdd92c9211 ("SUNRPC: Remove TCP client connection reset hack") Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 16aff8ddc16f..d5b54c020dec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2432,7 +2432,12 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ENETUNREACH: case -EADDRINUSE: case -ENOBUFS: - /* retry with existing socket, after a delay */ + /* + * xs_tcp_force_close() wakes tasks with -EIO. + * We need to wake them first to ensure the + * correct error code. + */ + xprt_wake_pending_tasks(xprt, status); xs_tcp_force_close(xprt); goto out; } From 45cc6586b7a73e84a8806881122b6ec306cdc9e7 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 29 May 2017 13:13:59 -0400 Subject: [PATCH 146/206] drm/amdgpu: Program ring for vce instance 1 at its register space We need program ring buffer on instance 1 register space domain, when only if instance 1 available, with two instances or instance 0, and we need only program instance 0 regsiter space domain for ring. Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 95 +++++++++++++++++++-------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index fb0819359909..90332f55cfba 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -77,13 +77,26 @@ static int vce_v3_0_set_clockgating_state(void *handle, static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + u32 v; + + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); if (ring == &adev->vce.ring[0]) - return RREG32(mmVCE_RB_RPTR); + v = RREG32(mmVCE_RB_RPTR); else if (ring == &adev->vce.ring[1]) - return RREG32(mmVCE_RB_RPTR2); + v = RREG32(mmVCE_RB_RPTR2); else - return RREG32(mmVCE_RB_RPTR3); + v = RREG32(mmVCE_RB_RPTR3); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); + + return v; } /** @@ -96,13 +109,26 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + u32 v; + + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); if (ring == &adev->vce.ring[0]) - return RREG32(mmVCE_RB_WPTR); + v = RREG32(mmVCE_RB_WPTR); else if (ring == &adev->vce.ring[1]) - return RREG32(mmVCE_RB_WPTR2); + v = RREG32(mmVCE_RB_WPTR2); else - return RREG32(mmVCE_RB_WPTR3); + v = RREG32(mmVCE_RB_WPTR3); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); + + return v; } /** @@ -116,12 +142,22 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); + if (ring == &adev->vce.ring[0]) WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else if (ring == &adev->vce.ring[1]) WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); else WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); } static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) @@ -231,33 +267,38 @@ static int vce_v3_0_start(struct amdgpu_device *adev) struct amdgpu_ring *ring; int idx, r; - ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); - - ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); - - ring = &adev->vce.ring[2]; - WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); - mutex_lock(&adev->grbm_idx_mutex); for (idx = 0; idx < 2; ++idx) { if (adev->vce.harvest_config & (1 << idx)) continue; WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); + + /* Program instance 0 reg space for two instances or instance 0 case + program instance 1 reg space for only instance 1 available case */ + if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) { + ring = &adev->vce.ring[0]; + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vce.ring[1]; + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); + + ring = &adev->vce.ring[2]; + WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); + } + vce_v3_0_mc_resume(adev, idx); WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1); From 25cdda95fda78d22d44157da15aa7ea34be3c804 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 24 May 2017 21:47:09 -0700 Subject: [PATCH 147/206] iscsi-target: Fix initial login PDU asynchronous socket close OOPs This patch fixes a OOPs originally introduced by: commit bb048357dad6d604520c91586334c9c230366a14 Author: Nicholas Bellinger Date: Thu Sep 5 14:54:04 2013 -0700 iscsi-target: Add sk->sk_state_change to cleanup after TCP failure which would trigger a NULL pointer dereference when a TCP connection was closed asynchronously via iscsi_target_sk_state_change(), but only when the initial PDU processing in iscsi_target_do_login() from iscsi_np process context was blocked waiting for backend I/O to complete. To address this issue, this patch makes the following changes. First, it introduces some common helper functions used for checking socket closing state, checking login_flags, and atomically checking socket closing state + setting login_flags. Second, it introduces a LOGIN_FLAGS_INITIAL_PDU bit to know when a TCP connection has dropped via iscsi_target_sk_state_change(), but the initial PDU processing within iscsi_target_do_login() in iscsi_np context is still running. For this case, it sets LOGIN_FLAGS_CLOSED, but doesn't invoke schedule_delayed_work(). The original NULL pointer dereference case reported by MNC is now handled by iscsi_target_do_login() doing a iscsi_target_sk_check_close() before transitioning to FFP to determine when the socket has already closed, or iscsi_target_start_negotiation() if the login needs to exchange more PDUs (eg: iscsi_target_do_login returned 0) but the socket has closed. For both of these cases, the cleanup up of remaining connection resources will occur in iscsi_target_start_negotiation() from iscsi_np process context once the failure is detected. Finally, to handle to case where iscsi_target_sk_state_change() is called after the initial PDU procesing is complete, it now invokes conn->login_work -> iscsi_target_do_login_rx() to perform cleanup once existing iscsi_target_sk_check_close() checks detect connection failure. For this case, the cleanup of remaining connection resources will occur in iscsi_target_do_login_rx() from delayed workqueue process context once the failure is detected. Reported-by: Mike Christie Reviewed-by: Mike Christie Tested-by: Mike Christie Cc: Mike Christie Reported-by: Hannes Reinecke Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Varun Prakash Cc: # v3.12+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_nego.c | 196 +++++++++++++++-------- include/target/iscsi/iscsi_target_core.h | 1 + 2 files changed, 134 insertions(+), 63 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 7ccc9c1cbfd1..6f88b31242b0 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -493,14 +493,60 @@ static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); -static bool iscsi_target_sk_state_check(struct sock *sk) +static bool __iscsi_target_sk_check_close(struct sock *sk) { if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { - pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE," "returning FALSE\n"); - return false; + return true; } - return true; + return false; +} + +static bool iscsi_target_sk_check_close(struct iscsi_conn *conn) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_flag(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = test_bit(flag, &conn->login_flags); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + if (!state) + clear_bit(flag, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + return state; } static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) @@ -540,6 +586,20 @@ static void iscsi_target_do_login_rx(struct work_struct *work) pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", conn, current->comm, current->pid); + /* + * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready() + * before initial PDU processing in iscsi_target_start_negotiation() + * has completed, go ahead and retry until it's cleared. + * + * Otherwise if the TCP connection drops while this is occuring, + * iscsi_target_start_negotiation() will detect the failure, call + * cancel_delayed_work_sync(&conn->login_work), and cleanup the + * remaining iscsi connection resources from iscsi_np process context. + */ + if (iscsi_target_sk_check_flag(conn, LOGIN_FLAGS_INITIAL_PDU)) { + schedule_delayed_work(&conn->login_work, msecs_to_jiffies(10)); + return; + } spin_lock(&tpg->tpg_state_lock); state = (tpg->tpg_state == TPG_STATE_ACTIVE); @@ -547,26 +607,12 @@ static void iscsi_target_do_login_rx(struct work_struct *work) if (!state) { pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; + goto err; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (iscsi_target_sk_check_close(conn)) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + goto err; } conn->login_kworker = current; @@ -584,34 +630,29 @@ static void iscsi_target_do_login_rx(struct work_struct *work) flush_signals(current); conn->login_kworker = NULL; - if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (rc < 0) + goto err; pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", conn, current->comm, current->pid); rc = iscsi_target_do_login(conn, login); if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); + goto err; } else if (!rc) { - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE)) + goto err; } else if (rc == 1) { iscsi_target_nego_release(conn); iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } + return; + +err: + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); } static void iscsi_target_do_cleanup(struct work_struct *work) @@ -659,31 +700,54 @@ static void iscsi_target_sk_state_change(struct sock *sk) orig_state_change(sk); return; } + state = __iscsi_target_sk_check_close(sk); + pr_debug("__iscsi_target_sk_close_change: state: %d\n", state); + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" " conn: %p\n", conn); + if (state) + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } - if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", conn); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } - - state = iscsi_target_sk_state_check(sk); - write_unlock_bh(&sk->sk_callback_lock); - - pr_debug("iscsi_target_sk_state_change: state: %d\n", state); - - if (!state) { + /* + * If the TCP connection has dropped, go ahead and set LOGIN_FLAGS_CLOSED, + * but only queue conn->login_work -> iscsi_target_do_login_rx() + * processing if LOGIN_FLAGS_INITIAL_PDU has already been cleared. + * + * When iscsi_target_do_login_rx() runs, iscsi_target_sk_check_close() + * will detect the dropped TCP connection from delayed workqueue context. + * + * If LOGIN_FLAGS_INITIAL_PDU is still set, which means the initial + * iscsi_target_start_negotiation() is running, iscsi_target_do_login() + * via iscsi_target_sk_check_close() or iscsi_target_start_negotiation() + * via iscsi_target_sk_check_and_clear() is responsible for detecting the + * dropped TCP connection in iscsi_np process context, and cleaning up + * the remaining iscsi connection resources. + */ + if (state) { pr_debug("iscsi_target_sk_state_change got failed state\n"); - schedule_delayed_work(&conn->login_cleanup_work, 0); + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); + state = test_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + + orig_state_change(sk); + + if (!state) + schedule_delayed_work(&conn->login_work, 0); return; } + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); } @@ -946,6 +1010,15 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (iscsi_target_handle_csg_one(conn, login) < 0) return -1; if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { + /* + * Check to make sure the TCP connection has not + * dropped asynchronously while session reinstatement + * was occuring in this kthread context, before + * transitioning to full feature phase operation. + */ + if (iscsi_target_sk_check_close(conn)) + return -1; + login->tsih = conn->sess->tsih; login->login_complete = 1; iscsi_target_restore_sock_callbacks(conn); @@ -972,21 +1045,6 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - bool state; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login() failed state for" - " conn: %p\n", conn); - return -1; - } - } - return 0; } @@ -1255,10 +1313,22 @@ int iscsi_target_start_negotiation( write_lock_bh(&sk->sk_callback_lock); set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); } - + /* + * If iscsi_target_do_login returns zero to signal more PDU + * exchanges are required to complete the login, go ahead and + * clear LOGIN_FLAGS_INITIAL_PDU but only if the TCP connection + * is still active. + * + * Otherwise if TCP connection dropped asynchronously, go ahead + * and perform connection cleanup now. + */ ret = iscsi_target_do_login(conn, login); + if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU)) + ret = -1; + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 275581d483dd..5f17fb770477 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -557,6 +557,7 @@ struct iscsi_conn { #define LOGIN_FLAGS_READ_ACTIVE 1 #define LOGIN_FLAGS_CLOSED 2 #define LOGIN_FLAGS_READY 4 +#define LOGIN_FLAGS_INITIAL_PDU 8 unsigned long login_flags; struct delayed_work login_work; struct delayed_work login_cleanup_work; From 5e0cf5e6c43b9e19fc0284f69e5cd2b4a47523b0 Mon Sep 17 00:00:00 2001 From: Jiang Yi Date: Tue, 16 May 2017 17:57:55 +0800 Subject: [PATCH 148/206] iscsi-target: Always wait for kthread_should_stop() before kthread exit There are three timing problems in the kthread usages of iscsi_target_mod: - np_thread of struct iscsi_np - rx_thread and tx_thread of struct iscsi_conn In iscsit_close_connection(), it calls send_sig(SIGINT, conn->tx_thread, 1); kthread_stop(conn->tx_thread); In conn->tx_thread, which is iscsi_target_tx_thread(), when it receive SIGINT the kthread will exit without checking the return value of kthread_should_stop(). So if iscsi_target_tx_thread() exit right between send_sig(SIGINT...) and kthread_stop(...), the kthread_stop() will try to stop an already stopped kthread. This is invalid according to the documentation of kthread_stop(). (Fix -ECONNRESET logout handling in iscsi_target_tx_thread and early iscsi_target_rx_thread failure case - nab) Signed-off-by: Jiang Yi Cc: # v3.12+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 30 ++++++++++++++++++----- drivers/target/iscsi/iscsi_target_erl0.c | 6 ++++- drivers/target/iscsi/iscsi_target_erl0.h | 2 +- drivers/target/iscsi/iscsi_target_login.c | 4 +++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 26a9bcd5ee6a..0d8f81591bed 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3790,6 +3790,8 @@ int iscsi_target_tx_thread(void *arg) { int ret = 0; struct iscsi_conn *conn = arg; + bool conn_freed = false; + /* * Allow ourselves to be interrupted by SIGINT so that a * connection recovery / failure event can be triggered externally. @@ -3815,12 +3817,14 @@ int iscsi_target_tx_thread(void *arg) goto transport_err; ret = iscsit_handle_response_queue(conn); - if (ret == 1) + if (ret == 1) { goto get_immediate; - else if (ret == -ECONNRESET) + } else if (ret == -ECONNRESET) { + conn_freed = true; goto out; - else if (ret < 0) + } else if (ret < 0) { goto transport_err; + } } transport_err: @@ -3830,8 +3834,13 @@ int iscsi_target_tx_thread(void *arg) * responsible for cleaning up the early connection failure. */ if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } return 0; } @@ -4004,6 +4013,7 @@ int iscsi_target_rx_thread(void *arg) { int rc; struct iscsi_conn *conn = arg; + bool conn_freed = false; /* * Allow ourselves to be interrupted by SIGINT so that a @@ -4016,7 +4026,7 @@ int iscsi_target_rx_thread(void *arg) */ rc = wait_for_completion_interruptible(&conn->rx_login_comp); if (rc < 0 || iscsi_target_check_conn_state(conn)) - return 0; + goto out; if (!conn->conn_transport->iscsit_get_rx_pdu) return 0; @@ -4025,7 +4035,15 @@ int iscsi_target_rx_thread(void *arg) if (!signal_pending(current)) atomic_set(&conn->transport_failed, 1); - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); + +out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } + return 0; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 9a96e17bf7cd..7fe2aa73cff6 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -930,8 +930,10 @@ static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn) } } -void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) +void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn, bool *conn_freed) { + *conn_freed = false; + spin_lock_bh(&conn->state_lock); if (atomic_read(&conn->connection_exit)) { spin_unlock_bh(&conn->state_lock); @@ -942,6 +944,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) { spin_unlock_bh(&conn->state_lock); iscsit_close_connection(conn); + *conn_freed = true; return; } @@ -955,4 +958,5 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) spin_unlock_bh(&conn->state_lock); iscsit_handle_connection_cleanup(conn); + *conn_freed = true; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h index 60e69e2af6ed..3822d9cd1230 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.h +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -15,6 +15,6 @@ extern int iscsit_stop_time2retain_timer(struct iscsi_session *); extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *); extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); extern void iscsit_fall_back_to_erl0(struct iscsi_session *); -extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *); +extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *, bool *); #endif /*** ISCSI_TARGET_ERL0_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 66238477137b..92b96b51d506 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1464,5 +1464,9 @@ int iscsi_target_login_thread(void *arg) break; } + while (!kthread_should_stop()) { + msleep(100); + } + return 0; } From d2c3b14e1f0dcebdb695617c0c1342a36b914a47 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 1 Jun 2017 09:35:30 +0200 Subject: [PATCH 149/206] ALSA: hda - Fix applying MSI dual-codec mobo quirk The previous commit [63691587f7b0: ALSA: hda - Apply dual-codec quirk for MSI Z270-Gaming mobo] attempted to apply the existing dual-codec quirk for a MSI mobo. But it turned out that this isn't applied properly due to the MSI-vendor quirk before this entry. I overlooked such two MSI entries just because they were put in the wrong position, although we have a list ordered by PCI SSID numbers. This patch fixes it by rearranging the unordered entries. Fixes: 63691587f7b0 ("ALSA: hda - Apply dual-codec quirk for MSI Z270-Gaming mobo") Reported-by: Rudolf Schmidt Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 918e45268915..a57988d617e9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2324,11 +2324,11 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), - SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), - SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), From d9c1b5431d5f0e07575db785a022bce91051ac1d Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Thu, 1 Jun 2017 10:55:03 +0200 Subject: [PATCH 150/206] KVM: SVM: do not zero out segment attributes if segment is unusable or not present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a fix for the problem [1], where VMCB.CPL was set to 0 and interrupt was taken on userspace stack. The root cause lies in the specific AMD CPU behaviour which manifests itself as unusable segment attributes on SYSRET. The corresponding work around for the kernel is the following: 61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue") In other turn virtualization side treated unusable segment incorrectly and restored CPL from SS attributes, which were zeroed out few lines above. In current patch it is assured only that P bit is cleared in VMCB.save state and segment attributes are not zeroed out if segment is not presented or is unusable, therefore CPL can be safely restored from DPL field. This is only one part of the fix, since QEMU side should be fixed accordingly not to zero out attributes on its side. Corresponding patch will follow. [1] Message id: CAJrWOzD6Xq==b-zYCDdFLgSRMPM-NkNuTSDFEtX=7MreT45i7Q@mail.gmail.com Signed-off-by: Roman Pen Signed-off-by: Mikhail Sennikovskii Cc: Paolo Bonzini Cc: Radim KrÄmář Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a654372efea1..ba9891ac5c56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1840,6 +1840,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + /* This is symmetric with svm_set_segment() */ var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } @@ -1980,18 +1981,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->base = var->base; s->limit = var->limit; s->selector = var->selector; - if (var->unusable) - s->attrib = 0; - else { - s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); - s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; - s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; - s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; - s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; - s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; - s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; - s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; - } + s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); + s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; + s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; + s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; + s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; + s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; + s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; + s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; /* * This is always accurate, except if SYSRET returned to a segment @@ -2000,7 +1997,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, * would entail passing the CPL to userspace and back. */ if (seg == VCPU_SREG_SS) - svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + /* This is symmetric with svm_get_segment() */ + svm->vmcb->save.cpl = (var->dpl & 3); mark_dirty(svm->vmcb, VMCB_SEG); } From 47a66eed99e6f231f4a1d261a9d493f4eee94829 Mon Sep 17 00:00:00 2001 From: ZhuangYanying Date: Fri, 26 May 2017 13:16:48 +0800 Subject: [PATCH 151/206] KVM: x86: Fix nmi injection failure when vcpu got blocked When spin_lock_irqsave() deadlock occurs inside the guest, vcpu threads, other than the lock-holding one, would enter into S state because of pvspinlock. Then inject NMI via libvirt API "inject-nmi", the NMI could not be injected into vm. The reason is: 1 It sets nmi_queued to 1 when calling ioctl KVM_NMI in qemu, and sets cpu->kvm_vcpu_dirty to true in do_inject_external_nmi() meanwhile. 2 It sets nmi_queued to 0 in process_nmi(), before entering guest, because cpu->kvm_vcpu_dirty is true. It's not enough just to check nmi_queued to decide whether to stay in vcpu_block() or not. NMI should be injected immediately at any situation. Add checking nmi_pending, and testing KVM_REQ_NMI replaces nmi_queued in vm_vcpu_has_events(). Do the same change for SMIs. Signed-off-by: Zhuang Yanying Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02363e37d4a6..a2cd0997343c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8394,10 +8394,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (vcpu->arch.pv.pv_unhalted) return true; - if (atomic_read(&vcpu->arch.nmi_queued)) + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + (vcpu->arch.nmi_pending && + kvm_x86_ops->nmi_allowed(vcpu))) return true; - if (kvm_test_request(KVM_REQ_SMI, vcpu)) + if (kvm_test_request(KVM_REQ_SMI, vcpu) || + (vcpu->arch.smi_pending && !is_smm(vcpu))) return true; if (kvm_arch_interrupt_allowed(vcpu) && From c08d517480ea342cc43acdacc5cf4a795e18151d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 Jun 2017 15:52:23 +0200 Subject: [PATCH 152/206] Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT" This reverts commit cbed27cdf0e3f7ea3b2259e86b9e34df02be3fe4. As Andy Lutomirski observed: "I think this patch is bogus. pat_enabled() sure looks like it's supposed to return true if PAT is *enabled*, and these days PAT is 'enabled' even if there's no HW PAT support." Reported-by: Bernhard Held Reported-by: Chris Wilson Acked-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mikulas Patocka Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: stable@vger.kernel.org # v4.2+ Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 83a59a67757a..9b78685b66e6 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -65,11 +65,9 @@ static int __init nopat(char *str) } early_param("nopat", nopat); -static bool __read_mostly __pat_initialized = false; - bool pat_enabled(void) { - return __pat_initialized; + return !!__pat_enabled; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -227,14 +225,13 @@ static void pat_bsp_init(u64 pat) } wrmsrl(MSR_IA32_CR_PAT, pat); - __pat_initialized = true; __init_cache_modes(pat); } static void pat_ap_init(u64 pat) { - if (!this_cpu_has(X86_FEATURE_PAT)) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { /* * If this happens we are on a secondary CPU, but switched to * PAT on the boot CPU. We have no way to undo PAT. @@ -309,7 +306,7 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; - if (!__pat_enabled) { + if (!pat_enabled()) { init_cache_modes(); return; } From b425e50492583b10cceb388af36ef0bd3bdf842a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 31 May 2017 14:43:45 -0700 Subject: [PATCH 153/206] block: Avoid that blk_exit_rl() triggers a use-after-free Since the introduction of .init_rq_fn() and .exit_rq_fn() it is essential that the memory allocated for struct request_queue stays around until all blk_exit_rl() calls have finished. Hence make blk_init_rl() take a reference on struct request_queue. This patch fixes the following crash: general protection fault: 0000 [#2] SMP CPU: 3 PID: 28 Comm: ksoftirqd/3 Tainted: G D 4.12.0-rc2-dbg+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 task: ffff88013a108040 task.stack: ffffc9000071c000 RIP: 0010:free_request_size+0x1a/0x30 RSP: 0018:ffffc9000071fd38 EFLAGS: 00010202 RAX: 6b6b6b6b6b6b6b6b RBX: ffff880067362a88 RCX: 0000000000000003 RDX: ffff880067464178 RSI: ffff880067362a88 RDI: ffff880135ea4418 RBP: ffffc9000071fd40 R08: 0000000000000000 R09: 0000000100180009 R10: ffffc9000071fd38 R11: ffffffff81110800 R12: ffff88006752d3d8 R13: ffff88006752d3d8 R14: ffff88013a108040 R15: 000000000000000a FS: 0000000000000000(0000) GS:ffff88013fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa8ec1edb00 CR3: 0000000138ee8000 CR4: 00000000001406e0 Call Trace: mempool_destroy.part.10+0x21/0x40 mempool_destroy+0xe/0x10 blk_exit_rl+0x12/0x20 blkg_free+0x4d/0xa0 __blkg_release_rcu+0x59/0x170 rcu_process_callbacks+0x260/0x4e0 __do_softirq+0x116/0x250 smpboot_thread_fn+0x123/0x1e0 kthread+0x109/0x140 ret_from_fork+0x31/0x40 Fixes: commit e9c787e65c0c ("scsi: allocate scsi_cmnd structures as part of struct request") Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Jan Kara Cc: # v4.11+ Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-core.c | 10 ++++++++-- block/blk-sysfs.c | 2 +- block/blk.h | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 7c2947128f58..0480892e97e5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -74,7 +74,7 @@ static void blkg_free(struct blkcg_gq *blkg) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->blkcg != &blkcg_root) - blk_exit_rl(&blkg->rl); + blk_exit_rl(blkg->q, &blkg->rl); blkg_rwstat_exit(&blkg->stat_ios); blkg_rwstat_exit(&blkg->stat_bytes); diff --git a/block/blk-core.c b/block/blk-core.c index c7068520794b..a7421b772d0e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -648,13 +648,19 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, if (!rl->rq_pool) return -ENOMEM; + if (rl != &q->root_rl) + WARN_ON_ONCE(!blk_get_queue(q)); + return 0; } -void blk_exit_rl(struct request_list *rl) +void blk_exit_rl(struct request_queue *q, struct request_list *rl) { - if (rl->rq_pool) + if (rl->rq_pool) { mempool_destroy(rl->rq_pool); + if (rl != &q->root_rl) + blk_put_queue(q); + } } struct request_queue *blk_alloc_queue(gfp_t gfp_mask) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 712b018e9f54..283da7fbe034 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -809,7 +809,7 @@ static void blk_release_queue(struct kobject *kobj) blk_free_queue_stats(q->stats); - blk_exit_rl(&q->root_rl); + blk_exit_rl(q, &q->root_rl); if (q->queue_tags) __blk_queue_free_tags(q); diff --git a/block/blk.h b/block/blk.h index 2ed70228e44f..83c8e1100525 100644 --- a/block/blk.h +++ b/block/blk.h @@ -59,7 +59,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q); int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask); -void blk_exit_rl(struct request_list *rl); +void blk_exit_rl(struct request_queue *q, struct request_list *rl); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); From 5a27fec21b70c2c41efbfe30a0fbb0f005b9f7e5 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 5 May 2017 12:05:16 -0500 Subject: [PATCH 154/206] RDMA/i40iw: Don't set 0-length FULPDU RTR indication control flag Don't set control flag for 0-length FULPDU (Send) RTR indication in the enhanced MPA Request/Reply frames, because it isn't supported. Signed-off-by: Tatyana Nikolova Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index f3bc01bce483..e8e864fc1883 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -784,7 +784,6 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node, } ctrl_ird |= IETF_PEER_TO_PEER; - ctrl_ird |= IETF_FLPDU_ZERO_LEN; switch (mpa_key) { case MPA_KEY_REQUEST: From b117f4796306a750ec1ffb928acc5f55bdb1fc7b Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 5 May 2017 12:05:17 -0500 Subject: [PATCH 155/206] RDMA/i40iw: ACK MPA Reject frame Explicitly ACK the MPA Reject frame so the peer does not retransmit the frame. Signed-off-by: Tatyana Nikolova Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index e8e864fc1883..6ae98aa7f74e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2445,8 +2445,8 @@ static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node, } else { type = I40IW_CM_EVENT_CONNECTED; cm_node->state = I40IW_CM_STATE_OFFLOADED; - i40iw_send_ack(cm_node); } + i40iw_send_ack(cm_node); break; default: pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state); From c0c643e16f9b00332cbbf3954556652dfa4ed5a3 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 10 May 2017 23:32:14 -0500 Subject: [PATCH 156/206] RDMA/i40iw: Fix device initialization error path Some error paths in i40iw_initialize_dev are doing additional and unnecessary work before exiting. Correctly free resources allocated prior to error and return with correct status code. Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 2728af3103ce..a3f18a22f5ed 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1319,13 +1319,13 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE, I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK); if (status) - goto exit; + goto error; info.fpm_query_buf_pa = mem.pa; info.fpm_query_buf = mem.va; status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE, I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK); if (status) - goto exit; + goto error; info.fpm_commit_buf_pa = mem.pa; info.fpm_commit_buf = mem.va; info.hmc_fn_id = ldev->fid; @@ -1347,11 +1347,9 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, info.exception_lan_queue = 1; info.vchnl_send = i40iw_virtchnl_send; status = i40iw_device_init(&iwdev->sc_dev, &info); -exit: - if (status) { - kfree(iwdev->hmc_info_mem); - iwdev->hmc_info_mem = NULL; - } + + if (status) + goto error; memset(&vsi_info, 0, sizeof(vsi_info)); vsi_info.dev = &iwdev->sc_dev; vsi_info.back_vsi = (void *)iwdev; @@ -1362,11 +1360,19 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, memset(&stats_info, 0, sizeof(stats_info)); stats_info.fcn_id = ldev->fid; stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); + if (!stats_info.pestat) { + status = I40IW_ERR_NO_MEMORY; + goto error; + } stats_info.stats_initialize = true; if (stats_info.pestat) i40iw_vsi_stats_init(&iwdev->vsi, &stats_info); } return status; +error: + kfree(iwdev->hmc_info_mem); + iwdev->hmc_info_mem = NULL; + return status; } /** From f300ba2d1ef1cb8411daa5e1ae44acfa7b88236c Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 19 May 2017 16:14:02 -0500 Subject: [PATCH 157/206] RDMA/i40iw: Remove MSS change support MSS change on active QPs is not supported. Store new MSS value for new QPs only. Remove code to modify MSS on the fly. This also resolves a crash on QP modify to QP 0. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: i40iw_sc_qp_modify+0x22/0x280 [i40iw] Oops: 0000 [#1] SMP KASAN CPU: 2 PID: 1236 Comm: kworker/u16:4 Not tainted 4.12.0-rc1 #5 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Q87M-D2H, BIOS F7 01/17/2014 Workqueue: l2params i40iw_l2params_worker [i40iw] task: ffff88070f5a9b40 task.stack: ffff88070f5a0000 RIP: 0010:i40iw_sc_qp_modify+0x22/0x280 [i40iw] ... Call Trace: i40iw_exec_cqp_cmd+0x2ce/0x410 [i40iw] ? _raw_spin_lock_irqsave+0x6f/0x80 ? i40iw_process_cqp_cmd+0x1d/0x80 [i40iw] i40iw_process_cqp_cmd+0x7c/0x80 [i40iw] i40iw_handle_cqp_op+0x2f/0x200 [i40iw] ? trace_hardirqs_off+0xd/0x10 ? _raw_spin_unlock_irqrestore+0x46/0x50 i40iw_hw_modify_qp+0x5e/0x90 [i40iw] i40iw_qp_mss_modify+0x52/0x60 [i40iw] i40iw_change_l2params+0x145/0x160 [i40iw] i40iw_l2params_worker+0x1f/0x40 [i40iw] process_one_work+0x1f5/0x650 ? process_one_work+0x161/0x650 worker_thread+0x48/0x3b0 kthread+0x112/0x150 ? process_one_work+0x650/0x650 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x2e/0x40 Code: 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 56 41 55 41 89 cd 41 54 49 89 fc 53 48 89 f3 48 89 d6 48 83 ec 08 48 8b 87 10 01 00 00 <48> 8b 40 08 4c 8b b0 40 04 00 00 4c 89 f7 e8 1b e5 ff ff 48 85 RIP: i40iw_sc_qp_modify+0x22/0x280 [i40iw] RSP: ffff88070f5a7c28 CR2: 0000000000000008 ---[ end trace 77a405931e296060 ]--- Reported-by: Stefan Assmann Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 12 +----------- drivers/infiniband/hw/i40iw/i40iw_osdep.h | 1 - drivers/infiniband/hw/i40iw/i40iw_type.h | 2 -- drivers/infiniband/hw/i40iw/i40iw_utils.c | 17 ----------------- 4 files changed, 1 insertion(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index f82483b3d1e7..a027e2072477 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -285,28 +285,20 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa struct i40iw_sc_dev *dev = vsi->dev; struct i40iw_sc_qp *qp = NULL; bool qs_handle_change = false; - bool mss_change = false; unsigned long flags; u16 qs_handle; int i; - if (vsi->mss != l2params->mss) { - mss_change = true; - vsi->mss = l2params->mss; - } + vsi->mss = l2params->mss; i40iw_fill_qos_list(l2params->qs_handle_list); for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { qs_handle = l2params->qs_handle_list[i]; if (vsi->qos[i].qs_handle != qs_handle) qs_handle_change = true; - else if (!mss_change) - continue; /* no MSS nor qs handle change */ spin_lock_irqsave(&vsi->qos[i].lock, flags); qp = i40iw_get_qp(&vsi->qos[i].qplist, qp); while (qp) { - if (mss_change) - i40iw_qp_mss_modify(dev, qp); if (qs_handle_change) { qp->qs_handle = qs_handle; /* issue cqp suspend command */ @@ -2395,7 +2387,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify( set_64bit_val(wqe, 8, - LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) | LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN)); set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); @@ -2410,7 +2401,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify( LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) | LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) | LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) | - LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) | LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) | LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) | LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) | diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h index aa66c1c63dfa..f27be3e7830b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h +++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h @@ -199,7 +199,6 @@ void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx); void *i40iw_remove_head(struct list_head *list); void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend); -void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len); void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 7b76259752b0..959ec81fba99 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -541,7 +541,6 @@ struct i40iw_create_qp_info { struct i40iw_modify_qp_info { u64 rx_win0; u64 rx_win1; - u16 new_mss; u8 next_iwarp_state; u8 termlen; bool ord_valid; @@ -554,7 +553,6 @@ struct i40iw_modify_qp_info { bool dont_send_term; bool dont_send_fin; bool cached_var_valid; - bool mss_change; bool force_loopback; }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 409a3781e735..56d986924a4c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -756,23 +756,6 @@ void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, b i40iw_pr_err("CQP-OP QP Suspend/Resume fail"); } -/** - * i40iw_qp_mss_modify - modify mss for qp - * @dev: hardware control device structure - * @qp: hardware control qp - */ -void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) -{ - struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; - struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; - struct i40iw_modify_qp_info info; - - memset(&info, 0, sizeof(info)); - info.mss_change = true; - info.new_mss = qp->vsi->mss; - i40iw_hw_modify_qp(iwdev, iwqp, &info, false); -} - /** * i40iw_term_modify_qp - modify qp for term message * @qp: hardware control qp From e80bd98d1ff011beec872a8ebbb73930507d6a13 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 May 2017 13:11:17 -0500 Subject: [PATCH 158/206] RDMA/i40iw: fix duplicated code for different branches Refactor code to avoid identical code for different branches. Addresses-Coverity-ID: 1357356 Reviewed-by: Yuval Shaia Signed-off-by: Gustavo A. R. Silva Acked-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index f4d13683a403..48fd327f876b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -443,10 +443,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, if (!dev->vchnl_up) return I40IW_ERR_NOT_READY; if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) { - if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0) - vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); - else - vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); + vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); return I40IW_SUCCESS; } for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) { From 0e5fc903513a652a2cf890a3d3b91c1779e16e97 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 10 May 2017 16:46:39 -0500 Subject: [PATCH 159/206] RDMA/nes: Don't set 0-length FULPDU RTR indication control flag Don't set control flag for 0-length FULPDU (Send) RTR indication in the enhanced MPA Request/Reply frames, because it isn't supported. Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index fb983df7c157..06a55f6fb708 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -610,7 +610,6 @@ static void build_mpa_v2(struct nes_cm_node *cm_node, ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD; } ctrl_ird |= IETF_PEER_TO_PEER; - ctrl_ird |= IETF_FLPDU_ZERO_LEN; switch (mpa_key) { case MPA_KEY_REQUEST: From f863de7de34025ee536cb3cc382bfc3cafaa9f0a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 10 May 2017 16:46:40 -0500 Subject: [PATCH 160/206] RDMA/nes: ACK MPA Reply frame Explicitly ACK the MPA Reply frame so the peer does not retransmit the frame. Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 06a55f6fb708..30b256a2c54e 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1825,7 +1825,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) type = NES_CM_EVENT_CONNECTED; cm_node->state = NES_CM_STATE_TSA; } - + send_ack(cm_node, NULL); break; default: WARN_ON(1); From 1dad0ebeea1cd890b8892523f736916e245b0aef Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 15 May 2017 06:40:39 +0000 Subject: [PATCH 161/206] RDMA/iw_cxgb4: Avoid touch after free error in ARP failure handlers The patch 761e19a504af (RDMA/iw_cxgb4: Handle return value of c4iw_ofld_send() in abort_arp_failure()) from May 6, 2016 leads to the following static checker warning: drivers/infiniband/hw/cxgb4/cm.c:575 abort_arp_failure() warn: passing freed memory 'skb' Also fixes skb leak when l2t resolution fails Fixes: 761e19a504afa55 (RDMA/iw_cxgb4: Handle return value of c4iw_ofld_send() in abort_arp_failure()) Reported-by: Dan Carpenter Cc: Dan Carpenter Signed-off-by: Raju Rangoju Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b6fe45924c6e..06b110213e92 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -488,6 +488,7 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); + kfree_skb(skb); return 0; } @@ -498,6 +499,7 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); + kfree_skb(skb); return 0; } @@ -569,11 +571,13 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb) pr_debug("%s rdev %p\n", __func__, rdev); req->cmd = CPL_ABORT_NO_RST; + skb_get(skb); ret = c4iw_ofld_send(rdev, skb); if (ret) { __state_set(&ep->com, DEAD); queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); - } + } else + kfree_skb(skb); } static int send_flowc(struct c4iw_ep *ep) From 4bbfabede50849f87b913a1fc07c02ecc96b6c57 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 19 May 2017 14:48:42 +0530 Subject: [PATCH 162/206] RDMA/iw_cxgb4: calculate t4_eq_status_entries properly use egrstatuspagesize to calculate t4_eq_status_entries. Fixes: bb58d07964f2 ("cxgb4: Update IngPad and IngPack values") Reported-by: Logan Gunthorpe Signed-off-by: Potnuri Bharat Teja Signed-off-by: Ganesh Goudar Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 329fb65e8fb0..f96a96dbcf1f 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -971,7 +971,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi.sge_egrstatuspagesize); devp->rdev.hw_queue.t4_eq_status_entries = - devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1; + devp->rdev.lldi.sge_egrstatuspagesize / 64; devp->rdev.hw_queue.t4_max_eq_size = 65520; devp->rdev.hw_queue.t4_max_iq_size = 65520; devp->rdev.hw_queue.t4_max_rq_size = 8192 - From 98b80a2a73a26f0e259632da519ccedb0cf37617 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 31 May 2017 12:06:58 +0530 Subject: [PATCH 163/206] RDMA/iw_cxgb4: fix the calculation of ipv6 header size Take care of ipv6 checks while computing header length for deducing mtu size of ipv6 servers. Due to the incorrect header length computation for ipv6 servers, wrong mss is reported to the peer (client). Signed-off-by: Raju Rangoju Signed-off-by: Ganesh Goudar Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 06b110213e92..0910faf3587b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2521,7 +2521,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) + + hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) + + sizeof(struct tcphdr) + ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0); if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) child_ep->mtu = peer_mss + hdrs; From c4dd4b69f55abcc8dd079f8de55d9d8c2ddbefce Mon Sep 17 00:00:00 2001 From: "Steven L. Roberts" Date: Wed, 10 May 2017 10:54:12 -0500 Subject: [PATCH 164/206] RDMA/hfi1: fix array termination by appending NULL to attr array This fixes a kernel panic when loading the hfi driver as a dynamic module. Signed-off-by: Steven L Roberts Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 50d140d25e38..2f3bbcac1e34 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -196,7 +196,8 @@ static const struct sysfs_ops port_cc_sysfs_ops = { }; static struct attribute *port_cc_default_attributes[] = { - &cc_prescan_attr.attr + &cc_prescan_attr.attr, + NULL }; static struct kobj_type port_cc_ktype = { From e4785b0633574a607daaa04bf2fe43550055194f Mon Sep 17 00:00:00 2001 From: "Steven L. Roberts" Date: Wed, 10 May 2017 14:58:13 -0500 Subject: [PATCH 165/206] RDMA/hfi1: change PCI bar addr assignments to Linux API functions The Omni-Path adapter driver fails to load on the ppc64le platform due to invalid PCI setup. This patch makes the PCI configuration more robust and will fix 64 bit addressing for ppc64le. Signed-off-by: Steven L Roberts Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 93faf86d54b6..6a9f6f9819e1 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -207,8 +207,8 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) /* * Save BARs and command to rewrite after device reset. */ - dd->pcibar0 = addr; - dd->pcibar1 = addr >> 32; + pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); + pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl); From b3e6b4bdbb609762d8401ac4a959d590b4e4e3b8 Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Fri, 12 May 2017 09:01:37 -0700 Subject: [PATCH 166/206] RDMA/hfi1: Defer setting VL15 credits to link-up interrupt Keep VL15 credits at 0 during LNI, before link-up. Store VL15 credits value during verify cap interrupt and set in after link-up. This addresses an issue where VL15 MAD packets could be sent by one side of the link before the other side is ready to receive them. Reviewed-by: Mike Marciniszyn Reviewed-by: Dean Luick Reviewed-by: Dennis Dalessandro Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 67 +++++++++++++++------ drivers/infiniband/hw/hfi1/chip_registers.h | 2 + drivers/infiniband/hw/hfi1/hfi.h | 11 +++- drivers/infiniband/hw/hfi1/intr.c | 3 +- 4 files changed, 64 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5d6b1eeaa9a0..2ba00b89df6a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6312,25 +6312,38 @@ static void handle_8051_request(struct hfi1_pportdata *ppd) } } -static void write_global_credit(struct hfi1_devdata *dd, - u8 vau, u16 total, u16 shared) +/* + * Set up allocation unit vaulue. + */ +void set_up_vau(struct hfi1_devdata *dd, u8 vau) { - write_csr(dd, SEND_CM_GLOBAL_CREDIT, - ((u64)total << - SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) | - ((u64)shared << - SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) | - ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT)); + u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT); + + /* do not modify other values in the register */ + reg &= ~SEND_CM_GLOBAL_CREDIT_AU_SMASK; + reg |= (u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT; + write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg); } /* * Set up initial VL15 credits of the remote. Assumes the rest of - * the CM credit registers are zero from a previous global or credit reset . + * the CM credit registers are zero from a previous global or credit reset. + * Shared limit for VL15 will always be 0. */ -void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf) +void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf) { - /* leave shared count at zero for both global and VL15 */ - write_global_credit(dd, vau, vl15buf, 0); + u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT); + + /* set initial values for total and shared credit limit */ + reg &= ~(SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK | + SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK); + + /* + * Set total limit to be equal to VL15 credits. + * Leave shared limit at 0. + */ + reg |= (u64)vl15buf << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT; + write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg); write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); @@ -6348,9 +6361,11 @@ void reset_link_credits(struct hfi1_devdata *dd) for (i = 0; i < TXE_NUM_DATA_VL; i++) write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0); write_csr(dd, SEND_CM_CREDIT_VL15, 0); - write_global_credit(dd, 0, 0, 0); + write_csr(dd, SEND_CM_GLOBAL_CREDIT, 0); /* reset the CM block */ pio_send_control(dd, PSC_CM_RESET); + /* reset cached value */ + dd->vl15buf_cached = 0; } /* convert a vCU to a CU */ @@ -6839,24 +6854,35 @@ void handle_link_up(struct work_struct *work) { struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, link_up_work); + struct hfi1_devdata *dd = ppd->dd; + set_link_state(ppd, HLS_UP_INIT); /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */ - read_ltp_rtt(ppd->dd); + read_ltp_rtt(dd); /* * OPA specifies that certain counters are cleared on a transition * to link up, so do that. */ - clear_linkup_counters(ppd->dd); + clear_linkup_counters(dd); /* * And (re)set link up default values. */ set_linkup_defaults(ppd); + /* + * Set VL15 credits. Use cached value from verify cap interrupt. + * In case of quick linkup or simulator, vl15 value will be set by + * handle_linkup_change. VerifyCap interrupt handler will not be + * called in those scenarios. + */ + if (!(quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) + set_up_vl15(dd, dd->vl15buf_cached); + /* enforce link speed enabled */ if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) { /* oops - current speed is not enabled, bounce */ - dd_dev_err(ppd->dd, + dd_dev_err(dd, "Link speed active 0x%x is outside enabled 0x%x, downing link\n", ppd->link_speed_active, ppd->link_speed_enabled); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, @@ -7357,7 +7383,14 @@ void handle_verify_cap(struct work_struct *work) */ if (vau == 0) vau = 1; - set_up_vl15(dd, vau, vl15buf); + set_up_vau(dd, vau); + + /* + * Set VL15 credits to 0 in global credit register. Cache remote VL15 + * credits value and wait for link-up interrupt ot set it. + */ + set_up_vl15(dd, 0); + dd->vl15buf_cached = vl15buf; /* set up the LCB CRC mode */ crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc; diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 5bfa839d1c48..793514f1d15f 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -839,7 +839,9 @@ #define SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK 0x8ull #define SEND_CM_CTRL_RESETCSR 0x0000000000000020ull #define SEND_CM_GLOBAL_CREDIT (TXE + 0x000000000508) +#define SEND_CM_GLOBAL_CREDIT_AU_MASK 0x7ull #define SEND_CM_GLOBAL_CREDIT_AU_SHIFT 16 +#define SEND_CM_GLOBAL_CREDIT_AU_SMASK 0x70000ull #define SEND_CM_GLOBAL_CREDIT_RESETCSR 0x0000094000030000ull #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK 0xFFFFull #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT 0 diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index da322e6668cc..414a04a481c2 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1045,6 +1045,14 @@ struct hfi1_devdata { /* initial vl15 credits to use */ u16 vl15_init; + /* + * Cached value for vl15buf, read during verify cap interrupt. VL15 + * credits are to be kept at 0 and set when handling the link-up + * interrupt. This removes the possibility of receiving VL15 MAD + * packets before this HFI is ready. + */ + u16 vl15buf_cached; + /* Misc small ints */ u8 n_krcv_queues; u8 qos_shift; @@ -1598,7 +1606,8 @@ int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode); int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t); int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t); -void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); +void set_up_vau(struct hfi1_devdata *dd, u8 vau); +void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu); diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index ba265d0ae93b..04a5082d5ac5 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -130,7 +130,8 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) * the remote values. Both sides must be using the values. */ if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { - set_up_vl15(dd, dd->vau, dd->vl15_init); + set_up_vau(dd, dd->vau); + set_up_vl15(dd, dd->vl15_init); assign_remote_cm_au_table(dd, dd->vcu); } From 1feb40067cf04ae48d65f728d62ca255c9449178 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:02:00 -0700 Subject: [PATCH 167/206] RDMA/qib,hfi1: Fix MR reference count leak on write with immediate The handling of IB_RDMA_WRITE_ONLY_WITH_IMMEDIATE will leak a memory reference when a buffer cannot be allocated for returning the immediate data. The issue is that the rkey validation has already occurred and the RNR nak fails to release the reference that was fruitlessly gotten. The the peer will send the identical single packet request when its RNR timer pops. The fix is to release the held reference prior to the rnr nak exit. This is the only sequence the requires both rkey validation and the buffer allocation on the same packet. Cc: Stable # 4.7+ Tested-by: Tadeusz Struk Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 5 ++++- drivers/infiniband/hw/qib/qib_rc.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 069bdaf061ab..1080778a1f7c 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2159,8 +2159,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; - if (!ret) + if (!ret) { + /* peer will send again */ + rvt_put_ss(&qp->r_sge); goto rnr_nak; + } wc.ex.imm_data = ohdr->u.rc.imm_data; wc.wc_flags = IB_WC_WITH_IMM; goto send_last; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index fc8b88514da5..4ddbcac5eabe 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1956,8 +1956,10 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, ret = qib_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; - if (!ret) + if (!ret) { + rvt_put_ss(&qp->r_sge); goto rnr_nak; + } wc.ex.imm_data = ohdr->u.rc.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; From eed7624552ca55c346a4e9ccaa9ab0723841aee6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 21 May 2017 19:11:13 +0300 Subject: [PATCH 168/206] RDMA/mlx4: Fix MAD tunneling when SRIOV is enabled The cited patch added a type field to structures ib_ah and rdma_ah_attr. Function mlx4_ib_query_ah() builds an rdma_ah_attr structure from the data in an mlx4_ib_ah structure (which contains both an ib_ah structure and an address vector). For mlx4_ib_query_ah() to work properly, the type field in the contained ib_ah structure must be set correctly. In the outgoing MAD tunneling flow, procedure mlx4_ib_multiplex_mad() paravirtualizes a MAD received from a slave and sends the processed mad out over the wire. During this processing, it populates an mlx4_ib_ah structure and calls mlx4_ib_query_ah(). The cited commit overlooked setting the type field in the contained ib_ah structure before invoking mlx4_ib_query_ah(). As a result, the type field remained uninitialized, and the rdma_ah_attr structure was incorrectly built. This resulted in improperly built MADs being sent out over the wire. This patch properly initializes the type field in the contained ib_ah structure before calling mlx4_ib_query_ah(). The rdma_ah_attr structure is then generated correctly. Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index b4694717f6f3..21d31cb1325f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1578,6 +1578,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc if (port < 0) return; ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff)); + ah.ibah.type = rdma_ah_find_type(&dev->ib_dev, port); mlx4_ib_query_ah(&ah.ibah, &ah_attr); if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH) From 1410a90ae449061b7e1ae19d275148f36948801b Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:10 +0300 Subject: [PATCH 169/206] net/mlx5: Define interface bits for fencing UMR wqe HW can implement UMR wqe re-transmission in various ways. Thus, add HCA cap to distinguish the needed fence for UMR to make sure that the wqe wouldn't fail on mkey checks. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 32de0724b400..edafedb7b509 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -766,6 +766,12 @@ enum { MLX5_CAP_PORT_TYPE_ETH = 0x1, }; +enum { + MLX5_CAP_UMR_FENCE_STRONG = 0x0, + MLX5_CAP_UMR_FENCE_SMALL = 0x1, + MLX5_CAP_UMR_FENCE_NONE = 0x2, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x80]; @@ -875,7 +881,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_202[0x1]; u8 ipoib_enhanced_offloads[0x1]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0xa]; + u8 reserved_at_205[0x5]; + u8 umr_fence[0x2]; + u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; From 6e8484c5cf07c7ee632587e98c1a12d319dacb7c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:11 +0300 Subject: [PATCH 170/206] RDMA/mlx5: set UMR wqe fence according to HCA cap Cache the needed umr_fence and set the wqe ctrl segmennt accordingly. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +- drivers/infiniband/hw/mlx5/qp.c | 59 +++++++++++----------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d45772da0963..0c79983c8b1a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2979,6 +2979,18 @@ static int create_umr_res(struct mlx5_ib_dev *dev) return ret; } +static u8 mlx5_get_umr_fence(u8 umr_fence_cap) +{ + switch (umr_fence_cap) { + case MLX5_CAP_UMR_FENCE_NONE: + return MLX5_FENCE_MODE_NONE; + case MLX5_CAP_UMR_FENCE_SMALL: + return MLX5_FENCE_MODE_INITIATOR_SMALL; + default: + return MLX5_FENCE_MODE_STRONG_ORDERING; + } +} + static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; @@ -3693,6 +3705,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); + if (MLX5_CAP_GEN(mdev, imaicl)) { dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 38c877bc45e5..bdcf25410c99 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -349,7 +349,7 @@ struct mlx5_ib_qp { struct mlx5_ib_wq rq; u8 sq_signal_bits; - u8 fm_cache; + u8 next_fence; struct mlx5_ib_wq sq; /* serialize qp state modifications @@ -654,6 +654,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 93959e1e43a3..ebb6768684de 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3738,24 +3738,6 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } } -static u8 get_fence(u8 fence, struct ib_send_wr *wr) -{ - if (unlikely(wr->opcode == IB_WR_LOCAL_INV && - wr->send_flags & IB_SEND_FENCE)) - return MLX5_FENCE_MODE_STRONG_ORDERING; - - if (unlikely(fence)) { - if (wr->send_flags & IB_SEND_FENCE) - return MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - return fence; - } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { - return MLX5_FENCE_MODE_FENCE; - } - - return 0; -} - static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, struct ib_send_wr *wr, unsigned *idx, @@ -3784,8 +3766,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u8 next_fence, - u32 mlx5_opcode) + int nreq, u8 fence, u32 mlx5_opcode) { u8 opmod = 0; @@ -3793,7 +3774,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp, mlx5_opcode | ((u32)opmod << 24)); ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; - qp->fm_cache = next_fence; if (unlikely(qp->wq_sig)) ctrl->signature = wq_sig(ctrl); @@ -3853,7 +3833,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - fence = qp->fm_cache; num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); @@ -3870,6 +3849,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } + if (wr->opcode == IB_WR_LOCAL_INV || + wr->opcode == IB_WR_REG_MR) { + fence = dev->umr_fence; + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + } else if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } + switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -3896,7 +3888,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; case IB_WR_LOCAL_INV: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); set_linv_wr(qp, &seg, &size); @@ -3904,7 +3895,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_REG_MR: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size); @@ -3927,9 +3917,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error @@ -3954,9 +3943,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); if (err) { @@ -3966,7 +3954,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); @@ -3976,9 +3963,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4089,8 +4076,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - get_fence(fence, wr), next_fence, + qp->next_fence = next_fence; + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) From d38d7fdafaf89d1756f27e9b4054dd19a1d2f545 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 May 2017 13:30:15 -0500 Subject: [PATCH 171/206] RDMA/qedr: add null check before pointer dereference Add null check before dereferencing pointer sgid_attr.ndev inside function rdma_vlan_dev_vlan_id(). Addresses-Coverity-ID: 1373979 Signed-off-by: Gustavo A. R. Silva Acked-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr_cm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 3d7705cec770..d86dbe814d98 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -270,11 +270,13 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, return rc; } - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - if (vlan_id < VLAN_CFI_MASK) - has_vlan = true; - if (sgid_attr.ndev) + if (sgid_attr.ndev) { + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + if (vlan_id < VLAN_CFI_MASK) + has_vlan = true; + dev_put(sgid_attr.ndev); + } if (!memcmp(&sgid, &zgid, sizeof(sgid))) { DP_ERR(dev, "gsi post send: GID not found GID index %d\n", From 8c490669deb0133dfb6bfe7a80d7cfef3bbbd92f Mon Sep 17 00:00:00 2001 From: Honggang Li Date: Thu, 11 May 2017 20:14:28 +0800 Subject: [PATCH 172/206] RDMA/IPoIB: Replace netdev_priv with ipoib_priv for ipoib_get_link_ksettings ipoib_dev_init accesses the wrong private data for the IPoIB device. Commit cd565b4b51e5 (IB/IPoIB: Support acceleration options callbacks) changed ipoib_priv from being identical to netdev_priv to being an area inside of, but not the same pointer as, the netdev_priv pointer. As such, the struct we want is the ipoib_priv area, not the netdev_priv area, so use the right accessor, otherwise we kernel panic. [ 27.271938] IPv6: ADDRCONF(NETDEV_CHANGE): mlx5_ib0.8006: link becomes ready [ 28.156790] BUG: unable to handle kernel NULL pointer dereference at 000000000000067c [ 28.166309] IP: ib_query_port+0x30/0x180 [ib_core] ... [ 28.306282] RIP: 0010:ib_query_port+0x30/0x180 [ib_core] ... [ 28.393337] Call Trace: [ 28.397594] ipoib_get_link_ksettings+0x66/0xe0 [ib_ipoib] [ 28.405274] __ethtool_get_link_ksettings+0xa0/0x1c0 [ 28.412353] speed_show+0x74/0xa0 [ 28.417503] dev_attr_show+0x20/0x50 [ 28.422922] ? mutex_lock+0x12/0x40 [ 28.428179] sysfs_kf_seq_show+0xbf/0x1a0 [ 28.434002] kernfs_seq_show+0x21/0x30 [ 28.439470] seq_read+0x116/0x3b0 [ 28.444445] ? do_filp_open+0xa5/0x100 [ 28.449774] kernfs_fop_read+0xff/0x180 [ 28.455220] __vfs_read+0x37/0x150 [ 28.460167] ? security_file_permission+0x9d/0xc0 [ 28.466560] vfs_read+0x8c/0x130 [ 28.471318] SyS_read+0x55/0xc0 [ 28.475950] do_syscall_64+0x67/0x150 [ 28.481163] entry_SYSCALL64_slow_path+0x25/0x25 ... [ 28.584493] ---[ end trace 3549968a4bf0aa5d ]--- Fixes: cd565b4b51e5 (IB/IPoIB: Support acceleration options callbacks) Fixes: 0d7e2d2166f6 (IB/ipoib: add get_link_ksettings in ethtool) Signed-off-by: Honggang Li Reviewed-by: Yuval Shaia Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 874b24366e4d..7871379342f4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -178,7 +178,7 @@ static inline int ib_speed_enum_to_int(int speed) static int ipoib_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct ipoib_dev_priv *priv = netdev_priv(netdev); + struct ipoib_dev_priv *priv = ipoib_priv(netdev); struct ib_port_attr attr; int ret, speed, width; From 0a1a972630c77edb39adf310699e404b8bf9176e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 14 May 2017 13:32:06 +0300 Subject: [PATCH 173/206] RDMA/IPoIB: Limit the ipoib_dev_uninit_default scope ipoib_dev_uninit_default() call is used in ipoib_main.c file only and it generates the following warning from smatch tool: drivers/infiniband/ulp/ipoib/ipoib_main.c:1593:6: warning: symbol 'ipoib_dev_uninit_default' was not declared. Should it be static? so let's declare that function as static. Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2869d1adb1de..a115c0b7a310 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1590,7 +1590,7 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) wait_for_completion(&priv->ntbl.deleted); } -void ipoib_dev_uninit_default(struct net_device *dev) +static void ipoib_dev_uninit_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); From 95c2ef50c726a51d580c35ae8dccd383abaa8701 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 11 May 2017 18:52:36 +0300 Subject: [PATCH 174/206] RDMA/srp: Fix NULL deref at srp_destroy_qp() If srp_init_qp() fails at srp_create_ch_ib() then ch->send_cq may be NULL. Calling directly to ib_destroy_qp() is sufficient because no work requests were posted on the created qp. Fixes: 9294000d6d89 ("IB/srp: Drain the send queue before destroying a QP") Cc: Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Bart van Assche -- Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index def723a5df29..4306285fb155 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -575,7 +575,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) return 0; err_qp: - srp_destroy_qp(ch, qp); + ib_destroy_qp(qp); err_send_cq: ib_free_cq(send_cq); From 233c1955835bd8649003be9bb3d8e79788b08be1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 14 May 2017 15:49:57 +0300 Subject: [PATCH 175/206] RDMA/netlink: Reduce exposure of RDMA netlink functions RDMA netlink is part of ib_core, hence ibnl_chk_listeners(), ibnl_init() and ibnl_cleanup() don't need to be published in public header file. Let's remove EXPORT_SYMBOL from ibnl_chk_listeners() and move all these functions to private header file. CC: Yuval Shaia Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/core_priv.h | 10 ++++++++++ drivers/infiniband/core/netlink.c | 2 +- include/rdma/rdma_netlink.h | 10 ---------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index cb7d372e4bdf..d92ab4eaa8f3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -169,6 +169,16 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); +int ibnl_init(void); +void ibnl_cleanup(void); + +/** + * Check if there are any listeners to the netlink group + * @group: the netlink group ID + * Returns 0 on success or a negative for no listeners. + */ +int ibnl_chk_listeners(unsigned int group); + int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct netlink_callback *cb); int ib_nl_handle_set_timeout(struct sk_buff *skb, diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index b784055423c8..94931c474d41 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -37,6 +37,7 @@ #include #include #include +#include "core_priv.h" struct ibnl_client { struct list_head list; @@ -55,7 +56,6 @@ int ibnl_chk_listeners(unsigned int group) return -1; return 0; } -EXPORT_SYMBOL(ibnl_chk_listeners); int ibnl_add_client(int index, int nops, const struct ibnl_client_cbs cb_table[]) diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 585266144329..348c102cb5f6 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -10,9 +10,6 @@ struct ibnl_client_cbs { struct module *module; }; -int ibnl_init(void); -void ibnl_cleanup(void); - /** * Add a a client to the list of IB netlink exporters. * @index: Index of the added client @@ -77,11 +74,4 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int group, gfp_t flags); -/** - * Check if there are any listeners to the netlink group - * @group: the netlink group ID - * Returns 0 on success or a negative for no listeners. - */ -int ibnl_chk_listeners(unsigned int group); - #endif /* _RDMA_NETLINK_H */ From f937d93a9122d1510ca6e4bb8d860aedcf9408c3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 18 May 2017 07:40:33 +0300 Subject: [PATCH 176/206] RDMA/uverbs: Declare local function static and add brackets to sizeof Commit 57520751445b ("IB/SA: Add OPA path record type") introduced new local function __ib_copy_path_rec_to_user, but didn't limit its scope. This produces the following sparse warning: drivers/infiniband/core/uverbs_marshall.c:99:6: warning: symbol '__ib_copy_path_rec_to_user' was not declared. Should it be static? In addition, it used sizeof ... notations instead of sizeof(...), which is correct in C, but a little bit misleading. Let's change it too. Fixes: 57520751445b ("IB/SA: Add OPA path record type") Reviewed-by: Yuval Shaia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_marshall.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 8b9587fe2303..94fd989c9060 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -96,11 +96,11 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, } EXPORT_SYMBOL(ib_copy_qp_attr_to_user); -void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, - struct sa_path_rec *src) +static void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct sa_path_rec *src) { - memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); - memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); + memcpy(dst->dgid, src->dgid.raw, sizeof(src->dgid)); + memcpy(dst->sgid, src->sgid.raw, sizeof(src->sgid)); dst->dlid = htons(ntohl(sa_path_get_dlid(src))); dst->slid = htons(ntohl(sa_path_get_slid(src))); From 53376fedb9da54c0d3b0bd3a6edcbeb681692909 Mon Sep 17 00:00:00 2001 From: Qing Huang Date: Thu, 18 May 2017 16:33:53 -0700 Subject: [PATCH 177/206] RDMA/core: not to set page dirty bit if it's already set. This change will optimize kernel memory deregistration operations. __ib_umem_release() used to call set_page_dirty_lock() against every writable page in its memory region. Its purpose is to keep data synced between CPU and DMA device when swapping happens after mem deregistration ops. Now we choose not to set page dirty bit if it's already set by kernel prior to calling __ib_umem_release(). This reduces memory deregistration time by half or even more when we ran application simulation test program. Signed-off-by: Qing Huang Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 3dbf811d3c51..21e60b1e2ff4 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -58,7 +58,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { page = sg_page(sg); - if (umem->writable && dirty) + if (!PageDirty(page) && umem->writable && dirty) set_page_dirty_lock(page); put_page(page); } From 79bb5b7ee1776a244484a1be6671d89fbd7c0c9f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 21 May 2017 19:08:09 +0300 Subject: [PATCH 178/206] RDMA/umem: Fix missing mmap_sem in get umem ODP call Add mmap_sem lock around VMA inspection in ib_umem_odp_get(). Fixes: 0008b84ea9af ('IB/umem: Add support to huge ODP') Signed-off-by: Artemy Kovalyov Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem_odp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 0780b1afefa9..8c4ec564e495 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -321,11 +321,15 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, struct vm_area_struct *vma; struct hstate *h; + down_read(&mm->mmap_sem); vma = find_vma(mm, ib_umem_start(umem)); - if (!vma || !is_vm_hugetlb_page(vma)) + if (!vma || !is_vm_hugetlb_page(vma)) { + up_read(&mm->mmap_sem); return -EINVAL; + } h = hstate_vma(vma); umem->page_shift = huge_page_shift(h); + up_read(&mm->mmap_sem); umem->hugetlb = 1; } else { umem->hugetlb = 0; From d3957b86a40612826ef935f474b31359d66cbdca Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 21 May 2017 19:09:54 +0300 Subject: [PATCH 179/206] RDMA/SA: Fix kernel panic in CMA request handler flow Commit 9fdca4da4d8c (IB/SA: Split struct sa_path_rec based on IB and ROCE specific fields) moved the service_id to be specific attribute for IB and OPA SA Path Record, and thus wasn't assigned for RoCE. This caused to the following kernel panic in the CMA request handler flow: [ 27.074594] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 27.074731] IP: __radix_tree_lookup+0x1d/0xe0 ... [ 27.075356] Workqueue: ib_cm cm_work_handler [ib_cm] [ 27.075401] task: ffff88022e3b8000 task.stack: ffffc90001298000 [ 27.075449] RIP: 0010:__radix_tree_lookup+0x1d/0xe0 ... [ 27.075979] Call Trace: [ 27.076015] radix_tree_lookup+0xd/0x10 [ 27.076055] cma_ps_find+0x59/0x70 [rdma_cm] [ 27.076097] cma_id_from_event+0xd2/0x470 [rdma_cm] [ 27.076144] ? ib_init_ah_from_path+0x39a/0x590 [ib_core] [ 27.076193] cma_req_handler+0x25/0x480 [rdma_cm] [ 27.076237] cm_process_work+0x25/0x120 [ib_cm] [ 27.076280] ? cm_get_bth_pkey.isra.62+0x3c/0xa0 [ib_cm] [ 27.076350] cm_req_handler+0xb03/0xd40 [ib_cm] [ 27.076430] ? sched_clock_cpu+0x11/0xb0 [ 27.076478] cm_work_handler+0x194/0x1588 [ib_cm] [ 27.076525] process_one_work+0x160/0x410 [ 27.076565] worker_thread+0x137/0x4a0 [ 27.076614] kthread+0x112/0x150 [ 27.076684] ? max_active_store+0x60/0x60 [ 27.077642] ? kthread_park+0x90/0x90 [ 27.078530] ret_from_fork+0x2c/0x40 This patch moves it back to the common SA Path Record structure and removes the redundant setter and getter. Tested on Connect-IB and Connect-X4 in Infiniband and RoCE respectively. Fixes: 9fdca4da4d8c (IB/SA: Split struct sa_path_rec based on IB ands ROCE specific fields) Signed-off-by: Majd Dibbiny Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- drivers/infiniband/core/cma.c | 13 ++++++------- drivers/infiniband/core/sa_query.c | 6 +++--- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_sa.h | 25 +++---------------------- 5 files changed, 15 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1844770f3ae8..2b4d613a3474 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1429,7 +1429,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, primary_path->packet_life_time = cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); - sa_path_set_service_id(primary_path, req_msg->service_id); + primary_path->service_id = req_msg->service_id; if (req_msg->alt_local_lid) { alt_path->dgid = req_msg->alt_local_gid; @@ -1452,7 +1452,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, alt_path->packet_life_time = cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); - sa_path_set_service_id(alt_path, req_msg->service_id); + alt_path->service_id = req_msg->service_id; } } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 91b7a2fe5a55..31bb82d8ecd7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1140,7 +1140,7 @@ static void cma_save_ib_info(struct sockaddr *src_addr, ib->sib_pkey = path->pkey; ib->sib_flowinfo = path->flow_label; memcpy(&ib->sib_addr, &path->sgid, 16); - ib->sib_sid = sa_path_get_service_id(path); + ib->sib_sid = path->service_id; ib->sib_scope_id = 0; } else { ib->sib_pkey = listen_ib->sib_pkey; @@ -1274,8 +1274,7 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event, memcpy(&req->local_gid, &req_param->primary_path->sgid, sizeof(req->local_gid)); req->has_gid = true; - req->service_id = - sa_path_get_service_id(req_param->primary_path); + req->service_id = req_param->primary_path->service_id; req->pkey = be16_to_cpu(req_param->primary_path->pkey); if (req->pkey != req_param->bth_pkey) pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" @@ -1827,7 +1826,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct rdma_route *rt; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path; - const __be64 service_id = sa_path_get_service_id(path); + const __be64 service_id = + ib_event->param.req_rcvd.primary_path->service_id; int ret; id = rdma_create_id(listen_id->route.addr.dev_addr.net, @@ -2345,9 +2345,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; - sa_path_set_service_id(&path_rec, - rdma_get_service_id(&id_priv->id, - cma_dst_addr(id_priv))); + path_rec.service_id = rdma_get_service_id(&id_priv->id, + cma_dst_addr(id_priv)); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e335b09c022e..fb7aec4047c8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -194,7 +194,7 @@ static u32 tid; .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { - { PATH_REC_FIELD(ib.service_id), + { PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, @@ -296,7 +296,7 @@ static const struct ib_field path_rec_table[] = { .field_name = "sa_path_rec:" #field static const struct ib_field opa_path_rec_table[] = { - { OPA_PATH_REC_FIELD(opa.service_id), + { OPA_PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, @@ -774,7 +774,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Now build the attributes */ if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) { - val64 = be64_to_cpu(sa_path_get_service_id(sa_rec)); + val64 = be64_to_cpu(sa_rec->service_id); nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID, sizeof(val64), &val64); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 4306285fb155..2354c742caa1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -320,7 +320,7 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch) ch->path.sgid = target->sgid; ch->path.dgid = target->orig_dgid; ch->path.pkey = target->pkey; - sa_path_set_service_id(&ch->path, target->service_id); + ch->path.service_id = target->service_id; return 0; } diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index f5f70e345318..355b81f4242d 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -158,7 +158,6 @@ enum sa_path_rec_type { }; struct sa_path_rec_ib { - __be64 service_id; __be16 dlid; __be16 slid; u8 raw_traffic; @@ -174,7 +173,6 @@ struct sa_path_rec_roce { }; struct sa_path_rec_opa { - __be64 service_id; __be32 dlid; __be32 slid; u8 raw_traffic; @@ -189,6 +187,7 @@ struct sa_path_rec_opa { struct sa_path_rec { union ib_gid dgid; union ib_gid sgid; + __be64 service_id; /* reserved */ __be32 flow_label; u8 hop_limit; @@ -262,7 +261,7 @@ static inline void path_conv_opa_to_ib(struct sa_path_rec *ib, ib->ib.dlid = htons(ntohl(opa->opa.dlid)); ib->ib.slid = htons(ntohl(opa->opa.slid)); } - ib->ib.service_id = opa->opa.service_id; + ib->service_id = opa->service_id; ib->ib.raw_traffic = opa->opa.raw_traffic; } @@ -281,7 +280,7 @@ static inline void path_conv_ib_to_opa(struct sa_path_rec *opa, } opa->opa.slid = slid; opa->opa.dlid = dlid; - opa->opa.service_id = ib->ib.service_id; + opa->service_id = ib->service_id; opa->opa.raw_traffic = ib->ib.raw_traffic; } @@ -591,15 +590,6 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec) (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); } -static inline void sa_path_set_service_id(struct sa_path_rec *rec, - __be64 service_id) -{ - if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.service_id = service_id; - else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.service_id = service_id; -} - static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) @@ -625,15 +615,6 @@ static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec, rec->opa.raw_traffic = raw_traffic; } -static inline __be64 sa_path_get_service_id(struct sa_path_rec *rec) -{ - if (rec->rec_type == SA_PATH_REC_TYPE_IB) - return rec->ib.service_id; - else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - return rec->opa.service_id; - return 0; -} - static inline __be32 sa_path_get_slid(struct sa_path_rec *rec) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) From c4beedb8a914af9c8c1b6e67c753adf411e05160 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 1 Jun 2017 22:05:40 -0700 Subject: [PATCH 180/206] Input: tm2-touchkey - use LEN_ON as boolean value instead of LED_FULL Commit 4e552c8cb5bc ("leds: add LED_ON brightness as boolean value") has introduced the LED_ON enumeration value that can be used instead of LED_FULL which has more of a linear value. Because the tm2-touchscreen doesn't have brightness levels, but it's a simple on/off led, use LED_ON instead of LED_FULL. Signed-off-by: Andi Shyti Reviewed-by: Jaechul Lee Tested-by: Jaechul Lee Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tm2-touchkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c index 485900f953e0..abc266e40e17 100644 --- a/drivers/input/keyboard/tm2-touchkey.c +++ b/drivers/input/keyboard/tm2-touchkey.c @@ -213,7 +213,7 @@ static int tm2_touchkey_probe(struct i2c_client *client, /* led device */ touchkey->led_dev.name = TM2_TOUCHKEY_DEV_NAME; touchkey->led_dev.brightness = LED_FULL; - touchkey->led_dev.max_brightness = LED_FULL; + touchkey->led_dev.max_brightness = LED_ON; touchkey->led_dev.brightness_set = tm2_touchkey_led_brightness_set; error = devm_led_classdev_register(&client->dev, &touchkey->led_dev); From ebcdaee4cebb3a8d0d702ab5e9392373672ec1de Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Jun 2017 19:22:01 +0100 Subject: [PATCH 181/206] dmaengine: pl330: fix warning in pl330_remove When removing a device with less than 9 IRQs (AMBA_NR_IRQS), we'll get a big WARN_ON from devres.c because pl330_remove calls devm_free_irqs for unallocated irqs. Similarly to pl330_probe, check that IRQ number is present before calling devm_free_irq. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 8b0da7fa520d..e90a7a0d760a 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -3008,7 +3008,8 @@ static int pl330_remove(struct amba_device *adev) for (i = 0; i < AMBA_NR_IRQS; i++) { irq = adev->irq[i]; - devm_free_irq(&adev->dev, irq, pl330); + if (irq) + devm_free_irq(&adev->dev, irq, pl330); } dma_async_device_unregister(&pl330->ddma); From 715e944f8a7a1059db5d61ebf197e6f348d747e2 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Tue, 30 May 2017 22:39:46 +0200 Subject: [PATCH 182/206] HID: asus: Stop underlying hardware on remove We are missing a call to hid_hw_stop() on the remove hook. Among other things this is causing an Oops when (re-)starting GNOME / upowerd / ... after the module has been already rmmod-ed. Signed-off-by: Carlo Caione Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 101ab2e63d18..a6268f2f7408 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -537,6 +537,8 @@ static void asus_remove(struct hid_device *hdev) drvdata->kbd_backlight->removed = true; cancel_work_sync(&drvdata->kbd_backlight->work); } + + hid_hw_stop(hdev); } static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, From cb7cf772d83d2d4e6995c5bb9e0fb59aea8f7080 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 26 May 2017 17:40:02 +0100 Subject: [PATCH 183/206] ARM64/ACPI: Fix BAD_MADT_GICC_ENTRY() macro implementation The BAD_MADT_GICC_ENTRY() macro checks if a GICC MADT entry passes muster from an ACPI specification standpoint. Current macro detects the MADT GICC entry length through ACPI firmware version (it changed from 76 to 80 bytes in the transition from ACPI 5.1 to ACPI 6.0 specification) but always uses (erroneously) the ACPICA (latest) struct (ie struct acpi_madt_generic_interrupt - that is 80-bytes long) length to check if the current GICC entry memory record exceeds the MADT table end in memory as defined by the MADT table header itself, which may result in false negatives depending on the ACPI firmware version and how the MADT entries are laid out in memory (ie on ACPI 5.1 firmware MADT GICC entries are 76 bytes long, so by adding 80 to a GICC entry start address in memory the resulting address may well be past the actual MADT end, triggering a false negative). Fix the BAD_MADT_GICC_ENTRY() macro by reshuffling the condition checks and update them to always use the firmware version specific MADT GICC entry length in order to carry out boundary checks. Fixes: b6cfb277378e ("ACPI / ARM64: add BAD_MADT_GICC_ENTRY() macro") Reported-by: Julien Grall Acked-by: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Cc: Julien Grall Cc: Hanjun Guo Cc: Al Stone Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 0e99978da3f0..59cca1d6ec54 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -23,9 +23,9 @@ #define ACPI_MADT_GICC_LENGTH \ (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) -#define BAD_MADT_GICC_ENTRY(entry, end) \ - (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ - (entry)->header.length != ACPI_MADT_GICC_LENGTH) +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \ + (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end)) /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI From a7306c3436e9c8e584a4b9fad5f3dc91be2a6076 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 2 Jun 2017 14:46:11 -0700 Subject: [PATCH 184/206] ksm: prevent crash after write_protect_page fails "err" needs to be left set to -EFAULT if split_huge_page succeeds. Otherwise if "err" gets clobbered with zero and write_protect_page fails, try_to_merge_one_page() will succeed instead of returning -EFAULT and then try_to_merge_with_ksm_page() will continue thinking kpage is a PageKsm when in fact it's still an anonymous page. Eventually it'll crash in page_add_anon_rmap. This has been reproduced on Fedora25 kernel but I can reproduce with upstream too. The bug was introduced in commit f765f540598a ("ksm: prepare to new THP semantics") introduced in v4.5. page:fffff67546ce1cc0 count:4 mapcount:2 mapping:ffffa094551e36e1 index:0x7f0f46673 flags: 0x2ffffc0004007c(referenced|uptodate|dirty|lru|active|swapbacked) page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) page->mem_cgroup:ffffa09674bf0000 ------------[ cut here ]------------ kernel BUG at mm/rmap.c:1222! CPU: 1 PID: 76 Comm: ksmd Not tainted 4.9.3-200.fc25.x86_64 #1 RIP: do_page_add_anon_rmap+0x1c4/0x240 Call Trace: page_add_anon_rmap+0x18/0x20 try_to_merge_with_ksm_page+0x50b/0x780 ksm_scan_thread+0x1211/0x1410 ? prepare_to_wait_event+0x100/0x100 ? try_to_merge_with_ksm_page+0x780/0x780 kthread+0xd9/0xf0 ? kthread_park+0x60/0x60 ret_from_fork+0x25/0x30 Fixes: f765f54059 ("ksm: prepare to new THP semantics") Link: http://lkml.kernel.org/r/20170513131040.21732-1-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Federico Simoncelli Acked-by: Kirill A. Shutemov Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index d9fc0e456128..216184af0e19 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma, goto out; if (PageTransCompound(page)) { - err = split_huge_page(page); - if (err) + if (split_huge_page(page)) goto out_unlock; } From 1bde33e051233f0ed93a8bc67137016ab38c3d2d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Jun 2017 14:46:13 -0700 Subject: [PATCH 185/206] include/linux/gfp.h: fix ___GFP_NOLOCKDEP value Igor Stoppa has noticed that __GFP_NOLOCKDEP can use a lower bit. At the time commit 7e7844226f10 ("lockdep: allow to disable reclaim lockup detection") was written we still had __GFP_OTHER_NODE but I have removed it in commit 41b6167e8f74 ("mm: get rid of __GFP_OTHER_NODE") and forgot to lower the bit value. The current value is outside of __GFP_BITS_SHIFT so it cannot be used actually. Fixes: 7e7844226f10 ("lockdep: allow to disable reclaim lockup detection") Signed-off-by: Michal Hocko Reported-by: Igor Stoppa Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2b1a44f5bdb6..a89d37e8b387 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -41,7 +41,7 @@ struct vm_area_struct; #define ___GFP_WRITE 0x800000u #define ___GFP_KSWAPD_RECLAIM 0x1000000u #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP 0x2000000u #else #define ___GFP_NOLOCKDEP 0 #endif From 60b0a8c3d2480f3b57282b47b7cae7ee71c48635 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 2 Jun 2017 14:46:16 -0700 Subject: [PATCH 186/206] frv: declare jiffies to be located in the .data section Commit 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") removed a section specification from the jiffies declaration that caused conflicts on some platforms. Unfortunately this change broke the build for frv: kernel/built-in.o: In function `__do_softirq': (.text+0x6460): relocation truncated to fit: R_FRV_GPREL12 against symbol `jiffies' defined in *ABS* section in .tmp_vmlinux1 kernel/built-in.o: In function `__do_softirq': (.text+0x6574): relocation truncated to fit: R_FRV_GPREL12 against symbol `jiffies' defined in *ABS* section in .tmp_vmlinux1 kernel/built-in.o: In function `pwq_activate_delayed_work': workqueue.c:(.text+0x15b9c): relocation truncated to fit: R_FRV_GPREL12 against symbol `jiffies' defined in *ABS* section in .tmp_vmlinux1 ... Add __jiffy_arch_data to the declaration of jiffies and use it on frv to include the section specification. For all other platforms __jiffy_arch_data (currently) has no effect. Fixes: 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") Link: http://lkml.kernel.org/r/20170516221333.177280-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Reported-by: Guenter Roeck Tested-by: Guenter Roeck Reviewed-by: David Howells Cc: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/include/asm/timex.h | 6 ++++++ include/linux/jiffies.h | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/frv/include/asm/timex.h b/arch/frv/include/asm/timex.h index a89bddefdacf..139093fab326 100644 --- a/arch/frv/include/asm/timex.h +++ b/arch/frv/include/asm/timex.h @@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void) #define vxtime_lock() do {} while (0) #define vxtime_unlock() do {} while (0) +/* This attribute is used in include/linux/jiffies.h alongside with + * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp + * for frv does not contain another section specification. + */ +#define __jiffy_arch_data __attribute__((__section__(".data"))) + #endif diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 36872fbb815d..734377ad42e9 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -64,13 +64,17 @@ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) +#ifndef __jiffy_arch_data +#define __jiffy_arch_data +#endif + /* * The 64-bit value is not atomic - you MUST NOT read it * without sampling the sequence number in jiffies_lock. * get_jiffies_64() will do this for you as appropriate. */ extern u64 __cacheline_aligned_in_smp jiffies_64; -extern unsigned long volatile __cacheline_aligned_in_smp jiffies; +extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); From 4f4f2ba9c531b3d7cee293dd3654ba3b86e7d220 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Jun 2017 14:46:19 -0700 Subject: [PATCH 187/206] mm: clarify why we want kmalloc before falling backto vmallock While converting drm_[cm]alloc* helpers to kvmalloc* variants Chris Wilson has wondered why we want to try kmalloc before vmalloc fallback even for larger allocations requests. Let's clarify that one larger physically contiguous block is less likely to fragment memory than many scattered pages which can prevent more large blocks from being created. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170517080932.21423-1-mhocko@kernel.org Signed-off-by: Michal Hocko Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 464df3489903..26be6407abd7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -357,8 +357,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); /* - * Make sure that larger requests are not too disruptive - no OOM - * killer and no allocation failure warnings as we have a fallback + * We want to attempt a large physically contiguous block first because + * it is less likely to fragment multiple larger blocks and therefore + * contribute to a long term fragmentation less than vmalloc fallback. + * However make sure that larger requests are not too disruptive - no + * OOM killer and no allocation failure warnings as we have a fallback. */ if (size > PAGE_SIZE) { kmalloc_flags |= __GFP_NOWARN; From 57ddfdaa9a72fe726a44d26d99db31bc137dbeff Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Jun 2017 14:46:22 -0700 Subject: [PATCH 188/206] initramfs: fix disabling of initramfs (and its compression) Commit db2aa7fd15e8 ("initramfs: allow again choice of the embedded initram compression algorithm") introduced the possibility to select the initramfs compression algorithm from Kconfig and while this is a nice feature it broke the use case described below. Here is what my build system does: - kernel is initially configured not to have an initramfs included - build the user space root file system - re-configure the kernel to have an initramfs included (CONFIG_INITRAMFS_SOURCE="/path/to/romfs") and set relevant CONFIG_INITRAMFS options, in my case, no compression option (CONFIG_INITRAMFS_COMPRESSION_NONE) - kernel is re-built with these options -> kernel+initramfs image is copied - kernel is re-built again without these options -> kernel image is copied Building a kernel without an initramfs means setting this option: CONFIG_INITRAMFS_SOURCE="" (and this one only) whereas building a kernel with an initramfs means setting these options: CONFIG_INITRAMFS_SOURCE="/home/fainelli/work/uclinux-rootfs/romfs /home/fainelli/work/uclinux-rootfs/misc/initramfs.dev" CONFIG_INITRAMFS_ROOT_UID=1000 CONFIG_INITRAMFS_ROOT_GID=1000 CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_INITRAMFS_COMPRESSION="" Commit db2aa7fd15e85 ("initramfs: allow again choice of the embedded initram compression algorithm") is problematic because CONFIG_INITRAMFS_COMPRESSION which is used to determine the initramfs_data.cpio extension/compression is a string, and due to how Kconfig works it will evaluate in order, how to assign it. Setting CONFIG_INITRAMFS_COMPRESSION_NONE with CONFIG_INITRAMFS_SOURCE="" cannot possibly work (because of the depends on INITRAMFS_SOURCE!="" imposed on CONFIG_INITRAMFS_COMPRESSION ) yet we still get CONFIG_INITRAMFS_COMPRESSION assigned to ".gz" because CONFIG_RD_GZIP=y is set in my kernel, even when there is no initramfs being built. So we basically end-up generating two initramfs_data.cpio* files, one without extension, and one with .gz. This causes usr/Makefile to track usr/initramfs_data.cpio.gz, and not usr/initramfs_data.cpio anymore, that is also largely problematic after 9e3596b0c6539e ("kbuild: initramfs cleanup, set target from Kconfig") because we used to track all possible initramfs_data files in the $(targets) variable before that commit. The end result is that the kernel with an initramfs clearly does not contain what we expect it to, it has a stale initramfs_data.cpio file built into it, and we keep re-generating an initramfs_data.cpio.gz file which is not the one that we want to include in the kernel image proper. The fix consists in hiding CONFIG_INITRAMFS_COMPRESSION when CONFIG_INITRAMFS_SOURCE="". This puts us back in a state to the pre-4.10 behavior where we can properly disable and re-enable initramfs within the same kernel .config file, and be in control of what CONFIG_INITRAMFS_COMPRESSION is set to. Fixes: db2aa7fd15e8 ("initramfs: allow again choice of the embedded initram compression algorithm") Fixes: 9e3596b0c653 ("kbuild: initramfs cleanup, set target from Kconfig") Link: http://lkml.kernel.org/r/20170521033337.6197-1-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Acked-by: Nicholas Piggin Cc: P J P Cc: Paul Bolle Cc: Michal Marek Cc: Daniel Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- usr/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/usr/Kconfig b/usr/Kconfig index c0c48507e44e..ad0543e21760 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -220,6 +220,7 @@ config INITRAMFS_COMPRESSION_LZ4 endchoice config INITRAMFS_COMPRESSION + depends on INITRAMFS_SOURCE!="" string default "" if INITRAMFS_COMPRESSION_NONE default ".gz" if INITRAMFS_COMPRESSION_GZIP From 478fe3037b2278d276d4cd9cd0ab06c4cb2e9b32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Jun 2017 14:46:25 -0700 Subject: [PATCH 189/206] slub/memcg: cure the brainless abuse of sysfs attributes memcg_propagate_slab_attrs() abuses the sysfs attribute file functions to propagate settings from the root kmem_cache to a newly created kmem_cache. It does that with: attr->show(root, buf); attr->store(new, buf, strlen(bug); Aside of being a lazy and absurd hackery this is broken because it does not check the return value of the show() function. Some of the show() functions return 0 w/o touching the buffer. That means in such a case the store function is called with the stale content of the previous show(). That causes nonsense like invoking kmem_cache_shrink() on a newly created kmem_cache. In the worst case it would cause handing in an uninitialized buffer. This should be rewritten proper by adding a propagate() callback to those slub_attributes which must be propagated and avoid that insane conversion to and from ASCII, but that's too large for a hot fix. Check at least the return value of the show() function, so calling store() with stale content is prevented. Steven said: "It can cause a deadlock with get_online_cpus() that has been uncovered by recent cpu hotplug and lockdep changes that Thomas and Peter have been doing. Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(cpu_hotplug.lock); lock(slab_mutex); lock(cpu_hotplug.lock); lock(slab_mutex); *** DEADLOCK ***" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1705201244540.2255@nanos Signed-off-by: Thomas Gleixner Reported-by: Steven Rostedt Acked-by: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 57e5156f02be..7449593fca72 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) char mbuf[64]; char *buf; struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); + ssize_t len; if (!attr || !attr->store || !attr->show) continue; @@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) buf = buffer; } - attr->show(root_cache, buf); - attr->store(s, buf, strlen(buf)); + len = attr->show(root_cache, buf); + if (len > 0) + attr->store(s, buf, len); } if (buffer) From ff5a20169b98d84ad8d7f99f27c5ebbb008204d6 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 2 Jun 2017 14:46:28 -0700 Subject: [PATCH 190/206] pcmcia: remove left-over %Z format Commit 5b5e0928f742 ("lib/vsprintf.c: remove %Z support") removed some usages of format %Z but forgot "%.2Zx". This makes clang 4.0 reports a -Wformat-extra-args warning because it does not know about %Z. Replace %Z with %z. Link: http://lkml.kernel.org/r/20170520090946.22562-1-nicolas.iooss_linux@m4x.org Signed-off-by: Nicolas Iooss Cc: Harald Welte Cc: Alexey Dobriyan Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/pcmcia/cm4040_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index d4dbd8d8e524..382c864814d9 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -374,7 +374,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_START, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; @@ -387,7 +387,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, for (i = 0; i < bytes_to_write; i++) { rc = wait_for_bulk_out_ready(dev); if (rc <= 0) { - DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n", + DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) @@ -403,7 +403,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; From c288983dddf714216428774e022ad78f48dd8cb1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 2 Jun 2017 14:46:31 -0700 Subject: [PATCH 191/206] mm/page_alloc.c: make sure OOM victim can try allocations with no watermarks once Roman Gushchin has reported that the OOM killer can trivially selects next OOM victim when a thread doing memory allocation from page fault path was selected as first OOM victim. allocate invoked oom-killer: gfp_mask=0x14280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=(null), order=0, oom_score_adj=0 allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: oom_kill_process+0x219/0x3e0 out_of_memory+0x11d/0x480 __alloc_pages_slowpath+0xc84/0xd40 __alloc_pages_nodemask+0x245/0x260 alloc_pages_vma+0xa2/0x270 __handle_mm_fault+0xca9/0x10c0 handle_mm_fault+0xf3/0x210 __do_page_fault+0x240/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... Out of memory: Kill process 492 (allocate) score 899 or sacrifice child Killed process 492 (allocate) total-vm:2052368kB, anon-rss:1894576kB, file-rss:4kB, shmem-rss:0kB allocate: page allocation failure: order:0, mode:0x14280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=(null) allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: __alloc_pages_slowpath+0xd32/0xd40 __alloc_pages_nodemask+0x245/0x260 alloc_pages_vma+0xa2/0x270 __handle_mm_fault+0xca9/0x10c0 handle_mm_fault+0xf3/0x210 __do_page_fault+0x240/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... oom_reaper: reaped process 492 (allocate), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB ... allocate invoked oom-killer: gfp_mask=0x0(), nodemask=(null), order=0, oom_score_adj=0 allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: oom_kill_process+0x219/0x3e0 out_of_memory+0x11d/0x480 pagefault_out_of_memory+0x68/0x80 mm_fault_error+0x8f/0x190 ? handle_mm_fault+0xf3/0x210 __do_page_fault+0x4b2/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... Out of memory: Kill process 233 (firewalld) score 10 or sacrifice child Killed process 233 (firewalld) total-vm:246076kB, anon-rss:20956kB, file-rss:0kB, shmem-rss:0kB There is a race window that the OOM reaper completes reclaiming the first victim's memory while nothing but mutex_trylock() prevents the first victim from calling out_of_memory() from pagefault_out_of_memory() after memory allocation for page fault path failed due to being selected as an OOM victim. This is a side effect of commit 9a67f6488eca926f ("mm: consolidate GFP_NOFAIL checks in the allocator slowpath") because that commit silently changed the behavior from /* Avoid allocations with no watermarks from looping endlessly */ to /* * Give up allocations without trying memory reserves if selected * as an OOM victim */ in __alloc_pages_slowpath() by moving the location to check TIF_MEMDIE flag. I have noticed this change but I didn't post a patch because I thought it is an acceptable change other than noise by warn_alloc() because !__GFP_NOFAIL allocations are allowed to fail. But we overlooked that failing memory allocation from page fault path makes difference due to the race window explained above. While it might be possible to add a check to pagefault_out_of_memory() that prevents the first victim from calling out_of_memory() or remove out_of_memory() from pagefault_out_of_memory(), changing pagefault_out_of_memory() does not suppress noise by warn_alloc() when allocating thread was selected as an OOM victim. There is little point with printing similar backtraces and memory information from both out_of_memory() and warn_alloc(). Instead, if we guarantee that current thread can try allocations with no watermarks once when current thread looping inside __alloc_pages_slowpath() was selected as an OOM victim, we can follow "who can use memory reserves" rules and suppress noise by warn_alloc() and prevent memory allocations from page fault path from calling pagefault_out_of_memory(). If we take the comment literally, this patch would do - if (test_thread_flag(TIF_MEMDIE)) - goto nopage; + if (alloc_flags == ALLOC_NO_WATERMARKS || (gfp_mask & __GFP_NOMEMALLOC)) + goto nopage; because gfp_pfmemalloc_allowed() returns false if __GFP_NOMEMALLOC is given. But if I recall correctly (I couldn't find the message), the condition is meant to apply to only OOM victims despite the comment. Therefore, this patch preserves TIF_MEMDIE check. Fixes: 9a67f6488eca926f ("mm: consolidate GFP_NOFAIL checks in the allocator slowpath") Link: http://lkml.kernel.org/r/201705192112.IAF69238.OQOHSJLFOFFMtV@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: Roman Gushchin Tested-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: [4.11] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f9e450c6b6e4..b7a6f583a373 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3870,7 +3870,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto got_pg; /* Avoid allocations with no watermarks from looping endlessly */ - if (test_thread_flag(TIF_MEMDIE)) + if (test_thread_flag(TIF_MEMDIE) && + (alloc_flags == ALLOC_NO_WATERMARKS || + (gfp_mask & __GFP_NOMEMALLOC))) goto nopage; /* Retry as long as the OOM killer is making progress */ From d0f0931de936a0a468d7e59284d39581c16d3a73 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 2 Jun 2017 14:46:34 -0700 Subject: [PATCH 192/206] mm: avoid spurious 'bad pmd' warning messages When the pmd_devmap() checks were added by 5c7fb56e5e3f ("mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd") to add better support for DAX huge pages, they were all added to the end of if() statements after existing pmd_trans_huge() checks. So, things like: - if (pmd_trans_huge(*pmd)) + if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) When further checks were added after pmd_trans_unstable() checks by commit 7267ec008b5c ("mm: postpone page table allocation until we have page to map") they were also added at the end of the conditional: + if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd)) This ordering is fine for pmd_trans_huge(), but doesn't work for pmd_trans_unstable(). This is because DAX huge pages trip the bad_pmd() check inside of pmd_none_or_trans_huge_or_clear_bad() (called by pmd_trans_unstable()), which prints out a warning and returns 1. So, we do end up doing the right thing, but only after spamming dmesg with suspicious looking messages: mm/pgtable-generic.c:39: bad pmd ffff8808daa49b88(84000001006000a5) Reorder these checks in a helper so that pmd_devmap() is checked first, avoiding the error messages, and add a comment explaining why the ordering is important. Fixes: commit 7267ec008b5c ("mm: postpone page table allocation until we have page to map") Link: http://lkml.kernel.org/r/20170522215749.23516-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Pawel Lebioda Cc: "Darrick J. Wong" Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: Matthew Wilcox Cc: "Kirill A . Shutemov" Cc: Dave Jiang Cc: Xiong Zhou Cc: Eryu Guan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 6ff5d729ded0..2e65df1831d9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3029,6 +3029,17 @@ static int __do_fault(struct vm_fault *vmf) return ret; } +/* + * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. + * If we check pmd_trans_unstable() first we will trip the bad_pmd() check + * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly + * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. + */ +static int pmd_devmap_trans_unstable(pmd_t *pmd) +{ + return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); +} + static int pte_alloc_one_map(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -3052,18 +3063,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf) map_pte: /* * If a huge pmd materialized under us just retry later. Use - * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd - * didn't become pmd_trans_huge under us and then back to pmd_none, as - * a result of MADV_DONTNEED running immediately after a huge pmd fault - * in a different thread of this mm, in turn leading to a misleading - * pmd_trans_huge() retval. All we have to ensure is that it is a - * regular pmd that we can walk with pte_offset_map() and we can do that - * through an atomic read in C, which is what pmd_trans_unstable() - * provides. + * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of + * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge + * under us and then back to pmd_none, as a result of MADV_DONTNEED + * running immediately after a huge pmd fault in a different thread of + * this mm, in turn leading to a misleading pmd_trans_huge() retval. + * All we have to ensure is that it is a regular pmd that we can walk + * with pte_offset_map() and we can do that through an atomic read in + * C, which is what pmd_trans_unstable() provides. */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return VM_FAULT_NOPAGE; + /* + * At this point we know that our vmf->pmd points to a page of ptes + * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() + * for the duration of the fault. If a racing MADV_DONTNEED runs and + * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still + * be valid and we will re-check to make sure the vmf->pte isn't + * pte_none() under vmf->ptl protection when we return to + * alloc_set_pte(). + */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); return 0; @@ -3690,7 +3710,7 @@ static int handle_pte_fault(struct vm_fault *vmf) vmf->pte = NULL; } else { /* See comment in pte_alloc_one_map() */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* * A regular pmd is established and it can't morph into a huge From e2093926a098a8ccf0f1d10f6df8dad452cb28d3 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 2 Jun 2017 14:46:37 -0700 Subject: [PATCH 193/206] dax: fix race between colliding PMD & PTE entries We currently have two related PMD vs PTE races in the DAX code. These can both be easily triggered by having two threads reading and writing simultaneously to the same private mapping, with the key being that private mapping reads can be handled with PMDs but private mapping writes are always handled with PTEs so that we can COW. Here is the first race: CPU 0 CPU 1 (private mapping write) __handle_mm_fault() create_huge_pmd() - FALLBACK handle_pte_fault() passes check for pmd_devmap() (private mapping read) __handle_mm_fault() create_huge_pmd() dax_iomap_pmd_fault() inserts PMD dax_iomap_pte_fault() does a PTE fault, but we already have a DAX PMD installed in our page tables at this spot. Here's the second race: CPU 0 CPU 1 (private mapping read) __handle_mm_fault() passes check for pmd_none() create_huge_pmd() dax_iomap_pmd_fault() inserts PMD (private mapping write) __handle_mm_fault() create_huge_pmd() - FALLBACK (private mapping read) __handle_mm_fault() passes check for pmd_none() create_huge_pmd() handle_pte_fault() dax_iomap_pte_fault() inserts PTE dax_iomap_pmd_fault() inserts PMD, but we already have a PTE at this spot. The core of the issue is that while there is isolation between faults to the same range in the DAX fault handlers via our DAX entry locking, there is no isolation between faults in the code in mm/memory.c. This means for instance that this code in __handle_mm_fault() can run: if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { ret = create_huge_pmd(&vmf); But by the time we actually get to run the fault handler called by create_huge_pmd(), the PMD is no longer pmd_none() because a racing PTE fault has installed a normal PMD here as a parent. This is the cause of the 2nd race. The first race is similar - there is the following check in handle_pte_fault(): } else { /* See comment in pte_alloc_one_map() */ if (pmd_devmap(*vmf->pmd) || pmd_trans_unstable(vmf->pmd)) return 0; So if a pmd_devmap() PMD (a DAX PMD) has been installed at vmf->pmd, we will bail and retry the fault. This is correct, but there is nothing preventing the PMD from being installed after this check but before we actually get to the DAX PTE fault handlers. In my testing these races result in the following types of errors: BUG: Bad rss-counter state mm:ffff8800a817d280 idx:1 val:1 BUG: non-zero nr_ptes on freeing mm: 15 Fix this issue by having the DAX fault handlers verify that it is safe to continue their fault after they have taken an entry lock to block other racing faults. [ross.zwisler@linux.intel.com: improve fix for colliding PMD & PTE entries] Link: http://lkml.kernel.org/r/20170526195932.32178-1-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/20170522215749.23516-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: Pawel Lebioda Reviewed-by: Jan Kara Cc: "Darrick J. Wong" Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: Matthew Wilcox Cc: "Kirill A . Shutemov" Cc: Pawel Lebioda Cc: Dave Jiang Cc: Xiong Zhou Cc: Eryu Guan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index c22eaf162f95..2a6889b3585f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1154,6 +1154,17 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, goto out; } + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PMD fault that overlaps with + * the PTE we need to set up. If so just return and the fault will be + * retried. + */ + if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { + vmf_ret = VM_FAULT_NOPAGE; + goto unlock_entry; + } + /* * Note that we don't bother to use iomap_apply here: DAX required * the file system block size to be equal the page size, which means @@ -1397,6 +1408,18 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, if (IS_ERR(entry)) goto fallback; + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PTE fault that overlaps with + * the PMD we need to set up. If so just return and the fault will be + * retried. + */ + if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) && + !pmd_devmap(*vmf->pmd)) { + result = 0; + goto unlock_entry; + } + /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way From 30809f559a0d348c2dfd7ab05e9a451e2384962e Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 2 Jun 2017 14:46:40 -0700 Subject: [PATCH 194/206] mm/migrate: fix refcount handling when !hugepage_migration_supported() On failing to migrate a page, soft_offline_huge_page() performs the necessary update to the hugepage ref-count. But when !hugepage_migration_supported() , unmap_and_move_hugepage() also decrements the page ref-count for the hugepage. The combined behaviour leaves the ref-count in an inconsistent state. This leads to soft lockups when running the overcommitted hugepage test from mce-tests suite. Soft offlining pfn 0x83ed600 at process virtual address 0x400000000000 soft offline: 0x83ed600: migration failed 1, type 1fffc00000008008 (uptodate|head) INFO: rcu_preempt detected stalls on CPUs/tasks: Tasks blocked on level-0 rcu_node (CPUs 0-7): P2715 (detected by 7, t=5254 jiffies, g=963, c=962, q=321) thugetlb_overco R running task 0 2715 2685 0x00000008 Call trace: dump_backtrace+0x0/0x268 show_stack+0x24/0x30 sched_show_task+0x134/0x180 rcu_print_detail_task_stall_rnp+0x54/0x7c rcu_check_callbacks+0xa74/0xb08 update_process_times+0x34/0x60 tick_sched_handle.isra.7+0x38/0x70 tick_sched_timer+0x4c/0x98 __hrtimer_run_queues+0xc0/0x300 hrtimer_interrupt+0xac/0x228 arch_timer_handler_phys+0x3c/0x50 handle_percpu_devid_irq+0x8c/0x290 generic_handle_irq+0x34/0x50 __handle_domain_irq+0x68/0xc0 gic_handle_irq+0x5c/0xb0 Address this by changing the putback_active_hugepage() in soft_offline_huge_page() to putback_movable_pages(). This only triggers on systems that enable memory failure handling (ARCH_SUPPORTS_MEMORY_FAILURE) but not hugepage migration (!ARCH_ENABLE_HUGEPAGE_MIGRATION). I imagine this wasn't triggered as there aren't many systems running this configuration. [akpm@linux-foundation.org: remove dead comment, per Naoya] Link: http://lkml.kernel.org/r/20170525135146.32011-1-punit.agrawal@arm.com Reported-by: Manoj Iyer Tested-by: Manoj Iyer Suggested-by: Naoya Horiguchi Signed-off-by: Punit Agrawal Cc: Joonsoo Kim Cc: Wanpeng Li Cc: Christoph Lameter Cc: Mel Gorman Cc: [3.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2527dfeddb00..342fac9ba89b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1595,12 +1595,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n", pfn, ret, page->flags, &page->flags); - /* - * We know that soft_offline_huge_page() tries to migrate - * only one hugepage pointed to by hpage, so we need not - * run through the pagelist here. - */ - putback_active_hugepage(hpage); + if (!list_empty(&pagelist)) + putback_movable_pages(&pagelist); if (ret > 0) ret = -EIO; } else { From 70feee0e1ef331b22cc51f383d532a0d043fbdcc Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 2 Jun 2017 14:46:43 -0700 Subject: [PATCH 195/206] mlock: fix mlock count can not decrease in race condition Kefeng reported that when running the follow test, the mlock count in meminfo will increase permanently: [1] testcase linux:~ # cat test_mlockal grep Mlocked /proc/meminfo for j in `seq 0 10` do for i in `seq 4 15` do ./p_mlockall >> log & done sleep 0.2 done # wait some time to let mlock counter decrease and 5s may not enough sleep 5 grep Mlocked /proc/meminfo linux:~ # cat p_mlockall.c #include #include #include #define SPACE_LEN 4096 int main(int argc, char ** argv) { int ret; void *adr = malloc(SPACE_LEN); if (!adr) return -1; ret = mlockall(MCL_CURRENT | MCL_FUTURE); printf("mlcokall ret = %d\n", ret); ret = munlockall(); printf("munlcokall ret = %d\n", ret); free(adr); return 0; } In __munlock_pagevec() we should decrement NR_MLOCK for each page where we clear the PageMlocked flag. Commit 1ebb7cc6a583 ("mm: munlock: batch NR_MLOCK zone state updates") has introduced a bug where we don't decrement NR_MLOCK for pages where we clear the flag, but fail to isolate them from the lru list (e.g. when the pages are on some other cpu's percpu pagevec). Since PageMlocked stays cleared, the NR_MLOCK accounting gets permanently disrupted by this. Fix it by counting the number of page whose PageMlock flag is cleared. Fixes: 1ebb7cc6a583 (" mm: munlock: batch NR_MLOCK zone state updates") Link: http://lkml.kernel.org/r/1495678405-54569-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Reported-by: Kefeng Wang Tested-by: Kefeng Wang Cc: Vlastimil Babka Cc: Joern Engel Cc: Mel Gorman Cc: Michel Lespinasse Cc: Hugh Dickins Cc: Rik van Riel Cc: Johannes Weiner Cc: Michal Hocko Cc: Xishi Qiu Cc: zhongjiang Cc: Hanjun Guo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index c483c5c20b4b..b562b5523a65 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -284,7 +284,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked; + int delta_munlocked = -nr; struct pagevec pvec_putback; int pgrescued = 0; @@ -304,6 +304,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) continue; else __munlock_isolation_failed(page); + } else { + delta_munlocked++; } /* @@ -315,7 +317,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; } - delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(zone_lru_lock(zone)); From 9a291a7c9428155e8e623e4a3989f8be47134df5 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Jun 2017 14:46:46 -0700 Subject: [PATCH 196/206] mm/hugetlb: report -EHWPOISON not -EFAULT when FOLL_HWPOISON is specified KVM uses get_user_pages() to resolve its stage2 faults. KVM sets the FOLL_HWPOISON flag causing faultin_page() to return -EHWPOISON when it finds a VM_FAULT_HWPOISON. KVM handles these hwpoison pages as a special case. (check_user_page_hwpoison()) When huge pages are involved, this doesn't work so well. get_user_pages() calls follow_hugetlb_page(), which stops early if it receives VM_FAULT_HWPOISON from hugetlb_fault(), eventually returning -EFAULT to the caller. The step to map this to -EHWPOISON based on the FOLL_ flags is missing. The hwpoison special case is skipped, and -EFAULT is returned to user-space, causing Qemu or kvmtool to exit. Instead, move this VM_FAULT_ to errno mapping code into a header file and use it from faultin_page() and follow_hugetlb_page(). With this, KVM works as expected. This isn't a problem for arm64 today as we haven't enabled MEMORY_FAILURE, but I can't see any reason this doesn't happen on x86 too, so I think this should be a fix. This doesn't apply earlier than stable's v4.11.1 due to all sorts of cleanup. [james.morse@arm.com: add vm_fault_to_errno() call to faultin_page()] suggested. Link: http://lkml.kernel.org/r/20170525171035.16359-1-james.morse@arm.com [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170524160900.28786-1-james.morse@arm.com Signed-off-by: James Morse Acked-by: Punit Agrawal Acked-by: Naoya Horiguchi Cc: "Kirill A . Shutemov" Cc: [4.11.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 11 +++++++++++ mm/gup.c | 20 ++++++++------------ mm/hugetlb.c | 5 +++++ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7cb17c6b97de..b892e95d4929 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2327,6 +2327,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ +static inline int vm_fault_to_errno(int vm_fault, int foll_flags) +{ + if (vm_fault & VM_FAULT_OOM) + return -ENOMEM; + if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) + return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT; + if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) + return -EFAULT; + return 0; +} + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, diff --git a/mm/gup.c b/mm/gup.c index d9e6fddcc51f..b3c7214d710d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -407,12 +407,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, ret = handle_mm_fault(vma, address, fault_flags); if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, *flags); + + if (err) + return err; BUG(); } @@ -723,12 +721,10 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, ret = handle_mm_fault(vma, address, fault_flags); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return -EHWPOISON; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, 0); + + if (err) + return err; BUG(); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e5828875f7bb..3eedb187e549 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, } ret = hugetlb_fault(mm, vma, vaddr, fault_flags); if (ret & VM_FAULT_ERROR) { + int err = vm_fault_to_errno(ret, flags); + + if (err) + return err; + remainder = 0; break; } From 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Jun 2017 14:46:49 -0700 Subject: [PATCH 197/206] mm: consider memblock reservations for deferred memory initialization sizing We have seen an early OOM killer invocation on ppc64 systems with crashkernel=4096M: kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0 kthreadd cpuset=/ mems_allowed=7 CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1 Call Trace: dump_stack+0xb0/0xf0 (unreliable) dump_header+0xb0/0x258 out_of_memory+0x5f0/0x640 __alloc_pages_nodemask+0xa8c/0xc80 kmem_getpages+0x84/0x1a0 fallback_alloc+0x2a4/0x320 kmem_cache_alloc_node+0xc0/0x2e0 copy_process.isra.25+0x260/0x1b30 _do_fork+0x94/0x470 kernel_thread+0x48/0x60 kthreadd+0x264/0x330 ret_from_kernel_thread+0x5c/0xa4 Mem-Info: active_anon:0 inactive_anon:0 isolated_anon:0 active_file:0 inactive_file:0 isolated_file:0 unevictable:0 dirty:0 writeback:0 unstable:0 slab_reclaimable:5 slab_unreclaimable:73 mapped:0 shmem:0 pagetables:0 bounce:0 free:0 free_pcp:0 free_cma:0 Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes lowmem_reserve[]: 0 0 0 0 Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB 0 total pagecache pages 0 pages in swap cache Swap cache stats: add 0, delete 0, find 0/0 Free swap = 0kB Total swap = 0kB 819200 pages RAM 0 pages HighMem/MovableOnly 817481 pages reserved 0 pages cma reserved 0 pages hwpoisoned the reason is that the managed memory is too low (only 110MB) while the rest of the the 50GB is still waiting for the deferred intialization to be done. update_defer_init estimates the initial memoty to initialize to 2GB at least but it doesn't consider any memory allocated in that range. In this particular case we've had Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB) so the low 2GB is mostly depleted. Fix this by considering memblock allocations in the initial static initialization estimation. Move the max_initialise to reset_deferred_meminit and implement a simple memblock_reserved_memory helper which iterates all reserved blocks and sums the size of all that start below the given address. The cumulative size is than added on top of the initial estimation. This is still not ideal because reset_deferred_meminit doesn't consider holes and so reservation might be above the initial estimation whihch we ignore but let's make the logic simpler until we really need to handle more complicated cases. Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz Signed-off-by: Michal Hocko Acked-by: Mel Gorman Tested-by: Srikar Dronamraju Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 8 ++++++++ include/linux/mmzone.h | 1 + mm/memblock.c | 23 +++++++++++++++++++++++ mm/page_alloc.c | 33 ++++++++++++++++++++++----------- 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4ce24a376262..8098695e5d8d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -425,12 +425,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) } #endif +extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr); #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { return 0; } +static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr) +{ + return 0; +} + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ebaccd4e7d8c..ef6a13b7bd3e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -678,6 +678,7 @@ typedef struct pglist_data { * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; + unsigned long static_init_size; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/memblock.c b/mm/memblock.c index b049c9b2dba8..7b8a5db76a2f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1739,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type) } } +extern unsigned long __init_memblock +memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr) +{ + struct memblock_region *rgn; + unsigned long size = 0; + int idx; + + for_each_memblock_type((&memblock.reserved), rgn) { + phys_addr_t start, end; + + if (rgn->base + rgn->size < start_addr) + continue; + if (rgn->base > end_addr) + continue; + + start = rgn->base; + end = start + rgn->size; + size += end - start; + } + + return size; +} + void __init_memblock __memblock_dump_all(void) { pr_info("MEMBLOCK configuration:\n"); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b7a6f583a373..2302f250d6b1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -292,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT static inline void reset_deferred_meminit(pg_data_t *pgdat) { + unsigned long max_initialise; + unsigned long reserved_lowmem; + + /* + * Initialise at least 2G of a node but also take into account that + * two large system hashes that can take up 1GB for 0.25TB/node. + */ + max_initialise = max(2UL << (30 - PAGE_SHIFT), + (pgdat->node_spanned_pages >> 8)); + + /* + * Compensate the all the memblock reservations (e.g. crash kernel) + * from the initial estimation to make sure we will initialize enough + * memory to boot. + */ + reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, + pgdat->node_start_pfn + max_initialise); + max_initialise += reserved_lowmem; + + pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); pgdat->first_deferred_pfn = ULONG_MAX; } @@ -314,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat, unsigned long pfn, unsigned long zone_end, unsigned long *nr_initialised) { - unsigned long max_initialise; - /* Always populate low zones for address-contrained allocations */ if (zone_end < pgdat_end_pfn(pgdat)) return true; - /* - * Initialise at least 2G of a node but also take into account that - * two large system hashes that can take up 1GB for 0.25TB/node. - */ - max_initialise = max(2UL << (30 - PAGE_SHIFT), - (pgdat->node_spanned_pages >> 8)); - (*nr_initialised)++; - if ((*nr_initialised > max_initialise) && + if ((*nr_initialised > pgdat->static_init_size) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { pgdat->first_deferred_pfn = pfn; return false; @@ -6138,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, /* pg_data_t should be reset to zero when it's allocated */ WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx); - reset_deferred_meminit(pgdat); pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; pgdat->per_cpu_nodestats = NULL; @@ -6160,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, (unsigned long)pgdat->node_mem_map); #endif + reset_deferred_meminit(pgdat); free_area_init_core(pgdat); } From d6c9708737c2107c38bd75f133d14d5801b8d6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 2 Jun 2017 14:46:51 -0700 Subject: [PATCH 198/206] scripts/gdb: make lx-dmesg command work (reliably) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lx-dmesg needs access to the log_buf symbol from printk.c. Unfortunately, the symbol log_buf also exists in BPF's verifier.c and hence gdb can pick one or the other. If it happens to pick BPF's log_buf, lx-dmesg doesn't work: (gdb) lx-dmesg Python Exception Cannot access memory at address 0x0: Error occurred in Python command: Cannot access memory at address 0x0 (gdb) p log_buf $15 = 0x0 Luckily, GDB has a way to deal with this, see https://sourceware.org/gdb/onlinedocs/gdb/Symbols.html (gdb) info variables ^log_buf$ All variables matching regular expression "^log_buf$": File /kernel/bpf/verifier.c: static char *log_buf; File /kernel/printk/printk.c: static char *log_buf; (gdb) p 'verifier.c'::log_buf $1 = 0x0 (gdb) p 'printk.c'::log_buf $2 = 0x811a6aa0 <__log_buf> "" (gdb) p &log_buf $3 = (char **) 0x8120fe40 (gdb) p &'verifier.c'::log_buf $4 = (char **) 0x8120fe40 (gdb) p &'printk.c'::log_buf $5 = (char **) 0x8048b7d0 By being explicit about the location of the symbol, we can make lx-dmesg work again. While at it, do the same for the other symbols we need from printk.c Link: http://lkml.kernel.org/r/20170526112222.3414-1-git@andred.net Signed-off-by: André Draszik Tested-by: Kieran Bingham Acked-by: Jan Kiszka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/dmesg.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index f9b92ece7834..5afd1098e33a 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -23,10 +23,11 @@ class LxDmesg(gdb.Command): super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) def invoke(self, arg, from_tty): - log_buf_addr = int(str(gdb.parse_and_eval("log_buf")).split()[0], 16) - log_first_idx = int(gdb.parse_and_eval("log_first_idx")) - log_next_idx = int(gdb.parse_and_eval("log_next_idx")) - log_buf_len = int(gdb.parse_and_eval("log_buf_len")) + log_buf_addr = int(str(gdb.parse_and_eval( + "'printk.c'::log_buf")).split()[0], 16) + log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx")) + log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx")) + log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len")) inf = gdb.inferiors()[0] start = log_buf_addr + log_first_idx From 8d4b31376974855cd4fb176e5d63ba8e0407928e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Jun 2017 17:18:47 -0700 Subject: [PATCH 199/206] Input: axp20x-pek - only check for "INTCFD9" ACPI device on Cherry Trail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9b13a4ca8d2c ("Input: axp20x-pek - do not register input device on some systems") added a check for the INTCFD9 ACPI device which also handles the powerbutton as on some systems the powerbutton is connected to both the PMIC, handled by axp20x-pek, and to a gpio on the SoC, handled by soc_button_array which attaches itself to the INTCFD9 ACPI device. Testing + comparing DSDTs has shown that this only happens on Cherry Trail devices with an AXP288 PMIC, the AXP288 PMIC is also used on Bay Trail devices but there the power button is only connected to the PMIC and not handled by soc_button_array. This means that the INTCFD9 check has caused a regression on Bay Trail devices, causing power-button presses to no longer be seen. This commit fixes this by limiting the check to devices where the ACPI node for the AXP288 contains a _HRV (hardware revision) attribute with a value of 3 which indicates we are dealing with a Cherry Trail platform. Fixes: 9b13a4ca8d2c ("Input: axp20x-pek - do not register input ...") Reported-by: Сергей Трусов Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/axp20x-pek.c | 43 +++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index f11807db6979..c0a763a70a00 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -256,6 +256,41 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek, return 0; } +#ifdef CONFIG_ACPI +static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek, + struct platform_device *pdev) +{ + unsigned long long hrv = 0; + acpi_status status; + + if (IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY) && + axp20x_pek->axp20x->variant == AXP288_ID) { + status = acpi_evaluate_integer(ACPI_HANDLE(pdev->dev.parent), + "_HRV", NULL, &hrv); + if (ACPI_FAILURE(status)) + dev_err(&pdev->dev, "Failed to get PMIC hardware revision\n"); + + /* + * On Cherry Trail platforms (hrv == 3), do not register the + * input device if there is an "INTCFD9" gpio + * button ACPI device, as that handles the power button too, + * and otherwise we end up reporting all presses twice. + */ + if (hrv == 3 && acpi_dev_found("INTCFD9")) + return false; + + } + + return true; +} +#else +static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek, + struct platform_device *pdev) +{ + return true; +} +#endif + static int axp20x_pek_probe(struct platform_device *pdev) { struct axp20x_pek *axp20x_pek; @@ -268,13 +303,7 @@ static int axp20x_pek_probe(struct platform_device *pdev) axp20x_pek->axp20x = dev_get_drvdata(pdev->dev.parent); - /* - * Do not register the input device if there is an "INTCFD9" - * gpio button ACPI device, that handles the power button too, - * and otherwise we end up reporting all presses twice. - */ - if (!acpi_dev_found("INTCFD9") || - !IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY)) { + if (axp20x_pek_should_register_input(axp20x_pek, pdev)) { error = axp20x_pek_probe_input_device(axp20x_pek, pdev); if (error) return error; From 0fd5f221093870d93edb696f6903b058c4d75411 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Jun 2017 17:50:22 -0700 Subject: [PATCH 200/206] Input: axp20x-pek - switch to acpi_dev_present and check for ACPI0011 too acpi_dev_found checks that there is a matching ACPI node, but it may be disabled (_STA method returns 0) in which case the soc_button_array driver will not bind to it and axp20x-pek should handle the power-button. This commit switches from acpi_dev_found to acpi_dev_present to avoid not registering an input-dev for the powerbutton when there is a disabled PNP0C40 device. The ACPI-6.0 standard defines a standard gpio button device using the ACPI0011 HID replacing the custom PNP0C40 gpio device, many newer devices define both PNP0C40 and ACPI0011 devices enabling one or the other depending on whether the BIOS thinks it is going to boot Android or Windows. This commit adds a check for the ACPI0011 device, so that if either device is present *and* enabled we don't register an input-dev for the powerbutton. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/axp20x-pek.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index c0a763a70a00..400869e61a06 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -272,11 +272,12 @@ static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek, /* * On Cherry Trail platforms (hrv == 3), do not register the - * input device if there is an "INTCFD9" gpio + * input device if there is an "INTCFD9" or "ACPI0011" gpio * button ACPI device, as that handles the power button too, * and otherwise we end up reporting all presses twice. */ - if (hrv == 3 && acpi_dev_found("INTCFD9")) + if (hrv == 3 && (acpi_dev_present("INTCFD9", NULL, -1) || + acpi_dev_present("ACPI0011", NULL, -1))) return false; } From 4d58e7329fdd87d37f04b319d6a5165aaff5d75c Mon Sep 17 00:00:00 2001 From: Stefan Schaeckeler Date: Fri, 2 Jun 2017 12:42:08 -0700 Subject: [PATCH 201/206] hwmon: (aspeed-pwm-tacho) Call of_node_put() on a node not claimed Call of_node_put() on a node claimed with of_node_get() or by any other means such as for_each_child_of_node(). Signed-off-by: Stefan Schaeckeler Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index 12b716b70ead..cc15bedb3415 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -808,7 +808,6 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) if (ret) return ret; } - of_node_put(np); priv->groups[0] = &pwm_dev_group; priv->groups[1] = &fan_dev_group; From 5f348fa35a28b19cf7ee1eaa420757341a724c86 Mon Sep 17 00:00:00 2001 From: Stefan Schaeckeler Date: Fri, 2 Jun 2017 12:43:28 -0700 Subject: [PATCH 202/206] hwmon: (aspeed-pwm-tacho) make fan/pwm names start with index 1 Make fan and pwm names in sysfs start with index 1 in accordance to Documentation/hwmon/sysfs-interface conventions. Current implementation starts with index 0, making tools such as sensors(1) skip the first fan. Signed-off-by: Stefan Schaeckeler Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 52 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index cc15bedb3415..9de13d626c68 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -597,24 +597,23 @@ static umode_t fan_dev_is_visible(struct kobject *kobj, return a->mode; } -static SENSOR_DEVICE_ATTR(pwm0, 0644, - show_pwm, set_pwm, 0); static SENSOR_DEVICE_ATTR(pwm1, 0644, - show_pwm, set_pwm, 1); + show_pwm, set_pwm, 0); static SENSOR_DEVICE_ATTR(pwm2, 0644, - show_pwm, set_pwm, 2); + show_pwm, set_pwm, 1); static SENSOR_DEVICE_ATTR(pwm3, 0644, - show_pwm, set_pwm, 3); + show_pwm, set_pwm, 2); static SENSOR_DEVICE_ATTR(pwm4, 0644, - show_pwm, set_pwm, 4); + show_pwm, set_pwm, 3); static SENSOR_DEVICE_ATTR(pwm5, 0644, - show_pwm, set_pwm, 5); + show_pwm, set_pwm, 4); static SENSOR_DEVICE_ATTR(pwm6, 0644, - show_pwm, set_pwm, 6); + show_pwm, set_pwm, 5); static SENSOR_DEVICE_ATTR(pwm7, 0644, + show_pwm, set_pwm, 6); +static SENSOR_DEVICE_ATTR(pwm8, 0644, show_pwm, set_pwm, 7); static struct attribute *pwm_dev_attrs[] = { - &sensor_dev_attr_pwm0.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm2.dev_attr.attr, &sensor_dev_attr_pwm3.dev_attr.attr, @@ -622,6 +621,7 @@ static struct attribute *pwm_dev_attrs[] = { &sensor_dev_attr_pwm5.dev_attr.attr, &sensor_dev_attr_pwm6.dev_attr.attr, &sensor_dev_attr_pwm7.dev_attr.attr, + &sensor_dev_attr_pwm8.dev_attr.attr, NULL, }; @@ -630,40 +630,39 @@ static const struct attribute_group pwm_dev_group = { .is_visible = pwm_is_visible, }; -static SENSOR_DEVICE_ATTR(fan0_input, 0444, - show_rpm, NULL, 0); static SENSOR_DEVICE_ATTR(fan1_input, 0444, - show_rpm, NULL, 1); + show_rpm, NULL, 0); static SENSOR_DEVICE_ATTR(fan2_input, 0444, - show_rpm, NULL, 2); + show_rpm, NULL, 1); static SENSOR_DEVICE_ATTR(fan3_input, 0444, - show_rpm, NULL, 3); + show_rpm, NULL, 2); static SENSOR_DEVICE_ATTR(fan4_input, 0444, - show_rpm, NULL, 4); + show_rpm, NULL, 3); static SENSOR_DEVICE_ATTR(fan5_input, 0444, - show_rpm, NULL, 5); + show_rpm, NULL, 4); static SENSOR_DEVICE_ATTR(fan6_input, 0444, - show_rpm, NULL, 6); + show_rpm, NULL, 5); static SENSOR_DEVICE_ATTR(fan7_input, 0444, - show_rpm, NULL, 7); + show_rpm, NULL, 6); static SENSOR_DEVICE_ATTR(fan8_input, 0444, - show_rpm, NULL, 8); + show_rpm, NULL, 7); static SENSOR_DEVICE_ATTR(fan9_input, 0444, - show_rpm, NULL, 9); + show_rpm, NULL, 8); static SENSOR_DEVICE_ATTR(fan10_input, 0444, - show_rpm, NULL, 10); + show_rpm, NULL, 9); static SENSOR_DEVICE_ATTR(fan11_input, 0444, - show_rpm, NULL, 11); + show_rpm, NULL, 10); static SENSOR_DEVICE_ATTR(fan12_input, 0444, - show_rpm, NULL, 12); + show_rpm, NULL, 11); static SENSOR_DEVICE_ATTR(fan13_input, 0444, - show_rpm, NULL, 13); + show_rpm, NULL, 12); static SENSOR_DEVICE_ATTR(fan14_input, 0444, - show_rpm, NULL, 14); + show_rpm, NULL, 13); static SENSOR_DEVICE_ATTR(fan15_input, 0444, + show_rpm, NULL, 14); +static SENSOR_DEVICE_ATTR(fan16_input, 0444, show_rpm, NULL, 15); static struct attribute *fan_dev_attrs[] = { - &sensor_dev_attr_fan0_input.dev_attr.attr, &sensor_dev_attr_fan1_input.dev_attr.attr, &sensor_dev_attr_fan2_input.dev_attr.attr, &sensor_dev_attr_fan3_input.dev_attr.attr, @@ -679,6 +678,7 @@ static struct attribute *fan_dev_attrs[] = { &sensor_dev_attr_fan13_input.dev_attr.attr, &sensor_dev_attr_fan14_input.dev_attr.attr, &sensor_dev_attr_fan15_input.dev_attr.attr, + &sensor_dev_attr_fan16_input.dev_attr.attr, NULL }; From 4f253e1eb628f5adf7ca4f43aab4bbb1bfffa081 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 16 May 2017 12:18:11 +0200 Subject: [PATCH 203/206] nfs: Mark unnecessarily extern functions as static nfs_initialise_sb() and nfs_clone_super() are declared as extern even though they are used only in fs/nfs/super.c. Mark them as static. Also remove explicit 'inline' directive from nfs_initialise_sb() and leave it upto compiler to decide whether inlining is worth it. Signed-off-by: Jan Kara Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 -- fs/nfs/super.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e9b4c3320e37..3e24392f2caa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -398,7 +398,6 @@ extern struct file_system_type nfs4_referral_fs_type; bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); -void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, @@ -458,7 +457,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ -void nfs_clone_super(struct super_block *, struct nfs_mount_info *); void nfs_umount_begin(struct super_block *); int nfs_statfs(struct dentry *, struct kstatfs *); int nfs_show_options(struct seq_file *, struct dentry *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2f3822a4a7d5..eceb4eabb064 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2301,7 +2301,7 @@ EXPORT_SYMBOL_GPL(nfs_remount); /* * Initialise the common bits of the superblock */ -inline void nfs_initialise_sb(struct super_block *sb) +static void nfs_initialise_sb(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -2348,7 +2348,8 @@ EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish setting up a cloned NFS2/3/4 superblock */ -void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) +static void nfs_clone_super(struct super_block *sb, + struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); From fc098af16b9ff6d470d779d8ddcfb2d91869045a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 4 Jun 2017 10:23:25 +0200 Subject: [PATCH 204/206] Revert "tty: fix port buffer locking" This reverts commit 925bb1ce47f429f69aad35876df7ecd8c53deb7e. It causes lots of warnings and problems so for now, let's just revert it. Reported-by: Reported-by: Russell King Reported-by: Sergey Senozhatsky Reported-by: Geert Uytterhoeven Reported-by: Jiri Slaby Reported-by: Andrey Konovalov Acked-by: Vegard Nossum Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 4fb3165384c4..6b137194069f 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -34,9 +34,7 @@ static int tty_port_default_receive_buf(struct tty_port *port, if (!disc) return 0; - mutex_lock(&tty->atomic_write_lock); ret = tty_ldisc_receive_buf(disc, p, (char *)f, count); - mutex_unlock(&tty->atomic_write_lock); tty_ldisc_deref(disc); From 239e250e4acbc0104d514307029c0839e834a51a Mon Sep 17 00:00:00 2001 From: Richard Narron Date: Sun, 4 Jun 2017 16:23:18 -0700 Subject: [PATCH 205/206] fs/ufs: Set UFS default maximum bytes per file This fixes a problem with reading files larger than 2GB from a UFS-2 file system: https://bugzilla.kernel.org/show_bug.cgi?id=195721 The incorrect UFS s_maxsize limit became a problem as of commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") which started using s_maxbytes to avoid a page index overflow in do_generic_file_read(). That caused files to be truncated on UFS-2 file systems because the default maximum file size is 2GB (MAX_NON_LFS) and UFS didn't update it. Here I simply increase the default to a common value used by other file systems. Signed-off-by: Richard Narron Cc: Al Viro Cc: Will B Cc: Theodore Ts'o Cc: # v4.9 and backports of c2a9737f45e2 Signed-off-by: Linus Torvalds --- fs/ufs/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 131b2b77c818..29ecaf739449 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -812,9 +812,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_dirblksize = UFS_SECTOR_SIZE; super_block_offset=UFS_SBLOCK; - /* Keep 2Gig file limit. Some UFS variants need to override - this but as I don't know which I'll let those in the know loosen - the rules */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) { case UFS_MOUNT_UFSTYPE_44BSD: UFSD("ufstype=44bsd\n"); From 3c2993b8c6143d8a5793746a54eba8f86f95240f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Jun 2017 16:47:43 -0700 Subject: [PATCH 206/206] Linux 4.12-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 470bd4d9513a..853ae9179af9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Fearless Coyote # *DOCUMENTATION*