From 463cdad83823830dd59572fb81ac243f6a44b589 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 2 Jul 2019 13:27:05 +0200 Subject: [PATCH 01/24] scsi: core: use scmd_printk() to print which command timed out With a possibly faulty disk the following messages may appear in the logs: kernel: sd 0:0:9:0: timing out command, waited 180s kernel: sd 0:0:9:0: timing out command, waited 20s kernel: sd 0:0:9:0: timing out command, waited 20s kernel: sd 0:0:9:0: timing out command, waited 60s kernel: sd 0:0:9:0: timing out command, waited 20s This is not very informative because it's not possible to identify the command that timed out. This patch replaces sdev_printk() with scmd_printk(). Signed-off-by: Maurizio Lombardi Reviewed-by: Ewan D. Milne Reviewed-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 83d6dd8067d6..ae1c989ef08e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1452,7 +1452,7 @@ static void scsi_softirq_done(struct request *rq) disposition = scsi_decide_disposition(cmd); if (disposition != SUCCESS && time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { - sdev_printk(KERN_ERR, cmd->device, + scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n", wait_for/HZ); disposition = SUCCESS; From 6764f519bcb007bf09f328133311d6cdc2bbc72f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 2 Jul 2019 21:01:14 +0800 Subject: [PATCH 02/24] scsi: megaraid_sas: Make some symbols static Fix sparse warnings: drivers/scsi/megaraid/megaraid_sas_base.c:271:1: warning: symbol 'megasas_issue_dcmd' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_base.c:2227:6: warning: symbol 'megasas_do_ocr' was not declared. Should it be static? drivers/scsi/megaraid/megaraid_sas_base.c:3194:25: warning: symbol 'megaraid_host_attrs' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: YueHaibing Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index bd6da59c087f..bb1bea6ab99c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -292,7 +292,7 @@ void megasas_set_dma_settings(struct megasas_instance *instance, } } -void +static void megasas_issue_dcmd(struct megasas_instance *instance, struct megasas_cmd *cmd) { instance->instancet->fire_cmd(instance, @@ -2249,7 +2249,7 @@ megasas_internal_reset_defer_cmds(struct megasas_instance *instance); static void process_fw_state_change_wq(struct work_struct *work); -void megasas_do_ocr(struct megasas_instance *instance) +static void megasas_do_ocr(struct megasas_instance *instance) { if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) || (instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) || @@ -3315,7 +3315,7 @@ static DEVICE_ATTR_RO(fw_cmds_outstanding); static DEVICE_ATTR_RO(dump_system_regs); static DEVICE_ATTR_RO(raid_map_id); -struct device_attribute *megaraid_host_attrs[] = { +static struct device_attribute *megaraid_host_attrs[] = { &dev_attr_fw_crash_buffer_size, &dev_attr_fw_crash_buffer, &dev_attr_fw_crash_state, From 9b79ee9773a8c11772b2d0ed0cec831c7a68dc4f Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 8 Jul 2019 19:43:39 +0300 Subject: [PATCH 03/24] scsi: libsas: remove the exporting of sas_wait_eh The function sas_wait_eh is declared static and marked EXPORT_SYMBOL, which is at best an odd combination. Because the function is not used outside of the drivers/scsi/libsas/sas_scsi_host.c file it is defined in, this commit removes the EXPORT_SYMBOL() marking. Signed-off-by: Denis Efremov Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_scsi_host.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 602bd1a17a63..8c04f1b97a85 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -414,7 +414,6 @@ static void sas_wait_eh(struct domain_device *dev) goto retry; } } -EXPORT_SYMBOL(sas_wait_eh); static int sas_queue_reset(struct domain_device *dev, int reset_type, u64 lun, int wait) From 196ba6629cf95e51403337235d09742fcdc3febd Mon Sep 17 00:00:00 2001 From: Deepak Ukey Date: Tue, 9 Jul 2019 15:30:48 +0530 Subject: [PATCH 04/24] scsi: pm80xx: Fixed kernel panic during error recovery for SATA drive Disabling the SATA drive interface cause kernel panic. When the drive Interface is disabled, device should be deregistered after aborting all pending I/Os. Also changed the port recovery timeout to 10000 ms for PM8006 controller. Signed-off-by: Deepak Ukey Signed-off-by: Viswas G Reviewed-by: Jack Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 6 +++++- drivers/scsi/pm8001/pm80xx_hwi.c | 2 +- drivers/scsi/pm8001/pm80xx_hwi.h | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index dd38c356a1a4..9453705f643a 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -888,6 +888,8 @@ static void pm8001_dev_gone_notify(struct domain_device *dev) spin_unlock_irqrestore(&pm8001_ha->lock, flags); pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev , dev, 1, 0); + while (pm8001_dev->running_req) + msleep(20); spin_lock_irqsave(&pm8001_ha->lock, flags); } PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id); @@ -1256,8 +1258,10 @@ int pm8001_abort_task(struct sas_task *task) PM8001_MSG_DBG(pm8001_ha, pm8001_printk("Waiting for Port reset\n")); wait_for_completion(&completion_reset); - if (phy->port_reset_status) + if (phy->port_reset_status) { + pm8001_dev_gone_notify(dev); goto out; + } /* * 4. SATA Abort ALL diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 1128d86d241a..73261902d75d 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -604,7 +604,7 @@ static void update_main_config_table(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer &= 0x0000ffff; pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer |= - 0x140000; + CHIP_8006_PORT_RECOVERY_TIMEOUT; } pm8001_mw32(address, MAIN_PORT_RECOVERY_TIMER, pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer); diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index 84d7426441bf..dc9ab7689060 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -230,6 +230,8 @@ #define SAS_MAX_AIP 0x200000 #define IT_NEXUS_TIMEOUT 0x7D0 #define PORT_RECOVERY_TIMEOUT ((IT_NEXUS_TIMEOUT/100) + 30) +/* Port recovery timeout, 10000 ms for PM8006 controller */ +#define CHIP_8006_PORT_RECOVERY_TIMEOUT 0x640000 #ifdef __LITTLE_ENDIAN_BITFIELD struct sas_identify_frame_local { From 4bc022145c939dd3938771535a8074a884aec0f9 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Mon, 17 Jun 2019 22:31:45 -0300 Subject: [PATCH 05/24] scsi: devinfo: BLIST_TRY_VPD_PAGES for SanDisk Cruzer Blade Currently, all USB devices skip VPD pages, even when the device supports them (SPC-3 and later), but some of them support VPD, like Cruzer Blade. Signed-off-by: Marcos Paulo de Souza Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_devinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index a08ff3bd6310..df14597752ec 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -239,6 +239,8 @@ static struct { {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"SanDisk", "Cruzer Blade", NULL, BLIST_TRY_VPD_PAGES | + BLIST_INQUIRY_36}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ From 057959c6e36e4e6e5303a39c4df79c4af9d40923 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jun 2019 14:37:47 +0200 Subject: [PATCH 06/24] scsi: lpfc: reduce stack size with CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE The lpfc_debug_dump_all_queues() function repeatedly calls into lpfc_debug_dump_qe() which has a temporary 128 byte buffer. This was fine before the introduction of CONFIG_GCC_PLUGIN_STRUCTLEAK_VERBOSE because each instance could occupy the same stack slot. However, now they each get their own copy, which leads to a huge increase in stack usage as seen from the compiler warning: drivers/scsi/lpfc/lpfc_debugfs.c: In function 'lpfc_debug_dump_all_queues': drivers/scsi/lpfc/lpfc_debugfs.c:6474:1: error: the frame size of 1712 bytes is larger than 100 bytes [-Werror=frame-larger-than=] Avoid this by not marking lpfc_debug_dump_qe() as inline so the compiler can choose to emit a static version of this function when it's needed or otherwise silently drop it. As an added benefit, not inlining multiple copies of this function means we save several kilobytes of .text section, reducing the file size from 47kb to 43. It is somewhat unusual to have a function that is static but not inline in a header file, but this does not cause problems here because it is only used by other inline functions. It would however seem reasonable to move all the lpfc_debug_dump_* functions into lpfc_debugfs.c and not mark them inline as a later cleanup. Fixes: 81a56f6dcd20 ("gcc-plugins: structleak: Generalize to all variable types") Signed-off-by: Arnd Bergmann Acked-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 2322ddb085c0..34070874616d 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -330,7 +330,7 @@ enum { * This function dumps an entry indexed by @idx from a queue specified by the * queue descriptor @q. **/ -static inline void +static void lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx) { char line_buf[LPFC_LBUF_SZ]; From c8f96df5b8e633056b7ebf5d52a9d6fb1b156ce3 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 28 Jun 2019 18:02:12 -0700 Subject: [PATCH 07/24] scsi: megaraid_sas: Fix calculation of target ID In megasas_get_target_prop(), driver is incorrectly calculating the target ID for devices with channel 1 and 3. Due to this, firmware will either fail the command (if there is no device with the target id sent from driver) or could return the properties for a target which was not intended. Devices could end up with the wrong queue depth due to this. Fix target id calculation for channel 1 and 3. Fixes: 96188a89cc6d ("scsi: megaraid_sas: NVME interface target prop added") Cc: stable@vger.kernel.org Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index bb1bea6ab99c..3544d52dddf0 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6558,7 +6558,8 @@ megasas_get_target_prop(struct megasas_instance *instance, int ret; struct megasas_cmd *cmd; struct megasas_dcmd_frame *dcmd; - u16 targetId = (sdev->channel % 2) + sdev->id; + u16 targetId = ((sdev->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + + sdev->id; cmd = megasas_get_cmd(instance); From 1175b88452cad208894412b955ee698934968aed Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 28 Jun 2019 02:50:39 -0700 Subject: [PATCH 08/24] scsi: megaraid_sas: Enable msix_load_balance for Invader and later controllers Load balancing IO completions across all available MSI-X vectors should be enabled for Invader and later generation controllers only. This needs to be disabled for older controllers. Add an adapter type check before setting msix_load_balance flag. Fixes: 1d15d9098ad1 ("scsi: megaraid_sas: Load balance completions across all MSI-X") Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3544d52dddf0..1bd91c7fb10c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5945,7 +5945,8 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->is_rdpq = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ? 1 : 0; - if (!instance->msix_combined) { + if (instance->adapter_type >= INVADER_SERIES && + !instance->msix_combined) { instance->msix_load_balance = true; instance->smp_affinity_enable = false; } From d956a116c96cfa651b5ed5c783984a33d5222268 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 28 Jun 2019 02:50:40 -0700 Subject: [PATCH 09/24] scsi: megaraid_sas: Add module parameter for FW Async event logging Add module parameter to control logging levels of async event notifications from firmware that get logged to system log. Also, allow changing the value from sysfs after driver load. Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 1bd91c7fb10c..8b0dc0795f64 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -117,6 +117,10 @@ MODULE_PARM_DESC(perf_mode, "Performance mode (only for Aero adapters), options: "default mode is 'balanced'" ); +int event_log_level = MFI_EVT_CLASS_CRITICAL; +module_param(event_log_level, int, 0644); +MODULE_PARM_DESC(event_log_level, "Asynchronous event logging level- range is: -2(CLASS_DEBUG) to 4(CLASS_DEAD), Default: 2(CLASS_CRITICAL)"); + MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); @@ -416,7 +420,13 @@ megasas_decode_evt(struct megasas_instance *instance) union megasas_evt_class_locale class_locale; class_locale.word = le32_to_cpu(evt_detail->cl.word); - if (class_locale.members.class >= MFI_EVT_CLASS_CRITICAL) + if ((event_log_level < MFI_EVT_CLASS_DEBUG) || + (event_log_level > MFI_EVT_CLASS_DEAD)) { + printk(KERN_WARNING "megaraid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n"); + event_log_level = MFI_EVT_CLASS_CRITICAL; + } + + if (class_locale.members.class >= event_log_level) dev_info(&instance->pdev->dev, "%d (%s/0x%04x/%s) - %s\n", le32_to_cpu(evt_detail->seq_num), format_timestamp(le32_to_cpu(evt_detail->time_stamp)), @@ -8762,6 +8772,12 @@ static int __init megasas_init(void) goto err_pcidrv; } + if ((event_log_level < MFI_EVT_CLASS_DEBUG) || + (event_log_level > MFI_EVT_CLASS_DEAD)) { + printk(KERN_WARNING "megarid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n"); + event_log_level = MFI_EVT_CLASS_CRITICAL; + } + rval = driver_create_file(&megasas_pci_driver.driver, &driver_attr_version); if (rval) From 705d3b088af9c1aac8a388456f5b9c38f04f97a1 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 28 Jun 2019 02:50:41 -0700 Subject: [PATCH 10/24] scsi: megaraid_sas: Update driver version to 07.710.50.00 Signed-off-by: Shivasharan S Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 11565597c6d5..b4b27d0dd12f 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -33,8 +33,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.710.06.00-rc1" -#define MEGASAS_RELDATE "June 18, 2019" +#define MEGASAS_VERSION "07.710.50.00-rc1" +#define MEGASAS_RELDATE "June 28, 2019" /* * Device IDs From b76becde2b84137faa29bbc9a3b20953b5980e48 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Tue, 2 Jul 2019 23:02:00 +0200 Subject: [PATCH 11/24] scsi: zfcp: fix request object use-after-free in send path causing seqno errors With a recent change to our send path for FSF commands we introduced a possible use-after-free of request-objects, that might further lead to zfcp crafting bad requests, which the FCP channel correctly complains about with an error (FSF_PROT_SEQ_NUMB_ERROR). This error is then handled by an adapter-wide recovery. The following sequence illustrates the possible use-after-free: Send Path: int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action) { struct zfcp_fsf_req *req; ... spin_lock_irq(&qdio->req_q_lock); // ^^^^^^^^^^^^^^^^ // protects QDIO queue during sending ... req = zfcp_fsf_req_create(qdio, FSF_QTCB_OPEN_PORT_WITH_DID, SBAL_SFLAGS0_TYPE_READ, qdio->adapter->pool.erp_req); // ^^^^^^^^^^^^^^^^^^^ // allocation of the request-object ... retval = zfcp_fsf_req_send(req); ... spin_unlock_irq(&qdio->req_q_lock); return retval; } static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) { struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; ... zfcp_reqlist_add(adapter->req_list, req); // ^^^^^^^^^^^^^^^^ // add request to our driver-internal hash-table for tracking // (protected by separate lock req_list->lock) ... if (zfcp_qdio_send(qdio, &req->qdio_req)) { // ^^^^^^^^^^^^^^ // hand-off the request to FCP channel; // the request can complete at any point now ... } /* Don't increase for unsolicited status */ if (!zfcp_fsf_req_is_status_read_buffer(req)) // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // possible use-after-free adapter->fsf_req_seq_no++; // ^^^^^^^^^^^^^^^^ // because of the use-after-free we might // miss this accounting, and as follow-up // this results in the FCP channel error // FSF_PROT_SEQ_NUMB_ERROR adapter->req_no++; return 0; } static inline bool zfcp_fsf_req_is_status_read_buffer(struct zfcp_fsf_req *req) { return req->qtcb == NULL; // ^^^^^^^^^ // possible use-after-free } Response Path: void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx) { ... struct zfcp_fsf_req *fsf_req; ... for (idx = 0; idx < QDIO_MAX_ELEMENTS_PER_BUFFER; idx++) { ... fsf_req = zfcp_reqlist_find_rm(adapter->req_list, req_id); // ^^^^^^^^^^^^^^^^^^^^ // remove request from our driver-internal // hash-table (lock req_list->lock) ... zfcp_fsf_req_complete(fsf_req); } } static void zfcp_fsf_req_complete(struct zfcp_fsf_req *req) { ... if (likely(req->status & ZFCP_STATUS_FSFREQ_CLEANUP)) zfcp_fsf_req_free(req); // ^^^^^^^^^^^^^^^^^ // free memory for request-object else complete(&req->completion); // ^^^^^^^^ // completion notification for code-paths that wait // synchronous for the completion of the request; in // those the memory is freed separately } The result of the use-after-free only affects the send path, and can not lead to any data corruption. In case we miss the sequence-number accounting, because the memory was already re-purposed, the next FSF command will fail with said FCP channel error, and we will recover the whole adapter. This causes no additional errors, but it slows down traffic. There is a slight chance of the same thing happen again recursively after the adapter recovery, but so far this has not been seen. This was seen under z/VM, where the send path might run on a virtual CPU that gets scheduled away by z/VM, while the return path might still run, and so create the necessary timing. Running with KASAN can also slow down the kernel sufficiently to run into this user-after-free, and then see the report by KASAN. To fix this, simply pull the test for the sequence-number accounting in front of the hand-off to the FCP channel (this information doesn't change during hand-off), but leave the sequence-number accounting itself where it is. To make future regressions of the same kind less likely, add comments to all closely related code-paths. Signed-off-by: Benjamin Block Fixes: f9eca0227600 ("scsi: zfcp: drop duplicate fsf_command from zfcp_fsf_req which is also in QTCB header") Cc: #5.0+ Reviewed-by: Steffen Maier Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 45 ++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index d94496ee6883..c5b2615b49ef 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include #include #include "zfcp_ext.h" @@ -741,6 +742,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio, static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) { + const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; int req_id = req->req_id; @@ -757,8 +759,20 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) return -EIO; } + /* + * NOTE: DO NOT TOUCH ASYNC req PAST THIS POINT. + * ONLY TOUCH SYNC req AGAIN ON req->completion. + * + * The request might complete and be freed concurrently at any point + * now. This is not protected by the QDIO-lock (req_q_lock). So any + * uncontrolled access after this might result in an use-after-free bug. + * Only if the request doesn't have ZFCP_STATUS_FSFREQ_CLEANUP set, and + * when it is completed via req->completion, is it safe to use req + * again. + */ + /* Don't increase for unsolicited status */ - if (!zfcp_fsf_req_is_status_read_buffer(req)) + if (!is_srb) adapter->fsf_req_seq_no++; adapter->req_no++; @@ -805,6 +819,7 @@ int zfcp_fsf_status_read(struct zfcp_qdio *qdio) retval = zfcp_fsf_req_send(req); if (retval) goto failed_req_send; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ goto out; @@ -914,8 +929,10 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd) req->qtcb->bottom.support.req_handle = (u64) old_req_id; zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); - if (!zfcp_fsf_req_send(req)) + if (!zfcp_fsf_req_send(req)) { + /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */ goto out; + } out_error_free: zfcp_fsf_req_free(req); @@ -1098,6 +1115,7 @@ int zfcp_fsf_send_ct(struct zfcp_fc_wka_port *wka_port, ret = zfcp_fsf_req_send(req); if (ret) goto failed_send; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ goto out; @@ -1198,6 +1216,7 @@ int zfcp_fsf_send_els(struct zfcp_adapter *adapter, u32 d_id, ret = zfcp_fsf_req_send(req); if (ret) goto failed_send; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ goto out; @@ -1243,6 +1262,7 @@ int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1279,8 +1299,10 @@ int zfcp_fsf_exchange_config_data_sync(struct zfcp_qdio *qdio, zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); retval = zfcp_fsf_req_send(req); spin_unlock_irq(&qdio->req_q_lock); - if (!retval) + if (!retval) { + /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */ wait_for_completion(&req->completion); + } zfcp_fsf_req_free(req); return retval; @@ -1330,6 +1352,7 @@ int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1372,8 +1395,10 @@ int zfcp_fsf_exchange_port_data_sync(struct zfcp_qdio *qdio, retval = zfcp_fsf_req_send(req); spin_unlock_irq(&qdio->req_q_lock); - if (!retval) + if (!retval) { + /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */ wait_for_completion(&req->completion); + } zfcp_fsf_req_free(req); @@ -1493,6 +1518,7 @@ int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action) erp_action->fsf_req_id = 0; put_device(&port->dev); } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1557,6 +1583,7 @@ int zfcp_fsf_close_port(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1626,6 +1653,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) retval = zfcp_fsf_req_send(req); if (retval) zfcp_fsf_req_free(req); + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); if (!retval) @@ -1681,6 +1709,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) retval = zfcp_fsf_req_send(req); if (retval) zfcp_fsf_req_free(req); + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); if (!retval) @@ -1776,6 +1805,7 @@ int zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1899,6 +1929,7 @@ int zfcp_fsf_open_lun(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -1987,6 +2018,7 @@ int zfcp_fsf_close_lun(struct zfcp_erp_action *erp_action) zfcp_fsf_req_free(req); erp_action->fsf_req_id = 0; } + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ out: spin_unlock_irq(&qdio->req_q_lock); return retval; @@ -2299,6 +2331,7 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd) retval = zfcp_fsf_req_send(req); if (unlikely(retval)) goto failed_scsi_cmnd; + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ goto out; @@ -2373,8 +2406,10 @@ struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev, zfcp_fc_fcp_tm(fcp_cmnd, sdev, tm_flags); zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); - if (!zfcp_fsf_req_send(req)) + if (!zfcp_fsf_req_send(req)) { + /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */ goto out; + } zfcp_fsf_req_free(req); req = NULL; From 106d45f350c7cac876844dc685845cba4ffdb70b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Tue, 2 Jul 2019 23:02:01 +0200 Subject: [PATCH 12/24] scsi: zfcp: fix request object use-after-free in send path causing wrong traces When tracing instances where we open and close WKA ports, we also pass the request-ID of the respective FSF command. But after successfully sending the FSF command we must not use the request-object anymore, as this might result in an use-after-free (see "zfcp: fix request object use-after-free in send path causing seqno errors" ). To fix this add a new variable that caches the request-ID before sending the request. This won't change during the hand-off to the FCP channel, and so it's safe to trace this cached request-ID later, instead of using the request object. Signed-off-by: Benjamin Block Fixes: d27a7cb91960 ("zfcp: trace on request for open and close of WKA port") Cc: #2.6.38+ Reviewed-by: Steffen Maier Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c5b2615b49ef..296bbc3c4606 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1627,6 +1627,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; struct zfcp_fsf_req *req; + unsigned long req_id = 0; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1649,6 +1650,8 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) hton24(req->qtcb->bottom.support.d_id, wka_port->d_id); req->data = wka_port; + req_id = req->req_id; + zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); retval = zfcp_fsf_req_send(req); if (retval) @@ -1657,7 +1660,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) out: spin_unlock_irq(&qdio->req_q_lock); if (!retval) - zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id); + zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req_id); return retval; } @@ -1683,6 +1686,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; struct zfcp_fsf_req *req; + unsigned long req_id = 0; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1705,6 +1709,8 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) req->data = wka_port; req->qtcb->header.port_handle = wka_port->handle; + req_id = req->req_id; + zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); retval = zfcp_fsf_req_send(req); if (retval) @@ -1713,7 +1719,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) out: spin_unlock_irq(&qdio->req_q_lock); if (!retval) - zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id); + zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req_id); return retval; } From 484647088826f2f651acbda6bcf9536b8a466703 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Tue, 2 Jul 2019 23:02:02 +0200 Subject: [PATCH 13/24] scsi: zfcp: fix GCC compiler warning emitted with -Wmaybe-uninitialized GCC v9 emits this warning: CC drivers/s390/scsi/zfcp_erp.o drivers/s390/scsi/zfcp_erp.c: In function 'zfcp_erp_action_enqueue': drivers/s390/scsi/zfcp_erp.c:217:26: warning: 'erp_action' may be used uninitialized in this function [-Wmaybe-uninitialized] 217 | struct zfcp_erp_action *erp_action; | ^~~~~~~~~~ This is a possible false positive case, as also documented in the GCC documentations: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wmaybe-uninitialized The actual code-sequence is like this: Various callers can invoke the function below with the argument "want" being one of: ZFCP_ERP_ACTION_REOPEN_ADAPTER, ZFCP_ERP_ACTION_REOPEN_PORT_FORCED, ZFCP_ERP_ACTION_REOPEN_PORT, or ZFCP_ERP_ACTION_REOPEN_LUN. zfcp_erp_action_enqueue(want, ...) ... need = zfcp_erp_required_act(want, ...) need = want ... maybe: need = ZFCP_ERP_ACTION_REOPEN_PORT maybe: need = ZFCP_ERP_ACTION_REOPEN_ADAPTER ... return need ... zfcp_erp_setup_act(need, ...) struct zfcp_erp_action *erp_action; // <== line 217 ... switch(need) { case ZFCP_ERP_ACTION_REOPEN_LUN: ... erp_action = &zfcp_sdev->erp_action; WARN_ON_ONCE(erp_action->port != port); // <== access ... break; case ZFCP_ERP_ACTION_REOPEN_PORT: case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: ... erp_action = &port->erp_action; WARN_ON_ONCE(erp_action->port != port); // <== access ... break; case ZFCP_ERP_ACTION_REOPEN_ADAPTER: ... erp_action = &adapter->erp_action; WARN_ON_ONCE(erp_action->port != NULL); // <== access ... break; } ... WARN_ON_ONCE(erp_action->adapter != adapter); // <== access When zfcp_erp_setup_act() is called, 'need' will never be anything else than one of the 4 possible enumeration-names that are used in the switch-case, and 'erp_action' is initialized for every one of them, before it is used. Thus the warning is a false positive, as documented. We introduce the extra if{} in the beginning to create an extra code-flow, so the compiler can be convinced that the switch-case will never see any other value. BUG_ON()/BUG() is intentionally not used to not crash anything, should this ever happen anyway - right now it's impossible, as argued above; and it doesn't introduce a 'default:' switch-case to retain warnings should 'enum zfcp_erp_act_type' ever be extended and no explicit case be introduced. See also v5.0 commit 399b6c8bc9f7 ("scsi: zfcp: drop old default switch case which might paper over missing case"). Signed-off-by: Benjamin Block Reviewed-by: Jens Remus Reviewed-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_erp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index e8fc28dba8df..96f0d34e9459 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include "zfcp_ext.h" #include "zfcp_reqlist.h" @@ -217,6 +218,12 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(enum zfcp_erp_act_type need, struct zfcp_erp_action *erp_action; struct zfcp_scsi_dev *zfcp_sdev; + if (WARN_ON_ONCE(need != ZFCP_ERP_ACTION_REOPEN_LUN && + need != ZFCP_ERP_ACTION_REOPEN_PORT && + need != ZFCP_ERP_ACTION_REOPEN_PORT_FORCED && + need != ZFCP_ERP_ACTION_REOPEN_ADAPTER)) + return NULL; + switch (need) { case ZFCP_ERP_ACTION_REOPEN_LUN: zfcp_sdev = sdev_to_zfcp(sdev); From 41a6bf6529edd10a6def42e3b2c34a7474bcc2f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 2 Jul 2019 10:18:35 +0100 Subject: [PATCH 14/24] scsi: libfc: fix null pointer dereference on a null lport Currently if lport is null then the null lport pointer is dereference when printing out debug via the FC_LPORT_DB macro. Fix this by using the more generic FC_LIBFC_DBG debug macro instead that does not use lport. Addresses-Coverity: ("Dereference after null check") Fixes: 7414705ea4ae ("libfc: Add runtime debugging with debug_logging module parameter") Signed-off-by: Colin Ian King Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_exch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 025cd2ff9f65..0c87b9777c3a 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -2591,7 +2591,7 @@ void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp) /* lport lock ? */ if (!lport || lport->state == LPORT_ST_DISABLED) { - FC_LPORT_DBG(lport, "Receiving frames for an lport that " + FC_LIBFC_DBG("Receiving frames for an lport that " "has not been initialized correctly\n"); fc_frame_free(fp); return; From 0cdc58580b37a160fac4b884266b8b7cb096f539 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 17 Jul 2019 10:51:49 +0900 Subject: [PATCH 15/24] scsi: sd_zbc: Fix compilation warning kbuild test robot gets the following compilation warning using gcc 7.4 cross compilation for c6x (GCC_VERSION=7.4.0 make.cross ARCH=c6x). In file included from include/asm-generic/bug.h:18:0, from arch/c6x/include/asm/bug.h:12, from include/linux/bug.h:5, from include/linux/thread_info.h:12, from include/asm-generic/current.h:5, from ./arch/c6x/include/generated/asm/current.h:1, from include/linux/sched.h:12, from include/linux/blkdev.h:5, from drivers//scsi/sd_zbc.c:11: drivers//scsi/sd_zbc.c: In function 'sd_zbc_read_zones': >> include/linux/kernel.h:62:48: warning: 'zone_blocks' may be used uninitialized in this function [-Wmaybe-uninitialized] #define __round_mask(x, y) ((__typeof__(x))((y)-1)) ^ drivers//scsi/sd_zbc.c:464:6: note: 'zone_blocks' was declared here u32 zone_blocks; ^~~~~~~~~~~ This is a false-positive report. The variable zone_blocks is always initialized in sd_zbc_check_zones() before use. It is not initialized only and only if sd_zbc_check_zones() fails. Avoid this warning by initializing the zone_blocks variable to 0. Fixes: 5f832a395859 ("scsi: sd_zbc: Fix sd_zbc_check_zones() error checks") Cc: Stable Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index d5e83cfa4d74..4e15db16e91e 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -417,7 +417,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) { struct gendisk *disk = sdkp->disk; unsigned int nr_zones; - u32 zone_blocks; + u32 zone_blocks = 0; int ret; if (!sd_is_zoned(sdkp)) From f9b0530fa02e0c73f31a49ef743e8f44eb8e32cc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Jul 2019 10:08:19 +0800 Subject: [PATCH 16/24] scsi: core: Fix race on creating sense cache When scsi_init_sense_cache(host) is called concurrently from different hosts, each code path may find that no cache has been created and allocate a new one. The lack of locking can lead to potentially overriding a cache allocated by a different host. Fix the issue by moving 'mutex_lock(&scsi_sense_cache_mutex)' before scsi_select_sense_cache(). Fixes: 0a6ac4ee7c21 ("scsi: respect unchecked_isa_dma for blk-mq") Cc: Stable Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ewan D. Milne Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ae1c989ef08e..420e2354b36b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -84,11 +84,11 @@ int scsi_init_sense_cache(struct Scsi_Host *shost) struct kmem_cache *cache; int ret = 0; + mutex_lock(&scsi_sense_cache_mutex); cache = scsi_select_sense_cache(shost->unchecked_isa_dma); if (cache) - return 0; + goto exit; - mutex_lock(&scsi_sense_cache_mutex); if (shost->unchecked_isa_dma) { scsi_sense_isadma_cache = kmem_cache_create("scsi_sense_cache(DMA)", @@ -104,7 +104,7 @@ int scsi_init_sense_cache(struct Scsi_Host *shost) if (!scsi_sense_cache) ret = -ENOMEM; } - + exit: mutex_unlock(&scsi_sense_cache_mutex); return ret; } From 7ad388d8e4c703980b7018b938cdeec58832d78d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:19:53 +0200 Subject: [PATCH 17/24] scsi: core: add a host / host template field for the virt boundary This allows drivers setting it up easily instead of branching out to block layer calls in slave_alloc, and ensures the upgraded max_segment_size setting gets picked up by the DMA layer. Signed-off-by: Christoph Hellwig Acked-by: Kashyap Desai < kashyap.desai@broadcom.com> Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 3 +++ drivers/scsi/scsi_lib.c | 3 ++- include/scsi/scsi_host.h | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 96ed24841c33..f98509d717a0 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -462,6 +462,9 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) else shost->dma_boundary = 0xffffffff; + if (sht->virt_boundary_mask) + shost->virt_boundary_mask = sht->virt_boundary_mask; + device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 420e2354b36b..404e5e28ef62 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1791,7 +1791,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) dma_set_seg_boundary(dev, shost->dma_boundary); blk_queue_max_segment_size(q, shost->max_segment_size); - dma_set_max_seg_size(dev, shost->max_segment_size); + blk_queue_virt_boundary(q, shost->virt_boundary_mask); + dma_set_max_seg_size(dev, queue_max_segment_size(q)); /* * Set a reasonable default alignment: The larger of 32-byte (dword), diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index a5fcdad4a03e..cc139dbd71e5 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -369,6 +369,8 @@ struct scsi_host_template { */ unsigned long dma_boundary; + unsigned long virt_boundary_mask; + /* * This specifies "machine infinity" for host templates which don't * limit the transfer size. Note this limit represents an absolute @@ -587,6 +589,7 @@ struct Scsi_Host { unsigned int max_sectors; unsigned int max_segment_size; unsigned long dma_boundary; + unsigned long virt_boundary_mask; /* * In scsi-mq mode, the number of hardware queues supported by the LLD. * From bdd17bdef7d8da4d8eee254abb4c92d8a566bdc1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:19:54 +0200 Subject: [PATCH 18/24] scsi: core: take the DMA max mapping size into account We need to limit the device's max_sectors to what the DMA mapping implementation can support. If not, we risk running out of swiotlb buffers easily. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 404e5e28ef62..699623f81dde 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1784,6 +1784,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); } + shost->max_sectors = min_t(unsigned int, shost->max_sectors, + dma_max_mapping_size(dev) << SECTOR_SHIFT); blk_queue_max_hw_sectors(q, shost->max_sectors); if (shost->unchecked_isa_dma) blk_queue_bounce_limit(q, BLK_BOUNCE_ISA); From 552a990ca1668895851271df1224cb02003af348 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:19:55 +0200 Subject: [PATCH 19/24] scsi: ufshcd: set max_segment_size in the scsi host template We need to also mirror the value to the device to ensure IOMMU merging doesn't undo it, and the SCSI host level parameter will ensure that. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a208589426b1..a3b6cd1a623d 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4586,8 +4586,6 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) struct request_queue *q = sdev->request_queue; blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1); - blk_queue_max_segment_size(q, PRDT_DATA_BYTE_COUNT_MAX); - return 0; } @@ -7021,6 +7019,7 @@ static struct scsi_host_template ufshcd_driver_template = { .sg_tablesize = SG_ALL, .cmd_per_lun = UFSHCD_CMD_PER_LUN, .can_queue = UFSHCD_CAN_QUEUE, + .max_segment_size = PRDT_DATA_BYTE_COUNT_MAX, .max_host_blocked = 1, .track_queue_depth = 1, .sdev_groups = ufshcd_driver_groups, From 83eed4592f19a04004c80e6f256236264db95dd8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:19:56 +0200 Subject: [PATCH 20/24] scsi: storvsc: set virt_boundary_mask in the scsi host template This ensures all proper DMA layer handling is taken care of by the SCSI midlayer. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 5b6c0f88da59..559fc63e9f1c 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1435,9 +1435,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice) { blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ)); - /* Ensure there are no gaps in presented sgls */ - blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1); - sdevice->no_write_same = 1; /* @@ -1710,6 +1707,8 @@ static struct scsi_host_template scsi_driver = { .this_id = -1, /* Make sure we dont get a sg segment crosses a page boundary */ .dma_boundary = PAGE_SIZE-1, + /* Ensure there are no gaps in presented sgls */ + .virt_boundary_mask = PAGE_SIZE-1, .no_write_same = 1, .track_queue_depth = 1, .change_queue_depth = storvsc_change_queue_depth, From 09a4460ba4434ef0327cd26bf25f2d7afb973251 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:19:57 +0200 Subject: [PATCH 21/24] scsi: IB/iser: set virt_boundary_mask in the scsi host This ensures all proper DMA layer handling is taken care of by the SCSI midlayer. Signed-off-by: Christoph Hellwig Acked-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Martin K. Petersen --- drivers/infiniband/ulp/iser/iscsi_iser.c | 35 +++++------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9c185a8dabd3..841b66397a57 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -613,6 +613,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; + struct ib_device *ib_dev; u32 max_fr_sectors; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); @@ -643,16 +644,19 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } ib_conn = &iser_conn->ib_conn; + ib_dev = ib_conn->device->ib_device; if (ib_conn->pi_support) { - u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap; + u32 sig_caps = ib_dev->attrs.sig_prot_cap; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | SHOST_DIX_GUARD_CRC); } - if (iscsi_host_add(shost, - ib_conn->device->ib_device->dev.parent)) { + if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + shost->virt_boundary_mask = ~MASK_4K; + + if (iscsi_host_add(shost, ib_dev->dev.parent)) { mutex_unlock(&iser_conn->state_mutex); goto free_host; } @@ -958,30 +962,6 @@ static umode_t iser_attr_is_visible(int param_type, int param) return 0; } -static int iscsi_iser_slave_alloc(struct scsi_device *sdev) -{ - struct iscsi_session *session; - struct iser_conn *iser_conn; - struct ib_device *ib_dev; - - mutex_lock(&unbind_iser_conn_mutex); - - session = starget_to_session(scsi_target(sdev))->dd_data; - iser_conn = session->leadconn->dd_data; - if (!iser_conn) { - mutex_unlock(&unbind_iser_conn_mutex); - return -ENOTCONN; - } - ib_dev = iser_conn->ib_conn.device->ib_device; - - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) - blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); - - mutex_unlock(&unbind_iser_conn_mutex); - - return 0; -} - static struct scsi_host_template iscsi_iser_sht = { .module = THIS_MODULE, .name = "iSCSI Initiator over iSER", @@ -994,7 +974,6 @@ static struct scsi_host_template iscsi_iser_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .slave_alloc = iscsi_iser_slave_alloc, .proc_name = "iscsi_iser", .this_id = -1, .track_queue_depth = 1, From 8c175d3131acd166ba57f1fd31f0d76f6fa4f376 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:19:58 +0200 Subject: [PATCH 22/24] scsi: IB/srp: set virt_boundary_mask in the scsi host This ensures all proper DMA layer handling is taken care of by the SCSI midlayer. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Acked-by: Bart Van Assche Reviewed-by: Max Gurtovoy Signed-off-by: Martin K. Petersen --- drivers/infiniband/ulp/srp/ib_srp.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b7c5a35f7daa..9c5b40601948 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3044,20 +3044,6 @@ static int srp_target_alloc(struct scsi_target *starget) return 0; } -static int srp_slave_alloc(struct scsi_device *sdev) -{ - struct Scsi_Host *shost = sdev->host; - struct srp_target_port *target = host_to_target(shost); - struct srp_device *srp_dev = target->srp_host->srp_dev; - struct ib_device *ibdev = srp_dev->dev; - - if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) - blk_queue_virt_boundary(sdev->request_queue, - ~srp_dev->mr_page_mask); - - return 0; -} - static int srp_slave_configure(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; @@ -3260,7 +3246,6 @@ static struct scsi_host_template srp_template = { .name = "InfiniBand SRP initiator", .proc_name = DRV_NAME, .target_alloc = srp_target_alloc, - .slave_alloc = srp_slave_alloc, .slave_configure = srp_slave_configure, .info = srp_target_info, .queuecommand = srp_queuecommand, @@ -3795,6 +3780,9 @@ static ssize_t srp_create_target(struct device *dev, target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target_host->max_segment_size = ib_dma_max_seg_size(ibdev); + if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; + target = host_to_target(target_host); target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); From ce0ad853109733d772d26224297fda0de313bf13 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:19:59 +0200 Subject: [PATCH 23/24] scsi: mpt3sas: set an unlimited max_segment_size for SAS 3.0 HBAs When using a virt_boundary_mask, as done for NVMe devices attached to mpt3sas controllers, we require an unlimited max_segment_size as the virt boundary merging code assumes that. But we also need to propagate that to the DMA mapping layer to make dma-debug happy. The SCSI layer takes care of that when using the per-host virt_boundary setting, but given that mpt3sas only wants to set the virt_boundary for actual NVMe devices, we can't rely on that. The DMA layer maximum segment is global to the HBA however, so we have to set it explicitly. This patch assumes that mpt3sas does not have a segment size limitation, which seems true based on the SGL format, but will need to be verified. Signed-off-by: Christoph Hellwig Acked-by: Suganath Prabu Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 27c731a3fb49..717ba0845a2a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -10238,6 +10238,7 @@ static struct scsi_host_template mpt3sas_driver_template = { .this_id = -1, .sg_tablesize = MPT3SAS_SG_DEPTH, .max_sectors = 32767, + .max_segment_size = 0xffffffff, .cmd_per_lun = 7, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, From 07d9aa14346489d6facae5777ceb267a1dcadbc5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2019 14:20:00 +0200 Subject: [PATCH 24/24] scsi: megaraid_sas: set an unlimited max_segment_size When using a virt_boundary_mask, as done for NVMe devices attached to megaraid_sas controllers, we require an unlimited max_segment_size as the virt boundary merging code assumes that. But we also need to propagate that to the DMA mapping layer to make dma-debug happy. The SCSI layer takes care of that when using the per-host virt_boundary setting, but given that megaraid_sas only wants to set the virt_boundary for actual NVMe devices, we can't rely on that. The DMA layer maximum segment is global to the HBA however, so we have to set it explicitly. This patch assumes that megaraid_sas does not have a segment size limitation, which seems true based on the SGL format, but will need to be verified. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 8b0dc0795f64..178f8e20ee9a 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3356,6 +3356,7 @@ static struct scsi_host_template megasas_template = { .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, .change_queue_depth = scsi_change_queue_depth, + .max_segment_size = 0xffffffff, .no_write_same = 1, };