linux/drivers/scsi/device_handler/scsi_dh_rdac.c

835 lines
20 KiB
C
Raw Normal View History

/*
* LSI/Engenio/NetApp E-Series RDAC SCSI Device Handler
*
* Copyright (C) 2005 Mike Christie. All rights reserved.
* Copyright (C) Chandra Seetharaman, IBM Corp. 2007
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
#include <scsi/scsi.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_dh.h>
#include <linux/workqueue.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. 
This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/module.h>
/* Driver name; also used as the prefix of every RDAC_LOG message. */
#define RDAC_NAME "rdac"
/* NOTE(review): presumably the retry budget for MODE SELECT; users are outside this view. */
#define RDAC_RETRY_COUNT 5
/*
* LSI mode page stuff
*
* These struct definitions and the forming of the
* mode page were taken from the LSI RDAC 2.4 GPL'd
* driver, and then converted to Linux conventions.
*/
/* Value stored in rdac_mode_common.quiescence_timeout by rdac_failover_get(). */
#define RDAC_QUIESCENCE_TIME 20
/*
* Page Codes
*/
/* Page code of the redundant controller page (0x40 is added for the expanded page). */
#define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c
/*
* Controller modes definitions
*/
/* Value stored in rdac_mode_common.rdac_mode[1] by rdac_failover_get(). */
#define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02
/*
* RDAC Options field
*/
/* Value stored in rdac_mode_common.rdac_options by rdac_failover_get(). */
#define RDAC_FORCED_QUIESENCE 0x02
/* NOTE(review): presumably the SCSI command timeout and retry count; confirm at the call sites. */
#define RDAC_TIMEOUT (60 * HZ)
#define RDAC_RETRIES 3
/*
 * Parameter header placed in front of the legacy page (struct
 * rdac_pg_legacy), which rdac_failover_get() sends with MODE_SELECT.
 * All fields are left zero by rdac_failover_get().
 */
struct rdac_mode_6_hdr {
u8 data_len;
u8 medium_type;
u8 device_params;
u8 block_desc_len;
};
/*
 * Parameter header placed in front of the expanded page (struct
 * rdac_pg_expanded), which rdac_failover_get() sends with MODE_SELECT_10.
 * All fields are left zero by rdac_failover_get().
 */
struct rdac_mode_10_hdr {
u16 data_len;
u8 medium_type;
u8 device_params;
u16 reserved;
u16 block_desc_len;
};
/*
 * Fields shared by the legacy and expanded redundant controller pages.
 * rdac_failover_get() zeroes the structure and then sets rdac_mode[1],
 * quiescence_timeout and rdac_options; the remaining fields stay zero.
 */
struct rdac_mode_common {
u8 controller_serial[16];
u8 alt_controller_serial[16];
u8 rdac_mode[2];
u8 alt_rdac_mode[2];
u8 quiescence_timeout;
u8 rdac_options;
};
/*
 * Legacy redundant controller page, used when rdac_controller.use_ms10
 * is zero. The bytes after page_len add up to 0x68 (common + lun_table +
 * reserved fields), which is the page_len value rdac_failover_get() writes.
 */
struct rdac_pg_legacy {
struct rdac_mode_6_hdr hdr;
u8 page_code;
u8 page_len;
struct rdac_mode_common common;
/* Number of per-LUN entries in the legacy lun_table. */
#define MODE6_MAX_LUN 32
/* Indexed by LUN; rdac_failover_get() writes 0x81 for each queued LUN. */
u8 lun_table[MODE6_MAX_LUN];
u8 reserved2[32];
u8 reserved3;
u8 reserved4;
};
/*
 * Expanded redundant controller page, used when rdac_controller.use_ms10
 * is non-zero. The bytes after page_len add up to 0x0128 (common +
 * lun_table + reserved fields), matching the page_len[0]/page_len[1]
 * values rdac_failover_get() writes.
 */
struct rdac_pg_expanded {
struct rdac_mode_10_hdr hdr;
u8 page_code;
u8 subpage_code;
u8 page_len[2];
struct rdac_mode_common common;
/* Indexed by LUN; rdac_failover_get() writes 0x81 for each queued LUN. */
u8 lun_table[256];
u8 reserved3;
u8 reserved4;
};
/*
 * Layout of VPD page 0xC9, read by check_ownership().
 */
struct c9_inquiry {
u8 peripheral_info;
u8 page_code; /* 0xC9 */
u8 reserved1;
u8 page_len;
u8 page_id[4]; /* "vace" */
/*
 * As decoded by check_ownership(): bit 0 set means the LUN is owned,
 * bit 5 set means IOSHIP mode, bit 7 set means AVT mode.
 */
u8 avte_cvp;
/* Bit 0 set means this is the preferred path (see check_ownership()). */
u8 path_prio;
u8 reserved2[38];
};
/* Sizes of the identifier fields below; ARRAY_LABEL_LEN sizes rdac_controller.array_name. */
#define SUBSYS_ID_LEN 16
#define SLOT_ID_LEN 2
#define ARRAY_LABEL_LEN 31
/*
 * Layout of VPD page 0xC4, read by initialize_controller().
 */
struct c4_inquiry {
u8 peripheral_info;
u8 page_code; /* 0xC4 */
u8 reserved1;
u8 page_len;
u8 page_id[4]; /* "subs" */
u8 subsys_id[SUBSYS_ID_LEN];
u8 revision[4];
/* slot_id[1] == 0x31 selects controller index 0, anything else index 1. */
u8 slot_id[SLOT_ID_LEN];
u8 reserved[2];
};
/* Size of the array identifier kept in rdac_controller.array_id. */
#define UNIQUE_ID_LEN 16
/*
 * Layout of VPD page 0xC8, read by get_lun_info().
 */
struct c8_inquiry {
u8 peripheral_info;
u8 page_code; /* 0xC8 */
u8 reserved1;
u8 page_len;
u8 page_id[4]; /* "edid" */
u8 reserved2[3];
u8 vol_uniq_id_len;
u8 vol_uniq_id[16];
u8 vol_user_label_len;
u8 vol_user_label[60];
/* Device-reported length of array_unique_id. */
u8 array_uniq_id_len;
u8 array_unique_id[UNIQUE_ID_LEN];
u8 array_user_label_len;
/* get_lun_info() copies every second byte (odd offsets) into the array name. */
u8 array_user_label[60];
/* get_lun_info() uses only lun[7] as the LUN number. */
u8 lun[8];
};
/*
 * Per-controller state. get_controller() looks an instance up by
 * (array_id, index, host) and creates one when no match exists;
 * release_controller() unlinks and frees it when the last kref is dropped.
 */
struct rdac_controller {
u8 array_id[UNIQUE_ID_LEN];
/* Non-zero selects the expanded page and MODE_SELECT_10; initialised to -1. */
int use_ms10;
struct kref kref;
struct list_head node; /* list of all controllers */
/* Page buffer filled in by rdac_failover_get(). */
union {
struct rdac_pg_legacy legacy;
struct rdac_pg_expanded expanded;
} mode_select;
u8 index;
u8 array_name[ARRAY_LABEL_LEN];
struct Scsi_Host *host;
/* NOTE(review): ms_* fields presumably track queued MODE SELECT work; users are outside this view. */
spinlock_t ms_lock;
int ms_queued;
/* Work item whose handler is send_mode_select(). */
struct work_struct ms_work;
struct scsi_device *ms_sdev;
struct list_head ms_head;
/* List of struct rdac_dh_data (linked through their node member). */
struct list_head dh_list;
};
/*
 * Layout of VPD page 0xC2. The field sizes add up to 0xFF bytes.
 */
struct c2_inquiry {
u8 peripheral_info;
u8 page_code; /* 0xC2 */
u8 reserved1;
u8 page_len;
u8 page_id[4]; /* "swr4" */
u8 sw_version[3];
u8 sw_date[3];
u8 features_enabled;
u8 max_lun_supported;
u8 partitions[239]; /* Total allocation length should be 0xFF */
};
/*
 * Per-device handler state.
 */
struct rdac_dh_data {
/* Links this entry into rdac_controller.dh_list. */
struct list_head node;
struct rdac_controller *ctlr;
struct scsi_device *sdev;
/* Sentinel outside the 0-255 range that get_lun_info() can store in lun. */
#define UNINITIALIZED_LUN (1 << 8)
/* LUN number; get_lun_info() takes it from the last byte of the C8 page lun field. */
unsigned lun;
/* Operating mode detected by check_ownership(); also indexes mode[]. */
#define RDAC_MODE 0
#define RDAC_MODE_AVT 1
#define RDAC_MODE_IOSHIP 2
unsigned char mode;
/* Path state; PASSIVE only for an unowned LUN in RDAC mode. */
#define RDAC_STATE_ACTIVE 0
#define RDAC_STATE_PASSIVE 1
unsigned char state;
/* Ownership reported by the C9 page; also indexes lun_state[]. */
#define RDAC_LUN_UNOWNED 0
#define RDAC_LUN_OWNED 1
char lun_state;
/* Path preference reported by the C9 page. */
#define RDAC_PREFERRED 0
#define RDAC_NON_PREFERRED 1
char preferred;
/* Scratch buffer shared by the VPD page reads; only one layout is valid at a time. */
union {
struct c2_inquiry c2;
struct c4_inquiry c4;
struct c8_inquiry c8;
struct c9_inquiry c9;
} inq;
};
/*
 * Printable names of the operating modes, indexed by the RDAC_MODE,
 * RDAC_MODE_AVT and RDAC_MODE_IOSHIP values (0, 1, 2). The table itself
 * is constant, not just the strings it points to.
 */
static const char * const mode[] = {
	"RDAC",
	"AVT",
	"IOSHIP",
};
/*
 * Printable names of the ownership states, indexed by the
 * RDAC_LUN_UNOWNED and RDAC_LUN_OWNED values (0, 1). The table itself
 * is constant, not just the strings it points to.
 */
static const char * const lun_state[] = {
	"unowned",
	"owned",
};
/*
 * One queued request. rdac_failover_get() walks a list of these (linked
 * through entry) and marks h->lun in the page's LUN table.
 */
struct rdac_queue_data {
struct list_head entry;
struct rdac_dh_data *h;
/* NOTE(review): presumably invoked with callback_data once activation finishes; the caller is outside this view. */
activate_complete callback_fn;
void *callback_data;
};
/* All known controllers; searched and extended by get_controller(). */
static LIST_HEAD(ctlr_list);
/* NOTE(review): presumably protects ctlr_list; it is not taken in get_controller() or release_controller() themselves, so confirm at their callers. */
static DEFINE_SPINLOCK(list_lock);
/* Workqueue for the driver; NOTE(review): presumably where ms_work items run, confirm at the queueing site. */
static struct workqueue_struct *kmpath_rdacd;
/* Work handler installed on rdac_controller.ms_work by get_controller(). */
static void send_mode_select(struct work_struct *work);
/*
 * Module parameter that enables rdac debug logging.
 * It is a bit mask in which each type of logging owns two bits; only
 * two types are defined for now. More can be added later if required.
 */
static int rdac_logging = 1;
module_param(rdac_logging, int, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(rdac_logging, "A bit mask of rdac logging levels, "
"Default is 1 - failover logging enabled, "
"set it to 0xF to enable all the logs");
/* Bit offset of each logging type's field within rdac_logging. */
#define RDAC_LOG_FAILOVER	0
#define RDAC_LOG_SENSE		2
/* Width, in bits, of one logging type's field. */
#define RDAC_LOG_BITS		2

/* Extract the level stored at bit offset SHIFT of rdac_logging. */
#define RDAC_LOG_LEVEL(SHIFT) \
	((rdac_logging >> (SHIFT)) & ((1 << (RDAC_LOG_BITS)) - 1))

/*
 * Print an informational message for sdev, prefixed with RDAC_NAME, when
 * the logging type at bit offset SHIFT has a non-zero level.
 *
 * The expansion deliberately ends without a semicolon so that the macro
 * is exactly one statement: the caller writes the terminating ';', and
 * the macro can be used as the body of an un-braced if/else.
 */
#define RDAC_LOG(SHIFT, sdev, f, arg...) \
do { \
	if (unlikely(RDAC_LOG_LEVEL(SHIFT))) \
		sdev_printk(KERN_INFO, sdev, RDAC_NAME ": " f "\n", ## arg); \
} while (0)
static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
struct list_head *list,
unsigned char *cdb)
{
struct rdac_mode_common *common;
unsigned data_size;
struct rdac_queue_data *qdata;
u8 *lun_table;
scsi: scsi_dh_rdac: Use ctlr directly in rdac_failover_get() rdac_failover_get references struct rdac_controller as ctlr->ms_sdev->handler_data->ctlr for no apparent reason. Besides being inefficient this also introduces a null-pointer dereference as send_mode_select() sets ctlr->ms_sdev to NULL before calling rdac_failover_get(): [ 18.432550] device-mapper: multipath service-time: version 0.3.0 loaded [ 18.436124] BUG: unable to handle kernel NULL pointer dereference at 0000000000000790 [ 18.436129] IP: send_mode_select+0xca/0x560 [ 18.436129] PGD 0 [ 18.436130] P4D 0 [ 18.436130] [ 18.436132] Oops: 0000 [#1] SMP [ 18.436133] Modules linked in: dm_service_time sd_mod dm_multipath amdkfd amd_iommu_v2 radeon(+) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qla2xxx drm serio_raw scsi_transport_fc bnx2 i2c_core dm_mirror dm_region_hash dm_log dm_mod [ 18.436143] CPU: 4 PID: 443 Comm: kworker/u16:2 Not tainted 4.12.0-rc1.1.el7.test.x86_64 #1 [ 18.436144] Hardware name: IBM BladeCenter LS22 -[79013SG]-/Server Blade, BIOS -[L8E164AUS-1.07]- 05/25/2011 [ 18.436145] Workqueue: kmpath_rdacd send_mode_select [ 18.436146] task: ffff880225116a40 task.stack: ffffc90002bd8000 [ 18.436148] RIP: 0010:send_mode_select+0xca/0x560 [ 18.436148] RSP: 0018:ffffc90002bdbda8 EFLAGS: 00010246 [ 18.436149] RAX: 0000000000000000 RBX: ffffc90002bdbe08 RCX: ffff88017ef04a80 [ 18.436150] RDX: ffffc90002bdbe08 RSI: ffff88017ef04a80 RDI: ffff8802248e4388 [ 18.436151] RBP: ffffc90002bdbe48 R08: 0000000000000000 R09: ffffffff81c104c0 [ 18.436151] R10: 00000000000001ff R11: 000000000000035a R12: ffffc90002bdbdd8 [ 18.436152] R13: ffff8802248e4390 R14: ffff880225152800 R15: ffff8802248e4400 [ 18.436153] FS: 0000000000000000(0000) GS:ffff880227d00000(0000) knlGS:0000000000000000 [ 18.436154] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.436154] CR2: 0000000000000790 CR3: 000000042535b000 CR4: 00000000000006e0 [ 18.436155] Call Trace: [ 18.436159] ? 
rdac_activate+0x14e/0x150 [ 18.436161] ? refcount_dec_and_test+0x11/0x20 [ 18.436162] ? kobject_put+0x1c/0x50 [ 18.436165] ? scsi_dh_activate+0x6f/0xd0 [ 18.436168] process_one_work+0x149/0x360 [ 18.436170] worker_thread+0x4d/0x3c0 [ 18.436172] kthread+0x109/0x140 [ 18.436173] ? rescuer_thread+0x380/0x380 [ 18.436174] ? kthread_park+0x60/0x60 [ 18.436176] ret_from_fork+0x2c/0x40 [ 18.436177] Code: 49 c7 46 20 00 00 00 00 4c 89 ef c6 07 00 0f 1f 40 00 45 31 ed c7 45 b0 05 00 00 00 44 89 6d b4 4d 89 f5 4c 8b 75 a8 49 8b 45 20 <48> 8b b0 90 07 00 00 48 8b 56 10 8b 42 10 48 8d 7a 28 85 c0 0f [ 18.436192] RIP: send_mode_select+0xca/0x560 RSP: ffffc90002bdbda8 [ 18.436192] CR2: 0000000000000790 [ 18.436198] ---[ end trace 40f3e4dca1ffabdd ]--- [ 18.436199] Kernel panic - not syncing: Fatal exception [ 18.436222] Kernel Offset: disabled [-- MARK -- Thu May 18 11:45:00 2017] Fixes: 327825574132 scsi_dh_rdac: switch to scsi_execute_req_flags() Cc: stable@vger.kernel.org Signed-off-by: Artem Savkov <asavkov@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-05-20 15:58:10 +08:00
if (ctlr->use_ms10) {
struct rdac_pg_expanded *rdac_pg;
data_size = sizeof(struct rdac_pg_expanded);
scsi: scsi_dh_rdac: Use ctlr directly in rdac_failover_get() rdac_failover_get references struct rdac_controller as ctlr->ms_sdev->handler_data->ctlr for no apparent reason. Besides being inefficient this also introduces a null-pointer dereference as send_mode_select() sets ctlr->ms_sdev to NULL before calling rdac_failover_get(): [ 18.432550] device-mapper: multipath service-time: version 0.3.0 loaded [ 18.436124] BUG: unable to handle kernel NULL pointer dereference at 0000000000000790 [ 18.436129] IP: send_mode_select+0xca/0x560 [ 18.436129] PGD 0 [ 18.436130] P4D 0 [ 18.436130] [ 18.436132] Oops: 0000 [#1] SMP [ 18.436133] Modules linked in: dm_service_time sd_mod dm_multipath amdkfd amd_iommu_v2 radeon(+) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qla2xxx drm serio_raw scsi_transport_fc bnx2 i2c_core dm_mirror dm_region_hash dm_log dm_mod [ 18.436143] CPU: 4 PID: 443 Comm: kworker/u16:2 Not tainted 4.12.0-rc1.1.el7.test.x86_64 #1 [ 18.436144] Hardware name: IBM BladeCenter LS22 -[79013SG]-/Server Blade, BIOS -[L8E164AUS-1.07]- 05/25/2011 [ 18.436145] Workqueue: kmpath_rdacd send_mode_select [ 18.436146] task: ffff880225116a40 task.stack: ffffc90002bd8000 [ 18.436148] RIP: 0010:send_mode_select+0xca/0x560 [ 18.436148] RSP: 0018:ffffc90002bdbda8 EFLAGS: 00010246 [ 18.436149] RAX: 0000000000000000 RBX: ffffc90002bdbe08 RCX: ffff88017ef04a80 [ 18.436150] RDX: ffffc90002bdbe08 RSI: ffff88017ef04a80 RDI: ffff8802248e4388 [ 18.436151] RBP: ffffc90002bdbe48 R08: 0000000000000000 R09: ffffffff81c104c0 [ 18.436151] R10: 00000000000001ff R11: 000000000000035a R12: ffffc90002bdbdd8 [ 18.436152] R13: ffff8802248e4390 R14: ffff880225152800 R15: ffff8802248e4400 [ 18.436153] FS: 0000000000000000(0000) GS:ffff880227d00000(0000) knlGS:0000000000000000 [ 18.436154] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.436154] CR2: 0000000000000790 CR3: 000000042535b000 CR4: 00000000000006e0 [ 18.436155] Call Trace: [ 18.436159] ? 
rdac_activate+0x14e/0x150 [ 18.436161] ? refcount_dec_and_test+0x11/0x20 [ 18.436162] ? kobject_put+0x1c/0x50 [ 18.436165] ? scsi_dh_activate+0x6f/0xd0 [ 18.436168] process_one_work+0x149/0x360 [ 18.436170] worker_thread+0x4d/0x3c0 [ 18.436172] kthread+0x109/0x140 [ 18.436173] ? rescuer_thread+0x380/0x380 [ 18.436174] ? kthread_park+0x60/0x60 [ 18.436176] ret_from_fork+0x2c/0x40 [ 18.436177] Code: 49 c7 46 20 00 00 00 00 4c 89 ef c6 07 00 0f 1f 40 00 45 31 ed c7 45 b0 05 00 00 00 44 89 6d b4 4d 89 f5 4c 8b 75 a8 49 8b 45 20 <48> 8b b0 90 07 00 00 48 8b 56 10 8b 42 10 48 8d 7a 28 85 c0 0f [ 18.436192] RIP: send_mode_select+0xca/0x560 RSP: ffffc90002bdbda8 [ 18.436192] CR2: 0000000000000790 [ 18.436198] ---[ end trace 40f3e4dca1ffabdd ]--- [ 18.436199] Kernel panic - not syncing: Fatal exception [ 18.436222] Kernel Offset: disabled [-- MARK -- Thu May 18 11:45:00 2017] Fixes: 327825574132 scsi_dh_rdac: switch to scsi_execute_req_flags() Cc: stable@vger.kernel.org Signed-off-by: Artem Savkov <asavkov@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-05-20 15:58:10 +08:00
rdac_pg = &ctlr->mode_select.expanded;
memset(rdac_pg, 0, data_size);
common = &rdac_pg->common;
rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40;
rdac_pg->subpage_code = 0x1;
rdac_pg->page_len[0] = 0x01;
rdac_pg->page_len[1] = 0x28;
lun_table = rdac_pg->lun_table;
} else {
struct rdac_pg_legacy *rdac_pg;
data_size = sizeof(struct rdac_pg_legacy);
scsi: scsi_dh_rdac: Use ctlr directly in rdac_failover_get() rdac_failover_get references struct rdac_controller as ctlr->ms_sdev->handler_data->ctlr for no apparent reason. Besides being inefficient this also introduces a null-pointer dereference as send_mode_select() sets ctlr->ms_sdev to NULL before calling rdac_failover_get(): [ 18.432550] device-mapper: multipath service-time: version 0.3.0 loaded [ 18.436124] BUG: unable to handle kernel NULL pointer dereference at 0000000000000790 [ 18.436129] IP: send_mode_select+0xca/0x560 [ 18.436129] PGD 0 [ 18.436130] P4D 0 [ 18.436130] [ 18.436132] Oops: 0000 [#1] SMP [ 18.436133] Modules linked in: dm_service_time sd_mod dm_multipath amdkfd amd_iommu_v2 radeon(+) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qla2xxx drm serio_raw scsi_transport_fc bnx2 i2c_core dm_mirror dm_region_hash dm_log dm_mod [ 18.436143] CPU: 4 PID: 443 Comm: kworker/u16:2 Not tainted 4.12.0-rc1.1.el7.test.x86_64 #1 [ 18.436144] Hardware name: IBM BladeCenter LS22 -[79013SG]-/Server Blade, BIOS -[L8E164AUS-1.07]- 05/25/2011 [ 18.436145] Workqueue: kmpath_rdacd send_mode_select [ 18.436146] task: ffff880225116a40 task.stack: ffffc90002bd8000 [ 18.436148] RIP: 0010:send_mode_select+0xca/0x560 [ 18.436148] RSP: 0018:ffffc90002bdbda8 EFLAGS: 00010246 [ 18.436149] RAX: 0000000000000000 RBX: ffffc90002bdbe08 RCX: ffff88017ef04a80 [ 18.436150] RDX: ffffc90002bdbe08 RSI: ffff88017ef04a80 RDI: ffff8802248e4388 [ 18.436151] RBP: ffffc90002bdbe48 R08: 0000000000000000 R09: ffffffff81c104c0 [ 18.436151] R10: 00000000000001ff R11: 000000000000035a R12: ffffc90002bdbdd8 [ 18.436152] R13: ffff8802248e4390 R14: ffff880225152800 R15: ffff8802248e4400 [ 18.436153] FS: 0000000000000000(0000) GS:ffff880227d00000(0000) knlGS:0000000000000000 [ 18.436154] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.436154] CR2: 0000000000000790 CR3: 000000042535b000 CR4: 00000000000006e0 [ 18.436155] Call Trace: [ 18.436159] ? 
rdac_activate+0x14e/0x150 [ 18.436161] ? refcount_dec_and_test+0x11/0x20 [ 18.436162] ? kobject_put+0x1c/0x50 [ 18.436165] ? scsi_dh_activate+0x6f/0xd0 [ 18.436168] process_one_work+0x149/0x360 [ 18.436170] worker_thread+0x4d/0x3c0 [ 18.436172] kthread+0x109/0x140 [ 18.436173] ? rescuer_thread+0x380/0x380 [ 18.436174] ? kthread_park+0x60/0x60 [ 18.436176] ret_from_fork+0x2c/0x40 [ 18.436177] Code: 49 c7 46 20 00 00 00 00 4c 89 ef c6 07 00 0f 1f 40 00 45 31 ed c7 45 b0 05 00 00 00 44 89 6d b4 4d 89 f5 4c 8b 75 a8 49 8b 45 20 <48> 8b b0 90 07 00 00 48 8b 56 10 8b 42 10 48 8d 7a 28 85 c0 0f [ 18.436192] RIP: send_mode_select+0xca/0x560 RSP: ffffc90002bdbda8 [ 18.436192] CR2: 0000000000000790 [ 18.436198] ---[ end trace 40f3e4dca1ffabdd ]--- [ 18.436199] Kernel panic - not syncing: Fatal exception [ 18.436222] Kernel Offset: disabled [-- MARK -- Thu May 18 11:45:00 2017] Fixes: 327825574132 scsi_dh_rdac: switch to scsi_execute_req_flags() Cc: stable@vger.kernel.org Signed-off-by: Artem Savkov <asavkov@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-05-20 15:58:10 +08:00
rdac_pg = &ctlr->mode_select.legacy;
memset(rdac_pg, 0, data_size);
common = &rdac_pg->common;
rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER;
rdac_pg->page_len = 0x68;
lun_table = rdac_pg->lun_table;
}
common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS;
common->quiescence_timeout = RDAC_QUIESCENCE_TIME;
common->rdac_options = RDAC_FORCED_QUIESENCE;
list_for_each_entry(qdata, list, entry) {
lun_table[qdata->h->lun] = 0x81;
}
/* Prepare the command. */
scsi: scsi_dh_rdac: Use ctlr directly in rdac_failover_get() rdac_failover_get references struct rdac_controller as ctlr->ms_sdev->handler_data->ctlr for no apparent reason. Besides being inefficient this also introduces a null-pointer dereference as send_mode_select() sets ctlr->ms_sdev to NULL before calling rdac_failover_get(): [ 18.432550] device-mapper: multipath service-time: version 0.3.0 loaded [ 18.436124] BUG: unable to handle kernel NULL pointer dereference at 0000000000000790 [ 18.436129] IP: send_mode_select+0xca/0x560 [ 18.436129] PGD 0 [ 18.436130] P4D 0 [ 18.436130] [ 18.436132] Oops: 0000 [#1] SMP [ 18.436133] Modules linked in: dm_service_time sd_mod dm_multipath amdkfd amd_iommu_v2 radeon(+) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qla2xxx drm serio_raw scsi_transport_fc bnx2 i2c_core dm_mirror dm_region_hash dm_log dm_mod [ 18.436143] CPU: 4 PID: 443 Comm: kworker/u16:2 Not tainted 4.12.0-rc1.1.el7.test.x86_64 #1 [ 18.436144] Hardware name: IBM BladeCenter LS22 -[79013SG]-/Server Blade, BIOS -[L8E164AUS-1.07]- 05/25/2011 [ 18.436145] Workqueue: kmpath_rdacd send_mode_select [ 18.436146] task: ffff880225116a40 task.stack: ffffc90002bd8000 [ 18.436148] RIP: 0010:send_mode_select+0xca/0x560 [ 18.436148] RSP: 0018:ffffc90002bdbda8 EFLAGS: 00010246 [ 18.436149] RAX: 0000000000000000 RBX: ffffc90002bdbe08 RCX: ffff88017ef04a80 [ 18.436150] RDX: ffffc90002bdbe08 RSI: ffff88017ef04a80 RDI: ffff8802248e4388 [ 18.436151] RBP: ffffc90002bdbe48 R08: 0000000000000000 R09: ffffffff81c104c0 [ 18.436151] R10: 00000000000001ff R11: 000000000000035a R12: ffffc90002bdbdd8 [ 18.436152] R13: ffff8802248e4390 R14: ffff880225152800 R15: ffff8802248e4400 [ 18.436153] FS: 0000000000000000(0000) GS:ffff880227d00000(0000) knlGS:0000000000000000 [ 18.436154] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.436154] CR2: 0000000000000790 CR3: 000000042535b000 CR4: 00000000000006e0 [ 18.436155] Call Trace: [ 18.436159] ? 
rdac_activate+0x14e/0x150 [ 18.436161] ? refcount_dec_and_test+0x11/0x20 [ 18.436162] ? kobject_put+0x1c/0x50 [ 18.436165] ? scsi_dh_activate+0x6f/0xd0 [ 18.436168] process_one_work+0x149/0x360 [ 18.436170] worker_thread+0x4d/0x3c0 [ 18.436172] kthread+0x109/0x140 [ 18.436173] ? rescuer_thread+0x380/0x380 [ 18.436174] ? kthread_park+0x60/0x60 [ 18.436176] ret_from_fork+0x2c/0x40 [ 18.436177] Code: 49 c7 46 20 00 00 00 00 4c 89 ef c6 07 00 0f 1f 40 00 45 31 ed c7 45 b0 05 00 00 00 44 89 6d b4 4d 89 f5 4c 8b 75 a8 49 8b 45 20 <48> 8b b0 90 07 00 00 48 8b 56 10 8b 42 10 48 8d 7a 28 85 c0 0f [ 18.436192] RIP: send_mode_select+0xca/0x560 RSP: ffffc90002bdbda8 [ 18.436192] CR2: 0000000000000790 [ 18.436198] ---[ end trace 40f3e4dca1ffabdd ]--- [ 18.436199] Kernel panic - not syncing: Fatal exception [ 18.436222] Kernel Offset: disabled [-- MARK -- Thu May 18 11:45:00 2017] Fixes: 327825574132 scsi_dh_rdac: switch to scsi_execute_req_flags() Cc: stable@vger.kernel.org Signed-off-by: Artem Savkov <asavkov@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2017-05-20 15:58:10 +08:00
if (ctlr->use_ms10) {
cdb[0] = MODE_SELECT_10;
cdb[7] = data_size >> 8;
cdb[8] = data_size & 0xff;
} else {
cdb[0] = MODE_SELECT;
cdb[4] = data_size;
}
return data_size;
}
/*
 * release_controller - kref release callback for struct rdac_controller
 * @kref: the kref embedded in the controller being released
 *
 * Takes the controller off the list it is linked on through its node
 * member and frees it.
 */
static void release_controller(struct kref *kref)
{
	struct rdac_controller *ctlr =
		container_of(kref, struct rdac_controller, kref);

	list_del(&ctlr->node);
	kfree(ctlr);
}
/*
 * get_controller - find or create the controller object for a path
 * @index:      controller index
 * @array_name: array label, ARRAY_LABEL_LEN bytes are copied
 * @array_id:   array identifier, UNIQUE_ID_LEN bytes are compared/copied
 * @sdev:       device whose host the controller belongs to
 *
 * An existing entry of ctlr_list matches when its array_id, index and
 * host are all equal; in that case an extra reference is taken and the
 * entry is returned. Otherwise a new controller is allocated with
 * GFP_ATOMIC, initialised with a reference count of one and use_ms10
 * set to -1, and added to ctlr_list.
 *
 * NOTE(review): ctlr_list is walked and modified without taking a lock
 * in this function, so the caller presumably holds list_lock - confirm.
 *
 * Returns the controller, or NULL when the allocation fails.
 */
static struct rdac_controller *get_controller(int index, char *array_name,
			u8 *array_id, struct scsi_device *sdev)
{
	struct rdac_controller *ctlr, *tmp;

	list_for_each_entry(tmp, &ctlr_list, node) {
		if ((memcmp(tmp->array_id, array_id, UNIQUE_ID_LEN) == 0) &&
			  (tmp->index == index) &&
			  (tmp->host == sdev->host)) {
			kref_get(&tmp->kref);
			return tmp;
		}
	}

	ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC);
	if (!ctlr)
		return NULL;

	/* initialize fields of controller */
	memcpy(ctlr->array_id, array_id, UNIQUE_ID_LEN);
	ctlr->index = index;
	ctlr->host = sdev->host;
	memcpy(ctlr->array_name, array_name, ARRAY_LABEL_LEN);
	kref_init(&ctlr->kref);
	ctlr->use_ms10 = -1;
	ctlr->ms_queued = 0;
	ctlr->ms_sdev = NULL;
	spin_lock_init(&ctlr->ms_lock);
	INIT_WORK(&ctlr->ms_work, send_mode_select);
	INIT_LIST_HEAD(&ctlr->ms_head);
	INIT_LIST_HEAD(&ctlr->dh_list);

	/* Publish the controller only once every field is set up. */
	list_add(&ctlr->node, &ctlr_list);

	return ctlr;
}
/*
 * get_lun_info - read VPD page 0xC8 and extract LUN and array identity
 * @sdev:       device to query
 * @h:          handler data; h->inq.c8 is used as the page buffer and
 *              h->lun receives the LUN number
 * @array_name: output, ARRAY_LABEL_LEN bytes, always NUL terminated
 * @array_id:   output, UNIQUE_ID_LEN bytes, zero padded
 *
 * Returns SCSI_DH_OK on success, SCSI_DH_IO when the page cannot be
 * read, and SCSI_DH_NOSYS when the returned page does not carry page
 * code 0xC8 and the "edid" page id.
 */
static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h,
			char *array_name, u8 *array_id)
{
	struct c8_inquiry *inqp = &h->inq.c8;
	u8 id_len;
	int i;

	if (scsi_get_vpd_page(sdev, 0xC8, (unsigned char *)inqp,
			      sizeof(struct c8_inquiry)))
		return SCSI_DH_IO;

	if (inqp->page_code != 0xc8)
		return SCSI_DH_NOSYS;
	if (inqp->page_id[0] != 'e' || inqp->page_id[1] != 'd' ||
	    inqp->page_id[2] != 'i' || inqp->page_id[3] != 'd')
		return SCSI_DH_NOSYS;

	h->lun = inqp->lun[7]; /* Uses only the last byte */

	/* The label is copied from every second byte, at the odd offsets. */
	for (i = 0; i < ARRAY_LABEL_LEN - 1; ++i)
		array_name[i] = inqp->array_user_label[(2 * i) + 1];
	array_name[ARRAY_LABEL_LEN - 1] = '\0';

	/*
	 * The id length comes from the device. Clamp it so a bogus value
	 * can neither overrun array_id nor read past array_unique_id.
	 */
	id_len = inqp->array_uniq_id_len;
	if (id_len > UNIQUE_ID_LEN)
		id_len = UNIQUE_ID_LEN;

	memset(array_id, 0, UNIQUE_ID_LEN);
	memcpy(array_id, inqp->array_unique_id, id_len);

	return SCSI_DH_OK;
}
/*
 * check_ownership - read VPD page 0xC9 and refresh mode/ownership state
 * @sdev: device to query
 * @h:    handler data; h->inq.c9 is used as the page buffer
 *
 * h->state is reset to RDAC_STATE_ACTIVE before the page is read. On a
 * successful read h->mode, h->lun_state, h->state and h->preferred are
 * updated from the page, and the resulting access state is stored in
 * every device on h->ctlr->dh_list.
 *
 * Returns SCSI_DH_OK on success, SCSI_DH_IO when the page cannot be read.
 */
static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
{
	struct c9_inquiry *inqp = &h->inq.c9;
	struct rdac_dh_data *tmp;
	int access_state;

	h->state = RDAC_STATE_ACTIVE;

	if (scsi_get_vpd_page(sdev, 0xC9, (unsigned char *)inqp,
			      sizeof(struct c9_inquiry)))
		return SCSI_DH_IO;

	/* Operating mode: bit 5 is IOSHIP, bit 7 is AVT, neither is RDAC. */
	if (inqp->avte_cvp & 0x20)
		h->mode = RDAC_MODE_IOSHIP;
	else if (inqp->avte_cvp & 0x80)
		h->mode = RDAC_MODE_AVT;
	else
		h->mode = RDAC_MODE;

	/* Ownership: bit 0 set means this controller owns the LUN. */
	if (!(inqp->avte_cvp & 0x1)) {
		h->lun_state = RDAC_LUN_UNOWNED;
		if (h->mode == RDAC_MODE) {
			/* Only plain RDAC mode turns an unowned path passive. */
			h->state = RDAC_STATE_PASSIVE;
			access_state = SCSI_ACCESS_STATE_STANDBY;
		} else {
			access_state = SCSI_ACCESS_STATE_ACTIVE;
		}
	} else {
		h->lun_state = RDAC_LUN_OWNED;
		access_state = SCSI_ACCESS_STATE_OPTIMAL;
	}

	/* Path priority: bit 0 set marks the preferred path. */
	if (inqp->path_prio & 0x1) {
		h->preferred = RDAC_PREFERRED;
		access_state |= SCSI_ACCESS_STATE_PREFERRED;
	} else {
		h->preferred = RDAC_NON_PREFERRED;
	}

	/* Propagate the access state to every device on this controller. */
	rcu_read_lock();
	list_for_each_entry_rcu(tmp, &h->ctlr->dh_list, node) {
		/* every entry on the list is expected to carry a valid sdev */
		BUG_ON(!tmp->sdev);
		tmp->sdev->access_state = access_state;
	}
	rcu_read_unlock();

	return SCSI_DH_OK;
}
/*
 * initialize_controller - bind a LUN's handler data to its controller
 * @sdev:	device to send the inquiry to
 * @h:		handler data; h->inq.c4 is used as the inquiry buffer, and
 *		h->ctlr, h->sdev and h->node are set up on success
 * @array_name:	array label, passed through to get_controller()
 * @array_id:	array unique id, passed through to get_controller()
 *
 * Reads VPD page 0xC4 to find the controller slot, obtains the matching
 * controller via get_controller() and links @h on that controller's dh_list.
 *
 * Returns SCSI_DH_OK on success, SCSI_DH_RES_TEMP_UNAVAIL if no controller
 * could be obtained, or SCSI_DH_IO if the page could not be read.
 */
static int initialize_controller(struct scsi_device *sdev,
		struct rdac_dh_data *h, char *array_name, u8 *array_id)
{
	int err = SCSI_DH_IO, index;
	struct c4_inquiry *inqp = &h->inq.c4;

	if (!scsi_get_vpd_page(sdev, 0xC4, (unsigned char *)inqp,
			       sizeof(struct c4_inquiry))) {
		/* get the controller index */
		if (inqp->slot_id[1] == 0x31)
			index = 0;
		else
			index = 1;

		spin_lock(&list_lock);
		h->ctlr = get_controller(index, array_name, array_id, sdev);
		if (!h->ctlr) {
			/*
			 * Report the failure; the caller must not go on to
			 * dereference the NULL h->ctlr.
			 */
			err = SCSI_DH_RES_TEMP_UNAVAIL;
		} else {
			/*
			 * Set sdev before publishing the node: RCU walkers of
			 * dh_list (check_ownership()) BUG_ON a NULL sdev.
			 */
			h->sdev = sdev;
			list_add_rcu(&h->node, &h->ctlr->dh_list);
			err = SCSI_DH_OK;
		}
		spin_unlock(&list_lock);
	}
	return err;
}
/*
 * set_mode_select - choose between MODE SELECT(6) and MODE SELECT(10)
 * @sdev:	device to send the inquiry to
 * @h:		handler data; h->inq.c2 is used as the inquiry buffer and
 *		h->ctlr->use_ms10 is updated
 *
 * Reads VPD page 0xC2.  When the array reports MODE6_MAX_LUN or more
 * supported LUNs, the controller is flagged to use MODE SELECT(10).
 *
 * Returns SCSI_DH_OK on success, SCSI_DH_IO if the page could not be read.
 */
static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h)
{
	struct c2_inquiry *inqp = &h->inq.c2;

	if (scsi_get_vpd_page(sdev, 0xC2, (unsigned char *)inqp,
			      sizeof(struct c2_inquiry)))
		return SCSI_DH_IO;

	h->ctlr->use_ms10 = (inqp->max_lun_supported >= MODE6_MAX_LUN) ? 1 : 0;

	return SCSI_DH_OK;
}
/*
 * mode_select_handle_sense - map MODE SELECT sense data to a SCSI_DH_* code
 * @sdev:	device the MODE SELECT was sent to
 * @sense_hdr:	decoded sense header of the failed command
 *
 * Returns SCSI_DH_RETRY for NO_SENSE, ABORTED_COMMAND, UNIT_ATTENTION and
 * NOT_READY 04/01 (LUN becoming ready), SCSI_DH_IMM_RETRY for
 * ILLEGAL_REQUEST 91/36 (command lock contention), and SCSI_DH_IO for
 * everything else, including invalid sense data.  Valid sense data is
 * logged; invalid sense data is not.
 */
static int mode_select_handle_sense(struct scsi_device *sdev,
				    struct scsi_sense_hdr *sense_hdr)
{
	struct rdac_dh_data *h = sdev->handler_data;
	int result = SCSI_DH_IO;
	int key, asc, ascq;

	if (!scsi_sense_valid(sense_hdr))
		return result;

	key = sense_hdr->sense_key;
	asc = sense_hdr->asc;
	ascq = sense_hdr->ascq;

	if (key == NO_SENSE || key == ABORTED_COMMAND ||
	    key == UNIT_ATTENTION) {
		result = SCSI_DH_RETRY;
	} else if (key == NOT_READY && asc == 0x04 && ascq == 0x01) {
		/* LUN not ready, in the process of becoming ready */
		result = SCSI_DH_RETRY;
	} else if (key == ILLEGAL_REQUEST && asc == 0x91 && ascq == 0x36) {
		/* Command lock contention */
		result = SCSI_DH_IMM_RETRY;
	}

	RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
		"MODE_SELECT returned with sense %02x/%02x/%02x",
		(char *) h->ctlr->array_name, h->ctlr->index,
		sense_hdr->sense_key, sense_hdr->asc, sense_hdr->ascq);

	return result;
}
/*
 * send_mode_select - work handler that issues the failover MODE SELECT
 * @work:	the ms_work member embedded in a struct rdac_controller
 *
 * Takes over all activation requests queued on the controller (ms_head),
 * clears the controller's queued state so new requests schedule a fresh
 * work item, builds the MODE SELECT via rdac_failover_get() and sends it.
 * A SCSI_DH_RETRY result is retried up to RDAC_RETRY_COUNT times; a
 * SCSI_DH_IMM_RETRY result is retried without consuming the retry budget.
 *
 * Every queued request is completed with the final status and freed.
 */
static void send_mode_select(struct work_struct *work)
{
	struct rdac_controller *ctlr =
		container_of(work, struct rdac_controller, ms_work);
	struct scsi_device *sdev = ctlr->ms_sdev;
	struct rdac_dh_data *h = sdev->handler_data;
	int err = SCSI_DH_OK, retry_cnt = RDAC_RETRY_COUNT;
	struct rdac_queue_data *tmp, *qdata;
	LIST_HEAD(list);
	unsigned char cdb[COMMAND_SIZE(MODE_SELECT_10)];
	struct scsi_sense_hdr sshdr;
	unsigned int data_size;
	u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
		REQ_FAILFAST_DRIVER;

	/* Detach the pending requests so they can be handled unlocked */
	spin_lock(&ctlr->ms_lock);
	list_splice_init(&ctlr->ms_head, &list);
	ctlr->ms_queued = 0;
	ctlr->ms_sdev = NULL;
	spin_unlock(&ctlr->ms_lock);

retry:
	/*
	 * Start every attempt with a clean status.  Otherwise a retry that
	 * succeeds would still report the SCSI_DH_RETRY/SCSI_DH_IMM_RETRY
	 * left over from the previous failed attempt.
	 */
	err = SCSI_DH_OK;
	data_size = rdac_failover_get(ctlr, &list, cdb);

	RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
		"%s MODE_SELECT command",
		(char *) h->ctlr->array_name, h->ctlr->index,
		(retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying");

	if (scsi_execute(sdev, cdb, DMA_TO_DEVICE, &h->ctlr->mode_select,
			 data_size, NULL, &sshdr, RDAC_TIMEOUT * HZ,
			 RDAC_RETRIES, req_flags, 0, NULL)) {
		err = mode_select_handle_sense(sdev, &sshdr);
		if (err == SCSI_DH_RETRY && retry_cnt--)
			goto retry;
		if (err == SCSI_DH_IMM_RETRY)
			goto retry;
	}
	if (err == SCSI_DH_OK) {
		h->state = RDAC_STATE_ACTIVE;
		RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
				"MODE_SELECT completed",
				(char *) h->ctlr->array_name, h->ctlr->index);
	}

	/* Complete and release every request that was waiting on this work */
	list_for_each_entry_safe(qdata, tmp, &list, entry) {
		list_del(&qdata->entry);
		if (err == SCSI_DH_OK)
			qdata->h->state = RDAC_STATE_ACTIVE;
		if (qdata->callback_fn)
			qdata->callback_fn(qdata->callback_data, err);
		kfree(qdata);
	}
	return;
}
/*
 * queue_mode_select - queue an activation request on the LUN's controller
 * @sdev:	device being activated
 * @fn:		completion callback, stored with the request (may be NULL)
 * @data:	opaque argument stored for @fn
 *
 * Appends a request to the controller's ms_head list and, unless a work
 * item is already pending for the controller, schedules ms_work on the
 * kmpath_rdacd workqueue with @sdev recorded as the device to use.
 *
 * Returns SCSI_DH_OK once the request is queued, or SCSI_DH_RETRY if the
 * request could not be allocated.
 */
static int queue_mode_select(struct scsi_device *sdev,
			     activate_complete fn, void *data)
{
	struct rdac_controller *ctlr;
	struct rdac_queue_data *request = kzalloc(sizeof(*request), GFP_KERNEL);

	if (!request)
		return SCSI_DH_RETRY;

	request->h = sdev->handler_data;
	request->callback_fn = fn;
	request->callback_data = data;
	ctlr = request->h->ctlr;

	spin_lock(&ctlr->ms_lock);
	list_add_tail(&request->entry, &ctlr->ms_head);
	if (!ctlr->ms_queued) {
		/* first pending request: kick the worker */
		ctlr->ms_queued = 1;
		ctlr->ms_sdev = sdev;
		queue_work(kmpath_rdacd, &ctlr->ms_work);
	}
	spin_unlock(&ctlr->ms_lock);

	return SCSI_DH_OK;
}
/*
 * rdac_activate - device handler "activate" entry point
 * @sdev:	device to activate
 * @fn:		completion callback (may be NULL)
 * @data:	opaque argument for @fn
 *
 * Refreshes the ownership information and queues a MODE SELECT when the LUN
 * is unowned and either the array is in RDAC mode, or it is in IOSHIP mode
 * and this is the preferred path.  In every other case, including errors,
 * @fn is invoked right away with the current status.
 *
 * Always returns 0; the outcome is delivered through @fn.
 */
static int rdac_activate(struct scsi_device *sdev,
			 activate_complete fn, void *data)
{
	struct rdac_dh_data *h = sdev->handler_data;
	int need_failover = 0;
	int status;

	status = check_ownership(sdev, h);
	if (status == SCSI_DH_OK) {
		if (h->lun_state == RDAC_LUN_UNOWNED) {
			if (h->mode == RDAC_MODE)
				need_failover = 1;
			else if (h->mode == RDAC_MODE_IOSHIP &&
				 h->preferred == RDAC_PREFERRED)
				need_failover = 1;
		}

		if (need_failover) {
			status = queue_mode_select(sdev, fn, data);
			/* queued: the worker calls fn when it is done */
			if (status == SCSI_DH_OK)
				return 0;
		}
	}

	if (fn)
		fn(data, status);
	return 0;
}
/*
 * rdac_prep_fn - device handler "prep_fn" entry point
 * @sdev:	device the request is for
 * @req:	request being prepared
 *
 * Returns BLKPREP_OK while the path state is RDAC_STATE_ACTIVE.  Otherwise
 * the request is marked RQF_QUIET and BLKPREP_KILL is returned.
 */
static int rdac_prep_fn(struct scsi_device *sdev, struct request *req)
{
	struct rdac_dh_data *h = sdev->handler_data;

	if (h->state == RDAC_STATE_ACTIVE)
		return BLKPREP_OK;

	req->rq_flags |= RQF_QUIET;
	return BLKPREP_KILL;
}
/*
 * rdac_check_sense - device handler "check_sense" entry point
 * @sdev:	device the failed command was sent to
 * @sense_hdr:	decoded sense header of the failed command
 *
 * Logs the sense data and decides how the command should be treated:
 * ADD_TO_MLQUEUE to retry it, SUCCESS to complete it so the path can be
 * bypassed, or SCSI_RETURN_NOT_HANDLED when the sense code is not one this
 * handler knows about.
 */
static int rdac_check_sense(struct scsi_device *sdev,
			    struct scsi_sense_hdr *sense_hdr)
{
	struct rdac_dh_data *h = sdev->handler_data;
	const int asc = sense_hdr->asc;
	const int ascq = sense_hdr->ascq;

	RDAC_LOG(RDAC_LOG_SENSE, sdev, "array %s, ctlr %d, "
			"I/O returned with sense %02x/%02x/%02x",
			(char *) h->ctlr->array_name, h->ctlr->index,
			sense_hdr->sense_key, sense_hdr->asc, sense_hdr->ascq);

	switch (sense_hdr->sense_key) {
	case NOT_READY:
		if (asc == 0x04) {
			switch (ascq) {
			case 0x01:
				/* LUN is in the process of becoming ready */
			case 0xA1:
				/* Quiescence in progress: retry and wait */
				return ADD_TO_MLQUEUE;
			case 0x81:
				/*
				 * Storage firmware incompatible, manual code
				 * synchronisation required.  Nothing can be
				 * done here; try to bypass the path.
				 */
				return SUCCESS;
			default:
				break;
			}
		} else if (asc == 0xA1 && ascq == 0x02) {
			/* Quiescence in progress or achieved: retry */
			return ADD_TO_MLQUEUE;
		}
		break;
	case ILLEGAL_REQUEST:
		if (asc == 0x94 && ascq == 0x01) {
			/*
			 * This controller is not the current owner of the
			 * LUN.  Mark the path passive and fail it so that
			 * the other path is used.
			 */
			h->state = RDAC_STATE_PASSIVE;
			return SUCCESS;
		}
		break;
	case UNIT_ATTENTION:
		/* Power on, reset or bus device reset: retry */
		if (asc == 0x29 && ascq == 0x00)
			return ADD_TO_MLQUEUE;
		/* Quiescence in progress: retry */
		if (asc == 0x8b && ascq == 0x02)
			return ADD_TO_MLQUEUE;
		break;
	default:
		break;
	}

	/* Not a sense code this handler cares about */
	return SCSI_RETURN_NOT_HANDLED;
}
static int rdac_bus_attach(struct scsi_device *sdev)
{
struct rdac_dh_data *h;
int err;
char array_name[ARRAY_LABEL_LEN];
char array_id[UNIQUE_ID_LEN];
h = kzalloc(sizeof(*h) , GFP_KERNEL);
if (!h)
return SCSI_DH_NOMEM;
h->lun = UNINITIALIZED_LUN;
h->state = RDAC_STATE_ACTIVE;
err = get_lun_info(sdev, h, array_name, array_id);
if (err != SCSI_DH_OK)
goto failed;
err = initialize_controller(sdev, h, array_name, array_id);
if (err != SCSI_DH_OK)
goto failed;
err = check_ownership(sdev, h);
if (err != SCSI_DH_OK)
goto clean_ctlr;
err = set_mode_select(sdev, h);
if (err != SCSI_DH_OK)
goto clean_ctlr;
sdev_printk(KERN_NOTICE, sdev,
"%s: LUN %d (%s) (%s)\n",
RDAC_NAME, h->lun, mode[(int)h->mode],
lun_state[(int)h->lun_state]);
sdev->handler_data = h;
return SCSI_DH_OK;
clean_ctlr:
[SCSI] scsi_dh_rdac: Fix for unbalanced reference count This patch fixes an unbalanced refcount issue. Elevating the lock for both kref_put and also for controller node deletion. Previously, controller deletion was protected but the not the kref_put. This was causing the other thread to pick up the controller structure which was already kref'd zero. This was causing the following WARN_ON and also sometimes panic. WARNING: at lib/kref.c:43 kref_get+0x2d/0x30() (Not tainted) Hardware name: IBM System x3655 -[7985AC1]- Modules linked in: fuse scsi_dh_rdac autofs4 nfs lockd fscache nfs_acl auth_rpcgss sunrpc 8021q garp stp llc ipv6 ib_srp(U) scsi_transport_srp scsi_tgt ib_cm(U) ib_sa(U) ib_uverbs(U) ib_umad(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ib_mad(U) ib_core(U) dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath uinput bnx2 ses enclosure sg ibmpex ibmaem ipmi_msghandler serio_raw k8temp hwmon amd64_edac_mod edac_core edac_mce_amd shpchp i2c_piix4 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif sata_svw pata_acpi ata_generic pata_serverworks aacraid radeon ttm drm_kms_helper drm i2c_algo_bit i2c_core dm_mod [last unloaded: freq_table] Pid: 13735, comm: srp_daemon Not tainted 2.6.32-71.el6.x86_64 #1 Call Trace: [<ffffffff8106b857>] warn_slowpath_common+0x87/0xc0 [<ffffffff8106b8aa>] warn_slowpath_null+0x1a/0x20 [<ffffffff8125c39d>] kref_get+0x2d/0x30 [<ffffffffa01b4029>] rdac_bus_attach+0x459/0x580 [scsi_dh_rdac] [<ffffffff8135232a>] scsi_dh_handler_attach+0x2a/0x80 [<ffffffff81352c7b>] scsi_dh_notifier+0x9b/0xa0 [<ffffffff814cd7a5>] notifier_call_chain+0x55/0x80 [<ffffffff8109711a>] __blocking_notifier_call_chain+0x5a/0x80 [<ffffffff81097156>] blocking_notifier_call_chain+0x16/0x20 [<ffffffff8132bec5>] device_add+0x515/0x640 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134f659>] scsi_sysfs_add_sdev+0x89/0x2c0 [<ffffffff8134d096>] scsi_probe_and_add_lun+0xea6/0xed0 [<ffffffff8134beb2>] ? 
scsi_alloc_target+0x292/0x2d0 [<ffffffff8134d1e1>] __scsi_scan_target+0x121/0x750 [<ffffffff811df806>] ? sysfs_create_file+0x26/0x30 [<ffffffff8132b759>] ? device_create_file+0x19/0x20 [<ffffffff81332838>] ? attribute_container_add_attrs+0x78/0x90 [<ffffffff814b008c>] ? klist_next+0x4c/0xf0 [<ffffffff81332e30>] ? transport_configure+0x0/0x20 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134df40>] scsi_scan_target+0xd0/0xe0 [<ffffffffa02f053a>] srp_create_target+0x75a/0x890 [ib_srp] [<ffffffff8132a130>] dev_attr_store+0x20/0x30 [<ffffffff811df145>] sysfs_write_file+0xe5/0x170 [<ffffffff8116c818>] vfs_write+0xb8/0x1a0 [<ffffffff810d40a2>] ? audit_syscall_entry+0x272/0x2a0 [<ffffffff8116d251>] sys_write+0x51/0x90 [<ffffffff81013172>] system_call_fastpath+0x16/0x1b Signed-off-by: Babu Moger <babu.moger@netapp.com> Acked-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-02-02 23:21:54 +08:00
spin_lock(&list_lock);
kref_put(&h->ctlr->kref, release_controller);
[SCSI] scsi_dh_rdac: Fix for unbalanced reference count This patch fixes an unbalanced refcount issue. Elevating the lock for both kref_put and also for controller node deletion. Previously, controller deletion was protected but the not the kref_put. This was causing the other thread to pick up the controller structure which was already kref'd zero. This was causing the following WARN_ON and also sometimes panic. WARNING: at lib/kref.c:43 kref_get+0x2d/0x30() (Not tainted) Hardware name: IBM System x3655 -[7985AC1]- Modules linked in: fuse scsi_dh_rdac autofs4 nfs lockd fscache nfs_acl auth_rpcgss sunrpc 8021q garp stp llc ipv6 ib_srp(U) scsi_transport_srp scsi_tgt ib_cm(U) ib_sa(U) ib_uverbs(U) ib_umad(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ib_mad(U) ib_core(U) dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath uinput bnx2 ses enclosure sg ibmpex ibmaem ipmi_msghandler serio_raw k8temp hwmon amd64_edac_mod edac_core edac_mce_amd shpchp i2c_piix4 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif sata_svw pata_acpi ata_generic pata_serverworks aacraid radeon ttm drm_kms_helper drm i2c_algo_bit i2c_core dm_mod [last unloaded: freq_table] Pid: 13735, comm: srp_daemon Not tainted 2.6.32-71.el6.x86_64 #1 Call Trace: [<ffffffff8106b857>] warn_slowpath_common+0x87/0xc0 [<ffffffff8106b8aa>] warn_slowpath_null+0x1a/0x20 [<ffffffff8125c39d>] kref_get+0x2d/0x30 [<ffffffffa01b4029>] rdac_bus_attach+0x459/0x580 [scsi_dh_rdac] [<ffffffff8135232a>] scsi_dh_handler_attach+0x2a/0x80 [<ffffffff81352c7b>] scsi_dh_notifier+0x9b/0xa0 [<ffffffff814cd7a5>] notifier_call_chain+0x55/0x80 [<ffffffff8109711a>] __blocking_notifier_call_chain+0x5a/0x80 [<ffffffff81097156>] blocking_notifier_call_chain+0x16/0x20 [<ffffffff8132bec5>] device_add+0x515/0x640 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134f659>] scsi_sysfs_add_sdev+0x89/0x2c0 [<ffffffff8134d096>] scsi_probe_and_add_lun+0xea6/0xed0 [<ffffffff8134beb2>] ? 
scsi_alloc_target+0x292/0x2d0 [<ffffffff8134d1e1>] __scsi_scan_target+0x121/0x750 [<ffffffff811df806>] ? sysfs_create_file+0x26/0x30 [<ffffffff8132b759>] ? device_create_file+0x19/0x20 [<ffffffff81332838>] ? attribute_container_add_attrs+0x78/0x90 [<ffffffff814b008c>] ? klist_next+0x4c/0xf0 [<ffffffff81332e30>] ? transport_configure+0x0/0x20 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134df40>] scsi_scan_target+0xd0/0xe0 [<ffffffffa02f053a>] srp_create_target+0x75a/0x890 [ib_srp] [<ffffffff8132a130>] dev_attr_store+0x20/0x30 [<ffffffff811df145>] sysfs_write_file+0xe5/0x170 [<ffffffff8116c818>] vfs_write+0xb8/0x1a0 [<ffffffff810d40a2>] ? audit_syscall_entry+0x272/0x2a0 [<ffffffff8116d251>] sys_write+0x51/0x90 [<ffffffff81013172>] system_call_fastpath+0x16/0x1b Signed-off-by: Babu Moger <babu.moger@netapp.com> Acked-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-02-02 23:21:54 +08:00
spin_unlock(&list_lock);
failed:
kfree(h);
return err;
}
static void rdac_bus_detach( struct scsi_device *sdev )
{
struct rdac_dh_data *h = sdev->handler_data;
[SCSI] scsi_dh_rdac: Fix for unbalanced reference count This patch fixes an unbalanced refcount issue. Elevating the lock for both kref_put and also for controller node deletion. Previously, controller deletion was protected but the not the kref_put. This was causing the other thread to pick up the controller structure which was already kref'd zero. This was causing the following WARN_ON and also sometimes panic. WARNING: at lib/kref.c:43 kref_get+0x2d/0x30() (Not tainted) Hardware name: IBM System x3655 -[7985AC1]- Modules linked in: fuse scsi_dh_rdac autofs4 nfs lockd fscache nfs_acl auth_rpcgss sunrpc 8021q garp stp llc ipv6 ib_srp(U) scsi_transport_srp scsi_tgt ib_cm(U) ib_sa(U) ib_uverbs(U) ib_umad(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ib_mad(U) ib_core(U) dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath uinput bnx2 ses enclosure sg ibmpex ibmaem ipmi_msghandler serio_raw k8temp hwmon amd64_edac_mod edac_core edac_mce_amd shpchp i2c_piix4 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif sata_svw pata_acpi ata_generic pata_serverworks aacraid radeon ttm drm_kms_helper drm i2c_algo_bit i2c_core dm_mod [last unloaded: freq_table] Pid: 13735, comm: srp_daemon Not tainted 2.6.32-71.el6.x86_64 #1 Call Trace: [<ffffffff8106b857>] warn_slowpath_common+0x87/0xc0 [<ffffffff8106b8aa>] warn_slowpath_null+0x1a/0x20 [<ffffffff8125c39d>] kref_get+0x2d/0x30 [<ffffffffa01b4029>] rdac_bus_attach+0x459/0x580 [scsi_dh_rdac] [<ffffffff8135232a>] scsi_dh_handler_attach+0x2a/0x80 [<ffffffff81352c7b>] scsi_dh_notifier+0x9b/0xa0 [<ffffffff814cd7a5>] notifier_call_chain+0x55/0x80 [<ffffffff8109711a>] __blocking_notifier_call_chain+0x5a/0x80 [<ffffffff81097156>] blocking_notifier_call_chain+0x16/0x20 [<ffffffff8132bec5>] device_add+0x515/0x640 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134f659>] scsi_sysfs_add_sdev+0x89/0x2c0 [<ffffffff8134d096>] scsi_probe_and_add_lun+0xea6/0xed0 [<ffffffff8134beb2>] ? 
scsi_alloc_target+0x292/0x2d0 [<ffffffff8134d1e1>] __scsi_scan_target+0x121/0x750 [<ffffffff811df806>] ? sysfs_create_file+0x26/0x30 [<ffffffff8132b759>] ? device_create_file+0x19/0x20 [<ffffffff81332838>] ? attribute_container_add_attrs+0x78/0x90 [<ffffffff814b008c>] ? klist_next+0x4c/0xf0 [<ffffffff81332e30>] ? transport_configure+0x0/0x20 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134df40>] scsi_scan_target+0xd0/0xe0 [<ffffffffa02f053a>] srp_create_target+0x75a/0x890 [ib_srp] [<ffffffff8132a130>] dev_attr_store+0x20/0x30 [<ffffffff811df145>] sysfs_write_file+0xe5/0x170 [<ffffffff8116c818>] vfs_write+0xb8/0x1a0 [<ffffffff810d40a2>] ? audit_syscall_entry+0x272/0x2a0 [<ffffffff8116d251>] sys_write+0x51/0x90 [<ffffffff81013172>] system_call_fastpath+0x16/0x1b Signed-off-by: Babu Moger <babu.moger@netapp.com> Acked-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-02-02 23:21:54 +08:00
if (h->ctlr && h->ctlr->ms_queued)
flush_workqueue(kmpath_rdacd);
spin_lock(&list_lock);
if (h->ctlr) {
list_del_rcu(&h->node);
h->sdev = NULL;
kref_put(&h->ctlr->kref, release_controller);
}
[SCSI] scsi_dh_rdac: Fix for unbalanced reference count This patch fixes an unbalanced refcount issue. Elevating the lock for both kref_put and also for controller node deletion. Previously, controller deletion was protected but the not the kref_put. This was causing the other thread to pick up the controller structure which was already kref'd zero. This was causing the following WARN_ON and also sometimes panic. WARNING: at lib/kref.c:43 kref_get+0x2d/0x30() (Not tainted) Hardware name: IBM System x3655 -[7985AC1]- Modules linked in: fuse scsi_dh_rdac autofs4 nfs lockd fscache nfs_acl auth_rpcgss sunrpc 8021q garp stp llc ipv6 ib_srp(U) scsi_transport_srp scsi_tgt ib_cm(U) ib_sa(U) ib_uverbs(U) ib_umad(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ib_mad(U) ib_core(U) dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath uinput bnx2 ses enclosure sg ibmpex ibmaem ipmi_msghandler serio_raw k8temp hwmon amd64_edac_mod edac_core edac_mce_amd shpchp i2c_piix4 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif sata_svw pata_acpi ata_generic pata_serverworks aacraid radeon ttm drm_kms_helper drm i2c_algo_bit i2c_core dm_mod [last unloaded: freq_table] Pid: 13735, comm: srp_daemon Not tainted 2.6.32-71.el6.x86_64 #1 Call Trace: [<ffffffff8106b857>] warn_slowpath_common+0x87/0xc0 [<ffffffff8106b8aa>] warn_slowpath_null+0x1a/0x20 [<ffffffff8125c39d>] kref_get+0x2d/0x30 [<ffffffffa01b4029>] rdac_bus_attach+0x459/0x580 [scsi_dh_rdac] [<ffffffff8135232a>] scsi_dh_handler_attach+0x2a/0x80 [<ffffffff81352c7b>] scsi_dh_notifier+0x9b/0xa0 [<ffffffff814cd7a5>] notifier_call_chain+0x55/0x80 [<ffffffff8109711a>] __blocking_notifier_call_chain+0x5a/0x80 [<ffffffff81097156>] blocking_notifier_call_chain+0x16/0x20 [<ffffffff8132bec5>] device_add+0x515/0x640 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134f659>] scsi_sysfs_add_sdev+0x89/0x2c0 [<ffffffff8134d096>] scsi_probe_and_add_lun+0xea6/0xed0 [<ffffffff8134beb2>] ? 
scsi_alloc_target+0x292/0x2d0 [<ffffffff8134d1e1>] __scsi_scan_target+0x121/0x750 [<ffffffff811df806>] ? sysfs_create_file+0x26/0x30 [<ffffffff8132b759>] ? device_create_file+0x19/0x20 [<ffffffff81332838>] ? attribute_container_add_attrs+0x78/0x90 [<ffffffff814b008c>] ? klist_next+0x4c/0xf0 [<ffffffff81332e30>] ? transport_configure+0x0/0x20 [<ffffffff813329e4>] ? attribute_container_device_trigger+0xc4/0xe0 [<ffffffff8134df40>] scsi_scan_target+0xd0/0xe0 [<ffffffffa02f053a>] srp_create_target+0x75a/0x890 [ib_srp] [<ffffffff8132a130>] dev_attr_store+0x20/0x30 [<ffffffff811df145>] sysfs_write_file+0xe5/0x170 [<ffffffff8116c818>] vfs_write+0xb8/0x1a0 [<ffffffff810d40a2>] ? audit_syscall_entry+0x272/0x2a0 [<ffffffff8116d251>] sys_write+0x51/0x90 [<ffffffff81013172>] system_call_fastpath+0x16/0x1b Signed-off-by: Babu Moger <babu.moger@netapp.com> Acked-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
2012-02-02 23:21:54 +08:00
spin_unlock(&list_lock);
sdev->handler_data = NULL;
kfree(h);
}
/* Device handler operations registered with the SCSI device handler core */
static struct scsi_device_handler rdac_dh = {
	.name		= RDAC_NAME,
	.module		= THIS_MODULE,
	/* per-device lifetime */
	.attach		= rdac_bus_attach,
	.detach		= rdac_bus_detach,
	/* path activation (failover) */
	.activate	= rdac_activate,
	/* I/O path hooks */
	.prep_fn	= rdac_prep_fn,
	.check_sense	= rdac_check_sense,
};
/*
 * rdac_init - module entry point
 *
 * Creates the workqueue used for MODE SELECT commands and then registers
 * the device handler.  The workqueue is created first so that it already
 * exists by the time the registered handler can be asked to queue work
 * on it.
 *
 * Returns 0 on success, -EINVAL if the workqueue cannot be created, or the
 * error returned by scsi_register_device_handler().
 */
static int __init rdac_init(void)
{
	int r;

	/*
	 * Create workqueue to handle mode selects for rdac
	 */
	kmpath_rdacd = create_singlethread_workqueue("kmpath_rdacd");
	if (!kmpath_rdacd) {
		printk(KERN_ERR "kmpath_rdacd creation failed.\n");
		return -EINVAL;
	}

	r = scsi_register_device_handler(&rdac_dh);
	if (r != 0) {
		printk(KERN_ERR "Failed to register scsi device handler.\n");
		destroy_workqueue(kmpath_rdacd);
	}

	return r;
}
/*
 * rdac_exit - module exit point
 *
 * Unregisters the device handler first and only then destroys the workqueue
 * it queues MODE SELECT work on, so the workqueue is never torn down while
 * the handler is still registered.
 */
static void __exit rdac_exit(void)
{
	scsi_unregister_device_handler(&rdac_dh);
	destroy_workqueue(kmpath_rdacd);
}
/* Module entry/exit hooks and module metadata */
module_init(rdac_init);
module_exit(rdac_exit);
MODULE_DESCRIPTION("Multipath LSI/Engenio/NetApp E-Series RDAC driver");
MODULE_AUTHOR("Mike Christie, Chandra Seetharaman");
MODULE_VERSION("01.00.0000.0000");
MODULE_LICENSE("GPL");