2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2003 Sistina Software Limited.
|
|
|
|
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
|
|
|
|
*
|
|
|
|
* This file is released under the GPL.
|
|
|
|
*/
|
|
|
|
|
2008-10-22 00:44:59 +08:00
|
|
|
#include <linux/device-mapper.h>
|
|
|
|
|
2016-05-13 04:28:10 +08:00
|
|
|
#include "dm-rq.h"
|
2016-05-20 04:15:14 +08:00
|
|
|
#include "dm-bio-record.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "dm-path-selector.h"
|
2007-10-20 05:48:02 +08:00
|
|
|
#include "dm-uevent.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2014-12-18 10:08:12 +08:00
|
|
|
#include <linux/blkdev.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/ctype.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/mempool.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/pagemap.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/time.h>
|
|
|
|
#include <linux/workqueue.h>
|
2012-06-03 07:29:58 +08:00
|
|
|
#include <linux/delay.h>
|
2008-05-02 05:50:11 +08:00
|
|
|
#include <scsi/scsi_dh.h>
|
2011-07-27 07:09:06 +08:00
|
|
|
#include <linux/atomic.h>
|
2016-02-01 06:38:28 +08:00
|
|
|
#include <linux/blk-mq.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-06-26 15:27:35 +08:00
|
|
|
/* Message prefix identifying this target. */
#define DM_MSG_PREFIX "multipath"

/* Delay (msecs) used for a delayed pg_init retry when none was configured. */
#define DM_PG_INIT_DELAY_MSECS 2000
/* Value of pg_init_delay_msecs meaning "use DM_PG_INIT_DELAY_MSECS". */
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned int) -1)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Path properties */
|
|
|
|
struct pgpath {
|
|
|
|
struct list_head list;
|
|
|
|
|
|
|
|
struct priority_group *pg; /* Owning PG */
|
|
|
|
unsigned fail_count; /* Cumulative failure count */
|
|
|
|
|
2006-12-08 18:36:33 +08:00
|
|
|
struct dm_path path;
|
2011-01-14 04:00:01 +08:00
|
|
|
struct delayed_work activate_path;
|
2016-02-11 02:02:21 +08:00
|
|
|
|
|
|
|
bool is_active:1; /* Path status */
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Convert a pointer to an embedded struct dm_path into its containing pgpath. */
#define path_to_pgpath(__p) container_of((__p), struct pgpath, path)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Paths are grouped into Priority Groups and numbered from 1 upwards.
|
|
|
|
* Each has a path selector which controls which path gets used.
|
|
|
|
*/
|
|
|
|
struct priority_group {
|
|
|
|
struct list_head list;
|
|
|
|
|
|
|
|
struct multipath *m; /* Owning multipath instance */
|
|
|
|
struct path_selector ps;
|
|
|
|
|
|
|
|
unsigned pg_num; /* Reference number */
|
|
|
|
unsigned nr_pgpaths; /* Number of paths in PG */
|
|
|
|
struct list_head pgpaths;
|
2016-02-11 02:02:21 +08:00
|
|
|
|
|
|
|
bool bypassed:1; /* Temporarily bypass this PG? */
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Multipath context */
|
|
|
|
struct multipath {
|
|
|
|
struct list_head list;
|
|
|
|
struct dm_target *ti;
|
|
|
|
|
2008-05-02 05:50:11 +08:00
|
|
|
const char *hw_handler_name;
|
2009-08-04 03:42:45 +08:00
|
|
|
char *hw_handler_params;
|
2011-01-14 04:00:01 +08:00
|
|
|
|
2012-06-03 07:29:43 +08:00
|
|
|
spinlock_t lock;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned nr_priority_groups;
|
|
|
|
struct list_head priority_groups;
|
2011-01-14 04:00:01 +08:00
|
|
|
|
|
|
|
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
struct pgpath *current_pgpath;
|
|
|
|
struct priority_group *current_pg;
|
|
|
|
struct priority_group *next_pg; /* Switch to this PG if set */
|
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
unsigned long flags; /* Multipath state flags */
|
2012-06-03 07:29:43 +08:00
|
|
|
|
2007-10-20 05:47:53 +08:00
|
|
|
unsigned pg_init_retries; /* Number of times to retry pg_init */
|
2011-01-14 04:00:01 +08:00
|
|
|
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
atomic_t nr_valid_paths; /* Total number of usable paths */
|
|
|
|
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
|
|
|
|
atomic_t pg_init_count; /* Number of times pg_init called */
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
unsigned queue_mode;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2007-07-13 00:26:32 +08:00
|
|
|
* We must use a mempool of dm_mpath_io structs so that we
|
2005-04-17 06:20:36 +08:00
|
|
|
* can resubmit bios on error.
|
|
|
|
*/
|
|
|
|
mempool_t *mpio_pool;
|
2009-12-11 07:52:21 +08:00
|
|
|
|
|
|
|
struct mutex work_mutex;
|
2016-03-18 05:13:10 +08:00
|
|
|
struct work_struct trigger_event;
|
2016-05-20 04:15:14 +08:00
|
|
|
|
|
|
|
struct work_struct process_queued_bios;
|
|
|
|
struct bio_list queued_bios;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
2016-05-20 04:15:14 +08:00
|
|
|
* Context information attached to each io we process.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2007-07-13 00:26:32 +08:00
|
|
|
struct dm_mpath_io {
|
2005-04-17 06:20:36 +08:00
|
|
|
struct pgpath *pgpath;
|
2009-06-22 17:12:27 +08:00
|
|
|
size_t nr_bytes;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Callback type: operates on one pgpath and returns an int status. */
typedef int (*action_fn) (struct pgpath *pgpath);

/* Slab cache from which each request_fn multipath's mpio_pool is created. */
static struct kmem_cache *_mpio_cache;

/*
 * kmultipathd runs process_queued_bios work; kmpath_handlerd runs the
 * per-path activate_path delayed work.
 */
static struct workqueue_struct *kmultipathd;
static struct workqueue_struct *kmpath_handlerd;

/* Work handlers, forward-declared for use by the allocation routines. */
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
static void process_queued_bios(struct work_struct *work);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
/*-----------------------------------------------
 * Multipath state flags.
 *
 * These are bit numbers within multipath->flags and are
 * accessed with set_bit()/clear_bit()/test_bit().
 *-----------------------------------------------*/

/* Must we queue all I/O? */
#define MPATHF_QUEUE_IO 0
/* Queue I/O if last path fails? */
#define MPATHF_QUEUE_IF_NO_PATH 1
/* Saved state during suspension */
#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2
/* If there's already a hw_handler present, don't change it. */
#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3
/* pg_init is not currently allowed */
#define MPATHF_PG_INIT_DISABLED 4
/* pg_init needs calling? */
#define MPATHF_PG_INIT_REQUIRED 5
/* Delay pg_init retry? */
#define MPATHF_PG_INIT_DELAY_RETRY 6
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*-----------------------------------------------
|
|
|
|
* Allocation routines
|
|
|
|
*-----------------------------------------------*/
|
|
|
|
|
|
|
|
static struct pgpath *alloc_pgpath(void)
|
|
|
|
{
|
2006-10-03 16:15:34 +08:00
|
|
|
struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-08-29 15:36:09 +08:00
|
|
|
if (pgpath) {
|
2016-02-11 02:02:21 +08:00
|
|
|
pgpath->is_active = true;
|
2011-01-14 04:00:01 +08:00
|
|
|
INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
|
2008-08-29 15:36:09 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
return pgpath;
|
|
|
|
}
|
|
|
|
|
2007-07-13 00:26:32 +08:00
|
|
|
/* Release the memory of a pgpath obtained from alloc_pgpath(). */
static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}
|
|
|
|
|
|
|
|
static struct priority_group *alloc_priority_group(void)
|
|
|
|
{
|
|
|
|
struct priority_group *pg;
|
|
|
|
|
2006-10-03 16:15:34 +08:00
|
|
|
pg = kzalloc(sizeof(*pg), GFP_KERNEL);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-10-03 16:15:34 +08:00
|
|
|
if (pg)
|
|
|
|
INIT_LIST_HEAD(&pg->pgpaths);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
return pg;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
|
|
|
|
{
|
|
|
|
struct pgpath *pgpath, *tmp;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
|
|
|
|
list_del(&pgpath->list);
|
|
|
|
dm_put_device(ti, pgpath->path.dev);
|
|
|
|
free_pgpath(pgpath);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void free_priority_group(struct priority_group *pg,
|
|
|
|
struct dm_target *ti)
|
|
|
|
{
|
|
|
|
struct path_selector *ps = &pg->ps;
|
|
|
|
|
|
|
|
if (ps->type) {
|
|
|
|
ps->type->destroy(ps);
|
|
|
|
dm_put_path_selector(ps->type);
|
|
|
|
}
|
|
|
|
|
|
|
|
free_pgpaths(&pg->pgpaths, ti);
|
|
|
|
kfree(pg);
|
|
|
|
}
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
static struct multipath *alloc_multipath(struct dm_target *ti)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
struct multipath *m;
|
|
|
|
|
2006-10-03 16:15:34 +08:00
|
|
|
m = kzalloc(sizeof(*m), GFP_KERNEL);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (m) {
|
|
|
|
INIT_LIST_HEAD(&m->priority_groups);
|
|
|
|
spin_lock_init(&m->lock);
|
2016-03-18 04:32:10 +08:00
|
|
|
set_bit(MPATHF_QUEUE_IO, &m->flags);
|
2016-03-18 05:10:15 +08:00
|
|
|
atomic_set(&m->nr_valid_paths, 0);
|
|
|
|
atomic_set(&m->pg_init_in_progress, 0);
|
|
|
|
atomic_set(&m->pg_init_count, 0);
|
2011-01-14 04:00:01 +08:00
|
|
|
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
|
2006-11-22 22:57:56 +08:00
|
|
|
INIT_WORK(&m->trigger_event, trigger_event);
|
2010-03-06 10:32:13 +08:00
|
|
|
init_waitqueue_head(&m->pg_init_wait);
|
2009-12-11 07:52:21 +08:00
|
|
|
mutex_init(&m->work_mutex);
|
2016-02-01 01:08:36 +08:00
|
|
|
|
|
|
|
m->mpio_pool = NULL;
|
2016-05-25 09:16:51 +08:00
|
|
|
m->queue_mode = DM_TYPE_NONE;
|
2016-05-20 04:15:14 +08:00
|
|
|
|
2006-10-03 16:15:33 +08:00
|
|
|
m->ti = ti;
|
|
|
|
ti->private = m;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return m;
|
|
|
|
}
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
|
|
|
|
{
|
|
|
|
if (m->queue_mode == DM_TYPE_NONE) {
|
|
|
|
/*
|
|
|
|
* Default to request-based.
|
|
|
|
*/
|
|
|
|
if (dm_use_blk_mq(dm_table_get_md(ti->table)))
|
|
|
|
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
|
|
|
|
else
|
|
|
|
m->queue_mode = DM_TYPE_REQUEST_BASED;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
|
|
|
|
unsigned min_ios = dm_get_reserved_rq_based_ios();
|
|
|
|
|
|
|
|
m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
|
|
|
|
if (!m->mpio_pool)
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
else if (m->queue_mode == DM_TYPE_BIO_BASED) {
|
|
|
|
INIT_WORK(&m->process_queued_bios, process_queued_bios);
|
|
|
|
/*
|
|
|
|
* bio-based doesn't support any direct scsi_dh management;
|
|
|
|
* it just discovers if a scsi_dh is attached.
|
|
|
|
*/
|
|
|
|
set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
dm_table_set_type(ti->table, m->queue_mode);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static void free_multipath(struct multipath *m)
|
|
|
|
{
|
|
|
|
struct priority_group *pg, *tmp;
|
|
|
|
|
|
|
|
list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
|
|
|
|
list_del(&pg->list);
|
|
|
|
free_priority_group(pg, m->ti);
|
|
|
|
}
|
|
|
|
|
2008-05-02 05:50:11 +08:00
|
|
|
kfree(m->hw_handler_name);
|
2009-08-04 03:42:45 +08:00
|
|
|
kfree(m->hw_handler_params);
|
2005-04-17 06:20:36 +08:00
|
|
|
mempool_destroy(m->mpio_pool);
|
|
|
|
kfree(m);
|
|
|
|
}
|
|
|
|
|
2016-02-03 22:13:14 +08:00
|
|
|
static struct dm_mpath_io *get_mpio(union map_info *info)
|
|
|
|
{
|
|
|
|
return info->ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
|
2012-03-29 01:41:25 +08:00
|
|
|
{
|
|
|
|
struct dm_mpath_io *mpio;
|
|
|
|
|
2016-02-01 01:08:36 +08:00
|
|
|
if (!m->mpio_pool) {
|
|
|
|
/* Use blk-mq pdu memory requested via per_io_data_size */
|
2016-02-03 22:13:14 +08:00
|
|
|
mpio = get_mpio(info);
|
2016-02-01 01:08:36 +08:00
|
|
|
memset(mpio, 0, sizeof(*mpio));
|
|
|
|
return mpio;
|
|
|
|
}
|
|
|
|
|
2012-03-29 01:41:25 +08:00
|
|
|
mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
|
|
|
|
if (!mpio)
|
2016-02-03 22:13:14 +08:00
|
|
|
return NULL;
|
2012-03-29 01:41:25 +08:00
|
|
|
|
|
|
|
memset(mpio, 0, sizeof(*mpio));
|
|
|
|
info->ptr = mpio;
|
|
|
|
|
2016-02-03 22:13:14 +08:00
|
|
|
return mpio;
|
2012-03-29 01:41:25 +08:00
|
|
|
}
|
|
|
|
|
2016-02-03 22:13:14 +08:00
|
|
|
static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
|
2012-03-29 01:41:25 +08:00
|
|
|
{
|
2016-02-03 22:13:14 +08:00
|
|
|
/* Only needed for non blk-mq (.request_fn) multipath */
|
2016-02-01 01:08:36 +08:00
|
|
|
if (m->mpio_pool) {
|
|
|
|
struct dm_mpath_io *mpio = info->ptr;
|
2012-03-29 01:41:25 +08:00
|
|
|
|
2016-02-01 01:08:36 +08:00
|
|
|
info->ptr = NULL;
|
|
|
|
mempool_free(mpio, m->mpio_pool);
|
|
|
|
}
|
2012-03-29 01:41:25 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-05-25 03:48:08 +08:00
|
|
|
static size_t multipath_per_bio_data_size(void)
|
|
|
|
{
|
|
|
|
return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
|
|
|
|
}
|
|
|
|
|
2016-05-20 04:15:14 +08:00
|
|
|
static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
|
|
|
|
{
|
2016-05-25 03:48:08 +08:00
|
|
|
return dm_per_bio_data(bio, multipath_per_bio_data_size());
|
2016-05-20 04:15:14 +08:00
|
|
|
}
|
|
|
|
|
2016-05-25 03:48:08 +08:00
|
|
|
/* Return the dm_bio_details stored in @bio's per-bio data. */
static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio)
{
	/* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
	return (struct dm_bio_details *)(get_mpio_from_bio(bio) + 1);
}
|
|
|
|
|
|
|
|
static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p,
|
|
|
|
struct dm_bio_details **bio_details_p)
|
|
|
|
{
|
|
|
|
struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
|
|
|
|
struct dm_bio_details *bio_details = get_bio_details_from_bio(bio);
|
2016-05-20 04:15:14 +08:00
|
|
|
|
|
|
|
memset(mpio, 0, sizeof(*mpio));
|
2016-05-25 03:48:08 +08:00
|
|
|
memset(bio_details, 0, sizeof(*bio_details));
|
|
|
|
dm_bio_record(bio_details, bio);
|
2016-05-20 04:15:14 +08:00
|
|
|
|
2016-05-25 03:48:08 +08:00
|
|
|
if (mpio_p)
|
|
|
|
*mpio_p = mpio;
|
|
|
|
if (bio_details_p)
|
|
|
|
*bio_details_p = bio_details;
|
2016-05-20 04:15:14 +08:00
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*-----------------------------------------------
|
|
|
|
* Path selection
|
|
|
|
*-----------------------------------------------*/
|
|
|
|
|
dm mpath: remove process_queued_ios()
process_queued_ios() has served 3 functions:
1) select pg and pgpath if none is selected
2) start pg_init if requested
3) dispatch queued IOs when pg is ready
Basically, a call to queue_work(process_queued_ios) can be replaced by
dm_table_run_md_queue_async(), which runs request queue and ends up
calling map_io(), which does 1), 2) and 3).
The exception is when !pg_ready() (which means either pg_init is running or
requested): then multipath_busy() prevents map_io() from being called from
request_fn.
If pg_init is running, it should be ok as long as pg_init_done() does
the right thing when pg_init is completed, i.e. restarts pg_init if
!pg_ready() or calls dm_table_run_md_queue_async() to kick map_io().
If pg_init is requested, we have to make sure the request is detected
and pg_init will be started. pg_init is requested in 3 places:
a) __choose_pgpath() in map_io()
b) __choose_pgpath() in multipath_ioctl()
c) pg_init retry in pg_init_done()
a) is ok because map_io() calls __pg_init_all_paths(), which does 2).
b) needs a call to __pg_init_all_paths(), which does 2).
c) needs a call to __pg_init_all_paths(), which does 2).
So this patch removes process_queued_ios() and ensures that
__pg_init_all_paths() is called at the appropriate locations.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
2014-02-28 22:33:45 +08:00
|
|
|
static int __pg_init_all_paths(struct multipath *m)
|
2010-03-06 10:32:18 +08:00
|
|
|
{
|
|
|
|
struct pgpath *pgpath;
|
2011-01-14 04:00:01 +08:00
|
|
|
unsigned long pg_init_delay = 0;
|
2010-03-06 10:32:18 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
|
dm mpath: remove process_queued_ios()
process_queued_ios() has served 3 functions:
1) select pg and pgpath if none is selected
2) start pg_init if requested
3) dispatch queued IOs when pg is ready
Basically, a call to queue_work(process_queued_ios) can be replaced by
dm_table_run_md_queue_async(), which runs request queue and ends up
calling map_io(), which does 1), 2) and 3).
Exception is when !pg_ready() (which means either pg_init is running or
requested), then multipath_busy() prevents map_io() being called from
request_fn.
If pg_init is running, it should be ok as long as pg_init_done() does
the right thing when pg_init is completed, I.e.: restart pg_init if
!pg_ready() or call dm_table_run_md_queue_async() to kick map_io().
If pg_init is requested, we have to make sure the request is detected
and pg_init will be started. pg_init is requested in 3 places:
a) __choose_pgpath() in map_io()
b) __choose_pgpath() in multipath_ioctl()
c) pg_init retry in pg_init_done()
a) is ok because map_io() calls __pg_init_all_paths(), which does 2).
b) needs a call to __pg_init_all_paths(), which does 2).
c) needs a call to __pg_init_all_paths(), which does 2).
So this patch removes process_queued_ios() and ensures that
__pg_init_all_paths() is called at the appropriate locations.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
2014-02-28 22:33:45 +08:00
|
|
|
return 0;
|
2014-02-28 22:33:42 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
atomic_inc(&m->pg_init_count);
|
2016-03-18 04:32:10 +08:00
|
|
|
clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
|
dm mpath: remove process_queued_ios()
process_queued_ios() has served 3 functions:
1) select pg and pgpath if none is selected
2) start pg_init if requested
3) dispatch queued IOs when pg is ready
Basically, a call to queue_work(process_queued_ios) can be replaced by
dm_table_run_md_queue_async(), which runs request queue and ends up
calling map_io(), which does 1), 2) and 3).
Exception is when !pg_ready() (which means either pg_init is running or
requested), then multipath_busy() prevents map_io() being called from
request_fn.
If pg_init is running, it should be ok as long as pg_init_done() does
the right thing when pg_init is completed, I.e.: restart pg_init if
!pg_ready() or call dm_table_run_md_queue_async() to kick map_io().
If pg_init is requested, we have to make sure the request is detected
and pg_init will be started. pg_init is requested in 3 places:
a) __choose_pgpath() in map_io()
b) __choose_pgpath() in multipath_ioctl()
c) pg_init retry in pg_init_done()
a) is ok because map_io() calls __pg_init_all_paths(), which does 2).
b) needs a call to __pg_init_all_paths(), which does 2).
c) needs a call to __pg_init_all_paths(), which does 2).
So this patch removes process_queued_ios() and ensures that
__pg_init_all_paths() is called at the appropriate locations.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
2014-02-28 22:33:45 +08:00
|
|
|
|
|
|
|
/* Check here to reset pg_init_required */
|
|
|
|
if (!m->current_pg)
|
|
|
|
return 0;
|
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags))
|
2011-01-14 04:00:01 +08:00
|
|
|
pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
|
|
|
|
m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
|
2010-03-06 10:32:18 +08:00
|
|
|
list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
|
|
|
|
/* Skip failed paths */
|
|
|
|
if (!pgpath->is_active)
|
|
|
|
continue;
|
2011-01-14 04:00:01 +08:00
|
|
|
if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
|
|
|
|
pg_init_delay))
|
2016-03-18 05:10:15 +08:00
|
|
|
atomic_inc(&m->pg_init_in_progress);
|
2010-03-06 10:32:18 +08:00
|
|
|
}
|
2016-03-18 05:10:15 +08:00
|
|
|
return atomic_read(&m->pg_init_in_progress);
|
2010-03-06 10:32:18 +08:00
|
|
|
}
|
|
|
|
|
2016-11-16 07:34:09 +08:00
|
|
|
static void pg_init_all_paths(struct multipath *m)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2016-03-18 06:38:17 +08:00
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
2016-11-16 07:34:09 +08:00
|
|
|
__pg_init_all_paths(m);
|
2016-03-18 06:38:17 +08:00
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __switch_pg(struct multipath *m, struct priority_group *pg)
|
|
|
|
{
|
|
|
|
m->current_pg = pg;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Must we initialise the PG first, and queue I/O till it's ready? */
|
2008-05-02 05:50:11 +08:00
|
|
|
if (m->hw_handler_name) {
|
2016-03-18 04:32:10 +08:00
|
|
|
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
|
|
|
|
set_bit(MPATHF_QUEUE_IO, &m->flags);
|
2005-04-17 06:20:36 +08:00
|
|
|
} else {
|
2016-03-18 04:32:10 +08:00
|
|
|
clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
|
|
|
|
clear_bit(MPATHF_QUEUE_IO, &m->flags);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2007-10-20 05:47:53 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
atomic_set(&m->pg_init_count, 0);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2016-03-18 06:38:17 +08:00
|
|
|
static struct pgpath *choose_path_in_pg(struct multipath *m,
|
|
|
|
struct priority_group *pg,
|
|
|
|
size_t nr_bytes)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2016-03-18 06:38:17 +08:00
|
|
|
unsigned long flags;
|
2006-12-08 18:36:33 +08:00
|
|
|
struct dm_path *path;
|
2016-03-18 06:38:17 +08:00
|
|
|
struct pgpath *pgpath;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-02-18 10:29:17 +08:00
|
|
|
path = pg->ps.type->select_path(&pg->ps, nr_bytes);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (!path)
|
2016-03-18 06:38:17 +08:00
|
|
|
return ERR_PTR(-ENXIO);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-18 06:38:17 +08:00
|
|
|
pgpath = path_to_pgpath(path);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-18 06:38:17 +08:00
|
|
|
if (unlikely(lockless_dereference(m->current_pg) != pg)) {
|
|
|
|
/* Only update current_pgpath if pg changed */
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
m->current_pgpath = pgpath;
|
|
|
|
__switch_pg(m, pg);
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-18 06:38:17 +08:00
|
|
|
return pgpath;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2016-03-18 06:38:17 +08:00
|
|
|
/*
 * Select a path for an io of @nr_bytes.
 *
 * Order of preference: a pending next_pg, then the current PG, then a
 * scan of all priority groups (non-bypassed first, bypassed second).
 *
 * Returns the chosen pgpath, or NULL if no path could be selected; in
 * that case current_pgpath and current_pg are reset to NULL.
 */
static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	unsigned long flags;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned int pass;

	if (!atomic_read(&m->nr_valid_paths)) {
		clear_bit(MPATHF_QUEUE_IO, &m->flags);
		goto failed;
	}

	/* Were we instructed to switch PG? */
	if (lockless_dereference(m->next_pg)) {
		/* Re-read under the lock: next_pg may have been consumed */
		spin_lock_irqsave(&m->lock, flags);
		pg = m->next_pg;
		if (pg)
			m->next_pg = NULL;
		spin_unlock_irqrestore(&m->lock, flags);

		if (pg) {
			pgpath = choose_path_in_pg(m, pg, nr_bytes);
			if (!IS_ERR_OR_NULL(pgpath))
				return pgpath;
		}
	}

	/* Don't change PG until it has no remaining paths */
	pg = lockless_dereference(m->current_pg);
	if (pg) {
		pgpath = choose_path_in_pg(m, pg, nr_bytes);
		if (!IS_ERR_OR_NULL(pgpath))
			return pgpath;
	}

	/*
	 * Walk the priority groups until a valid path turns up.
	 * The first pass skips PGs marked 'bypassed'.
	 * The second pass tries only the ones skipped before, and sets
	 * pg_init_delay_retry so we do not hammer controllers.
	 */
	for (pass = 0; pass < 2; pass++) {
		bool skip_bypassed = (pass == 0);

		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == skip_bypassed)
				continue;

			pgpath = choose_path_in_pg(m, pg, nr_bytes);
			if (IS_ERR_OR_NULL(pgpath))
				continue;

			if (!skip_bypassed)
				set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
			return pgpath;
		}
	}

failed:
	spin_lock_irqsave(&m->lock, flags);
	m->current_pgpath = NULL;
	m->current_pg = NULL;
	spin_unlock_irqrestore(&m->lock, flags);

	return NULL;
}
|
|
|
|
|
2006-12-08 18:41:10 +08:00
|
|
|
/*
|
|
|
|
* Check whether bios must be queued in the device-mapper core rather
|
|
|
|
* than here in the target.
|
|
|
|
*
|
|
|
|
* If m->queue_if_no_path and m->saved_queue_if_no_path hold the
|
|
|
|
* same value then we are not between multipath_presuspend()
|
|
|
|
* and multipath_resume() calls and we have no need to check
|
|
|
|
* for the DMF_NOFLUSH_SUSPENDING flag.
|
|
|
|
*/
|
2016-05-20 04:15:14 +08:00
|
|
|
static bool __must_push_back(struct multipath *m)
|
|
|
|
{
|
|
|
|
return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
|
|
|
|
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
|
|
|
|
dm_noflush_suspending(m->ti));
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool must_push_back_rq(struct multipath *m)
|
2006-12-08 18:41:10 +08:00
|
|
|
{
|
2016-07-26 09:08:51 +08:00
|
|
|
bool r;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
r = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
|
|
|
|
__must_push_back(m));
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
|
|
|
return r;
|
2016-05-20 04:15:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool must_push_back_bio(struct multipath *m)
|
|
|
|
{
|
2016-07-26 09:08:51 +08:00
|
|
|
bool r;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
r = __must_push_back(m);
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
|
|
|
return r;
|
2006-12-08 18:41:10 +08:00
|
|
|
}
|
|
|
|
|
2014-02-28 22:33:47 +08:00
|
|
|
/*
|
2016-05-20 04:15:14 +08:00
|
|
|
* Map cloned requests (request-based multipath)
|
2014-02-28 22:33:47 +08:00
|
|
|
*/
|
2014-12-18 10:08:12 +08:00
|
|
|
static int __multipath_map(struct dm_target *ti, struct request *clone,
|
|
|
|
union map_info *map_context,
|
|
|
|
struct request *rq, struct request **__clone)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2016-02-03 10:53:15 +08:00
|
|
|
struct multipath *m = ti->private;
|
2014-02-28 22:33:46 +08:00
|
|
|
int r = DM_MAPIO_REQUEUE;
|
2014-12-18 10:08:12 +08:00
|
|
|
size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
|
2005-04-17 06:20:36 +08:00
|
|
|
struct pgpath *pgpath;
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
struct block_device *bdev;
|
2014-02-28 22:33:46 +08:00
|
|
|
struct dm_mpath_io *mpio;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Do we need to select a new pgpath? */
|
2016-03-18 06:38:17 +08:00
|
|
|
pgpath = lockless_dereference(m->current_pgpath);
|
|
|
|
if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
|
|
|
|
pgpath = choose_pgpath(m, nr_bytes);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2014-02-28 22:33:48 +08:00
|
|
|
if (!pgpath) {
|
2016-09-10 07:26:19 +08:00
|
|
|
if (must_push_back_rq(m))
|
|
|
|
return DM_MAPIO_DELAY_REQUEUE;
|
|
|
|
return -EIO; /* Failed */
|
2016-03-18 04:32:10 +08:00
|
|
|
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
|
|
|
|
test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
|
2016-03-18 06:38:17 +08:00
|
|
|
pg_init_all_paths(m);
|
|
|
|
return r;
|
2014-02-28 22:33:48 +08:00
|
|
|
}
|
2014-07-08 23:55:09 +08:00
|
|
|
|
2016-02-03 22:13:14 +08:00
|
|
|
mpio = set_mpio(m, map_context);
|
|
|
|
if (!mpio)
|
2014-02-28 22:33:48 +08:00
|
|
|
/* ENOMEM, requeue */
|
2016-03-18 06:38:17 +08:00
|
|
|
return r;
|
2014-02-28 22:33:48 +08:00
|
|
|
|
2014-10-18 07:46:36 +08:00
|
|
|
mpio->pgpath = pgpath;
|
|
|
|
mpio->nr_bytes = nr_bytes;
|
|
|
|
|
2014-02-28 22:33:48 +08:00
|
|
|
bdev = pgpath->path.dev->bdev;
|
2014-10-18 07:46:36 +08:00
|
|
|
|
2014-12-18 10:08:12 +08:00
|
|
|
if (clone) {
|
2016-02-21 03:02:49 +08:00
|
|
|
/*
|
|
|
|
* Old request-based interface: allocated clone is passed in.
|
|
|
|
* Used by: .request_fn stacked on .request_fn path(s).
|
|
|
|
*/
|
2014-12-18 10:08:12 +08:00
|
|
|
clone->q = bdev_get_queue(bdev);
|
|
|
|
clone->rq_disk = bdev->bd_disk;
|
|
|
|
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
|
|
|
|
} else {
|
2016-02-21 02:45:38 +08:00
|
|
|
/*
|
|
|
|
* blk-mq request-based interface; used by both:
|
|
|
|
* .request_fn stacked on blk-mq path(s) and
|
|
|
|
* blk-mq stacked on blk-mq path(s).
|
|
|
|
*/
|
2016-11-16 07:34:32 +08:00
|
|
|
clone = blk_mq_alloc_request(bdev_get_queue(bdev),
|
|
|
|
rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
|
|
|
|
if (IS_ERR(clone)) {
|
|
|
|
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
|
2016-02-03 22:13:14 +08:00
|
|
|
clear_request_fn_mpio(m, map_context);
|
2014-12-18 10:08:12 +08:00
|
|
|
return r;
|
2015-05-28 03:23:56 +08:00
|
|
|
}
|
2016-11-16 07:34:32 +08:00
|
|
|
clone->bio = clone->biotail = NULL;
|
|
|
|
clone->rq_disk = bdev->bd_disk;
|
|
|
|
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
|
|
|
|
*__clone = clone;
|
2014-12-18 10:08:12 +08:00
|
|
|
}
|
|
|
|
|
2014-02-28 22:33:48 +08:00
|
|
|
if (pgpath->pg->ps.type->start_io)
|
|
|
|
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
|
|
|
|
&pgpath->path,
|
|
|
|
nr_bytes);
|
2014-10-18 07:46:36 +08:00
|
|
|
return DM_MAPIO_REMAPPED;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2014-12-18 10:08:12 +08:00
|
|
|
static int multipath_map(struct dm_target *ti, struct request *clone,
|
|
|
|
union map_info *map_context)
|
|
|
|
{
|
|
|
|
return __multipath_map(ti, clone, map_context, NULL, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
|
|
|
|
union map_info *map_context,
|
|
|
|
struct request **clone)
|
|
|
|
{
|
|
|
|
return __multipath_map(ti, NULL, map_context, rq, clone);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Give a clone request back through blk_mq_free_request(). */
static void multipath_release_clone(struct request *clone)
{
	struct request *rq = clone;

	blk_mq_free_request(rq);
}
|
|
|
|
|
2016-05-20 04:15:14 +08:00
|
|
|
/*
|
|
|
|
* Map cloned bios (bio-based multipath)
|
|
|
|
*/
|
|
|
|
static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio)
|
|
|
|
{
|
|
|
|
size_t nr_bytes = bio->bi_iter.bi_size;
|
|
|
|
struct pgpath *pgpath;
|
|
|
|
unsigned long flags;
|
|
|
|
bool queue_io;
|
|
|
|
|
|
|
|
/* Do we need to select a new pgpath? */
|
|
|
|
pgpath = lockless_dereference(m->current_pgpath);
|
|
|
|
queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
|
|
|
|
if (!pgpath || !queue_io)
|
|
|
|
pgpath = choose_pgpath(m, nr_bytes);
|
|
|
|
|
|
|
|
if ((pgpath && queue_io) ||
|
|
|
|
(!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
|
|
|
|
/* Queue for the daemon to resubmit */
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
bio_list_add(&m->queued_bios, bio);
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
/* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
|
|
|
|
if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
|
|
|
|
pg_init_all_paths(m);
|
|
|
|
else if (!queue_io)
|
|
|
|
queue_work(kmultipathd, &m->process_queued_bios);
|
|
|
|
return DM_MAPIO_SUBMITTED;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!pgpath) {
|
|
|
|
if (!must_push_back_bio(m))
|
|
|
|
return -EIO;
|
|
|
|
return DM_MAPIO_REQUEUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
mpio->pgpath = pgpath;
|
|
|
|
mpio->nr_bytes = nr_bytes;
|
|
|
|
|
|
|
|
bio->bi_error = 0;
|
|
|
|
bio->bi_bdev = pgpath->path.dev->bdev;
|
2016-08-06 05:35:16 +08:00
|
|
|
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
|
2016-05-20 04:15:14 +08:00
|
|
|
|
|
|
|
if (pgpath->pg->ps.type->start_io)
|
|
|
|
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
|
|
|
|
&pgpath->path,
|
|
|
|
nr_bytes);
|
|
|
|
return DM_MAPIO_REMAPPED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
|
|
|
|
{
|
|
|
|
struct multipath *m = ti->private;
|
2016-05-25 03:48:08 +08:00
|
|
|
struct dm_mpath_io *mpio = NULL;
|
|
|
|
|
|
|
|
multipath_init_per_bio_data(bio, &mpio, NULL);
|
2016-05-20 04:15:14 +08:00
|
|
|
|
|
|
|
return __multipath_map_bio(m, bio, mpio);
|
|
|
|
}
|
|
|
|
|
2016-09-14 22:47:03 +08:00
|
|
|
static void process_queued_io_list(struct multipath *m)
|
2016-05-20 04:15:14 +08:00
|
|
|
{
|
2016-09-14 22:47:03 +08:00
|
|
|
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
|
|
|
|
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
|
|
|
|
else if (m->queue_mode == DM_TYPE_BIO_BASED)
|
2016-05-20 04:15:14 +08:00
|
|
|
queue_work(kmultipathd, &m->process_queued_bios);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void process_queued_bios(struct work_struct *work)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
unsigned long flags;
|
|
|
|
struct bio *bio;
|
|
|
|
struct bio_list bios;
|
|
|
|
struct blk_plug plug;
|
|
|
|
struct multipath *m =
|
|
|
|
container_of(work, struct multipath, process_queued_bios);
|
|
|
|
|
|
|
|
bio_list_init(&bios);
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
|
|
|
|
if (bio_list_empty(&m->queued_bios)) {
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bio_list_merge(&bios, &m->queued_bios);
|
|
|
|
bio_list_init(&m->queued_bios);
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
|
|
|
blk_start_plug(&plug);
|
|
|
|
while ((bio = bio_list_pop(&bios))) {
|
|
|
|
r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
|
|
|
|
if (r < 0 || r == DM_MAPIO_REQUEUE) {
|
|
|
|
bio->bi_error = r;
|
|
|
|
bio_endio(bio);
|
|
|
|
} else if (r == DM_MAPIO_REMAPPED)
|
|
|
|
generic_make_request(bio);
|
|
|
|
}
|
|
|
|
blk_finish_plug(&plug);
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* If we run out of usable paths, should we queue I/O or error it?
|
|
|
|
*/
|
2016-02-11 02:02:21 +08:00
|
|
|
static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
|
|
|
|
bool save_old_value)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
if (save_old_value) {
|
|
|
|
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
|
|
|
set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
|
|
|
else
|
|
|
|
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
|
|
|
} else {
|
|
|
|
if (queue_if_no_path)
|
|
|
|
set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
|
|
|
else
|
|
|
|
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
|
|
|
}
|
|
|
|
if (queue_if_no_path)
|
|
|
|
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
2005-09-28 12:45:45 +08:00
|
|
|
else
|
2016-03-18 04:32:10 +08:00
|
|
|
clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
2016-05-20 04:15:14 +08:00
|
|
|
if (!queue_if_no_path) {
|
2014-05-26 20:45:39 +08:00
|
|
|
dm_table_run_md_queue_async(m->ti->table);
|
2016-09-14 22:47:03 +08:00
|
|
|
process_queued_io_list(m);
|
2016-05-20 04:15:14 +08:00
|
|
|
}
|
2014-05-26 20:45:39 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* An event is triggered whenever a path is taken out of use.
|
|
|
|
* Includes path failure and PG bypass.
|
|
|
|
*/
|
2006-11-22 22:57:56 +08:00
|
|
|
static void trigger_event(struct work_struct *work)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2006-11-22 22:57:56 +08:00
|
|
|
struct multipath *m =
|
|
|
|
container_of(work, struct multipath, trigger_event);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
dm_table_event(m->ti->table);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*-----------------------------------------------------------------
|
|
|
|
* Constructor/argument parsing:
|
|
|
|
* <#multipath feature args> [<arg>]*
|
|
|
|
* <#hw_handler args> [hw_handler [<arg>]*]
|
|
|
|
* <#priority groups>
|
|
|
|
* <initial priority group>
|
|
|
|
* [<selector> <#selector args> [<arg>]*
|
|
|
|
* <#paths> <#per-path selector args>
|
|
|
|
* [<path> [<arg>]* ]+ ]+
|
|
|
|
*---------------------------------------------------------------*/
|
2011-08-02 19:32:04 +08:00
|
|
|
static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
|
2005-04-17 06:20:36 +08:00
|
|
|
struct dm_target *ti)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
struct path_selector_type *pst;
|
|
|
|
unsigned ps_argc;
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
static struct dm_arg _args[] = {
|
2006-06-26 15:27:35 +08:00
|
|
|
{0, 1024, "invalid number of path selector args"},
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
pst = dm_get_path_selector(dm_shift_arg(as));
|
2005-04-17 06:20:36 +08:00
|
|
|
if (!pst) {
|
2006-06-26 15:27:35 +08:00
|
|
|
ti->error = "unknown path selector type";
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
|
2008-07-21 19:00:24 +08:00
|
|
|
if (r) {
|
|
|
|
dm_put_path_selector(pst);
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
2008-07-21 19:00:24 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
r = pst->create(&pg->ps, ps_argc, as->argv);
|
|
|
|
if (r) {
|
|
|
|
dm_put_path_selector(pst);
|
2006-06-26 15:27:35 +08:00
|
|
|
ti->error = "path selector constructor failed";
|
2005-04-17 06:20:36 +08:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
pg->ps.type = pst;
|
2011-08-02 19:32:04 +08:00
|
|
|
dm_consume_args(as, ps_argc);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
|
2005-04-17 06:20:36 +08:00
|
|
|
struct dm_target *ti)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
struct pgpath *p;
|
2008-07-18 08:49:02 +08:00
|
|
|
struct multipath *m = ti->private;
|
2012-07-27 22:08:04 +08:00
|
|
|
struct request_queue *q = NULL;
|
|
|
|
const char *attached_handler_name;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* we need at least a path arg */
|
|
|
|
if (as->argc < 1) {
|
2006-06-26 15:27:35 +08:00
|
|
|
ti->error = "no device given";
|
2008-10-10 20:36:57 +08:00
|
|
|
return ERR_PTR(-EINVAL);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
p = alloc_pgpath();
|
|
|
|
if (!p)
|
2008-10-10 20:36:57 +08:00
|
|
|
return ERR_PTR(-ENOMEM);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
|
2010-03-06 10:32:27 +08:00
|
|
|
&p->path.dev);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r) {
|
2006-06-26 15:27:35 +08:00
|
|
|
ti->error = "error getting device";
|
2005-04-17 06:20:36 +08:00
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) || m->hw_handler_name)
|
2012-07-27 22:08:04 +08:00
|
|
|
q = bdev_get_queue(p->path.dev->bdev);
|
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
|
2015-08-27 20:16:54 +08:00
|
|
|
retain:
|
2012-07-27 22:08:04 +08:00
|
|
|
attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
|
|
|
|
if (attached_handler_name) {
|
2016-11-24 15:11:48 +08:00
|
|
|
/*
|
|
|
|
* Clear any hw_handler_params associated with a
|
|
|
|
* handler that isn't already attached.
|
|
|
|
*/
|
|
|
|
if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) {
|
|
|
|
kfree(m->hw_handler_params);
|
|
|
|
m->hw_handler_params = NULL;
|
|
|
|
}
|
|
|
|
|
2012-07-27 22:08:04 +08:00
|
|
|
/*
|
|
|
|
* Reset hw_handler_name to match the attached handler
|
|
|
|
*
|
|
|
|
* NB. This modifies the table line to show the actual
|
|
|
|
* handler instead of the original table passed in.
|
|
|
|
*/
|
|
|
|
kfree(m->hw_handler_name);
|
|
|
|
m->hw_handler_name = attached_handler_name;
|
|
|
|
}
|
|
|
|
}
|
2009-06-22 17:12:11 +08:00
|
|
|
|
2012-07-27 22:08:04 +08:00
|
|
|
if (m->hw_handler_name) {
|
2009-06-22 17:12:11 +08:00
|
|
|
r = scsi_dh_attach(q, m->hw_handler_name);
|
|
|
|
if (r == -EBUSY) {
|
2015-08-27 20:16:54 +08:00
|
|
|
char b[BDEVNAME_SIZE];
|
2009-06-22 17:12:11 +08:00
|
|
|
|
2015-08-27 20:16:54 +08:00
|
|
|
printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
|
|
|
|
bdevname(p->path.dev->bdev, b));
|
|
|
|
goto retain;
|
|
|
|
}
|
2008-07-18 08:49:02 +08:00
|
|
|
if (r < 0) {
|
2009-06-22 17:12:11 +08:00
|
|
|
ti->error = "error attaching hardware handler";
|
2008-07-18 08:49:02 +08:00
|
|
|
dm_put_device(ti, p->path.dev);
|
|
|
|
goto bad;
|
|
|
|
}
|
2009-08-04 03:42:45 +08:00
|
|
|
|
|
|
|
if (m->hw_handler_params) {
|
|
|
|
r = scsi_dh_set_params(q, m->hw_handler_params);
|
|
|
|
if (r < 0) {
|
|
|
|
ti->error = "unable to set hardware "
|
|
|
|
"handler parameters";
|
|
|
|
dm_put_device(ti, p->path.dev);
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
}
|
2008-07-18 08:49:02 +08:00
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
|
|
|
|
if (r) {
|
|
|
|
dm_put_device(ti, p->path.dev);
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
|
|
|
return p;
|
|
|
|
|
|
|
|
bad:
|
|
|
|
free_pgpath(p);
|
2008-10-10 20:36:57 +08:00
|
|
|
return ERR_PTR(r);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
static struct priority_group *parse_priority_group(struct dm_arg_set *as,
|
2006-10-03 16:15:33 +08:00
|
|
|
struct multipath *m)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2011-08-02 19:32:04 +08:00
|
|
|
static struct dm_arg _args[] = {
|
2006-06-26 15:27:35 +08:00
|
|
|
{1, 1024, "invalid number of paths"},
|
|
|
|
{0, 1024, "invalid number of selector args"}
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int r;
|
2011-08-02 19:32:04 +08:00
|
|
|
unsigned i, nr_selector_args, nr_args;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct priority_group *pg;
|
2006-10-03 16:15:33 +08:00
|
|
|
struct dm_target *ti = m->ti;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (as->argc < 2) {
|
|
|
|
as->argc = 0;
|
2008-10-10 20:36:57 +08:00
|
|
|
ti->error = "not enough priority group arguments";
|
|
|
|
return ERR_PTR(-EINVAL);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
pg = alloc_priority_group();
|
|
|
|
if (!pg) {
|
2006-06-26 15:27:35 +08:00
|
|
|
ti->error = "couldn't allocate priority group";
|
2008-10-10 20:36:57 +08:00
|
|
|
return ERR_PTR(-ENOMEM);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
pg->m = m;
|
|
|
|
|
|
|
|
r = parse_path_selector(as, pg, ti);
|
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* read the paths
|
|
|
|
*/
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
nr_args = 1 + nr_selector_args;
|
2005-04-17 06:20:36 +08:00
|
|
|
for (i = 0; i < pg->nr_pgpaths; i++) {
|
|
|
|
struct pgpath *pgpath;
|
2011-08-02 19:32:04 +08:00
|
|
|
struct dm_arg_set path_args;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
if (as->argc < nr_args) {
|
2008-07-21 19:00:30 +08:00
|
|
|
ti->error = "not enough path parameters";
|
2010-08-12 11:13:49 +08:00
|
|
|
r = -EINVAL;
|
2005-04-17 06:20:36 +08:00
|
|
|
goto bad;
|
2008-07-21 19:00:30 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
path_args.argc = nr_args;
|
2005-04-17 06:20:36 +08:00
|
|
|
path_args.argv = as->argv;
|
|
|
|
|
|
|
|
pgpath = parse_path(&path_args, &pg->ps, ti);
|
2008-10-10 20:36:57 +08:00
|
|
|
if (IS_ERR(pgpath)) {
|
|
|
|
r = PTR_ERR(pgpath);
|
2005-04-17 06:20:36 +08:00
|
|
|
goto bad;
|
2008-10-10 20:36:57 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
pgpath->pg = pg;
|
|
|
|
list_add_tail(&pgpath->list, &pg->pgpaths);
|
2011-08-02 19:32:04 +08:00
|
|
|
dm_consume_args(as, nr_args);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return pg;
|
|
|
|
|
|
|
|
bad:
|
|
|
|
free_priority_group(pg, ti);
|
2008-10-10 20:36:57 +08:00
|
|
|
return ERR_PTR(r);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
unsigned hw_argc;
|
2009-08-04 03:42:45 +08:00
|
|
|
int ret;
|
2006-10-03 16:15:33 +08:00
|
|
|
struct dm_target *ti = m->ti;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
static struct dm_arg _args[] = {
|
2006-06-26 15:27:35 +08:00
|
|
|
{0, 1024, "invalid number of hardware handler args"},
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (!hw_argc)
|
|
|
|
return 0;
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
if (m->queue_mode == DM_TYPE_BIO_BASED) {
|
2016-05-20 04:15:14 +08:00
|
|
|
dm_consume_args(as, hw_argc);
|
|
|
|
DMERR("bio-based multipath doesn't allow hardware handler args");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
|
2016-10-28 17:04:46 +08:00
|
|
|
if (!m->hw_handler_name)
|
|
|
|
return -EINVAL;
|
2008-11-14 07:39:06 +08:00
|
|
|
|
2009-08-04 03:42:45 +08:00
|
|
|
if (hw_argc > 1) {
|
|
|
|
char *p;
|
|
|
|
int i, j, len = 4;
|
|
|
|
|
|
|
|
for (i = 0; i <= hw_argc - 2; i++)
|
|
|
|
len += strlen(as->argv[i]) + 1;
|
|
|
|
p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
|
|
|
|
if (!p) {
|
|
|
|
ti->error = "memory allocation failed";
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
j = sprintf(p, "%d", hw_argc - 1);
|
|
|
|
for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
|
|
|
|
j = sprintf(p, "%s", as->argv[i]);
|
|
|
|
}
|
2011-08-02 19:32:04 +08:00
|
|
|
dm_consume_args(as, hw_argc - 1);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
return 0;
|
2009-08-04 03:42:45 +08:00
|
|
|
fail:
|
|
|
|
kfree(m->hw_handler_name);
|
|
|
|
m->hw_handler_name = NULL;
|
|
|
|
return ret;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
static int parse_features(struct dm_arg_set *as, struct multipath *m)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
int r;
|
|
|
|
unsigned argc;
|
2006-10-03 16:15:33 +08:00
|
|
|
struct dm_target *ti = m->ti;
|
2011-08-02 19:32:04 +08:00
|
|
|
const char *arg_name;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
static struct dm_arg _args[] = {
|
2016-05-25 09:16:51 +08:00
|
|
|
{0, 8, "invalid number of feature args"},
|
2007-10-20 05:47:53 +08:00
|
|
|
{1, 50, "pg_init_retries must be between 1 and 50"},
|
2011-01-14 04:00:01 +08:00
|
|
|
{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg_group(_args, as, &argc, &ti->error);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (!argc)
|
|
|
|
return 0;
|
|
|
|
|
2007-10-20 05:47:53 +08:00
|
|
|
do {
|
2011-08-02 19:32:04 +08:00
|
|
|
arg_name = dm_shift_arg(as);
|
2007-10-20 05:47:53 +08:00
|
|
|
argc--;
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
if (!strcasecmp(arg_name, "queue_if_no_path")) {
|
2016-02-11 02:02:21 +08:00
|
|
|
r = queue_if_no_path(m, true, false);
|
2007-10-20 05:47:53 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2012-07-27 22:08:04 +08:00
|
|
|
if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
|
2016-03-18 04:32:10 +08:00
|
|
|
set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
|
2012-07-27 22:08:04 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
if (!strcasecmp(arg_name, "pg_init_retries") &&
|
2007-10-20 05:47:53 +08:00
|
|
|
(argc >= 1)) {
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
|
2007-10-20 05:47:53 +08:00
|
|
|
argc--;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
|
2011-01-14 04:00:01 +08:00
|
|
|
(argc >= 1)) {
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
|
2011-01-14 04:00:01 +08:00
|
|
|
argc--;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
if (!strcasecmp(arg_name, "queue_mode") &&
|
|
|
|
(argc >= 1)) {
|
|
|
|
const char *queue_mode_name = dm_shift_arg(as);
|
|
|
|
|
|
|
|
if (!strcasecmp(queue_mode_name, "bio"))
|
|
|
|
m->queue_mode = DM_TYPE_BIO_BASED;
|
|
|
|
else if (!strcasecmp(queue_mode_name, "rq"))
|
|
|
|
m->queue_mode = DM_TYPE_REQUEST_BASED;
|
|
|
|
else if (!strcasecmp(queue_mode_name, "mq"))
|
|
|
|
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
|
|
|
|
else {
|
|
|
|
ti->error = "Unknown 'queue_mode' requested";
|
|
|
|
r = -EINVAL;
|
|
|
|
}
|
|
|
|
argc--;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
ti->error = "Unrecognised multipath feature request";
|
2007-10-20 05:47:53 +08:00
|
|
|
r = -EINVAL;
|
|
|
|
} while (argc && !r);
|
|
|
|
|
|
|
|
return r;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2011-08-02 19:32:04 +08:00
|
|
|
/* target arguments */
|
|
|
|
static struct dm_arg _args[] = {
|
2011-03-24 21:54:33 +08:00
|
|
|
{0, 1024, "invalid number of priority groups"},
|
|
|
|
{0, 1024, "invalid initial priority group number"},
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
int r;
|
|
|
|
struct multipath *m;
|
2011-08-02 19:32:04 +08:00
|
|
|
struct dm_arg_set as;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned pg_count = 0;
|
|
|
|
unsigned next_pg_num;
|
|
|
|
|
|
|
|
as.argc = argc;
|
|
|
|
as.argv = argv;
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
m = alloc_multipath(ti);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (!m) {
|
2006-06-26 15:27:35 +08:00
|
|
|
ti->error = "can't allocate multipath";
|
2005-04-17 06:20:36 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2006-10-03 16:15:33 +08:00
|
|
|
r = parse_features(&as, m);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
2016-05-25 09:16:51 +08:00
|
|
|
r = alloc_multipath_stage2(ti, m);
|
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
2006-10-03 16:15:33 +08:00
|
|
|
r = parse_hw_handler(&as, m);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r)
|
|
|
|
goto bad;
|
|
|
|
|
2011-03-24 21:54:33 +08:00
|
|
|
if ((!m->nr_priority_groups && next_pg_num) ||
|
|
|
|
(m->nr_priority_groups && !next_pg_num)) {
|
|
|
|
ti->error = "invalid initial priority group";
|
|
|
|
r = -EINVAL;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* parse the priority groups */
|
|
|
|
while (as.argc) {
|
|
|
|
struct priority_group *pg;
|
2016-03-18 05:10:15 +08:00
|
|
|
unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-10-03 16:15:33 +08:00
|
|
|
pg = parse_priority_group(&as, m);
|
2008-10-10 20:36:57 +08:00
|
|
|
if (IS_ERR(pg)) {
|
|
|
|
r = PTR_ERR(pg);
|
2005-04-17 06:20:36 +08:00
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
nr_valid_paths += pg->nr_pgpaths;
|
|
|
|
atomic_set(&m->nr_valid_paths, nr_valid_paths);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
list_add_tail(&pg->list, &m->priority_groups);
|
|
|
|
pg_count++;
|
|
|
|
pg->pg_num = pg_count;
|
|
|
|
if (!--next_pg_num)
|
|
|
|
m->next_pg = pg;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pg_count != m->nr_priority_groups) {
|
2006-06-26 15:27:35 +08:00
|
|
|
ti->error = "priority group count mismatch";
|
2005-04-17 06:20:36 +08:00
|
|
|
r = -EINVAL;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
2013-03-02 06:45:47 +08:00
|
|
|
ti->num_flush_bios = 1;
|
|
|
|
ti->num_discard_bios = 1;
|
2013-05-10 21:37:16 +08:00
|
|
|
ti->num_write_same_bios = 1;
|
2016-05-25 09:16:51 +08:00
|
|
|
if (m->queue_mode == DM_TYPE_BIO_BASED)
|
2016-05-25 03:48:08 +08:00
|
|
|
ti->per_io_data_size = multipath_per_bio_data_size();
|
2016-05-25 09:16:51 +08:00
|
|
|
else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
|
2016-02-01 01:08:36 +08:00
|
|
|
ti->per_io_data_size = sizeof(struct dm_mpath_io);
|
2009-06-22 17:12:24 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
bad:
|
|
|
|
free_multipath(m);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2010-03-06 10:32:13 +08:00
|
|
|
static void multipath_wait_for_pg_init_completion(struct multipath *m)
|
|
|
|
{
|
2016-09-01 06:16:43 +08:00
|
|
|
DEFINE_WAIT(wait);
|
2010-03-06 10:32:13 +08:00
|
|
|
|
|
|
|
while (1) {
|
2016-09-01 06:16:43 +08:00
|
|
|
prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE);
|
2010-03-06 10:32:13 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
if (!atomic_read(&m->pg_init_in_progress))
|
2010-03-06 10:32:13 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
io_schedule();
|
|
|
|
}
|
2016-09-01 06:16:43 +08:00
|
|
|
finish_wait(&m->pg_init_wait, &wait);
|
2010-03-06 10:32:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void flush_multipath_work(struct multipath *m)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2016-03-18 04:32:10 +08:00
|
|
|
set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
|
|
|
|
smp_mb__after_atomic();
|
2013-10-30 11:26:38 +08:00
|
|
|
|
2008-05-02 05:50:22 +08:00
|
|
|
flush_workqueue(kmpath_handlerd);
|
2010-03-06 10:32:13 +08:00
|
|
|
multipath_wait_for_pg_init_completion(m);
|
2005-07-13 06:53:02 +08:00
|
|
|
flush_workqueue(kmultipathd);
|
2012-08-21 05:51:24 +08:00
|
|
|
flush_work(&m->trigger_event);
|
2013-10-30 11:26:38 +08:00
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
|
|
|
|
smp_mb__after_atomic();
|
2009-12-11 07:52:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void multipath_dtr(struct dm_target *ti)
|
|
|
|
{
|
|
|
|
struct multipath *m = ti->private;
|
|
|
|
|
2010-03-06 10:32:13 +08:00
|
|
|
flush_multipath_work(m);
|
2005-04-17 06:20:36 +08:00
|
|
|
free_multipath(m);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Take a path out of use.
|
|
|
|
*/
|
|
|
|
static int fail_path(struct pgpath *pgpath)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
struct multipath *m = pgpath->pg->m;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
|
2008-10-10 20:36:58 +08:00
|
|
|
if (!pgpath->is_active)
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
|
2006-06-26 15:27:35 +08:00
|
|
|
DMWARN("Failing path %s.", pgpath->path.dev->name);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
|
2016-02-11 02:02:21 +08:00
|
|
|
pgpath->is_active = false;
|
2005-04-17 06:20:36 +08:00
|
|
|
pgpath->fail_count++;
|
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
atomic_dec(&m->nr_valid_paths);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
if (pgpath == m->current_pgpath)
|
|
|
|
m->current_pgpath = NULL;
|
|
|
|
|
2007-10-20 05:48:02 +08:00
|
|
|
dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
|
2016-03-18 05:10:15 +08:00
|
|
|
pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
|
2007-10-20 05:48:02 +08:00
|
|
|
|
2009-01-06 11:05:13 +08:00
|
|
|
schedule_work(&m->trigger_event);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reinstate a previously-failed path
|
|
|
|
*/
|
|
|
|
static int reinstate_path(struct pgpath *pgpath)
|
|
|
|
{
|
2014-05-26 20:45:39 +08:00
|
|
|
int r = 0, run_queue = 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned long flags;
|
|
|
|
struct multipath *m = pgpath->pg->m;
|
2016-03-18 05:10:15 +08:00
|
|
|
unsigned nr_valid_paths;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
|
2008-10-10 20:36:58 +08:00
|
|
|
if (pgpath->is_active)
|
2005-04-17 06:20:36 +08:00
|
|
|
goto out;
|
|
|
|
|
2016-02-21 01:49:43 +08:00
|
|
|
DMWARN("Reinstating path %s.", pgpath->path.dev->name);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
|
|
|
|
if (r)
|
|
|
|
goto out;
|
|
|
|
|
2016-02-11 02:02:21 +08:00
|
|
|
pgpath->is_active = true;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
nr_valid_paths = atomic_inc_return(&m->nr_valid_paths);
|
|
|
|
if (nr_valid_paths == 1) {
|
2009-06-22 17:12:12 +08:00
|
|
|
m->current_pgpath = NULL;
|
2014-05-26 20:45:39 +08:00
|
|
|
run_queue = 1;
|
2009-06-22 17:12:12 +08:00
|
|
|
} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
|
2011-01-14 04:00:01 +08:00
|
|
|
if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
|
2016-03-18 05:10:15 +08:00
|
|
|
atomic_inc(&m->pg_init_in_progress);
|
2009-06-22 17:12:12 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-10-20 05:48:02 +08:00
|
|
|
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
|
2016-03-18 05:10:15 +08:00
|
|
|
pgpath->path.dev->name, nr_valid_paths);
|
2007-10-20 05:48:02 +08:00
|
|
|
|
2009-01-06 11:05:13 +08:00
|
|
|
schedule_work(&m->trigger_event);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
2016-05-20 04:15:14 +08:00
|
|
|
if (run_queue) {
|
2014-05-26 20:45:39 +08:00
|
|
|
dm_table_run_md_queue_async(m->ti->table);
|
2016-09-14 22:47:03 +08:00
|
|
|
process_queued_io_list(m);
|
2016-05-20 04:15:14 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fail or reinstate all paths that match the provided struct dm_dev.
|
|
|
|
*/
|
|
|
|
static int action_dev(struct multipath *m, struct dm_dev *dev,
|
|
|
|
action_fn action)
|
|
|
|
{
|
2011-03-24 21:54:31 +08:00
|
|
|
int r = -EINVAL;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct pgpath *pgpath;
|
|
|
|
struct priority_group *pg;
|
|
|
|
|
|
|
|
list_for_each_entry(pg, &m->priority_groups, list) {
|
|
|
|
list_for_each_entry(pgpath, &pg->pgpaths, list) {
|
|
|
|
if (pgpath->path.dev == dev)
|
|
|
|
r = action(pgpath);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Temporarily try to avoid having to use the specified PG
|
|
|
|
*/
|
|
|
|
static void bypass_pg(struct multipath *m, struct priority_group *pg,
|
2016-02-11 02:02:21 +08:00
|
|
|
bool bypassed)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
|
|
|
|
pg->bypassed = bypassed;
|
|
|
|
m->current_pgpath = NULL;
|
|
|
|
m->current_pg = NULL;
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
2009-01-06 11:05:13 +08:00
|
|
|
schedule_work(&m->trigger_event);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Switch to using the specified PG from the next I/O that gets mapped
|
|
|
|
*/
|
|
|
|
static int switch_pg_num(struct multipath *m, const char *pgstr)
|
|
|
|
{
|
|
|
|
struct priority_group *pg;
|
|
|
|
unsigned pgnum;
|
|
|
|
unsigned long flags;
|
dm: reject trailing characters in sscanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-29 01:41:26 +08:00
|
|
|
char dummy;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-29 01:41:26 +08:00
|
|
|
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
|
2016-11-04 12:37:09 +08:00
|
|
|
!m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
|
2005-04-17 06:20:36 +08:00
|
|
|
DMWARN("invalid PG number supplied to switch_pg_num");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
list_for_each_entry(pg, &m->priority_groups, list) {
|
2016-02-11 02:02:21 +08:00
|
|
|
pg->bypassed = false;
|
2005-04-17 06:20:36 +08:00
|
|
|
if (--pgnum)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
m->current_pgpath = NULL;
|
|
|
|
m->current_pg = NULL;
|
|
|
|
m->next_pg = pg;
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
2009-01-06 11:05:13 +08:00
|
|
|
schedule_work(&m->trigger_event);
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set/clear bypassed status of a PG.
|
|
|
|
* PGs are numbered upwards from 1 in the order they were declared.
|
|
|
|
*/
|
2016-02-11 02:02:21 +08:00
|
|
|
static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
struct priority_group *pg;
|
|
|
|
unsigned pgnum;
|
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-29 01:41:26 +08:00
|
|
|
char dummy;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
dm: reject trailing characters in sccanf input
Device mapper uses sscanf to convert arguments to numbers. The problem is that
the way we use it ignores additional unmatched characters in the scanned string.
For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number,
but also it will match number with some garbage appended, like "123abc".
As a result, device mapper accepts garbage after some numbers. For example
the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"'
will pass without an error.
This patch fixes all sscanf uses in device mapper. It appends "%c" with
a pointer to a dummy character variable to every sscanf statement.
The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds
only if string is a null-terminated number (optionally preceded by some
whitespace characters). If there is some character appended after the number,
sscanf matches "%c", writes the character to the dummy variable and returns 2.
We check the return value for 1 and consequently reject numbers with some
garbage appended.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2012-03-29 01:41:26 +08:00
|
|
|
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
|
2016-11-04 12:37:09 +08:00
|
|
|
!m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
|
2005-04-17 06:20:36 +08:00
|
|
|
DMWARN("invalid PG number supplied to bypass_pg");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry(pg, &m->priority_groups, list) {
|
|
|
|
if (!--pgnum)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
bypass_pg(m, pg, bypassed);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-10-20 05:47:53 +08:00
|
|
|
/*
|
|
|
|
* Should we retry pg_init immediately?
|
|
|
|
*/
|
2016-02-11 02:02:21 +08:00
|
|
|
static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
|
2007-10-20 05:47:53 +08:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
2016-02-11 02:02:21 +08:00
|
|
|
bool limit_reached = false;
|
2007-10-20 05:47:53 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
if (atomic_read(&m->pg_init_count) <= m->pg_init_retries &&
|
|
|
|
!test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
|
2016-03-18 04:32:10 +08:00
|
|
|
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
|
2007-10-20 05:47:53 +08:00
|
|
|
else
|
2016-02-11 02:02:21 +08:00
|
|
|
limit_reached = true;
|
2007-10-20 05:47:53 +08:00
|
|
|
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
|
|
|
|
return limit_reached;
|
|
|
|
}
|
|
|
|
|
2009-10-22 00:22:46 +08:00
|
|
|
static void pg_init_done(void *data, int errors)
|
2008-05-02 05:50:11 +08:00
|
|
|
{
|
2010-03-06 10:29:45 +08:00
|
|
|
struct pgpath *pgpath = data;
|
2008-05-02 05:50:11 +08:00
|
|
|
struct priority_group *pg = pgpath->pg;
|
|
|
|
struct multipath *m = pg->m;
|
|
|
|
unsigned long flags;
|
2016-02-11 02:02:21 +08:00
|
|
|
bool delay_retry = false;
|
2008-05-02 05:50:11 +08:00
|
|
|
|
|
|
|
/* device or driver problems */
|
|
|
|
switch (errors) {
|
|
|
|
case SCSI_DH_OK:
|
|
|
|
break;
|
|
|
|
case SCSI_DH_NOSYS:
|
|
|
|
if (!m->hw_handler_name) {
|
|
|
|
errors = 0;
|
|
|
|
break;
|
|
|
|
}
|
2010-03-06 10:29:49 +08:00
|
|
|
DMERR("Could not failover the device: Handler scsi_dh_%s "
|
|
|
|
"Error %d.", m->hw_handler_name, errors);
|
2008-05-02 05:50:11 +08:00
|
|
|
/*
|
|
|
|
* Fail path for now, so we do not ping pong
|
|
|
|
*/
|
|
|
|
fail_path(pgpath);
|
|
|
|
break;
|
|
|
|
case SCSI_DH_DEV_TEMP_BUSY:
|
|
|
|
/*
|
|
|
|
* Probably doing something like FW upgrade on the
|
|
|
|
* controller so try the other pg.
|
|
|
|
*/
|
2016-02-11 02:02:21 +08:00
|
|
|
bypass_pg(m, pg, true);
|
2008-05-02 05:50:11 +08:00
|
|
|
break;
|
|
|
|
case SCSI_DH_RETRY:
|
2011-01-14 04:00:01 +08:00
|
|
|
/* Wait before retrying. */
|
|
|
|
delay_retry = 1;
|
2008-05-02 05:50:11 +08:00
|
|
|
case SCSI_DH_IMM_RETRY:
|
|
|
|
case SCSI_DH_RES_TEMP_UNAVAIL:
|
|
|
|
if (pg_init_limit_reached(m, pgpath))
|
|
|
|
fail_path(pgpath);
|
|
|
|
errors = 0;
|
|
|
|
break;
|
2016-02-21 01:49:43 +08:00
|
|
|
case SCSI_DH_DEV_OFFLINED:
|
2008-05-02 05:50:11 +08:00
|
|
|
default:
|
|
|
|
/*
|
|
|
|
* We probably do not want to fail the path for a device
|
|
|
|
* error, but this is what the old dm did. In future
|
|
|
|
* patches we can do more advanced handling.
|
|
|
|
*/
|
|
|
|
fail_path(pgpath);
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
if (errors) {
|
2009-06-22 17:12:12 +08:00
|
|
|
if (pgpath == m->current_pgpath) {
|
|
|
|
DMERR("Could not failover device. Error %d.", errors);
|
|
|
|
m->current_pgpath = NULL;
|
|
|
|
m->current_pg = NULL;
|
|
|
|
}
|
2016-03-18 04:32:10 +08:00
|
|
|
} else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
|
2016-02-11 02:02:21 +08:00
|
|
|
pg->bypassed = false;
|
2008-05-02 05:50:11 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
if (atomic_dec_return(&m->pg_init_in_progress) > 0)
|
2010-03-06 10:30:02 +08:00
|
|
|
/* Activations of other paths are still on going */
|
|
|
|
goto out;
|
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
|
|
|
|
if (delay_retry)
|
|
|
|
set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
|
|
|
|
else
|
|
|
|
clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
|
|
|
|
|
dm mpath: remove process_queued_ios()
process_queued_ios() has served 3 functions:
1) select pg and pgpath if none is selected
2) start pg_init if requested
3) dispatch queued IOs when pg is ready
Basically, a call to queue_work(process_queued_ios) can be replaced by
dm_table_run_md_queue_async(), which runs request queue and ends up
calling map_io(), which does 1), 2) and 3).
Exception is when !pg_ready() (which means either pg_init is running or
requested), then multipath_busy() prevents map_io() being called from
request_fn.
If pg_init is running, it should be ok as long as pg_init_done() does
the right thing when pg_init is completed, I.e.: restart pg_init if
!pg_ready() or call dm_table_run_md_queue_async() to kick map_io().
If pg_init is requested, we have to make sure the request is detected
and pg_init will be started. pg_init is requested in 3 places:
a) __choose_pgpath() in map_io()
b) __choose_pgpath() in multipath_ioctl()
c) pg_init retry in pg_init_done()
a) is ok because map_io() calls __pg_init_all_paths(), which does 2).
b) needs a call to __pg_init_all_paths(), which does 2).
c) needs a call to __pg_init_all_paths(), which does 2).
So this patch removes process_queued_ios() and ensures that
__pg_init_all_paths() is called at the appropriate locations.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
2014-02-28 22:33:45 +08:00
|
|
|
if (__pg_init_all_paths(m))
|
|
|
|
goto out;
|
|
|
|
}
|
2016-03-18 04:32:10 +08:00
|
|
|
clear_bit(MPATHF_QUEUE_IO, &m->flags);
|
2010-03-06 10:30:02 +08:00
|
|
|
|
2016-09-14 22:47:03 +08:00
|
|
|
process_queued_io_list(m);
|
2016-05-20 04:15:14 +08:00
|
|
|
|
2010-03-06 10:32:13 +08:00
|
|
|
/*
|
|
|
|
* Wake up any thread waiting to suspend.
|
|
|
|
*/
|
|
|
|
wake_up(&m->pg_init_wait);
|
|
|
|
|
2010-03-06 10:30:02 +08:00
|
|
|
out:
|
2008-05-02 05:50:11 +08:00
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
}
|
|
|
|
|
2008-05-02 05:50:22 +08:00
|
|
|
static void activate_path(struct work_struct *work)
|
|
|
|
{
|
2009-06-22 17:12:12 +08:00
|
|
|
struct pgpath *pgpath =
|
2011-01-14 04:00:01 +08:00
|
|
|
container_of(work, struct pgpath, activate_path.work);
|
2016-09-02 00:06:37 +08:00
|
|
|
struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
|
2008-05-02 05:50:22 +08:00
|
|
|
|
2016-09-02 00:06:37 +08:00
|
|
|
if (pgpath->is_active && !blk_queue_dying(q))
|
|
|
|
scsi_dh_activate(q, pg_init_done, pgpath);
|
2014-02-28 22:33:49 +08:00
|
|
|
else
|
|
|
|
pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
|
2008-05-02 05:50:22 +08:00
|
|
|
}
|
|
|
|
|
2013-07-01 21:16:26 +08:00
|
|
|
/*
 * Classify an I/O error code.
 *
 * Returns 1 for errors that should be handed back as they are, and 0 for
 * anything else, which could be a path failure and so should be retried.
 */
static int noretry_error(int error)
{
	/*
	 * -EBADE signals a reservation conflict.  The target can still be
	 * reached, so the path should not be failed.  Failing over to the
	 * next path might cause a ping-pong between paths, so the
	 * reservation conflict error is simply returned.
	 */
	if (error == -EBADE)
		return 1;

	if (error == -EOPNOTSUPP || error == -EREMOTEIO ||
	    error == -EILSEQ || error == -ENODATA || error == -ENOSPC)
		return 1;

	/* Anything else could be a path failure, so should be retried */
	return 0;
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* end_io handling
|
|
|
|
*/
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
static int do_end_io(struct multipath *m, struct request *clone,
|
2007-07-13 00:26:32 +08:00
|
|
|
int error, struct dm_mpath_io *mpio)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
/*
|
|
|
|
* We don't queue any clone request inside the multipath target
|
|
|
|
* during end I/O handling, since those clone requests don't have
|
|
|
|
* bio clones. If we queue them inside the multipath target,
|
|
|
|
* we need to make bio clones, that requires memory allocation.
|
2016-05-13 04:28:10 +08:00
|
|
|
* (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
* don't have bio clones.)
|
|
|
|
* Instead of queueing the clone request here, we queue the original
|
|
|
|
* request into dm core, which will remake a clone request and
|
|
|
|
* clone bios for it and resubmit it later.
|
|
|
|
*/
|
|
|
|
int r = DM_ENDIO_REQUEUE;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
if (!error && !clone->errors)
|
2005-04-17 06:20:36 +08:00
|
|
|
return 0; /* I/O complete */
|
|
|
|
|
2014-06-03 03:50:06 +08:00
|
|
|
if (noretry_error(error))
|
2010-08-12 11:14:32 +08:00
|
|
|
return error;
|
|
|
|
|
2008-05-02 05:50:11 +08:00
|
|
|
if (mpio->pgpath)
|
|
|
|
fail_path(mpio->pgpath);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-03-18 05:10:15 +08:00
|
|
|
if (!atomic_read(&m->nr_valid_paths)) {
|
2016-03-18 04:32:10 +08:00
|
|
|
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
|
2016-05-20 04:15:14 +08:00
|
|
|
if (!must_push_back_rq(m))
|
2011-01-18 17:13:12 +08:00
|
|
|
r = -EIO;
|
|
|
|
}
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
return r;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
static int multipath_end_io(struct dm_target *ti, struct request *clone,
|
2005-04-17 06:20:36 +08:00
|
|
|
int error, union map_info *map_context)
|
|
|
|
{
|
2007-07-13 00:26:32 +08:00
|
|
|
struct multipath *m = ti->private;
|
2016-02-03 22:13:14 +08:00
|
|
|
struct dm_mpath_io *mpio = get_mpio(map_context);
|
2012-10-12 23:59:42 +08:00
|
|
|
struct pgpath *pgpath;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct path_selector *ps;
|
|
|
|
int r;
|
|
|
|
|
2012-03-29 01:41:25 +08:00
|
|
|
BUG_ON(!mpio);
|
|
|
|
|
2016-02-03 22:13:14 +08:00
|
|
|
r = do_end_io(m, clone, error, mpio);
|
2012-10-12 23:59:42 +08:00
|
|
|
pgpath = mpio->pgpath;
|
2005-04-17 06:20:36 +08:00
|
|
|
if (pgpath) {
|
|
|
|
ps = &pgpath->pg->ps;
|
|
|
|
if (ps->type->end_io)
|
2009-06-22 17:12:27 +08:00
|
|
|
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2016-02-03 22:13:14 +08:00
|
|
|
clear_request_fn_mpio(m, map_context);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2016-05-20 04:15:14 +08:00
|
|
|
static int do_end_io_bio(struct multipath *m, struct bio *clone,
|
|
|
|
int error, struct dm_mpath_io *mpio)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
if (!error)
|
|
|
|
return 0; /* I/O complete */
|
|
|
|
|
|
|
|
if (noretry_error(error))
|
|
|
|
return error;
|
|
|
|
|
|
|
|
if (mpio->pgpath)
|
|
|
|
fail_path(mpio->pgpath);
|
|
|
|
|
|
|
|
if (!atomic_read(&m->nr_valid_paths)) {
|
|
|
|
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
|
|
|
|
if (!must_push_back_bio(m))
|
|
|
|
return -EIO;
|
|
|
|
return DM_ENDIO_REQUEUE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Queue for the daemon to resubmit */
|
2016-05-25 03:48:08 +08:00
|
|
|
dm_bio_restore(get_bio_details_from_bio(clone), clone);
|
2016-05-20 04:15:14 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
bio_list_add(&m->queued_bios, clone);
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
|
|
|
|
queue_work(kmultipathd, &m->process_queued_bios);
|
|
|
|
|
|
|
|
return DM_ENDIO_INCOMPLETE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
|
|
|
|
{
|
|
|
|
struct multipath *m = ti->private;
|
|
|
|
struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
|
|
|
|
struct pgpath *pgpath;
|
|
|
|
struct path_selector *ps;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
BUG_ON(!mpio);
|
|
|
|
|
|
|
|
r = do_end_io_bio(m, clone, error, mpio);
|
|
|
|
pgpath = mpio->pgpath;
|
|
|
|
if (pgpath) {
|
|
|
|
ps = &pgpath->pg->ps;
|
|
|
|
if (ps->type->end_io)
|
|
|
|
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
|
|
|
|
}
|
|
|
|
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Suspend can't complete until all the I/O is processed so if
|
2005-07-13 06:53:03 +08:00
|
|
|
* the last path fails we must error any remaining I/O.
|
|
|
|
* Note that if the freeze_bdev fails while suspending, the
|
|
|
|
* queue_if_no_path state is lost - userspace should reset it.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
static void multipath_presuspend(struct dm_target *ti)
|
|
|
|
{
|
2016-02-03 10:53:15 +08:00
|
|
|
struct multipath *m = ti->private;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-02-11 02:02:21 +08:00
|
|
|
queue_if_no_path(m, false, true);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2009-12-11 07:52:19 +08:00
|
|
|
static void multipath_postsuspend(struct dm_target *ti)
|
|
|
|
{
|
2009-12-11 07:52:21 +08:00
|
|
|
struct multipath *m = ti->private;
|
|
|
|
|
|
|
|
mutex_lock(&m->work_mutex);
|
2010-03-06 10:32:13 +08:00
|
|
|
flush_multipath_work(m);
|
2009-12-11 07:52:21 +08:00
|
|
|
mutex_unlock(&m->work_mutex);
|
2009-12-11 07:52:19 +08:00
|
|
|
}
|
|
|
|
|
2005-07-13 06:53:03 +08:00
|
|
|
/*
|
|
|
|
* Restore the queue_if_no_path setting.
|
|
|
|
*/
|
2005-04-17 06:20:36 +08:00
|
|
|
static void multipath_resume(struct dm_target *ti)
|
|
|
|
{
|
2016-02-03 10:53:15 +08:00
|
|
|
struct multipath *m = ti->private;
|
2016-07-26 09:08:51 +08:00
|
|
|
unsigned long flags;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2016-07-26 09:08:51 +08:00
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
2016-03-18 04:32:10 +08:00
|
|
|
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags))
|
|
|
|
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
|
|
|
else
|
|
|
|
clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
2016-07-26 09:08:51 +08:00
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Info output has the following format:
|
|
|
|
* num_multipath_feature_args [multipath_feature_args]*
|
|
|
|
* num_handler_status_args [handler_status_args]*
|
|
|
|
* num_groups init_group_number
|
|
|
|
* [A|D|E num_ps_status_args [ps_status_args]*
|
|
|
|
* num_paths num_selector_args
|
|
|
|
* [path_dev A|F fail_count [selector_args]* ]+ ]+
|
|
|
|
*
|
|
|
|
* Table output has the following format (identical to the constructor string):
|
|
|
|
* num_feature_args [features_args]*
|
|
|
|
* num_handler_args hw_handler [hw_handler_args]*
|
|
|
|
* num_groups init_group_number
|
|
|
|
* [priority selector-name num_ps_args [ps_args]*
|
|
|
|
* num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
|
|
|
|
*/
|
2013-03-02 06:45:44 +08:00
|
|
|
static void multipath_status(struct dm_target *ti, status_type_t type,
|
|
|
|
unsigned status_flags, char *result, unsigned maxlen)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
int sz = 0;
|
|
|
|
unsigned long flags;
|
2016-02-03 10:53:15 +08:00
|
|
|
struct multipath *m = ti->private;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct priority_group *pg;
|
|
|
|
struct pgpath *p;
|
|
|
|
unsigned pg_num;
|
|
|
|
char state;
|
|
|
|
|
|
|
|
spin_lock_irqsave(&m->lock, flags);
|
|
|
|
|
|
|
|
/* Features */
|
|
|
|
if (type == STATUSTYPE_INFO)
|
2016-03-18 05:10:15 +08:00
|
|
|
DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags),
|
|
|
|
atomic_read(&m->pg_init_count));
|
2007-10-20 05:47:53 +08:00
|
|
|
else {
|
2016-03-18 04:32:10 +08:00
|
|
|
DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
|
2011-01-14 04:00:01 +08:00
|
|
|
(m->pg_init_retries > 0) * 2 +
|
2012-07-27 22:08:04 +08:00
|
|
|
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
|
2016-05-25 09:16:51 +08:00
|
|
|
test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) +
|
|
|
|
(m->queue_mode != DM_TYPE_REQUEST_BASED) * 2);
|
|
|
|
|
2016-03-18 04:32:10 +08:00
|
|
|
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
2007-10-20 05:47:53 +08:00
|
|
|
DMEMIT("queue_if_no_path ");
|
|
|
|
if (m->pg_init_retries)
|
|
|
|
DMEMIT("pg_init_retries %u ", m->pg_init_retries);
|
2011-01-14 04:00:01 +08:00
|
|
|
if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
|
|
|
|
DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
|
2016-03-18 04:32:10 +08:00
|
|
|
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
|
2012-07-27 22:08:04 +08:00
|
|
|
DMEMIT("retain_attached_hw_handler ");
|
2016-05-25 09:16:51 +08:00
|
|
|
if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
|
|
|
|
switch(m->queue_mode) {
|
|
|
|
case DM_TYPE_BIO_BASED:
|
|
|
|
DMEMIT("queue_mode bio ");
|
|
|
|
break;
|
|
|
|
case DM_TYPE_MQ_REQUEST_BASED:
|
|
|
|
DMEMIT("queue_mode mq ");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2007-10-20 05:47:53 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-05-02 05:50:11 +08:00
|
|
|
if (!m->hw_handler_name || type == STATUSTYPE_INFO)
|
2005-04-17 06:20:36 +08:00
|
|
|
DMEMIT("0 ");
|
|
|
|
else
|
2008-05-02 05:50:11 +08:00
|
|
|
DMEMIT("1 %s ", m->hw_handler_name);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
DMEMIT("%u ", m->nr_priority_groups);
|
|
|
|
|
|
|
|
if (m->next_pg)
|
|
|
|
pg_num = m->next_pg->pg_num;
|
|
|
|
else if (m->current_pg)
|
|
|
|
pg_num = m->current_pg->pg_num;
|
|
|
|
else
|
2011-03-24 21:54:33 +08:00
|
|
|
pg_num = (m->nr_priority_groups ? 1 : 0);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
DMEMIT("%u ", pg_num);
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case STATUSTYPE_INFO:
|
|
|
|
list_for_each_entry(pg, &m->priority_groups, list) {
|
|
|
|
if (pg->bypassed)
|
|
|
|
state = 'D'; /* Disabled */
|
|
|
|
else if (pg == m->current_pg)
|
|
|
|
state = 'A'; /* Currently Active */
|
|
|
|
else
|
|
|
|
state = 'E'; /* Enabled */
|
|
|
|
|
|
|
|
DMEMIT("%c ", state);
|
|
|
|
|
|
|
|
if (pg->ps.type->status)
|
|
|
|
sz += pg->ps.type->status(&pg->ps, NULL, type,
|
|
|
|
result + sz,
|
|
|
|
maxlen - sz);
|
|
|
|
else
|
|
|
|
DMEMIT("0 ");
|
|
|
|
|
|
|
|
DMEMIT("%u %u ", pg->nr_pgpaths,
|
|
|
|
pg->ps.type->info_args);
|
|
|
|
|
|
|
|
list_for_each_entry(p, &pg->pgpaths, list) {
|
|
|
|
DMEMIT("%s %s %u ", p->path.dev->name,
|
2008-10-10 20:36:58 +08:00
|
|
|
p->is_active ? "A" : "F",
|
2005-04-17 06:20:36 +08:00
|
|
|
p->fail_count);
|
|
|
|
if (pg->ps.type->status)
|
|
|
|
sz += pg->ps.type->status(&pg->ps,
|
|
|
|
&p->path, type, result + sz,
|
|
|
|
maxlen - sz);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case STATUSTYPE_TABLE:
|
|
|
|
list_for_each_entry(pg, &m->priority_groups, list) {
|
|
|
|
DMEMIT("%s ", pg->ps.type->name);
|
|
|
|
|
|
|
|
if (pg->ps.type->status)
|
|
|
|
sz += pg->ps.type->status(&pg->ps, NULL, type,
|
|
|
|
result + sz,
|
|
|
|
maxlen - sz);
|
|
|
|
else
|
|
|
|
DMEMIT("0 ");
|
|
|
|
|
|
|
|
DMEMIT("%u %u ", pg->nr_pgpaths,
|
|
|
|
pg->ps.type->table_args);
|
|
|
|
|
|
|
|
list_for_each_entry(p, &pg->pgpaths, list) {
|
|
|
|
DMEMIT("%s ", p->path.dev->name);
|
|
|
|
if (pg->ps.type->status)
|
|
|
|
sz += pg->ps.type->status(&pg->ps,
|
|
|
|
&p->path, type, result + sz,
|
|
|
|
maxlen - sz);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&m->lock, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
|
|
|
|
{
|
2009-12-11 07:52:21 +08:00
|
|
|
int r = -EINVAL;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct dm_dev *dev;
|
2016-02-03 10:53:15 +08:00
|
|
|
struct multipath *m = ti->private;
|
2005-04-17 06:20:36 +08:00
|
|
|
action_fn action;
|
|
|
|
|
2009-12-11 07:52:21 +08:00
|
|
|
mutex_lock(&m->work_mutex);
|
|
|
|
|
2009-12-11 07:52:27 +08:00
|
|
|
if (dm_suspended(ti)) {
|
|
|
|
r = -EBUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
if (argc == 1) {
|
2011-08-02 19:32:04 +08:00
|
|
|
if (!strcasecmp(argv[0], "queue_if_no_path")) {
|
2016-02-11 02:02:21 +08:00
|
|
|
r = queue_if_no_path(m, true, false);
|
2009-12-11 07:52:21 +08:00
|
|
|
goto out;
|
2011-08-02 19:32:04 +08:00
|
|
|
} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
|
2016-02-11 02:02:21 +08:00
|
|
|
r = queue_if_no_path(m, false, false);
|
2009-12-11 07:52:21 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2009-12-11 07:52:21 +08:00
|
|
|
if (argc != 2) {
|
2014-01-30 00:52:45 +08:00
|
|
|
DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc);
|
2009-12-11 07:52:21 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2011-08-02 19:32:04 +08:00
|
|
|
if (!strcasecmp(argv[0], "disable_group")) {
|
2016-02-11 02:02:21 +08:00
|
|
|
r = bypass_pg_num(m, argv[1], true);
|
2009-12-11 07:52:21 +08:00
|
|
|
goto out;
|
2011-08-02 19:32:04 +08:00
|
|
|
} else if (!strcasecmp(argv[0], "enable_group")) {
|
2016-02-11 02:02:21 +08:00
|
|
|
r = bypass_pg_num(m, argv[1], false);
|
2009-12-11 07:52:21 +08:00
|
|
|
goto out;
|
2011-08-02 19:32:04 +08:00
|
|
|
} else if (!strcasecmp(argv[0], "switch_group")) {
|
2009-12-11 07:52:21 +08:00
|
|
|
r = switch_pg_num(m, argv[1]);
|
|
|
|
goto out;
|
2011-08-02 19:32:04 +08:00
|
|
|
} else if (!strcasecmp(argv[0], "reinstate_path"))
|
2005-04-17 06:20:36 +08:00
|
|
|
action = reinstate_path;
|
2011-08-02 19:32:04 +08:00
|
|
|
else if (!strcasecmp(argv[0], "fail_path"))
|
2005-04-17 06:20:36 +08:00
|
|
|
action = fail_path;
|
2009-12-11 07:52:21 +08:00
|
|
|
else {
|
2014-01-30 00:52:45 +08:00
|
|
|
DMWARN("Unrecognised multipath message received: %s", argv[0]);
|
2009-12-11 07:52:21 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-03-06 10:32:27 +08:00
|
|
|
r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (r) {
|
2006-06-26 15:27:35 +08:00
|
|
|
DMWARN("message: error getting device %s",
|
2005-04-17 06:20:36 +08:00
|
|
|
argv[1]);
|
2009-12-11 07:52:21 +08:00
|
|
|
goto out;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
r = action_dev(m, dev, action);
|
|
|
|
|
|
|
|
dm_put_device(ti, dev);
|
|
|
|
|
2009-12-11 07:52:21 +08:00
|
|
|
out:
|
|
|
|
mutex_unlock(&m->work_mutex);
|
2005-04-17 06:20:36 +08:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2015-10-15 20:10:50 +08:00
|
|
|
/*
 * Pick the block device an ioctl should be forwarded to.
 *
 * On success (*bdev, *mode) describe the device of the current path (or of
 * a freshly chosen one if none was current).
 *
 * Returns:
 *   0         - a device was selected and its size matches the target length
 *   1         - a device was selected but its size differs from the target
 *   -ENOTCONN - a path exists but MPATHF_QUEUE_IO is set, or no path exists
 *               and MPATHF_QUEUE_IF_NO_PATH is set
 *   -EIO      - no path exists and MPATHF_QUEUE_IF_NO_PATH is clear
 */
static int multipath_prepare_ioctl(struct dm_target *ti,
		struct block_device **bdev, fmode_t *mode)
{
	struct multipath *m = ti->private;
	struct pgpath *pgpath;
	struct dm_dev *dev;
	int r;

	pgpath = lockless_dereference(m->current_pgpath);
	if (!pgpath)
		pgpath = choose_pgpath(m, 0);

	if (!pgpath) {
		/* No path is available */
		r = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ? -ENOTCONN : -EIO;
	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags)) {
		/* pg_init has not started or completed */
		r = -ENOTCONN;
	} else {
		dev = pgpath->path.dev;
		*bdev = dev->bdev;
		*mode = dev->mode;
		r = 0;
	}

	if (r == -ENOTCONN) {
		/* Path status changed, redo selection */
		if (!lockless_dereference(m->current_pg))
			(void) choose_pgpath(m, 0);

		if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
			pg_init_all_paths(m);
		dm_table_run_md_queue_async(m->ti->table);
		process_queued_io_list(m);
	}

	if (r)
		return r;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
		return 1;

	return 0;
}
|
|
|
|
|
2009-06-22 17:12:33 +08:00
|
|
|
static int multipath_iterate_devices(struct dm_target *ti,
|
|
|
|
iterate_devices_callout_fn fn, void *data)
|
|
|
|
{
|
|
|
|
struct multipath *m = ti->private;
|
|
|
|
struct priority_group *pg;
|
|
|
|
struct pgpath *p;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
list_for_each_entry(pg, &m->priority_groups, list) {
|
|
|
|
list_for_each_entry(p, &pg->pgpaths, list) {
|
2009-07-24 03:30:42 +08:00
|
|
|
ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
|
2009-06-22 17:12:33 +08:00
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-02-12 10:42:28 +08:00
|
|
|
static int pgpath_busy(struct pgpath *pgpath)
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
{
|
|
|
|
struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
|
|
|
|
|
2015-02-24 05:36:41 +08:00
|
|
|
return blk_lld_busy(q);
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We return "busy", only when we can map I/Os but underlying devices
|
|
|
|
* are busy (so even if we map I/Os now, the I/Os will wait on
|
|
|
|
* the underlying queue).
|
|
|
|
* In other words, if we want to kill I/Os or queue them inside us
|
|
|
|
* due to map unavailability, we don't return "busy". Otherwise,
|
|
|
|
* dm core won't give us the I/Os and we can't do what we want.
|
|
|
|
*/
|
|
|
|
static int multipath_busy(struct dm_target *ti)
|
|
|
|
{
|
2016-02-11 02:02:21 +08:00
|
|
|
bool busy = false, has_active = false;
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
struct multipath *m = ti->private;
|
2016-03-18 06:38:17 +08:00
|
|
|
struct priority_group *pg, *next_pg;
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
struct pgpath *pgpath;
|
|
|
|
|
2016-09-10 07:26:19 +08:00
|
|
|
/* pg_init in progress */
|
|
|
|
if (atomic_read(&m->pg_init_in_progress))
|
2016-03-18 06:38:17 +08:00
|
|
|
return true;
|
|
|
|
|
2016-09-10 07:26:19 +08:00
|
|
|
/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
|
|
|
|
if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
|
|
|
return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
|
|
|
|
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
/* Guess which priority_group will be used at next mapping time */
|
2016-03-18 06:38:17 +08:00
|
|
|
pg = lockless_dereference(m->current_pg);
|
|
|
|
next_pg = lockless_dereference(m->next_pg);
|
|
|
|
if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
|
|
|
|
pg = next_pg;
|
|
|
|
|
|
|
|
if (!pg) {
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
/*
|
|
|
|
* We don't know which pg will be used at next mapping time.
|
2016-03-18 06:38:17 +08:00
|
|
|
* We don't call choose_pgpath() here to avoid triggering
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
* pg_init just by busy checking.
|
|
|
|
* So we don't know whether underlying devices we will be using
|
|
|
|
* at next mapping time are busy or not. Just try mapping.
|
|
|
|
*/
|
2016-03-18 06:38:17 +08:00
|
|
|
return busy;
|
|
|
|
}
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is one non-busy active path at least, the path selector
|
|
|
|
* will be able to select it. So we consider such a pg as not busy.
|
|
|
|
*/
|
2016-02-11 02:02:21 +08:00
|
|
|
busy = true;
|
2016-03-18 06:38:17 +08:00
|
|
|
list_for_each_entry(pgpath, &pg->pgpaths, list) {
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
if (pgpath->is_active) {
|
2016-02-11 02:02:21 +08:00
|
|
|
has_active = true;
|
2016-02-12 10:42:28 +08:00
|
|
|
if (!pgpath_busy(pgpath)) {
|
2016-02-11 02:02:21 +08:00
|
|
|
busy = false;
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2016-03-18 06:38:17 +08:00
|
|
|
}
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
|
2016-03-18 06:38:17 +08:00
|
|
|
if (!has_active) {
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
/*
|
|
|
|
* No active path in this pg, so this pg won't be used and
|
|
|
|
* the current_pg will be changed at next mapping time.
|
|
|
|
* We need to try mapping to determine it.
|
|
|
|
*/
|
2016-02-11 02:02:21 +08:00
|
|
|
busy = false;
|
2016-03-18 06:38:17 +08:00
|
|
|
}
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O completion, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
|
|
|
|
return busy;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*-----------------------------------------------------------------
|
|
|
|
* Module setup
|
|
|
|
*---------------------------------------------------------------*/
|
|
|
|
static struct target_type multipath_target = {
|
|
|
|
.name = "multipath",
|
2016-05-25 09:16:51 +08:00
|
|
|
.version = {1, 12, 0},
|
2016-02-01 06:22:27 +08:00
|
|
|
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
|
2005-04-17 06:20:36 +08:00
|
|
|
.module = THIS_MODULE,
|
|
|
|
.ctr = multipath_ctr,
|
|
|
|
.dtr = multipath_dtr,
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
.map_rq = multipath_map,
|
2014-12-18 10:08:12 +08:00
|
|
|
.clone_and_map_rq = multipath_clone_and_map,
|
|
|
|
.release_clone_rq = multipath_release_clone,
|
dm mpath: change to be request based
This patch converts dm-multipath target to request-based from bio-based.
Basically, the patch just converts the I/O unit from struct bio
to struct request.
In the course of the conversion, it also changes the I/O queueing
mechanism. The change in the I/O queueing is described in details
as follows.
I/O queueing mechanism change
-----------------------------
In I/O submission, map_io(), there is no mechanism change from
bio-based, since the clone request is ready for retry as it is.
However, in I/O complition, do_end_io(), there is a mechanism change
from bio-based, since the clone request is not ready for retry.
In do_end_io() of bio-based, the clone bio has all needed memory
for resubmission. So the target driver can queue it and resubmit
it later without memory allocations.
The mechanism has almost no overhead.
On the other hand, in do_end_io() of request-based, the clone request
doesn't have clone bios, so the target driver can't resubmit it
as it is. To resubmit the clone request, memory allocation for
clone bios is needed, and it takes some overheads.
To avoid the overheads just for queueing, the target driver doesn't
queue the clone request inside itself.
Instead, the target driver asks dm core for queueing and remapping
the original request of the clone request, since the overhead for
queueing is just a freeing memory for the clone request.
As a result, the target driver doesn't need to record/restore
the information of the original request for resubmitting
the clone request. So dm_bio_details in dm_mpath_io is removed.
multipath_busy()
---------------------
The target driver returns "busy", only when the following case:
o The target driver will map I/Os, if map() function is called
and
o The mapped I/Os will wait on underlying device's queue due to
their congestions, if map() function is called now.
In other cases, the target driver doesn't return "busy".
Otherwise, dm core will keep the I/Os and the target driver can't
do what it wants.
(e.g. the target driver can't map I/Os now, so wants to kill I/Os.)
Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
2009-06-22 17:12:37 +08:00
|
|
|
.rq_end_io = multipath_end_io,
|
2016-05-20 04:15:14 +08:00
|
|
|
.map = multipath_map_bio,
|
|
|
|
.end_io = multipath_end_io_bio,
|
|
|
|
.presuspend = multipath_presuspend,
|
|
|
|
.postsuspend = multipath_postsuspend,
|
|
|
|
.resume = multipath_resume,
|
|
|
|
.status = multipath_status,
|
|
|
|
.message = multipath_message,
|
|
|
|
.prepare_ioctl = multipath_prepare_ioctl,
|
|
|
|
.iterate_devices = multipath_iterate_devices,
|
|
|
|
.busy = multipath_busy,
|
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * Module entry point: set up everything the multipath target needs.
 *
 * Creates the dm_mpath_io slab cache, registers multipath_target with
 * dm_register_target() and allocates the two workqueues, kmultipathd and
 * kmpath_handlerd.  When a step fails, everything set up before it is
 * released again in reverse order through the labels at the bottom.
 *
 * Returns 0 on success or a negative errno: -ENOMEM when the slab cache
 * or a workqueue cannot be allocated, otherwise the error reported by
 * dm_register_target().
 */
static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_mpath_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		/*
		 * Propagate the error exactly as reported instead of
		 * collapsing it to -EINVAL, so the real cause is not lost.
		 */
		DMERR("request-based register failed %d", r);
		goto bad_register_target;
	}

	kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		r = -ENOMEM;
		goto bad_alloc_kmultipathd;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
						  WQ_MEM_RECLAIM);
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		r = -ENOMEM;
		goto bad_alloc_kmpath_handlerd;
	}

	return 0;

	/* Error unwinding: each label undoes the step set up just before it. */
bad_alloc_kmpath_handlerd:
	destroy_workqueue(kmultipathd);
bad_alloc_kmultipathd:
	dm_unregister_target(&multipath_target);
bad_register_target:
	kmem_cache_destroy(_mpio_cache);

	return r;
}
|
|
|
|
|
|
|
|
/*
 * Module exit point: release what dm_multipath_init() set up.
 *
 * Both workqueues are destroyed first, then the target is unregistered
 * and finally the dm_mpath_io slab cache is destroyed.
 */
static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}
|
|
|
|
|
|
|
|
module_init(dm_multipath_init);
|
|
|
|
module_exit(dm_multipath_exit);
|
|
|
|
|
|
|
|
MODULE_DESCRIPTION(DM_NAME " multipath target");
|
|
|
|
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
|
|
|
|
MODULE_LICENSE("GPL");
|