Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A smaller collection of fixes that should go into -rc1. This contains:

   - A fix from Christoph, fixing a regression with the WRITE_SAME and
     partial completions. Caused a BUG() on ppc.

   - Fixup for __blk_mq_stop_hw_queues(), it should be static. From Colin.

   - Removal of dmesg error messages on elevator switching, when invoked
     from sysfs. From me.

   - Fix for blk-stat, using this_cpu_ptr() in a section only protected
     by rcu_read_lock(). This breaks when PREEMPT_RCU is enabled. From me.

   - Two fixes for BFQ from Paolo, one fixing a crash and one updating
     the documentation.

   - An error handling lightnvm memory leak, from Rakesh.

   - The previous blk-mq hot unplug lock reversal depends on the CPU
     hotplug rework that isn't in mainline yet. This caused a lockdep
     splat when people unplugged CPUs with blk-mq devices. From Wanpeng.

   - A regression fix for DIF/DIX on blk-mq. From Wen"

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: handle partial completions for special payload requests
  blk-mq: NVMe 512B/4K+T10 DIF/DIX format returns I/O error on dd with split op
  blk-stat: don't use this_cpu_ptr() in a preemptable section
  elevator: remove redundant warnings on IO scheduler switch
  block, bfq: stress that low_latency must be off to get max throughput
  block, bfq: use pointer entity->sched_data only if set
  nvme: lightnvm: fix memory leak
  blk-mq: make __blk_mq_stop_hw_queues static
  lightnvm: remove unused rq parameter of nvme_nvm_rqtocmd() to kill warning
  block/mq: fix potential deadlock during cpu hotplug
This commit is contained in:
commit
55a1ab56c7
|
@ -11,6 +11,13 @@ controllers), BFQ's main features are:
|
|||
groups (switching back to time distribution when needed to keep
|
||||
throughput high).
|
||||
|
||||
In its default configuration, BFQ privileges latency over
|
||||
throughput. So, when needed for achieving a lower latency, BFQ builds
|
||||
schedules that may lead to a lower throughput. If your main or only
|
||||
goal, for a given device, is to achieve the maximum-possible
|
||||
throughput at all times, then do switch off all low-latency heuristics
|
||||
for that device, by setting low_latency to 0. Full details in Section 3.
|
||||
|
||||
On average CPUs, the current version of BFQ can handle devices
|
||||
performing at most ~30 KIOPS; at most ~50 KIOPS on faster CPUs. As a
|
||||
reference, 30-50 KIOPS correspond to very high bandwidths with
|
||||
|
@ -375,11 +382,19 @@ default, low latency mode is enabled. If enabled, interactive and soft
|
|||
real-time applications are privileged and experience a lower latency,
|
||||
as explained in more detail in the description of how BFQ works.
|
||||
|
||||
DO NOT enable this mode if you need full control on bandwidth
|
||||
DISABLE this mode if you need full control on bandwidth
|
||||
distribution. In fact, if it is enabled, then BFQ automatically
|
||||
increases the bandwidth share of privileged applications, as the main
|
||||
means to guarantee a lower latency to them.
|
||||
|
||||
In addition, as already highlighted at the beginning of this document,
|
||||
DISABLE this mode if your only goal is to achieve a high throughput.
|
||||
In fact, privileging the I/O of some application over the rest may
|
||||
entail a lower throughput. To achieve the highest-possible throughput
|
||||
on a non-rotational device, setting slice_idle to 0 may be needed too
|
||||
(at the cost of giving up any strong guarantee on fairness and low
|
||||
latency).
|
||||
|
||||
timeout_sync
|
||||
------------
|
||||
|
||||
|
|
|
@ -56,6 +56,11 @@
|
|||
* rotational or flash-based devices, and to get the job done quickly
|
||||
* for applications consisting of many I/O-bound processes.
|
||||
*
|
||||
* NOTE: if the main or only goal, with a given device, is to achieve
|
||||
* the maximum-possible throughput at all times, then do switch off
|
||||
* all low-latency heuristics for that device, by setting low_latency
|
||||
* to 0.
|
||||
*
|
||||
* BFQ is described in [1], where also a reference to the initial, more
|
||||
* theoretical paper on BFQ can be found. The interested reader can find
|
||||
* in the latter paper full details on the main algorithm, as well as
|
||||
|
|
|
@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
|
|||
bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
|
||||
{
|
||||
struct bfq_sched_data *sd = entity->sched_data;
|
||||
struct bfq_service_tree *st = bfq_entity_service_tree(entity);
|
||||
int is_in_service = entity == sd->in_service_entity;
|
||||
struct bfq_service_tree *st;
|
||||
bool is_in_service;
|
||||
|
||||
if (!entity->on_st) /* entity never activated, or already inactive */
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we get here, then entity is active, which implies that
|
||||
* bfq_group_set_parent has already been invoked for the group
|
||||
* represented by entity. Therefore, the field
|
||||
* entity->sched_data has been set, and we can safely use it.
|
||||
*/
|
||||
st = bfq_entity_service_tree(entity);
|
||||
is_in_service = entity == sd->in_service_entity;
|
||||
|
||||
if (is_in_service)
|
||||
bfq_calc_finish(entity, entity->service);
|
||||
|
||||
|
|
|
@ -2644,8 +2644,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
|
|||
return false;
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
|
||||
|
||||
req->__data_len -= total_bytes;
|
||||
|
||||
/* update sector only for requests with clear definition of sector */
|
||||
|
@ -2658,17 +2656,19 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
|
|||
req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* If total number of sectors is less than the first segment
|
||||
* size, something has gone terribly wrong.
|
||||
*/
|
||||
if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
|
||||
blk_dump_rq_flags(req, "request botched");
|
||||
req->__data_len = blk_rq_cur_bytes(req);
|
||||
}
|
||||
if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
|
||||
/*
|
||||
* If total number of sectors is less than the first segment
|
||||
* size, something has gone terribly wrong.
|
||||
*/
|
||||
if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
|
||||
blk_dump_rq_flags(req, "request botched");
|
||||
req->__data_len = blk_rq_cur_bytes(req);
|
||||
}
|
||||
|
||||
/* recalculate the number of segments */
|
||||
blk_recalc_rq_segments(req);
|
||||
/* recalculate the number of segments */
|
||||
blk_recalc_rq_segments(req);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1236,7 +1236,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_mq_stop_hw_queue);
|
||||
|
||||
void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
|
||||
static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
@ -1554,13 +1554,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
|||
|
||||
blk_queue_bounce(q, &bio);
|
||||
|
||||
blk_queue_split(q, &bio, q->bio_split);
|
||||
|
||||
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
|
||||
bio_io_error(bio);
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
blk_queue_split(q, &bio, q->bio_split);
|
||||
|
||||
if (!is_flush_fua && !blk_queue_nomerges(q) &&
|
||||
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
|
||||
return BLK_QC_T_NONE;
|
||||
|
@ -2341,15 +2341,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
|||
|
||||
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
|
||||
|
||||
mutex_lock(&all_q_mutex);
|
||||
get_online_cpus();
|
||||
mutex_lock(&all_q_mutex);
|
||||
|
||||
list_add_tail(&q->all_q_node, &all_q_list);
|
||||
blk_mq_add_queue_tag_set(set, q);
|
||||
blk_mq_map_swqueue(q, cpu_online_mask);
|
||||
|
||||
put_online_cpus();
|
||||
mutex_unlock(&all_q_mutex);
|
||||
put_online_cpus();
|
||||
|
||||
if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
|
||||
int ret;
|
||||
|
|
|
@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq)
|
|||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
|
||||
if (blk_stat_is_active(cb)) {
|
||||
bucket = cb->bucket_fn(rq);
|
||||
if (bucket < 0)
|
||||
continue;
|
||||
stat = &this_cpu_ptr(cb->cpu_stat)[bucket];
|
||||
__blk_stat_add(stat, value);
|
||||
}
|
||||
if (!blk_stat_is_active(cb))
|
||||
continue;
|
||||
|
||||
bucket = cb->bucket_fn(rq);
|
||||
if (bucket < 0)
|
||||
continue;
|
||||
|
||||
stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
|
||||
__blk_stat_add(stat, value);
|
||||
put_cpu_ptr(cb->cpu_stat);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
|
|
@ -1062,10 +1062,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
|
|||
|
||||
strlcpy(elevator_name, name, sizeof(elevator_name));
|
||||
e = elevator_get(strstrip(elevator_name), true);
|
||||
if (!e) {
|
||||
printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
|
||||
if (!e)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (q->elevator &&
|
||||
!strcmp(elevator_name, q->elevator->type->elevator_name)) {
|
||||
|
@ -1105,7 +1103,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
|
|||
if (!ret)
|
||||
return count;
|
||||
|
||||
printk(KERN_ERR "elevator: switch to %s failed\n", name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -367,7 +367,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
|
|||
|
||||
if (unlikely(elba > nvmdev->total_secs)) {
|
||||
pr_err("nvm: L2P data from device is out of bounds!\n");
|
||||
return -EINVAL;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Transform physical address to target address space */
|
||||
|
@ -464,8 +465,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
|
||||
struct nvme_ns *ns, struct nvme_nvm_command *c)
|
||||
static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
|
||||
struct nvme_nvm_command *c)
|
||||
{
|
||||
c->ph_rw.opcode = rqd->opcode;
|
||||
c->ph_rw.nsid = cpu_to_le32(ns->ns_id);
|
||||
|
@ -503,7 +504,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
|
|||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
nvme_nvm_rqtocmd(rq, rqd, ns, cmd);
|
||||
nvme_nvm_rqtocmd(rqd, ns, cmd);
|
||||
|
||||
rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
|
||||
if (IS_ERR(rq)) {
|
||||
|
|
Loading…
Reference in New Issue