Merge branch 'for-5.18/block' into for-5.18/write-streams

* for-5.18/block: (96 commits)
  block: remove bio_devname
  ext4: stop using bio_devname
  raid5-ppl: stop using bio_devname
  raid1: stop using bio_devname
  md-multipath: stop using bio_devname
  dm-integrity: stop using bio_devname
  dm-crypt: stop using bio_devname
  pktcdvd: remove a pointless debug check in pkt_submit_bio
  block: remove handle_bad_sector
  block: fix and cleanup bio_check_ro
  bfq: fix use-after-free in bfq_dispatch_request
  blk-crypto: show crypto capabilities in sysfs
  block: don't delete queue kobject before its children
  block: simplify calling convention of elv_unregister_queue()
  block: remove redundant semicolon
  block: default BLOCK_LEGACY_AUTOLOAD to y
  block: update io_ticks when io hang
  block, bfq: don't move oom_bfqq
  block, bfq: avoid moving bfqq to it's parent bfqg
  block, bfq: cleanup bfq_bfqq_to_bfqg()
  ...
This commit is contained in:
Jens Axboe 2022-03-07 12:44:37 -07:00
commit 13400b1454
183 changed files with 2005 additions and 3592 deletions

View File

@ -155,6 +155,55 @@ Description:
last zone of the device which may be smaller.
What: /sys/block/<disk>/queue/crypto/
Date: February 2022
Contact: linux-block@vger.kernel.org
Description:
The presence of this subdirectory of /sys/block/<disk>/queue/
indicates that the device supports inline encryption. This
subdirectory contains files which describe the inline encryption
capabilities of the device. For more information about inline
encryption, refer to Documentation/block/inline-encryption.rst.
What: /sys/block/<disk>/queue/crypto/max_dun_bits
Date: February 2022
Contact: linux-block@vger.kernel.org
Description:
[RO] This file shows the maximum length, in bits, of data unit
numbers accepted by the device in inline encryption requests.
What: /sys/block/<disk>/queue/crypto/modes/<mode>
Date: February 2022
Contact: linux-block@vger.kernel.org
Description:
[RO] For each crypto mode (i.e., encryption/decryption
algorithm) the device supports with inline encryption, a file
will exist at this location. It will contain a hexadecimal
number that is a bitmask of the supported data unit sizes, in
bytes, for that crypto mode.
Currently, the crypto modes that may be supported are:
* AES-256-XTS
* AES-128-CBC-ESSIV
* Adiantum
For example, if a device supports AES-256-XTS inline encryption
with data unit sizes of 512 and 4096 bytes, the file
/sys/block/<disk>/queue/crypto/modes/AES-256-XTS will exist and
will contain "0x1200".
What: /sys/block/<disk>/queue/crypto/num_keyslots
Date: February 2022
Contact: linux-block@vger.kernel.org
Description:
[RO] This file shows the number of keyslots the device has for
use with inline encryption.
What: /sys/block/<disk>/queue/dax
Date: June 2016
Contact: linux-block@vger.kernel.org

File diff suppressed because it is too large Load Diff

View File

@ -7,4 +7,4 @@ This file documents the sysfs file ``block/<disk>/capability``.
``capability`` is a bitfield, printed in hexadecimal, indicating which
capabilities a specific block device supports:
.. kernel-doc:: include/linux/genhd.h
.. kernel-doc:: include/linux/blkdev.h

View File

@ -8,7 +8,6 @@ Block
:maxdepth: 1
bfq-iosched
biodoc
biovecs
blk-mq
capability

View File

@ -3440,6 +3440,7 @@ F: Documentation/ABI/stable/sysfs-block
F: Documentation/block/
F: block/
F: drivers/block/
F: include/linux/bio.h
F: include/linux/blk*
F: kernel/trace/blktrace.c
F: lib/sbitmap.c

View File

@ -30,7 +30,6 @@
#include <linux/types.h>
#include <linux/kdev_t.h>
#include <linux/genhd.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>

View File

@ -23,7 +23,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/rtc.h>
#include <linux/interrupt.h>
#include <linux/bcd.h>

View File

@ -13,7 +13,6 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/slab.h>

View File

@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/fs.h>
#include <linux/console.h>
#include <linux/genhd.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/init.h>

View File

@ -22,7 +22,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/rtc.h>
#include <linux/interrupt.h>

View File

@ -24,7 +24,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/rtc.h>
#include <linux/interrupt.h>
#include <linux/module.h>

View File

@ -26,6 +26,16 @@ menuconfig BLOCK
if BLOCK
config BLOCK_LEGACY_AUTOLOAD
bool "Legacy autoloading support"
default y
help
Enable loading modules and creating block device instances based on
accesses through their device special file. This is a historic Linux
feature and makes no sense in a udev world where device files are
created on demand, but scripts that manually create device nodes and
then call losetup might rely on this behavior.
config BLK_RQ_ALLOC_TIME
bool
@ -218,6 +228,9 @@ config BLK_PM
config BLOCK_HOLDER_DEPRECATED
bool
config BLK_MQ_STACKING
bool
source "block/Kconfig.iosched"
endif # BLOCK

View File

@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
obj-$(CONFIG_BLK_PM) += blk-pm.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \
blk-crypto-sysfs.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o

View File

@ -678,7 +678,7 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
bdev_disk_changed(disk, false);
bdev->bd_openers++;
return 0;;
return 0;
}
static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
@ -733,12 +733,15 @@ struct block_device *blkdev_get_no_open(dev_t dev)
struct inode *inode;
inode = ilookup(blockdev_superblock, dev);
if (!inode) {
if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
blk_request_module(dev);
inode = ilookup(blockdev_superblock, dev);
if (!inode)
return NULL;
if (inode)
pr_warn_ratelimited(
"block device autoloading is deprecated and will be removed.\n");
}
if (!inode)
return NULL;
/* switch from the inode reference to a device mode one: */
bdev = &BDEV_I(inode)->bdev;

View File

@ -645,7 +645,21 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_group *bfqg)
{
struct bfq_entity *entity = &bfqq->entity;
struct bfq_group *old_parent = bfqq_group(bfqq);
/*
* No point to move bfqq to the same group, which can happen when
* root group is offlined
*/
if (old_parent == bfqg)
return;
/*
* oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group
* until elevator exit.
*/
if (bfqq == &bfqd->oom_bfqq)
return;
/*
* Get extra reference to prevent bfqq from being freed in
* next possible expire or deactivate.
@ -666,7 +680,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
else if (entity->on_st_or_in_serv)
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
bfqg_and_blkg_put(bfqq_group(bfqq));
bfqg_and_blkg_put(old_parent);
if (entity->parent &&
entity->parent->last_bfqq_created == bfqq)

View File

@ -774,7 +774,7 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
if (!bfqq->next_rq)
return;
bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
bfqq->pos_root = &bfqq_group(bfqq)->rq_pos_tree;
__bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
blk_rq_pos(bfqq->next_rq), &parent, &p);
if (!__bfqq) {
@ -2669,7 +2669,7 @@ static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
sector_t sector)
{
struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
struct rb_root *root = &bfqq_group(bfqq)->rq_pos_tree;
struct rb_node *parent, *node;
struct bfq_queue *__bfqq;
@ -5181,7 +5181,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
struct request *rq;
struct bfq_queue *in_serv_queue;
bool waiting_rq, idle_timer_disabled;
bool waiting_rq, idle_timer_disabled = false;
spin_lock_irq(&bfqd->lock);
@ -5189,14 +5189,15 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue);
rq = __bfq_dispatch_request(hctx);
idle_timer_disabled =
waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
if (in_serv_queue == bfqd->in_service_queue) {
idle_timer_disabled =
waiting_rq && !bfq_bfqq_wait_request(in_serv_queue);
}
spin_unlock_irq(&bfqd->lock);
bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue,
idle_timer_disabled);
bfq_update_dispatch_stats(hctx->queue, rq,
idle_timer_disabled ? in_serv_queue : NULL,
idle_timer_disabled);
return rq;
}

View File

@ -8,7 +8,6 @@
#include <linux/blktrace_api.h>
#include <linux/hrtimer.h>
#include <linux/blk-cgroup.h>
#include "blk-cgroup-rwstat.h"
@ -1051,7 +1050,6 @@ extern struct blkcg_policy blkcg_policy_bfq;
for (parent = NULL; entity ; entity = parent)
#endif /* CONFIG_BFQ_GROUP_IOSCHED */
struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq);
struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
unsigned int bfq_tot_busy_queues(struct bfq_data *bfqd);
struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity);

View File

@ -142,16 +142,6 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
{
struct bfq_entity *group_entity = bfqq->entity.parent;
if (!group_entity)
group_entity = &bfqq->bfqd->root_group->entity;
return container_of(group_entity, struct bfq_group, entity);
}
/*
* Returns true if this budget changes may let next_in_service->parent
* become the next_in_service entity for its parent entity.
@ -230,11 +220,6 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
#else /* CONFIG_BFQ_GROUP_IOSCHED */
struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
{
return bfqq->bfqd->root_group;
}
static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
{
return false;
@ -519,7 +504,7 @@ unsigned short bfq_ioprio_to_weight(int ioprio)
static unsigned short bfq_weight_to_ioprio(int weight)
{
return max_t(int, 0,
IOPRIO_NR_LEVELS * BFQ_WEIGHT_CONVERSION_COEFF - weight);
IOPRIO_NR_LEVELS - weight / BFQ_WEIGHT_CONVERSION_COEFF);
}
static void bfq_get_entity(struct bfq_entity *entity)

View File

@ -420,7 +420,6 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}
EXPORT_SYMBOL(bio_integrity_clone);
int bioset_integrity_create(struct bio_set *bs, int pool_size)
{

View File

@ -15,7 +15,6 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/blk-cgroup.h>
#include <linux/highmem.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>
@ -24,6 +23,7 @@
#include <trace/events/block.h>
#include "blk.h"
#include "blk-rq-qos.h"
#include "blk-cgroup.h"
struct bio_alloc_cache {
struct bio *free_list;
@ -249,12 +249,12 @@ static void bio_free(struct bio *bio)
* they must remember to pair any call to bio_init() with bio_uninit()
* when IO has completed, or when the bio is released.
*/
void bio_init(struct bio *bio, struct bio_vec *table,
unsigned short max_vecs)
void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, unsigned int opf)
{
bio->bi_next = NULL;
bio->bi_bdev = NULL;
bio->bi_opf = 0;
bio->bi_bdev = bdev;
bio->bi_opf = opf;
bio->bi_flags = 0;
bio->bi_ioprio = 0;
bio->bi_write_hint = 0;
@ -268,6 +268,8 @@ void bio_init(struct bio *bio, struct bio_vec *table,
#ifdef CONFIG_BLK_CGROUP
bio->bi_blkg = NULL;
bio->bi_issue.value = 0;
if (bdev)
bio_associate_blkg(bio);
#ifdef CONFIG_BLK_CGROUP_IOCOST
bio->bi_iocost_cost = 0;
#endif
@ -293,6 +295,8 @@ EXPORT_SYMBOL(bio_init);
/**
* bio_reset - reinitialize a bio
* @bio: bio to reset
* @bdev: block device to use the bio for
* @opf: operation and flags for bio
*
* Description:
* After calling bio_reset(), @bio will be in the same state as a freshly
@ -300,11 +304,15 @@ EXPORT_SYMBOL(bio_init);
* preserved are the ones that are initialized by bio_alloc_bioset(). See
* comment in struct bio.
*/
void bio_reset(struct bio *bio)
void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf)
{
bio_uninit(bio);
memset(bio, 0, BIO_RESET_BYTES);
atomic_set(&bio->__bi_remaining, 1);
bio->bi_bdev = bdev;
if (bio->bi_bdev)
bio_associate_blkg(bio);
bio->bi_opf = opf;
}
EXPORT_SYMBOL(bio_reset);
@ -344,6 +352,20 @@ void bio_chain(struct bio *bio, struct bio *parent)
}
EXPORT_SYMBOL(bio_chain);
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, unsigned int opf, gfp_t gfp)
{
struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);
if (bio) {
bio_chain(bio, new);
submit_bio(bio);
}
return new;
}
EXPORT_SYMBOL_GPL(blk_next_bio);
static void bio_alloc_rescue(struct work_struct *work)
{
struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
@ -400,8 +422,10 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
/**
* bio_alloc_bioset - allocate a bio for I/O
* @bdev: block device to allocate the bio for (can be %NULL)
* @nr_vecs: number of bvecs to pre-allocate
* @opf: operation and flags for bio
* @gfp_mask: the GFP_* mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
* @bs: the bio_set to allocate from.
*
* Allocate a bio from the mempools in @bs.
@ -430,15 +454,16 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
*
* Returns: Pointer to new bio on success, NULL on failure.
*/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
unsigned int opf, gfp_t gfp_mask,
struct bio_set *bs)
{
gfp_t saved_gfp = gfp_mask;
struct bio *bio;
void *p;
/* should not use nobvec bioset for nr_iovecs > 0 */
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
/* should not use nobvec bioset for nr_vecs > 0 */
if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0))
return NULL;
/*
@ -475,23 +500,23 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned short nr_iovecs,
return NULL;
bio = p + bs->front_pad;
if (nr_iovecs > BIO_INLINE_VECS) {
if (nr_vecs > BIO_INLINE_VECS) {
struct bio_vec *bvl = NULL;
bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
bvl = bvec_alloc(&bs->bvec_pool, &nr_iovecs, gfp_mask);
bvl = bvec_alloc(&bs->bvec_pool, &nr_vecs, gfp_mask);
}
if (unlikely(!bvl))
goto err_free;
bio_init(bio, bvl, nr_iovecs);
} else if (nr_iovecs) {
bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
bio_init(bio, bdev, bvl, nr_vecs, opf);
} else if (nr_vecs) {
bio_init(bio, bdev, bio->bi_inline_vecs, BIO_INLINE_VECS, opf);
} else {
bio_init(bio, NULL, 0);
bio_init(bio, bdev, NULL, 0, opf);
}
bio->bi_pool = bs;
@ -522,7 +547,8 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs)
bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
if (unlikely(!bio))
return NULL;
bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
bio_init(bio, NULL, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs,
0);
bio->bi_pool = NULL;
return bio;
}
@ -702,80 +728,84 @@ void bio_put(struct bio *bio)
}
EXPORT_SYMBOL(bio_put);
/**
* __bio_clone_fast - clone a bio that shares the original bio's biovec
* @bio: destination bio
* @bio_src: bio to clone
*
* Clone a &bio. Caller will own the returned bio, but not
* the actual data it points to. Reference count of returned
* bio will be one.
*
* Caller must ensure that @bio_src is not freed before @bio.
*/
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
{
WARN_ON_ONCE(bio->bi_pool && bio->bi_max_vecs);
/*
* most users will be overriding ->bi_bdev with a new target,
* so we don't set nor calculate new physical/hw segment counts here
*/
bio->bi_bdev = bio_src->bi_bdev;
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
if (bio_flagged(bio_src, BIO_REMAPPED))
if (bio->bi_bdev == bio_src->bi_bdev &&
bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_opf = bio_src->bi_opf;
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio);
if (bio_crypt_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
if (bio_integrity(bio_src) &&
bio_integrity_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
return 0;
}
EXPORT_SYMBOL(__bio_clone_fast);
/**
* bio_clone_fast - clone a bio that shares the original bio's biovec
* @bio: bio to clone
* @gfp_mask: allocation priority
* @bs: bio_set to allocate from
* bio_alloc_clone - clone a bio that shares the original bio's biovec
* @bdev: block_device to clone onto
* @bio_src: bio to clone from
* @gfp: allocation priority
* @bs: bio_set to allocate from
*
* Like __bio_clone_fast, only also allocates the returned bio
* Allocate a new bio that is a clone of @bio_src. The caller owns the returned
* bio, but not the actual data it points to.
*
* The caller must ensure that the return bio is not freed before @bio_src.
*/
struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
struct bio *bio_alloc_clone(struct block_device *bdev, struct bio *bio_src,
gfp_t gfp, struct bio_set *bs)
{
struct bio *b;
struct bio *bio;
b = bio_alloc_bioset(gfp_mask, 0, bs);
if (!b)
bio = bio_alloc_bioset(bdev, 0, bio_src->bi_opf, gfp, bs);
if (!bio)
return NULL;
__bio_clone_fast(b, bio);
if (__bio_clone(bio, bio_src, gfp) < 0) {
bio_put(bio);
return NULL;
}
bio->bi_io_vec = bio_src->bi_io_vec;
if (bio_crypt_clone(b, bio, gfp_mask) < 0)
goto err_put;
if (bio_integrity(bio) &&
bio_integrity_clone(b, bio, gfp_mask) < 0)
goto err_put;
return b;
err_put:
bio_put(b);
return NULL;
return bio;
}
EXPORT_SYMBOL(bio_clone_fast);
EXPORT_SYMBOL(bio_alloc_clone);
const char *bio_devname(struct bio *bio, char *buf)
/**
* bio_init_clone - clone a bio that shares the original bio's biovec
* @bdev: block_device to clone onto
* @bio: bio to clone into
* @bio_src: bio to clone from
* @gfp: allocation priority
*
* Initialize a new bio in caller provided memory that is a clone of @bio_src.
* The caller owns the returned bio, but not the actual data it points to.
*
* The caller must ensure that @bio_src is not freed before @bio.
*/
int bio_init_clone(struct block_device *bdev, struct bio *bio,
struct bio *bio_src, gfp_t gfp)
{
return bdevname(bio->bi_bdev, buf);
int ret;
bio_init(bio, bdev, bio_src->bi_io_vec, 0, bio_src->bi_opf);
ret = __bio_clone(bio, bio_src, gfp);
if (ret)
bio_uninit(bio);
return ret;
}
EXPORT_SYMBOL(bio_devname);
EXPORT_SYMBOL(bio_init_clone);
/**
* bio_full - check if the bio is full
@ -1054,7 +1084,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
size_t off)
{
if (len > UINT_MAX || off > UINT_MAX)
return 0;
return false;
return bio_add_page(bio, &folio->page, len, off) > 0;
}
@ -1541,7 +1571,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
return NULL;
split = bio_clone_fast(bio, gfp, bs);
split = bio_alloc_clone(bio->bi_bdev, bio, gfp, bs);
if (!split)
return NULL;
@ -1636,9 +1666,9 @@ EXPORT_SYMBOL(bioset_exit);
* Note that the bio must be embedded at the END of that structure always,
* or things will break badly.
* If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
* for allocating iovecs. This pool is not needed e.g. for bio_clone_fast().
* If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
* dispatch queued requests when the mempool runs out of space.
* for allocating iovecs. This pool is not needed e.g. for bio_init_clone().
* If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used
* to dispatch queued requests when the mempool runs out of space.
*
*/
int bioset_init(struct bio_set *bs,
@ -1708,7 +1738,9 @@ EXPORT_SYMBOL(bioset_init_from_src);
/**
* bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
* @kiocb: kiocb describing the IO
* @bdev: block device to allocate the bio for (can be %NULL)
* @nr_vecs: number of iovecs to pre-allocate
* @opf: operation and flags for bio
* @bs: bio_set to allocate from
*
* Description:
@ -1719,14 +1751,14 @@ EXPORT_SYMBOL(bioset_init_from_src);
* MUST be done from process context, not hard/soft IRQ.
*
*/
struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
struct bio_set *bs)
struct bio *bio_alloc_kiocb(struct kiocb *kiocb, struct block_device *bdev,
unsigned short nr_vecs, unsigned int opf, struct bio_set *bs)
{
struct bio_alloc_cache *cache;
struct bio *bio;
if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS)
return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
return bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
cache = per_cpu_ptr(bs->cache, get_cpu());
if (cache->free_list) {
@ -1734,13 +1766,14 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
cache->free_list = bio->bi_next;
cache->nr--;
put_cpu();
bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
bio_init(bio, bdev, nr_vecs ? bio->bi_inline_vecs : NULL,
nr_vecs, opf);
bio->bi_pool = bs;
bio_set_flag(bio, BIO_PERCPU_CACHE);
return bio;
}
put_cpu();
bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
bio = bio_alloc_bioset(bdev, nr_vecs, opf, GFP_KERNEL, bs);
bio_set_flag(bio, BIO_PERCPU_CACHE);
return bio;
}

View File

@ -6,7 +6,7 @@
#ifndef _BLK_CGROUP_RWSTAT_H
#define _BLK_CGROUP_RWSTAT_H
#include <linux/blk-cgroup.h>
#include "blk-cgroup.h"
enum blkg_rwstat_type {
BLKG_RWSTAT_READ,

View File

@ -23,15 +23,14 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/ctype.h>
#include <linux/blk-cgroup.h>
#include <linux/tracehook.h>
#include <linux/psi.h>
#include <linux/part_stat.h>
#include "blk.h"
#include "blk-cgroup.h"
#include "blk-ioprio.h"
#include "blk-throttle.h"
@ -857,11 +856,11 @@ static void blkcg_fill_root_iostats(void)
blk_queue_root_blkg(bdev_get_queue(bdev));
struct blkg_iostat tmp;
int cpu;
unsigned long flags;
memset(&tmp, 0, sizeof(tmp));
for_each_possible_cpu(cpu) {
struct disk_stats *cpu_dkstats;
unsigned long flags;
cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu);
tmp.ios[BLKG_IOSTAT_READ] +=
@ -877,11 +876,11 @@ static void blkcg_fill_root_iostats(void)
cpu_dkstats->sectors[STAT_WRITE] << 9;
tmp.bytes[BLKG_IOSTAT_DISCARD] +=
cpu_dkstats->sectors[STAT_DISCARD] << 9;
flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&blkg->iostat.cur, &tmp);
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
blkg_iostat_set(&blkg->iostat.cur, &tmp);
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
}
@ -1176,6 +1175,8 @@ int blkcg_init_queue(struct request_queue *q)
bool preloaded;
int ret;
INIT_LIST_HEAD(&q->blkg_list);
new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;

477
block/blk-cgroup.h Normal file
View File

@ -0,0 +1,477 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_PRIVATE_H
#define _BLK_CGROUP_PRIVATE_H
/*
* block cgroup private header
*
* Based on ideas and code from CFQ, CFS and BFQ:
* Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
*
* Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
* Paolo Valente <paolo.valente@unimore.it>
*
* Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
* Nauman Rafique <nauman@google.com>
*/
#include <linux/blk-cgroup.h>
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
#ifdef CONFIG_BLK_CGROUP
/*
* A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
* request_queue (q). This is used by blkcg policies which need to track
* information per blkcg - q pair.
*
* There can be multiple active blkcg policies and each blkg:policy pair is
* represented by a blkg_policy_data which is allocated and freed by each
* policy's pd_alloc/free_fn() methods. A policy can allocate private data
* area by allocating larger data structure which embeds blkg_policy_data
* at the beginning.
*/
struct blkg_policy_data {
/* the blkg and policy id this per-policy data belongs to */
struct blkcg_gq *blkg;
int plid;
};
/*
* Policies that need to keep per-blkcg data which is independent from any
* request_queue associated to it should implement cpd_alloc/free_fn()
* methods. A policy can allocate private data area by allocating larger
* data structure which embeds blkcg_policy_data at the beginning.
* cpd_init() is invoked to let each policy handle per-blkcg data.
*/
struct blkcg_policy_data {
/* the blkcg and policy id this per-policy data belongs to */
struct blkcg *blkcg;
int plid;
};
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
struct request_queue *q, struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
struct seq_file *s);
struct blkcg_policy {
int plid;
/* cgroup files for the policy */
struct cftype *dfl_cftypes;
struct cftype *legacy_cftypes;
/* operations */
blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
blkcg_pol_init_cpd_fn *cpd_init_fn;
blkcg_pol_free_cpd_fn *cpd_free_fn;
blkcg_pol_bind_cpd_fn *cpd_bind_fn;
blkcg_pol_alloc_pd_fn *pd_alloc_fn;
blkcg_pol_init_pd_fn *pd_init_fn;
blkcg_pol_online_pd_fn *pd_online_fn;
blkcg_pol_offline_pd_fn *pd_offline_fn;
blkcg_pol_free_pd_fn *pd_free_fn;
blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
blkcg_pol_stat_pd_fn *pd_stat_fn;
};
extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct request_queue *q, bool update_hint);
int blkcg_init_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol);
const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 (*prfill)(struct seq_file *,
struct blkg_policy_data *, int),
const struct blkcg_policy *pol, int data,
bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
struct blkg_conf_ctx {
struct block_device *bdev;
struct blkcg_gq *blkg;
char *body;
};
struct block_device *blkcg_conf_open_bdev(char **inputp);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
/**
* blkcg_css - find the current css
*
* Find the css associated with either the kthread or the current task.
* This may return a dying css, so it is up to the caller to use tryget logic
* to confirm it is alive and well.
*/
static inline struct cgroup_subsys_state *blkcg_css(void)
{
struct cgroup_subsys_state *css;
css = kthread_blkcg();
if (css)
return css;
return task_css(current, io_cgrp_id);
}
/**
* __bio_blkcg - internal, inconsistent version to get blkcg
*
* DO NOT USE.
* This function is inconsistent and consequently is dangerous to use. The
* first part of the function returns a blkcg where a reference is owned by the
* bio. This means it does not need to be rcu protected as it cannot go away
* with the bio owning a reference to it. However, the latter potentially gets
* it from task_css(). This can race against task migration and the cgroup
* dying. It is also semantically different as it must be called rcu protected
* and is susceptible to failure when trying to get a reference to it.
* Therefore, it is not ok to assume that *_get() will always succeed on the
* blkcg returned here.
*/
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
if (bio && bio->bi_blkg)
return bio->bi_blkg->blkcg;
return css_to_blkcg(blkcg_css());
}
/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
* @return: true if this bio needs to be submitted with the root blkg context.
*
* In order to avoid priority inversions we sometimes need to issue a bio as if
* it were attached to the root blkg, and then backcharge to the actual owning
* blkg. The idea is we do bio_blkcg() to look up the actual context for the
* bio and attach the appropriate blkg to the bio. Then we call this helper and
* if it is true run with the root blkg for that queue and then do any
* backcharging to the originating cgroup once the io is complete.
*/
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}
/**
* __blkg_lookup - internal version of blkg_lookup()
* @blkcg: blkcg of interest
* @q: request_queue of interest
* @update_hint: whether to update lookup hint with the result or not
*
* This is internal version and shouldn't be used by policy
* implementations. Looks up blkgs for the @blkcg - @q pair regardless of
* @q's bypass state. If @update_hint is %true, the caller should be
* holding @q->queue_lock and lookup hint is updated on success.
*/
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
struct request_queue *q,
bool update_hint)
{
struct blkcg_gq *blkg;
if (blkcg == &blkcg_root)
return q->root_blkg;
blkg = rcu_dereference(blkcg->blkg_hint);
if (blkg && blkg->q == q)
return blkg;
return blkg_lookup_slowpath(blkcg, q, update_hint);
}
/**
* blkg_lookup - lookup blkg for the specified blkcg - q pair
* @blkcg: blkcg of interest
* @q: request_queue of interest
*
* Lookup blkg for the @blkcg - @q pair. This function should be called
* under RCU read lock.
*/
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
struct request_queue *q)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return __blkg_lookup(blkcg, q, false);
}
/**
* blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
* @q: request_queue of interest
*
* Lookup blkg for @q at the root level. See also blkg_lookup().
*/
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
return q->root_blkg;
}
/**
* blkg_to_pdata - get policy private data
* @blkg: blkg of interest
* @pol: policy of interest
*
* Return pointer to private data associated with the @blkg-@pol pair.
*/
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol)
{
return blkg ? blkg->pd[pol->plid] : NULL;
}
static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
struct blkcg_policy *pol)
{
return blkcg ? blkcg->cpd[pol->plid] : NULL;
}
/**
* pdata_to_blkg - get blkg associated with policy private data
* @pd: policy private data of interest
*
* @pd is policy private data. Determine the blkg it's associated with.
*/
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
return pd ? pd->blkg : NULL;
}
static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
return cpd ? cpd->blkcg : NULL;
}
/**
* blkg_path - format cgroup path of blkg
* @blkg: blkg of interest
* @buf: target buffer
* @buflen: target buffer length
*
* Format the path of the cgroup of @blkg into @buf.
*/
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}
/**
* blkg_get - get a blkg reference
* @blkg: blkg to get
*
* The caller should be holding an existing reference.
*/
static inline void blkg_get(struct blkcg_gq *blkg)
{
percpu_ref_get(&blkg->refcnt);
}
/**
* blkg_tryget - try and get a blkg reference
* @blkg: blkg to get
*
* This is for use when doing an RCU lookup of the blkg. We may be in the midst
* of freeing this blkg, so we can only use it if the refcnt is not zero.
*/
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
return blkg && percpu_ref_tryget(&blkg->refcnt);
}
/**
* blkg_put - put a blkg reference
* @blkg: blkg to put
*/
static inline void blkg_put(struct blkcg_gq *blkg)
{
percpu_ref_put(&blkg->refcnt);
}
/**
* blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
* @d_blkg: loop cursor pointing to the current descendant
* @pos_css: used for iteration
* @p_blkg: target blkg to walk descendants of
*
* Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU
* read locked. If called under either blkcg or queue lock, the iteration
* is guaranteed to include all and only online blkgs. The caller may
* update @pos_css by calling css_rightmost_descendant() to skip subtree.
* @p_blkg is included in the iteration and the first node to be visited.
*/
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \
css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
/**
* blkg_for_each_descendant_post - post-order walk of a blkg's descendants
* @d_blkg: loop cursor pointing to the current descendant
* @pos_css: used for iteration
* @p_blkg: target blkg to walk descendants of
*
* Similar to blkg_for_each_descendant_pre() but performs post-order
* traversal instead. Synchronization rules are the same. @p_blkg is
* included in the iteration and the last node to be visited.
*/
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \
css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
bool __blkcg_punt_bio_submit(struct bio *bio);
static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
if (bio->bi_opf & REQ_CGROUP_PUNT)
return __blkcg_punt_bio_submit(bio);
else
return false;
}
static inline void blkcg_bio_issue_init(struct bio *bio)
{
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}
static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
return;
if (atomic_add_return(1, &blkg->use_delay) == 1)
atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}
static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
int old = atomic_read(&blkg->use_delay);
if (WARN_ON_ONCE(old < 0))
return 0;
if (old == 0)
return 0;
/*
* We do this song and dance because we can race with somebody else
* adding or removing delay. If we just did an atomic_dec we'd end up
* negative and we'd already be in trouble. We need to subtract 1 and
* then check to see if we were the last delay so we can drop the
* congestion count on the cgroup.
*/
while (old) {
int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
if (cur == old)
break;
old = cur;
}
if (old == 0)
return 0;
if (old == 1)
atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
return 1;
}
/**
* blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
* @blkg: target blkg
* @delay: delay duration in nsecs
*
* When enabled with this function, the delay is not decayed and must be
* explicitly cleared with blkcg_clear_delay(). Must not be mixed with
* blkcg_[un]use_delay() and blkcg_add_delay() usages.
*/
static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
{
int old = atomic_read(&blkg->use_delay);
/* We only want 1 person setting the congestion count for this blkg. */
if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
atomic64_set(&blkg->delay_nsec, delay);
}
/**
* blkcg_clear_delay - Disable allocator delay mechanism
* @blkg: target blkg
*
* Disable use_delay mechanism. See blkcg_set_delay().
*/
static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
int old = atomic_read(&blkg->use_delay);
/* We only want 1 person clearing the congestion count for this blkg. */
if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}
void blk_cgroup_bio_start(struct bio *bio);
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
#else /* CONFIG_BLK_CGROUP */
struct blkg_policy_data {
};
struct blkcg_policy_data {
};
struct blkcg_policy {
};
#ifdef CONFIG_BLOCK
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
#endif /* CONFIG_BLOCK */
#endif /* CONFIG_BLK_CGROUP */
#endif /* _BLK_CGROUP_PRIVATE_H */

View File

@ -34,7 +34,6 @@
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
@ -49,6 +48,7 @@
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
struct dentry *blk_debugfs_root;
@ -164,6 +164,7 @@ static const struct {
[BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
[BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" },
[BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
[BLK_STS_OFFLINE] = { -ENODEV, "device offline" },
/* device mapper special case, should not leak out: */
[BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
@ -469,9 +470,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
INIT_LIST_HEAD(&q->blkg_list);
#endif
kobject_init(&q->kobj, &blk_queue_ktype);
@ -536,17 +534,6 @@ bool blk_get_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_get_queue);
static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
char b[BDEVNAME_SIZE];
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
"%s: rw=%d, want=%llu, limit=%llu\n",
current->comm,
bio_devname(bio, b), bio->bi_opf,
bio_end_sector(bio), maxsector);
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
static DECLARE_FAULT_ATTR(fail_make_request);
@ -576,14 +563,10 @@ late_initcall(fail_make_request_debugfs);
static inline bool bio_check_ro(struct bio *bio)
{
if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
char b[BDEVNAME_SIZE];
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
return false;
WARN_ONCE(1,
"Trying to write to read-only block-device %s (partno %d)\n",
bio_devname(bio, b), bio->bi_bdev->bd_partno);
pr_warn("Trying to write to read-only block-device %pg\n",
bio->bi_bdev);
/* Older lvm-tools actually trigger this */
return false;
}
@ -612,7 +595,11 @@ static inline int bio_check_eod(struct bio *bio)
if (nr_sectors && maxsector &&
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
handle_bad_sector(bio, maxsector);
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
"%pg: rw=%d, want=%llu, limit=%llu\n",
current->comm,
bio->bi_bdev, bio->bi_opf,
bio_end_sector(bio), maxsector);
return -EIO;
}
return 0;
@ -672,7 +659,123 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
noinline_for_stack bool submit_bio_checks(struct bio *bio)
static void __submit_bio(struct bio *bio)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
if (unlikely(!blk_crypto_bio_prep(&bio)))
return;
if (!disk->fops->submit_bio) {
blk_mq_submit_bio(bio);
} else if (likely(bio_queue_enter(bio) == 0)) {
disk->fops->submit_bio(bio);
blk_queue_exit(disk->queue);
}
}
/*
* The loop in this function may be a bit non-obvious, and so deserves some
* explanation:
*
* - Before entering the loop, bio->bi_next is NULL (as all callers ensure
* that), so we have a list with a single bio.
* - We pretend that we have just taken it off a longer list, so we assign
* bio_list to a pointer to the bio_list_on_stack, thus initialising the
* bio_list of new bios to be added. ->submit_bio() may indeed add some more
* bios through a recursive call to submit_bio_noacct. If it did, we find a
* non-NULL value in bio_list and re-enter the loop from the top.
* - In this case we really did just take the bio of the top of the list (no
* pretending) and so remove it from bio_list, and call into ->submit_bio()
* again.
*
* bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
* bio_list_on_stack[1] contains bios that were submitted before the current
* ->submit_bio_bio, but that haven't been processed yet.
*/
static void __submit_bio_noacct(struct bio *bio)
{
struct bio_list bio_list_on_stack[2];
BUG_ON(bio->bi_next);
bio_list_init(&bio_list_on_stack[0]);
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_list lower, same;
/*
* Create a fresh bio_list for all subordinate requests.
*/
bio_list_on_stack[1] = bio_list_on_stack[0];
bio_list_init(&bio_list_on_stack[0]);
__submit_bio(bio);
/*
* Sort new bios into those for a lower level and those for the
* same level.
*/
bio_list_init(&lower);
bio_list_init(&same);
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
if (q == bdev_get_queue(bio->bi_bdev))
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
/*
* Now assemble so we handle the lowest level first.
*/
bio_list_merge(&bio_list_on_stack[0], &lower);
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
current->bio_list = NULL;
}
static void __submit_bio_noacct_mq(struct bio *bio)
{
struct bio_list bio_list[2] = { };
current->bio_list = bio_list;
do {
__submit_bio(bio);
} while ((bio = bio_list_pop(&bio_list[0])));
current->bio_list = NULL;
}
void submit_bio_noacct_nocheck(struct bio *bio)
{
/*
* We only want one ->submit_bio to be active at a time, else stack
* usage with stacked devices could be a problem. Use current->bio_list
* to collect a list of requests submited by a ->submit_bio method while
* it is active, and then process them after it returned.
*/
if (current->bio_list)
bio_list_add(&current->bio_list[0], bio);
else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
__submit_bio_noacct_mq(bio);
else
__submit_bio_noacct(bio);
}
/**
* submit_bio_noacct - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
*
* This is a version of submit_bio() that shall only be used for I/O that is
* resubmitted to lower level drivers by stacking block drivers. All file
* systems and other upper level users of the block layer should use
* submit_bio() instead.
*/
void submit_bio_noacct(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@ -757,7 +860,7 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
}
if (blk_throtl_bio(bio))
return false;
return;
blk_cgroup_bio_start(bio);
blkcg_bio_issue_init(bio);
@ -769,138 +872,14 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
*/
bio_set_flag(bio, BIO_TRACE_COMPLETION);
}
return true;
submit_bio_noacct_nocheck(bio);
return;
not_supported:
status = BLK_STS_NOTSUPP;
end_io:
bio->bi_status = status;
bio_endio(bio);
return false;
}
static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
{
if (blk_crypto_bio_prep(&bio)) {
if (likely(bio_queue_enter(bio) == 0)) {
disk->fops->submit_bio(bio);
blk_queue_exit(disk->queue);
}
}
}
static void __submit_bio(struct bio *bio)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
if (unlikely(!submit_bio_checks(bio)))
return;
if (!disk->fops->submit_bio)
blk_mq_submit_bio(bio);
else
__submit_bio_fops(disk, bio);
}
/*
* The loop in this function may be a bit non-obvious, and so deserves some
* explanation:
*
* - Before entering the loop, bio->bi_next is NULL (as all callers ensure
* that), so we have a list with a single bio.
* - We pretend that we have just taken it off a longer list, so we assign
* bio_list to a pointer to the bio_list_on_stack, thus initialising the
* bio_list of new bios to be added. ->submit_bio() may indeed add some more
* bios through a recursive call to submit_bio_noacct. If it did, we find a
* non-NULL value in bio_list and re-enter the loop from the top.
* - In this case we really did just take the bio of the top of the list (no
* pretending) and so remove it from bio_list, and call into ->submit_bio()
* again.
*
* bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
* bio_list_on_stack[1] contains bios that were submitted before the current
* ->submit_bio_bio, but that haven't been processed yet.
*/
static void __submit_bio_noacct(struct bio *bio)
{
struct bio_list bio_list_on_stack[2];
BUG_ON(bio->bi_next);
bio_list_init(&bio_list_on_stack[0]);
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_list lower, same;
/*
* Create a fresh bio_list for all subordinate requests.
*/
bio_list_on_stack[1] = bio_list_on_stack[0];
bio_list_init(&bio_list_on_stack[0]);
__submit_bio(bio);
/*
* Sort new bios into those for a lower level and those for the
* same level.
*/
bio_list_init(&lower);
bio_list_init(&same);
while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
if (q == bdev_get_queue(bio->bi_bdev))
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
/*
* Now assemble so we handle the lowest level first.
*/
bio_list_merge(&bio_list_on_stack[0], &lower);
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
current->bio_list = NULL;
}
static void __submit_bio_noacct_mq(struct bio *bio)
{
struct bio_list bio_list[2] = { };
current->bio_list = bio_list;
do {
__submit_bio(bio);
} while ((bio = bio_list_pop(&bio_list[0])));
current->bio_list = NULL;
}
/**
* submit_bio_noacct - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
*
* This is a version of submit_bio() that shall only be used for I/O that is
* resubmitted to lower level drivers by stacking block drivers. All file
* systems and other upper level users of the block layer should use
* submit_bio() instead.
*/
void submit_bio_noacct(struct bio *bio)
{
/*
* We only want one ->submit_bio to be active at a time, else stack
* usage with stacked devices could be a problem. Use current->bio_list
* to collect a list of requests submited by a ->submit_bio method while
* it is active, and then process them after it returned.
*/
if (current->bio_list)
bio_list_add(&current->bio_list[0], bio);
else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
__submit_bio_noacct_mq(bio);
else
__submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio_noacct);
@ -985,8 +964,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
return 0;
if (current->plug)
blk_flush_plug(current->plug, false);
blk_flush_plug(current->plug, false);
if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
return 0;
@ -1268,7 +1246,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
}
EXPORT_SYMBOL(blk_check_plugged);
void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{
if (!list_empty(&plug->cb_list))
flush_plug_callbacks(plug, from_schedule);
@ -1297,7 +1275,7 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
void blk_finish_plug(struct blk_plug *plug)
{
if (plug == current->plug) {
blk_flush_plug(plug, false);
__blk_flush_plug(plug, false);
current->plug = NULL;
}
}

View File

@ -10,7 +10,6 @@
#define pr_fmt(fmt) "blk-crypto-fallback: " fmt
#include <crypto/skcipher.h>
#include <linux/blk-cgroup.h>
#include <linux/blk-crypto.h>
#include <linux/blk-crypto-profile.h>
#include <linux/blkdev.h>
@ -20,6 +19,7 @@
#include <linux/random.h>
#include <linux/scatterlist.h>
#include "blk-cgroup.h"
#include "blk-crypto-internal.h"
static unsigned int num_prealloc_bounce_pg = 32;

View File

@ -11,6 +11,7 @@
/* Represents a crypto mode supported by blk-crypto */
struct blk_crypto_mode {
const char *name; /* name of this mode, shown in sysfs */
const char *cipher_str; /* crypto API name (for fallback case) */
unsigned int keysize; /* key size in bytes */
unsigned int ivsize; /* iv size in bytes */
@ -20,6 +21,10 @@ extern const struct blk_crypto_mode blk_crypto_modes[];
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
int blk_crypto_sysfs_register(struct request_queue *q);
void blk_crypto_sysfs_unregister(struct request_queue *q);
void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
unsigned int inc);
@ -62,6 +67,13 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
#else /* CONFIG_BLK_INLINE_ENCRYPTION */
static inline int blk_crypto_sysfs_register(struct request_queue *q)
{
return 0;
}
static inline void blk_crypto_sysfs_unregister(struct request_queue *q) { }
static inline bool bio_crypt_rq_ctx_compatible(struct request *rq,
struct bio *bio)
{

172
block/blk-crypto-sysfs.c Normal file
View File

@ -0,0 +1,172 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2021 Google LLC
*
* sysfs support for blk-crypto. This file contains the code which exports the
* crypto capabilities of devices via /sys/block/$disk/queue/crypto/.
*/
#include <linux/blk-crypto-profile.h>
#include "blk-crypto-internal.h"
struct blk_crypto_kobj {
struct kobject kobj;
struct blk_crypto_profile *profile;
};
struct blk_crypto_attr {
struct attribute attr;
ssize_t (*show)(struct blk_crypto_profile *profile,
struct blk_crypto_attr *attr, char *page);
};
static struct blk_crypto_profile *kobj_to_crypto_profile(struct kobject *kobj)
{
return container_of(kobj, struct blk_crypto_kobj, kobj)->profile;
}
static struct blk_crypto_attr *attr_to_crypto_attr(struct attribute *attr)
{
return container_of(attr, struct blk_crypto_attr, attr);
}
static ssize_t max_dun_bits_show(struct blk_crypto_profile *profile,
struct blk_crypto_attr *attr, char *page)
{
return sysfs_emit(page, "%u\n", 8 * profile->max_dun_bytes_supported);
}
static ssize_t num_keyslots_show(struct blk_crypto_profile *profile,
struct blk_crypto_attr *attr, char *page)
{
return sysfs_emit(page, "%u\n", profile->num_slots);
}
#define BLK_CRYPTO_RO_ATTR(_name) \
static struct blk_crypto_attr _name##_attr = __ATTR_RO(_name)
BLK_CRYPTO_RO_ATTR(max_dun_bits);
BLK_CRYPTO_RO_ATTR(num_keyslots);
static struct attribute *blk_crypto_attrs[] = {
&max_dun_bits_attr.attr,
&num_keyslots_attr.attr,
NULL,
};
static const struct attribute_group blk_crypto_attr_group = {
.attrs = blk_crypto_attrs,
};
/*
* The encryption mode attributes. To avoid hard-coding the list of encryption
* modes, these are initialized at boot time by blk_crypto_sysfs_init().
*/
static struct blk_crypto_attr __blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX];
static struct attribute *blk_crypto_mode_attrs[BLK_ENCRYPTION_MODE_MAX + 1];
static umode_t blk_crypto_mode_is_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
int mode_num = a - __blk_crypto_mode_attrs;
if (profile->modes_supported[mode_num])
return 0444;
return 0;
}
static ssize_t blk_crypto_mode_show(struct blk_crypto_profile *profile,
struct blk_crypto_attr *attr, char *page)
{
int mode_num = attr - __blk_crypto_mode_attrs;
return sysfs_emit(page, "0x%x\n", profile->modes_supported[mode_num]);
}
static const struct attribute_group blk_crypto_modes_attr_group = {
.name = "modes",
.attrs = blk_crypto_mode_attrs,
.is_visible = blk_crypto_mode_is_visible,
};
static const struct attribute_group *blk_crypto_attr_groups[] = {
&blk_crypto_attr_group,
&blk_crypto_modes_attr_group,
NULL,
};
static ssize_t blk_crypto_attr_show(struct kobject *kobj,
struct attribute *attr, char *page)
{
struct blk_crypto_profile *profile = kobj_to_crypto_profile(kobj);
struct blk_crypto_attr *a = attr_to_crypto_attr(attr);
return a->show(profile, a, page);
}
static const struct sysfs_ops blk_crypto_attr_ops = {
.show = blk_crypto_attr_show,
};
static void blk_crypto_release(struct kobject *kobj)
{
kfree(container_of(kobj, struct blk_crypto_kobj, kobj));
}
static struct kobj_type blk_crypto_ktype = {
.default_groups = blk_crypto_attr_groups,
.sysfs_ops = &blk_crypto_attr_ops,
.release = blk_crypto_release,
};
/*
* If the request_queue has a blk_crypto_profile, create the "crypto"
* subdirectory in sysfs (/sys/block/$disk/queue/crypto/).
*/
int blk_crypto_sysfs_register(struct request_queue *q)
{
struct blk_crypto_kobj *obj;
int err;
if (!q->crypto_profile)
return 0;
obj = kzalloc(sizeof(*obj), GFP_KERNEL);
if (!obj)
return -ENOMEM;
obj->profile = q->crypto_profile;
err = kobject_init_and_add(&obj->kobj, &blk_crypto_ktype, &q->kobj,
"crypto");
if (err) {
kobject_put(&obj->kobj);
return err;
}
q->crypto_kobject = &obj->kobj;
return 0;
}
void blk_crypto_sysfs_unregister(struct request_queue *q)
{
kobject_put(q->crypto_kobject);
}
static int __init blk_crypto_sysfs_init(void)
{
int i;
BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
for (i = 1; i < BLK_ENCRYPTION_MODE_MAX; i++) {
struct blk_crypto_attr *attr = &__blk_crypto_mode_attrs[i];
attr->attr.name = blk_crypto_modes[i].name;
attr->attr.mode = 0444;
attr->show = blk_crypto_mode_show;
blk_crypto_mode_attrs[i - 1] = &attr->attr;
}
return 0;
}
subsys_initcall(blk_crypto_sysfs_init);

View File

@ -19,16 +19,19 @@
const struct blk_crypto_mode blk_crypto_modes[] = {
[BLK_ENCRYPTION_MODE_AES_256_XTS] = {
.name = "AES-256-XTS",
.cipher_str = "xts(aes)",
.keysize = 64,
.ivsize = 16,
},
[BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = {
.name = "AES-128-CBC-ESSIV",
.cipher_str = "essiv(cbc(aes),sha256)",
.keysize = 16,
.ivsize = 16,
},
[BLK_ENCRYPTION_MODE_ADIANTUM] = {
.name = "Adiantum",
.cipher_str = "adiantum(xchacha12,aes)",
.keysize = 32,
.ivsize = 32,
@ -111,7 +114,6 @@ int __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
*dst->bi_crypt_context = *src->bi_crypt_context;
return 0;
}
EXPORT_SYMBOL_GPL(__bio_crypt_clone);
/* Increments @dun by @inc, treating @dun as a multi-limb integer. */
void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],

View File

@ -460,9 +460,7 @@ int blkdev_issue_flush(struct block_device *bdev)
{
struct bio bio;
bio_init(&bio, NULL, 0);
bio_set_dev(&bio, bdev);
bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
bio_init(&bio, bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH);
return submit_bio_wait(&bio);
}
EXPORT_SYMBOL(blkdev_issue_flush);

View File

@ -178,12 +178,12 @@
#include <linux/time64.h>
#include <linux/parser.h>
#include <linux/sched/signal.h>
#include <linux/blk-cgroup.h>
#include <asm/local.h>
#include <asm/local64.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"
#ifdef CONFIG_TRACEPOINTS

View File

@ -74,9 +74,9 @@
#include <linux/sched/signal.h>
#include <trace/events/block.h>
#include <linux/blk-mq.h>
#include <linux/blk-cgroup.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
#include "blk-cgroup.h"
#include "blk.h"
#define DEFAULT_SCALE_COOKIE 1000000U

View File

@ -12,11 +12,11 @@
* Documentation/admin-guide/cgroup-v2.rst.
*/
#include <linux/blk-cgroup.h>
#include <linux/blk-mq.h>
#include <linux/blk_types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include "blk-cgroup.h"
#include "blk-ioprio.h"
#include "blk-rq-qos.h"

View File

@ -10,19 +10,6 @@
#include "blk.h"
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
{
struct bio *new = bio_alloc(gfp, nr_pages);
if (bio) {
bio_chain(bio, new);
submit_bio(bio);
}
return new;
}
EXPORT_SYMBOL_GPL(blk_next_bio);
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, int flags,
struct bio **biop)
@ -32,9 +19,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
unsigned int op;
sector_t bs_mask, part_offset = 0;
if (!q)
return -ENXIO;
if (bdev_read_only(bdev))
return -EPERM;
@ -95,11 +79,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
bio = blk_next_bio(bio, 0, gfp_mask);
bio = blk_next_bio(bio, bdev, 0, op, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, op, 0);
bio->bi_iter.bi_size = req_sects << 9;
sector += req_sects;
nr_sects -= req_sects;
@ -172,9 +153,6 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
struct bio *bio = *biop;
sector_t bs_mask;
if (!q)
return -ENXIO;
if (bdev_read_only(bdev))
return -EPERM;
@ -189,14 +167,12 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
max_write_same_sectors = bio_allowed_max_sectors(q);
while (nr_sects) {
bio = blk_next_bio(bio, 1, gfp_mask);
bio = blk_next_bio(bio, bdev, 1, REQ_OP_WRITE_SAME, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
if (nr_sects > max_write_same_sectors) {
bio->bi_iter.bi_size = max_write_same_sectors << 9;
@ -250,10 +226,6 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
{
struct bio *bio = *biop;
unsigned int max_write_zeroes_sectors;
struct request_queue *q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
if (bdev_read_only(bdev))
return -EPERM;
@ -265,10 +237,8 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
return -EOPNOTSUPP;
while (nr_sects) {
bio = blk_next_bio(bio, 0, gfp_mask);
bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE_ZEROES;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
@ -304,23 +274,17 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
struct bio **biop)
{
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
int bi_size = 0;
unsigned int sz;
if (!q)
return -ENXIO;
if (bdev_read_only(bdev))
return -EPERM;
while (nr_sects != 0) {
bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
gfp_mask);
bio = blk_next_bio(bio, bdev, __blkdev_sectors_to_bio_pages(nr_sects),
REQ_OP_WRITE, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
while (nr_sects != 0) {
sz = min((sector_t) PAGE_SIZE, nr_sects << 9);

View File

@ -368,8 +368,6 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
trace_block_split(split, (*bio)->bi_iter.bi_sector);
submit_bio_noacct(*bio);
*bio = split;
blk_throtl_charge_bio_split(*bio);
}
}

View File

@ -107,7 +107,7 @@ static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
return BLK_MQ_NO_TAG;
if (data->shallow_depth)
return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
return sbitmap_queue_get_shallow(bt, data->shallow_depth);
else
return __sbitmap_queue_get(bt);
}

View File

@ -793,8 +793,10 @@ bool blk_update_request(struct request *req, blk_status_t error,
#endif
if (unlikely(error && !blk_rq_is_passthrough(req) &&
!(req->rq_flags & RQF_QUIET)))
!(req->rq_flags & RQF_QUIET))) {
blk_print_req_error(req, error);
trace_block_rq_error(req, error, nr_bytes);
}
blk_account_io_completion(req, nr_bytes);
@ -2181,6 +2183,14 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
continue;
/*
* If there is already a run_work pending, leave the
* pending delay untouched. Otherwise, a hctx can stall
* if another hctx is re-delaying the other's work
* before the work executes.
*/
if (delayed_work_pending(&hctx->run_work))
continue;
/*
* Dispatch from this hctx either if there's no hctx preferred
* by IO scheduler or if it has requests that bypass the
@ -2790,9 +2800,6 @@ void blk_mq_submit_bio(struct bio *bio)
unsigned int nr_segs = 1;
blk_status_t ret;
if (unlikely(!blk_crypto_bio_prep(&bio)))
return;
blk_queue_bounce(q, &bio);
if (blk_may_split(q, bio))
__blk_queue_split(q, &bio, &nr_segs);
@ -2842,27 +2849,16 @@ void blk_mq_submit_bio(struct bio *bio)
blk_mq_try_issue_directly(rq->mq_hctx, rq));
}
#ifdef CONFIG_BLK_MQ_STACKING
/**
* blk_cloned_rq_check_limits - Helper function to check a cloned request
* for the new queue limits
* @q: the queue
* @rq: the request being checked
*
* Description:
* @rq may have been made based on weaker limitations of upper-level queues
* in request stacking drivers, and it may violate the limitation of @q.
* Since the block layer and the underlying device driver trust @rq
* after it is inserted to @q, it should be checked against @q before
* the insertion using this generic function.
*
* Request stacking drivers like request-based dm may change the queue
* limits when retrying requests on other queues. Those requests need
* to be checked against the new queue limits again during dispatch.
* blk_insert_cloned_request - Helper for stacking drivers to submit a request
* @rq: the request being queued
*/
static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
struct request *rq)
blk_status_t blk_insert_cloned_request(struct request *rq)
{
struct request_queue *q = rq->q;
unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
blk_status_t ret;
if (blk_rq_sectors(rq) > max_sectors) {
/*
@ -2894,24 +2890,7 @@ static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
return BLK_STS_IOERR;
}
return BLK_STS_OK;
}
/**
* blk_insert_cloned_request - Helper for stacking drivers to submit a request
* @q: the queue to submit the request
* @rq: the request being queued
*/
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
blk_status_t ret;
ret = blk_cloned_rq_check_limits(q, rq);
if (ret != BLK_STS_OK)
return ret;
if (rq->q->disk &&
should_fail_request(rq->q->disk->part0, blk_rq_bytes(rq)))
if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;
if (blk_crypto_insert_cloned_request(rq))
@ -2924,7 +2903,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
* bypass a potential scheduler on the bottom device for
* insert.
*/
blk_mq_run_dispatch_ops(rq->q,
blk_mq_run_dispatch_ops(q,
ret = blk_mq_request_issue_directly(rq, true));
if (ret)
blk_account_io_done(rq, ktime_get_ns());
@ -2979,10 +2958,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
bs = &fs_bio_set;
__rq_for_each_bio(bio_src, rq_src) {
bio = bio_clone_fast(bio_src, gfp_mask, bs);
bio = bio_alloc_clone(rq->q->disk->part0, bio_src, gfp_mask,
bs);
if (!bio)
goto free_and_out;
bio->bi_bdev = rq->q->disk->part0;
if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;
@ -3019,6 +2998,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
#endif /* CONFIG_BLK_MQ_STACKING */
/*
* Steal bios from a request and add them to a bio list.

View File

@ -10,7 +10,6 @@
#include <linux/backing-dev.h>
#include <linux/blktrace_api.h>
#include <linux/blk-mq.h>
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
#include "blk.h"
@ -18,6 +17,7 @@
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
struct queue_sysfs_entry {
@ -880,6 +880,10 @@ int blk_register_queue(struct gendisk *disk)
goto put_dev;
}
ret = blk_crypto_sysfs_register(q);
if (ret)
goto put_dev;
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
wbt_enable_default(q);
blk_throtl_register_queue(q);
@ -910,6 +914,7 @@ int blk_register_queue(struct gendisk *disk)
return ret;
put_dev:
elv_unregister_queue(q);
disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
mutex_unlock(&q->sysfs_dir_lock);
@ -954,16 +959,18 @@ void blk_unregister_queue(struct gendisk *disk)
*/
if (queue_is_mq(q))
blk_mq_unregister_dev(disk_to_dev(disk), q);
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
blk_crypto_sysfs_unregister(q);
blk_trace_remove_sysfs(disk_to_dev(disk));
mutex_lock(&q->sysfs_lock);
if (q->elevator)
elv_unregister_queue(q);
elv_unregister_queue(q);
disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
/* Now that we've deleted all child objects, we can delete the queue. */
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
mutex_unlock(&q->sysfs_dir_lock);
kobject_put(&disk_to_dev(disk)->kobj);

View File

@ -10,7 +10,6 @@
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"
#include "blk-stat.h"
@ -42,11 +41,6 @@
/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;
enum tg_state_flags {
THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
};
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
/* We measure latency for request size from <= 4k to >= 1M */
@ -426,12 +420,24 @@ static void tg_update_has_rules(struct throtl_grp *tg)
struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
struct throtl_data *td = tg->td;
int rw;
int has_iops_limit = 0;
for (rw = READ; rw <= WRITE; rw++) {
unsigned int iops_limit = tg_iops_limit(tg, rw);
for (rw = READ; rw <= WRITE; rw++)
tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
(td->limit_valid[td->limit_index] &&
(tg_bps_limit(tg, rw) != U64_MAX ||
tg_iops_limit(tg, rw) != UINT_MAX));
iops_limit != UINT_MAX));
if (iops_limit != UINT_MAX)
has_iops_limit = 1;
}
if (has_iops_limit)
tg->flags |= THROTL_TG_HAS_IOPS_LIMIT;
else
tg->flags &= ~THROTL_TG_HAS_IOPS_LIMIT;
}
static void throtl_pd_online(struct blkg_policy_data *pd)
@ -634,8 +640,6 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
atomic_set(&tg->io_split_cnt[rw], 0);
/*
* Previous slice has expired. We must have trimmed it after last
* bio dispatch. That means since start of last slice, we never used
@ -659,8 +663,6 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
atomic_set(&tg->io_split_cnt[rw], 0);
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
@ -808,7 +810,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
unsigned int bio_size = throtl_bio_data_size(bio);
if (bps_limit == U64_MAX) {
/* no need to throttle if this bio's bytes have been accounted */
if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
if (wait)
*wait = 0;
return true;
@ -893,9 +896,6 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
jiffies + tg->td->throtl_slice);
}
if (iops_limit != UINT_MAX)
tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
if (wait)
@ -920,9 +920,12 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
unsigned int bio_size = throtl_bio_data_size(bio);
/* Charge the bio to the group */
tg->bytes_disp[rw] += bio_size;
if (!bio_flagged(bio, BIO_THROTTLED)) {
tg->bytes_disp[rw] += bio_size;
tg->last_bytes_disp[rw] += bio_size;
}
tg->io_disp[rw]++;
tg->last_bytes_disp[rw] += bio_size;
tg->last_io_disp[rw]++;
/*
@ -1219,7 +1222,7 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
if (!bio_list_empty(&bio_list_on_stack)) {
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bio_list_on_stack)))
submit_bio_noacct(bio);
submit_bio_noacct_nocheck(bio);
blk_finish_plug(&plug);
}
}
@ -1917,14 +1920,12 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
}
if (tg->iops[READ][LIMIT_LOW]) {
tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
iops = tg->last_io_disp[READ] * HZ / elapsed_time;
if (iops >= tg->iops[READ][LIMIT_LOW])
tg->last_low_overflow_time[READ] = now;
}
if (tg->iops[WRITE][LIMIT_LOW]) {
tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
if (iops >= tg->iops[WRITE][LIMIT_LOW])
tg->last_low_overflow_time[WRITE] = now;
@ -2043,25 +2044,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
}
#endif
void blk_throtl_charge_bio_split(struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;
struct throtl_grp *parent = blkg_to_tg(blkg);
struct throtl_service_queue *parent_sq;
bool rw = bio_data_dir(bio);
do {
if (!parent->has_rules[rw])
break;
atomic_inc(&parent->io_split_cnt[rw]);
atomic_inc(&parent->last_io_split_cnt[rw]);
parent_sq = parent->service_queue.parent_sq;
parent = sq_to_tg(parent_sq);
} while (parent);
}
bool __blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);

View File

@ -52,6 +52,12 @@ struct throtl_service_queue {
struct timer_list pending_timer; /* fires on first_pending_disptime */
};
enum tg_state_flags {
THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
THROTL_TG_HAS_IOPS_LIMIT = 1 << 2, /* tg has iops limit */
};
enum {
LIMIT_LOW,
LIMIT_MAX,
@ -132,9 +138,6 @@ struct throtl_grp {
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
atomic_t io_split_cnt[2];
atomic_t last_io_split_cnt[2];
struct blkg_rwstat stat_bytes;
struct blkg_rwstat stat_ios;
};
@ -158,20 +161,21 @@ static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct request_queue *q);
void blk_throtl_exit(struct request_queue *q);
void blk_throtl_register_queue(struct request_queue *q);
void blk_throtl_charge_bio_split(struct bio *bio);
bool __blk_throtl_bio(struct bio *bio);
static inline bool blk_throtl_bio(struct bio *bio)
{
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
if (bio_flagged(bio, BIO_THROTTLED))
/* no need to throttle bps any more if the bio has been throttled */
if (bio_flagged(bio, BIO_THROTTLED) &&
!(tg->flags & THROTL_TG_HAS_IOPS_LIMIT))
return false;
if (!tg->has_rules[bio_data_dir(bio)])
return false;

View File

@ -215,9 +215,8 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
continue;
}
bio = blk_next_bio(bio, 0, gfp_mask);
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
gfp_mask);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
@ -239,10 +238,7 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
{
struct bio bio;
bio_init(&bio, NULL, 0);
bio_set_dev(&bio, bdev);
bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
return submit_bio_wait(&bio);
}
@ -306,9 +302,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
}
while (sector < end_sector) {
bio = blk_next_bio(bio, 0, gfp_mask);
bio_set_dev(bio, bdev);
bio->bi_opf = op | REQ_SYNC;
bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;

View File

@ -46,7 +46,7 @@ void blk_freeze_queue(struct request_queue *q);
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
bool submit_bio_checks(struct bio *bio);
void submit_bio_noacct_nocheck(struct bio *bio);
static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
{
@ -406,8 +406,6 @@ extern int blk_iolatency_init(struct request_queue *q);
static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
#endif
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
#ifdef CONFIG_BLK_DEV_ZONED
void blk_queue_free_zone_bitmaps(struct request_queue *q);
void blk_queue_clear_zone_settings(struct request_queue *q);
@ -426,6 +424,7 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
int bdev_del_partition(struct gendisk *disk, int partno);
int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
sector_t length);
void blk_drop_partitions(struct gendisk *disk);
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
@ -445,6 +444,9 @@ int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
void disk_del_events(struct gendisk *disk);
void disk_release_events(struct gendisk *disk);
void disk_block_events(struct gendisk *disk);
void disk_unblock_events(struct gendisk *disk);
void disk_flush_events(struct gendisk *disk, unsigned int mask);
extern struct device_attribute dev_attr_events;
extern struct device_attribute dev_attr_events_async;
extern struct device_attribute dev_attr_events_poll_msecs;

View File

@ -14,7 +14,6 @@
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/hash.h>
@ -24,6 +23,7 @@
#include <trace/events/block.h>
#include "blk.h"
#include "blk-cgroup.h"
#define POOL_SIZE 64
#define ISA_POOL_SIZE 16
@ -162,15 +162,12 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
* that does not own the bio - reason being drivers don't use it for
* iterating over the biovec anymore, so expecting it to be kept up
* to date (i.e. for clones that share the parent biovec) is just
* asking for trouble and would force extra work on
* __bio_clone_fast() anyways.
* asking for trouble and would force extra work.
*/
bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
&bounce_bio_set);
bio->bi_bdev = bio_src->bi_bdev;
bio = bio_alloc_bioset(bio_src->bi_bdev, bio_segments(bio_src),
bio_src->bi_opf, GFP_NOIO, &bounce_bio_set);
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_opf = bio_src->bi_opf;
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;

View File

@ -4,7 +4,7 @@
*/
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "blk.h"
struct disk_events {

View File

@ -35,7 +35,6 @@
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <trace/events/block.h>
@ -44,6 +43,7 @@
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"
static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);
@ -516,9 +516,11 @@ int elv_register_queue(struct request_queue *q, bool uevent)
void elv_unregister_queue(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
lockdep_assert_held(&q->sysfs_lock);
if (q) {
if (e && e->registered) {
struct elevator_queue *e = q->elevator;
kobject_uevent(&e->kobj, KOBJ_REMOVE);
@ -591,9 +593,7 @@ int elevator_switch_mq(struct request_queue *q,
lockdep_assert_held(&q->sysfs_lock);
if (q->elevator) {
if (q->elevator->registered)
elv_unregister_queue(q);
elv_unregister_queue(q);
ioc_clear_queue(q);
blk_mq_sched_free_rqs(q);
elevator_exit(q);

View File

@ -75,8 +75,13 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
return -ENOMEM;
}
bio_init(&bio, vecs, nr_pages);
bio_set_dev(&bio, bdev);
if (iov_iter_rw(iter) == READ) {
bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
if (iter_is_iovec(iter))
should_dirty = true;
} else {
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
@ -88,14 +93,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
goto out;
ret = bio.bi_iter.bi_size;
if (iov_iter_rw(iter) == READ) {
bio.bi_opf = REQ_OP_READ;
if (iter_is_iovec(iter))
should_dirty = true;
} else {
bio.bi_opf = dio_bio_write_op(iocb);
if (iov_iter_rw(iter) == WRITE)
task_io_account_write(ret);
}
if (iocb->ki_flags & IOCB_NOWAIT)
bio.bi_opf |= REQ_NOWAIT;
if (iocb->ki_flags & IOCB_HIPRI)
@ -190,6 +190,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
struct blkdev_dio *dio;
struct bio *bio;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
loff_t pos = iocb->ki_pos;
int ret = 0;
@ -197,7 +198,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
atomic_set(&dio->ref, 1);
@ -223,7 +224,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
blk_start_plug(&plug);
for (;;) {
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
@ -238,11 +238,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
if (is_read) {
bio->bi_opf = REQ_OP_READ;
if (dio->flags & DIO_SHOULD_DIRTY)
bio_set_pages_dirty(bio);
} else {
bio->bi_opf = dio_bio_write_op(iocb);
task_io_account_write(bio->bi_iter.bi_size);
}
if (iocb->ki_flags & IOCB_NOWAIT)
@ -258,7 +256,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
atomic_inc(&dio->ref);
submit_bio(bio);
bio = bio_alloc(GFP_KERNEL, nr_pages);
bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
}
blk_finish_plug(&plug);
@ -313,6 +311,8 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
unsigned int nr_pages)
{
struct block_device *bdev = iocb->ki_filp->private_data;
bool is_read = iov_iter_rw(iter) == READ;
unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
struct blkdev_dio *dio;
struct bio *bio;
loff_t pos = iocb->ki_pos;
@ -322,11 +322,10 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
bio = bio_alloc_kiocb(iocb, bdev, nr_pages, opf, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
dio->flags = 0;
dio->iocb = iocb;
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_end_io = blkdev_bio_end_io_async;
@ -349,14 +348,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
}
dio->size = bio->bi_iter.bi_size;
if (iov_iter_rw(iter) == READ) {
bio->bi_opf = REQ_OP_READ;
if (is_read) {
if (iter_is_iovec(iter)) {
dio->flags |= DIO_SHOULD_DIRTY;
bio_set_pages_dirty(bio);
}
} else {
bio->bi_opf = dio_bio_write_op(iocb);
task_io_account_write(bio->bi_iter.bi_size);
}

View File

@ -8,7 +8,6 @@
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
@ -185,7 +184,9 @@ static struct blk_major_name {
struct blk_major_name *next;
int major;
char name[16];
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
void (*probe)(dev_t devt);
#endif
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);
static DEFINE_SPINLOCK(major_names_spinlock);
@ -275,7 +276,9 @@ int __register_blkdev(unsigned int major, const char *name,
}
p->major = major;
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
p->probe = probe;
#endif
strlcpy(p->name, name, sizeof(p->name));
p->next = NULL;
index = major_to_index(major);
@ -523,6 +526,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
disk_update_readahead(disk);
disk_add_events(disk);
set_bit(GD_ADDED, &disk->state);
return 0;
out_unregister_bdi:
@ -693,6 +697,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
return badblocks_store(disk->bb, page, len, 0);
}
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
void blk_request_module(dev_t devt)
{
unsigned int major = MAJOR(devt);
@ -712,6 +717,7 @@ void blk_request_module(dev_t devt)
/* Make old-style 2.4 aliases work */
request_module("block-major-%d", MAJOR(devt));
}
#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
/*
* print a full list of all partitions - intended for places where the root
@ -927,12 +933,17 @@ ssize_t part_stat_show(struct device *dev,
struct disk_stats stat;
unsigned int inflight;
part_stat_read_all(bdev, &stat);
if (queue_is_mq(q))
inflight = blk_mq_in_flight(q, bdev);
else
inflight = part_in_flight(bdev);
if (inflight) {
part_stat_lock();
update_io_ticks(bdev, jiffies, true);
part_stat_unlock();
}
part_stat_read_all(bdev, &stat);
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@ -1128,6 +1139,10 @@ static void disk_release(struct device *dev)
xa_destroy(&disk->part_tbl);
disk->queue->disk = NULL;
blk_put_queue(disk->queue);
if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
disk->fops->free_disk(disk);
iput(disk->part0->bd_inode); /* frees the disk */
}
@ -1188,12 +1203,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
xa_for_each(&gp->part_tbl, idx, hd) {
if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
continue;
part_stat_read_all(hd, &stat);
if (queue_is_mq(gp->queue))
inflight = blk_mq_in_flight(gp->queue, hd);
else
inflight = part_in_flight(hd);
if (inflight) {
part_stat_lock();
update_io_ticks(hd, jiffies, true);
part_stat_unlock();
}
part_stat_read_all(hd, &stat);
seq_printf(seqf, "%4d %7d %pg "
"%lu %lu %lu %u "
"%lu %lu %lu %u "

View File

@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
struct bd_holder_disk {

View File

@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include "../blk.h"
/*

View File

@ -8,7 +8,6 @@
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/vmalloc.h>
#include <linux/blktrace_api.h>
#include <linux/raid/detect.h>

View File

@ -13,7 +13,6 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>

View File

@ -14,7 +14,6 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/genhd.h>
#include <linux/fs.h>
#include <asm/unaligned.h>
#include <asm/byteorder.h>

View File

@ -13,7 +13,7 @@
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <uapi/linux/sed-opal.h>

View File

@ -16,7 +16,7 @@
#include <linux/kdev_t.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include "base.h"

View File

@ -21,7 +21,7 @@
#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include <linux/pm_runtime.h>
#include <linux/netdevice.h>

View File

@ -17,7 +17,7 @@
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include <linux/shmem_fs.h>

View File

@ -12,7 +12,6 @@
#include <linux/ioctl.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/genhd.h>
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/export.h>

View File

@ -10,7 +10,6 @@
#include <linux/blk-mq.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/genhd.h>
#include <linux/moduleparam.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

View File

@ -138,15 +138,14 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
op_flags |= REQ_FUA | REQ_PREFLUSH;
op_flags |= REQ_SYNC;
bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
bio_set_dev(bio, bdev->md_bdev);
bio = bio_alloc_bioset(bdev->md_bdev, 1, op | op_flags, GFP_NOIO,
&drbd_md_io_bio_set);
bio->bi_iter.bi_sector = sector;
err = -EIO;
if (bio_add_page(bio, device->md_io.page, size, 0) != size)
goto out;
bio->bi_private = device;
bio->bi_end_io = drbd_md_endio;
bio_set_op_attrs(bio, op, op_flags);
if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL)
/* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */

View File

@ -976,12 +976,13 @@ static void drbd_bm_endio(struct bio *bio)
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
struct bio *bio = bio_alloc_bioset(GFP_NOIO, 1, &drbd_md_io_bio_set);
struct drbd_device *device = ctx->device;
unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
GFP_NOIO, &drbd_md_io_bio_set);
struct drbd_bitmap *b = device->bitmap;
struct page *page;
unsigned int len;
unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
sector_t on_disk_sector =
device->ldev->md.md_offset + device->ldev->md.bm_offset;
@ -1006,14 +1007,12 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
bio_set_dev(bio, device->ldev->md_bdev);
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
bio_add_page(bio, page, len, 0);
bio->bi_private = ctx;
bio->bi_end_io = drbd_bm_endio;
bio_set_op_attrs(bio, op, 0);
if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
bio_io_error(bio);

View File

@ -27,7 +27,6 @@
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/dynamic_debug.h>
#include <net/tcp.h>

View File

@ -1279,16 +1279,16 @@ static void one_flush_endio(struct bio *bio)
static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
struct bio *bio = bio_alloc(GFP_NOIO, 0);
struct bio *bio = bio_alloc(device->ldev->backing_bdev, 0,
REQ_OP_FLUSH | REQ_PREFLUSH, GFP_NOIO);
struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
if (!bio || !octx) {
drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
if (!octx) {
drbd_warn(device, "Could not allocate a octx, CANNOT ISSUE FLUSH\n");
/* FIXME: what else can I do now? disconnecting or detaching
* really does not help to improve the state of the world, either.
*/
kfree(octx);
if (bio)
bio_put(bio);
bio_put(bio);
ctx->error = -ENOMEM;
put_ldev(device);
@ -1298,10 +1298,8 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont
octx->device = device;
octx->ctx = ctx;
bio_set_dev(bio, device->ldev->backing_bdev);
bio->bi_private = octx;
bio->bi_end_io = one_flush_endio;
bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH;
device->flush_jif = jiffies;
set_bit(FLUSH_PENDING, &device->flags);
@ -1646,7 +1644,6 @@ int drbd_submit_peer_request(struct drbd_device *device,
unsigned data_size = peer_req->i.size;
unsigned n_bios = 0;
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
int err = -ENOMEM;
/* TRIM/DISCARD: for now, always use the helper function
* blkdev_issue_zeroout(..., discard=true).
@ -1687,15 +1684,10 @@ int drbd_submit_peer_request(struct drbd_device *device,
* generated bio, but a bio allocated on behalf of the peer.
*/
next_bio:
bio = bio_alloc(GFP_NOIO, nr_pages);
if (!bio) {
drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
goto fail;
}
bio = bio_alloc(device->ldev->backing_bdev, nr_pages, op | op_flags,
GFP_NOIO);
/* > peer_req->i.sector, unless this is the first bio */
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, device->ldev->backing_bdev);
bio_set_op_attrs(bio, op, op_flags);
bio->bi_private = peer_req;
bio->bi_end_io = drbd_peer_request_endio;
@ -1726,14 +1718,6 @@ int drbd_submit_peer_request(struct drbd_device *device,
drbd_submit_bio_noacct(device, fault_type, bio);
} while (bios);
return 0;
fail:
while (bios) {
bio = bios;
bios = bios->bi_next;
bio_put(bio);
}
return err;
}
static void drbd_remove_epoch_entry_interval(struct drbd_device *device,

View File

@ -30,7 +30,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
return NULL;
memset(req, 0, sizeof(*req));
req->private_bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, bio_src,
GFP_NOIO, &drbd_io_bio_set);
req->private_bio->bi_private = req;
req->private_bio->bi_end_io = drbd_request_endio;
@ -1151,8 +1152,6 @@ drbd_submit_req_private_bio(struct drbd_request *req)
else
type = DRBD_FAULT_DT_RD;
bio_set_dev(bio, device->ldev->backing_bdev);
/* State may have changed since we grabbed our reference on the
* ->ldev member. Double check, and short-circuit to endio.
* In case the last activity log transaction failed to get on

View File

@ -1523,9 +1523,9 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
drbd_al_begin_io(device, &req->i);
req->private_bio = bio_clone_fast(req->master_bio, GFP_NOIO,
req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
req->master_bio, GFP_NOIO,
&drbd_io_bio_set);
bio_set_dev(req->private_bio, device->ldev->backing_bdev);
req->private_bio->bi_private = req;
req->private_bio->bi_end_io = drbd_request_endio;
submit_bio_noacct(req->private_bio);

View File

@ -4129,15 +4129,13 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
cbdata.drive = drive;
bio_init(&bio, &bio_vec, 1);
bio_set_dev(&bio, bdev);
bio_init(&bio, bdev, &bio_vec, 1, REQ_OP_READ);
bio_add_page(&bio, page, block_size(bdev), 0);
bio.bi_iter.bi_sector = 0;
bio.bi_flags |= (1 << BIO_QUIET);
bio.bi_private = &cbdata;
bio.bi_end_io = floppy_rb0_cb;
bio_set_op_attrs(&bio, REQ_OP_READ, 0);
init_completion(&cbdata.complete);

View File

@ -19,7 +19,6 @@
#include <linux/compat.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>

View File

@ -15,7 +15,6 @@
#include <linux/rwsem.h>
#include <linux/ata.h>
#include <linux/interrupt.h>
#include <linux/genhd.h>
/* Offset of Subsystem Device ID in pci confoguration space */
#define PCI_SUBSYSTEM_DEVICEID 0x2E

View File

@ -1020,9 +1020,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
continue;
bio = pkt->r_bios[f];
bio_reset(bio);
bio_reset(bio, pd->bdev, REQ_OP_READ);
bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
bio_set_dev(bio, pd->bdev);
bio->bi_end_io = pkt_end_io_read;
bio->bi_private = pkt;
@ -1034,7 +1033,6 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
BUG();
atomic_inc(&pkt->io_wait);
bio_set_op_attrs(bio, REQ_OP_READ, 0);
pkt_queue_bio(pd, bio);
frames_read++;
}
@ -1235,9 +1233,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
{
int f;
bio_reset(pkt->w_bio);
bio_reset(pkt->w_bio, pd->bdev, REQ_OP_WRITE);
pkt->w_bio->bi_iter.bi_sector = pkt->sector;
bio_set_dev(pkt->w_bio, pd->bdev);
pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
pkt->w_bio->bi_private = pkt;
@ -1270,7 +1267,6 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
/* Start the write request */
atomic_set(&pkt->io_wait, 1);
bio_set_op_attrs(pkt->w_bio, REQ_OP_WRITE, 0);
pkt_queue_bio(pd, pkt->w_bio);
}
@ -2298,12 +2294,12 @@ static void pkt_end_io_read_cloned(struct bio *bio)
static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
{
struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
struct bio *cloned_bio =
bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
psd->pd = pd;
psd->bio = bio;
bio_set_dev(cloned_bio, pd->bdev);
cloned_bio->bi_private = psd;
cloned_bio->bi_end_io = pkt_end_io_read_cloned;
pd->stats.secs_r += bio_sectors(bio);
@ -2404,18 +2400,11 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
static void pkt_submit_bio(struct bio *bio)
{
struct pktcdvd_device *pd;
char b[BDEVNAME_SIZE];
struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
struct bio *split;
blk_queue_split(&bio);
pd = bio->bi_bdev->bd_disk->queue->queuedata;
if (!pd) {
pr_err("%s incorrect request queue\n", bio_devname(bio, b));
goto end_io;
}
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
(unsigned long long)bio->bi_iter.bi_sector,
(unsigned long long)bio_end_sector(bio));

View File

@ -12,8 +12,7 @@
#include "rnbd-srv-dev.h"
#include "rnbd-log.h"
struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
struct bio_set *bs)
struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags)
{
struct rnbd_dev *dev;
int ret;
@ -30,7 +29,6 @@ struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
dev->blk_open_flags = flags;
bdevname(dev->bdev, dev->name);
dev->ibd_bio_set = bs;
return dev;
@ -44,60 +42,3 @@ void rnbd_dev_close(struct rnbd_dev *dev)
blkdev_put(dev->bdev, dev->blk_open_flags);
kfree(dev);
}
void rnbd_dev_bi_end_io(struct bio *bio)
{
struct rnbd_dev_blk_io *io = bio->bi_private;
rnbd_endio(io->priv, blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
/**
* rnbd_bio_map_kern - map kernel address into bio
* @data: pointer to buffer to map
* @bs: bio_set to use.
* @len: length in bytes
* @gfp_mask: allocation flags for bio allocation
*
* Map the kernel address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
unsigned int len, gfp_t gfp_mask)
{
unsigned long kaddr = (unsigned long)data;
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long start = kaddr >> PAGE_SHIFT;
const int nr_pages = end - start;
int offset, i;
struct bio *bio;
bio = bio_alloc_bioset(gfp_mask, nr_pages, bs);
if (!bio)
return ERR_PTR(-ENOMEM);
offset = offset_in_page(kaddr);
for (i = 0; i < nr_pages; i++) {
unsigned int bytes = PAGE_SIZE - offset;
if (len <= 0)
break;
if (bytes > len)
bytes = len;
if (bio_add_page(bio, virt_to_page(data), bytes,
offset) < bytes) {
/* we don't support partial mappings */
bio_put(bio);
return ERR_PTR(-EINVAL);
}
data += bytes;
len -= bytes;
offset = 0;
}
return bio;
}

View File

@ -14,25 +14,16 @@
struct rnbd_dev {
struct block_device *bdev;
struct bio_set *ibd_bio_set;
fmode_t blk_open_flags;
char name[BDEVNAME_SIZE];
};
struct rnbd_dev_blk_io {
struct rnbd_dev *dev;
void *priv;
/* have to be last member for front_pad usage of bioset_init */
struct bio bio;
};
/**
* rnbd_dev_open() - Open a device
* @path: path to open
* @flags: open flags
* @bs: bio_set to use during block io,
*/
struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags,
struct bio_set *bs);
struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags);
/**
* rnbd_dev_close() - Close a device
@ -41,11 +32,6 @@ void rnbd_dev_close(struct rnbd_dev *dev);
void rnbd_endio(void *priv, int error);
void rnbd_dev_bi_end_io(struct bio *bio);
struct bio *rnbd_bio_map_kern(void *data, struct bio_set *bs,
unsigned int len, gfp_t gfp_mask);
static inline int rnbd_dev_get_max_segs(const struct rnbd_dev *dev)
{
return queue_max_segments(bdev_get_queue(dev->bdev));

View File

@ -13,7 +13,6 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/stat.h>
#include <linux/genhd.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/device.h>

View File

@ -114,6 +114,12 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
return sess_dev;
}
static void rnbd_dev_bi_end_io(struct bio *bio)
{
rnbd_endio(bio->bi_private, blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rtrs_srv_op *id, void *data, u32 datalen,
const void *usr, size_t usrlen)
@ -123,7 +129,6 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_sess_dev *sess_dev;
u32 dev_id;
int err;
struct rnbd_dev_blk_io *io;
struct bio *bio;
short prio;
@ -144,33 +149,29 @@ static int process_rdma(struct rnbd_srv_session *srv_sess,
priv->sess_dev = sess_dev;
priv->id = id;
/* Generate bio with pages pointing to the rdma buffer */
bio = rnbd_bio_map_kern(data, sess_dev->rnbd_dev->ibd_bio_set, datalen, GFP_KERNEL);
if (IS_ERR(bio)) {
err = PTR_ERR(bio);
rnbd_srv_err(sess_dev, "Failed to generate bio, err: %d\n", err);
goto sess_dev_put;
bio = bio_alloc(sess_dev->rnbd_dev->bdev, 1,
rnbd_to_bio_flags(le32_to_cpu(msg->rw)), GFP_KERNEL);
if (bio_add_page(bio, virt_to_page(data), datalen,
offset_in_page(data)) != datalen) {
rnbd_srv_err(sess_dev, "Failed to map data to bio\n");
err = -EINVAL;
goto bio_put;
}
io = container_of(bio, struct rnbd_dev_blk_io, bio);
io->dev = sess_dev->rnbd_dev;
io->priv = priv;
bio->bi_end_io = rnbd_dev_bi_end_io;
bio->bi_private = io;
bio->bi_opf = rnbd_to_bio_flags(le32_to_cpu(msg->rw));
bio->bi_private = priv;
bio->bi_iter.bi_sector = le64_to_cpu(msg->sector);
bio->bi_iter.bi_size = le32_to_cpu(msg->bi_size);
prio = srv_sess->ver < RNBD_PROTO_VER_MAJOR ||
usrlen < sizeof(*msg) ? 0 : le16_to_cpu(msg->prio);
bio_set_prio(bio, prio);
bio_set_dev(bio, sess_dev->rnbd_dev->bdev);
submit_bio(bio);
return 0;
sess_dev_put:
bio_put:
bio_put(bio);
rnbd_put_sess_dev(sess_dev);
err:
kfree(priv);
@ -251,7 +252,6 @@ static void destroy_sess(struct rnbd_srv_session *srv_sess)
out:
xa_destroy(&srv_sess->index_idr);
bioset_exit(&srv_sess->sess_bio_set);
pr_info("RTRS Session %s disconnected\n", srv_sess->sessname);
@ -280,16 +280,6 @@ static int create_sess(struct rtrs_srv_sess *rtrs)
return -ENOMEM;
srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs);
err = bioset_init(&srv_sess->sess_bio_set, srv_sess->queue_depth,
offsetof(struct rnbd_dev_blk_io, bio),
BIOSET_NEED_BVECS);
if (err) {
pr_err("Allocating srv_session for path %s failed\n",
pathname);
kfree(srv_sess);
return err;
}
xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC);
INIT_LIST_HEAD(&srv_sess->sess_dev_list);
mutex_init(&srv_sess->lock);
@ -738,8 +728,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto reject;
}
rnbd_dev = rnbd_dev_open(full_path, open_flags,
&srv_sess->sess_bio_set);
rnbd_dev = rnbd_dev_open(full_path, open_flags);
if (IS_ERR(rnbd_dev)) {
pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %ld\n",
full_path, srv_sess->sessname, PTR_ERR(rnbd_dev));

View File

@ -23,7 +23,6 @@ struct rnbd_srv_session {
struct rtrs_srv_sess *rtrs;
char sessname[NAME_MAX];
int queue_depth;
struct bio_set sess_bio_set;
struct xarray index_idr;
/* List of struct rnbd_srv_sess_dev */

View File

@ -9,7 +9,6 @@
#include <linux/types.h>
#include <linux/blk-mq.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/cdrom.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

View File

@ -69,13 +69,6 @@ struct virtio_blk {
/* Process context for config space updates */
struct work_struct config_work;
/*
* Tracks references from block_device_operations open/release and
* virtio_driver probe/remove so this object can be freed once no
* longer in use.
*/
refcount_t refs;
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems;
@ -391,43 +384,6 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
return err;
}
static void virtblk_get(struct virtio_blk *vblk)
{
refcount_inc(&vblk->refs);
}
static void virtblk_put(struct virtio_blk *vblk)
{
if (refcount_dec_and_test(&vblk->refs)) {
ida_simple_remove(&vd_index_ida, vblk->index);
mutex_destroy(&vblk->vdev_mutex);
kfree(vblk);
}
}
static int virtblk_open(struct block_device *bd, fmode_t mode)
{
struct virtio_blk *vblk = bd->bd_disk->private_data;
int ret = 0;
mutex_lock(&vblk->vdev_mutex);
if (vblk->vdev)
virtblk_get(vblk);
else
ret = -ENXIO;
mutex_unlock(&vblk->vdev_mutex);
return ret;
}
static void virtblk_release(struct gendisk *disk, fmode_t mode)
{
struct virtio_blk *vblk = disk->private_data;
virtblk_put(vblk);
}
/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
@ -460,11 +416,19 @@ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
return ret;
}
static void virtblk_free_disk(struct gendisk *disk)
{
struct virtio_blk *vblk = disk->private_data;
ida_simple_remove(&vd_index_ida, vblk->index);
mutex_destroy(&vblk->vdev_mutex);
kfree(vblk);
}
static const struct block_device_operations virtblk_fops = {
.owner = THIS_MODULE,
.open = virtblk_open,
.release = virtblk_release,
.getgeo = virtblk_getgeo,
.owner = THIS_MODULE,
.getgeo = virtblk_getgeo,
.free_disk = virtblk_free_disk,
};
static int index_to_minor(int index)
@ -791,8 +755,6 @@ static int virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
/* This reference is dropped in virtblk_remove(). */
refcount_set(&vblk->refs, 1);
mutex_init(&vblk->vdev_mutex);
vblk->vdev = vdev;
@ -970,7 +932,7 @@ static void virtblk_remove(struct virtio_device *vdev)
flush_work(&vblk->config_work);
del_gendisk(vblk->disk);
blk_cleanup_disk(vblk->disk);
blk_cleanup_queue(vblk->disk->queue);
blk_mq_free_tag_set(&vblk->tag_set);
mutex_lock(&vblk->vdev_mutex);
@ -986,7 +948,7 @@ static void virtblk_remove(struct virtio_device *vdev)
mutex_unlock(&vblk->vdev_mutex);
virtblk_put(vblk);
put_disk(vblk->disk);
}
#ifdef CONFIG_PM_SLEEP

View File

@ -1326,16 +1326,13 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
pages[i]->page,
seg[i].nsec << 9,
seg[i].offset) == 0)) {
bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
if (unlikely(bio == NULL))
goto fail_put_bio;
bio = bio_alloc(preq.bdev, bio_max_segs(nseg - i),
operation | operation_flags,
GFP_KERNEL);
biolist[nbio++] = bio;
bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_iter.bi_sector = preq.sector_number;
bio_set_op_attrs(bio, operation, operation_flags);
}
preq.sector_number += seg[i].nsec;
@ -1345,15 +1342,11 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
if (!bio) {
BUG_ON(operation_flags != REQ_PREFLUSH);
bio = bio_alloc(GFP_KERNEL, 0);
if (unlikely(bio == NULL))
goto fail_put_bio;
bio = bio_alloc(preq.bdev, 0, operation | operation_flags,
GFP_KERNEL);
biolist[nbio++] = bio;
bio_set_dev(bio, preq.bdev);
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio_set_op_attrs(bio, operation, operation_flags);
}
atomic_set(&pending_req->pendcnt, nbio);
@ -1381,14 +1374,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
free_req(ring, pending_req);
msleep(1); /* back off a bit */
return -EIO;
fail_put_bio:
for (i = 0; i < nbio; i++)
bio_put(biolist[i]);
atomic_set(&pending_req->pendcnt, 1);
__end_block_io_op(pending_req, BLK_STS_RESOURCE);
msleep(1); /* back off a bit */
return -EIO;
}

View File

@ -22,7 +22,6 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
@ -617,24 +616,21 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
{
struct bio *bio;
bio = bio_alloc(GFP_NOIO, 1);
bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
GFP_NOIO);
if (!bio)
return -ENOMEM;
bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
bio_set_dev(bio, zram->bdev);
if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
bio_put(bio);
return -EIO;
}
if (!parent) {
bio->bi_opf = REQ_OP_READ;
if (!parent)
bio->bi_end_io = zram_page_end_io;
} else {
bio->bi_opf = parent->bi_opf;
else
bio_chain(bio, parent);
}
submit_bio(bio);
return 1;
@ -747,10 +743,9 @@ static ssize_t writeback_store(struct device *dev,
continue;
}
bio_init(&bio, &bio_vec, 1);
bio_set_dev(&bio, zram->bdev);
bio_init(&bio, zram->bdev, &bio_vec, 1,
REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
bvec.bv_offset);

View File

@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/cdrom.h>
#include <linux/genhd.h>
#include <linux/bio.h>
#include <linux/blk-mq.h>
#include <linux/interrupt.h>

View File

@ -330,7 +330,7 @@
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/nodemask.h>

View File

@ -204,6 +204,7 @@ config BLK_DEV_DM
tristate "Device mapper support"
select BLOCK_HOLDER_DEPRECATED if SYSFS
select BLK_DEV_DM_BUILTIN
select BLK_MQ_STACKING
depends on DAX || DAX=n
help
Device-mapper is a low level volume manager. It works by allowing

View File

@ -26,7 +26,8 @@ struct bio *bch_bbio_alloc(struct cache_set *c)
struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
struct bio *bio = &b->bio;
bio_init(bio, bio->bi_inline_vecs, meta_bucket_pages(&c->cache->sb));
bio_init(bio, NULL, bio->bi_inline_vecs,
meta_bucket_pages(&c->cache->sb), 0);
return bio;
}

View File

@ -53,14 +53,12 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
reread: left = ca->sb.bucket_size - offset;
len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
bio_reset(bio);
bio_reset(bio, ca->bdev, REQ_OP_READ);
bio->bi_iter.bi_sector = bucket + offset;
bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = len << 9;
bio->bi_end_io = journal_read_endio;
bio->bi_private = &cl;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);
closure_bio_submit(ca->set, bio, &cl);
@ -611,11 +609,9 @@ static void do_journal_discard(struct cache *ca)
atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
bio_init(bio, bio->bi_inline_vecs, 1);
bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
bio_init(bio, ca->bdev, bio->bi_inline_vecs, 1, REQ_OP_DISCARD);
bio->bi_iter.bi_sector = bucket_to_sector(ca->set,
ca->sb.d[ja->discard_idx]);
bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = bucket_bytes(ca);
bio->bi_end_io = journal_discard_endio;
@ -773,16 +769,14 @@ static void journal_write_unlocked(struct closure *cl)
atomic_long_add(sectors, &ca->meta_sectors_written);
bio_reset(bio);
bio_reset(bio, ca->bdev, REQ_OP_WRITE |
REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA);
bch_bio_map(bio, w->data);
bio->bi_iter.bi_sector = PTR_OFFSET(k, i);
bio_set_dev(bio, ca->bdev);
bio->bi_iter.bi_size = sectors << 9;
bio->bi_end_io = journal_write_endio;
bio->bi_private = w;
bio_set_op_attrs(bio, REQ_OP_WRITE,
REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
bch_bio_map(bio, w->data);
trace_bcache_journal_write(bio, w->data->keys);
bio_list_add(&list, bio);

View File

@ -79,8 +79,8 @@ static void moving_init(struct moving_io *io)
{
struct bio *bio = &io->bio.bio;
bio_init(bio, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS));
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0);
bio_get(bio);
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

View File

@ -685,8 +685,7 @@ static void do_bio_hook(struct search *s,
{
struct bio *bio = &s->bio.bio;
bio_init(bio, NULL, 0);
__bio_clone_fast(bio, orig_bio);
bio_init_clone(bio->bi_bdev, bio, orig_bio, GFP_NOIO);
/*
* bi_end_io can be set separately somewhere else, e.g. the
* variants in,
@ -831,11 +830,11 @@ static void cached_dev_read_done(struct closure *cl)
*/
if (s->iop.bio) {
bio_reset(s->iop.bio);
bio_reset(s->iop.bio, s->cache_miss->bi_bdev, REQ_OP_READ);
s->iop.bio->bi_iter.bi_sector =
s->cache_miss->bi_iter.bi_sector;
bio_copy_dev(s->iop.bio, s->cache_miss);
s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
bio_clone_blkg_association(s->iop.bio, s->cache_miss);
bch_bio_map(s->iop.bio, NULL);
bio_copy_data(s->cache_miss, s->iop.bio);
@ -913,14 +912,13 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
/* btree_search_recurse()'s btree iterator is no good anymore */
ret = miss == bio ? MAP_DONE : -EINTR;
cache_bio = bio_alloc_bioset(GFP_NOWAIT,
cache_bio = bio_alloc_bioset(miss->bi_bdev,
DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
&dc->disk.bio_split);
0, GFP_NOWAIT, &dc->disk.bio_split);
if (!cache_bio)
goto out_submit;
cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector;
bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
cache_bio->bi_end_io = backing_request_endio;
@ -1025,21 +1023,21 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
*/
struct bio *flush;
flush = bio_alloc_bioset(GFP_NOIO, 0,
&dc->disk.bio_split);
flush = bio_alloc_bioset(bio->bi_bdev, 0,
REQ_OP_WRITE | REQ_PREFLUSH,
GFP_NOIO, &dc->disk.bio_split);
if (!flush) {
s->iop.status = BLK_STS_RESOURCE;
goto insert_data;
}
bio_copy_dev(flush, bio);
flush->bi_end_io = backing_request_endio;
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
/* I/O request sent to backing device */
closure_bio_submit(s->iop.c, flush, cl);
}
} else {
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
s->iop.bio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
&dc->disk.bio_split);
/* I/O request sent to backing device */
bio->bi_end_io = backing_request_endio;
closure_bio_submit(s->iop.c, bio, cl);

View File

@ -18,7 +18,6 @@
#include <linux/blkdev.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
@ -343,8 +342,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
down(&dc->sb_write_mutex);
closure_init(cl, parent);
bio_init(bio, dc->sb_bv, 1);
bio_set_dev(bio, dc->bdev);
bio_init(bio, dc->bdev, dc->sb_bv, 1, 0);
bio->bi_end_io = write_bdev_super_endio;
bio->bi_private = dc;
@ -387,8 +385,7 @@ void bcache_write_super(struct cache_set *c)
if (ca->sb.version < version)
ca->sb.version = version;
bio_init(bio, ca->sb_bv, 1);
bio_set_dev(bio, ca->bdev);
bio_init(bio, ca->bdev, ca->sb_bv, 1, 0);
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
@ -2240,7 +2237,7 @@ static int cache_alloc(struct cache *ca)
__module_get(THIS_MODULE);
kobject_init(&ca->kobj, &bch_cache_ktype);
bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);
bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
/*
* when ca->sb.njournal_buckets is not zero, journal exists,

View File

@ -292,8 +292,8 @@ static void dirty_init(struct keybuf_key *w)
struct dirty_io *io = w->private;
struct bio *bio = &io->bio;
bio_init(bio, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS));
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
if (!io->dc->writeback_percent)
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

View File

@ -744,21 +744,14 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
spin_unlock_irq(&cache->lock);
}
static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, bool bio_has_pbd)
{
if (bio_has_pbd)
check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE)
clear_discard(cache, oblock_to_dblock(cache, oblock));
}
static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
dm_oblock_t oblock)
{
// FIXME: check_if_tick_bio_needed() is called way too much through this interface
__remap_to_origin_clear_discard(cache, bio, oblock, true);
check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE)
clear_discard(cache, oblock_to_dblock(cache, oblock));
}
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
@ -826,16 +819,15 @@ static void issue_op(struct bio *bio, void *context)
static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
dm_oblock_t oblock, dm_cblock_t cblock)
{
struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);
struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio,
GFP_NOIO, &cache->bs);
BUG_ON(!origin_bio);
bio_chain(origin_bio, bio);
/*
* Passing false to __remap_to_origin_clear_discard() skips
* all code that might use per_bio_data (since clone doesn't have it)
*/
__remap_to_origin_clear_discard(cache, origin_bio, oblock, false);
if (bio_data_dir(origin_bio) == WRITE)
clear_discard(cache, oblock_to_dblock(cache, oblock));
submit_bio(origin_bio);
remap_to_cache(cache, bio, cblock);

View File

@ -11,7 +11,6 @@
#include <linux/kthread.h>
#include <linux/ktime.h>
#include <linux/genhd.h>
#include <linux/blk-mq.h>
#include <linux/blk-crypto-profile.h>

View File

@ -234,7 +234,7 @@ static volatile unsigned long dm_crypt_pages_per_client;
#define DM_CRYPT_MEMORY_PERCENT 2
#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16)
static void clone_init(struct dm_crypt_io *, struct bio *);
static void crypt_endio(struct bio *clone);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
static struct scatterlist *crypt_get_sg_data(struct crypt_config *cc,
struct scatterlist *sg);
@ -1364,11 +1364,10 @@ static int crypt_convert_block_aead(struct crypt_config *cc,
}
if (r == -EBADMSG) {
char b[BDEVNAME_SIZE];
sector_t s = le64_to_cpu(*sector);
DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu",
bio_devname(ctx->bio_in, b), s);
DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
ctx->bio_in->bi_bdev, s);
dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
ctx->bio_in, s, 0);
}
@ -1672,11 +1671,10 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
mutex_lock(&cc->bio_alloc_lock);
clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs);
if (!clone)
goto out;
clone_init(io, clone);
clone = bio_alloc_bioset(cc->dev->bdev, nr_iovecs, io->base_bio->bi_opf,
GFP_NOIO, &cc->bs);
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
remaining_size = size;
@ -1702,7 +1700,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
bio_put(clone);
clone = NULL;
}
out:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
mutex_unlock(&cc->bio_alloc_lock);
@ -1829,34 +1827,25 @@ static void crypt_endio(struct bio *clone)
crypt_dec_pending(io);
}
static void clone_init(struct dm_crypt_io *io, struct bio *clone)
{
struct crypt_config *cc = io->cc;
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
bio_set_dev(clone, cc->dev->bdev);
clone->bi_opf = io->base_bio->bi_opf;
}
static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
{
struct crypt_config *cc = io->cc;
struct bio *clone;
/*
* We need the original biovec array in order to decrypt
* the whole bio data *afterwards* -- thanks to immutable
* biovecs we don't need to worry about the block layer
* modifying the biovec array; so leverage bio_clone_fast().
* We need the original biovec array in order to decrypt the whole bio
* data *afterwards* -- thanks to immutable biovecs we don't need to
* worry about the block layer modifying the biovec array; so leverage
* bio_alloc_clone().
*/
clone = bio_clone_fast(io->base_bio, gfp, &cc->bs);
clone = bio_alloc_clone(cc->dev->bdev, io->base_bio, gfp, &cc->bs);
if (!clone)
return 1;
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
crypt_inc_pending(io);
clone_init(io, clone);
clone->bi_iter.bi_sector = cc->start + io->sector;
if (dm_crypt_integrity_io_alloc(io, clone)) {
@ -2179,11 +2168,10 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
error = cc->iv_gen_ops->post(cc, org_iv_of_dmreq(cc, dmreq), dmreq);
if (error == -EBADMSG) {
char b[BDEVNAME_SIZE];
sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq));
DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu",
bio_devname(ctx->bio_in, b), s);
DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu",
ctx->bio_in->bi_bdev, s);
dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
ctx->bio_in, s, 0);
io->error = BLK_STS_PROTECTION;

View File

@ -1788,12 +1788,11 @@ static void integrity_metadata(struct work_struct *w)
checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE);
if (unlikely(r)) {
if (r > 0) {
char b[BDEVNAME_SIZE];
sector_t s;
s = sector - ((r + ic->tag_size - 1) / ic->tag_size);
DMERR_LIMIT("%s: Checksum failed at sector 0x%llx",
bio_devname(bio, b), s);
DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
bio->bi_bdev, s);
r = -EILSEQ;
atomic64_inc(&ic->number_of_mismatches);
dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum",

View File

@ -345,11 +345,10 @@ static void do_region(int op, int op_flags, unsigned region,
(PAGE_SIZE >> SECTOR_SHIFT)));
}
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
bio = bio_alloc_bioset(where->bdev, num_bvecs, op | op_flags,
GFP_NOIO, &io->client->bios);
bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
bio_set_dev(bio, where->bdev);
bio->bi_end_io = endio;
bio_set_op_attrs(bio, op, op_flags);
store_io_and_region_in_bio(bio, io, region);
if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {

View File

@ -217,18 +217,12 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
void *ptr;
size_t ret;
bio = bio_alloc(GFP_KERNEL, 1);
if (!bio) {
DMERR("Couldn't alloc log bio");
goto error;
}
bio = bio_alloc(lc->logdev->bdev, 1, REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
log_end_super : log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
page = alloc_page(GFP_KERNEL);
if (!page) {
@ -275,18 +269,12 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
atomic_inc(&lc->io_blocks);
bio = bio_alloc(GFP_KERNEL, bio_pages);
if (!bio) {
DMERR("Couldn't alloc inline data bio");
goto error;
}
bio = bio_alloc(lc->logdev->bdev, bio_pages, REQ_OP_WRITE,
GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
for (i = 0; i < bio_pages; i++) {
pg_datalen = min_t(int, datalen, PAGE_SIZE);
@ -322,7 +310,6 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
error_bio:
bio_free_pages(bio);
bio_put(bio);
error:
put_io_block(lc);
return -1;
}
@ -363,17 +350,12 @@ static int log_one_block(struct log_writes_c *lc,
goto out;
atomic_inc(&lc->io_blocks);
bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt));
if (!bio) {
DMERR("Couldn't alloc log bio");
goto error;
}
bio = bio_alloc(lc->logdev->bdev, bio_max_segs(block->vec_cnt),
REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
for (i = 0; i < block->vec_cnt; i++) {
/*
@ -385,18 +367,13 @@ static int log_one_block(struct log_writes_c *lc,
if (ret != block->vecs[i].bv_len) {
atomic_inc(&lc->io_blocks);
submit_bio(bio);
bio = bio_alloc(GFP_KERNEL,
bio_max_segs(block->vec_cnt - i));
if (!bio) {
DMERR("Couldn't alloc log bio");
goto error;
}
bio = bio_alloc(lc->logdev->bdev,
bio_max_segs(block->vec_cnt - i),
REQ_OP_WRITE, GFP_KERNEL);
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, lc->logdev->bdev);
bio->bi_end_io = log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, block->vecs[i].bv_page,
block->vecs[i].bv_len, 0);

View File

@ -303,21 +303,6 @@ static void end_clone_request(struct request *clone, blk_status_t error)
dm_complete_request(tio->orig, error);
}
static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
blk_status_t r;
if (blk_queue_io_stat(clone->q))
clone->rq_flags |= RQF_IO_STAT;
clone->start_time_ns = ktime_get_ns();
r = blk_insert_cloned_request(clone->q, clone);
if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
/* must complete clone in terms of original request */
dm_complete_request(rq, r);
return r;
}
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
void *data)
{
@ -398,13 +383,20 @@ static int map_request(struct dm_rq_target_io *tio)
/* The target has remapped the I/O so dispatch it */
trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq));
ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
ret = blk_insert_cloned_request(clone);
switch (ret) {
case BLK_STS_OK:
break;
case BLK_STS_RESOURCE:
case BLK_STS_DEV_RESOURCE:
blk_rq_unprep_clone(clone);
blk_mq_cleanup_rq(clone);
tio->ti->type->release_clone_rq(clone, &tio->info);
tio->clone = NULL;
return DM_MAPIO_REQUEUE;
default:
/* must complete clone in terms of original request */
dm_complete_request(rq, ret);
}
break;
case DM_MAPIO_REQUEUE:

View File

@ -141,11 +141,6 @@ struct dm_snapshot {
* for them to be committed.
*/
struct bio_list bios_queued_during_merge;
/*
* Flush data after merge.
*/
struct bio flush_bio;
};
/*
@ -1127,17 +1122,6 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
static void error_bios(struct bio *bio);
static int flush_data(struct dm_snapshot *s)
{
struct bio *flush_bio = &s->flush_bio;
bio_reset(flush_bio);
bio_set_dev(flush_bio, s->origin->bdev);
flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
return submit_bio_wait(flush_bio);
}
static void merge_callback(int read_err, unsigned long write_err, void *context)
{
struct dm_snapshot *s = context;
@ -1151,7 +1135,7 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
goto shut;
}
if (flush_data(s) < 0) {
if (blkdev_issue_flush(s->origin->bdev) < 0) {
DMERR("Flush after merge failed: shutting down merge");
goto shut;
}
@ -1340,7 +1324,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->first_merging_chunk = 0;
s->num_merging_chunks = 0;
bio_list_init(&s->bios_queued_during_merge);
bio_init(&s->flush_bio, NULL, 0);
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@ -1528,8 +1511,6 @@ static void snapshot_dtr(struct dm_target *ti)
dm_exception_store_destroy(s->store);
bio_uninit(&s->flush_bio);
dm_put_device(ti, s->cow);
dm_put_device(ti, s->origin);

View File

@ -282,8 +282,6 @@ struct pool {
struct dm_bio_prison_cell **cell_sort_array;
mempool_t mapping_pool;
struct bio flush_bio;
};
static void metadata_operation_failed(struct pool *pool, const char *op, int r);
@ -1179,25 +1177,17 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
return;
}
discard_parent = bio_alloc(GFP_NOIO, 1);
if (!discard_parent) {
DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
dm_device_name(tc->pool->pool_md));
queue_passdown_pt2(m);
discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO);
discard_parent->bi_end_io = passdown_endio;
discard_parent->bi_private = m;
if (m->maybe_shared)
passdown_double_checking_shared_status(m, discard_parent);
else {
struct discard_op op;
} else {
discard_parent->bi_end_io = passdown_endio;
discard_parent->bi_private = m;
if (m->maybe_shared)
passdown_double_checking_shared_status(m, discard_parent);
else {
struct discard_op op;
begin_discard(&op, tc, discard_parent);
r = issue_discard(&op, m->data_block, data_end);
end_discard(&op, r);
}
begin_discard(&op, tc, discard_parent);
r = issue_discard(&op, m->data_block, data_end);
end_discard(&op, r);
}
}
@ -2913,7 +2903,6 @@ static void __pool_destroy(struct pool *pool)
if (pool->next_mapping)
mempool_free(pool->next_mapping, &pool->mapping_pool);
mempool_exit(&pool->mapping_pool);
bio_uninit(&pool->flush_bio);
dm_deferred_set_destroy(pool->shared_read_ds);
dm_deferred_set_destroy(pool->all_io_ds);
kfree(pool);
@ -2994,7 +2983,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
pool->low_water_triggered = false;
pool->suspended = true;
pool->out_of_data_space = false;
bio_init(&pool->flush_bio, NULL, 0);
pool->shared_read_ds = dm_deferred_set_create();
if (!pool->shared_read_ds) {
@ -3201,13 +3189,8 @@ static void metadata_low_callback(void *context)
static int metadata_pre_commit_callback(void *context)
{
struct pool *pool = context;
struct bio *flush_bio = &pool->flush_bio;
bio_reset(flush_bio);
bio_set_dev(flush_bio, pool->data_dev);
flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
return submit_bio_wait(flush_bio);
return blkdev_issue_flush(pool->data_dev);
}
static sector_t get_dev_size(struct block_device *bdev)

View File

@ -1821,11 +1821,11 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
max_pages = e->wc_list_contiguous;
bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set);
bio = bio_alloc_bioset(wc->dev->bdev, max_pages, REQ_OP_WRITE,
GFP_NOIO, &wc->bio_set);
wb = container_of(bio, struct writeback_struct, bio);
wb->wc = wc;
bio->bi_end_io = writecache_writeback_endio;
bio_set_dev(bio, wc->dev->bdev);
bio->bi_iter.bi_sector = read_original_sector(wc, e);
if (max_pages <= WB_LIST_INLINE ||
unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
@ -1852,7 +1852,8 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba
wb->wc_list[wb->wc_list_n++] = f;
e = f;
}
bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
if (WC_MODE_FUA(wc))
bio->bi_opf |= REQ_FUA;
if (writecache_has_error(wc)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);

View File

@ -550,11 +550,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
if (!mblk)
return ERR_PTR(-ENOMEM);
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
dmz_free_mblock(zmd, mblk);
return ERR_PTR(-ENOMEM);
}
bio = bio_alloc(dev->bdev, 1, REQ_OP_READ | REQ_META | REQ_PRIO,
GFP_NOIO);
spin_lock(&zmd->mblk_lock);
@ -578,10 +575,8 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
/* Submit read BIO */
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio_set_dev(bio, dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO);
bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
@ -725,19 +720,14 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
if (dmz_bdev_is_dying(dev))
return -EIO;
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
set_bit(DMZ_META_ERROR, &mblk->state);
return -ENOMEM;
}
bio = bio_alloc(dev->bdev, 1, REQ_OP_WRITE | REQ_META | REQ_PRIO,
GFP_NOIO);
set_bit(DMZ_META_WRITING, &mblk->state);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio_set_dev(bio, dev->bdev);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
@ -759,13 +749,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op,
if (dmz_bdev_is_dying(dev))
return -EIO;
bio = bio_alloc(GFP_NOIO, 1);
if (!bio)
return -ENOMEM;
bio = bio_alloc(dev->bdev, 1, op | REQ_SYNC | REQ_META | REQ_PRIO,
GFP_NOIO);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio_set_dev(bio, dev->bdev);
bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
ret = submit_bio_wait(bio);
bio_put(bio);

View File

@ -125,11 +125,10 @@ static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
if (dev->flags & DMZ_BDEV_DYING)
return -EIO;
clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
clone = bio_alloc_clone(dev->bdev, bio, GFP_NOIO, &dmz->bio_set);
if (!clone)
return -ENOMEM;
bio_set_dev(clone, dev->bdev);
bioctx->dev = dev;
clone->bi_iter.bi_sector =
dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);

View File

@ -79,10 +79,14 @@ struct clone_info {
#define DM_IO_BIO_OFFSET \
(offsetof(struct dm_target_io, clone) + offsetof(struct dm_io, tio))
static inline struct dm_target_io *clone_to_tio(struct bio *clone)
{
return container_of(clone, struct dm_target_io, clone);
}
void *dm_per_bio_data(struct bio *bio, size_t data_size)
{
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
if (!tio->inside_dm_io)
if (!clone_to_tio(bio)->inside_dm_io)
return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size;
return (char *)bio - DM_IO_BIO_OFFSET - data_size;
}
@ -477,10 +481,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
u64 dm_start_time_ns_from_clone(struct bio *bio)
{
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_io *io = tio->io;
return jiffies_to_nsecs(io->start_time);
return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time);
}
EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
@ -519,11 +520,9 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
struct dm_target_io *tio;
struct bio *clone;
clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs);
if (!clone)
return NULL;
clone = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO, &md->io_bs);
tio = container_of(clone, struct dm_target_io, clone);
tio = clone_to_tio(clone);
tio->inside_dm_io = true;
tio->io = NULL;
@ -545,8 +544,8 @@ static void free_io(struct mapped_device *md, struct dm_io *io)
bio_put(&io->tio.clone);
}
static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
unsigned target_bio_nr, gfp_t gfp_mask)
static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
{
struct dm_target_io *tio;
@ -554,11 +553,12 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
/* the dm_target_io embedded in ci->io is available */
tio = &ci->io->tio;
} else {
struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs);
struct bio *clone = bio_alloc_clone(ci->bio->bi_bdev, ci->bio,
gfp_mask, &ci->io->md->bs);
if (!clone)
return NULL;
tio = container_of(clone, struct dm_target_io, clone);
tio = clone_to_tio(clone);
tio->inside_dm_io = false;
}
@ -566,15 +566,16 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *t
tio->io = ci->io;
tio->ti = ti;
tio->target_bio_nr = target_bio_nr;
tio->len_ptr = len;
return tio;
return &tio->clone;
}
static void free_tio(struct dm_target_io *tio)
static void free_tio(struct bio *clone)
{
if (tio->inside_dm_io)
if (clone_to_tio(clone)->inside_dm_io)
return;
bio_put(&tio->clone);
bio_put(clone);
}
/*
@ -879,7 +880,7 @@ static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
static void clone_endio(struct bio *bio)
{
blk_status_t error = bio->bi_status;
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_target_io *tio = clone_to_tio(bio);
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
@ -930,7 +931,7 @@ static void clone_endio(struct bio *bio)
up(&md->swap_bios_semaphore);
}
free_tio(tio);
free_tio(bio);
dm_io_dec_pending(io, error);
}
@ -1085,7 +1086,7 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
*/
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
struct dm_target_io *tio = clone_to_tio(bio);
unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
BUG_ON(bio->bi_opf & REQ_PREFLUSH);
@ -1115,11 +1116,11 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
mutex_unlock(&md->swap_bios_lock);
}
static void __map_bio(struct dm_target_io *tio)
static void __map_bio(struct bio *clone)
{
struct dm_target_io *tio = clone_to_tio(clone);
int r;
sector_t sector;
struct bio *clone = &tio->clone;
struct dm_io *io = tio->io;
struct dm_target *ti = tio->ti;
@ -1164,7 +1165,7 @@ static void __map_bio(struct dm_target_io *tio)
struct mapped_device *md = io->md;
up(&md->swap_bios_semaphore);
}
free_tio(tio);
free_tio(clone);
dm_io_dec_pending(io, BLK_STS_IOERR);
break;
case DM_MAPIO_REQUEUE:
@ -1172,7 +1173,7 @@ static void __map_bio(struct dm_target_io *tio)
struct mapped_device *md = io->md;
up(&md->swap_bios_semaphore);
}
free_tio(tio);
free_tio(clone);
dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
break;
default:
@ -1190,106 +1191,75 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
/*
* Creates a bio that consists of range of complete bvecs.
*/
static int clone_bio(struct dm_target_io *tio, struct bio *bio,
sector_t sector, unsigned len)
static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
sector_t sector, unsigned *len)
{
struct bio *clone = &tio->clone;
int r;
__bio_clone_fast(clone, bio);
r = bio_crypt_clone(clone, bio, GFP_NOIO);
if (r < 0)
return r;
if (bio_integrity(bio)) {
if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
!dm_target_passes_integrity(tio->ti->type))) {
DMWARN("%s: the target %s doesn't support integrity data.",
dm_device_name(tio->io->md),
tio->ti->type->name);
return -EIO;
}
r = bio_integrity_clone(clone, bio, GFP_NOIO);
if (r < 0)
return r;
}
struct bio *bio = ci->bio, *clone;
clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
clone->bi_iter.bi_size = to_bytes(len);
clone->bi_iter.bi_size = to_bytes(*len);
if (bio_integrity(bio))
bio_integrity_trim(clone);
__map_bio(clone);
return 0;
}
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned num_bios)
struct dm_target *ti, unsigned num_bios,
unsigned *len)
{
struct dm_target_io *tio;
struct bio *bio;
int try;
if (!num_bios)
return;
if (num_bios == 1) {
tio = alloc_tio(ci, ti, 0, GFP_NOIO);
bio_list_add(blist, &tio->clone);
return;
}
for (try = 0; try < 2; try++) {
int bio_nr;
struct bio *bio;
if (try)
mutex_lock(&ci->io->md->table_devices_lock);
for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT);
if (!tio)
bio = alloc_tio(ci, ti, bio_nr, len,
try ? GFP_NOIO : GFP_NOWAIT);
if (!bio)
break;
bio_list_add(blist, &tio->clone);
bio_list_add(blist, bio);
}
if (try)
mutex_unlock(&ci->io->md->table_devices_lock);
if (bio_nr == num_bios)
return;
while ((bio = bio_list_pop(blist))) {
tio = container_of(bio, struct dm_target_io, clone);
free_tio(tio);
}
while ((bio = bio_list_pop(blist)))
free_tio(bio);
}
}
static void __clone_and_map_simple_bio(struct clone_info *ci,
struct dm_target_io *tio, unsigned *len)
{
struct bio *clone = &tio->clone;
tio->len_ptr = len;
__bio_clone_fast(clone, ci->bio);
if (len)
bio_setup_sector(clone, ci->sector, *len);
__map_bio(tio);
}
static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
unsigned num_bios, unsigned *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *bio;
struct dm_target_io *tio;
struct bio *clone;
alloc_multiple_bios(&blist, ci, ti, num_bios);
while ((bio = bio_list_pop(&blist))) {
tio = container_of(bio, struct dm_target_io, clone);
__clone_and_map_simple_bio(ci, tio, len);
switch (num_bios) {
case 0:
break;
case 1:
clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
if (len)
bio_setup_sector(clone, ci->sector, *len);
__map_bio(clone);
break;
default:
alloc_multiple_bios(&blist, ci, ti, num_bios, len);
while ((clone = bio_list_pop(&blist))) {
if (len)
bio_setup_sector(clone, ci->sector, *len);
__map_bio(clone);
}
break;
}
}
@ -1304,9 +1274,8 @@ static int __send_empty_flush(struct clone_info *ci)
* need to reference it after submit. It's just used as
* the basis for the clone(s).
*/
bio_init(&flush_bio, NULL, 0);
flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
bio_set_dev(&flush_bio, ci->io->md->disk->part0);
bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0,
REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
ci->bio = &flush_bio;
ci->sector_count = 0;
@ -1319,25 +1288,6 @@ static int __send_empty_flush(struct clone_info *ci)
return 0;
}
static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
sector_t sector, unsigned *len)
{
struct bio *bio = ci->bio;
struct dm_target_io *tio;
int r;
tio = alloc_tio(ci, ti, 0, GFP_NOIO);
tio->len_ptr = len;
r = clone_bio(tio, bio, sector, *len);
if (r < 0) {
free_tio(tio);
return r;
}
__map_bio(tio);
return 0;
}
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
unsigned num_bios)
{

View File

@ -205,9 +205,9 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
}
}
if (failit) {
struct bio *b = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
&mddev->bio_set);
bio_set_dev(b, conf->rdev->bdev);
b->bi_private = bio;
b->bi_end_io = faulty_fail;
bio = b;

View File

@ -121,11 +121,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
}
multipath = conf->multipaths + mp_bh->path;
bio_init(&mp_bh->bio, NULL, 0);
__bio_clone_fast(&mp_bh->bio, bio);
bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO);
mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
bio_set_dev(&mp_bh->bio, multipath->rdev->bdev);
mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
mp_bh->bio.bi_end_io = multipath_end_request;
mp_bh->bio.bi_private = mp_bh;
@ -299,7 +297,6 @@ static void multipathd(struct md_thread *thread)
md_check_recovery(mddev);
for (;;) {
char b[BDEVNAME_SIZE];
spin_lock_irqsave(&conf->device_lock, flags);
if (list_empty(head))
break;
@ -311,13 +308,13 @@ static void multipathd(struct md_thread *thread)
bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
if ((mp_bh->path = multipath_map (conf))<0) {
pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
bio_devname(bio, b),
pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n",
bio->bi_bdev,
(unsigned long long)bio->bi_iter.bi_sector);
multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
} else {
pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
bio_devname(bio, b),
pr_err("multipath: %pg: redirecting sector %llu to another IO path\n",
bio->bi_bdev,
(unsigned long long)bio->bi_iter.bi_sector);
*bio = *(mp_bh->master_bio);
bio->bi_iter.bi_sector +=

Some files were not shown because too many files have changed in this diff Show More