for-linus-20180906
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAluRkywQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpm8uEAC8vBFb5tzZ2dOeRbGQ6LaPTToBmRrLtOcP
kDRnfZIw0raNStOpn1dkGLz8IOSjwOGftx9Q4pJed25vynTEq5lYmmLVUlJQ6cJ7
oNpYiCdPxJvbKz5fChGG2nHHa1RLer1d728NZtkeZU/ChPmw56EO5ORghE7zPG7K
Z/0qHYsgwS427o8pUDsymmt6I62IJGrjzqJdC0pqBy6RePQWtlwkmtd7CIgFiffY
tDnk6RSwcihnIalMMLvFXeGf6cSaZvuH4oK1QNdfojAyS8kWeA6gHtjRS8UcuuUY
t6o+hU0vki8bghoNoI40RrLgAmV91BVv1/Voo79dQvDWAigyie51HwFFkqdWzJxJ
g4MCZYpys26w/VUGBFCku0hiRIAhZFO8Sun5zbVCJpyt8hTXF0RrG3CpwmCF7Lc0
m+h8tJanEMCesfYMztTD31L1BOFhJeOgBJr4a5QURy0LbIvC0V52IKiOQ0475E8E
H10rQaRw/7Am+mZugedMUGMgYD/eN33NQoRuTWZdck/58big2SU78zGpR/GqTmy3
w9v2I8ksBTivzEayBV0G4Z5Gxu7QYA7NMsO5RS/wuGfUX8D/1QtQU9Ejh5TESbek
R3WUyhXJJ2S+DWTUlmX7TgPxYxG3sXatQbSAgFJiucxyIRdpdqfeoXmOHvPrWZEq
O3VDm0D6pw==
=qhv7
-----END PGP SIGNATURE-----

Merge tag 'for-linus-20180906' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Small collection of fixes that should go into this release. This
  contains:

   - Small series that fixes a race between blkcg teardown and
     writeback (Dennis Zhou)

   - Fix disallowing invalid block size settings from the nbd ioctl (me)

   - BFQ fix for a use-after-free on last release of a bfqg (Konstantin
     Khlebnikov)

   - Fix for the "don't warn for flush" fix (Mikulas)"

* tag 'for-linus-20180906' of git://git.kernel.dk/linux-block:
  block: bfq: swap puts in bfqg_and_blkg_put
  block: don't warn when doing fsync on read-only devices
  nbd: don't allow invalid blocksize settings
  blkcg: use tryget logic when associating a blkg with a bio
  blkcg: delay blkg destruction until after writeback has finished
  Revert "blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()"
This commit is contained in:
commit
ca16eb342e
|
@ -275,9 +275,9 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg)
|
|||
|
||||
void bfqg_and_blkg_put(struct bfq_group *bfqg)
|
||||
{
|
||||
bfqg_put(bfqg);
|
||||
|
||||
blkg_put(bfqg_to_blkg(bfqg));
|
||||
|
||||
bfqg_put(bfqg);
|
||||
}
|
||||
|
||||
/* @stats = 0 */
|
||||
|
|
|
@ -2015,7 +2015,8 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
|
|||
{
|
||||
if (unlikely(bio->bi_blkg))
|
||||
return -EBUSY;
|
||||
blkg_get(blkg);
|
||||
if (!blkg_try_get(blkg))
|
||||
return -ENODEV;
|
||||
bio->bi_blkg = blkg;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
|
|||
}
|
||||
}
|
||||
|
||||
static void blkg_pd_offline(struct blkcg_gq *blkg)
|
||||
{
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(blkg->q->queue_lock);
|
||||
lockdep_assert_held(&blkg->blkcg->lock);
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
|
||||
if (blkg->pd[i] && !blkg->pd[i]->offline &&
|
||||
pol->pd_offline_fn) {
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
blkg->pd[i]->offline = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
{
|
||||
struct blkcg *blkcg = blkg->blkcg;
|
||||
struct blkcg_gq *parent = blkg->parent;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(blkg->q->queue_lock);
|
||||
lockdep_assert_held(&blkcg->lock);
|
||||
|
@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
|
|||
WARN_ON_ONCE(list_empty(&blkg->q_node));
|
||||
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
|
||||
if (blkg->pd[i] && pol->pd_offline_fn)
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
}
|
||||
|
||||
if (parent) {
|
||||
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
|
||||
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
|
||||
|
@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q)
|
|||
struct blkcg *blkcg = blkg->blkcg;
|
||||
|
||||
spin_lock(&blkcg->lock);
|
||||
blkg_pd_offline(blkg);
|
||||
blkg_destroy(blkg);
|
||||
spin_unlock(&blkcg->lock);
|
||||
}
|
||||
|
@ -1053,59 +1042,64 @@ static struct cftype blkcg_legacy_files[] = {
|
|||
{ } /* terminate */
|
||||
};
|
||||
|
||||
/*
|
||||
* blkcg destruction is a three-stage process.
|
||||
*
|
||||
* 1. Destruction starts. The blkcg_css_offline() callback is invoked
|
||||
* which offlines writeback. Here we tie the next stage of blkg destruction
|
||||
* to the completion of writeback associated with the blkcg. This lets us
|
||||
* avoid punting potentially large amounts of outstanding writeback to root
|
||||
* while maintaining any ongoing policies. The next stage is triggered when
|
||||
* the nr_cgwbs count goes to zero.
|
||||
*
|
||||
* 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
|
||||
* and handles the destruction of blkgs. Here the css reference held by
|
||||
* the blkg is put back eventually allowing blkcg_css_free() to be called.
|
||||
* This work may occur in cgwb_release_workfn() on the cgwb_release
|
||||
* workqueue. Any submitted ios that fail to get the blkg ref will be
|
||||
* punted to the root_blkg.
|
||||
*
|
||||
* 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
|
||||
* This finally frees the blkcg.
|
||||
*/
|
||||
|
||||
/**
|
||||
* blkcg_css_offline - cgroup css_offline callback
|
||||
* @css: css of interest
|
||||
*
|
||||
* This function is called when @css is about to go away and responsible
|
||||
* for offlining all blkgs pd and killing all wbs associated with @css.
|
||||
* blkgs pd offline should be done while holding both q and blkcg locks.
|
||||
* As blkcg lock is nested inside q lock, this function performs reverse
|
||||
* double lock dancing.
|
||||
*
|
||||
* This is the blkcg counterpart of ioc_release_fn().
|
||||
* This function is called when @css is about to go away. Here the cgwbs are
|
||||
* offlined first and only once writeback associated with the blkcg has
|
||||
* finished do we start step 2 (see above).
|
||||
*/
|
||||
static void blkcg_css_offline(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
|
||||
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct request_queue *q = blkg->q;
|
||||
|
||||
if (spin_trylock(q->queue_lock)) {
|
||||
blkg_pd_offline(blkg);
|
||||
spin_unlock(q->queue_lock);
|
||||
} else {
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
cpu_relax();
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
|
||||
/* this prevents anyone from attaching or migrating to this blkcg */
|
||||
wb_blkcg_offline(blkcg);
|
||||
|
||||
/* put the base cgwb reference allowing step 2 to be triggered */
|
||||
blkcg_cgwb_put(blkcg);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
|
||||
* blkcg_destroy_blkgs - responsible for shooting down blkgs
|
||||
* @blkcg: blkcg of interest
|
||||
*
|
||||
* This function is called when blkcg css is about to free and responsible for
|
||||
* destroying all blkgs associated with @blkcg.
|
||||
* blkgs should be removed while holding both q and blkcg locks. As blkcg lock
|
||||
* blkgs should be removed while holding both q and blkcg locks. As blkcg lock
|
||||
* is nested inside q lock, this function performs reverse double lock dancing.
|
||||
* Destroying the blkgs releases the reference held on the blkcg's css allowing
|
||||
* blkcg_css_free to eventually be called.
|
||||
*
|
||||
* This is the blkcg counterpart of ioc_release_fn().
|
||||
*/
|
||||
static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
|
||||
void blkcg_destroy_blkgs(struct blkcg *blkcg)
|
||||
{
|
||||
spin_lock_irq(&blkcg->lock);
|
||||
|
||||
while (!hlist_empty(&blkcg->blkg_list)) {
|
||||
struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
|
||||
struct blkcg_gq,
|
||||
blkcg_node);
|
||||
struct blkcg_gq, blkcg_node);
|
||||
struct request_queue *q = blkg->q;
|
||||
|
||||
if (spin_trylock(q->queue_lock)) {
|
||||
|
@ -1117,6 +1111,7 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
|
|||
spin_lock_irq(&blkcg->lock);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
}
|
||||
|
||||
|
@ -1125,8 +1120,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
|
|||
struct blkcg *blkcg = css_to_blkcg(css);
|
||||
int i;
|
||||
|
||||
blkcg_destroy_all_blkgs(blkcg);
|
||||
|
||||
mutex_lock(&blkcg_pol_mutex);
|
||||
|
||||
list_del(&blkcg->all_blkcgs_node);
|
||||
|
@ -1189,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
|
|||
INIT_HLIST_HEAD(&blkcg->blkg_list);
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
INIT_LIST_HEAD(&blkcg->cgwb_list);
|
||||
refcount_set(&blkcg->cgwb_refcnt, 1);
|
||||
#endif
|
||||
list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
|
||||
|
||||
|
@ -1480,11 +1474,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
|
|||
|
||||
list_for_each_entry(blkg, &q->blkg_list, q_node) {
|
||||
if (blkg->pd[pol->plid]) {
|
||||
if (!blkg->pd[pol->plid]->offline &&
|
||||
pol->pd_offline_fn) {
|
||||
if (pol->pd_offline_fn)
|
||||
pol->pd_offline_fn(blkg->pd[pol->plid]);
|
||||
blkg->pd[pol->plid]->offline = true;
|
||||
}
|
||||
pol->pd_free_fn(blkg->pd[pol->plid]);
|
||||
blkg->pd[pol->plid] = NULL;
|
||||
}
|
||||
|
|
|
@ -2163,9 +2163,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
|
|||
{
|
||||
const int op = bio_op(bio);
|
||||
|
||||
if (part->policy && (op_is_write(op) && !op_is_flush(op))) {
|
||||
if (part->policy && op_is_write(op)) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
|
||||
return false;
|
||||
|
||||
WARN_ONCE(1,
|
||||
"generic_make_request: Trying to write "
|
||||
"to read-only block-device %s (partno %d)\n",
|
||||
|
|
|
@ -2129,8 +2129,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
|
|||
static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
if (bio->bi_css)
|
||||
bio_associate_blkg(bio, tg_to_blkg(tg));
|
||||
/* fallback to root_blkg if we fail to get a blkg ref */
|
||||
if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
|
||||
bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
|
||||
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1239,6 +1239,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
|||
case NBD_SET_SOCK:
|
||||
return nbd_add_socket(nbd, arg, false);
|
||||
case NBD_SET_BLKSIZE:
|
||||
if (!arg || !is_power_of_2(arg) || arg < 512 ||
|
||||
arg > PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
nbd_size_set(nbd, arg,
|
||||
div_s64(config->bytesize, arg));
|
||||
return 0;
|
||||
|
|
|
@ -56,6 +56,7 @@ struct blkcg {
|
|||
struct list_head all_blkcgs_node;
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
struct list_head cgwb_list;
|
||||
refcount_t cgwb_refcnt;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -89,7 +90,6 @@ struct blkg_policy_data {
|
|||
/* the blkg and policy id this per-policy data belongs to */
|
||||
struct blkcg_gq *blkg;
|
||||
int plid;
|
||||
bool offline;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -387,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
|
|||
return cpd ? cpd->blkcg : NULL;
|
||||
}
|
||||
|
||||
extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
|
||||
/**
|
||||
* blkcg_cgwb_get - get a reference for blkcg->cgwb_list
|
||||
* @blkcg: blkcg of interest
|
||||
*
|
||||
* This is used to track the number of active wb's related to a blkcg.
|
||||
*/
|
||||
static inline void blkcg_cgwb_get(struct blkcg *blkcg)
|
||||
{
|
||||
refcount_inc(&blkcg->cgwb_refcnt);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
|
||||
* @blkcg: blkcg of interest
|
||||
*
|
||||
* This is used to track the number of active wb's related to a blkcg.
|
||||
* When this count goes to zero, all active wb's have finished so the
|
||||
* blkcg can continue destruction by calling blkcg_destroy_blkgs().
|
||||
* This work may occur in cgwb_release_workfn() on the cgwb_release
|
||||
* workqueue.
|
||||
*/
|
||||
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
|
||||
{
|
||||
if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
|
||||
blkcg_destroy_blkgs(blkcg);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
|
||||
|
||||
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
|
||||
{
|
||||
/* wb isn't being accounted, so trigger destruction right away */
|
||||
blkcg_destroy_blkgs(blkcg);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* blkg_path - format cgroup path of blkg
|
||||
* @blkg: blkg of interest
|
||||
|
|
|
@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work)
|
|||
{
|
||||
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
|
||||
release_work);
|
||||
struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
|
||||
|
||||
mutex_lock(&wb->bdi->cgwb_release_mutex);
|
||||
wb_shutdown(wb);
|
||||
|
@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work)
|
|||
css_put(wb->blkcg_css);
|
||||
mutex_unlock(&wb->bdi->cgwb_release_mutex);
|
||||
|
||||
/* triggers blkg destruction if cgwb_refcnt becomes zero */
|
||||
blkcg_cgwb_put(blkcg);
|
||||
|
||||
fprop_local_destroy_percpu(&wb->memcg_completions);
|
||||
percpu_ref_exit(&wb->refcnt);
|
||||
wb_exit(wb);
|
||||
|
@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
|
|||
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
|
||||
list_add(&wb->memcg_node, memcg_cgwb_list);
|
||||
list_add(&wb->blkcg_node, blkcg_cgwb_list);
|
||||
blkcg_cgwb_get(blkcg);
|
||||
css_get(memcg_css);
|
||||
css_get(blkcg_css);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue