linux/drivers/md/bcache/util.c

288 lines
6.4 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* random utiility code, for bcache but in theory not specific to bcache
*
* Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
* Copyright 2012 Google, Inc.
*/
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/types.h>
#include <linux/sched/clock.h>
#include "util.h"
#define simple_strtoint(c, end, base) simple_strtol(c, end, base)
#define simple_strtouint(c, end, base) simple_strtoul(c, end, base)
#define STRTO_H(name, type) \
int bch_ ## name ## _h(const char *cp, type *res) \
{ \
int u = 0; \
char *e; \
type i = simple_ ## name(cp, &e, 10); \
\
switch (tolower(*e)) { \
default: \
return -EINVAL; \
case 'y': \
case 'z': \
u++; \
/* fall through */ \
case 'e': \
u++; \
/* fall through */ \
case 'p': \
u++; \
/* fall through */ \
case 't': \
u++; \
/* fall through */ \
case 'g': \
u++; \
/* fall through */ \
case 'm': \
u++; \
/* fall through */ \
case 'k': \
u++; \
if (e++ == cp) \
return -EINVAL; \
/* fall through */ \
case '\n': \
case '\0': \
if (*e == '\n') \
e++; \
} \
\
if (*e) \
return -EINVAL; \
\
while (u--) { \
if ((type) ~0 > 0 && \
(type) ~0 / 1024 <= i) \
return -EINVAL; \
if ((i > 0 && ANYSINT_MAX(type) / 1024 < i) || \
(i < 0 && -ANYSINT_MAX(type) / 1024 > i)) \
return -EINVAL; \
i *= 1024; \
} \
\
*res = i; \
return 0; \
} \
STRTO_H(strtoint, int)
STRTO_H(strtouint, unsigned int)
STRTO_H(strtoll, long long)
STRTO_H(strtoull, unsigned long long)
/**
* bch_hprint - formats @v to human readable string for sysfs.
* @buf: the (at least 8 byte) buffer to format the result into.
* @v: signed 64 bit integer
*
* Returns the number of bytes used by format.
*/
ssize_t bch_hprint(char *buf, int64_t v)
{
static const char units[] = "?kMGTPEZY";
int u = 0, t;
uint64_t q;
if (v < 0)
q = -v;
else
q = v;
/* For as long as the number is more than 3 digits, but at least
* once, shift right / divide by 1024. Keep the remainder for
* a digit after the decimal point.
*/
do {
u++;
t = q & ~(~0 << 10);
q >>= 10;
} while (q >= 1000);
if (v < 0)
/* '-', up to 3 digits, '.', 1 digit, 1 character, null;
* yields 8 bytes.
*/
return sprintf(buf, "-%llu.%i%c", q, t * 10 / 1024, units[u]);
else
return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]);
}
bool bch_is_zero(const char *p, size_t n)
{
size_t i;
for (i = 0; i < n; i++)
if (p[i])
return false;
return true;
}
int bch_parse_uuid(const char *s, char *uuid)
{
size_t i, j, x;
memset(uuid, 0, 16);
for (i = 0, j = 0;
i < strspn(s, "-0123456789:ABCDEFabcdef") && j < 32;
i++) {
x = s[i] | 32;
switch (x) {
case '0'...'9':
x -= '0';
break;
case 'a'...'f':
x -= 'a' - 10;
break;
default:
continue;
}
if (!(j & 1))
x <<= 4;
uuid[j++ >> 1] |= x;
}
return i;
}
void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
{
uint64_t now, duration, last;
spin_lock(&stats->lock);
now = local_clock();
duration = time_after64(now, start_time)
? now - start_time : 0;
last = time_after64(now, stats->last)
? now - stats->last : 0;
stats->max_duration = max(stats->max_duration, duration);
if (stats->last) {
ewma_add(stats->average_duration, duration, 8, 8);
if (stats->average_frequency)
ewma_add(stats->average_frequency, last, 8, 8);
else
stats->average_frequency = last << 8;
} else {
stats->average_duration = duration << 8;
}
stats->last = now ?: 1;
spin_unlock(&stats->lock);
}
/**
* bch_next_delay() - update ratelimiting statistics and calculate next delay
* @d: the struct bch_ratelimit to update
* @done: the amount of work done, in arbitrary units
*
* Increment @d by the amount of work done, and return how long to delay in
* jiffies until the next time to do some work.
*/
uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
{
uint64_t now = local_clock();
bcache: set max writeback rate when I/O request is idle Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle") allows the writeback rate to be faster if there is no I/O request on a bcache device. It works well if there is only one bcache device attached to the cache set. If there are many bcache devices attached to a cache set, it may introduce performance regression because multiple faster writeback threads of the idle bcache devices will compete the btree level locks with the bcache device who have I/O requests coming. This patch fixes the above issue by only permitting fast writebac when all bcache devices attached on the cache set are idle. And if one of the bcache devices has new I/O request coming, minimized all writeback throughput immediately and let PI controller __update_writeback_rate() to decide the upcoming writeback rate for each bcache device. Also when all bcache devices are idle, limited wrieback rate to a small number is wast of thoughput, especially when backing devices are slower non-rotation devices (e.g. SATA SSD). This patch sets a max writeback rate for each backing device if the whole cache set is idle. A faster writeback rate in idle time means new I/Os may have more available space for dirty data, and people may observe a better write performance then. Please note bcache may change its cache mode in run time, and this patch still works if the cache mode is switched from writeback mode and there is still dirty data on cache. Fixes: Commit b1092c9af9ed ("bcache: allow quick writeback when backing idle") Cc: stable@vger.kernel.org #4.16+ Signed-off-by: Coly Li <colyli@suse.de> Tested-by: Kai Krakow <kai@kaishome.de> Tested-by: Stefan Priebe <s.priebe@profihost.ag> Cc: Michael Lyle <mlyle@lyle.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2018-08-09 15:48:49 +08:00
d->next += div_u64(done * NSEC_PER_SEC, atomic_long_read(&d->rate));
/* Bound the time. Don't let us fall further than 2 seconds behind
* (this prevents unnecessary backlog that would make it impossible
* to catch up). If we're ahead of the desired writeback rate,
* don't let us sleep more than 2.5 seconds (so we can notice/respond
* if the control system tells us to speed up!).
*/
if (time_before64(now + NSEC_PER_SEC * 5LLU / 2LLU, d->next))
d->next = now + NSEC_PER_SEC * 5LLU / 2LLU;
if (time_after64(now - NSEC_PER_SEC * 2, d->next))
d->next = now - NSEC_PER_SEC * 2;
return time_after64(d->next, now)
? div_u64(d->next - now, NSEC_PER_SEC / HZ)
: 0;
}
/*
* Generally it isn't good to access .bi_io_vec and .bi_vcnt directly,
* the preferred way is bio_add_page, but in this case, bch_bio_map()
* supposes that the bvec table is empty, so it is safe to access
* .bi_vcnt & .bi_io_vec in this way even after multipage bvec is
* supported.
*/
void bch_bio_map(struct bio *bio, void *base)
{
block: Abstract out bvec iterator Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Alasdair Kergon <agk@redhat.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: dm-devel@redhat.com Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Boaz Harrosh <bharrosh@panasas.com> Cc: Benny Halevy <bhalevy@tonian.com> Cc: "James E.J. Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Nicholas A. Bellinger" <nab@linux-iscsi.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Chris Mason <chris.mason@fusionio.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: Jaegeuk Kim <jaegeuk.kim@samsung.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Dave Kleikamp <shaggy@kernel.org> Cc: Joern Engel <joern@logfs.org> Cc: Prasad Joshi <prasadjoshi.linux@gmail.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Ben Myers <bpm@sgi.com> Cc: xfs@oss.sgi.com Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Ben Hutchings <ben@decadent.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Tejun Heo <tj@kernel.org> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Wei Yongjun <yongjun_wei@trendmicro.com.cn> Cc: "Roger Pau Monné" <roger.pau@citrix.com> Cc: Jan Beulich <jbeulich@suse.com> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Cc: Ian Campbell <Ian.Campbell@citrix.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Jiang Liu <jiang.liu@huawei.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Jerome Marchand <jmarchand@redhat.com> Cc: Joe Perches <joe@perches.com> Cc: Peng Tao <tao.peng@emc.com> Cc: Andy Adamson <andros@netapp.com> Cc: fanchaoting <fanchaoting@cn.fujitsu.com> Cc: Jie Liu <jeff.liu@oracle.com> Cc: Sunil Mushran <sunil.mushran@gmail.com> Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Pankaj Kumar <pankaj.km@samsung.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Mel Gorman <mgorman@suse.de>6
2013-10-12 06:44:27 +08:00
size_t size = bio->bi_iter.bi_size;
struct bio_vec *bv = bio->bi_io_vec;
block: Abstract out bvec iterator Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "Ed L. Cashin" <ecashin@coraid.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Lars Ellenberg <drbd-dev@lists.linbit.com> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: Geoff Levand <geoff@infradead.org> Cc: Yehuda Sadeh <yehuda@inktank.com> Cc: Sage Weil <sage@inktank.com> Cc: Alex Elder <elder@inktank.com> Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris <josh.h.morris@us.ibm.com> Cc: Philip Kelleher <pjk1939@linux.vnet.ibm.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Neil Brown <neilb@suse.de> Cc: Alasdair Kergon <agk@redhat.com> Cc: Mike Snitzer <snitzer@redhat.com> Cc: dm-devel@redhat.com Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux390@de.ibm.com Cc: Boaz Harrosh <bharrosh@panasas.com> Cc: Benny Halevy <bhalevy@tonian.com> Cc: "James E.J. Bottomley" <JBottomley@parallels.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Nicholas A. Bellinger" <nab@linux-iscsi.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Chris Mason <chris.mason@fusionio.com> Cc: "Theodore Ts'o" <tytso@mit.edu> Cc: Andreas Dilger <adilger.kernel@dilger.ca> Cc: Jaegeuk Kim <jaegeuk.kim@samsung.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Dave Kleikamp <shaggy@kernel.org> Cc: Joern Engel <joern@logfs.org> Cc: Prasad Joshi <prasadjoshi.linux@gmail.com> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Ben Myers <bpm@sgi.com> Cc: xfs@oss.sgi.com Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com> Cc: Ben Hutchings <ben@decadent.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Guo Chao <yan@linux.vnet.ibm.com> Cc: Tejun Heo <tj@kernel.org> Cc: Asai Thambi S P <asamymuthupa@micron.com> Cc: Selvan Mani <smani@micron.com> Cc: Sam Bradshaw <sbradshaw@micron.com> Cc: Wei Yongjun <yongjun_wei@trendmicro.com.cn> Cc: "Roger Pau Monné" <roger.pau@citrix.com> Cc: Jan Beulich <jbeulich@suse.com> Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com> Cc: Ian Campbell <Ian.Campbell@citrix.com> Cc: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Jiang Liu <jiang.liu@huawei.com> Cc: Nitin Gupta <ngupta@vflare.org> Cc: Jerome Marchand <jmarchand@redhat.com> Cc: Joe Perches <joe@perches.com> Cc: Peng Tao <tao.peng@emc.com> Cc: Andy Adamson <andros@netapp.com> Cc: fanchaoting <fanchaoting@cn.fujitsu.com> Cc: Jie Liu <jeff.liu@oracle.com> Cc: Sunil Mushran <sunil.mushran@gmail.com> Cc: "Martin K. Petersen" <martin.petersen@oracle.com> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Pankaj Kumar <pankaj.km@samsung.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Mel Gorman <mgorman@suse.de>6
2013-10-12 06:44:27 +08:00
BUG_ON(!bio->bi_iter.bi_size);
BUG_ON(bio->bi_vcnt);
bv->bv_offset = base ? offset_in_page(base) : 0;
goto start;
for (; size; bio->bi_vcnt++, bv++) {
bv->bv_offset = 0;
start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset,
size);
if (base) {
bv->bv_page = is_vmalloc_addr(base)
? vmalloc_to_page(base)
: virt_to_page(base);
base += bv->bv_len;
}
size -= bv->bv_len;
}
}
/**
* bch_bio_alloc_pages - allocates a single page for each bvec in a bio
* @bio: bio to allocate pages for
* @gfp_mask: flags for allocation
*
* Allocates pages up to @bio->bi_vcnt.
*
* Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
* freed.
*/
int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
{
int i;
struct bio_vec *bv;
/*
* This is called on freshly new bio, so it is safe to access the
* bvec table directly.
*/
for (i = 0, bv = bio->bi_io_vec; i < bio->bi_vcnt; bv++, i++) {
bv->bv_page = alloc_page(gfp_mask);
if (!bv->bv_page) {
while (--bv >= bio->bi_io_vec)
__free_page(bv->bv_page);
return -ENOMEM;
}
}
return 0;
}