mirror of https://gitee.com/openkylin/linux.git
btrfs: introduce end_bio_subpage_eb_writepage() function
The new function, end_bio_subpage_eb_writepage(), will handle the metadata writeback endio.

The major differences involved are:

- How to grab the extent buffer
  Now that page::private is a pointer to btrfs_subpage, we can no longer grab the extent buffer directly. Thus we need to use bv_offset to locate the extent buffer manually and iterate through the whole range.

- Use the btrfs_subpage_clear_writeback() helper
  This helper will handle the subpage writeback for us.

Since this function is executed in endio context, it can't take eb->refs_lock when grabbing extent buffers, as that lock is not designed to be taken in hardirq context. So introduce a helper, find_extent_buffer_nolock(), for such situations, and convert find_extent_buffer() to use that helper.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
fb686c6824
commit
2f3186d8ee
|
@ -4080,13 +4080,98 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The endio specific version which won't touch any unsafe spinlock in endio
|
||||||
|
* context.
|
||||||
|
*/
|
||||||
|
static struct extent_buffer *find_extent_buffer_nolock(
|
||||||
|
struct btrfs_fs_info *fs_info, u64 start)
|
||||||
|
{
|
||||||
|
struct extent_buffer *eb;
|
||||||
|
|
||||||
|
/* Both the lookup and the reference grab below run inside the RCU read-side section. */
rcu_read_lock();
|
||||||
|
/* fs_info->buffer_radix is indexed by start >> sectorsize_bits. */
eb = radix_tree_lookup(&fs_info->buffer_radix,
|
||||||
|
start >> fs_info->sectorsize_bits);
|
||||||
|
/* Hand the buffer back only if eb->refs was non-zero and could be incremented. */
if (eb && atomic_inc_not_zero(&eb->refs)) {
|
||||||
|
rcu_read_unlock();
|
||||||
|
return eb;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
/* Nothing found at that index, or eb->refs had already dropped to zero. */
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The endio function for subpage extent buffer write.
|
||||||
|
*
|
||||||
|
* Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
|
||||||
|
* after all extent buffers in the page have finished their writeback.
|
||||||
|
*/
|
||||||
|
static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
|
||||||
|
struct bio *bio)
|
||||||
|
{
|
||||||
|
struct bio_vec *bvec;
|
||||||
|
struct bvec_iter_all iter_all;
|
||||||
|
|
||||||
|
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||||
|
bio_for_each_segment_all(bvec, bio, iter_all) {
|
||||||
|
struct page *page = bvec->bv_page;
|
||||||
|
/* Byte range covered by this bvec: page offset plus in-page offset, with an inclusive end. */
u64 bvec_start = page_offset(page) + bvec->bv_offset;
|
||||||
|
u64 bvec_end = bvec_start + bvec->bv_len - 1;
|
||||||
|
u64 cur_bytenr = bvec_start;
|
||||||
|
|
||||||
|
/* The bvec length is expected to be a multiple of fs_info->nodesize. */
ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
|
||||||
|
|
||||||
|
/* Iterate through all extent buffers in the range */
|
||||||
|
while (cur_bytenr <= bvec_end) {
|
||||||
|
struct extent_buffer *eb;
|
||||||
|
int done;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Here we can't use find_extent_buffer(), as it may
|
||||||
|
* try to lock eb->refs_lock, which is not safe in endio
|
||||||
|
* context.
|
||||||
|
*/
|
||||||
|
eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
|
||||||
|
ASSERT(eb);
|
||||||
|
|
||||||
|
/* NOTE(review): eb is dereferenced right after ASSERT(eb); if ASSERT can be compiled out, a failed lookup would be a NULL dereference here -- confirm. */
/* Advance the cursor past this extent buffer for the next iteration. */
cur_bytenr = eb->start + eb->len;
|
||||||
|
|
||||||
|
ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
|
||||||
|
/* Decrement eb->io_pages; the ASSERT below expects this to bring it to zero. */
done = atomic_dec_and_test(&eb->io_pages);
|
||||||
|
ASSERT(done);
|
||||||
|
|
||||||
|
/* Error path: the bio carries a status, or the eb is already flagged with a write error. */
if (bio->bi_status ||
|
||||||
|
test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
|
||||||
|
ClearPageUptodate(page);
|
||||||
|
set_btree_ioerr(page, eb);
|
||||||
|
}
|
||||||
|
|
||||||
|
btrfs_subpage_clear_writeback(fs_info, page, eb->start,
|
||||||
|
eb->len);
|
||||||
|
end_extent_buffer_writeback(eb);
|
||||||
|
/*
|
||||||
|
* free_extent_buffer() will grab spinlock which is not
|
||||||
|
* safe in endio context. Thus here we manually dec
|
||||||
|
* the ref.
|
||||||
|
*/
|
||||||
|
/* This pairs with the eb->refs increment done in find_extent_buffer_nolock() above. */
atomic_dec(&eb->refs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Every bvec has been handled at this point. */
bio_put(bio);
|
||||||
|
}
|
||||||
|
|
||||||
static void end_bio_extent_buffer_writepage(struct bio *bio)
|
static void end_bio_extent_buffer_writepage(struct bio *bio)
|
||||||
{
|
{
|
||||||
|
struct btrfs_fs_info *fs_info;
|
||||||
struct bio_vec *bvec;
|
struct bio_vec *bvec;
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
int done;
|
int done;
|
||||||
struct bvec_iter_all iter_all;
|
struct bvec_iter_all iter_all;
|
||||||
|
|
||||||
|
fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
|
||||||
|
if (fs_info->sectorsize < PAGE_SIZE)
|
||||||
|
return end_bio_subpage_eb_writepage(fs_info, bio);
|
||||||
|
|
||||||
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||||
bio_for_each_segment_all(bvec, bio, iter_all) {
|
bio_for_each_segment_all(bvec, bio, iter_all) {
|
||||||
struct page *page = bvec->bv_page;
|
struct page *page = bvec->bv_page;
|
||||||
|
@ -5465,36 +5550,28 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||||
{
|
{
|
||||||
struct extent_buffer *eb;
|
struct extent_buffer *eb;
|
||||||
|
|
||||||
rcu_read_lock();
|
eb = find_extent_buffer_nolock(fs_info, start);
|
||||||
eb = radix_tree_lookup(&fs_info->buffer_radix,
|
if (!eb)
|
||||||
start >> fs_info->sectorsize_bits);
|
return NULL;
|
||||||
if (eb && atomic_inc_not_zero(&eb->refs)) {
|
/*
|
||||||
rcu_read_unlock();
|
* Lock our eb's refs_lock to avoid races with free_extent_buffer().
|
||||||
/*
|
* When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
|
||||||
* Lock our eb's refs_lock to avoid races with
|
* another task running free_extent_buffer() might have seen that flag
|
||||||
* free_extent_buffer. When we get our eb it might be flagged
|
* set, eb->refs == 2, that the buffer isn't under IO (dirty and
|
||||||
* with EXTENT_BUFFER_STALE and another task running
|
* writeback flags not set) and it's still in the tree (flag
|
||||||
* free_extent_buffer might have seen that flag set,
|
* EXTENT_BUFFER_TREE_REF set), therefore being in the process of
|
||||||
* eb->refs == 2, that the buffer isn't under IO (dirty and
|
* decrementing the extent buffer's reference count twice. So here we
|
||||||
* writeback flags not set) and it's still in the tree (flag
|
* could race and increment the eb's reference count, clear its stale
|
||||||
* EXTENT_BUFFER_TREE_REF set), therefore being in the process
|
* flag, mark it as dirty and drop our reference before the other task
|
||||||
* of decrementing the extent buffer's reference count twice.
|
* finishes executing free_extent_buffer, which would later result in
|
||||||
* So here we could race and increment the eb's reference count,
|
* an attempt to free an extent buffer that is dirty.
|
||||||
* clear its stale flag, mark it as dirty and drop our reference
|
*/
|
||||||
* before the other task finishes executing free_extent_buffer,
|
if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
|
||||||
* which would later result in an attempt to free an extent
|
spin_lock(&eb->refs_lock);
|
||||||
* buffer that is dirty.
|
spin_unlock(&eb->refs_lock);
|
||||||
*/
|
|
||||||
if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
|
|
||||||
spin_lock(&eb->refs_lock);
|
|
||||||
spin_unlock(&eb->refs_lock);
|
|
||||||
}
|
|
||||||
mark_extent_buffer_accessed(eb, NULL);
|
|
||||||
return eb;
|
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
mark_extent_buffer_accessed(eb, NULL);
|
||||||
|
return eb;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||||
|
|
Loading…
Reference in New Issue