btrfs: introduce end_bio_subpage_eb_writepage() function

The new function, end_bio_subpage_eb_writepage(), will handle the metadata
writeback endio.

The major differences involved are:

- How to grab the extent buffer
  Now that page::private is a pointer to btrfs_subpage, we can no longer grab
  the extent buffer directly.
  Thus we need to use the bv_offset to locate the extent buffer manually and
  iterate through the whole range.

- Use the btrfs_subpage_clear_writeback() helper
  This helper will handle the subpage writeback for us.

Since this function is executed under endio context, when grabbing extent
buffers it can't grab eb->refs_lock, as that lock is not designed to be
grabbed under hardirq context.

So here introduce a helper, find_extent_buffer_nolock(), for such situations,
and convert find_extent_buffer() to use that helper.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2021-04-06 08:36:00 +08:00 · 2021-04-06 08:36:00 +08:00 · 2f3186d8ee
parent fb686c6824
commit 2f3186d8ee
1 changed files with 106 additions and 29 deletions
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@ -4080,13 +4080,98 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
 	}
 }
 /*
 * Lockless extent buffer lookup, usable from endio context.
 *
 * Searches fs_info->buffer_radix for the extent buffer keyed by
 * @start >> sectorsize_bits under rcu_read_lock(), without touching any
 * spinlock that is unsafe in endio context.
 *
 * Returns the extent buffer with eb->refs incremented on success, or NULL if
 * no entry exists or its reference count has already dropped to zero.
 */
 static struct extent_buffer *find_extent_buffer_nolock(
 		struct btrfs_fs_info *fs_info, u64 start)
 {
 	struct extent_buffer *found;
 	rcu_read_lock();
 	found = radix_tree_lookup(&fs_info->buffer_radix,
 				  start >> fs_info->sectorsize_bits);
 	/* Only hand the buffer out if we managed to pin a live reference */
 	if (!found || !atomic_inc_not_zero(&found->refs))
 		found = NULL;
 	rcu_read_unlock();
 	return found;
 }
 /*
 * The endio function for subpage extent buffer write.
 *
 * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
 * after all extent buffers in the page have finished their writeback.
 *
 * @fs_info: filesystem info, used to look up extent buffers by bytenr and
 *           for the nodesize alignment check
 * @bio:     the completed write bio; it is released with bio_put() before
 *           this function returns
 *
 * For every segment of @bio, walk all extent buffers covered by the segment,
 * record any write error, clear the subpage writeback state of the range and
 * finish the extent buffer writeback.
 */
 static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
 					 struct bio *bio)
 {
 	struct bio_vec *bvec;
 	struct bvec_iter_all iter_all;
 	/* This path expects the original bio, never a clone */
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
 		/* Inclusive byte range [bvec_start, bvec_end] of this segment */
 		u64 bvec_start = page_offset(page) + bvec->bv_offset;
 		u64 bvec_end = bvec_start + bvec->bv_len - 1;
 		u64 cur_bytenr = bvec_start;
 		/* A segment is expected to hold a whole number of tree blocks */
 		ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
 		/* Iterate through all extent buffers in the range */
 		while (cur_bytenr <= bvec_end) {
 			struct extent_buffer *eb;
 			int done;
 			/*
 			 * Here we can't use find_extent_buffer(), as it may
 			 * try to lock eb->refs_lock, which is not safe in endio
 			 * context.
 			 */
 			eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
 			ASSERT(eb);
 			/* Next lookup starts right after this extent buffer */
 			cur_bytenr = eb->start + eb->len;
 			ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
 			/* io_pages is expected to reach zero on this decrement */
 			done = atomic_dec_and_test(&eb->io_pages);
 			ASSERT(done);
 			/*
 			 * Either this bio failed or a write error was already
 			 * flagged on the eb: mark the page not uptodate and
 			 * pass the failure on to set_btree_ioerr().
 			 */
 			if (bio->bi_status ||
 			    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
 				ClearPageUptodate(page);
 				set_btree_ioerr(page, eb);
 			}
 			/* Clear writeback only for the range owned by this eb */
 			btrfs_subpage_clear_writeback(fs_info, page, eb->start,
 						      eb->len);
 			end_extent_buffer_writeback(eb);
 			/*
 			 * free_extent_buffer() will grab spinlock which is not
 			 * safe in endio context. Thus here we manually dec
 			 * the ref.
 			 */
 			atomic_dec(&eb->refs);
 		}
 	}
 	bio_put(bio);
 }
 static void end_bio_extent_buffer_writepage(struct bio *bio)
 {
 	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
 	struct extent_buffer *eb;
 	int done;
 	struct bvec_iter_all iter_all;
 	fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
 	if (fs_info->sectorsize < PAGE_SIZE)
 		return end_bio_subpage_eb_writepage(fs_info, bio);
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
@ -5465,25 +5550,21 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 {
 	struct extent_buffer *eb;
-	rcu_read_lock();
+	eb = find_extent_buffer_nolock(fs_info, start);
-	eb = radix_tree_lookup(&fs_info->buffer_radix,
+	if (!eb)
-			       start >> fs_info->sectorsize_bits);
+		return NULL;
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
 	/*
-		 * Lock our eb's refs_lock to avoid races with
+	 * Lock our eb's refs_lock to avoid races with free_extent_buffer().
-		 * free_extent_buffer. When we get our eb it might be flagged
+	 * When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
-		 * with EXTENT_BUFFER_STALE and another task running
+	 * another task running free_extent_buffer() might have seen that flag
-		 * free_extent_buffer might have seen that flag set,
+	 * set, eb->refs == 2, that the buffer isn't under IO (dirty and
 		 * eb->refs == 2, that the buffer isn't under IO (dirty and
 	 * writeback flags not set) and it's still in the tree (flag
-		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
+	 * EXTENT_BUFFER_TREE_REF set), therefore being in the process of
-		 * of decrementing the extent buffer's reference count twice.
+	 * decrementing the extent buffer's reference count twice.  So here we
-		 * So here we could race and increment the eb's reference count,
+	 * could race and increment the eb's reference count, clear its stale
-		 * clear its stale flag, mark it as dirty and drop our reference
+	 * flag, mark it as dirty and drop our reference before the other task
-		 * before the other task finishes executing free_extent_buffer,
+	 * finishes executing free_extent_buffer, which would later result in
-		 * which would later result in an attempt to free an extent
+	 * an attempt to free an extent buffer that is dirty.
 		 * buffer that is dirty.
 	 */
 	if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
 		spin_lock(&eb->refs_lock);
@ -5491,10 +5572,6 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 	}
 	mark_extent_buffer_accessed(eb, NULL);
 	return eb;
 	}
 	rcu_read_unlock();
 	return NULL;
 }
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS