From ad0bf11070ebb3c95f8ce82e6219dbd79c8e8b69 Mon Sep 17 00:00:00 2001
From: Alberto Bertogli
Date: Mon, 2 Nov 2009 11:39:22 +0100
Subject: [PATCH 1/5] bio_put(): add bio_clone() to the list of functions in the comment

In bio_put()'s comment, add bio_clone() to the list of functions that can
give you a bio reference.

Signed-off-by: Alberto Bertogli
Signed-off-by: Jens Axboe
---
 fs/bio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bio.c b/fs/bio.c
index 402cb84a92a1..bcab9e93d3a7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -407,7 +407,7 @@ EXPORT_SYMBOL(zero_fill_bio);
  *
  * Description:
  *   Put a reference to a &struct bio, either one you have gotten with
- *   bio_alloc or bio_get. The last put of a bio will free it.
+ *   bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
  **/
 void bio_put(struct bio *bio)
 {

From 5f04eeb8a76521dec371ceb05e8263889a8af2bc Mon Sep 17 00:00:00 2001
From: Alberto Bertogli
Date: Mon, 2 Nov 2009 11:39:42 +0100
Subject: [PATCH 2/5] Fix bio_alloc() and bio_kmalloc() documentation

Commit 451a9ebf accidentally broke bio_alloc() and bio_kmalloc()
comments by (almost) swapping them.

This patch fixes that, by placing the comments in the right place.

Signed-off-by: Alberto Bertogli
Acked-by: Tejun Heo
Signed-off-by: Jens Axboe
---
 fs/bio.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/bio.c b/fs/bio.c
index bcab9e93d3a7..12da5db8682c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -325,8 +325,16 @@ static void bio_fs_destructor(struct bio *bio)
  * @gfp_mask:   allocation mask to use
  * @nr_iovecs:  number of iovecs
  *
- *   Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask
- *   contains __GFP_WAIT, the allocation is guaranteed to succeed.
+ *   bio_alloc will allocate a bio and associated bio_vec array that can hold
+ *   at least @nr_iovecs entries. Allocations will be done from the
+ *   fs_bio_set. Also see @bio_alloc_bioset and @bio_kmalloc.
+ *
+ *   If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
+ *   a bio. This is due to the mempool guarantees. To make this work, callers
+ *   must never allocate more than 1 bio at a time from this pool. Callers
+ *   that need to allocate more than 1 bio must always submit the previously
+ *   allocated bio for IO before attempting to allocate a new one. Failure to
+ *   do so can cause livelocks under memory pressure.
  *
  * RETURNS:
  * Pointer to new bio on success, NULL on failure.
@@ -350,21 +358,13 @@ static void bio_kmalloc_destructor(struct bio *bio)
 }
 
 /**
- * bio_alloc - allocate a bio for I/O
+ * bio_kmalloc - allocate a bio for I/O using kmalloc()
  * @gfp_mask:   the GFP_ mask given to the slab allocator
  * @nr_iovecs:  number of iovecs to pre-allocate
  *
  * Description:
- *   bio_alloc will allocate a bio and associated bio_vec array that can hold
- *   at least @nr_iovecs entries. Allocations will be done from the
- *   fs_bio_set. Also see @bio_alloc_bioset.
- *
- *   If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
- *   a bio. This is due to the mempool guarantees. To make this work, callers
- *   must never allocate more than 1 bio at a time from this pool. Callers
- *   that need to allocate more than 1 bio must always submit the previously
- *   allocated bio for IO before attempting to allocate a new one. Failure to
- *   do so can cause livelocks under memory pressure.
+ *   Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask contains
+ *   %__GFP_WAIT, the allocation is guaranteed to succeed.
  *
  **/
 struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)

From 8c4db3355b0fcc9ad77431f15b955efa0645b5d0 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 3 Nov 2009 20:18:44 +0100
Subject: [PATCH 3/5] backing-dev: bdi sb prune should be in the unregister path, not destroy

Commit 592b09a42fc3ae6737a0f3ecf4fee42ecd0296f8 was different from the
tested path, in that it moved the bdi super_block prune from unregister
to destroy context. This doesn't fully fix the sync hang bug on
unexpected device removal, as we need to prune the bdi cache pointer
before killing the flusher thread.

Tested-by: Artur Skawina
Signed-off-by: Jens Axboe
---
 mm/backing-dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1065b715ef64..11aee09dd2a6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -628,6 +628,8 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
         if (bdi->dev) {
+                bdi_prune_sb(bdi);
+
                 if (!bdi_cap_flush_forker(bdi))
                         bdi_wb_shutdown(bdi);
                 bdi_debug_unregister(bdi);
@@ -697,7 +699,6 @@ void bdi_destroy(struct backing_dev_info *bdi)
                 spin_unlock(&inode_lock);
         }
 
-        bdi_prune_sb(bdi);
         bdi_unregister(bdi);
 
         for (i = 0; i < NR_BDI_STAT_ITEMS; i++)

From e6ec4fe24572ee265723d895ec4159e5559c8266 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 3 Nov 2009 20:21:35 +0100
Subject: [PATCH 4/5] cfq-iosched: fix bad return value in cfq_should_preempt()

Commit a6151c3a5c8e1ff5a28450bc8d6a99a2a0add0a7 inadvertently reversed
a preempt condition check, potentially causing a performance regression.
Make the meta check correct again.

Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 069a61017c02..5802e322b7ad 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2051,7 +2051,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
          * it's a metadata request and the current queue is doing regular IO.
          */
         if (rq_is_meta(rq) && !cfqq->meta_pending)
-                return false;
+                return true;
 
         /*
          * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.

From 4b27e1bb442e964903f8a3fa6bdf33a602dc0941 Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Tue, 3 Nov 2009 20:25:02 +0100
Subject: [PATCH 5/5] cfq-iosched: limit coop preemption

CFQ has an optimization for cooperating applications: if several
io_contexts have requests that are close to each other, they get a boost.
But the optimization can be abused. Consider threads a and b, which work
on one file. a reads sectors s, s+2, s+4, ...; b reads sectors s+1, s+3,
s+5, ... Both a and b are sequential readers, so they can open the idle
window. a reads sector s, goes into the idle window and wakes up b. b
issues a read for sector s+1; since the current implementation of
cfq_should_preempt() thinks a and b are cooperators, b preempts a. b then
reads sector s+1, goes into the idle window and wakes up a. For the same
reason, a preempts b and reads s+2, and a and b keep going around this
cycle. The cycle can be very long, and a and b will occupy the whole disk
queue, so other applications get almost no chance to run.

Fix this by limiting coop preemption until a queue is scheduled normally
again.
Signed-off-by: Shaohua Li
Acked-by: Jeff Moyer
Signed-off-by: Jens Axboe
---
 block/cfq-iosched.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5802e322b7ad..aa1e9535e358 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -196,6 +196,7 @@ enum cfqq_state_flags {
         CFQ_CFQQ_FLAG_slice_new,        /* no requests dispatched in slice */
         CFQ_CFQQ_FLAG_sync,             /* synchronous queue */
         CFQ_CFQQ_FLAG_coop,             /* has done a coop jump of the queue */
+        CFQ_CFQQ_FLAG_coop_preempt,     /* coop preempt */
 };
 
 #define CFQ_CFQQ_FNS(name)                                              \
@@ -222,6 +223,7 @@ CFQ_CFQQ_FNS(prio_changed);
 CFQ_CFQQ_FNS(slice_new);
 CFQ_CFQQ_FNS(sync);
 CFQ_CFQQ_FNS(coop);
+CFQ_CFQQ_FNS(coop_preempt);
 #undef CFQ_CFQQ_FNS
 
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
@@ -945,10 +947,13 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
 {
         if (!cfqq) {
                 cfqq = cfq_get_next_queue(cfqd);
-                if (cfqq)
+                if (cfqq && !cfq_cfqq_coop_preempt(cfqq))
                         cfq_clear_cfqq_coop(cfqq);
         }
 
+        if (cfqq)
+                cfq_clear_cfqq_coop_preempt(cfqq);
+
         __cfq_set_active_queue(cfqd, cfqq);
         return cfqq;
 }
@@ -2066,8 +2071,16 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
          * if this request is as-good as one we would expect from the
          * current cfqq, let it preempt
          */
-        if (cfq_rq_close(cfqd, rq))
+        if (cfq_rq_close(cfqd, rq) && (!cfq_cfqq_coop(new_cfqq) ||
+            cfqd->busy_queues == 1)) {
+                /*
+                 * Mark new queue coop_preempt, so its coop flag will not be
+                 * cleared when new queue gets scheduled at the very first time
+                 */
+                cfq_mark_cfqq_coop_preempt(new_cfqq);
+                cfq_mark_cfqq_coop(new_cfqq);
                 return true;
+        }
 
         return false;
 }
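A note on the allocation rule that patch 2 restores to the bio_alloc() comment:
with __GFP_WAIT set, the mempool guarantee only holds if a caller never has more
than one un-submitted bio from fs_bio_set at a time. The sketch below shows that
pattern in 2.6.32-era kernel C. It is not taken from any of the patches above;
the function name and its parameters are invented for illustration, and
completion handling is reduced to a caller-supplied end_io that is expected to
bio_put() the bio. Only bio_alloc(), bio_add_page(), submit_bio() and the bio
fields are the real API of that era.

/*
 * Illustrative sketch only (not part of the patch series above).
 * Allocate, fill and submit one bio at a time so the fs_bio_set mempool
 * guarantee described in the bio_alloc() comment actually applies.
 */
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/mm.h>

static void write_pages_one_bio_at_a_time(struct block_device *bdev,
                                          sector_t sector,
                                          struct page **pages, int nr_pages,
                                          bio_end_io_t *end_io)
{
        int i;

        for (i = 0; i < nr_pages; i++) {
                /*
                 * GFP_NOIO includes __GFP_WAIT, so this cannot fail -- but
                 * only because we hold at most one bio from fs_bio_set.
                 */
                struct bio *bio = bio_alloc(GFP_NOIO, 1);

                bio->bi_bdev = bdev;
                bio->bi_sector = sector + (i << (PAGE_SHIFT - 9));
                bio->bi_end_io = end_io;  /* completion handler; should bio_put() */
                bio_add_page(bio, pages[i], PAGE_SIZE, 0);

                /*
                 * Submit before looping around to allocate the next bio.
                 * Holding two un-submitted bios from the same mempool is
                 * what the comment warns can livelock under memory pressure.
                 */
                submit_bio(WRITE, bio);
        }
}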