From bd9dcd046509cd5355605e43791eacee8bf5e40f Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:25 +0800
Subject: [PATCH 1/6] debugobjects: Export max loops counter

__debug_check_no_obj_freed() can be an expensive operation depending on
the size of memory freed. It already exports the maximum chain walk
length via debugfs, but this only records the maximum of a single memory
chunk. However, there is no information about the total number of objects
inspected for a __debug_check_no_obj_freed() operation, which might be
significantly larger when a huge memory region is freed.

Aggregate the number of objects inspected for a single invocation of
__debug_check_no_obj_freed() and export it via debugfs.

The resulting output of /sys/kernel/debug/debug_objects/stats looks like:

max_chain     :121
max_checked   :543267
warnings      :0
fixups        :0
pool_free     :1764
pool_min_free :341
pool_used     :86438
pool_max_used :268887
objs_allocated:6068254
objs_freed    :5981076

[ tglx: Renamed the variable to max_checked and adjusted changelog ]

Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-2-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2f5349c6e81a..f6d57a11c927 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -50,6 +50,7 @@ static int obj_pool_max_used;
 static struct kmem_cache *obj_cache;

 static int debug_objects_maxchain __read_mostly;
+static int debug_objects_maxchecked __read_mostly;
 static int debug_objects_fixups __read_mostly;
 static int debug_objects_warnings __read_mostly;
 static int debug_objects_enabled __read_mostly
@@ -720,7 +721,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
         enum debug_obj_state state;
         struct debug_bucket *db;
         struct debug_obj *obj;
-        int cnt;
+        int cnt, objs_checked = 0;

         saddr = (unsigned long) address;
         eaddr = saddr + size;
@@ -765,7 +766,12 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
                 if (cnt > debug_objects_maxchain)
                         debug_objects_maxchain = cnt;
+
+                objs_checked += cnt;
         }
+
+        if (objs_checked > debug_objects_maxchecked)
+                debug_objects_maxchecked = objs_checked;
 }
@@ -780,6 +786,7 @@ void debug_check_no_obj_freed(const void *address, unsigned long size)
 static int debug_stats_show(struct seq_file *m, void *v)
 {
         seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
+        seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
         seq_printf(m, "warnings :%d\n", debug_objects_warnings);
         seq_printf(m, "fixups :%d\n", debug_objects_fixups);
         seq_printf(m, "pool_free :%d\n", obj_pool_free);

From 36c4ead6f6dfbbe777d3d7e9cc8702530b71a94f Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:26 +0800
Subject: [PATCH 2/6] debugobjects: Add global free list and the counter

free_object() adds objects to the pool list and schedules work when the
pool list is larger than the pool size. The worker handles the actual
kfree() of the object by iterating the pool list until the pool size is
below the maximum pool size again. To iterate the pool list, pool_lock
has to be held and the objects which should be freed need to be put into
temporary storage so pool_lock can be dropped for the actual
kmem_cache_free() invocation.
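For orientation, that pattern looks roughly like this before the series
(condensed from the hunks removed in patch 3; the freed-objects accounting
is trimmed):

#define ODEBUG_FREE_BATCH 4

static void free_object(struct debug_obj *obj)
{
        unsigned long flags;
        int sched = 0;

        raw_spin_lock_irqsave(&pool_lock, flags);
        /* Schedule the worker once the pool is over-full and the cache exists. */
        if (obj_pool_free > debug_objects_pool_size && obj_cache)
                sched = 1;
        hlist_add_head(&obj->node, &obj_pool);
        obj_pool_free++;
        obj_pool_used--;
        raw_spin_unlock_irqrestore(&pool_lock, flags);
        if (sched)
                schedule_work(&debug_obj_work);
}

static void free_obj_work(struct work_struct *work)
{
        struct debug_obj *objs[ODEBUG_FREE_BATCH];
        unsigned long flags;
        int i;

        if (!raw_spin_trylock_irqsave(&pool_lock, flags))
                return;
        while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
                /* Detach a batch into temporary storage under pool_lock ... */
                for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
                        objs[i] = hlist_entry(obj_pool.first, typeof(*objs[0]), node);
                        hlist_del(&objs[i]->node);
                }
                obj_pool_free -= ODEBUG_FREE_BATCH;
                /* ... drop the lock for the actual kmem_cache_free() ... */
                raw_spin_unlock_irqrestore(&pool_lock, flags);
                for (i = 0; i < ODEBUG_FREE_BATCH; i++)
                        kmem_cache_free(obj_cache, objs[i]);
                /* ... and retake it for the next batch. */
                if (!raw_spin_trylock_irqsave(&pool_lock, flags))
                        return;
        }
        raw_spin_unlock_irqrestore(&pool_lock, flags);
}

For a large burst of frees this means repeated lock/trylock cycles over the
same pool list, which is exactly what the next paragraph calls out.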
That's a pointless and expensive exercise if there is a large number of
objects to free. In such a case it's better to evaluate the fill level
of the pool in free_object() and queue the object to be freed either on
the pool list or, if the pool is full, on a separate global free list.

The worker can then do the following simpler operation:

 - Move objects back from the global free list to the pool list if the
   pool list is no longer full.

 - Remove the remaining objects in a single list move operation from the
   global free list and do the kmem_cache_free() operation lockless from
   the temporary list head.

In fill_pool() the global free list is checked as well to avoid real
allocations from the kmem cache.

Add the necessary list head and a counter for the number of objects on
the global free list and export that counter via debugfs:

max_chain     :79
max_loops     :8147
warnings      :0
fixups        :0
pool_free     :1697
pool_min_free :346
pool_used     :15356
pool_max_used :23933
on_free_list  :39
objs_allocated:32617
objs_freed    :16588

Nothing queues objects on the global free list yet. This happens in a
follow-up change.

[ tglx: Simplified implementation and massaged changelog ]

Suggested-by: Thomas Gleixner
Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-3-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index f6d57a11c927..e31273b45da5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -42,11 +42,14 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
 static DEFINE_RAW_SPINLOCK(pool_lock);

 static HLIST_HEAD(obj_pool);
+static HLIST_HEAD(obj_to_free);

 static int obj_pool_min_free = ODEBUG_POOL_SIZE;
 static int obj_pool_free = ODEBUG_POOL_SIZE;
 static int obj_pool_used;
 static int obj_pool_max_used;
+/* The number of objs on the global free list */
+static int obj_nr_tofree;
 static struct kmem_cache *obj_cache;

 static int debug_objects_maxchain __read_mostly;
@@ -97,12 +100,32 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
 static void fill_pool(void)
 {
         gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-        struct debug_obj *new;
+        struct debug_obj *new, *obj;
         unsigned long flags;

         if (likely(obj_pool_free >= debug_objects_pool_min_level))
                 return;

+        /*
+         * Reuse objs from the global free list; they will be reinitialized
+         * when allocating.
+         */
+        while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
+                raw_spin_lock_irqsave(&pool_lock, flags);
+                /*
+                 * Recheck with the lock held as the worker thread might have
+                 * won the race and freed the global free list already.
+                 */
+                if (obj_nr_tofree) {
+                        obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+                        hlist_del(&obj->node);
+                        obj_nr_tofree--;
+                        hlist_add_head(&obj->node, &obj_pool);
+                        obj_pool_free++;
+                }
+                raw_spin_unlock_irqrestore(&pool_lock, flags);
+        }
+
         if (unlikely(!obj_cache))
                 return;

@@ -186,11 +209,38 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 static void free_obj_work(struct work_struct *work)
 {
         struct debug_obj *objs[ODEBUG_FREE_BATCH];
+        struct hlist_node *tmp;
+        struct debug_obj *obj;
         unsigned long flags;
         int i;
+        HLIST_HEAD(tofree);

         if (!raw_spin_trylock_irqsave(&pool_lock, flags))
                 return;
+
+        /*
+         * The objs on the pool list might be allocated before the work is
+         * run, so recheck if pool list it full or not, if not fill pool
+         * list from the global free list
+         */
+        while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
+                obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+                hlist_del(&obj->node);
+                hlist_add_head(&obj->node, &obj_pool);
+                obj_pool_free++;
+                obj_nr_tofree--;
+        }
+
+        /*
+         * Pool list is already full and there are still objs on the free
+         * list. Move remaining free objs to a temporary list to free the
+         * memory outside the pool_lock held region.
+         */
+        if (obj_nr_tofree) {
+                hlist_move_list(&obj_to_free, &tofree);
+                obj_nr_tofree = 0;
+        }
+
         while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
                 for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
                         objs[i] = hlist_entry(obj_pool.first,
@@ -211,6 +261,11 @@ static void free_obj_work(struct work_struct *work)
                 return;
         }
         raw_spin_unlock_irqrestore(&pool_lock, flags);
+
+        hlist_for_each_entry_safe(obj, tmp, &tofree, node) {
+                hlist_del(&obj->node);
+                kmem_cache_free(obj_cache, obj);
+        }
 }

 /*
@@ -793,6 +848,7 @@ static int debug_stats_show(struct seq_file *m, void *v)
         seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
         seq_printf(m, "pool_used :%d\n", obj_pool_used);
         seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
+        seq_printf(m, "on_free_list :%d\n", obj_nr_tofree);
         seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
         seq_printf(m, "objs_freed :%d\n", debug_objects_freed);
         return 0;

From 636e1970fd7deaa0d0ee0dfb6ac65fbd690b32d2 Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:27 +0800
Subject: [PATCH 3/6] debugobjects: Use global free list in free_object()

The newly added global free list makes it possible to avoid lengthy
pool_list iterations in free_obj_work() by putting objects either on the
pool list, when the fill level of the pool is below the maximum, or on
the global free list immediately.

As the pool is now guaranteed to never exceed the maximum fill level,
the batch removal from the pool list in free_obj_work() can be dropped.

Split free_object() into two parts, so the actual queueing function can
be reused without invoking schedule_work() on every invocation.
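Stripped of the diff markers, the split in the patch below has this shape:

static bool __free_object(struct debug_obj *obj)
{
        unsigned long flags;
        bool work;

        raw_spin_lock_irqsave(&pool_lock, flags);
        /* Queue for real freeing only when the pool is full and the cache is up. */
        work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
        obj_pool_used--;

        if (work) {
                obj_nr_tofree++;
                hlist_add_head(&obj->node, &obj_to_free);
        } else {
                obj_pool_free++;
                hlist_add_head(&obj->node, &obj_pool);
        }
        raw_spin_unlock_irqrestore(&pool_lock, flags);
        return work;
}

static void free_object(struct debug_obj *obj)
{
        /* Single-object callers still schedule the worker themselves. */
        if (__free_object(obj))
                schedule_work(&debug_obj_work);
}

A caller that frees many objects in a row, such as
__debug_check_no_obj_freed() in patch 4, can invoke __free_object()
repeatedly and issue a single schedule_work() at the end.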
[ tglx: Remove the batch removal from pool list and massage changelog ]

Suggested-by: Thomas Gleixner
Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-4-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 63 ++++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 41 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index e31273b45da5..3e79c100271f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -201,18 +201,13 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
  * workqueue function to free objects.
  *
  * To reduce contention on the global pool_lock, the actual freeing of
- * debug objects will be delayed if the pool_lock is busy. We also free
- * the objects in a batch of 4 for each lock/unlock cycle.
+ * debug objects will be delayed if the pool_lock is busy.
  */
-#define ODEBUG_FREE_BATCH 4
-
 static void free_obj_work(struct work_struct *work)
 {
-        struct debug_obj *objs[ODEBUG_FREE_BATCH];
         struct hlist_node *tmp;
         struct debug_obj *obj;
         unsigned long flags;
-        int i;
         HLIST_HEAD(tofree);

         if (!raw_spin_trylock_irqsave(&pool_lock, flags))
@@ -240,26 +235,6 @@ static void free_obj_work(struct work_struct *work)
                 hlist_move_list(&obj_to_free, &tofree);
                 obj_nr_tofree = 0;
         }
-
-        while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
-                for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
-                        objs[i] = hlist_entry(obj_pool.first,
-                                              typeof(*objs[0]), node);
-                        hlist_del(&objs[i]->node);
-                }
-
-                obj_pool_free -= ODEBUG_FREE_BATCH;
-                debug_objects_freed += ODEBUG_FREE_BATCH;
-                /*
-                 * We release pool_lock across kmem_cache_free() to
-                 * avoid contention on pool_lock.
-                 */
-                raw_spin_unlock_irqrestore(&pool_lock, flags);
-                for (i = 0; i < ODEBUG_FREE_BATCH; i++)
-                        kmem_cache_free(obj_cache, objs[i]);
-                if (!raw_spin_trylock_irqsave(&pool_lock, flags))
-                        return;
-        }
         raw_spin_unlock_irqrestore(&pool_lock, flags);

         hlist_for_each_entry_safe(obj, tmp, &tofree, node) {
@@ -268,27 +243,33 @@ static void free_obj_work(struct work_struct *work)
         }
 }

+static bool __free_object(struct debug_obj *obj)
+{
+        unsigned long flags;
+        bool work;
+
+        raw_spin_lock_irqsave(&pool_lock, flags);
+        work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
+        obj_pool_used--;
+
+        if (work) {
+                obj_nr_tofree++;
+                hlist_add_head(&obj->node, &obj_to_free);
+        } else {
+                obj_pool_free++;
+                hlist_add_head(&obj->node, &obj_pool);
+        }
+        raw_spin_unlock_irqrestore(&pool_lock, flags);
+        return work;
+}
+
 /*
  * Put the object back into the pool and schedule work to free objects
  * if necessary.
  */
 static void free_object(struct debug_obj *obj)
 {
-        unsigned long flags;
-        int sched = 0;
-
-        raw_spin_lock_irqsave(&pool_lock, flags);
-        /*
-         * schedule work when the pool is filled and the cache is
-         * initialized:
-         */
-        if (obj_pool_free > debug_objects_pool_size && obj_cache)
-                sched = 1;
-        hlist_add_head(&obj->node, &obj_pool);
-        obj_pool_free++;
-        obj_pool_used--;
-        raw_spin_unlock_irqrestore(&pool_lock, flags);
-        if (sched)
+        if (__free_object(obj))
                 schedule_work(&debug_obj_work);
 }

From 1ea9b98b007a662e402551a41a4413becad40a65 Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:28 +0800
Subject: [PATCH 4/6] debugobjects: Use global free list in __debug_check_no_obj_freed()

__debug_check_no_obj_freed() iterates over the memory region to be freed
in chunks and, for each chunk, iterates over the corresponding hash
bucket list.
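Roughly, the walk has this shape (paraphrased from lib/debugobjects.c at
the time of this series; the fixup/repeat handling inside the bucket loop
is omitted):

        saddr = (unsigned long) address;
        eaddr = saddr + size;
        paddr = saddr & ODEBUG_CHUNK_MASK;
        chunks = ((eaddr - paddr) + (ODEBUG_CHUNK_SIZE - 1)) >> ODEBUG_CHUNK_SHIFT;

        for (; chunks > 0; chunks--, paddr += ODEBUG_CHUNK_SIZE) {
                db = get_bucket(paddr);         /* one hash bucket per chunk */

                raw_spin_lock_irqsave(&db->lock, flags);
                hlist_for_each_entry_safe(obj, tmp, &db->list, node) {
                        /*
                         * Every object hashed into this bucket is inspected,
                         * whether or not it falls inside [saddr, eaddr).
                         */
                }
                raw_spin_unlock_irqrestore(&db->lock, flags);
        }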
This can accumulate to hundreds of thousands of checked objects. In the
worst case this can trigger the soft lockup detector:

NMI watchdog: BUG: soft lockup - CPU#15 stuck for 22s!
CPU: 15 PID: 110342 Comm: stress-ng-getde
Call Trace:
 [] debug_check_no_obj_freed+0x13e/0x220
 [] __free_pages_ok+0x1f1/0x5c0
 [] __free_pages+0x25/0x40
 [] __free_slab+0x19b/0x270
 [] discard_slab+0x39/0x50
 [] __slab_free+0x207/0x270
 [] ___cache_free+0xa6/0xb0
 [] qlist_free_all+0x47/0x80
 [] quarantine_reduce+0x159/0x190
 [] kasan_kmalloc+0xaf/0xc0
 [] kasan_slab_alloc+0x12/0x20
 [] kmem_cache_alloc+0xfa/0x360
 [] ? getname_flags+0x4f/0x1f0
 [] getname_flags+0x4f/0x1f0
 [] getname+0x12/0x20
 [] do_sys_open+0xf9/0x210
 [] SyS_open+0x1e/0x20
 [] entry_SYSCALL_64_fastpath+0x1f/0xc2

The code path might be called in either atomic or non-atomic context,
but in_atomic() can't tell if the current context is atomic or not on a
PREEMPT=n kernel, so cond_resched() can't be used to prevent the soft
lockup.

Utilize the global free list to shorten the loop execution time.

[ tglx: Massaged changelog ]

Suggested-by: Thomas Gleixner
Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-5-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 3e79c100271f..faab2c4ea024 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -751,13 +751,13 @@ EXPORT_SYMBOL_GPL(debug_object_active_state);
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
         unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
-        struct hlist_node *tmp;
-        HLIST_HEAD(freelist);
         struct debug_obj_descr *descr;
         enum debug_obj_state state;
         struct debug_bucket *db;
+        struct hlist_node *tmp;
         struct debug_obj *obj;
         int cnt, objs_checked = 0;
+        bool work = false;

         saddr = (unsigned long) address;
         eaddr = saddr + size;
@@ -788,18 +788,12 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
                         goto repeat;
                 default:
                         hlist_del(&obj->node);
-                        hlist_add_head(&obj->node, &freelist);
+                        work |= __free_object(obj);
                         break;
                 }
         }
         raw_spin_unlock_irqrestore(&db->lock, flags);

-        /* Now free them */
-        hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
-                hlist_del(&obj->node);
-                free_object(obj);
-        }
-
         if (cnt > debug_objects_maxchain)
                 debug_objects_maxchain = cnt;
@@ -808,6 +802,10 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)

         if (objs_checked > debug_objects_maxchecked)
                 debug_objects_maxchecked = objs_checked;
+
+        /* Schedule work to actually kmem_cache_free() objects */
+        if (work)
+                schedule_work(&debug_obj_work);
 }

 void debug_check_no_obj_freed(const void *address, unsigned long size)

From 04148187aa9df3626168f7429d2287997787e387 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 22 Feb 2018 16:52:58 +0100
Subject: [PATCH 5/6] debugobjects: Fix debug_objects_freed accounting

The removal of the batched object freeing has caused the
debug_objects_freed counter to become read-only, and the reading is
inside an ifdef, so gcc warns that it is completely unused without
CONFIG_DEBUG_FS:

lib/debugobjects.c:71:14: error: 'debug_objects_freed' defined but not used [-Werror=unused-variable]

Assuming we are still interested in this number, this adds back code to
keep track of the freed objects.
Fixes: 636e1970fd7d ("debugobjects: Use global free list in free_object()")
Suggested-by: Waiman Long
Signed-off-by: Arnd Bergmann
Signed-off-by: Thomas Gleixner
Acked-by: Yang Shi
Acked-by: Waiman Long
Link: https://lkml.kernel.org/r/20180222155335.1647466-1-arnd@arndb.de
---
 lib/debugobjects.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index faab2c4ea024..105ecfc47d8c 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -233,6 +233,7 @@ static void free_obj_work(struct work_struct *work)
          */
         if (obj_nr_tofree) {
                 hlist_move_list(&obj_to_free, &tofree);
+                debug_objects_freed += obj_nr_tofree;
                 obj_nr_tofree = 0;
         }
         raw_spin_unlock_irqrestore(&pool_lock, flags);

From 163cf842f5837334bc69aaf09ad38e11f4573914 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 13 Mar 2018 14:18:46 +0100
Subject: [PATCH 6/6] debugobjects: Avoid another unused variable warning

debug_objects_maxchecked is only updated in __debug_check_no_obj_freed(),
and only read in debug_stats_show(); unfortunately both of these are
optional and depend on different Kconfig symbols.

When both CONFIG_DEBUG_OBJECTS_FREE and CONFIG_DEBUG_FS are disabled
this warning is emitted:

lib/debugobjects.c:56:14: error: 'debug_objects_maxchecked' defined but not used [-Werror=unused-variable]

Rather than trying to add more complex #ifdef protections, mark the
variable as __maybe_unused so it can be silently dropped when unused.

Fixes: bd9dcd046509 ("debugobjects: Export max loops counter")
Signed-off-by: Arnd Bergmann
Signed-off-by: Thomas Gleixner
Acked-by: Yang Shi
Cc: Waiman Long
Link: https://lkml.kernel.org/r/20180313131857.158876-1-arnd@arndb.de
---
 lib/debugobjects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 105ecfc47d8c..994be4805cec 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -53,7 +53,7 @@ static int obj_nr_tofree;
 static struct kmem_cache *obj_cache;

 static int debug_objects_maxchain __read_mostly;
-static int debug_objects_maxchecked __read_mostly;
+static int __maybe_unused debug_objects_maxchecked __read_mostly;
 static int debug_objects_fixups __read_mostly;
 static int debug_objects_warnings __read_mostly;
 static int debug_objects_enabled __read_mostly
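As background on the approach: the kernel defines __maybe_unused as the
compiler's "unused" attribute in its compiler headers, which silences
-Wunused-variable without hiding the declaration behind #ifdef. A minimal
stand-alone illustration (not kernel code; the DEBUG_STATS macro and
stats_counter variable are made up for this example):

#include <stdio.h>

/* Stand-in for the kernel's definition of __maybe_unused. */
#define __maybe_unused __attribute__((__unused__))

/* Only read when DEBUG_STATS is defined, but never warned about. */
static int __maybe_unused stats_counter;

int main(void)
{
#ifdef DEBUG_STATS
        printf("stats_counter = %d\n", stats_counter);
#endif
        return 0;
}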