From bd9dcd046509cd5355605e43791eacee8bf5e40f Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:25 +0800
Subject: [PATCH 1/6] debugobjects: Export max loops counter

__debug_check_no_obj_freed() can be an expensive operation depending on
the size of memory freed. It already exports the maximum chain walk
length via debugfs, but this only records the maximum of a single memory
chunk. However, there is no information about the total number of objects
inspected for a __debug_check_no_obj_freed() operation, which might be
significantly larger when a huge memory region is freed.

Aggregate the number of objects inspected for a single invocation of
__debug_check_no_obj_freed() and export it via debugfs.

The resulting output of /sys/kernel/debug/debug_objects/stats looks like:

max_chain     :121
max_checked   :543267
warnings      :0
fixups        :0
pool_free     :1764
pool_min_free :341
pool_used     :86438
pool_max_used :268887
objs_allocated:6068254
objs_freed    :5981076

[ tglx: Renamed the variable to max_checked and adjusted changelog ]

Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-2-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2f5349c6e81a..f6d57a11c927 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -50,6 +50,7 @@ static int obj_pool_max_used;
 static struct kmem_cache *obj_cache;

 static int debug_objects_maxchain __read_mostly;
+static int debug_objects_maxchecked __read_mostly;
 static int debug_objects_fixups __read_mostly;
 static int debug_objects_warnings __read_mostly;
 static int debug_objects_enabled __read_mostly
@@ -720,7 +721,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
         enum debug_obj_state state;
         struct debug_bucket *db;
         struct debug_obj *obj;
-        int cnt;
+        int cnt, objs_checked = 0;

         saddr = (unsigned long) address;
         eaddr = saddr + size;
@@ -765,7 +766,12 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
                 if (cnt > debug_objects_maxchain)
                         debug_objects_maxchain = cnt;
+
+                objs_checked += cnt;
         }
+
+        if (objs_checked > debug_objects_maxchecked)
+                debug_objects_maxchecked = objs_checked;
 }
@@ -780,6 +786,7 @@ void debug_check_no_obj_freed(const void *address, unsigned long size)
 static int debug_stats_show(struct seq_file *m, void *v)
 {
         seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
+        seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
         seq_printf(m, "warnings :%d\n", debug_objects_warnings);
         seq_printf(m, "fixups :%d\n", debug_objects_fixups);
         seq_printf(m, "pool_free :%d\n", obj_pool_free);

From 36c4ead6f6dfbbe777d3d7e9cc8702530b71a94f Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:26 +0800
Subject: [PATCH 2/6] debugobjects: Add global free list and the counter

free_object() adds objects to the pool list and schedules work when the
pool list is larger than the pool size. The worker handles the actual
kfree() of the object by iterating the pool list until the pool size is
below the maximum pool size again. To iterate the pool list, pool_lock
has to be held and the objects which should be freed need to be put into
temporary storage so pool_lock can be dropped for the actual
kmem_cache_free() invocation.
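For orientation, that pattern looks roughly like this before the series
(condensed from the hunks removed in patch 3; the freed-objects accounting
is trimmed):

#define ODEBUG_FREE_BATCH 4

static void free_object(struct debug_obj *obj)
{
        unsigned long flags;
        int sched = 0;

        raw_spin_lock_irqsave(&pool_lock, flags);
        /* Schedule the worker once the pool is over-full and the cache exists. */
        if (obj_pool_free > debug_objects_pool_size && obj_cache)
                sched = 1;
        hlist_add_head(&obj->node, &obj_pool);
        obj_pool_free++;
        obj_pool_used--;
        raw_spin_unlock_irqrestore(&pool_lock, flags);
        if (sched)
                schedule_work(&debug_obj_work);
}

static void free_obj_work(struct work_struct *work)
{
        struct debug_obj *objs[ODEBUG_FREE_BATCH];
        unsigned long flags;
        int i;

        if (!raw_spin_trylock_irqsave(&pool_lock, flags))
                return;
        while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
                /* Detach a batch into temporary storage under pool_lock ... */
                for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
                        objs[i] = hlist_entry(obj_pool.first, typeof(*objs[0]), node);
                        hlist_del(&objs[i]->node);
                }
                obj_pool_free -= ODEBUG_FREE_BATCH;
                /* ... drop the lock for the actual kmem_cache_free() ... */
                raw_spin_unlock_irqrestore(&pool_lock, flags);
                for (i = 0; i < ODEBUG_FREE_BATCH; i++)
                        kmem_cache_free(obj_cache, objs[i]);
                /* ... and retake it for the next batch. */
                if (!raw_spin_trylock_irqsave(&pool_lock, flags))
                        return;
        }
        raw_spin_unlock_irqrestore(&pool_lock, flags);
}

For a large burst of frees this means repeated lock/trylock cycles over the
same pool list, which is exactly what the next paragraph calls out.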
That's a pointless and expensive exercise if there is a large number of
objects to free. In such a case it's better to evaluate the fill level
of the pool in free_object() and queue the object to be freed either on
the pool list or, if the pool is full, on a separate global free list.

The worker can then do the following simpler operation:

 - Move objects back from the global free list to the pool list if the
   pool list is no longer full.

 - Remove the remaining objects in a single list move operation from the
   global free list and do the kmem_cache_free() operation lockless from
   the temporary list head.

In fill_pool() the global free list is checked as well to avoid real
allocations from the kmem cache.

Add the necessary list head and a counter for the number of objects on
the global free list and export that counter via debugfs:

max_chain     :79
max_loops     :8147
warnings      :0
fixups        :0
pool_free     :1697
pool_min_free :346
pool_used     :15356
pool_max_used :23933
on_free_list  :39
objs_allocated:32617
objs_freed    :16588

Nothing queues objects on the global free list yet. This happens in a
follow-up change.

[ tglx: Simplified implementation and massaged changelog ]

Suggested-by: Thomas Gleixner
Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-3-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index f6d57a11c927..e31273b45da5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -42,11 +42,14 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
 static DEFINE_RAW_SPINLOCK(pool_lock);

 static HLIST_HEAD(obj_pool);
+static HLIST_HEAD(obj_to_free);

 static int obj_pool_min_free = ODEBUG_POOL_SIZE;
 static int obj_pool_free = ODEBUG_POOL_SIZE;
 static int obj_pool_used;
 static int obj_pool_max_used;
+/* The number of objs on the global free list */
+static int obj_nr_tofree;
 static struct kmem_cache *obj_cache;

 static int debug_objects_maxchain __read_mostly;
@@ -97,12 +100,32 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
 static void fill_pool(void)
 {
         gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-        struct debug_obj *new;
+        struct debug_obj *new, *obj;
         unsigned long flags;

         if (likely(obj_pool_free >= debug_objects_pool_min_level))
                 return;

+        /*
+         * Reuse objs from the global free list; they will be reinitialized
+         * when allocating.
+         */
+        while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
+                raw_spin_lock_irqsave(&pool_lock, flags);
+                /*
+                 * Recheck with the lock held as the worker thread might have
+                 * won the race and freed the global free list already.
+                 */
+                if (obj_nr_tofree) {
+                        obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+                        hlist_del(&obj->node);
+                        obj_nr_tofree--;
+                        hlist_add_head(&obj->node, &obj_pool);
+                        obj_pool_free++;
+                }
+                raw_spin_unlock_irqrestore(&pool_lock, flags);
+        }
+
         if (unlikely(!obj_cache))
                 return;

@@ -186,11 +209,38 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 static void free_obj_work(struct work_struct *work)
 {
         struct debug_obj *objs[ODEBUG_FREE_BATCH];
+        struct hlist_node *tmp;
+        struct debug_obj *obj;
         unsigned long flags;
         int i;
+        HLIST_HEAD(tofree);

         if (!raw_spin_trylock_irqsave(&pool_lock, flags))
                 return;
+
+        /*
+         * The objs on the pool list might be allocated before the work is
+         * run, so recheck if pool list it full or not, if not fill pool
+         * list from the global free list
+         */
+        while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
+                obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+                hlist_del(&obj->node);
+                hlist_add_head(&obj->node, &obj_pool);
+                obj_pool_free++;
+                obj_nr_tofree--;
+        }
+
+        /*
+         * Pool list is already full and there are still objs on the free
+         * list. Move remaining free objs to a temporary list to free the
+         * memory outside the pool_lock held region.
+         */
+        if (obj_nr_tofree) {
+                hlist_move_list(&obj_to_free, &tofree);
+                obj_nr_tofree = 0;
+        }
+
         while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
                 for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
                         objs[i] = hlist_entry(obj_pool.first,
@@ -211,6 +261,11 @@ static void free_obj_work(struct work_struct *work)
                 return;
         }
         raw_spin_unlock_irqrestore(&pool_lock, flags);
+
+        hlist_for_each_entry_safe(obj, tmp, &tofree, node) {
+                hlist_del(&obj->node);
+                kmem_cache_free(obj_cache, obj);
+        }
 }

 /*
@@ -793,6 +848,7 @@ static int debug_stats_show(struct seq_file *m, void *v)
         seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
         seq_printf(m, "pool_used :%d\n", obj_pool_used);
         seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
+        seq_printf(m, "on_free_list :%d\n", obj_nr_tofree);
         seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
         seq_printf(m, "objs_freed :%d\n", debug_objects_freed);
         return 0;

From 636e1970fd7deaa0d0ee0dfb6ac65fbd690b32d2 Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:27 +0800
Subject: [PATCH 3/6] debugobjects: Use global free list in free_object()

The newly added global free list makes it possible to avoid lengthy
pool_list iterations in free_obj_work() by putting objects either on the
pool list, when the fill level of the pool is below the maximum, or on
the global free list immediately.

As the pool is now guaranteed to never exceed the maximum fill level,
the batch removal from the pool list in free_obj_work() can be dropped.

Split free_object() into two parts, so the actual queueing function can
be reused without invoking schedule_work() on every invocation.
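Stripped of the diff markers, the split in the patch below has this shape:

static bool __free_object(struct debug_obj *obj)
{
        unsigned long flags;
        bool work;

        raw_spin_lock_irqsave(&pool_lock, flags);
        /* Queue for real freeing only when the pool is full and the cache is up. */
        work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
        obj_pool_used--;

        if (work) {
                obj_nr_tofree++;
                hlist_add_head(&obj->node, &obj_to_free);
        } else {
                obj_pool_free++;
                hlist_add_head(&obj->node, &obj_pool);
        }
        raw_spin_unlock_irqrestore(&pool_lock, flags);
        return work;
}

static void free_object(struct debug_obj *obj)
{
        /* Single-object callers still schedule the worker themselves. */
        if (__free_object(obj))
                schedule_work(&debug_obj_work);
}

A caller that frees many objects in a row, such as
__debug_check_no_obj_freed() in patch 4, can invoke __free_object()
repeatedly and issue a single schedule_work() at the end.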
[ tglx: Remove the batch removal from pool list and massage changelog ]

Suggested-by: Thomas Gleixner
Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-4-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 63 ++++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 41 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index e31273b45da5..3e79c100271f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -201,18 +201,13 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
  * workqueue function to free objects.
  *
  * To reduce contention on the global pool_lock, the actual freeing of
- * debug objects will be delayed if the pool_lock is busy. We also free
- * the objects in a batch of 4 for each lock/unlock cycle.
+ * debug objects will be delayed if the pool_lock is busy.
  */
-#define ODEBUG_FREE_BATCH 4
-
 static void free_obj_work(struct work_struct *work)
 {
-        struct debug_obj *objs[ODEBUG_FREE_BATCH];
         struct hlist_node *tmp;
         struct debug_obj *obj;
         unsigned long flags;
-        int i;
         HLIST_HEAD(tofree);

         if (!raw_spin_trylock_irqsave(&pool_lock, flags))
@@ -240,26 +235,6 @@ static void free_obj_work(struct work_struct *work)
                 hlist_move_list(&obj_to_free, &tofree);
                 obj_nr_tofree = 0;
         }
-
-        while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
-                for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
-                        objs[i] = hlist_entry(obj_pool.first,
-                                              typeof(*objs[0]), node);
-                        hlist_del(&objs[i]->node);
-                }
-
-                obj_pool_free -= ODEBUG_FREE_BATCH;
-                debug_objects_freed += ODEBUG_FREE_BATCH;
-                /*
-                 * We release pool_lock across kmem_cache_free() to
-                 * avoid contention on pool_lock.
-                 */
-                raw_spin_unlock_irqrestore(&pool_lock, flags);
-                for (i = 0; i < ODEBUG_FREE_BATCH; i++)
-                        kmem_cache_free(obj_cache, objs[i]);
-                if (!raw_spin_trylock_irqsave(&pool_lock, flags))
-                        return;
-        }
         raw_spin_unlock_irqrestore(&pool_lock, flags);

         hlist_for_each_entry_safe(obj, tmp, &tofree, node) {
@@ -268,27 +243,33 @@ static void free_obj_work(struct work_struct *work)
         }
 }

+static bool __free_object(struct debug_obj *obj)
+{
+        unsigned long flags;
+        bool work;
+
+        raw_spin_lock_irqsave(&pool_lock, flags);
+        work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
+        obj_pool_used--;
+
+        if (work) {
+                obj_nr_tofree++;
+                hlist_add_head(&obj->node, &obj_to_free);
+        } else {
+                obj_pool_free++;
+                hlist_add_head(&obj->node, &obj_pool);
+        }
+        raw_spin_unlock_irqrestore(&pool_lock, flags);
+        return work;
+}
+
 /*
  * Put the object back into the pool and schedule work to free objects
  * if necessary.
  */
 static void free_object(struct debug_obj *obj)
 {
-        unsigned long flags;
-        int sched = 0;
-
-        raw_spin_lock_irqsave(&pool_lock, flags);
-        /*
-         * schedule work when the pool is filled and the cache is
-         * initialized:
-         */
-        if (obj_pool_free > debug_objects_pool_size && obj_cache)
-                sched = 1;
-        hlist_add_head(&obj->node, &obj_pool);
-        obj_pool_free++;
-        obj_pool_used--;
-        raw_spin_unlock_irqrestore(&pool_lock, flags);
-        if (sched)
+        if (__free_object(obj))
                 schedule_work(&debug_obj_work);
 }

From 1ea9b98b007a662e402551a41a4413becad40a65 Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Tue, 6 Feb 2018 07:18:28 +0800
Subject: [PATCH 4/6] debugobjects: Use global free list in __debug_check_no_obj_freed()

__debug_check_no_obj_freed() iterates over the memory region to be freed
in chunks and, for each chunk, iterates over the corresponding hash
bucket list.
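Roughly, the walk has this shape (paraphrased from lib/debugobjects.c at
the time of this series; the fixup/repeat handling inside the bucket loop
is omitted):

        saddr = (unsigned long) address;
        eaddr = saddr + size;
        paddr = saddr & ODEBUG_CHUNK_MASK;
        chunks = ((eaddr - paddr) + (ODEBUG_CHUNK_SIZE - 1)) >> ODEBUG_CHUNK_SHIFT;

        for (; chunks > 0; chunks--, paddr += ODEBUG_CHUNK_SIZE) {
                db = get_bucket(paddr);         /* one hash bucket per chunk */

                raw_spin_lock_irqsave(&db->lock, flags);
                hlist_for_each_entry_safe(obj, tmp, &db->list, node) {
                        /*
                         * Every object hashed into this bucket is inspected,
                         * whether or not it falls inside [saddr, eaddr).
                         */
                }
                raw_spin_unlock_irqrestore(&db->lock, flags);
        }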
This can accumulate to hundreds of thousands of checked objects. In the
worst case this can trigger the soft lockup detector:

NMI watchdog: BUG: soft lockup - CPU#15 stuck for 22s!
CPU: 15 PID: 110342 Comm: stress-ng-getde
Call Trace:
 [] debug_check_no_obj_freed+0x13e/0x220
 [] __free_pages_ok+0x1f1/0x5c0
 [] __free_pages+0x25/0x40
 [] __free_slab+0x19b/0x270
 [] discard_slab+0x39/0x50
 [] __slab_free+0x207/0x270
 [] ___cache_free+0xa6/0xb0
 [] qlist_free_all+0x47/0x80
 [] quarantine_reduce+0x159/0x190
 [] kasan_kmalloc+0xaf/0xc0
 [] kasan_slab_alloc+0x12/0x20
 [] kmem_cache_alloc+0xfa/0x360
 [] ? getname_flags+0x4f/0x1f0
 [] getname_flags+0x4f/0x1f0
 [] getname+0x12/0x20
 [] do_sys_open+0xf9/0x210
 [] SyS_open+0x1e/0x20
 [] entry_SYSCALL_64_fastpath+0x1f/0xc2

The code path might be called in either atomic or non-atomic context,
but in_atomic() can't tell if the current context is atomic or not on a
PREEMPT=n kernel, so cond_resched() can't be used to prevent the soft
lockup.

Utilize the global free list to shorten the loop execution time.

[ tglx: Massaged changelog ]

Suggested-by: Thomas Gleixner
Signed-off-by: Yang Shi
Signed-off-by: Thomas Gleixner
Cc: longman@redhat.com
Link: https://lkml.kernel.org/r/1517872708-24207-5-git-send-email-yang.shi@linux.alibaba.com
---
 lib/debugobjects.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 3e79c100271f..faab2c4ea024 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -751,13 +751,13 @@ EXPORT_SYMBOL_GPL(debug_object_active_state);
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
         unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
-        struct hlist_node *tmp;
-        HLIST_HEAD(freelist);
         struct debug_obj_descr *descr;
         enum debug_obj_state state;
         struct debug_bucket *db;
+        struct hlist_node *tmp;
         struct debug_obj *obj;
         int cnt, objs_checked = 0;
+        bool work = false;

         saddr = (unsigned long) address;
         eaddr = saddr + size;
@@ -788,18 +788,12 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
                         goto repeat;
                 default:
                         hlist_del(&obj->node);
-                        hlist_add_head(&obj->node, &freelist);
+                        work |= __free_object(obj);
                         break;
                 }
         }
         raw_spin_unlock_irqrestore(&db->lock, flags);

-        /* Now free them */
-        hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
-                hlist_del(&obj->node);
-                free_object(obj);
-        }
-
         if (cnt > debug_objects_maxchain)
                 debug_objects_maxchain = cnt;
@@ -808,6 +802,10 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)

         if (objs_checked > debug_objects_maxchecked)
                 debug_objects_maxchecked = objs_checked;
+
+        /* Schedule work to actually kmem_cache_free() objects */
+        if (work)
+                schedule_work(&debug_obj_work);
 }

 void debug_check_no_obj_freed(const void *address, unsigned long size)

From 04148187aa9df3626168f7429d2287997787e387 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 22 Feb 2018 16:52:58 +0100
Subject: [PATCH 5/6] debugobjects: Fix debug_objects_freed accounting

The removal of the batched object freeing has caused the
debug_objects_freed counter to become read-only, and the reading is
inside an ifdef, so gcc warns that it is completely unused without
CONFIG_DEBUG_FS:

lib/debugobjects.c:71:14: error: 'debug_objects_freed' defined but not used [-Werror=unused-variable]

Assuming we are still interested in this number, this adds back code to
keep track of the freed objects.
Fixes: 636e1970fd7d ("debugobjects: Use global free list in free_object()")
Suggested-by: Waiman Long
Signed-off-by: Arnd Bergmann
Signed-off-by: Thomas Gleixner
Acked-by: Yang Shi
Acked-by: Waiman Long
Link: https://lkml.kernel.org/r/20180222155335.1647466-1-arnd@arndb.de
---
 lib/debugobjects.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index faab2c4ea024..105ecfc47d8c 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -233,6 +233,7 @@ static void free_obj_work(struct work_struct *work)
          */
         if (obj_nr_tofree) {
                 hlist_move_list(&obj_to_free, &tofree);
+                debug_objects_freed += obj_nr_tofree;
                 obj_nr_tofree = 0;
         }
         raw_spin_unlock_irqrestore(&pool_lock, flags);

From 163cf842f5837334bc69aaf09ad38e11f4573914 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 13 Mar 2018 14:18:46 +0100
Subject: [PATCH 6/6] debugobjects: Avoid another unused variable warning

debug_objects_maxchecked is only updated in __debug_check_no_obj_freed(),
and only read in debug_stats_show(); unfortunately both of these are
optional and depend on different Kconfig symbols.

When both CONFIG_DEBUG_OBJECTS_FREE and CONFIG_DEBUG_FS are disabled
this warning is emitted:

lib/debugobjects.c:56:14: error: 'debug_objects_maxchecked' defined but not used [-Werror=unused-variable]

Rather than trying to add more complex #ifdef protections, mark the
variable as __maybe_unused so it can be silently dropped when unused.

Fixes: bd9dcd046509 ("debugobjects: Export max loops counter")
Signed-off-by: Arnd Bergmann
Signed-off-by: Thomas Gleixner
Acked-by: Yang Shi
Cc: Waiman Long
Link: https://lkml.kernel.org/r/20180313131857.158876-1-arnd@arndb.de
---
 lib/debugobjects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 105ecfc47d8c..994be4805cec 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -53,7 +53,7 @@ static int obj_nr_tofree;
 static struct kmem_cache *obj_cache;

 static int debug_objects_maxchain __read_mostly;
-static int debug_objects_maxchecked __read_mostly;
+static int __maybe_unused debug_objects_maxchecked __read_mostly;
 static int debug_objects_fixups __read_mostly;
 static int debug_objects_warnings __read_mostly;
 static int debug_objects_enabled __read_mostly
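As background on the approach: the kernel defines __maybe_unused as the
compiler's "unused" attribute in its compiler headers, which silences
-Wunused-variable without hiding the declaration behind #ifdef. A minimal
stand-alone illustration (not kernel code; the DEBUG_STATS macro and
stats_counter variable are made up for this example):

#include <stdio.h>

/* Stand-in for the kernel's definition of __maybe_unused. */
#define __maybe_unused __attribute__((__unused__))

/* Only read when DEBUG_STATS is defined, but never warned about. */
static int __maybe_unused stats_counter;

int main(void)
{
#ifdef DEBUG_STATS
        printf("stats_counter = %d\n", stats_counter);
#endif
        return 0;
}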