locking/lockdep: Rework FS_RECLAIM annotation

A while ago someone (I cannot find the email just now) asked whether we
could implement the RECLAIM_FS inversion detection with a 'fake' lock,
like we use for other things such as workqueues. I think this should
be possible, which allows reducing the number of 'irq' states and thus
the number of __bfs() lookups we do.

Removing this one IRQ state results in 4 fewer __bfs() walks per
dependency, improving lockdep performance. Moving the annotation out
of the lockdep code also makes it easier for the mm people to extend.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Nikolay Borisov <nborisov@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: boqun.feng@gmail.com
Cc: iamjoonsoo.kim@lge.com
Cc: kernel-team@lge.com
Cc: kirill@shutemov.name
Cc: npiggin@gmail.com
Cc: walken@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra 2017-03-03 10:13:38 +01:00 committed by Ingo Molnar
parent a9668cd6ee
commit d92a8cfcb3
10 changed files with 75 additions and 120 deletions

View File

@ -28,6 +28,7 @@
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/sort.h> #include <linux/sort.h>
#include <linux/sched/mm.h>
#include "intel_drv.h" #include "intel_drv.h"
static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
@ -4331,7 +4332,7 @@ i915_drop_caches_set(void *data, u64 val)
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
} }
lockdep_set_current_reclaim_state(GFP_KERNEL); fs_reclaim_acquire(GFP_KERNEL);
if (val & DROP_BOUND) if (val & DROP_BOUND)
i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND);
@ -4340,7 +4341,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_SHRINK_ALL) if (val & DROP_SHRINK_ALL)
i915_gem_shrink_all(dev_priv); i915_gem_shrink_all(dev_priv);
lockdep_clear_current_reclaim_state(); fs_reclaim_release(GFP_KERNEL);
if (val & DROP_FREED) { if (val & DROP_FREED) {
synchronize_rcu(); synchronize_rcu();

View File

@ -29,7 +29,7 @@ extern int lock_stat;
* We'd rather not expose kernel/lockdep_states.h this wide, but we do need * We'd rather not expose kernel/lockdep_states.h this wide, but we do need
* the total number of states... :-( * the total number of states... :-(
*/ */
#define XXX_LOCK_USAGE_STATES (1+3*4) #define XXX_LOCK_USAGE_STATES (1+2*4)
/* /*
* NR_LOCKDEP_CACHING_CLASSES ... Number of classes * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
@ -363,10 +363,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);
struct pin_cookie { unsigned int val; }; struct pin_cookie { unsigned int val; };
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, } #define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
@ -375,7 +371,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, # define INIT_LOCKDEP .lockdep_recursion = 0,
#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
@ -416,9 +412,6 @@ static inline void lockdep_on(void)
# define lock_downgrade(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0)
# define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_class(l, n, k, s, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0)
# define lockdep_set_current_reclaim_state(g) do { } while (0)
# define lockdep_clear_current_reclaim_state() do { } while (0)
# define lockdep_trace_alloc(g) do { } while (0)
# define lockdep_info() do { } while (0) # define lockdep_info() do { } while (0)
# define lockdep_init_map(lock, name, key, sub) \ # define lockdep_init_map(lock, name, key, sub) \
do { (void)(name); (void)(key); } while (0) do { (void)(name); (void)(key); } while (0)

View File

@ -846,7 +846,6 @@ struct task_struct {
int lockdep_depth; int lockdep_depth;
unsigned int lockdep_recursion; unsigned int lockdep_recursion;
struct held_lock held_locks[MAX_LOCK_DEPTH]; struct held_lock held_locks[MAX_LOCK_DEPTH];
gfp_t lockdep_reclaim_gfp;
#endif #endif
#ifdef CONFIG_UBSAN #ifdef CONFIG_UBSAN

View File

@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags)
return flags; return flags;
} }
/*
 * Lockdep annotation hooks for the reclaim paths.
 *
 * With CONFIG_LOCKDEP the functions are real, out-of-line symbols.
 * Without it they collapse to empty inline stubs, so callers can invoke
 * them unconditionally without their own #ifdefs.
 */
#ifdef CONFIG_LOCKDEP
extern void fs_reclaim_acquire(gfp_t gfp_mask);
extern void fs_reclaim_release(gfp_t gfp_mask);
#else
static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
static inline unsigned int memalloc_noio_save(void) static inline unsigned int memalloc_noio_save(void)
{ {
unsigned int flags = current->flags & PF_MEMALLOC_NOIO; unsigned int flags = current->flags & PF_MEMALLOC_NOIO;

View File

@ -344,14 +344,12 @@ EXPORT_SYMBOL(lockdep_on);
#if VERBOSE #if VERBOSE
# define HARDIRQ_VERBOSE 1 # define HARDIRQ_VERBOSE 1
# define SOFTIRQ_VERBOSE 1 # define SOFTIRQ_VERBOSE 1
# define RECLAIM_VERBOSE 1
#else #else
# define HARDIRQ_VERBOSE 0 # define HARDIRQ_VERBOSE 0
# define SOFTIRQ_VERBOSE 0 # define SOFTIRQ_VERBOSE 0
# define RECLAIM_VERBOSE 0
#endif #endif
#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE #if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
/* /*
* Quick filtering for interesting events: * Quick filtering for interesting events:
*/ */
@ -2567,14 +2565,6 @@ static int SOFTIRQ_verbose(struct lock_class *class)
return 0; return 0;
} }
static int RECLAIM_FS_verbose(struct lock_class *class)
{
#if RECLAIM_VERBOSE
return class_filter(class);
#endif
return 0;
}
#define STRICT_READ_CHECKS 1 #define STRICT_READ_CHECKS 1
static int (*state_verbose_f[])(struct lock_class *class) = { static int (*state_verbose_f[])(struct lock_class *class) = {
@ -2870,57 +2860,6 @@ void trace_softirqs_off(unsigned long ip)
debug_atomic_inc(redundant_softirqs_off); debug_atomic_inc(redundant_softirqs_off);
} }
static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
{
struct task_struct *curr = current;
if (unlikely(!debug_locks))
return;
gfp_mask = current_gfp_context(gfp_mask);
/* no reclaim without waiting on it */
if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
return;
/* this guy won't enter reclaim */
if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
return;
/* We're only interested __GFP_FS allocations for now */
if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS))
return;
/*
* Oi! Can't be having __GFP_FS allocations with IRQs disabled.
*/
if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
return;
/* Disable lockdep if explicitly requested */
if (gfp_mask & __GFP_NOLOCKDEP)
return;
mark_held_locks(curr, RECLAIM_FS);
}
static void check_flags(unsigned long flags);
void lockdep_trace_alloc(gfp_t gfp_mask)
{
unsigned long flags;
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
__lockdep_trace_alloc(gfp_mask, flags);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
{ {
/* /*
@ -2966,22 +2905,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
} }
} }
/*
* We reuse the irq context infrastructure more broadly as a general
* context checking code. This tests GFP_FS recursion (a lock taken
* during reclaim for a GFP_FS allocation is held over a GFP_FS
* allocation).
*/
if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
if (hlock->read) {
if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
return 0;
} else {
if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
return 0;
}
}
return 1; return 1;
} }
@ -3040,10 +2963,6 @@ static inline int separate_irq_context(struct task_struct *curr,
return 0; return 0;
} }
void lockdep_trace_alloc(gfp_t gfp_mask)
{
}
#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */ #endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
/* /*
@ -3952,18 +3871,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
} }
EXPORT_SYMBOL_GPL(lock_unpin_lock); EXPORT_SYMBOL_GPL(lock_unpin_lock);
void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
{
current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask);
}
EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state);
void lockdep_clear_current_reclaim_state(void)
{
current->lockdep_reclaim_gfp = 0;
}
EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state);
#ifdef CONFIG_LOCK_STAT #ifdef CONFIG_LOCK_STAT
static int static int
print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,

View File

@ -6,4 +6,3 @@
*/ */
LOCKDEP_STATE(HARDIRQ) LOCKDEP_STATE(HARDIRQ)
LOCKDEP_STATE(SOFTIRQ) LOCKDEP_STATE(SOFTIRQ)
LOCKDEP_STATE(RECLAIM_FS)

View File

@ -66,6 +66,7 @@
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/memcontrol.h> #include <linux/memcontrol.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
@ -3490,6 +3491,47 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
} }
#endif /* CONFIG_COMPACTION */ #endif /* CONFIG_COMPACTION */
#ifdef CONFIG_LOCKDEP
/*
 * Lockdep map named "fs_reclaim". fs_reclaim_acquire() and
 * fs_reclaim_release() below acquire and release it via
 * lock_map_acquire()/lock_map_release().
 */
struct lockdep_map __fs_reclaim_map =
STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
/*
 * Decide whether an allocation with @gfp_mask should take part in the
 * fs_reclaim lockdep annotation.
 *
 * The mask is first adjusted by current_gfp_context(). The result is
 * true only when the adjusted mask has __GFP_DIRECT_RECLAIM and __GFP_FS
 * set, does not have __GFP_NOLOCKDEP set, and the current task is not a
 * PF_MEMALLOC task allocating without __GFP_NOMEMALLOC.
 */
static bool __need_fs_reclaim(gfp_t gfp_mask)
{
	const gfp_t ctx_mask = current_gfp_context(gfp_mask);

	/* No reclaim happens unless the caller is willing to wait for it. */
	if (!(ctx_mask & __GFP_DIRECT_RECLAIM))
		return false;

	/* Only __GFP_FS allocations are of interest for now. */
	if (!(ctx_mask & __GFP_FS))
		return false;

	/* The caller explicitly opted out of lockdep tracking. */
	if (ctx_mask & __GFP_NOLOCKDEP)
		return false;

	/*
	 * A PF_MEMALLOC task won't enter reclaim, unless the allocation
	 * carries __GFP_NOMEMALLOC.
	 */
	return (ctx_mask & __GFP_NOMEMALLOC) || !(current->flags & PF_MEMALLOC);
}
/*
 * Acquire the "fs_reclaim" lockdep map when __need_fs_reclaim() says
 * @gfp_mask qualifies; otherwise do nothing.
 */
void fs_reclaim_acquire(gfp_t gfp_mask)
{
	if (!__need_fs_reclaim(gfp_mask))
		return;

	lock_map_acquire(&__fs_reclaim_map);
}
EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
/*
 * Release the "fs_reclaim" lockdep map when __need_fs_reclaim() says
 * @gfp_mask qualifies; otherwise do nothing.
 */
void fs_reclaim_release(gfp_t gfp_mask)
{
	if (!__need_fs_reclaim(gfp_mask))
		return;

	lock_map_release(&__fs_reclaim_map);
}
EXPORT_SYMBOL_GPL(fs_reclaim_release);
#endif
/* Perform direct synchronous page reclaim */ /* Perform direct synchronous page reclaim */
static int static int
__perform_reclaim(gfp_t gfp_mask, unsigned int order, __perform_reclaim(gfp_t gfp_mask, unsigned int order,
@ -3504,7 +3546,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
/* We now go into synchronous reclaim */ /* We now go into synchronous reclaim */
cpuset_memory_pressure_bump(); cpuset_memory_pressure_bump();
noreclaim_flag = memalloc_noreclaim_save(); noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(gfp_mask); fs_reclaim_acquire(gfp_mask);
reclaim_state.reclaimed_slab = 0; reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state; current->reclaim_state = &reclaim_state;
@ -3512,7 +3554,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
ac->nodemask); ac->nodemask);
current->reclaim_state = NULL; current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state(); fs_reclaim_release(gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag); memalloc_noreclaim_restore(noreclaim_flag);
cond_resched(); cond_resched();
@ -4041,7 +4083,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
*alloc_flags |= ALLOC_CPUSET; *alloc_flags |= ALLOC_CPUSET;
} }
lockdep_trace_alloc(gfp_mask); fs_reclaim_acquire(gfp_mask);
fs_reclaim_release(gfp_mask);
might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);

View File

@ -43,6 +43,7 @@ struct kmem_cache {
#include <linux/kasan.h> #include <linux/kasan.h>
#include <linux/kmemleak.h> #include <linux/kmemleak.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/sched/mm.h>
/* /*
* State of the slab allocator. * State of the slab allocator.
@ -412,7 +413,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
gfp_t flags) gfp_t flags)
{ {
flags &= gfp_allowed_mask; flags &= gfp_allowed_mask;
lockdep_trace_alloc(flags);
fs_reclaim_acquire(flags);
fs_reclaim_release(flags);
might_sleep_if(gfpflags_allow_blocking(flags)); might_sleep_if(gfpflags_allow_blocking(flags));
if (should_failslab(s, flags)) if (should_failslab(s, flags))

View File

@ -432,7 +432,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp &= gfp_allowed_mask; gfp &= gfp_allowed_mask;
lockdep_trace_alloc(gfp); fs_reclaim_acquire(gfp);
fs_reclaim_release(gfp);
if (size < PAGE_SIZE - align) { if (size < PAGE_SIZE - align) {
if (!size) if (!size)
@ -538,7 +539,8 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
flags &= gfp_allowed_mask; flags &= gfp_allowed_mask;
lockdep_trace_alloc(flags); fs_reclaim_acquire(flags);
fs_reclaim_release(flags);
if (c->size < PAGE_SIZE) { if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node); b = slob_alloc(c->size, flags, c->align, node);

View File

@ -3525,8 +3525,6 @@ static int kswapd(void *p)
}; };
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
lockdep_set_current_reclaim_state(GFP_KERNEL);
if (!cpumask_empty(cpumask)) if (!cpumask_empty(cpumask))
set_cpus_allowed_ptr(tsk, cpumask); set_cpus_allowed_ptr(tsk, cpumask);
current->reclaim_state = &reclaim_state; current->reclaim_state = &reclaim_state;
@ -3585,14 +3583,15 @@ static int kswapd(void *p)
*/ */
trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx, trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
alloc_order); alloc_order);
fs_reclaim_acquire(GFP_KERNEL);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx); reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
fs_reclaim_release(GFP_KERNEL);
if (reclaim_order < alloc_order) if (reclaim_order < alloc_order)
goto kswapd_try_sleep; goto kswapd_try_sleep;
} }
tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
current->reclaim_state = NULL; current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
return 0; return 0;
} }
@ -3655,14 +3654,14 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
unsigned int noreclaim_flag; unsigned int noreclaim_flag;
noreclaim_flag = memalloc_noreclaim_save(); noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(sc.gfp_mask); fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0; reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state; p->reclaim_state = &reclaim_state;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc); nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
p->reclaim_state = NULL; p->reclaim_state = NULL;
lockdep_clear_current_reclaim_state(); fs_reclaim_release(sc.gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag); memalloc_noreclaim_restore(noreclaim_flag);
return nr_reclaimed; return nr_reclaimed;
@ -3847,7 +3846,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
*/ */
noreclaim_flag = memalloc_noreclaim_save(); noreclaim_flag = memalloc_noreclaim_save();
p->flags |= PF_SWAPWRITE; p->flags |= PF_SWAPWRITE;
lockdep_set_current_reclaim_state(sc.gfp_mask); fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0; reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state; p->reclaim_state = &reclaim_state;
@ -3862,9 +3861,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
} }
p->reclaim_state = NULL; p->reclaim_state = NULL;
fs_reclaim_release(gfp_mask);
current->flags &= ~PF_SWAPWRITE; current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag); memalloc_noreclaim_restore(noreclaim_flag);
lockdep_clear_current_reclaim_state();
return sc.nr_reclaimed >= nr_pages; return sc.nr_reclaimed >= nr_pages;
} }