Merge branches 'bitmaprange.2021.05.10c', 'doc.2021.05.10c', 'fixes.2021.05.13a', 'kvfree_rcu.2021.05.10c', 'mmdumpobj.2021.05.10c', 'nocb.2021.05.12a', 'srcu.2021.05.12a', 'tasks.2021.05.18a' and 'torture.2021.05.10c' into HEAD
bitmaprange.2021.05.10c: Allow "all" for bitmap ranges. doc.2021.05.10c: Documentation updates. fixes.2021.05.13a: Miscellaneous fixes. kvfree_rcu.2021.05.10c: kvfree_rcu() updates. mmdumpobj.2021.05.10c: mem_dump_obj() updates. nocb.2021.05.12a: RCU NOCB CPU updates, including limited deoffloading. srcu.2021.05.12a: SRCU updates. tasks.2021.05.18a: Tasks-RCU updates. torture.2021.05.10c: Torture-test updates.
This commit is contained in:
parent
a6814a79f2
e5bd61e82b
c70360c334
a78d4a2a10
e548eaa116
a616aec9aa
0a580fa65c
474d099736
5390473ec1
commit
641faf1b90
|
@ -21,7 +21,7 @@ Any code that happens after the end of a given RCU grace period is guaranteed
|
|||
to see the effects of all accesses prior to the beginning of that grace
|
||||
period that are within RCU read-side critical sections.
|
||||
Similarly, any code that happens before the beginning of a given RCU grace
|
||||
period is guaranteed to see the effects of all accesses following the end
|
||||
period is guaranteed to not see the effects of all accesses following the end
|
||||
of that grace period that are within RCU read-side critical sections.
|
||||
|
||||
Note well that RCU-sched read-side critical sections include any region
|
||||
|
@ -339,14 +339,14 @@ The diagram below shows the path of ordering if the leftmost
|
|||
leftmost ``rcu_node`` structure offlines its last CPU and if the next
|
||||
``rcu_node`` structure has no online CPUs).
|
||||
|
||||
.. kernel-figure:: TreeRCU-gp-init-1.svg
|
||||
.. kernel-figure:: TreeRCU-gp-init-2.svg
|
||||
|
||||
The final ``rcu_gp_init()`` pass through the ``rcu_node`` tree traverses
|
||||
breadth-first, setting each ``rcu_node`` structure's ``->gp_seq`` field
|
||||
to the newly advanced value from the ``rcu_state`` structure, as shown
|
||||
in the following diagram.
|
||||
|
||||
.. kernel-figure:: TreeRCU-gp-init-1.svg
|
||||
.. kernel-figure:: TreeRCU-gp-init-3.svg
|
||||
|
||||
This change will also cause each CPU's next call to
|
||||
``__note_gp_changes()`` to notice that a new grace period has started,
|
||||
|
|
|
@ -4290,6 +4290,11 @@
|
|||
whole algorithm to behave better in low memory
|
||||
condition.
|
||||
|
||||
rcutree.rcu_delay_page_cache_fill_msec= [KNL]
|
||||
Set the page-cache refill delay (in milliseconds)
|
||||
in response to low-memory conditions. The range
|
||||
of permitted values is 0:100000.
|
||||
|
||||
rcutree.jiffies_till_first_fqs= [KNL]
|
||||
Set delay from grace-period initialization to
|
||||
first attempt to force quiescent states.
|
||||
|
|
|
@ -315,7 +315,7 @@ static inline int rcu_read_lock_any_held(void)
|
|||
#define RCU_LOCKDEP_WARN(c, s) \
|
||||
do { \
|
||||
static bool __section(".data.unlikely") __warned; \
|
||||
if (debug_lockdep_rcu_enabled() && !__warned && (c)) { \
|
||||
if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \
|
||||
__warned = true; \
|
||||
lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
|
||||
} \
|
||||
|
@ -363,6 +363,20 @@ static inline void rcu_preempt_sleep_check(void) { }
|
|||
#define rcu_check_sparse(p, space)
|
||||
#endif /* #else #ifdef __CHECKER__ */
|
||||
|
||||
/**
|
||||
* unrcu_pointer - mark a pointer as not being RCU protected
|
||||
* @p: pointer needing to lose its __rcu property
|
||||
*
|
||||
* Converts @p from an __rcu pointer to a __kernel pointer.
|
||||
* This allows an __rcu pointer to be used with xchg() and friends.
|
||||
*/
|
||||
#define unrcu_pointer(p) \
|
||||
({ \
|
||||
typeof(*p) *_________p1 = (typeof(*p) *__force)(p); \
|
||||
rcu_check_sparse(p, __rcu); \
|
||||
((typeof(*p) __force __kernel *)(_________p1)); \
|
||||
})
|
||||
|
||||
#define __rcu_access_pointer(p, space) \
|
||||
({ \
|
||||
typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \
|
||||
|
@ -518,7 +532,12 @@ do { \
|
|||
* @p: The pointer to read, prior to dereferencing
|
||||
* @c: The conditions under which the dereference will take place
|
||||
*
|
||||
* This is the RCU-bh counterpart to rcu_dereference_check().
|
||||
* This is the RCU-bh counterpart to rcu_dereference_check(). However,
|
||||
* please note that starting in v5.0 kernels, vanilla RCU grace periods
|
||||
* wait for local_bh_disable() regions of code in addition to regions of
|
||||
* code demarked by rcu_read_lock() and rcu_read_unlock(). This means
|
||||
* that synchronize_rcu(), call_rcu(), and friends all take not only
|
||||
* rcu_read_lock() but also rcu_read_lock_bh() into account.
|
||||
*/
|
||||
#define rcu_dereference_bh_check(p, c) \
|
||||
__rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu)
|
||||
|
@ -529,6 +548,11 @@ do { \
|
|||
* @c: The conditions under which the dereference will take place
|
||||
*
|
||||
* This is the RCU-sched counterpart to rcu_dereference_check().
|
||||
* However, please note that starting in v5.0 kernels, vanilla RCU grace
|
||||
* periods wait for preempt_disable() regions of code in addition to
|
||||
* regions of code demarked by rcu_read_lock() and rcu_read_unlock().
|
||||
* This means that synchronize_rcu(), call_rcu(), and friends all take not
|
||||
* only rcu_read_lock() but also rcu_read_lock_sched() into account.
|
||||
*/
|
||||
#define rcu_dereference_sched_check(p, c) \
|
||||
__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
|
||||
|
@ -620,6 +644,12 @@ do { \
|
|||
* sections, invocation of the corresponding RCU callback is deferred
|
||||
* until after all the other CPUs exit their critical sections.
|
||||
*
|
||||
* In v5.0 and later kernels, synchronize_rcu() and call_rcu() also
|
||||
* wait for regions of code with preemption disabled, including regions of
|
||||
* code with interrupts or softirqs disabled. In pre-v5.0 kernels, which
|
||||
* define synchronize_sched(), only code enclosed within rcu_read_lock()
|
||||
* and rcu_read_unlock() is guaranteed to be waited for.
|
||||
*
|
||||
* Note, however, that RCU callbacks are permitted to run concurrently
|
||||
* with new RCU read-side critical sections. One way that this can happen
|
||||
* is via the following sequence of events: (1) CPU 0 enters an RCU
|
||||
|
@ -672,33 +702,12 @@ static __always_inline void rcu_read_lock(void)
|
|||
/**
|
||||
* rcu_read_unlock() - marks the end of an RCU read-side critical section.
|
||||
*
|
||||
* In most situations, rcu_read_unlock() is immune from deadlock.
|
||||
* However, in kernels built with CONFIG_RCU_BOOST, rcu_read_unlock()
|
||||
* is responsible for deboosting, which it does via rt_mutex_unlock().
|
||||
* Unfortunately, this function acquires the scheduler's runqueue and
|
||||
* priority-inheritance spinlocks. This means that deadlock could result
|
||||
* if the caller of rcu_read_unlock() already holds one of these locks or
|
||||
* any lock that is ever acquired while holding them.
|
||||
*
|
||||
* That said, RCU readers are never priority boosted unless they were
|
||||
* preempted. Therefore, one way to avoid deadlock is to make sure
|
||||
* that preemption never happens within any RCU read-side critical
|
||||
* section whose outermost rcu_read_unlock() is called with one of
|
||||
* rt_mutex_unlock()'s locks held. Such preemption can be avoided in
|
||||
* a number of ways, for example, by invoking preempt_disable() before
|
||||
* critical section's outermost rcu_read_lock().
|
||||
*
|
||||
* Given that the set of locks acquired by rt_mutex_unlock() might change
|
||||
* at any time, a somewhat more future-proofed approach is to make sure
|
||||
* that preemption never happens within any RCU read-side critical
|
||||
* section whose outermost rcu_read_unlock() is called with irqs disabled.
|
||||
* This approach relies on the fact that rt_mutex_unlock() currently only
|
||||
* acquires irq-disabled locks.
|
||||
*
|
||||
* The second of these two approaches is best in most situations,
|
||||
* however, the first approach can also be useful, at least to those
|
||||
* developers willing to keep abreast of the set of locks acquired by
|
||||
* rt_mutex_unlock().
|
||||
* In almost all situations, rcu_read_unlock() is immune from deadlock.
|
||||
* In recent kernels that have consolidated synchronize_sched() and
|
||||
* synchronize_rcu_bh() into synchronize_rcu(), this deadlock immunity
|
||||
* also extends to the scheduler's runqueue and priority-inheritance
|
||||
* spinlocks, courtesy of the quiescent-state deferral that is carried
|
||||
* out when rcu_read_unlock() is invoked with interrupts disabled.
|
||||
*
|
||||
* See rcu_read_lock() for more information.
|
||||
*/
|
||||
|
@ -714,9 +723,11 @@ static inline void rcu_read_unlock(void)
|
|||
/**
|
||||
* rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
|
||||
*
|
||||
* This is equivalent of rcu_read_lock(), but also disables softirqs.
|
||||
* Note that anything else that disables softirqs can also serve as
|
||||
* an RCU read-side critical section.
|
||||
* This is equivalent to rcu_read_lock(), but also disables softirqs.
|
||||
* Note that anything else that disables softirqs can also serve as an RCU
|
||||
* read-side critical section. However, please note that this equivalence
|
||||
* applies only to v5.0 and later. Before v5.0, rcu_read_lock() and
|
||||
* rcu_read_lock_bh() were unrelated.
|
||||
*
|
||||
* Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
|
||||
* must occur in the same context, for example, it is illegal to invoke
|
||||
|
@ -749,9 +760,12 @@ static inline void rcu_read_unlock_bh(void)
|
|||
/**
|
||||
* rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section
|
||||
*
|
||||
* This is equivalent of rcu_read_lock(), but disables preemption.
|
||||
* Read-side critical sections can also be introduced by anything else
|
||||
* that disables preemption, including local_irq_disable() and friends.
|
||||
* This is equivalent to rcu_read_lock(), but also disables preemption.
|
||||
* Read-side critical sections can also be introduced by anything else that
|
||||
* disables preemption, including local_irq_disable() and friends. However,
|
||||
* please note that the equivalence to rcu_read_lock() applies only to
|
||||
* v5.0 and later. Before v5.0, rcu_read_lock() and rcu_read_lock_sched()
|
||||
* were unrelated.
|
||||
*
|
||||
* Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
|
||||
* must occur in the same context, for example, it is illegal to invoke
|
||||
|
|
|
@ -86,7 +86,6 @@ static inline void rcu_irq_enter(void) { }
|
|||
static inline void rcu_irq_exit_irqson(void) { }
|
||||
static inline void rcu_irq_enter_irqson(void) { }
|
||||
static inline void rcu_irq_exit(void) { }
|
||||
static inline void rcu_irq_exit_preempt(void) { }
|
||||
static inline void rcu_irq_exit_check_preempt(void) { }
|
||||
#define rcu_is_idle_cpu(cpu) \
|
||||
(is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq())
|
||||
|
|
|
@ -49,7 +49,6 @@ void rcu_idle_enter(void);
|
|||
void rcu_idle_exit(void);
|
||||
void rcu_irq_enter(void);
|
||||
void rcu_irq_exit(void);
|
||||
void rcu_irq_exit_preempt(void);
|
||||
void rcu_irq_enter_irqson(void);
|
||||
void rcu_irq_exit_irqson(void);
|
||||
bool rcu_is_idle_cpu(int cpu);
|
||||
|
|
|
@ -64,6 +64,12 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
|
|||
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
|
||||
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
|
||||
|
||||
#ifdef CONFIG_SRCU
|
||||
void srcu_init(void);
|
||||
#else /* #ifdef CONFIG_SRCU */
|
||||
static inline void srcu_init(void) { }
|
||||
#endif /* #else #ifdef CONFIG_SRCU */
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
|
||||
/**
|
||||
|
|
|
@ -82,9 +82,7 @@ struct srcu_struct {
|
|||
/* callback for the barrier */
|
||||
/* operation. */
|
||||
struct delayed_work work;
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
struct lockdep_map dep_map;
|
||||
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
||||
};
|
||||
|
||||
/* Values for state variable (bottom bits of ->srcu_gp_seq). */
|
||||
|
|
|
@ -192,8 +192,6 @@ extern int try_to_del_timer_sync(struct timer_list *timer);
|
|||
|
||||
#define del_singleshot_timer_sync(t) del_timer_sync(t)
|
||||
|
||||
extern bool timer_curr_running(struct timer_list *timer);
|
||||
|
||||
extern void init_timers(void);
|
||||
struct hrtimer;
|
||||
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
|
||||
|
|
|
@ -278,6 +278,7 @@ TRACE_EVENT_RCU(rcu_exp_funnel_lock,
|
|||
* "WakeNot": Don't wake rcuo kthread.
|
||||
* "WakeNotPoll": Don't wake rcuo kthread because it is polling.
|
||||
* "WakeOvfIsDeferred": Wake rcuo kthread later, CB list is huge.
|
||||
* "WakeBypassIsDeferred": Wake rcuo kthread later, bypass list is contended.
|
||||
* "WokeEmpty": rcuo CB kthread woke to find empty list.
|
||||
*/
|
||||
TRACE_EVENT_RCU(rcu_nocb_wake,
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include <linux/profile.h>
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/srcu.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/writeback.h>
|
||||
|
@ -979,6 +980,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
|
|||
tick_init();
|
||||
rcu_init_nohz();
|
||||
init_timers();
|
||||
srcu_init();
|
||||
hrtimers_init();
|
||||
softirq_init();
|
||||
timekeeping_init();
|
||||
|
|
|
@ -6393,6 +6393,7 @@ asmlinkage __visible void lockdep_sys_exit(void)
|
|||
void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
|
||||
{
|
||||
struct task_struct *curr = current;
|
||||
int dl = READ_ONCE(debug_locks);
|
||||
|
||||
/* Note: the following can be executed concurrently, so be careful. */
|
||||
pr_warn("\n");
|
||||
|
@ -6402,11 +6403,12 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
|
|||
pr_warn("-----------------------------\n");
|
||||
pr_warn("%s:%d %s!\n", file, line, s);
|
||||
pr_warn("\nother info that might help us debug this:\n\n");
|
||||
pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
|
||||
pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n%s",
|
||||
!rcu_lockdep_current_cpu_online()
|
||||
? "RCU used illegally from offline CPU!\n"
|
||||
: "",
|
||||
rcu_scheduler_active, debug_locks);
|
||||
rcu_scheduler_active, dl,
|
||||
dl ? "" : "Possible false positive due to lockdep disabling via debug_locks = 0\n");
|
||||
|
||||
/*
|
||||
* If a CPU is in the RCU-free window in idle (ie: in the section
|
||||
|
|
|
@ -116,7 +116,7 @@ config RCU_EQS_DEBUG
|
|||
|
||||
config RCU_STRICT_GRACE_PERIOD
|
||||
bool "Provide debug RCU implementation with short grace periods"
|
||||
depends on DEBUG_KERNEL && RCU_EXPERT
|
||||
depends on DEBUG_KERNEL && RCU_EXPERT && NR_CPUS <= 4
|
||||
default n
|
||||
select PREEMPT_COUNT if PREEMPT=n
|
||||
help
|
||||
|
|
|
@ -308,6 +308,8 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
|
|||
}
|
||||
}
|
||||
|
||||
extern void rcu_init_geometry(void);
|
||||
|
||||
/* Returns a pointer to the first leaf rcu_node structure. */
|
||||
#define rcu_first_leaf_node() (rcu_state.level[rcu_num_lvls - 1])
|
||||
|
||||
|
@ -422,12 +424,6 @@ do { \
|
|||
|
||||
#endif /* #if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU) */
|
||||
|
||||
#ifdef CONFIG_SRCU
|
||||
void srcu_init(void);
|
||||
#else /* #ifdef CONFIG_SRCU */
|
||||
static inline void srcu_init(void) { }
|
||||
#endif /* #else #ifdef CONFIG_SRCU */
|
||||
|
||||
#ifdef CONFIG_TINY_RCU
|
||||
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
|
||||
static inline bool rcu_gp_is_normal(void) { return true; }
|
||||
|
@ -441,7 +437,11 @@ bool rcu_gp_is_expedited(void); /* Internal RCU use. */
|
|||
void rcu_expedite_gp(void);
|
||||
void rcu_unexpedite_gp(void);
|
||||
void rcupdate_announce_bootup_oddness(void);
|
||||
#ifdef CONFIG_TASKS_RCU_GENERIC
|
||||
void show_rcu_tasks_gp_kthreads(void);
|
||||
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
|
||||
static inline void show_rcu_tasks_gp_kthreads(void) {}
|
||||
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
|
||||
void rcu_request_urgent_qs_task(struct task_struct *t);
|
||||
#endif /* #else #ifdef CONFIG_TINY_RCU */
|
||||
|
||||
|
@ -519,6 +519,7 @@ static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
|
|||
static inline unsigned long
|
||||
srcu_batches_completed(struct srcu_struct *sp) { return 0; }
|
||||
static inline void rcu_force_quiescent_state(void) { }
|
||||
static inline bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) { return true; }
|
||||
static inline void show_rcu_gp_kthreads(void) { }
|
||||
static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
|
||||
static inline void rcu_fwd_progress_check(unsigned long j) { }
|
||||
|
@ -527,6 +528,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
|
|||
unsigned long rcu_get_gp_seq(void);
|
||||
unsigned long rcu_exp_batches_completed(void);
|
||||
unsigned long srcu_batches_completed(struct srcu_struct *sp);
|
||||
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup);
|
||||
void show_rcu_gp_kthreads(void);
|
||||
int rcu_get_gp_kthreads_prio(void);
|
||||
void rcu_fwd_progress_check(unsigned long j);
|
||||
|
|
|
@ -245,12 +245,6 @@ static const char *rcu_torture_writer_state_getname(void)
|
|||
return rcu_torture_writer_state_names[i];
|
||||
}
|
||||
|
||||
#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_PREEMPT_RT)
|
||||
# define rcu_can_boost() 1
|
||||
#else
|
||||
# define rcu_can_boost() 0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
static u64 notrace rcu_trace_clock_local(void)
|
||||
{
|
||||
|
@ -331,6 +325,7 @@ struct rcu_torture_ops {
|
|||
void (*read_delay)(struct torture_random_state *rrsp,
|
||||
struct rt_read_seg *rtrsp);
|
||||
void (*readunlock)(int idx);
|
||||
int (*readlock_held)(void);
|
||||
unsigned long (*get_gp_seq)(void);
|
||||
unsigned long (*gp_diff)(unsigned long new, unsigned long old);
|
||||
void (*deferred_free)(struct rcu_torture *p);
|
||||
|
@ -345,6 +340,7 @@ struct rcu_torture_ops {
|
|||
void (*fqs)(void);
|
||||
void (*stats)(void);
|
||||
void (*gp_kthread_dbg)(void);
|
||||
bool (*check_boost_failed)(unsigned long gp_state, int *cpup);
|
||||
int (*stall_dur)(void);
|
||||
int irq_capable;
|
||||
int can_boost;
|
||||
|
@ -359,6 +355,11 @@ static struct rcu_torture_ops *cur_ops;
|
|||
* Definitions for rcu torture testing.
|
||||
*/
|
||||
|
||||
static int torture_readlock_not_held(void)
|
||||
{
|
||||
return rcu_read_lock_bh_held() || rcu_read_lock_sched_held();
|
||||
}
|
||||
|
||||
static int rcu_torture_read_lock(void) __acquires(RCU)
|
||||
{
|
||||
rcu_read_lock();
|
||||
|
@ -488,6 +489,7 @@ static struct rcu_torture_ops rcu_ops = {
|
|||
.readlock = rcu_torture_read_lock,
|
||||
.read_delay = rcu_read_delay,
|
||||
.readunlock = rcu_torture_read_unlock,
|
||||
.readlock_held = torture_readlock_not_held,
|
||||
.get_gp_seq = rcu_get_gp_seq,
|
||||
.gp_diff = rcu_seq_diff,
|
||||
.deferred_free = rcu_torture_deferred_free,
|
||||
|
@ -502,9 +504,10 @@ static struct rcu_torture_ops rcu_ops = {
|
|||
.fqs = rcu_force_quiescent_state,
|
||||
.stats = NULL,
|
||||
.gp_kthread_dbg = show_rcu_gp_kthreads,
|
||||
.check_boost_failed = rcu_check_boost_fail,
|
||||
.stall_dur = rcu_jiffies_till_stall_check,
|
||||
.irq_capable = 1,
|
||||
.can_boost = rcu_can_boost(),
|
||||
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
|
||||
.extendables = RCUTORTURE_MAX_EXTEND,
|
||||
.name = "rcu"
|
||||
};
|
||||
|
@ -540,6 +543,7 @@ static struct rcu_torture_ops rcu_busted_ops = {
|
|||
.readlock = rcu_torture_read_lock,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = rcu_torture_read_unlock,
|
||||
.readlock_held = torture_readlock_not_held,
|
||||
.get_gp_seq = rcu_no_completed,
|
||||
.deferred_free = rcu_busted_torture_deferred_free,
|
||||
.sync = synchronize_rcu_busted,
|
||||
|
@ -589,6 +593,11 @@ static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
|
|||
srcu_read_unlock(srcu_ctlp, idx);
|
||||
}
|
||||
|
||||
static int torture_srcu_read_lock_held(void)
|
||||
{
|
||||
return srcu_read_lock_held(srcu_ctlp);
|
||||
}
|
||||
|
||||
static unsigned long srcu_torture_completed(void)
|
||||
{
|
||||
return srcu_batches_completed(srcu_ctlp);
|
||||
|
@ -646,6 +655,7 @@ static struct rcu_torture_ops srcu_ops = {
|
|||
.readlock = srcu_torture_read_lock,
|
||||
.read_delay = srcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock,
|
||||
.readlock_held = torture_srcu_read_lock_held,
|
||||
.get_gp_seq = srcu_torture_completed,
|
||||
.deferred_free = srcu_torture_deferred_free,
|
||||
.sync = srcu_torture_synchronize,
|
||||
|
@ -681,6 +691,7 @@ static struct rcu_torture_ops srcud_ops = {
|
|||
.readlock = srcu_torture_read_lock,
|
||||
.read_delay = srcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock,
|
||||
.readlock_held = torture_srcu_read_lock_held,
|
||||
.get_gp_seq = srcu_torture_completed,
|
||||
.deferred_free = srcu_torture_deferred_free,
|
||||
.sync = srcu_torture_synchronize,
|
||||
|
@ -700,6 +711,7 @@ static struct rcu_torture_ops busted_srcud_ops = {
|
|||
.readlock = srcu_torture_read_lock,
|
||||
.read_delay = rcu_read_delay,
|
||||
.readunlock = srcu_torture_read_unlock,
|
||||
.readlock_held = torture_srcu_read_lock_held,
|
||||
.get_gp_seq = srcu_torture_completed,
|
||||
.deferred_free = srcu_torture_deferred_free,
|
||||
.sync = srcu_torture_synchronize,
|
||||
|
@ -787,6 +799,7 @@ static struct rcu_torture_ops trivial_ops = {
|
|||
.readlock = rcu_torture_read_lock_trivial,
|
||||
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
|
||||
.readunlock = rcu_torture_read_unlock_trivial,
|
||||
.readlock_held = torture_readlock_not_held,
|
||||
.get_gp_seq = rcu_no_completed,
|
||||
.sync = synchronize_rcu_trivial,
|
||||
.exp_sync = synchronize_rcu_trivial,
|
||||
|
@ -850,6 +863,7 @@ static struct rcu_torture_ops tasks_tracing_ops = {
|
|||
.readlock = tasks_tracing_torture_read_lock,
|
||||
.read_delay = srcu_read_delay, /* just reuse srcu's version. */
|
||||
.readunlock = tasks_tracing_torture_read_unlock,
|
||||
.readlock_held = rcu_read_lock_trace_held,
|
||||
.get_gp_seq = rcu_no_completed,
|
||||
.deferred_free = rcu_tasks_tracing_torture_deferred_free,
|
||||
.sync = synchronize_rcu_tasks_trace,
|
||||
|
@ -871,32 +885,13 @@ static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
|
|||
return cur_ops->gp_diff(new, old);
|
||||
}
|
||||
|
||||
static bool __maybe_unused torturing_tasks(void)
|
||||
{
|
||||
return cur_ops == &tasks_ops || cur_ops == &tasks_rude_ops;
|
||||
}
|
||||
|
||||
/*
|
||||
* RCU torture priority-boost testing. Runs one real-time thread per
|
||||
* CPU for moderate bursts, repeatedly registering RCU callbacks and
|
||||
* spinning waiting for them to be invoked. If a given callback takes
|
||||
* too long to be invoked, we assume that priority inversion has occurred.
|
||||
* CPU for moderate bursts, repeatedly starting grace periods and waiting
|
||||
* for them to complete. If a given grace period takes too long, we assume
|
||||
* that priority inversion has occurred.
|
||||
*/
|
||||
|
||||
struct rcu_boost_inflight {
|
||||
struct rcu_head rcu;
|
||||
int inflight;
|
||||
};
|
||||
|
||||
static void rcu_torture_boost_cb(struct rcu_head *head)
|
||||
{
|
||||
struct rcu_boost_inflight *rbip =
|
||||
container_of(head, struct rcu_boost_inflight, rcu);
|
||||
|
||||
/* Ensure RCU-core accesses precede clearing ->inflight */
|
||||
smp_store_release(&rbip->inflight, 0);
|
||||
}
|
||||
|
||||
static int old_rt_runtime = -1;
|
||||
|
||||
static void rcu_torture_disable_rt_throttle(void)
|
||||
|
@ -923,49 +918,68 @@ static void rcu_torture_enable_rt_throttle(void)
|
|||
old_rt_runtime = -1;
|
||||
}
|
||||
|
||||
static bool rcu_torture_boost_failed(unsigned long start, unsigned long end)
|
||||
static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long *start)
|
||||
{
|
||||
int cpu;
|
||||
static int dbg_done;
|
||||
unsigned long end = jiffies;
|
||||
bool gp_done;
|
||||
unsigned long j;
|
||||
static unsigned long last_persist;
|
||||
unsigned long lp;
|
||||
unsigned long mininterval = test_boost_duration * HZ - HZ / 2;
|
||||
|
||||
if (end - start > test_boost_duration * HZ - HZ / 2) {
|
||||
if (end - *start > mininterval) {
|
||||
// Recheck after checking time to avoid false positives.
|
||||
smp_mb(); // Time check before grace-period check.
|
||||
if (cur_ops->poll_gp_state(gp_state))
|
||||
return false; // passed, though perhaps just barely
|
||||
if (cur_ops->check_boost_failed && !cur_ops->check_boost_failed(gp_state, &cpu)) {
|
||||
// At most one persisted message per boost test.
|
||||
j = jiffies;
|
||||
lp = READ_ONCE(last_persist);
|
||||
if (time_after(j, lp + mininterval) && cmpxchg(&last_persist, lp, j) == lp)
|
||||
pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu);
|
||||
return false; // passed on a technicality
|
||||
}
|
||||
VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed");
|
||||
n_rcu_torture_boost_failure++;
|
||||
if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg)
|
||||
if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg) {
|
||||
pr_info("Boost inversion thread ->rt_priority %u gp_state %lu jiffies %lu\n",
|
||||
current->rt_priority, gp_state, end - *start);
|
||||
cur_ops->gp_kthread_dbg();
|
||||
// Recheck after print to flag grace period ending during splat.
|
||||
gp_done = cur_ops->poll_gp_state(gp_state);
|
||||
pr_info("Boost inversion: GP %lu %s.\n", gp_state,
|
||||
gp_done ? "ended already" : "still pending");
|
||||
|
||||
return true; /* failed */
|
||||
}
|
||||
|
||||
return false; /* passed */
|
||||
return true; // failed
|
||||
} else if (cur_ops->check_boost_failed && !cur_ops->check_boost_failed(gp_state, NULL)) {
|
||||
*start = jiffies;
|
||||
}
|
||||
|
||||
return false; // passed
|
||||
}
|
||||
|
||||
static int rcu_torture_boost(void *arg)
|
||||
{
|
||||
unsigned long call_rcu_time;
|
||||
unsigned long endtime;
|
||||
unsigned long gp_state;
|
||||
unsigned long gp_state_time;
|
||||
unsigned long oldstarttime;
|
||||
struct rcu_boost_inflight rbi = { .inflight = 0 };
|
||||
|
||||
VERBOSE_TOROUT_STRING("rcu_torture_boost started");
|
||||
|
||||
/* Set real-time priority. */
|
||||
sched_set_fifo_low(current);
|
||||
|
||||
init_rcu_head_on_stack(&rbi.rcu);
|
||||
/* Each pass through the following loop does one boost-test cycle. */
|
||||
do {
|
||||
bool failed = false; // Test failed already in this test interval
|
||||
bool firsttime = true;
|
||||
bool gp_initiated = false;
|
||||
|
||||
/* Increment n_rcu_torture_boosts once per boost-test */
|
||||
while (!kthread_should_stop()) {
|
||||
if (mutex_trylock(&boost_mutex)) {
|
||||
n_rcu_torture_boosts++;
|
||||
mutex_unlock(&boost_mutex);
|
||||
break;
|
||||
}
|
||||
schedule_timeout_uninterruptible(1);
|
||||
}
|
||||
if (kthread_should_stop())
|
||||
goto checkwait;
|
||||
|
||||
|
@ -979,33 +993,33 @@ static int rcu_torture_boost(void *arg)
|
|||
goto checkwait;
|
||||
}
|
||||
|
||||
/* Do one boost-test interval. */
|
||||
// Do one boost-test interval.
|
||||
endtime = oldstarttime + test_boost_duration * HZ;
|
||||
while (time_before(jiffies, endtime)) {
|
||||
/* If we don't have a callback in flight, post one. */
|
||||
if (!smp_load_acquire(&rbi.inflight)) {
|
||||
/* RCU core before ->inflight = 1. */
|
||||
smp_store_release(&rbi.inflight, 1);
|
||||
cur_ops->call(&rbi.rcu, rcu_torture_boost_cb);
|
||||
/* Check if the boost test failed */
|
||||
if (!firsttime && !failed)
|
||||
failed = rcu_torture_boost_failed(call_rcu_time, jiffies);
|
||||
call_rcu_time = jiffies;
|
||||
firsttime = false;
|
||||
// Has current GP gone too long?
|
||||
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
|
||||
failed = rcu_torture_boost_failed(gp_state, &gp_state_time);
|
||||
// If we don't have a grace period in flight, start one.
|
||||
if (!gp_initiated || cur_ops->poll_gp_state(gp_state)) {
|
||||
gp_state = cur_ops->start_gp_poll();
|
||||
gp_initiated = true;
|
||||
gp_state_time = jiffies;
|
||||
}
|
||||
if (stutter_wait("rcu_torture_boost"))
|
||||
if (stutter_wait("rcu_torture_boost")) {
|
||||
sched_set_fifo_low(current);
|
||||
// If the grace period already ended,
|
||||
// we don't know when that happened, so
|
||||
// start over.
|
||||
if (cur_ops->poll_gp_state(gp_state))
|
||||
gp_initiated = false;
|
||||
}
|
||||
if (torture_must_stop())
|
||||
goto checkwait;
|
||||
}
|
||||
|
||||
/*
|
||||
* If boost never happened, then inflight will always be 1, in
|
||||
* this case the boost check would never happen in the above
|
||||
* loop so do another one here.
|
||||
*/
|
||||
if (!firsttime && !failed && smp_load_acquire(&rbi.inflight))
|
||||
rcu_torture_boost_failed(call_rcu_time, jiffies);
|
||||
// In case the grace period extended beyond the end of the loop.
|
||||
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
|
||||
rcu_torture_boost_failed(gp_state, &gp_state_time);
|
||||
|
||||
/*
|
||||
* Set the start time of the next test interval.
|
||||
|
@ -1014,11 +1028,12 @@ static int rcu_torture_boost(void *arg)
|
|||
* interval. Besides, we are running at RT priority,
|
||||
* so delays should be relatively rare.
|
||||
*/
|
||||
while (oldstarttime == boost_starttime &&
|
||||
!kthread_should_stop()) {
|
||||
while (oldstarttime == boost_starttime && !kthread_should_stop()) {
|
||||
if (mutex_trylock(&boost_mutex)) {
|
||||
boost_starttime = jiffies +
|
||||
test_boost_interval * HZ;
|
||||
if (oldstarttime == boost_starttime) {
|
||||
boost_starttime = jiffies + test_boost_interval * HZ;
|
||||
n_rcu_torture_boosts++;
|
||||
}
|
||||
mutex_unlock(&boost_mutex);
|
||||
break;
|
||||
}
|
||||
|
@ -1030,15 +1045,11 @@ checkwait: if (stutter_wait("rcu_torture_boost"))
|
|||
sched_set_fifo_low(current);
|
||||
} while (!torture_must_stop());
|
||||
|
||||
while (smp_load_acquire(&rbi.inflight))
|
||||
schedule_timeout_uninterruptible(1); // rcu_barrier() deadlocks.
|
||||
|
||||
/* Clean up and exit. */
|
||||
while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) {
|
||||
while (!kthread_should_stop()) {
|
||||
torture_shutdown_absorb("rcu_torture_boost");
|
||||
schedule_timeout_uninterruptible(1);
|
||||
}
|
||||
destroy_rcu_head_on_stack(&rbi.rcu);
|
||||
torture_kthread_stopping("rcu_torture_boost");
|
||||
return 0;
|
||||
}
|
||||
|
@ -1553,11 +1564,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
|
|||
started = cur_ops->get_gp_seq();
|
||||
ts = rcu_trace_clock_local();
|
||||
p = rcu_dereference_check(rcu_torture_current,
|
||||
rcu_read_lock_bh_held() ||
|
||||
rcu_read_lock_sched_held() ||
|
||||
srcu_read_lock_held(srcu_ctlp) ||
|
||||
rcu_read_lock_trace_held() ||
|
||||
torturing_tasks());
|
||||
!cur_ops->readlock_held || cur_ops->readlock_held());
|
||||
if (p == NULL) {
|
||||
/* Wait for rcu_torture_writer to get underway */
|
||||
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
|
||||
|
@ -1861,7 +1868,11 @@ rcu_torture_stats(void *arg)
|
|||
torture_shutdown_absorb("rcu_torture_stats");
|
||||
} while (!torture_must_stop());
|
||||
torture_kthread_stopping("rcu_torture_stats");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test mem_dump_obj() and friends. */
|
||||
static void rcu_torture_mem_dump_obj(void)
|
||||
{
|
||||
struct rcu_head *rhp;
|
||||
struct kmem_cache *kcp;
|
||||
|
@ -1900,9 +1911,6 @@ rcu_torture_stats(void *arg)
|
|||
vfree(rhp);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
|
||||
{
|
||||
|
@ -2634,7 +2642,7 @@ static bool rcu_torture_can_boost(void)
|
|||
|
||||
if (!(test_boost == 1 && cur_ops->can_boost) && test_boost != 2)
|
||||
return false;
|
||||
if (!cur_ops->call)
|
||||
if (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)
|
||||
return false;
|
||||
|
||||
prio = rcu_get_gp_kthreads_prio();
|
||||
|
@ -2818,6 +2826,8 @@ rcu_torture_cleanup(void)
|
|||
if (cur_ops->cleanup != NULL)
|
||||
cur_ops->cleanup();
|
||||
|
||||
rcu_torture_mem_dump_obj();
|
||||
|
||||
rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
|
||||
|
||||
if (err_segs_recorded) {
|
||||
|
@ -3120,6 +3130,21 @@ rcu_torture_init(void)
|
|||
if (firsterr < 0)
|
||||
goto unwind;
|
||||
rcutor_hp = firsterr;
|
||||
|
||||
// Testing RCU priority boosting requires rcutorture do
|
||||
// some serious abuse. Counter this by running ksoftirqd
|
||||
// at higher priority.
|
||||
if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) {
|
||||
for_each_online_cpu(cpu) {
|
||||
struct sched_param sp;
|
||||
struct task_struct *t;
|
||||
|
||||
t = per_cpu(ksoftirqd, cpu);
|
||||
WARN_ON_ONCE(!t);
|
||||
sp.sched_priority = 2;
|
||||
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
|
||||
}
|
||||
}
|
||||
}
|
||||
shutdown_jiffies = jiffies + shutdown_secs * HZ;
|
||||
firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
|
||||
|
|
|
@ -362,6 +362,111 @@ static struct ref_scale_ops rwsem_ops = {
|
|||
.name = "rwsem"
|
||||
};
|
||||
|
||||
// Definitions for global spinlock
|
||||
static DEFINE_SPINLOCK(test_lock);
|
||||
|
||||
static void ref_lock_section(const int nloops)
|
||||
{
|
||||
int i;
|
||||
|
||||
preempt_disable();
|
||||
for (i = nloops; i >= 0; i--) {
|
||||
spin_lock(&test_lock);
|
||||
spin_unlock(&test_lock);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void ref_lock_delay_section(const int nloops, const int udl, const int ndl)
|
||||
{
|
||||
int i;
|
||||
|
||||
preempt_disable();
|
||||
for (i = nloops; i >= 0; i--) {
|
||||
spin_lock(&test_lock);
|
||||
un_delay(udl, ndl);
|
||||
spin_unlock(&test_lock);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static struct ref_scale_ops lock_ops = {
|
||||
.readsection = ref_lock_section,
|
||||
.delaysection = ref_lock_delay_section,
|
||||
.name = "lock"
|
||||
};
|
||||
|
||||
// Definitions for global irq-save spinlock
|
||||
|
||||
static void ref_lock_irq_section(const int nloops)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
preempt_disable();
|
||||
for (i = nloops; i >= 0; i--) {
|
||||
spin_lock_irqsave(&test_lock, flags);
|
||||
spin_unlock_irqrestore(&test_lock, flags);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
preempt_disable();
|
||||
for (i = nloops; i >= 0; i--) {
|
||||
spin_lock_irqsave(&test_lock, flags);
|
||||
un_delay(udl, ndl);
|
||||
spin_unlock_irqrestore(&test_lock, flags);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static struct ref_scale_ops lock_irq_ops = {
|
||||
.readsection = ref_lock_irq_section,
|
||||
.delaysection = ref_lock_irq_delay_section,
|
||||
.name = "lock-irq"
|
||||
};
|
||||
|
||||
// Definitions for acquire-release.
|
||||
static DEFINE_PER_CPU(unsigned long, test_acqrel);
|
||||
|
||||
static void ref_acqrel_section(const int nloops)
|
||||
{
|
||||
unsigned long x;
|
||||
int i;
|
||||
|
||||
preempt_disable();
|
||||
for (i = nloops; i >= 0; i--) {
|
||||
x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
|
||||
smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl)
|
||||
{
|
||||
unsigned long x;
|
||||
int i;
|
||||
|
||||
preempt_disable();
|
||||
for (i = nloops; i >= 0; i--) {
|
||||
x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
|
||||
un_delay(udl, ndl);
|
||||
smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static struct ref_scale_ops acqrel_ops = {
|
||||
.readsection = ref_acqrel_section,
|
||||
.delaysection = ref_acqrel_delay_section,
|
||||
.name = "acqrel"
|
||||
};
|
||||
|
||||
static void rcu_scale_one_reader(void)
|
||||
{
|
||||
if (readdelay <= 0)
|
||||
|
@ -653,8 +758,8 @@ ref_scale_init(void)
|
|||
long i;
|
||||
int firsterr = 0;
|
||||
static struct ref_scale_ops *scale_ops[] = {
|
||||
&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops,
|
||||
&refcnt_ops, &rwlock_ops, &rwsem_ops,
|
||||
&rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops,
|
||||
&rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
|
||||
};
|
||||
|
||||
if (!torture_init_begin(scale_type, verbose))
|
||||
|
|
|
@ -80,7 +80,7 @@ do { \
|
|||
* srcu_read_unlock() running against them. So if the is_static parameter
|
||||
* is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
|
||||
*/
|
||||
static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
|
||||
static void init_srcu_struct_nodes(struct srcu_struct *ssp)
|
||||
{
|
||||
int cpu;
|
||||
int i;
|
||||
|
@ -90,6 +90,9 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
|
|||
struct srcu_node *snp;
|
||||
struct srcu_node *snp_first;
|
||||
|
||||
/* Initialize geometry if it has not already been initialized. */
|
||||
rcu_init_geometry();
|
||||
|
||||
/* Work out the overall tree geometry. */
|
||||
ssp->level[0] = &ssp->node[0];
|
||||
for (i = 1; i < rcu_num_lvls; i++)
|
||||
|
@ -148,14 +151,6 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
|
|||
timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
|
||||
sdp->ssp = ssp;
|
||||
sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
|
||||
if (is_static)
|
||||
continue;
|
||||
|
||||
/* Dynamically allocated, better be no srcu_read_locks()! */
|
||||
for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
|
||||
sdp->srcu_lock_count[i] = 0;
|
||||
sdp->srcu_unlock_count[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -179,7 +174,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
|
|||
ssp->sda = alloc_percpu(struct srcu_data);
|
||||
if (!ssp->sda)
|
||||
return -ENOMEM;
|
||||
init_srcu_struct_nodes(ssp, is_static);
|
||||
init_srcu_struct_nodes(ssp);
|
||||
ssp->srcu_gp_seq_needed_exp = 0;
|
||||
ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
|
||||
smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
|
||||
|
@ -777,9 +772,9 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp)
|
|||
spin_unlock_irqrestore_rcu_node(sdp, flags);
|
||||
|
||||
/*
|
||||
* No local callbacks, so probabalistically probe global state.
|
||||
* No local callbacks, so probabilistically probe global state.
|
||||
* Exact information would require acquiring locks, which would
|
||||
* kill scalability, hence the probabalistic nature of the probe.
|
||||
* kill scalability, hence the probabilistic nature of the probe.
|
||||
*/
|
||||
|
||||
/* First, see if enough time has passed since the last GP. */
|
||||
|
@ -1000,6 +995,9 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
|
|||
* synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
|
||||
* passed the same srcu_struct structure.
|
||||
*
|
||||
* Implementation of these memory-ordering guarantees is similar to
|
||||
* that of synchronize_rcu().
|
||||
*
|
||||
* If SRCU is likely idle, expedite the first request. This semantic
|
||||
* was provided by Classic SRCU, and is relied upon by its users, so TREE
|
||||
* SRCU must also provide it. Note that detecting idleness is heuristic
|
||||
|
@ -1392,11 +1390,15 @@ void __init srcu_init(void)
|
|||
{
|
||||
struct srcu_struct *ssp;
|
||||
|
||||
/*
|
||||
* Once that is set, call_srcu() can follow the normal path and
|
||||
* queue delayed work. This must follow RCU workqueues creation
|
||||
* and timers initialization.
|
||||
*/
|
||||
srcu_init_done = true;
|
||||
while (!list_empty(&srcu_boot_list)) {
|
||||
ssp = list_first_entry(&srcu_boot_list, struct srcu_struct,
|
||||
work.work.entry);
|
||||
check_init_srcu_struct(ssp);
|
||||
list_del_init(&ssp->work.work.entry);
|
||||
queue_work(rcu_gp_wq, &ssp->work.work);
|
||||
}
|
||||
|
|
|
@ -94,9 +94,9 @@ static void rcu_sync_func(struct rcu_head *rhp)
|
|||
rcu_sync_call(rsp);
|
||||
} else {
|
||||
/*
|
||||
* We're at least a GP after the last rcu_sync_exit(); eveybody
|
||||
* We're at least a GP after the last rcu_sync_exit(); everybody
|
||||
* will now have observed the write side critical section.
|
||||
* Let 'em rip!.
|
||||
* Let 'em rip!
|
||||
*/
|
||||
WRITE_ONCE(rsp->gp_state, GP_IDLE);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
|
|||
* struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
|
||||
* @cbs_head: Head of callback list.
|
||||
* @cbs_tail: Tail pointer for callback list.
|
||||
* @cbs_wq: Wait queue allowning new callback to get kthread's attention.
|
||||
* @cbs_wq: Wait queue allowing new callback to get kthread's attention.
|
||||
* @cbs_lock: Lock protecting callback list.
|
||||
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
|
||||
* @gp_func: This flavor's grace-period-wait function.
|
||||
|
@ -377,6 +377,46 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
|
|||
// Finally, this implementation does not support high call_rcu_tasks()
|
||||
// rates from multiple CPUs. If this is required, per-CPU callback lists
|
||||
// will be needed.
|
||||
//
|
||||
// The implementation uses rcu_tasks_wait_gp(), which relies on function
|
||||
// pointers in the rcu_tasks structure. The rcu_spawn_tasks_kthread()
|
||||
// function sets these function pointers up so that rcu_tasks_wait_gp()
|
||||
// invokes these functions in this order:
|
||||
//
|
||||
// rcu_tasks_pregp_step():
|
||||
// Invokes synchronize_rcu() in order to wait for all in-flight
|
||||
// t->on_rq and t->nvcsw transitions to complete. This works because
|
||||
// all such transitions are carried out with interrupts disabled.
|
||||
// rcu_tasks_pertask(), invoked on every non-idle task:
|
||||
// For every runnable non-idle task other than the current one, use
|
||||
// get_task_struct() to pin down that task, snapshot that task's
|
||||
// number of voluntary context switches, and add that task to the
|
||||
// holdout list.
|
||||
// rcu_tasks_postscan():
|
||||
// Invoke synchronize_srcu() to ensure that all tasks that were
|
||||
// in the process of exiting (and which thus might not know to
|
||||
// synchronize with this RCU Tasks grace period) have completed
|
||||
// exiting.
|
||||
// check_all_holdout_tasks(), repeatedly until holdout list is empty:
|
||||
// Scans the holdout list, attempting to identify a quiescent state
|
||||
// for each task on the list. If there is a quiescent state, the
|
||||
// corresponding task is removed from the holdout list.
|
||||
// rcu_tasks_postgp():
|
||||
// Invokes synchronize_rcu() in order to ensure that all prior
|
||||
// t->on_rq and t->nvcsw transitions are seen by all CPUs and tasks
|
||||
// to have happened before the end of this RCU Tasks grace period.
|
||||
// Again, this works because all such transitions are carried out
|
||||
// with interrupts disabled.
|
||||
//
|
||||
// For each exiting task, the exit_tasks_rcu_start() and
|
||||
// exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
|
||||
// read-side critical sections waited for by rcu_tasks_postscan().
|
||||
//
|
||||
// Pre-grace-period update-side code is ordered before the grace period via the
|
||||
// ->cbs_lock and the smp_mb__after_spinlock(). Pre-grace-period read-side
|
||||
// code is ordered before the grace period via synchronize_rcu() call
|
||||
// in rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
|
||||
// disabling.
|
||||
|
||||
/* Pre-grace-period preparation. */
|
||||
static void rcu_tasks_pregp_step(void)
|
||||
|
@ -504,7 +544,7 @@ DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks");
|
|||
* or transition to usermode execution. As such, there are no read-side
|
||||
* primitives analogous to rcu_read_lock() and rcu_read_unlock() because
|
||||
* this primitive is intended to determine that all tasks have passed
|
||||
* through a safe state, not so much for data-strcuture synchronization.
|
||||
* through a safe state, not so much for data-structure synchronization.
|
||||
*
|
||||
* See the description of call_rcu() for more detailed information on
|
||||
* memory ordering guarantees.
|
||||
|
@ -605,8 +645,13 @@ void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
|
|||
// passing an empty function to schedule_on_each_cpu(). This approach
|
||||
// provides an asynchronous call_rcu_tasks_rude() API and batching
|
||||
// of concurrent calls to the synchronous synchronize_rcu_rude() API.
|
||||
// This sends IPIs far and wide and induces otherwise unnecessary context
|
||||
// switches on all online CPUs, whether idle or not.
|
||||
// This invokes schedule_on_each_cpu() in order to send IPIs far and wide
|
||||
// and induces otherwise unnecessary context switches on all online CPUs,
|
||||
// whether idle or not.
|
||||
//
|
||||
// Callback handling is provided by the rcu_tasks_kthread() function.
|
||||
//
|
||||
// Ordering is provided by the scheduler's context-switch code.
|
||||
|
||||
// Empty function to allow workqueues to force a context switch.
|
||||
static void rcu_tasks_be_rude(struct work_struct *work)
|
||||
|
@ -637,7 +682,7 @@ DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude,
|
|||
* there are no read-side primitives analogous to rcu_read_lock() and
|
||||
* rcu_read_unlock() because this primitive is intended to determine
|
||||
* that all tasks have passed through a safe state, not so much for
|
||||
* data-strcuture synchronization.
|
||||
* data-structure synchronization.
|
||||
*
|
||||
* See the description of call_rcu() for more detailed information on
|
||||
* memory ordering guarantees.
|
||||
|
@ -1163,7 +1208,7 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t)
|
|||
* there are no read-side primitives analogous to rcu_read_lock() and
|
||||
* rcu_read_unlock() because this primitive is intended to determine
|
||||
* that all tasks have passed through a safe state, not so much for
|
||||
* data-strcuture synchronization.
|
||||
* data-structure synchronization.
|
||||
*
|
||||
* See the description of call_rcu() for more detailed information on
|
||||
* memory ordering guarantees.
|
||||
|
@ -1356,5 +1401,4 @@ void __init rcu_init_tasks_generic(void)
|
|||
|
||||
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
|
||||
static inline void rcu_tasks_bootup_oddness(void) {}
|
||||
void show_rcu_tasks_gp_kthreads(void) {}
|
||||
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
|
||||
|
|
|
@ -221,5 +221,4 @@ void __init rcu_init(void)
|
|||
{
|
||||
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
|
||||
rcu_early_boot_tests();
|
||||
srcu_init();
|
||||
}
|
||||
|
|
|
@ -186,6 +186,17 @@ module_param(rcu_unlock_delay, int, 0444);
|
|||
static int rcu_min_cached_objs = 5;
|
||||
module_param(rcu_min_cached_objs, int, 0444);
|
||||
|
||||
// A page shrinker can ask for pages to be freed to make them
|
||||
// available for other parts of the system. This usually happens
|
||||
// under low memory conditions, and in that case we should also
|
||||
// defer page-cache filling for a short time period.
|
||||
//
|
||||
// The default value is 5 seconds, which is long enough to reduce
|
||||
// interference with the shrinker while it asks other systems to
|
||||
// drain their caches.
|
||||
static int rcu_delay_page_cache_fill_msec = 5000;
|
||||
module_param(rcu_delay_page_cache_fill_msec, int, 0444);
|
||||
|
||||
/* Retrieve RCU kthreads priority for rcutorture */
|
||||
int rcu_get_gp_kthreads_prio(void)
|
||||
{
|
||||
|
@ -202,7 +213,7 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);
|
|||
* the need for long delays to increase some race probabilities with the
|
||||
* need for fast grace periods to increase other race probabilities.
|
||||
*/
|
||||
#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */
|
||||
#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays for debugging. */
|
||||
|
||||
/*
|
||||
* Compute the mask of online CPUs for the specified rcu_node structure.
|
||||
|
@ -242,6 +253,7 @@ void rcu_softirq_qs(void)
|
|||
{
|
||||
rcu_qs();
|
||||
rcu_preempt_deferred_qs(current);
|
||||
rcu_tasks_qs(current, false);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -833,28 +845,6 @@ void noinstr rcu_irq_exit(void)
|
|||
rcu_nmi_exit();
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_irq_exit_preempt - Inform RCU that current CPU is exiting irq
|
||||
* towards in kernel preemption
|
||||
*
|
||||
* Same as rcu_irq_exit() but has a sanity check that scheduling is safe
|
||||
* from RCU point of view. Invoked from return from interrupt before kernel
|
||||
* preemption.
|
||||
*/
|
||||
void rcu_irq_exit_preempt(void)
|
||||
{
|
||||
lockdep_assert_irqs_disabled();
|
||||
rcu_nmi_exit();
|
||||
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
|
||||
"RCU dynticks_nesting counter underflow/zero!");
|
||||
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
|
||||
DYNTICK_IRQ_NONIDLE,
|
||||
"Bad RCU dynticks_nmi_nesting counter\n");
|
||||
RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
|
||||
"RCU in extended quiescent state!");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
/**
|
||||
* rcu_irq_exit_check_preempt - Validate that scheduling is possible
|
||||
|
@ -959,7 +949,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
|
|||
*/
|
||||
void noinstr rcu_user_exit(void)
|
||||
{
|
||||
rcu_eqs_exit(1);
|
||||
rcu_eqs_exit(true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1225,7 +1215,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
|
|||
#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
|
||||
|
||||
/*
|
||||
* We are reporting a quiescent state on behalf of some other CPU, so
|
||||
* When trying to report a quiescent state on behalf of some other CPU,
|
||||
* it is our responsibility to check for and handle potential overflow
|
||||
* of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
|
||||
* After all, the CPU might be in deep idle state, and thus executing no
|
||||
|
@ -2048,7 +2038,7 @@ static void rcu_gp_fqs_loop(void)
|
|||
/*
|
||||
* Clean up after the old grace period.
|
||||
*/
|
||||
static void rcu_gp_cleanup(void)
|
||||
static noinline void rcu_gp_cleanup(void)
|
||||
{
|
||||
int cpu;
|
||||
bool needgp = false;
|
||||
|
@ -2489,7 +2479,7 @@ int rcutree_dead_cpu(unsigned int cpu)
|
|||
|
||||
/*
|
||||
* Invoke any RCU callbacks that have made it to the end of their grace
|
||||
* period. Thottle as specified by rdp->blimit.
|
||||
* period. Throttle as specified by rdp->blimit.
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
{
|
||||
|
@ -2629,7 +2619,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
|||
* state, for example, user mode or idle loop. It also schedules RCU
|
||||
* core processing. If the current grace period has gone on too long,
|
||||
* it will ask the scheduler to manufacture a context switch for the sole
|
||||
* purpose of providing a providing the needed quiescent state.
|
||||
* purpose of providing the needed quiescent state.
|
||||
*/
|
||||
void rcu_sched_clock_irq(int user)
|
||||
{
|
||||
|
@ -2911,7 +2901,6 @@ static int __init rcu_spawn_core_kthreads(void)
|
|||
"%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
|
||||
return 0;
|
||||
}
|
||||
early_initcall(rcu_spawn_core_kthreads);
|
||||
|
||||
/*
|
||||
* Handle any core-RCU processing required by a call_rcu() invocation.
|
||||
|
@ -3082,12 +3071,14 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
|
|||
* period elapses, in other words after all pre-existing RCU read-side
|
||||
* critical sections have completed. However, the callback function
|
||||
* might well execute concurrently with RCU read-side critical sections
|
||||
* that started after call_rcu() was invoked. RCU read-side critical
|
||||
* sections are delimited by rcu_read_lock() and rcu_read_unlock(), and
|
||||
* may be nested. In addition, regions of code across which interrupts,
|
||||
* preemption, or softirqs have been disabled also serve as RCU read-side
|
||||
* critical sections. This includes hardware interrupt handlers, softirq
|
||||
* handlers, and NMI handlers.
|
||||
* that started after call_rcu() was invoked.
|
||||
*
|
||||
* RCU read-side critical sections are delimited by rcu_read_lock()
|
||||
* and rcu_read_unlock(), and may be nested. In addition, but only in
|
||||
* v5.0 and later, regions of code across which interrupts, preemption,
|
||||
* or softirqs have been disabled also serve as RCU read-side critical
|
||||
* sections. This includes hardware interrupt handlers, softirq handlers,
|
||||
* and NMI handlers.
|
||||
*
|
||||
* Note that all CPUs must agree that the grace period extended beyond
|
||||
* all pre-existing RCU read-side critical sections. On systems with more
|
||||
|
@ -3107,6 +3098,9 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
|
|||
* between the call to call_rcu() and the invocation of "func()" -- even
|
||||
* if CPU A and CPU B are the same CPU (but again only if the system has
|
||||
* more than one CPU).
|
||||
*
|
||||
* Implementation of these memory-ordering guarantees is described here:
|
||||
* Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
|
||||
*/
|
||||
void call_rcu(struct rcu_head *head, rcu_callback_t func)
|
||||
{
|
||||
|
@ -3171,6 +3165,7 @@ struct kfree_rcu_cpu_work {
|
|||
* Even though it is lockless an access has to be protected by the
|
||||
* per-cpu lock.
|
||||
* @page_cache_work: A work to refill the cache when it is empty
|
||||
* @backoff_page_cache_fill: Delay cache refills
|
||||
* @work_in_progress: Indicates that page_cache_work is running
|
||||
* @hrtimer: A hrtimer for scheduling a page_cache_work
|
||||
* @nr_bkv_objs: number of allocated objects at @bkvcache.
|
||||
|
@ -3190,7 +3185,8 @@ struct kfree_rcu_cpu {
|
|||
bool initialized;
|
||||
int count;
|
||||
|
||||
struct work_struct page_cache_work;
|
||||
struct delayed_work page_cache_work;
|
||||
atomic_t backoff_page_cache_fill;
|
||||
atomic_t work_in_progress;
|
||||
struct hrtimer hrtimer;
|
||||
|
||||
|
@ -3237,7 +3233,7 @@ get_cached_bnode(struct kfree_rcu_cpu *krcp)
|
|||
if (!krcp->nr_bkv_objs)
|
||||
return NULL;
|
||||
|
||||
krcp->nr_bkv_objs--;
|
||||
WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs - 1);
|
||||
return (struct kvfree_rcu_bulk_data *)
|
||||
llist_del_first(&krcp->bkvcache);
|
||||
}
|
||||
|
@ -3251,14 +3247,33 @@ put_cached_bnode(struct kfree_rcu_cpu *krcp,
|
|||
return false;
|
||||
|
||||
llist_add((struct llist_node *) bnode, &krcp->bkvcache);
|
||||
krcp->nr_bkv_objs++;
|
||||
WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs + 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
drain_page_cache(struct kfree_rcu_cpu *krcp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct llist_node *page_list, *pos, *n;
|
||||
int freed = 0;
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
page_list = llist_del_all(&krcp->bkvcache);
|
||||
WRITE_ONCE(krcp->nr_bkv_objs, 0);
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
llist_for_each_safe(pos, n, page_list) {
|
||||
free_page((unsigned long)pos);
|
||||
freed++;
|
||||
}
|
||||
|
||||
return freed;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is invoked in workqueue context after a grace period.
|
||||
* It frees all the objects queued on ->bhead_free or ->head_free.
|
||||
* It frees all the objects queued on ->bkvhead_free or ->head_free.
|
||||
*/
|
||||
static void kfree_rcu_work(struct work_struct *work)
|
||||
{
|
||||
|
@ -3285,7 +3300,7 @@ static void kfree_rcu_work(struct work_struct *work)
|
|||
krwp->head_free = NULL;
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
|
||||
// Handle two first channels.
|
||||
// Handle the first two channels.
|
||||
for (i = 0; i < FREE_N_CHANNELS; i++) {
|
||||
for (; bkvhead[i]; bkvhead[i] = bnext) {
|
||||
bnext = bkvhead[i]->next;
|
||||
|
@ -3323,9 +3338,11 @@ static void kfree_rcu_work(struct work_struct *work)
|
|||
}
|
||||
|
||||
/*
|
||||
* Emergency case only. It can happen under low memory
|
||||
* condition when an allocation gets failed, so the "bulk"
|
||||
* path can not be temporary maintained.
|
||||
* This is used when the "bulk" path can not be used for the
|
||||
* double-argument of kvfree_rcu(). This happens when the
|
||||
* page-cache is empty, which means that objects are instead
|
||||
* queued on a linked list through their rcu_head structures.
|
||||
* This list is named "Channel 3".
|
||||
*/
|
||||
for (; head; head = next) {
|
||||
unsigned long offset = (unsigned long)head->func;
|
||||
|
@ -3345,34 +3362,31 @@ static void kfree_rcu_work(struct work_struct *work)
|
|||
}
|
||||
|
||||
/*
|
||||
* Schedule the kfree batch RCU work to run in workqueue context after a GP.
|
||||
*
|
||||
* This function is invoked by kfree_rcu_monitor() when the KFREE_DRAIN_JIFFIES
|
||||
* timeout has been reached.
|
||||
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
|
||||
*/
|
||||
static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
|
||||
static void kfree_rcu_monitor(struct work_struct *work)
|
||||
{
|
||||
struct kfree_rcu_cpu_work *krwp;
|
||||
bool repeat = false;
|
||||
struct kfree_rcu_cpu *krcp = container_of(work,
|
||||
struct kfree_rcu_cpu, monitor_work.work);
|
||||
unsigned long flags;
|
||||
int i, j;
|
||||
|
||||
lockdep_assert_held(&krcp->lock);
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
|
||||
// Attempt to start a new batch.
|
||||
for (i = 0; i < KFREE_N_BATCHES; i++) {
|
||||
krwp = &(krcp->krw_arr[i]);
|
||||
struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);
|
||||
|
||||
/*
|
||||
* Try to detach bkvhead or head and attach it over any
|
||||
* available corresponding free channel. It can be that
|
||||
* a previous RCU batch is in progress, it means that
|
||||
* immediately to queue another one is not possible so
|
||||
* return false to tell caller to retry.
|
||||
*/
|
||||
// Try to detach bkvhead or head and attach it over any
|
||||
// available corresponding free channel. It can be that
|
||||
// a previous RCU batch is in progress, it means that
|
||||
// queuing another one immediately is not possible, so
|
||||
// in that case the monitor work is rearmed.
|
||||
if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
|
||||
(krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
|
||||
(krcp->head && !krwp->head_free)) {
|
||||
// Channel 1 corresponds to SLAB ptrs.
|
||||
// Channel 2 corresponds to vmalloc ptrs.
|
||||
// Channel 1 corresponds to the SLAB-pointer bulk path.
|
||||
// Channel 2 corresponds to vmalloc-pointer bulk path.
|
||||
for (j = 0; j < FREE_N_CHANNELS; j++) {
|
||||
if (!krwp->bkvhead_free[j]) {
|
||||
krwp->bkvhead_free[j] = krcp->bkvhead[j];
|
||||
|
@ -3380,7 +3394,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
|
|||
}
|
||||
}
|
||||
|
||||
// Channel 3 corresponds to emergency path.
|
||||
// Channel 3 corresponds to both SLAB and vmalloc
|
||||
// objects queued on the linked list.
|
||||
if (!krwp->head_free) {
|
||||
krwp->head_free = krcp->head;
|
||||
krcp->head = NULL;
|
||||
|
@ -3388,55 +3403,25 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
|
|||
|
||||
WRITE_ONCE(krcp->count, 0);
|
||||
|
||||
/*
|
||||
* One work is per one batch, so there are three
|
||||
* "free channels", the batch can handle. It can
|
||||
* be that the work is in the pending state when
|
||||
* channels have been detached following by each
|
||||
* other.
|
||||
*/
|
||||
// One work is per one batch, so there are three
|
||||
// "free channels", the batch can handle. It can
|
||||
// be that the work is in the pending state when
|
||||
// channels have been detached one after the
|
||||
// other.
|
||||
queue_rcu_work(system_wq, &krwp->rcu_work);
|
||||
}
|
||||
|
||||
// Repeat if any "free" corresponding channel is still busy.
|
||||
if (krcp->bkvhead[0] || krcp->bkvhead[1] || krcp->head)
|
||||
repeat = true;
|
||||
}
|
||||
|
||||
return !repeat;
|
||||
}
|
||||
|
||||
static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
|
||||
unsigned long flags)
|
||||
{
|
||||
// Attempt to start a new batch.
|
||||
// If there is nothing to detach, it means that our job is
|
||||
// successfully done here. In case of having at least one
|
||||
// of the channels that is still busy we should rearm the
|
||||
// work to repeat an attempt, because previous batches are
|
||||
// still in progress.
|
||||
if (!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head)
|
||||
krcp->monitor_todo = false;
|
||||
if (queue_kfree_rcu_work(krcp)) {
|
||||
// Success! Our job is done here.
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
// Previous RCU batch still in progress, try again later.
|
||||
krcp->monitor_todo = true;
|
||||
schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
|
||||
* It invokes kfree_rcu_drain_unlock() to attempt to start another batch.
|
||||
*/
|
||||
static void kfree_rcu_monitor(struct work_struct *work)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kfree_rcu_cpu *krcp = container_of(work, struct kfree_rcu_cpu,
|
||||
monitor_work.work);
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
if (krcp->monitor_todo)
|
||||
kfree_rcu_drain_unlock(krcp, flags);
|
||||
else
|
||||
schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
|
||||
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
}
|
||||
|
||||
|
@ -3446,7 +3431,7 @@ schedule_page_work_fn(struct hrtimer *t)
|
|||
struct kfree_rcu_cpu *krcp =
|
||||
container_of(t, struct kfree_rcu_cpu, hrtimer);
|
||||
|
||||
queue_work(system_highpri_wq, &krcp->page_cache_work);
|
||||
queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0);
|
||||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
|
@ -3455,12 +3440,16 @@ static void fill_page_cache_func(struct work_struct *work)
|
|||
struct kvfree_rcu_bulk_data *bnode;
|
||||
struct kfree_rcu_cpu *krcp =
|
||||
container_of(work, struct kfree_rcu_cpu,
|
||||
page_cache_work);
|
||||
page_cache_work.work);
|
||||
unsigned long flags;
|
||||
int nr_pages;
|
||||
bool pushed;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rcu_min_cached_objs; i++) {
|
||||
nr_pages = atomic_read(&krcp->backoff_page_cache_fill) ?
|
||||
1 : rcu_min_cached_objs;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
bnode = (struct kvfree_rcu_bulk_data *)
|
||||
__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
||||
|
||||
|
@ -3477,6 +3466,7 @@ static void fill_page_cache_func(struct work_struct *work)
|
|||
}
|
||||
|
||||
atomic_set(&krcp->work_in_progress, 0);
|
||||
atomic_set(&krcp->backoff_page_cache_fill, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3484,12 +3474,17 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp)
|
|||
{
|
||||
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
|
||||
!atomic_xchg(&krcp->work_in_progress, 1)) {
|
||||
hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_REL);
|
||||
if (atomic_read(&krcp->backoff_page_cache_fill)) {
|
||||
queue_delayed_work(system_wq,
|
||||
&krcp->page_cache_work,
|
||||
msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
|
||||
} else {
|
||||
hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
krcp->hrtimer.function = schedule_page_work_fn;
|
||||
hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Record ptr in a page managed by krcp, with the pre-krc_this_cpu_lock()
|
||||
// state specified by flags. If can_alloc is true, the caller must
|
||||
|
@ -3552,11 +3547,11 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
|
|||
}
|
||||
|
||||
/*
|
||||
* Queue a request for lazy invocation of appropriate free routine after a
|
||||
* grace period. Please note there are three paths are maintained, two are the
|
||||
* main ones that use array of pointers interface and third one is emergency
|
||||
* one, that is used only when the main path can not be maintained temporary,
|
||||
* due to memory pressure.
|
||||
* Queue a request for lazy invocation of the appropriate free routine
|
||||
* after a grace period. Please note that three paths are maintained,
|
||||
* two for the common case using arrays of pointers and a third one that
|
||||
* is used only when the main paths cannot be used, for example, due to
|
||||
* memory pressure.
|
||||
*
|
||||
* Each kvfree_call_rcu() request is added to a batch. The batch will be drained
|
||||
* every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch will
|
||||
|
@ -3645,6 +3640,8 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
|
|||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
count += READ_ONCE(krcp->count);
|
||||
count += READ_ONCE(krcp->nr_bkv_objs);
|
||||
atomic_set(&krcp->backoff_page_cache_fill, 1);
|
||||
}
|
||||
|
||||
return count;
|
||||
|
@ -3654,18 +3651,14 @@ static unsigned long
|
|||
kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
int cpu, freed = 0;
|
||||
unsigned long flags;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
int count;
|
||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
count = krcp->count;
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
if (krcp->monitor_todo)
|
||||
kfree_rcu_drain_unlock(krcp, flags);
|
||||
else
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
count += drain_page_cache(krcp);
|
||||
kfree_rcu_monitor(&krcp->monitor_work.work);
|
||||
|
||||
sc->nr_to_scan -= count;
|
||||
freed += count;
|
||||
|
@ -3693,7 +3686,8 @@ void __init kfree_rcu_scheduler_running(void)
|
|||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
raw_spin_lock_irqsave(&krcp->lock, flags);
|
||||
if (!krcp->head || krcp->monitor_todo) {
|
||||
if ((!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head) ||
|
||||
krcp->monitor_todo) {
|
||||
raw_spin_unlock_irqrestore(&krcp->lock, flags);
|
||||
continue;
|
||||
}
|
||||
|
@ -3750,10 +3744,12 @@ static int rcu_blocking_is_gp(void)
|
|||
* read-side critical sections have completed. Note, however, that
|
||||
* upon return from synchronize_rcu(), the caller might well be executing
|
||||
* concurrently with new RCU read-side critical sections that began while
|
||||
* synchronize_rcu() was waiting. RCU read-side critical sections are
|
||||
* delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
|
||||
* In addition, regions of code across which interrupts, preemption, or
|
||||
* softirqs have been disabled also serve as RCU read-side critical
|
||||
* synchronize_rcu() was waiting.
|
||||
*
|
||||
* RCU read-side critical sections are delimited by rcu_read_lock()
|
||||
* and rcu_read_unlock(), and may be nested. In addition, but only in
|
||||
* v5.0 and later, regions of code across which interrupts, preemption,
|
||||
* or softirqs have been disabled also serve as RCU read-side critical
|
||||
* sections. This includes hardware interrupt handlers, softirq handlers,
|
||||
* and NMI handlers.
|
||||
*
|
||||
|
@ -3774,6 +3770,9 @@ static int rcu_blocking_is_gp(void)
|
|||
* to have executed a full memory barrier during the execution of
|
||||
* synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
|
||||
* again only if the system has more than one CPU).
|
||||
*
|
||||
* Implementation of these memory-ordering guarantees is described here:
|
||||
* Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
|
||||
*/
|
||||
void synchronize_rcu(void)
|
||||
{
|
||||
|
@ -3844,11 +3843,11 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
|
|||
/**
|
||||
* poll_state_synchronize_rcu - Has the specified RCU grace period completed?
|
||||
*
|
||||
* @oldstate: return from call to get_state_synchronize_rcu() or start_poll_synchronize_rcu()
|
||||
* @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
|
||||
*
|
||||
* If a full RCU grace period has elapsed since the earlier call from
|
||||
* which oldstate was obtained, return @true, otherwise return @false.
|
||||
* If @false is returned, it is the caller's responsibilty to invoke this
|
||||
* If @false is returned, it is the caller's responsibility to invoke this
|
||||
* function later on until it does return @true. Alternatively, the caller
|
||||
* can explicitly wait for a grace period, for example, by passing @oldstate
|
||||
* to cond_synchronize_rcu() or by directly invoking synchronize_rcu().
|
||||
|
@ -3860,6 +3859,11 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
|
|||
* (many hours even on 32-bit systems) should check them occasionally
|
||||
* and either refresh them or set a flag indicating that the grace period
|
||||
* has completed.
|
||||
*
|
||||
* This function provides the same memory-ordering guarantees that
|
||||
* would be provided by a synchronize_rcu() that was invoked at the call
|
||||
* to the function that provided @oldstate, and that returned at the end
|
||||
* of this function.
|
||||
*/
|
||||
bool poll_state_synchronize_rcu(unsigned long oldstate)
|
||||
{
|
||||
|
@ -3874,7 +3878,7 @@ EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
|
|||
/**
|
||||
* cond_synchronize_rcu - Conditionally wait for an RCU grace period
|
||||
*
|
||||
* @oldstate: return value from earlier call to get_state_synchronize_rcu()
|
||||
* @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
|
||||
*
|
||||
* If a full RCU grace period has elapsed since the earlier call to
|
||||
* get_state_synchronize_rcu() or start_poll_synchronize_rcu(), just return.
|
||||
|
@ -3884,6 +3888,11 @@ EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
|
|||
* counter wrap is harmless. If the counter wraps, we have waited for
|
||||
* more than 2 billion grace periods (and way more on a 64-bit system!),
|
||||
* so waiting for one additional grace period should be just fine.
|
||||
*
|
||||
* This function provides the same memory-ordering guarantees that
|
||||
* would be provided by a synchronize_rcu() that was invoked at the call
|
||||
* to the function that provided @oldstate, and that returned at the end
|
||||
* of this function.
|
||||
*/
|
||||
void cond_synchronize_rcu(unsigned long oldstate)
|
||||
{
|
||||
|
@ -3911,7 +3920,7 @@ static int rcu_pending(int user)
|
|||
check_cpu_stall(rdp);
|
||||
|
||||
/* Does this CPU need a deferred NOCB wakeup? */
|
||||
if (rcu_nocb_need_deferred_wakeup(rdp))
|
||||
if (rcu_nocb_need_deferred_wakeup(rdp, RCU_NOCB_WAKE))
|
||||
return 1;
|
||||
|
||||
/* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */
|
||||
|
@ -4094,7 +4103,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier);
|
|||
/*
|
||||
* Propagate ->qsinitmask bits up the rcu_node tree to account for the
|
||||
* first CPU in a given leaf rcu_node structure coming online. The caller
|
||||
* must hold the corresponding leaf rcu_node ->lock with interrrupts
|
||||
* must hold the corresponding leaf rcu_node ->lock with interrupts
|
||||
* disabled.
|
||||
*/
|
||||
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
|
||||
|
@ -4189,7 +4198,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
|
|||
rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
|
||||
trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
rcu_prepare_kthreads(cpu);
|
||||
rcu_spawn_one_boost_kthread(rnp);
|
||||
rcu_spawn_cpu_nocb_kthread(cpu);
|
||||
WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
|
||||
|
||||
|
@ -4472,6 +4481,7 @@ static int __init rcu_spawn_gp_kthread(void)
|
|||
wake_up_process(t);
|
||||
rcu_spawn_nocb_kthreads();
|
||||
rcu_spawn_boost_kthreads();
|
||||
rcu_spawn_core_kthreads();
|
||||
return 0;
|
||||
}
|
||||
early_initcall(rcu_spawn_gp_kthread);
|
||||
|
@ -4582,11 +4592,25 @@ static void __init rcu_init_one(void)
|
|||
* replace the definitions in tree.h because those are needed to size
|
||||
* the ->node array in the rcu_state structure.
|
||||
*/
|
||||
static void __init rcu_init_geometry(void)
|
||||
void rcu_init_geometry(void)
|
||||
{
|
||||
ulong d;
|
||||
int i;
|
||||
static unsigned long old_nr_cpu_ids;
|
||||
int rcu_capacity[RCU_NUM_LVLS];
|
||||
static bool initialized;
|
||||
|
||||
if (initialized) {
|
||||
/*
|
||||
* Warn if setup_nr_cpu_ids() had not yet been invoked,
|
||||
* unless nr_cpu_ids == NR_CPUS, in which case who cares?
|
||||
*/
|
||||
WARN_ON_ONCE(old_nr_cpu_ids != nr_cpu_ids);
|
||||
return;
|
||||
}
|
||||
|
||||
old_nr_cpu_ids = nr_cpu_ids;
|
||||
initialized = true;
|
||||
|
||||
/*
|
||||
* Initialize any unspecified boot parameters.
|
||||
|
@ -4687,6 +4711,18 @@ static void __init kfree_rcu_batch_init(void)
|
|||
int cpu;
|
||||
int i;
|
||||
|
||||
/* Clamp rcu_delay_page_cache_fill_msec to the [0:100] second range. */
|
||||
if (rcu_delay_page_cache_fill_msec < 0 ||
|
||||
rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) {
|
||||
|
||||
rcu_delay_page_cache_fill_msec =
|
||||
clamp(rcu_delay_page_cache_fill_msec, 0,
|
||||
(int) (100 * MSEC_PER_SEC));
|
||||
|
||||
pr_info("Adjusting rcutree.rcu_delay_page_cache_fill_msec to %d ms.\n",
|
||||
rcu_delay_page_cache_fill_msec);
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
|
||||
|
||||
|
@ -4696,7 +4732,7 @@ static void __init kfree_rcu_batch_init(void)
|
|||
}
|
||||
|
||||
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
|
||||
INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
|
||||
INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
|
||||
krcp->initialized = true;
|
||||
}
|
||||
if (register_shrinker(&kfree_rcu_shrinker))
|
||||
|
@ -4730,12 +4766,11 @@ void __init rcu_init(void)
|
|||
rcutree_online_cpu(cpu);
|
||||
}
|
||||
|
||||
/* Create workqueue for expedited GPs and for Tree SRCU. */
|
||||
/* Create workqueue for Tree SRCU and for expedited GPs. */
|
||||
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
|
||||
WARN_ON(!rcu_gp_wq);
|
||||
rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
|
||||
WARN_ON(!rcu_par_gp_wq);
|
||||
srcu_init();
|
||||
|
||||
/* Fill in default value for rcutree.qovld boot parameter. */
|
||||
/* -After- the rcu_node ->lock fields are initialized! */
|
||||
|
|
|
@ -115,6 +115,7 @@ struct rcu_node {
|
|||
/* boosting for this rcu_node structure. */
|
||||
unsigned int boost_kthread_status;
|
||||
/* State of boost_kthread_task for tracing. */
|
||||
unsigned long n_boosts; /* Number of boosts for this rcu_node structure. */
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
struct swait_queue_head nocb_gp_wq[2];
|
||||
/* Place for rcu_nocb_kthread() to wait for a GP. */
|
||||
|
@ -153,7 +154,7 @@ struct rcu_data {
|
|||
unsigned long gp_seq; /* Track rsp->gp_seq counter. */
|
||||
unsigned long gp_seq_needed; /* Track furthest future GP request. */
|
||||
union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */
|
||||
bool core_needs_qs; /* Core waits for quiesc state. */
|
||||
bool core_needs_qs; /* Core waits for quiescent state. */
|
||||
bool beenonline; /* CPU online at least once. */
|
||||
bool gpwrap; /* Possible ->gp_seq wrap. */
|
||||
bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
|
||||
|
@ -218,7 +219,6 @@ struct rcu_data {
|
|||
|
||||
/* The following fields are used by GP kthread, hence own cacheline. */
|
||||
raw_spinlock_t nocb_gp_lock ____cacheline_internodealigned_in_smp;
|
||||
struct timer_list nocb_bypass_timer; /* Force nocb_bypass flush. */
|
||||
u8 nocb_gp_sleep; /* Is the nocb GP thread asleep? */
|
||||
u8 nocb_gp_bypass; /* Found a bypass on last scan? */
|
||||
u8 nocb_gp_gp; /* GP to wait for on last scan? */
|
||||
|
@ -257,10 +257,10 @@ struct rcu_data {
|
|||
};
|
||||
|
||||
/* Values for nocb_defer_wakeup field in struct rcu_data. */
|
||||
#define RCU_NOCB_WAKE_OFF -1
|
||||
#define RCU_NOCB_WAKE_NOT 0
|
||||
#define RCU_NOCB_WAKE 1
|
||||
#define RCU_NOCB_WAKE_FORCE 2
|
||||
#define RCU_NOCB_WAKE_BYPASS 1
|
||||
#define RCU_NOCB_WAKE 2
|
||||
#define RCU_NOCB_WAKE_FORCE 3
|
||||
|
||||
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
|
||||
/* For jiffies_till_first_fqs and */
|
||||
|
@ -417,8 +417,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
|
|||
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
|
||||
static bool rcu_is_callbacks_kthread(void);
|
||||
static void rcu_cpu_kthread_setup(unsigned int cpu);
|
||||
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp);
|
||||
static void __init rcu_spawn_boost_kthreads(void);
|
||||
static void rcu_prepare_kthreads(int cpu);
|
||||
static void rcu_cleanup_after_idle(void);
|
||||
static void rcu_prepare_for_idle(void);
|
||||
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
|
||||
|
@ -434,7 +434,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
|
|||
bool *was_alldone, unsigned long flags);
|
||||
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
|
||||
unsigned long flags);
|
||||
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
|
||||
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
|
||||
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
|
||||
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
|
||||
static void rcu_spawn_cpu_nocb_kthread(int cpu);
|
||||
|
|
|
@ -33,10 +33,6 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool rcu_running_nocb_timer(struct rcu_data *rdp)
|
||||
{
|
||||
return (timer_curr_running(&rdp->nocb_timer) && !in_irq());
|
||||
}
|
||||
#else
|
||||
static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
|
||||
{
|
||||
|
@ -48,11 +44,6 @@ static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool rcu_running_nocb_timer(struct rcu_data *rdp)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
|
||||
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
|
||||
|
@ -72,8 +63,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
|
|||
rcu_lockdep_is_held_nocb(rdp) ||
|
||||
(rdp == this_cpu_ptr(&rcu_data) &&
|
||||
!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) ||
|
||||
rcu_current_is_nocb_kthread(rdp) ||
|
||||
rcu_running_nocb_timer(rdp)),
|
||||
rcu_current_is_nocb_kthread(rdp)),
|
||||
"Unsafe read of RCU_NOCB offloaded state"
|
||||
);
|
||||
|
||||
|
@ -1098,6 +1088,7 @@ static int rcu_boost(struct rcu_node *rnp)
|
|||
/* Lock only for side effect: boosts task t's priority. */
|
||||
rt_mutex_lock(&rnp->boost_mtx);
|
||||
rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
|
||||
rnp->n_boosts++;
|
||||
|
||||
return READ_ONCE(rnp->exp_tasks) != NULL ||
|
||||
READ_ONCE(rnp->boost_tasks) != NULL;
|
||||
|
@ -1197,22 +1188,16 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
|
|||
*/
|
||||
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
|
||||
{
|
||||
int rnp_index = rnp - rcu_get_root();
|
||||
unsigned long flags;
|
||||
int rnp_index = rnp - rcu_get_root();
|
||||
struct sched_param sp;
|
||||
struct task_struct *t;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
|
||||
return;
|
||||
|
||||
if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
|
||||
if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
|
||||
return;
|
||||
|
||||
rcu_state.boost = 1;
|
||||
|
||||
if (rnp->boost_kthread_task != NULL)
|
||||
return;
|
||||
|
||||
t = kthread_create(rcu_boost_kthread, (void *)rnp,
|
||||
"rcub/%d", rnp_index);
|
||||
if (WARN_ON_ONCE(IS_ERR(t)))
|
||||
|
@ -1264,16 +1249,7 @@ static void __init rcu_spawn_boost_kthreads(void)
|
|||
struct rcu_node *rnp;
|
||||
|
||||
rcu_for_each_leaf_node(rnp)
|
||||
rcu_spawn_one_boost_kthread(rnp);
|
||||
}
|
||||
|
||||
static void rcu_prepare_kthreads(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
|
||||
if (rcu_scheduler_fully_active)
|
||||
if (rcu_rnp_online_cpus(rnp))
|
||||
rcu_spawn_one_boost_kthread(rnp);
|
||||
}
|
||||
|
||||
|
@ -1294,6 +1270,10 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
|
|||
{
|
||||
}
|
||||
|
||||
static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
|
||||
{
|
||||
}
|
||||
|
||||
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
|
||||
{
|
||||
}
|
||||
|
@ -1302,10 +1282,6 @@ static void __init rcu_spawn_boost_kthreads(void)
|
|||
{
|
||||
}
|
||||
|
||||
static void rcu_prepare_kthreads(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_RCU_BOOST */
|
||||
|
||||
#if !defined(CONFIG_RCU_FAST_NO_HZ)
|
||||
|
@ -1689,43 +1665,50 @@ bool rcu_is_nocb_cpu(int cpu)
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock
|
||||
* and this function releases it.
|
||||
*/
|
||||
static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
|
||||
unsigned long flags)
|
||||
__releases(rdp->nocb_lock)
|
||||
static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
|
||||
struct rcu_data *rdp,
|
||||
bool force, unsigned long flags)
|
||||
__releases(rdp_gp->nocb_gp_lock)
|
||||
{
|
||||
bool needwake = false;
|
||||
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
|
||||
|
||||
lockdep_assert_held(&rdp->nocb_lock);
|
||||
if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
|
||||
TPS("AlreadyAwake"));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
|
||||
del_timer(&rdp->nocb_timer);
|
||||
if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
|
||||
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
|
||||
del_timer(&rdp_gp->nocb_timer);
|
||||
}
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
|
||||
|
||||
if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
|
||||
WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
|
||||
needwake = true;
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
|
||||
if (needwake)
|
||||
if (needwake) {
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
|
||||
wake_up_process(rdp_gp->nocb_gp_kthread);
|
||||
}
|
||||
|
||||
return needwake;
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick the GP kthread for this NOCB group.
|
||||
*/
|
||||
static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
|
||||
|
||||
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
|
||||
return __wake_nocb_gp(rdp_gp, rdp, force, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Arrange to wake the GP kthread for this NOCB group at some future
|
||||
* time when it is safe to do so.
|
||||
|
@ -1733,12 +1716,27 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
|
|||
static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
|
||||
const char *reason)
|
||||
{
|
||||
if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_OFF)
|
||||
return;
|
||||
if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
|
||||
mod_timer(&rdp->nocb_timer, jiffies + 1);
|
||||
if (rdp->nocb_defer_wakeup < waketype)
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
|
||||
|
||||
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
|
||||
|
||||
/*
|
||||
* Bypass wakeup overrides previous deferments. In case
|
||||
* of a callback storm, there is no need to wake up too early.
|
||||
*/
|
||||
if (waketype == RCU_NOCB_WAKE_BYPASS) {
|
||||
mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
|
||||
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
|
||||
} else {
|
||||
if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE)
|
||||
mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
|
||||
if (rdp_gp->nocb_defer_wakeup < waketype)
|
||||
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
|
||||
}
|
||||
|
||||
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
|
||||
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
|
||||
}
|
||||
|
||||
|
@ -1937,7 +1935,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
|
|||
}
|
||||
|
||||
/*
|
||||
* Awaken the no-CBs grace-period kthead if needed, either due to it
|
||||
* Awaken the no-CBs grace-period kthread if needed, either due to it
|
||||
* legitimately being asleep or due to overload conditions.
|
||||
*
|
||||
* If warranted, also wake up the kthread servicing this CPU's queues.
|
||||
|
@ -1965,13 +1963,14 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
|
|||
rdp->qlen_last_fqs_check = len;
|
||||
if (!irqs_disabled_flags(flags)) {
|
||||
/* ... if queue was empty ... */
|
||||
wake_nocb_gp(rdp, false, flags);
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
wake_nocb_gp(rdp, false);
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
|
||||
TPS("WakeEmpty"));
|
||||
} else {
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
|
||||
TPS("WakeEmptyIsDeferred"));
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
}
|
||||
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
|
||||
/* ... or if many callbacks queued. */
|
||||
|
@ -1986,10 +1985,14 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
|
|||
smp_mb(); /* Enqueue before timer_pending(). */
|
||||
if ((rdp->nocb_cb_sleep ||
|
||||
!rcu_segcblist_ready_cbs(&rdp->cblist)) &&
|
||||
!timer_pending(&rdp->nocb_bypass_timer))
|
||||
!timer_pending(&rdp->nocb_timer)) {
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
|
||||
TPS("WakeOvfIsDeferred"));
|
||||
} else {
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
|
||||
}
|
||||
} else {
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
|
||||
|
@ -1997,18 +2000,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Wake up the no-CBs GP kthread to flush ->nocb_bypass. */
|
||||
static void do_nocb_bypass_wakeup_timer(struct timer_list *t)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = from_timer(rdp, t, nocb_bypass_timer);
|
||||
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
smp_mb__after_spinlock(); /* Timer expire before wakeup. */
|
||||
__call_rcu_nocb_wake(rdp, true, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we ignore this rdp.
|
||||
*
|
||||
|
@ -2115,11 +2106,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
|
|||
bypass = true;
|
||||
}
|
||||
rnp = rdp->mynode;
|
||||
if (bypass) { // Avoid race with first bypass CB.
|
||||
WRITE_ONCE(my_rdp->nocb_defer_wakeup,
|
||||
RCU_NOCB_WAKE_NOT);
|
||||
del_timer(&my_rdp->nocb_timer);
|
||||
}
|
||||
|
||||
// Advance callbacks if helpful and low contention.
|
||||
needwake_gp = false;
|
||||
if (!rcu_segcblist_restempty(&rdp->cblist,
|
||||
|
@ -2165,12 +2152,12 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
|
|||
my_rdp->nocb_gp_bypass = bypass;
|
||||
my_rdp->nocb_gp_gp = needwait_gp;
|
||||
my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
|
||||
|
||||
if (bypass && !rcu_nocb_poll) {
|
||||
// At least one child with non-empty ->nocb_bypass, so set
|
||||
// timer in order to avoid stranding its callbacks.
|
||||
raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
|
||||
mod_timer(&my_rdp->nocb_bypass_timer, j + 2);
|
||||
raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
|
||||
wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
|
||||
TPS("WakeBypassIsDeferred"));
|
||||
}
|
||||
if (rcu_nocb_poll) {
|
||||
/* Polling, so trace if first poll in the series. */
|
||||
|
@ -2194,8 +2181,10 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
|
|||
}
|
||||
if (!rcu_nocb_poll) {
|
||||
raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
|
||||
if (bypass)
|
||||
del_timer(&my_rdp->nocb_bypass_timer);
|
||||
if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
|
||||
WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
|
||||
del_timer(&my_rdp->nocb_timer);
|
||||
}
|
||||
WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
|
||||
raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
|
||||
}
|
||||
|
@ -2331,25 +2320,27 @@ static int rcu_nocb_cb_kthread(void *arg)
|
|||
}
|
||||
|
||||
/* Is a deferred wakeup of rcu_nocb_kthread() required? */
|
||||
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
|
||||
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
|
||||
{
|
||||
return READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT;
|
||||
return READ_ONCE(rdp->nocb_defer_wakeup) >= level;
|
||||
}
|
||||
|
||||
/* Do a deferred wakeup of rcu_nocb_kthread(). */
|
||||
static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
|
||||
static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
|
||||
struct rcu_data *rdp, int level,
|
||||
unsigned long flags)
|
||||
__releases(rdp_gp->nocb_gp_lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ndw;
|
||||
int ret;
|
||||
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
if (!rcu_nocb_need_deferred_wakeup(rdp)) {
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
|
||||
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
|
||||
return false;
|
||||
}
|
||||
ndw = READ_ONCE(rdp->nocb_defer_wakeup);
|
||||
ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
|
||||
|
||||
ndw = rdp_gp->nocb_defer_wakeup;
|
||||
ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
|
||||
|
||||
return ret;
|
||||
|
@ -2358,9 +2349,15 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
|
|||
/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
|
||||
static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
|
||||
|
||||
do_nocb_deferred_wakeup_common(rdp);
|
||||
WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp);
|
||||
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
|
||||
|
||||
raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
|
||||
smp_mb__after_spinlock(); /* Timer expire before wakeup. */
|
||||
do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2370,9 +2367,14 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
|
|||
*/
|
||||
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
|
||||
{
|
||||
if (rcu_nocb_need_deferred_wakeup(rdp))
|
||||
return do_nocb_deferred_wakeup_common(rdp);
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
|
||||
|
||||
if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp_gp, RCU_NOCB_WAKE))
|
||||
return false;
|
||||
|
||||
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
|
||||
return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags);
|
||||
}
|
||||
|
||||
void rcu_nocb_flush_deferred_wakeup(void)
|
||||
|
@ -2440,17 +2442,15 @@ static long rcu_nocb_rdp_deoffload(void *arg)
|
|||
swait_event_exclusive(rdp->nocb_state_wq,
|
||||
!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
|
||||
SEGCBLIST_KTHREAD_GP));
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
/* Make sure nocb timer won't stay around */
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_OFF);
|
||||
rcu_nocb_unlock_irqrestore(rdp, flags);
|
||||
del_timer_sync(&rdp->nocb_timer);
|
||||
|
||||
/*
|
||||
* Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY with CB unlocked
|
||||
* and IRQs disabled but let's be paranoid.
|
||||
* Lock one last time to acquire latest callback updates from kthreads
|
||||
* so we can later handle callbacks locally without locking.
|
||||
*/
|
||||
rcu_nocb_lock_irqsave(rdp, flags);
|
||||
/*
|
||||
* Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb
|
||||
* lock is released but how about being paranoid for once?
|
||||
*/
|
||||
rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
|
||||
/*
|
||||
* With SEGCBLIST_SOFTIRQ_ONLY, we can't use
|
||||
|
@ -2470,10 +2470,6 @@ int rcu_nocb_cpu_deoffload(int cpu)
|
|||
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
|
||||
int ret = 0;
|
||||
|
||||
if (rdp == rdp->nocb_gp_rdp) {
|
||||
pr_info("Can't deoffload an rdp GP leader (yet)\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
mutex_lock(&rcu_state.barrier_mutex);
|
||||
cpus_read_lock();
|
||||
if (rcu_rdp_is_offloaded(rdp)) {
|
||||
|
@ -2514,8 +2510,7 @@ static long rcu_nocb_rdp_offload(void *arg)
|
|||
* SEGCBLIST_SOFTIRQ_ONLY mode.
|
||||
*/
|
||||
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
|
||||
/* Re-enable nocb timer */
|
||||
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
|
||||
|
||||
/*
|
||||
* We didn't take the nocb lock while working on the
|
||||
* rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
|
||||
|
@ -2623,7 +2618,6 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
|||
raw_spin_lock_init(&rdp->nocb_bypass_lock);
|
||||
raw_spin_lock_init(&rdp->nocb_gp_lock);
|
||||
timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
|
||||
timer_setup(&rdp->nocb_bypass_timer, do_nocb_bypass_wakeup_timer, 0);
|
||||
rcu_cblist_init(&rdp->nocb_bypass);
|
||||
}
|
||||
|
||||
|
@ -2782,13 +2776,12 @@ static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
|
|||
{
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
pr_info("nocb GP %d %c%c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
|
||||
pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
|
||||
rdp->cpu,
|
||||
"kK"[!!rdp->nocb_gp_kthread],
|
||||
"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
|
||||
"dD"[!!rdp->nocb_defer_wakeup],
|
||||
"tT"[timer_pending(&rdp->nocb_timer)],
|
||||
"bB"[timer_pending(&rdp->nocb_bypass_timer)],
|
||||
"sS"[!!rdp->nocb_gp_sleep],
|
||||
".W"[swait_active(&rdp->nocb_gp_wq)],
|
||||
".W"[swait_active(&rnp->nocb_gp_wq[0])],
|
||||
|
@ -2809,7 +2802,6 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
|
|||
char bufr[20];
|
||||
struct rcu_segcblist *rsclp = &rdp->cblist;
|
||||
bool waslocked;
|
||||
bool wastimer;
|
||||
bool wassleep;
|
||||
|
||||
if (rdp->nocb_gp_rdp == rdp)
|
||||
|
@ -2846,15 +2838,13 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
|
|||
return;
|
||||
|
||||
waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
|
||||
wastimer = timer_pending(&rdp->nocb_bypass_timer);
|
||||
wassleep = swait_active(&rdp->nocb_gp_wq);
|
||||
if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
|
||||
return; /* Nothing untowards. */
|
||||
if (!rdp->nocb_gp_sleep && !waslocked && !wassleep)
|
||||
return; /* Nothing untoward. */
|
||||
|
||||
pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
|
||||
pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c %c\n",
|
||||
"lL"[waslocked],
|
||||
"dD"[!!rdp->nocb_defer_wakeup],
|
||||
"tT"[wastimer],
|
||||
"sS"[!!rdp->nocb_gp_sleep],
|
||||
".W"[wassleep]);
|
||||
}
|
||||
|
@ -2919,7 +2909,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
|||
{
|
||||
}
|
||||
|
||||
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
|
||||
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -314,6 +314,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
|
|||
* tasks blocked within RCU read-side critical sections.
|
||||
*/
|
||||
static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
|
||||
__releases(rnp->lock)
|
||||
{
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return 0;
|
||||
|
@ -716,6 +717,63 @@ static void check_cpu_stall(struct rcu_data *rdp)
|
|||
// RCU forward-progress mechanisms, including of callback invocation.
|
||||
|
||||
|
||||
/*
|
||||
* Check to see if a failure to end RCU priority inversion was due to
|
||||
* a CPU not passing through a quiescent state. When this happens, there
|
||||
* is nothing that RCU priority boosting can do to help, so we shouldn't
|
||||
* count this as an RCU priority boosting failure. A return of true says
|
||||
* RCU priority boosting is to blame, and false says otherwise. If false
|
||||
* is returned, the first of the CPUs to blame is stored through cpup.
|
||||
* If there was no CPU blocking the current grace period, but also nothing
|
||||
* in need of being boosted, *cpup is set to -1. This can happen in case
|
||||
* of vCPU preemption while the last CPU is reporting its quiescent state,
|
||||
* for example.
|
||||
*
|
||||
* If cpup is NULL, then a lockless quick check is carried out, suitable
|
||||
* for high-rate usage. On the other hand, if cpup is non-NULL, each
|
||||
* rcu_node structure's ->lock is acquired, ruling out high-rate usage.
|
||||
*/
|
||||
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
|
||||
{
|
||||
bool atb = false;
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
struct rcu_node *rnp;
|
||||
|
||||
rcu_for_each_leaf_node(rnp) {
|
||||
if (!cpup) {
|
||||
if (READ_ONCE(rnp->qsmask)) {
|
||||
return false;
|
||||
} else {
|
||||
if (READ_ONCE(rnp->gp_tasks))
|
||||
atb = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
*cpup = -1;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (rnp->gp_tasks)
|
||||
atb = true;
|
||||
if (!rnp->qsmask) {
|
||||
// No CPUs without quiescent states for this rnp.
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
continue;
|
||||
}
|
||||
// Find the first holdout CPU.
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu) {
|
||||
if (rnp->qsmask & (1UL << (cpu - rnp->grplo))) {
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
*cpup = cpu;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
// Can't blame CPUs, so must blame RCU priority boosting.
|
||||
return atb;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_check_boost_fail);
|
||||
|
||||
/*
|
||||
* Show the state of the grace-period kthreads.
|
||||
*/
|
||||
|
@ -726,6 +784,7 @@ void show_rcu_gp_kthreads(void)
|
|||
unsigned long j;
|
||||
unsigned long ja;
|
||||
unsigned long jr;
|
||||
unsigned long js;
|
||||
unsigned long jw;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_node *rnp;
|
||||
|
@ -734,21 +793,30 @@ void show_rcu_gp_kthreads(void)
|
|||
j = jiffies;
|
||||
ja = j - data_race(rcu_state.gp_activity);
|
||||
jr = j - data_race(rcu_state.gp_req_activity);
|
||||
js = j - data_race(rcu_state.gp_start);
|
||||
jw = j - data_race(rcu_state.gp_wake_time);
|
||||
pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
|
||||
pr_info("%s: wait state: %s(%d) ->state: %#lx ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n",
|
||||
rcu_state.name, gp_state_getname(rcu_state.gp_state),
|
||||
rcu_state.gp_state, t ? t->state : 0x1ffffL,
|
||||
ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
|
||||
rcu_state.gp_state, t ? t->state : 0x1ffffL, t ? t->rt_priority : 0xffU,
|
||||
js, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
|
||||
(long)data_race(rcu_state.gp_seq),
|
||||
(long)data_race(rcu_get_root()->gp_seq_needed),
|
||||
data_race(rcu_state.gp_max),
|
||||
data_race(rcu_state.gp_flags));
|
||||
rcu_for_each_node_breadth_first(rnp) {
|
||||
if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq),
|
||||
READ_ONCE(rnp->gp_seq_needed)))
|
||||
if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) &&
|
||||
!data_race(rnp->qsmask) && !data_race(rnp->boost_tasks) &&
|
||||
!data_race(rnp->exp_tasks) && !data_race(rnp->gp_tasks))
|
||||
continue;
|
||||
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
|
||||
rnp->grplo, rnp->grphi, (long)data_race(rnp->gp_seq),
|
||||
(long)data_race(rnp->gp_seq_needed));
|
||||
pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n",
|
||||
rnp->grplo, rnp->grphi,
|
||||
(long)data_race(rnp->gp_seq), (long)data_race(rnp->gp_seq_needed),
|
||||
data_race(rnp->qsmask),
|
||||
".b"[!!data_race(rnp->boost_kthread_task)],
|
||||
".B"[!!data_race(rnp->boost_tasks)],
|
||||
".E"[!!data_race(rnp->exp_tasks)],
|
||||
".G"[!!data_race(rnp->gp_tasks)],
|
||||
data_race(rnp->n_boosts));
|
||||
if (!rcu_is_leaf_node(rnp))
|
||||
continue;
|
||||
for_each_leaf_node_possible_cpu(rnp, cpu) {
|
||||
|
|
|
@ -277,7 +277,7 @@ EXPORT_SYMBOL_GPL(rcu_callback_map);
|
|||
|
||||
noinstr int notrace debug_lockdep_rcu_enabled(void)
|
||||
{
|
||||
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
|
||||
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && READ_ONCE(debug_locks) &&
|
||||
current->lockdep_recursion == 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
|
||||
|
@ -524,6 +524,7 @@ static void test_callback(struct rcu_head *r)
|
|||
}
|
||||
|
||||
DEFINE_STATIC_SRCU(early_srcu);
|
||||
static unsigned long early_srcu_cookie;
|
||||
|
||||
struct early_boot_kfree_rcu {
|
||||
struct rcu_head rh;
|
||||
|
@ -536,8 +537,10 @@ static void early_boot_test_call_rcu(void)
|
|||
struct early_boot_kfree_rcu *rhp;
|
||||
|
||||
call_rcu(&head, test_callback);
|
||||
if (IS_ENABLED(CONFIG_SRCU))
|
||||
if (IS_ENABLED(CONFIG_SRCU)) {
|
||||
early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
|
||||
call_srcu(&early_srcu, &shead, test_callback);
|
||||
}
|
||||
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
|
||||
if (!WARN_ON_ONCE(!rhp))
|
||||
kfree_rcu(rhp, rh);
|
||||
|
@ -563,6 +566,7 @@ static int rcu_verify_early_boot_tests(void)
|
|||
if (IS_ENABLED(CONFIG_SRCU)) {
|
||||
early_boot_test_counter++;
|
||||
srcu_barrier(&early_srcu);
|
||||
WARN_ON_ONCE(!poll_state_synchronize_srcu(&early_srcu, early_srcu_cookie));
|
||||
}
|
||||
}
|
||||
if (rcu_self_test_counter != early_boot_test_counter) {
|
||||
|
|
|
@ -1237,20 +1237,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
|
|||
}
|
||||
EXPORT_SYMBOL(try_to_del_timer_sync);
|
||||
|
||||
bool timer_curr_running(struct timer_list *timer)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NR_BASES; i++) {
|
||||
struct timer_base *base = this_cpu_ptr(&timer_bases[i]);
|
||||
|
||||
if (base->running_timer == timer)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
static __init void timer_base_init_expiry_lock(struct timer_base *base)
|
||||
{
|
||||
|
|
|
@ -922,7 +922,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
|
|||
continue;
|
||||
}
|
||||
/*
|
||||
* No kthead_use_mm() user needs to read from the userspace so
|
||||
* No kthread_use_mm() user needs to read from the userspace so
|
||||
* we are ok to reap it.
|
||||
*/
|
||||
if (unlikely(p->flags & PF_KTHREAD))
|
||||
|
|
|
@ -640,6 +640,7 @@ struct kmem_obj_info {
|
|||
struct kmem_cache *kp_slab_cache;
|
||||
void *kp_ret;
|
||||
void *kp_stack[KS_ADDRS_COUNT];
|
||||
void *kp_free_stack[KS_ADDRS_COUNT];
|
||||
};
|
||||
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
|
||||
#endif
|
||||
|
|
|
@ -564,7 +564,7 @@ EXPORT_SYMBOL_GPL(kmem_valid_obj);
|
|||
* depends on the type of object and on how much debugging is enabled.
|
||||
* For a slab-cache object, the fact that it is a slab object is printed,
|
||||
* and, if available, the slab name, return address, and stack trace from
|
||||
* the allocation of that object.
|
||||
* the allocation and last free path of that object.
|
||||
*
|
||||
* This function will splat if passed a pointer to a non-slab object.
|
||||
* If you are not sure what type of object you have, you should instead
|
||||
|
@ -609,6 +609,16 @@ void kmem_dump_obj(void *object)
|
|||
break;
|
||||
pr_info(" %pS\n", kp.kp_stack[i]);
|
||||
}
|
||||
|
||||
if (kp.kp_free_stack[0])
|
||||
pr_cont(" Free path:\n");
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(kp.kp_free_stack); i++) {
|
||||
if (!kp.kp_free_stack[i])
|
||||
break;
|
||||
pr_info(" %pS\n", kp.kp_free_stack[i]);
|
||||
}
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmem_dump_obj);
|
||||
#endif
|
||||
|
|
|
@ -4002,6 +4002,7 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
|
|||
!(s->flags & SLAB_STORE_USER))
|
||||
return;
|
||||
#ifdef CONFIG_SLUB_DEBUG
|
||||
objp = fixup_red_left(s, objp);
|
||||
trackp = get_track(s, objp, TRACK_ALLOC);
|
||||
kpp->kp_ret = (void *)trackp->addr;
|
||||
#ifdef CONFIG_STACKTRACE
|
||||
|
@ -4010,6 +4011,13 @@ void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
|
|||
if (!kpp->kp_stack[i])
|
||||
break;
|
||||
}
|
||||
|
||||
trackp = get_track(s, objp, TRACK_FREE);
|
||||
for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
|
||||
kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
|
||||
if (!kpp->kp_free_stack[i])
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -983,7 +983,7 @@ int __weak memcmp_pages(struct page *page1, struct page *page2)
|
|||
* depends on the type of object and on how much debugging is enabled.
|
||||
* For example, for a slab-cache object, the slab name is printed, and,
|
||||
* if available, the return address and stack trace from the allocation
|
||||
* of that object.
|
||||
* and last free path of that object.
|
||||
*/
|
||||
void mem_dump_obj(void *object)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env drgn
|
||||
# SPDX-License-Identifier: GPL-2.0+
|
||||
#
|
||||
# Dump out the number of RCU callbacks outstanding.
|
||||
#
|
||||
# On older kernels having multiple flavors of RCU, this dumps out the
|
||||
# number of callbacks for the most heavily used flavor.
|
||||
#
|
||||
# Usage: sudo drgn rcu-cbs.py
|
||||
#
|
||||
# Copyright (C) 2021 Facebook, Inc.
|
||||
#
|
||||
# Authors: Paul E. McKenney <paulmck@kernel.org>
|
||||
|
||||
import sys
|
||||
import drgn
|
||||
from drgn import NULL, Object
|
||||
from drgn.helpers.linux import *
|
||||
|
||||
def get_rdp0(prog):
    """Return the address of the per-CPU RCU data variable.

    Tries 'rcu_preempt_data', then 'rcu_sched_data', and finally
    'rcu_data', all looked up in kernel/rcu/tree.c.  A LookupError from
    the final lookup is not caught and propagates to the caller.
    """
    src = 'kernel/rcu/tree.c'

    # NULL doubles as the "not found yet" marker, as in the lookups below.
    rdp0 = NULL
    for name in ('rcu_preempt_data', 'rcu_sched_data'):
        if rdp0 == NULL:
            try:
                rdp0 = prog.variable(name, src)
            except LookupError:
                rdp0 = NULL

    if rdp0 == NULL:
        rdp0 = prog.variable('rcu_data', src)
    return rdp0.address_of_()
|
||||
|
||||
rdp0 = get_rdp0(prog)

# Add up ->cblist.len across every possible CPU.
total = 0
for cpu in for_each_possible_cpu(prog):
    rdp = per_cpu_ptr(rdp0, cpu)
    ncbs = rdp.cblist.len.value_()
    # print("CPU " + str(cpu) + " RCU callbacks: " + str(ncbs))
    total += ncbs
print("Number of RCU callbacks in flight: " + str(total))
|
|
@ -29,7 +29,7 @@ then
|
|||
echo "Usage: $scriptname /path/to/old/run [ options ]"
|
||||
exit 1
|
||||
fi
|
||||
if ! cp "$oldrun/batches" $T/batches.oldrun
|
||||
if ! cp "$oldrun/scenarios" $T/scenarios.oldrun
|
||||
then
|
||||
# Later on, can reconstitute this from console.log files.
|
||||
echo Prior run batches file does not exist: $oldrun/batches
|
||||
|
@ -143,6 +143,8 @@ then
|
|||
usage
|
||||
fi
|
||||
rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
|
||||
touch "$rundir/log"
|
||||
echo $scriptname $args | tee -a "$rundir/log"
|
||||
echo $oldrun > "$rundir/re-run"
|
||||
if ! test -d "$rundir/../../bin"
|
||||
then
|
||||
|
@ -165,22 +167,12 @@ done
|
|||
grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings
|
||||
. $T/qemu-cmd-settings
|
||||
|
||||
grep -v '^#' $T/batches.oldrun | awk '
|
||||
BEGIN {
|
||||
oldbatch = 1;
|
||||
}
|
||||
|
||||
grep -v '^#' $T/scenarios.oldrun | awk '
|
||||
{
|
||||
if (oldbatch != $1) {
|
||||
print "kvm-test-1-run-batch.sh" curbatch;
|
||||
curbatch = "";
|
||||
oldbatch = $1;
|
||||
}
|
||||
curbatch = curbatch " " $2;
|
||||
}
|
||||
|
||||
END {
|
||||
print "kvm-test-1-run-batch.sh" curbatch
|
||||
for (i = 2; i <= NF; i++)
|
||||
curbatch = curbatch " " $i;
|
||||
print "kvm-test-1-run-batch.sh" curbatch;
|
||||
}' > $T/runbatches.sh
|
||||
|
||||
if test -n "$dryrun"
|
||||
|
@ -188,12 +180,5 @@ then
|
|||
echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
|
||||
else
|
||||
( cd "$rundir"; sh $T/runbatches.sh )
|
||||
kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
|
||||
echo | tee -a "$rundir/log"
|
||||
echo ---- Results directory: $rundir | tee -a "$rundir/log"
|
||||
kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
|
||||
ret=$?
|
||||
cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
|
||||
echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
|
||||
exit $ret
|
||||
kvm-end-run-stats.sh "$rundir" "$starttime"
|
||||
fi
|
||||
|
|
|
@ -40,8 +40,10 @@ if test $retval -gt 1
|
|||
then
|
||||
exit 2
|
||||
fi
|
||||
ncpus=`cpus2use.sh`
|
||||
make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
|
||||
|
||||
# Tell "make" to use double the number of real CPUs on the build system.
|
||||
ncpus="`getconf _NPROCESSORS_ONLN`"
|
||||
make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
|
||||
retval=$?
|
||||
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
|
||||
then
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Check the status of the specified run.
#
# Usage: kvm-end-run-stats.sh /path/to/run starttime
#
# The starttime argument is optional.  When it is omitted, the value
# returned by get_starttime at the time this script runs is used instead.
# The exit code is that of kvm-recheck.sh.
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>

# scriptname=$0
# args="$*"
rundir="$1"
if ! test -d "$rundir"
then
	echo kvm-end-run-stats.sh: Specified run directory does not exist: $rundir
	exit 1
fi

T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
trap 'rm -rf $T' 0
mkdir $T

KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
. functions.sh
default_starttime="`get_starttime`"
# Expand the variable: without the "$" the default would be the literal
# string "default_starttime" rather than the time computed above.
starttime="${2-$default_starttime}"

echo | tee -a "$rundir/log"
echo | tee -a "$rundir/log"
echo " --- `date` Test summary:" | tee -a "$rundir/log"
echo Results directory: $rundir | tee -a "$rundir/log"
kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
# Capture the output in a file so that $? is kvm-recheck.sh's status
# rather than that of a trailing "tee".
kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
ret=$?
cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
exit $ret
|
|
@ -43,7 +43,7 @@ then
|
|||
else
|
||||
echo No build errors.
|
||||
fi
|
||||
if grep -q -e "--buildonly" < ${rundir}/log
|
||||
if grep -q -e "--build-\?only" < ${rundir}/log && ! test -f "${rundir}/remote-log"
|
||||
then
|
||||
echo Build-only run, no console logs to check.
|
||||
exit $editorret
|
||||
|
|
|
@ -31,7 +31,7 @@ then
|
|||
echo "$configfile ------- " $stopstate
|
||||
else
|
||||
title="$configfile ------- $ngps GPs"
|
||||
dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
|
||||
dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//'`
|
||||
if test -z "$dur"
|
||||
then
|
||||
:
|
||||
|
|
|
@ -0,0 +1,249 @@
|
|||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0+
|
||||
#
|
||||
# Run a series of tests on remote systems under KVM.
|
||||
#
|
||||
# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
|
||||
# kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
|
||||
#
|
||||
# Copyright (C) 2021 Facebook, Inc.
|
||||
#
|
||||
# Authors: Paul E. McKenney <paulmck@kernel.org>
|
||||
|
||||
scriptname=$0
|
||||
args="$*"
|
||||
|
||||
if ! test -d tools/testing/selftests/rcutorture/bin
|
||||
then
|
||||
echo $scriptname must be run from top-level directory of kernel source tree.
|
||||
exit 1
|
||||
fi
|
||||
|
||||
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
|
||||
PATH=${KVM}/bin:$PATH; export PATH
|
||||
. functions.sh
|
||||
|
||||
starttime="`get_starttime`"
|
||||
|
||||
systems="$1"
|
||||
if test -z "$systems"
|
||||
then
|
||||
echo $scriptname: Empty list of systems will go nowhere good, giving up.
|
||||
exit 1
|
||||
fi
|
||||
shift
|
||||
|
||||
# Pathnames:
|
||||
# T: /tmp/kvm-remote.sh.$$
|
||||
# resdir: /tmp/kvm-remote.sh.$$/res
|
||||
# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
|
||||
# oldrun: `pwd`/tools/testing/.../res/$otherds
|
||||
#
|
||||
# Pathname segments:
|
||||
# TD: kvm-remote.sh.$$
|
||||
# ds: yyyy.mm.dd-hh.mm.ss-remote
|
||||
|
||||
TD=kvm-remote.sh.$$
|
||||
T=${TMPDIR-/tmp}/$TD
|
||||
trap 'rm -rf $T' 0
|
||||
mkdir $T
|
||||
|
||||
resdir="$T/res"
|
||||
ds=`date +%Y.%m.%d-%H.%M.%S`-remote
|
||||
rundir=$resdir/$ds
|
||||
echo Results directory: $rundir
|
||||
echo $scriptname $args
|
||||
# A first remaining argument starting with "--" means kvm.sh arguments
# (fresh build); anything else is taken as the path of an old run.
if echo $1 | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	datestamp="`echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}'`"
	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report the saved status: $? here would be that of "test".
		echo $scriptname: kvm.sh failed exit code $ret
		cat $T/kvm.sh.out
		exit 2
	fi
	oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh $oldrun --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.
	oldrun="$1"
	# Make a relative pathname absolute.
	if ! echo $oldrun | grep -q '^/'
	then
		oldrun="`pwd`/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# From here on, log into the local copy of the new run directory.
	cp -a "$rundir" "$KVM/res/"
	oldrun="$KVM/res/$ds"
fi
|
||||
echo | tee -a "$oldrun/remote-log"
|
||||
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
|
||||
cat $T/kvm-again.sh.out
|
||||
echo | tee -a "$oldrun/remote-log"
|
||||
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
|
||||
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
|
||||
|
||||
# Create the kvm-remote-N.sh scripts in the bin directory.
|
||||
awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
|
||||
{
|
||||
n = $1;
|
||||
sub(/\./, "", n);
|
||||
fn = dest "/kvm-remote-" n ".sh"
|
||||
scenarios = "";
|
||||
for (i = 2; i <= NF; i++)
|
||||
scenarios = scenarios " " $i;
|
||||
print "kvm-test-1-run-batch.sh" scenarios > fn;
|
||||
print "rm " rundir "/remote.run" >> fn;
|
||||
}'
|
||||
chmod +x $T/bin/kvm-remote-*.sh
|
||||
( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )
|
||||
|
||||
# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
	ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
	# Save ssh's status immediately: after the "echo | tee" pipeline
	# below, $? would reflect tee instead.
	ret=$?
	echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
	if test "$ret" -ne 0
	then
		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
		# Must not be piped: "exit" in a pipeline only leaves a subshell.
		exit 4
	fi
done
|
||||
|
||||
# Download and expand the tarball on all systems.
for i in $systems
do
	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
	cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
	ret=$?
	if test "$ret" -ne 0
	then
		echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
		# Must not be piped: "exit" in a pipeline only leaves a subshell.
		exit 10
	fi
done
|
||||
|
||||
# Test whether a regular file exists on a remote system.
#
# Usage: checkremotefile system pathname
#
# Returns the exit status of the remote "test -f".  A status of 255 is
# treated as an ssh failure: a complaint is printed and the check is
# repeated after a delay, with no limit on the number of retries.
checkremotefile () {
	local status
	local sleeptime=60

	until ssh $1 "test -f \"$2\""; status=$?; test "$status" -ne 255
	do
		echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
		sleep $sleeptime
	done
	return $status
}
|
||||
|
||||
# Start batches on whichever of the remote $systems are currently idle.
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered from 1.  The number of the next batch to start is
# written to stdout, so everything else (including ssh and
# checkremotefile output) must go to stderr.
startbatches () {
	local curbatch="$1"
	local nbatches="$2"
	local ret

	# One system is examined per pass.
	for i in $systems
	do
		# All batches handed out: report "one past the end".
		if test "$curbatch" -gt "$nbatches"
		then
			echo $((nbatches + 1))
			return 0
		fi

		# A remote.run file means the system is still running its last test, skip.
		checkremotefile "$i" "$resdir/$ds/remote.run" >&2 && continue

		ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" >&2
		ret=$?
		if test "$ret" -ne 0
		then
			echo ssh $i failed: exitcode $ret >&2
			exit 11
		fi
		echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` >&2
		curbatch=$((curbatch + 1))
	done
	echo $curbatch
}
|
||||
|
||||
# Launch all the scenarios.
|
||||
nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
|
||||
curbatch=1
|
||||
while test "$curbatch" -le "$nbatches"
|
||||
do
|
||||
startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
|
||||
curbatch="`cat $T/curbatch`"
|
||||
if test -s "$T/startbatches.stderr"
|
||||
then
|
||||
cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
|
||||
fi
|
||||
if test "$curbatch" -le "$nbatches"
|
||||
then
|
||||
sleep 30
|
||||
fi
|
||||
done
|
||||
echo All batches started. `date`
|
||||
|
||||
# Wait for all remaining scenarios to complete and collect results.
|
||||
for i in $systems
|
||||
do
|
||||
while checkremotefile "$i" "$resdir/$ds/remote.run"
|
||||
do
|
||||
sleep 30
|
||||
done
|
||||
( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu_pid */qemu-retval; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
|
||||
done
|
||||
|
||||
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
|
||||
exit "`cat $T/exitcode`"
|
|
@ -20,6 +20,9 @@ mkdir $T
|
|||
|
||||
cd `dirname $scriptname`/../../../../../
|
||||
|
||||
# This script knows only English.
|
||||
LANG=en_US.UTF-8; export LANG
|
||||
|
||||
dur=$((30*60))
|
||||
dryrun=""
|
||||
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
|
||||
|
@ -41,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
|
|||
TORTURE_KCONFIG_KCSAN_ARG=""
|
||||
TORTURE_KMAKE_ARG=""
|
||||
TORTURE_QEMU_MEM=512
|
||||
TORTURE_REMOTE=
|
||||
TORTURE_SHUTDOWN_GRACE=180
|
||||
TORTURE_SUITE=rcu
|
||||
TORTURE_MOD=rcutorture
|
||||
|
@ -64,7 +68,7 @@ usage () {
|
|||
echo " --cpus N"
|
||||
echo " --datestamp string"
|
||||
echo " --defconfig string"
|
||||
echo " --dryrun batches|sched|script"
|
||||
echo " --dryrun batches|scenarios|sched|script"
|
||||
echo " --duration minutes | <seconds>s | <hours>h | <days>d"
|
||||
echo " --gdb"
|
||||
echo " --help"
|
||||
|
@ -77,6 +81,7 @@ usage () {
|
|||
echo " --no-initrd"
|
||||
echo " --qemu-args qemu-arguments"
|
||||
echo " --qemu-cmd qemu-system-..."
|
||||
echo " --remote"
|
||||
echo " --results absolute-pathname"
|
||||
echo " --torture lock|rcu|rcuscale|refscale|scf"
|
||||
echo " --trust-make"
|
||||
|
@ -112,11 +117,14 @@ do
|
|||
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
|
||||
cpus=$2
|
||||
TORTURE_ALLOTED_CPUS="$2"
|
||||
if test -z "$TORTURE_REMOTE"
|
||||
then
|
||||
max_cpus="`identify_qemu_vcpus`"
|
||||
if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
|
||||
then
|
||||
TORTURE_ALLOTED_CPUS=$max_cpus
|
||||
fi
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
--datestamp)
|
||||
|
@ -130,7 +138,7 @@ do
|
|||
shift
|
||||
;;
|
||||
--dryrun)
|
||||
checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|sched\|script' '^--'
|
||||
checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|scenarios\|sched\|script' '^--'
|
||||
dryrun=$2
|
||||
shift
|
||||
;;
|
||||
|
@ -206,6 +214,9 @@ do
|
|||
TORTURE_QEMU_CMD="$2"
|
||||
shift
|
||||
;;
|
||||
--remote)
|
||||
TORTURE_REMOTE=1
|
||||
;;
|
||||
--results)
|
||||
checkarg --results "(absolute pathname)" "$#" "$2" '^/' '^error'
|
||||
resdir=$2
|
||||
|
@ -550,20 +561,7 @@ END {
|
|||
if (ncpus != 0)
|
||||
dump(first, i, batchnum);
|
||||
}' >> $T/script
|
||||
|
||||
cat << '___EOF___' >> $T/script
|
||||
echo | tee -a $TORTURE_RESDIR/log
|
||||
echo | tee -a $TORTURE_RESDIR/log
|
||||
echo " --- `date` Test summary:" | tee -a $TORTURE_RESDIR/log
|
||||
___EOF___
|
||||
cat << ___EOF___ >> $T/script
|
||||
echo Results directory: $resdir/$ds | tee -a $resdir/$ds/log
|
||||
kcsan-collapse.sh $resdir/$ds | tee -a $resdir/$ds/log
|
||||
kvm-recheck.sh $resdir/$ds > $T/kvm-recheck.sh.out 2>&1
|
||||
___EOF___
|
||||
echo 'ret=$?' >> $T/script
|
||||
echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script
|
||||
echo 'exit $ret' >> $T/script
|
||||
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
|
||||
|
||||
# Extract the tests and their batches from the script.
|
||||
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
|
||||
|
@ -577,6 +575,25 @@ egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
|
|||
print batchno, $1, $2
|
||||
}' > $T/batches
|
||||
|
||||
# As above, but one line per batch.
|
||||
grep -v '^#' $T/batches | awk '
|
||||
BEGIN {
|
||||
oldbatch = 1;
|
||||
}
|
||||
|
||||
{
|
||||
if (oldbatch != $1) {
|
||||
print ++n ". " curbatch;
|
||||
curbatch = "";
|
||||
oldbatch = $1;
|
||||
}
|
||||
curbatch = curbatch " " $2;
|
||||
}
|
||||
|
||||
END {
|
||||
print ++n ". " curbatch;
|
||||
}' > $T/scenarios
|
||||
|
||||
if test "$dryrun" = script
|
||||
then
|
||||
cat $T/script
|
||||
|
@ -597,13 +614,17 @@ elif test "$dryrun" = batches
|
|||
then
|
||||
cat $T/batches
|
||||
exit 0
|
||||
elif test "$dryrun" = scenarios
|
||||
then
|
||||
cat $T/scenarios
|
||||
exit 0
|
||||
else
|
||||
# Not a dryrun. Record the batches and the number of CPUs, then run the script.
|
||||
bash $T/script
|
||||
ret=$?
|
||||
cp $T/batches $resdir/$ds/batches
|
||||
cp $T/scenarios $resdir/$ds/scenarios
|
||||
echo '#' cpus=$cpus >> $resdir/$ds/batches
|
||||
echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log
|
||||
exit $ret
|
||||
fi
|
||||
|
||||
|
|
|
@ -302,7 +302,7 @@ function torture_set {
|
|||
kcsan_kmake_tag="--kmake-args"
|
||||
cur_kcsan_kmake_args="$kcsan_kmake_args"
|
||||
fi
|
||||
torture_one $* --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
|
||||
torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
|
||||
fi
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
CONFIG_SMP=y
|
||||
CONFIG_NR_CPUS=16
|
||||
CONFIG_PREEMPT_NONE=n
|
||||
CONFIG_PREEMPT_VOLUNTARY=n
|
||||
CONFIG_PREEMPT=y
|
||||
#CHECK#CONFIG_PREEMPT_RCU=y
|
||||
CONFIG_HZ_PERIODIC=y
|
||||
CONFIG_NO_HZ_IDLE=n
|
||||
CONFIG_NO_HZ_FULL=n
|
||||
CONFIG_RCU_TRACE=y
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_RCU_FANOUT=2
|
||||
CONFIG_RCU_FANOUT_LEAF=2
|
||||
CONFIG_RCU_NOCB_CPU=n
|
||||
CONFIG_DEBUG_LOCK_ALLOC=n
|
||||
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
|
||||
CONFIG_RCU_EXPERT=y
|
|
@ -0,0 +1,8 @@
|
|||
rcutorture.test_boost=2
|
||||
rcutorture.stutter=0
|
||||
rcutree.gp_preinit_delay=12
|
||||
rcutree.gp_init_delay=3
|
||||
rcutree.gp_cleanup_delay=3
|
||||
rcutree.kthread_prio=2
|
||||
threadirqs
|
||||
tree.use_softirq=0
|
|
@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
|
|||
CONFIG_NO_HZ_IDLE=y
|
||||
CONFIG_NO_HZ_FULL=n
|
||||
CONFIG_RCU_FAST_NO_HZ=n
|
||||
CONFIG_HOTPLUG_CPU=n
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_SUSPEND=n
|
||||
CONFIG_HIBERNATION=n
|
||||
CONFIG_RCU_NOCB_CPU=n
|
||||
|
|
|
@ -8,7 +8,7 @@ CONFIG_HZ_PERIODIC=n
|
|||
CONFIG_NO_HZ_IDLE=y
|
||||
CONFIG_NO_HZ_FULL=n
|
||||
CONFIG_RCU_FAST_NO_HZ=n
|
||||
CONFIG_HOTPLUG_CPU=n
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_SUSPEND=n
|
||||
CONFIG_HIBERNATION=n
|
||||
CONFIG_RCU_FANOUT=3
|
||||
|
|
|
@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
|
|||
CONFIG_NO_HZ_IDLE=y
|
||||
CONFIG_NO_HZ_FULL=n
|
||||
CONFIG_RCU_FAST_NO_HZ=n
|
||||
CONFIG_HOTPLUG_CPU=n
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_SUSPEND=n
|
||||
CONFIG_HIBERNATION=n
|
||||
CONFIG_RCU_NOCB_CPU=n
|
||||
|
|
|
@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
|
|||
CONFIG_NO_HZ_IDLE=y
|
||||
CONFIG_NO_HZ_FULL=n
|
||||
CONFIG_RCU_FAST_NO_HZ=n
|
||||
CONFIG_HOTPLUG_CPU=n
|
||||
CONFIG_HOTPLUG_CPU=y
|
||||
CONFIG_SUSPEND=n
|
||||
CONFIG_HIBERNATION=n
|
||||
CONFIG_RCU_NOCB_CPU=n
|
||||
|
|
|
@ -174,7 +174,7 @@ static inline bool spin_trylock(spinlock_t *lock)
|
|||
}
|
||||
|
||||
struct completion {
|
||||
/* Hopefuly this won't overflow. */
|
||||
/* Hopefully this won't overflow. */
|
||||
unsigned int count;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue