mirror of https://gitee.com/openkylin/linux.git
Merge branch 'timers/nohz-v3' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz
Pull nohz improvements from Frederic Weisbecker: " It mostly contains fixes and full dynticks off-case optimizations. I believe that distros want to enable this feature so it seems important to optimize the case where the "nohz_full=" parameter is empty. ie: I'm trying to remove any performance regression that comes with NO_HZ_FULL=y when the feature is not used. This patchset improves the current situation a lot (off-case appears to be around 11% faster with hackbench, although I guess it may vary depending on the configuration but it should be significantly faster in any case) now there is still some work to do: I can still observe a remaining loss of 1.6% throughput seen with hackbench compared to CONFIG_NO_HZ_FULL=n. " Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
6f1d657668
|
@ -3,3 +3,4 @@ generic-y += clkdev.h
|
|||
generic-y += exec.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += vtime.h
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <linux/types.h>
|
||||
#ifdef CONFIG_MMU
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/preempt_mask.h>
|
||||
#endif
|
||||
#include <linux/preempt.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
|
|
@ -2,3 +2,4 @@
|
|||
generic-y += clkdev.h
|
||||
generic-y += rwsem.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += vtime.h
|
|
@ -13,9 +13,6 @@
|
|||
#include <asm/div64.h>
|
||||
|
||||
|
||||
#define __ARCH_HAS_VTIME_ACCOUNT
|
||||
#define __ARCH_HAS_VTIME_TASK_SWITCH
|
||||
|
||||
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
|
||||
|
||||
typedef unsigned long long __nocast cputime_t;
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
#ifndef _S390_VTIME_H
|
||||
#define _S390_VTIME_H
|
||||
|
||||
#define __ARCH_HAS_VTIME_ACCOUNT
|
||||
#define __ARCH_HAS_VTIME_TASK_SWITCH
|
||||
|
||||
#endif /* _S390_VTIME_H */
|
|
@ -19,6 +19,7 @@
|
|||
#include <asm/irq_regs.h>
|
||||
#include <asm/cputime.h>
|
||||
#include <asm/vtimer.h>
|
||||
#include <asm/vtime.h>
|
||||
#include <asm/irq.h>
|
||||
#include "entry.h"
|
||||
|
||||
|
|
|
@ -2,100 +2,110 @@
|
|||
#define _LINUX_CONTEXT_TRACKING_H
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/vtime.h>
|
||||
#include <linux/context_tracking_state.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
struct context_tracking {
|
||||
/*
|
||||
* When active is false, probes are unset in order
|
||||
* to minimize overhead: TIF flags are cleared
|
||||
* and calls to user_enter/exit are ignored. This
|
||||
* may be further optimized using static keys.
|
||||
*/
|
||||
bool active;
|
||||
enum ctx_state {
|
||||
IN_KERNEL = 0,
|
||||
IN_USER,
|
||||
} state;
|
||||
};
|
||||
|
||||
static inline void __guest_enter(void)
|
||||
{
|
||||
/*
|
||||
* This is running in ioctl context so we can avoid
|
||||
* the call to vtime_account() with its unnecessary idle check.
|
||||
*/
|
||||
vtime_account_system(current);
|
||||
current->flags |= PF_VCPU;
|
||||
}
|
||||
|
||||
static inline void __guest_exit(void)
|
||||
{
|
||||
/*
|
||||
* This is running in ioctl context so we can avoid
|
||||
* the call to vtime_account() with its unnecessary idle check.
|
||||
*/
|
||||
vtime_account_system(current);
|
||||
current->flags &= ~PF_VCPU;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
DECLARE_PER_CPU(struct context_tracking, context_tracking);
|
||||
extern void context_tracking_cpu_set(int cpu);
|
||||
|
||||
static inline bool context_tracking_in_user(void)
|
||||
extern void context_tracking_user_enter(void);
|
||||
extern void context_tracking_user_exit(void);
|
||||
extern void __context_tracking_task_switch(struct task_struct *prev,
|
||||
struct task_struct *next);
|
||||
|
||||
static inline void user_enter(void)
|
||||
{
|
||||
return __this_cpu_read(context_tracking.state) == IN_USER;
|
||||
}
|
||||
if (static_key_false(&context_tracking_enabled))
|
||||
context_tracking_user_enter();
|
||||
|
||||
static inline bool context_tracking_active(void)
|
||||
}
|
||||
static inline void user_exit(void)
|
||||
{
|
||||
return __this_cpu_read(context_tracking.active);
|
||||
if (static_key_false(&context_tracking_enabled))
|
||||
context_tracking_user_exit();
|
||||
}
|
||||
|
||||
extern void user_enter(void);
|
||||
extern void user_exit(void);
|
||||
|
||||
extern void guest_enter(void);
|
||||
extern void guest_exit(void);
|
||||
|
||||
static inline enum ctx_state exception_enter(void)
|
||||
{
|
||||
enum ctx_state prev_ctx;
|
||||
|
||||
if (!static_key_false(&context_tracking_enabled))
|
||||
return 0;
|
||||
|
||||
prev_ctx = this_cpu_read(context_tracking.state);
|
||||
user_exit();
|
||||
context_tracking_user_exit();
|
||||
|
||||
return prev_ctx;
|
||||
}
|
||||
|
||||
static inline void exception_exit(enum ctx_state prev_ctx)
|
||||
{
|
||||
if (prev_ctx == IN_USER)
|
||||
user_enter();
|
||||
if (static_key_false(&context_tracking_enabled)) {
|
||||
if (prev_ctx == IN_USER)
|
||||
context_tracking_user_enter();
|
||||
}
|
||||
}
|
||||
|
||||
extern void context_tracking_task_switch(struct task_struct *prev,
|
||||
struct task_struct *next);
|
||||
static inline void context_tracking_task_switch(struct task_struct *prev,
|
||||
struct task_struct *next)
|
||||
{
|
||||
if (static_key_false(&context_tracking_enabled))
|
||||
__context_tracking_task_switch(prev, next);
|
||||
}
|
||||
#else
|
||||
static inline bool context_tracking_in_user(void) { return false; }
|
||||
static inline void user_enter(void) { }
|
||||
static inline void user_exit(void) { }
|
||||
|
||||
static inline void guest_enter(void)
|
||||
{
|
||||
__guest_enter();
|
||||
}
|
||||
|
||||
static inline void guest_exit(void)
|
||||
{
|
||||
__guest_exit();
|
||||
}
|
||||
|
||||
static inline enum ctx_state exception_enter(void) { return 0; }
|
||||
static inline void exception_exit(enum ctx_state prev_ctx) { }
|
||||
static inline void context_tracking_task_switch(struct task_struct *prev,
|
||||
struct task_struct *next) { }
|
||||
#endif /* !CONFIG_CONTEXT_TRACKING */
|
||||
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
|
||||
extern void context_tracking_init(void);
|
||||
#else
|
||||
static inline void context_tracking_init(void) { }
|
||||
#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
|
||||
|
||||
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
||||
static inline void guest_enter(void)
|
||||
{
|
||||
if (vtime_accounting_enabled())
|
||||
vtime_guest_enter(current);
|
||||
else
|
||||
current->flags |= PF_VCPU;
|
||||
}
|
||||
|
||||
static inline void guest_exit(void)
|
||||
{
|
||||
if (vtime_accounting_enabled())
|
||||
vtime_guest_exit(current);
|
||||
else
|
||||
current->flags &= ~PF_VCPU;
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void guest_enter(void)
|
||||
{
|
||||
/*
|
||||
* This is running in ioctl context so its safe
|
||||
* to assume that it's the stime pending cputime
|
||||
* to flush.
|
||||
*/
|
||||
vtime_account_system(current);
|
||||
current->flags |= PF_VCPU;
|
||||
}
|
||||
|
||||
static inline void guest_exit(void)
|
||||
{
|
||||
/* Flush the guest cputime we spent on the guest */
|
||||
vtime_account_system(current);
|
||||
current->flags &= ~PF_VCPU;
|
||||
}
|
||||
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
#ifndef _LINUX_CONTEXT_TRACKING_STATE_H
|
||||
#define _LINUX_CONTEXT_TRACKING_STATE_H
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/static_key.h>
|
||||
|
||||
struct context_tracking {
|
||||
/*
|
||||
* When active is false, probes are unset in order
|
||||
* to minimize overhead: TIF flags are cleared
|
||||
* and calls to user_enter/exit are ignored. This
|
||||
* may be further optimized using static keys.
|
||||
*/
|
||||
bool active;
|
||||
enum ctx_state {
|
||||
IN_KERNEL = 0,
|
||||
IN_USER,
|
||||
} state;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
extern struct static_key context_tracking_enabled;
|
||||
DECLARE_PER_CPU(struct context_tracking, context_tracking);
|
||||
|
||||
static inline bool context_tracking_in_user(void)
|
||||
{
|
||||
return __this_cpu_read(context_tracking.state) == IN_USER;
|
||||
}
|
||||
|
||||
static inline bool context_tracking_active(void)
|
||||
{
|
||||
return __this_cpu_read(context_tracking.active);
|
||||
}
|
||||
#else
|
||||
static inline bool context_tracking_in_user(void) { return false; }
|
||||
static inline bool context_tracking_active(void) { return false; }
|
||||
#endif /* CONFIG_CONTEXT_TRACKING */
|
||||
|
||||
#endif
|
|
@ -1,126 +1,11 @@
|
|||
#ifndef LINUX_HARDIRQ_H
|
||||
#define LINUX_HARDIRQ_H
|
||||
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/preempt_mask.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/ftrace_irq.h>
|
||||
#include <linux/vtime.h>
|
||||
#include <asm/hardirq.h>
|
||||
|
||||
/*
|
||||
* We put the hardirq and softirq counter into the preemption
|
||||
* counter. The bitmask has the following meaning:
|
||||
*
|
||||
* - bits 0-7 are the preemption count (max preemption depth: 256)
|
||||
* - bits 8-15 are the softirq count (max # of softirqs: 256)
|
||||
*
|
||||
* The hardirq count can in theory reach the same as NR_IRQS.
|
||||
* In reality, the number of nested IRQS is limited to the stack
|
||||
* size as well. For archs with over 1000 IRQS it is not practical
|
||||
* to expect that they will all nest. We give a max of 10 bits for
|
||||
* hardirq nesting. An arch may choose to give less than 10 bits.
|
||||
* m68k expects it to be 8.
|
||||
*
|
||||
* - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
|
||||
* - bit 26 is the NMI_MASK
|
||||
* - bit 27 is the PREEMPT_ACTIVE flag
|
||||
*
|
||||
* PREEMPT_MASK: 0x000000ff
|
||||
* SOFTIRQ_MASK: 0x0000ff00
|
||||
* HARDIRQ_MASK: 0x03ff0000
|
||||
* NMI_MASK: 0x04000000
|
||||
*/
|
||||
#define PREEMPT_BITS 8
|
||||
#define SOFTIRQ_BITS 8
|
||||
#define NMI_BITS 1
|
||||
|
||||
#define MAX_HARDIRQ_BITS 10
|
||||
|
||||
#ifndef HARDIRQ_BITS
|
||||
# define HARDIRQ_BITS MAX_HARDIRQ_BITS
|
||||
#endif
|
||||
|
||||
#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
|
||||
#error HARDIRQ_BITS too high!
|
||||
#endif
|
||||
|
||||
#define PREEMPT_SHIFT 0
|
||||
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
|
||||
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
|
||||
#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
|
||||
|
||||
#define __IRQ_MASK(x) ((1UL << (x))-1)
|
||||
|
||||
#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
|
||||
#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
|
||||
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
|
||||
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
|
||||
|
||||
#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
|
||||
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
|
||||
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
|
||||
#define NMI_OFFSET (1UL << NMI_SHIFT)
|
||||
|
||||
#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
|
||||
|
||||
#ifndef PREEMPT_ACTIVE
|
||||
#define PREEMPT_ACTIVE_BITS 1
|
||||
#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
|
||||
#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
|
||||
#endif
|
||||
|
||||
#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
|
||||
#error PREEMPT_ACTIVE is too low!
|
||||
#endif
|
||||
|
||||
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
|
||||
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
|
||||
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
|
||||
| NMI_MASK))
|
||||
|
||||
/*
|
||||
* Are we doing bottom half or hardware interrupt processing?
|
||||
* Are we in a softirq context? Interrupt context?
|
||||
* in_softirq - Are we currently processing softirq or have bh disabled?
|
||||
* in_serving_softirq - Are we currently processing softirq?
|
||||
*/
|
||||
#define in_irq() (hardirq_count())
|
||||
#define in_softirq() (softirq_count())
|
||||
#define in_interrupt() (irq_count())
|
||||
#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
|
||||
|
||||
/*
|
||||
* Are we in NMI context?
|
||||
*/
|
||||
#define in_nmi() (preempt_count() & NMI_MASK)
|
||||
|
||||
#if defined(CONFIG_PREEMPT_COUNT)
|
||||
# define PREEMPT_CHECK_OFFSET 1
|
||||
#else
|
||||
# define PREEMPT_CHECK_OFFSET 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Are we running in atomic context? WARNING: this macro cannot
|
||||
* always detect atomic context; in particular, it cannot know about
|
||||
* held spinlocks in non-preemptible kernels. Thus it should not be
|
||||
* used in the general case to determine whether sleeping is possible.
|
||||
* Do not use in_atomic() in driver code.
|
||||
*/
|
||||
#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
|
||||
|
||||
/*
|
||||
* Check whether we were atomic before we did preempt_disable():
|
||||
* (used by the scheduler, *after* releasing the kernel lock)
|
||||
*/
|
||||
#define in_atomic_preempt_off() \
|
||||
((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
|
||||
|
||||
#ifdef CONFIG_PREEMPT_COUNT
|
||||
# define preemptible() (preempt_count() == 0 && !irqs_disabled())
|
||||
#else
|
||||
# define preemptible() 0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
|
||||
extern void synchronize_irq(unsigned int irq);
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
#ifndef LINUX_PREEMPT_MASK_H
|
||||
#define LINUX_PREEMPT_MASK_H
|
||||
|
||||
#include <linux/preempt.h>
|
||||
#include <asm/hardirq.h>
|
||||
|
||||
/*
|
||||
* We put the hardirq and softirq counter into the preemption
|
||||
* counter. The bitmask has the following meaning:
|
||||
*
|
||||
* - bits 0-7 are the preemption count (max preemption depth: 256)
|
||||
* - bits 8-15 are the softirq count (max # of softirqs: 256)
|
||||
*
|
||||
* The hardirq count can in theory reach the same as NR_IRQS.
|
||||
* In reality, the number of nested IRQS is limited to the stack
|
||||
* size as well. For archs with over 1000 IRQS it is not practical
|
||||
* to expect that they will all nest. We give a max of 10 bits for
|
||||
* hardirq nesting. An arch may choose to give less than 10 bits.
|
||||
* m68k expects it to be 8.
|
||||
*
|
||||
* - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
|
||||
* - bit 26 is the NMI_MASK
|
||||
* - bit 27 is the PREEMPT_ACTIVE flag
|
||||
*
|
||||
* PREEMPT_MASK: 0x000000ff
|
||||
* SOFTIRQ_MASK: 0x0000ff00
|
||||
* HARDIRQ_MASK: 0x03ff0000
|
||||
* NMI_MASK: 0x04000000
|
||||
*/
|
||||
#define PREEMPT_BITS 8
|
||||
#define SOFTIRQ_BITS 8
|
||||
#define NMI_BITS 1
|
||||
|
||||
#define MAX_HARDIRQ_BITS 10
|
||||
|
||||
#ifndef HARDIRQ_BITS
|
||||
# define HARDIRQ_BITS MAX_HARDIRQ_BITS
|
||||
#endif
|
||||
|
||||
#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
|
||||
#error HARDIRQ_BITS too high!
|
||||
#endif
|
||||
|
||||
#define PREEMPT_SHIFT 0
|
||||
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
|
||||
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
|
||||
#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
|
||||
|
||||
#define __IRQ_MASK(x) ((1UL << (x))-1)
|
||||
|
||||
#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
|
||||
#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
|
||||
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
|
||||
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
|
||||
|
||||
#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
|
||||
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
|
||||
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
|
||||
#define NMI_OFFSET (1UL << NMI_SHIFT)
|
||||
|
||||
#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
|
||||
|
||||
#ifndef PREEMPT_ACTIVE
|
||||
#define PREEMPT_ACTIVE_BITS 1
|
||||
#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
|
||||
#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
|
||||
#endif
|
||||
|
||||
#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
|
||||
#error PREEMPT_ACTIVE is too low!
|
||||
#endif
|
||||
|
||||
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
|
||||
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
|
||||
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
|
||||
| NMI_MASK))
|
||||
|
||||
/*
|
||||
* Are we doing bottom half or hardware interrupt processing?
|
||||
* Are we in a softirq context? Interrupt context?
|
||||
* in_softirq - Are we currently processing softirq or have bh disabled?
|
||||
* in_serving_softirq - Are we currently processing softirq?
|
||||
*/
|
||||
#define in_irq() (hardirq_count())
|
||||
#define in_softirq() (softirq_count())
|
||||
#define in_interrupt() (irq_count())
|
||||
#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
|
||||
|
||||
/*
|
||||
* Are we in NMI context?
|
||||
*/
|
||||
#define in_nmi() (preempt_count() & NMI_MASK)
|
||||
|
||||
#if defined(CONFIG_PREEMPT_COUNT)
|
||||
# define PREEMPT_CHECK_OFFSET 1
|
||||
#else
|
||||
# define PREEMPT_CHECK_OFFSET 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Are we running in atomic context? WARNING: this macro cannot
|
||||
* always detect atomic context; in particular, it cannot know about
|
||||
* held spinlocks in non-preemptible kernels. Thus it should not be
|
||||
* used in the general case to determine whether sleeping is possible.
|
||||
* Do not use in_atomic() in driver code.
|
||||
*/
|
||||
#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
|
||||
|
||||
/*
|
||||
* Check whether we were atomic before we did preempt_disable():
|
||||
* (used by the scheduler, *after* releasing the kernel lock)
|
||||
*/
|
||||
#define in_atomic_preempt_off() \
|
||||
((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
|
||||
|
||||
#ifdef CONFIG_PREEMPT_COUNT
|
||||
# define preemptible() (preempt_count() == 0 && !irqs_disabled())
|
||||
#else
|
||||
# define preemptible() 0
|
||||
#endif
|
||||
|
||||
#endif /* LINUX_PREEMPT_MASK_H */
|
|
@ -10,6 +10,8 @@
|
|||
#include <linux/irqflags.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/context_tracking_state.h>
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
#ifdef CONFIG_GENERIC_CLOCKEVENTS
|
||||
|
||||
|
@ -158,20 +160,51 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
|
|||
# endif /* !CONFIG_NO_HZ_COMMON */
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
extern bool tick_nohz_full_running;
|
||||
extern cpumask_var_t tick_nohz_full_mask;
|
||||
|
||||
static inline bool tick_nohz_full_enabled(void)
|
||||
{
|
||||
if (!static_key_false(&context_tracking_enabled))
|
||||
return false;
|
||||
|
||||
return tick_nohz_full_running;
|
||||
}
|
||||
|
||||
static inline bool tick_nohz_full_cpu(int cpu)
|
||||
{
|
||||
if (!tick_nohz_full_enabled())
|
||||
return false;
|
||||
|
||||
return cpumask_test_cpu(cpu, tick_nohz_full_mask);
|
||||
}
|
||||
|
||||
extern void tick_nohz_init(void);
|
||||
extern int tick_nohz_full_cpu(int cpu);
|
||||
extern void tick_nohz_full_check(void);
|
||||
extern void __tick_nohz_full_check(void);
|
||||
extern void tick_nohz_full_kick(void);
|
||||
extern void tick_nohz_full_kick_all(void);
|
||||
extern void tick_nohz_task_switch(struct task_struct *tsk);
|
||||
extern void __tick_nohz_task_switch(struct task_struct *tsk);
|
||||
#else
|
||||
static inline void tick_nohz_init(void) { }
|
||||
static inline int tick_nohz_full_cpu(int cpu) { return 0; }
|
||||
static inline void tick_nohz_full_check(void) { }
|
||||
static inline bool tick_nohz_full_enabled(void) { return false; }
|
||||
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
|
||||
static inline void __tick_nohz_full_check(void) { }
|
||||
static inline void tick_nohz_full_kick(void) { }
|
||||
static inline void tick_nohz_full_kick_all(void) { }
|
||||
static inline void tick_nohz_task_switch(struct task_struct *tsk) { }
|
||||
static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
|
||||
#endif
|
||||
|
||||
static inline void tick_nohz_full_check(void)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
__tick_nohz_full_check();
|
||||
}
|
||||
|
||||
static inline void tick_nohz_task_switch(struct task_struct *tsk)
|
||||
{
|
||||
if (tick_nohz_full_enabled())
|
||||
__tick_nohz_task_switch(tsk);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,18 +1,68 @@
|
|||
#ifndef _LINUX_KERNEL_VTIME_H
|
||||
#define _LINUX_KERNEL_VTIME_H
|
||||
|
||||
#include <linux/context_tracking_state.h>
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
|
||||
#include <asm/vtime.h>
|
||||
#endif
|
||||
|
||||
|
||||
struct task_struct;
|
||||
|
||||
/*
|
||||
* vtime_accounting_enabled() definitions/declarations
|
||||
*/
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
|
||||
static inline bool vtime_accounting_enabled(void) { return true; }
|
||||
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
|
||||
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
||||
static inline bool vtime_accounting_enabled(void)
|
||||
{
|
||||
if (static_key_false(&context_tracking_enabled)) {
|
||||
if (context_tracking_active())
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
|
||||
|
||||
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
static inline bool vtime_accounting_enabled(void) { return false; }
|
||||
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
|
||||
|
||||
|
||||
/*
|
||||
* Common vtime APIs
|
||||
*/
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
|
||||
#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
|
||||
extern void vtime_task_switch(struct task_struct *prev);
|
||||
#else
|
||||
extern void vtime_common_task_switch(struct task_struct *prev);
|
||||
static inline void vtime_task_switch(struct task_struct *prev)
|
||||
{
|
||||
if (vtime_accounting_enabled())
|
||||
vtime_common_task_switch(prev);
|
||||
}
|
||||
#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
|
||||
|
||||
extern void vtime_account_system(struct task_struct *tsk);
|
||||
extern void vtime_account_idle(struct task_struct *tsk);
|
||||
extern void vtime_account_user(struct task_struct *tsk);
|
||||
extern void vtime_account_irq_enter(struct task_struct *tsk);
|
||||
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
|
||||
static inline bool vtime_accounting_enabled(void) { return true; }
|
||||
#endif
|
||||
#ifdef __ARCH_HAS_VTIME_ACCOUNT
|
||||
extern void vtime_account_irq_enter(struct task_struct *tsk);
|
||||
#else
|
||||
extern void vtime_common_account_irq_enter(struct task_struct *tsk);
|
||||
static inline void vtime_account_irq_enter(struct task_struct *tsk)
|
||||
{
|
||||
if (vtime_accounting_enabled())
|
||||
vtime_common_account_irq_enter(tsk);
|
||||
}
|
||||
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
|
||||
|
||||
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
|
||||
|
||||
|
@ -20,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { }
|
|||
static inline void vtime_account_system(struct task_struct *tsk) { }
|
||||
static inline void vtime_account_user(struct task_struct *tsk) { }
|
||||
static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
|
||||
static inline bool vtime_accounting_enabled(void) { return false; }
|
||||
#endif
|
||||
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
|
||||
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
||||
extern void arch_vtime_task_switch(struct task_struct *tsk);
|
||||
extern void vtime_account_irq_exit(struct task_struct *tsk);
|
||||
extern bool vtime_accounting_enabled(void);
|
||||
extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
|
||||
|
||||
static inline void vtime_account_irq_exit(struct task_struct *tsk)
|
||||
{
|
||||
if (vtime_accounting_enabled())
|
||||
vtime_gen_account_irq_exit(tsk);
|
||||
}
|
||||
|
||||
extern void vtime_user_enter(struct task_struct *tsk);
|
||||
|
||||
static inline void vtime_user_exit(struct task_struct *tsk)
|
||||
{
|
||||
vtime_account_user(tsk);
|
||||
|
@ -35,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk)
|
|||
extern void vtime_guest_enter(struct task_struct *tsk);
|
||||
extern void vtime_guest_exit(struct task_struct *tsk);
|
||||
extern void vtime_init_idle(struct task_struct *tsk, int cpu);
|
||||
#else
|
||||
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
|
||||
static inline void vtime_account_irq_exit(struct task_struct *tsk)
|
||||
{
|
||||
/* On hard|softirq exit we always account to hard|softirq cputime */
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM context_tracking
|
||||
|
||||
#if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_CONTEXT_TRACKING_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
DECLARE_EVENT_CLASS(context_tracking_user,
|
||||
|
||||
TP_PROTO(int dummy),
|
||||
|
||||
TP_ARGS(dummy),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( int, dummy )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dummy = dummy;
|
||||
),
|
||||
|
||||
TP_printk("%s", "")
|
||||
);
|
||||
|
||||
/**
|
||||
* user_enter - called when the kernel resumes to userspace
|
||||
* @dummy: dummy arg to make trace event macro happy
|
||||
*
|
||||
* This event occurs when the kernel resumes to userspace after
|
||||
* an exception or a syscall.
|
||||
*/
|
||||
DEFINE_EVENT(context_tracking_user, user_enter,
|
||||
|
||||
TP_PROTO(int dummy),
|
||||
|
||||
TP_ARGS(dummy)
|
||||
);
|
||||
|
||||
/**
|
||||
* user_exit - called when userspace enters the kernel
|
||||
* @dummy: dummy arg to make trace event macro happy
|
||||
*
|
||||
* This event occurs when userspace enters the kernel through
|
||||
* an exception or a syscall.
|
||||
*/
|
||||
DEFINE_EVENT(context_tracking_user, user_exit,
|
||||
|
||||
TP_PROTO(int dummy),
|
||||
|
||||
TP_ARGS(dummy)
|
||||
);
|
||||
|
||||
|
||||
#endif /* _TRACE_CONTEXT_TRACKING_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
#include <trace/define_trace.h>
|
28
init/Kconfig
28
init/Kconfig
|
@ -527,13 +527,29 @@ config RCU_USER_QS
|
|||
config CONTEXT_TRACKING_FORCE
|
||||
bool "Force context tracking"
|
||||
depends on CONTEXT_TRACKING
|
||||
default CONTEXT_TRACKING
|
||||
default y if !NO_HZ_FULL
|
||||
help
|
||||
Probe on user/kernel boundaries by default in order to
|
||||
test the features that rely on it such as userspace RCU extended
|
||||
quiescent states.
|
||||
This test is there for debugging until we have a real user like the
|
||||
full dynticks mode.
|
||||
The major pre-requirement for full dynticks to work is to
|
||||
support the context tracking subsystem. But there are also
|
||||
other dependencies to provide in order to make the full
|
||||
dynticks working.
|
||||
|
||||
This option stands for testing when an arch implements the
|
||||
context tracking backend but doesn't yet fullfill all the
|
||||
requirements to make the full dynticks feature working.
|
||||
Without the full dynticks, there is no way to test the support
|
||||
for context tracking and the subsystems that rely on it: RCU
|
||||
userspace extended quiescent state and tickless cputime
|
||||
accounting. This option copes with the absence of the full
|
||||
dynticks subsystem by forcing the context tracking on all
|
||||
CPUs in the system.
|
||||
|
||||
Say Y only if you're working on the developpement of an
|
||||
architecture backend for the context tracking.
|
||||
|
||||
Say N otherwise, this option brings an overhead that you
|
||||
don't want in production.
|
||||
|
||||
|
||||
config RCU_FANOUT
|
||||
int "Tree-based hierarchical RCU fanout value"
|
||||
|
|
|
@ -75,6 +75,7 @@
|
|||
#include <linux/blkdev.h>
|
||||
#include <linux/elevator.h>
|
||||
#include <linux/sched_clock.h>
|
||||
#include <linux/context_tracking.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/bugs.h>
|
||||
|
@ -545,6 +546,7 @@ asmlinkage void __init start_kernel(void)
|
|||
idr_init_cache();
|
||||
rcu_init();
|
||||
tick_nohz_init();
|
||||
context_tracking_init();
|
||||
radix_tree_init();
|
||||
/* init some links before init_ISA_irqs() */
|
||||
early_irq_init();
|
||||
|
|
|
@ -20,22 +20,33 @@
|
|||
#include <linux/hardirq.h>
|
||||
#include <linux/export.h>
|
||||
|
||||
DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
|
||||
.active = true,
|
||||
#endif
|
||||
};
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/context_tracking.h>
|
||||
|
||||
struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
|
||||
EXPORT_SYMBOL_GPL(context_tracking_enabled);
|
||||
|
||||
DEFINE_PER_CPU(struct context_tracking, context_tracking);
|
||||
EXPORT_SYMBOL_GPL(context_tracking);
|
||||
|
||||
void context_tracking_cpu_set(int cpu)
|
||||
{
|
||||
if (!per_cpu(context_tracking.active, cpu)) {
|
||||
per_cpu(context_tracking.active, cpu) = true;
|
||||
static_key_slow_inc(&context_tracking_enabled);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* user_enter - Inform the context tracking that the CPU is going to
|
||||
* enter userspace mode.
|
||||
* context_tracking_user_enter - Inform the context tracking that the CPU is going to
|
||||
* enter userspace mode.
|
||||
*
|
||||
* This function must be called right before we switch from the kernel
|
||||
* to userspace, when it's guaranteed the remaining kernel instructions
|
||||
* to execute won't use any RCU read side critical section because this
|
||||
* function sets RCU in extended quiescent state.
|
||||
*/
|
||||
void user_enter(void)
|
||||
void context_tracking_user_enter(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
@ -54,17 +65,32 @@ void user_enter(void)
|
|||
WARN_ON_ONCE(!current->mm);
|
||||
|
||||
local_irq_save(flags);
|
||||
if (__this_cpu_read(context_tracking.active) &&
|
||||
__this_cpu_read(context_tracking.state) != IN_USER) {
|
||||
if ( __this_cpu_read(context_tracking.state) != IN_USER) {
|
||||
if (__this_cpu_read(context_tracking.active)) {
|
||||
trace_user_enter(0);
|
||||
/*
|
||||
* At this stage, only low level arch entry code remains and
|
||||
* then we'll run in userspace. We can assume there won't be
|
||||
* any RCU read-side critical section until the next call to
|
||||
* user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
|
||||
* on the tick.
|
||||
*/
|
||||
vtime_user_enter(current);
|
||||
rcu_user_enter();
|
||||
}
|
||||
/*
|
||||
* At this stage, only low level arch entry code remains and
|
||||
* then we'll run in userspace. We can assume there won't be
|
||||
* any RCU read-side critical section until the next call to
|
||||
* user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
|
||||
* on the tick.
|
||||
* Even if context tracking is disabled on this CPU, because it's outside
|
||||
* the full dynticks mask for example, we still have to keep track of the
|
||||
* context transitions and states to prevent inconsistency on those of
|
||||
* other CPUs.
|
||||
* If a task triggers an exception in userspace, sleep on the exception
|
||||
* handler and then migrate to another CPU, that new CPU must know where
|
||||
* the exception returns by the time we call exception_exit().
|
||||
* This information can only be provided by the previous CPU when it called
|
||||
* exception_enter().
|
||||
* OTOH we can spare the calls to vtime and RCU when context_tracking.active
|
||||
* is false because we know that CPU is not tickless.
|
||||
*/
|
||||
vtime_user_enter(current);
|
||||
rcu_user_enter();
|
||||
__this_cpu_write(context_tracking.state, IN_USER);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
@ -87,10 +113,9 @@ void user_enter(void)
|
|||
*/
|
||||
void __sched notrace preempt_schedule_context(void)
|
||||
{
|
||||
struct thread_info *ti = current_thread_info();
|
||||
enum ctx_state prev_ctx;
|
||||
|
||||
if (likely(ti->preempt_count || irqs_disabled()))
|
||||
if (likely(!preemptible()))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
|
|||
#endif /* CONFIG_PREEMPT */
|
||||
|
||||
/**
|
||||
* user_exit - Inform the context tracking that the CPU is
|
||||
* exiting userspace mode and entering the kernel.
|
||||
* context_tracking_user_exit - Inform the context tracking that the CPU is
|
||||
* exiting userspace mode and entering the kernel.
|
||||
*
|
||||
* This function must be called after we entered the kernel from userspace
|
||||
* before any use of RCU read side critical section. This potentially include
|
||||
|
@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
|
|||
* This call supports re-entrancy. This way it can be called from any exception
|
||||
* handler without needing to know if we came from userspace or not.
|
||||
*/
|
||||
void user_exit(void)
|
||||
void context_tracking_user_exit(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
@ -131,38 +156,22 @@ void user_exit(void)
|
|||
|
||||
local_irq_save(flags);
|
||||
if (__this_cpu_read(context_tracking.state) == IN_USER) {
|
||||
/*
|
||||
* We are going to run code that may use RCU. Inform
|
||||
* RCU core about that (ie: we may need the tick again).
|
||||
*/
|
||||
rcu_user_exit();
|
||||
vtime_user_exit(current);
|
||||
if (__this_cpu_read(context_tracking.active)) {
|
||||
/*
|
||||
* We are going to run code that may use RCU. Inform
|
||||
* RCU core about that (ie: we may need the tick again).
|
||||
*/
|
||||
rcu_user_exit();
|
||||
vtime_user_exit(current);
|
||||
trace_user_exit(0);
|
||||
}
|
||||
__this_cpu_write(context_tracking.state, IN_KERNEL);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void guest_enter(void)
|
||||
{
|
||||
if (vtime_accounting_enabled())
|
||||
vtime_guest_enter(current);
|
||||
else
|
||||
__guest_enter();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(guest_enter);
|
||||
|
||||
void guest_exit(void)
|
||||
{
|
||||
if (vtime_accounting_enabled())
|
||||
vtime_guest_exit(current);
|
||||
else
|
||||
__guest_exit();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(guest_exit);
|
||||
|
||||
|
||||
/**
|
||||
* context_tracking_task_switch - context switch the syscall callbacks
|
||||
* __context_tracking_task_switch - context switch the syscall callbacks
|
||||
* @prev: the task that is being switched out
|
||||
* @next: the task that is being switched in
|
||||
*
|
||||
|
@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit);
|
|||
* migrate to some CPU that doesn't do the context tracking. As such the TIF
|
||||
* flag may not be desired there.
|
||||
*/
|
||||
void context_tracking_task_switch(struct task_struct *prev,
|
||||
struct task_struct *next)
|
||||
void __context_tracking_task_switch(struct task_struct *prev,
|
||||
struct task_struct *next)
|
||||
{
|
||||
if (__this_cpu_read(context_tracking.active)) {
|
||||
clear_tsk_thread_flag(prev, TIF_NOHZ);
|
||||
set_tsk_thread_flag(next, TIF_NOHZ);
|
||||
}
|
||||
clear_tsk_thread_flag(prev, TIF_NOHZ);
|
||||
set_tsk_thread_flag(next, TIF_NOHZ);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_FORCE
|
||||
void __init context_tracking_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
context_tracking_cpu_set(cpu);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -2510,13 +2510,11 @@ void __sched schedule_preempt_disabled(void)
|
|||
*/
|
||||
asmlinkage void __sched notrace preempt_schedule(void)
|
||||
{
|
||||
struct thread_info *ti = current_thread_info();
|
||||
|
||||
/*
|
||||
* If there is a non-zero preempt_count or interrupts are disabled,
|
||||
* we do not want to preempt the current task. Just return..
|
||||
*/
|
||||
if (likely(ti->preempt_count || irqs_disabled()))
|
||||
if (likely(!preemptible()))
|
||||
return;
|
||||
|
||||
do {
|
||||
|
|
|
@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
|
|||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
|
||||
#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
|
||||
void vtime_task_switch(struct task_struct *prev)
|
||||
void vtime_common_task_switch(struct task_struct *prev)
|
||||
{
|
||||
if (!vtime_accounting_enabled())
|
||||
return;
|
||||
|
||||
if (is_idle_task(prev))
|
||||
vtime_account_idle(prev);
|
||||
else
|
||||
|
@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev)
|
|||
* vtime_account().
|
||||
*/
|
||||
#ifndef __ARCH_HAS_VTIME_ACCOUNT
|
||||
void vtime_account_irq_enter(struct task_struct *tsk)
|
||||
void vtime_common_account_irq_enter(struct task_struct *tsk)
|
||||
{
|
||||
if (!vtime_accounting_enabled())
|
||||
return;
|
||||
|
||||
if (!in_interrupt()) {
|
||||
/*
|
||||
* If we interrupted user, context_tracking_in_user()
|
||||
|
@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
|
|||
}
|
||||
vtime_account_system(tsk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
|
||||
EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
|
||||
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
|
||||
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
|
||||
|
||||
|
@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr,
|
|||
{
|
||||
cputime_t rtime, stime, utime, total;
|
||||
|
||||
if (vtime_accounting_enabled()) {
|
||||
*ut = curr->utime;
|
||||
*st = curr->stime;
|
||||
return;
|
||||
}
|
||||
|
||||
stime = curr->stime;
|
||||
total = stime + curr->utime;
|
||||
|
||||
|
@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk)
|
|||
|
||||
void vtime_account_system(struct task_struct *tsk)
|
||||
{
|
||||
if (!vtime_accounting_enabled())
|
||||
return;
|
||||
|
||||
write_seqlock(&tsk->vtime_seqlock);
|
||||
__vtime_account_system(tsk);
|
||||
write_sequnlock(&tsk->vtime_seqlock);
|
||||
}
|
||||
|
||||
void vtime_account_irq_exit(struct task_struct *tsk)
|
||||
void vtime_gen_account_irq_exit(struct task_struct *tsk)
|
||||
{
|
||||
if (!vtime_accounting_enabled())
|
||||
return;
|
||||
|
||||
write_seqlock(&tsk->vtime_seqlock);
|
||||
__vtime_account_system(tsk);
|
||||
if (context_tracking_in_user())
|
||||
tsk->vtime_snap_whence = VTIME_USER;
|
||||
__vtime_account_system(tsk);
|
||||
write_sequnlock(&tsk->vtime_seqlock);
|
||||
}
|
||||
|
||||
|
@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk)
|
|||
{
|
||||
cputime_t delta_cpu;
|
||||
|
||||
if (!vtime_accounting_enabled())
|
||||
return;
|
||||
|
||||
delta_cpu = get_vtime_delta(tsk);
|
||||
|
||||
write_seqlock(&tsk->vtime_seqlock);
|
||||
delta_cpu = get_vtime_delta(tsk);
|
||||
tsk->vtime_snap_whence = VTIME_SYS;
|
||||
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
|
||||
write_sequnlock(&tsk->vtime_seqlock);
|
||||
|
@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk)
|
|||
|
||||
void vtime_user_enter(struct task_struct *tsk)
|
||||
{
|
||||
if (!vtime_accounting_enabled())
|
||||
return;
|
||||
|
||||
write_seqlock(&tsk->vtime_seqlock);
|
||||
tsk->vtime_snap_whence = VTIME_USER;
|
||||
__vtime_account_system(tsk);
|
||||
tsk->vtime_snap_whence = VTIME_USER;
|
||||
write_sequnlock(&tsk->vtime_seqlock);
|
||||
}
|
||||
|
||||
void vtime_guest_enter(struct task_struct *tsk)
|
||||
{
|
||||
/*
|
||||
* The flags must be updated under the lock with
|
||||
* the vtime_snap flush and update.
|
||||
* That enforces a right ordering and update sequence
|
||||
* synchronization against the reader (task_gtime())
|
||||
* that can thus safely catch up with a tickless delta.
|
||||
*/
|
||||
write_seqlock(&tsk->vtime_seqlock);
|
||||
__vtime_account_system(tsk);
|
||||
current->flags |= PF_VCPU;
|
||||
write_sequnlock(&tsk->vtime_seqlock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vtime_guest_enter);
|
||||
|
||||
void vtime_guest_exit(struct task_struct *tsk)
|
||||
{
|
||||
|
@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk)
|
|||
current->flags &= ~PF_VCPU;
|
||||
write_sequnlock(&tsk->vtime_seqlock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vtime_guest_exit);
|
||||
|
||||
void vtime_account_idle(struct task_struct *tsk)
|
||||
{
|
||||
|
@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk)
|
|||
account_idle_time(delta_cpu);
|
||||
}
|
||||
|
||||
bool vtime_accounting_enabled(void)
|
||||
{
|
||||
return context_tracking_active();
|
||||
}
|
||||
|
||||
void arch_vtime_task_switch(struct task_struct *prev)
|
||||
{
|
||||
write_seqlock(&prev->vtime_seqlock);
|
||||
|
|
|
@ -105,7 +105,6 @@ config NO_HZ_FULL
|
|||
select RCU_USER_QS
|
||||
select RCU_NOCB_CPU
|
||||
select VIRT_CPU_ACCOUNTING_GEN
|
||||
select CONTEXT_TRACKING_FORCE
|
||||
select IRQ_WORK
|
||||
help
|
||||
Adaptively try to shutdown the tick whenever possible, even when
|
||||
|
|
|
@ -121,7 +121,7 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
|
|||
BUG_ON(bits > 32);
|
||||
WARN_ON(!irqs_disabled());
|
||||
read_sched_clock = read;
|
||||
sched_clock_mask = (1 << bits) - 1;
|
||||
sched_clock_mask = (1ULL << bits) - 1;
|
||||
cd.rate = rate;
|
||||
|
||||
/* calculate the mult/shift to convert counter ticks to ns. */
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <linux/irq_work.h>
|
||||
#include <linux/posix-timers.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/context_tracking.h>
|
||||
|
||||
#include <asm/irq_regs.h>
|
||||
|
||||
|
@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
static cpumask_var_t nohz_full_mask;
|
||||
bool have_nohz_full_mask;
|
||||
cpumask_var_t tick_nohz_full_mask;
|
||||
bool tick_nohz_full_running;
|
||||
|
||||
static bool can_stop_full_tick(void)
|
||||
{
|
||||
|
@ -182,7 +183,8 @@ static bool can_stop_full_tick(void)
|
|||
* Don't allow the user to think they can get
|
||||
* full NO_HZ with this machine.
|
||||
*/
|
||||
WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
|
||||
WARN_ONCE(tick_nohz_full_running,
|
||||
"NO_HZ FULL will not work with unstable sched clock");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
@ -196,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
|
|||
* Re-evaluate the need for the tick on the current CPU
|
||||
* and restart it if necessary.
|
||||
*/
|
||||
void tick_nohz_full_check(void)
|
||||
void __tick_nohz_full_check(void)
|
||||
{
|
||||
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
|
||||
|
||||
|
@ -210,7 +212,7 @@ void tick_nohz_full_check(void)
|
|||
|
||||
static void nohz_full_kick_work_func(struct irq_work *work)
|
||||
{
|
||||
tick_nohz_full_check();
|
||||
__tick_nohz_full_check();
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
|
||||
|
@ -229,7 +231,7 @@ void tick_nohz_full_kick(void)
|
|||
|
||||
static void nohz_full_kick_ipi(void *info)
|
||||
{
|
||||
tick_nohz_full_check();
|
||||
__tick_nohz_full_check();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -238,11 +240,11 @@ static void nohz_full_kick_ipi(void *info)
|
|||
*/
|
||||
void tick_nohz_full_kick_all(void)
|
||||
{
|
||||
if (!have_nohz_full_mask)
|
||||
if (!tick_nohz_full_running)
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
smp_call_function_many(nohz_full_mask,
|
||||
smp_call_function_many(tick_nohz_full_mask,
|
||||
nohz_full_kick_ipi, NULL, false);
|
||||
preempt_enable();
|
||||
}
|
||||
|
@ -252,7 +254,7 @@ void tick_nohz_full_kick_all(void)
|
|||
* It might need the tick due to per task/process properties:
|
||||
* perf events, posix cpu timers, ...
|
||||
*/
|
||||
void tick_nohz_task_switch(struct task_struct *tsk)
|
||||
void __tick_nohz_task_switch(struct task_struct *tsk)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
|
@ -268,31 +270,23 @@ void tick_nohz_task_switch(struct task_struct *tsk)
|
|||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
int tick_nohz_full_cpu(int cpu)
|
||||
{
|
||||
if (!have_nohz_full_mask)
|
||||
return 0;
|
||||
|
||||
return cpumask_test_cpu(cpu, nohz_full_mask);
|
||||
}
|
||||
|
||||
/* Parse the boot-time nohz CPU list from the kernel parameters. */
|
||||
static int __init tick_nohz_full_setup(char *str)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
alloc_bootmem_cpumask_var(&nohz_full_mask);
|
||||
if (cpulist_parse(str, nohz_full_mask) < 0) {
|
||||
alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
|
||||
if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
|
||||
pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
cpu = smp_processor_id();
|
||||
if (cpumask_test_cpu(cpu, nohz_full_mask)) {
|
||||
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
|
||||
pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
|
||||
cpumask_clear_cpu(cpu, nohz_full_mask);
|
||||
cpumask_clear_cpu(cpu, tick_nohz_full_mask);
|
||||
}
|
||||
have_nohz_full_mask = true;
|
||||
tick_nohz_full_running = true;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -310,7 +304,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
|
|||
* If we handle the timekeeping duty for full dynticks CPUs,
|
||||
* we can't safely shutdown that CPU.
|
||||
*/
|
||||
if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
|
||||
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
|
||||
return NOTIFY_BAD;
|
||||
break;
|
||||
}
|
||||
|
@ -329,14 +323,14 @@ static int tick_nohz_init_all(void)
|
|||
int err = -1;
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL_ALL
|
||||
if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
|
||||
if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
|
||||
pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
|
||||
return err;
|
||||
}
|
||||
err = 0;
|
||||
cpumask_setall(nohz_full_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
|
||||
have_nohz_full_mask = true;
|
||||
cpumask_setall(tick_nohz_full_mask);
|
||||
cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
|
||||
tick_nohz_full_running = true;
|
||||
#endif
|
||||
return err;
|
||||
}
|
||||
|
@ -345,17 +339,18 @@ void __init tick_nohz_init(void)
|
|||
{
|
||||
int cpu;
|
||||
|
||||
if (!have_nohz_full_mask) {
|
||||
if (!tick_nohz_full_running) {
|
||||
if (tick_nohz_init_all() < 0)
|
||||
return;
|
||||
}
|
||||
|
||||
for_each_cpu(cpu, tick_nohz_full_mask)
|
||||
context_tracking_cpu_set(cpu);
|
||||
|
||||
cpu_notifier(tick_nohz_cpu_down_callback, 0);
|
||||
cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
|
||||
cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
|
||||
pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
|
||||
}
|
||||
#else
|
||||
#define have_nohz_full_mask (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -733,7 +728,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (have_nohz_full_mask) {
|
||||
if (tick_nohz_full_enabled()) {
|
||||
/*
|
||||
* Keep the tick alive to guarantee timekeeping progression
|
||||
* if there are full dynticks CPUs around
|
||||
|
|
Loading…
Reference in New Issue