Updates for timers and timekeeping core code:
- Expose CLOCK_TAI to instrumentation to aid with TSN debugging. - Ensure that the clockevent is stopped when there is no timer armed to avoid pointless wakeups. - Make the sched clock frequency handling and rounding consistent. - Provide a better debugobject hint for delayed works. The timer callback is always the same, which makes it difficult to identify the underlying work. Use the work function as a hint instead. - Move the timer specific sysctl code into the timer subsystem. - The usual set of improvements and cleanups -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmKLPHMTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoZBoEACIURtS8w9PFZ6q/2mFq0pTYi/uI/HQ vqbB6gCbrjfL6QwInd7jxDc/UoqEOllG9pTaGdWx/0Gi9syDosEbeop7cvvt2xi+ pReoEN1kVI3JAVrQFIAuGw4EMuzYB8PfuZkm1PdozcCP9qkgDmtippVxe05sFQ+/ RPdA29vE3g63eXkSFBhEID23pQR8yKLbqVq6KcH87OipZedL+2fry3yB+/9sLuuU /PFLbI6B9f43S2sfo6szzpFkpd6tJlBlu02IrB6gh4IxKrslmZb5onpvcf6iT+19 rFh5A15GFWoZUC8EjH1sBpATq3wA/jfGEOPWgy07N5SmobtJvWSM5yvT+gC3qXqm C/bjyjqXzLKftG7KIXo/hWewtsjdovMbdfcMBsGiatytNBZfI1GR/4Pq60/qpTHZ qJo35trOUcP6o1njphwONy3lisq78S7xaozpWO1hIMTcAqGgBkm/lOieGMM4hGnE Ps0Im3ZsOXNGllulN+3h+UHstM5/y6f/vzBsw7pfIG66i6KqebAiNjbMfHCr22sX 7UavNCoFggUQgZVgUYX/AscdW4/Dwx6R5YUqj1EBqztknd70Ac4TqjaIz4Xa6ZER z+eQSSt5XqqV2eKWA4FsQYmCIc+BvQ4apSA6+whz9vmsvCYtB7zzSfeh+xkgcl1/ Cc0N6G5+L9v0Gw== =De28 -----END PGP SIGNATURE----- Merge tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull timer and timekeeping updates from Thomas Gleixner: - Expose CLOCK_TAI to instrumentation to aid with TSN debugging. - Ensure that the clockevent is stopped when there is no timer armed to avoid pointless wakeups. - Make the sched clock frequency handling and rounding consistent. - Provide a better debugobject hint for delayed works. The timer callback is always the same, which makes it difficult to identify the underlying work. Use the work function as a hint instead. 
- Move the timer specific sysctl code into the timer subsystem. - The usual set of improvements and cleanups * tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: timers: Provide a better debugobjects hint for delayed works time/sched_clock: Fix formatting of frequency reporting code time/sched_clock: Use Hz as the unit for clock rate reporting below 4kHz time/sched_clock: Round the frequency reported to nearest rather than down timekeeping: Consolidate fast timekeeper timekeeping: Annotate ktime_get_boot_fast_ns() with data_race() timers/nohz: Switch to ONESHOT_STOPPED in the low-res handler when the tick is stopped timekeeping: Introduce fast accessor to clock tai tracing/timer: Add missing argument documentation of trace points clocksource: Replace cpumask_weight() with cpumask_empty() timers: Move timer sysctl into the timer code clockevents: Use dedicated list iterator variable timers: Simplify calc_index() timers: Initialize base::next_expiry_recalc in timers_prepare_cpu()
This commit is contained in:
commit
6e01f86fb2
|
@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
|
|||
.. c:function:: u64 ktime_get_mono_fast_ns( void )
|
||||
u64 ktime_get_raw_fast_ns( void )
|
||||
u64 ktime_get_boot_fast_ns( void )
|
||||
u64 ktime_get_tai_fast_ns( void )
|
||||
u64 ktime_get_real_fast_ns( void )
|
||||
|
||||
These variants are safe to call from any context, including from
|
||||
|
|
|
@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void)
|
|||
extern u64 ktime_get_mono_fast_ns(void);
|
||||
extern u64 ktime_get_raw_fast_ns(void);
|
||||
extern u64 ktime_get_boot_fast_ns(void);
|
||||
extern u64 ktime_get_tai_fast_ns(void);
|
||||
extern u64 ktime_get_real_fast_ns(void);
|
||||
|
||||
/*
|
||||
|
|
|
@ -196,14 +196,6 @@ extern void init_timers(void);
|
|||
struct hrtimer;
|
||||
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
|
||||
struct ctl_table;
|
||||
|
||||
extern unsigned int sysctl_timer_migration;
|
||||
int timer_migration_handler(struct ctl_table *table, int write,
|
||||
void *buffer, size_t *lenp, loff_t *ppos);
|
||||
#endif
|
||||
|
||||
unsigned long __round_jiffies(unsigned long j, int cpu);
|
||||
unsigned long __round_jiffies_relative(unsigned long j, int cpu);
|
||||
unsigned long round_jiffies(unsigned long j);
|
||||
|
|
|
@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init,
|
|||
* timer_start - called when the timer is started
|
||||
* @timer: pointer to struct timer_list
|
||||
* @expires: the timer's expiry time
|
||||
* @flags: the timer's flags
|
||||
*/
|
||||
TRACE_EVENT(timer_start,
|
||||
|
||||
|
@ -84,6 +85,7 @@ TRACE_EVENT(timer_start,
|
|||
/**
|
||||
* timer_expire_entry - called immediately before the timer callback
|
||||
* @timer: pointer to struct timer_list
|
||||
* @baseclk: value of timer_base::clk when timer expires
|
||||
*
|
||||
* Allows determining the timer latency.
|
||||
*/
|
||||
|
@ -191,6 +193,7 @@ TRACE_EVENT(hrtimer_init,
|
|||
/**
|
||||
* hrtimer_start - called when the hrtimer is started
|
||||
* @hrtimer: pointer to struct hrtimer
|
||||
* @mode: the hrtimer's mode
|
||||
*/
|
||||
TRACE_EVENT(hrtimer_start,
|
||||
|
||||
|
|
|
@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = {
|
|||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
|
||||
{
|
||||
.procname = "timer_migration",
|
||||
.data = &sysctl_timer_migration,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = timer_migration_handler,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
{
|
||||
.procname = "unprivileged_bpf_disabled",
|
||||
|
|
|
@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev,
|
|||
{
|
||||
char name[CS_NAME_LEN];
|
||||
ssize_t ret = sysfs_get_uname(buf, name, count);
|
||||
struct clock_event_device *ce;
|
||||
struct clock_event_device *ce = NULL, *iter;
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev,
|
|||
ret = -ENODEV;
|
||||
mutex_lock(&clockevents_mutex);
|
||||
raw_spin_lock_irq(&clockevents_lock);
|
||||
list_for_each_entry(ce, &clockevent_devices, list) {
|
||||
if (!strcmp(ce->name, name)) {
|
||||
ret = __clockevents_try_unbind(ce, dev->id);
|
||||
list_for_each_entry(iter, &clockevent_devices, list) {
|
||||
if (!strcmp(iter->name, name)) {
|
||||
ret = __clockevents_try_unbind(iter, dev->id);
|
||||
ce = iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
|
|||
cpus_read_lock();
|
||||
preempt_disable();
|
||||
clocksource_verify_choose_cpus();
|
||||
if (cpumask_weight(&cpus_chosen) == 0) {
|
||||
if (cpumask_empty(&cpus_chosen)) {
|
||||
preempt_enable();
|
||||
cpus_read_unlock();
|
||||
pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <linux/jiffies.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/math.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/clock.h>
|
||||
|
@ -199,16 +200,14 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
|
|||
|
||||
r = rate;
|
||||
if (r >= 4000000) {
|
||||
r /= 1000000;
|
||||
r = DIV_ROUND_CLOSEST(r, 1000000);
|
||||
r_unit = 'M';
|
||||
} else {
|
||||
if (r >= 1000) {
|
||||
r /= 1000;
|
||||
} else if (r >= 4000) {
|
||||
r = DIV_ROUND_CLOSEST(r, 1000);
|
||||
r_unit = 'k';
|
||||
} else {
|
||||
r_unit = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate the ns resolution of this counter */
|
||||
res = cyc_to_ns(1ULL, new_mult, new_shift);
|
||||
|
|
|
@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
|
|||
if (unlikely(expires == KTIME_MAX)) {
|
||||
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
|
||||
hrtimer_cancel(&ts->sched_timer);
|
||||
else
|
||||
tick_program_event(KTIME_MAX, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
|
|||
tick_sched_do_timer(ts, now);
|
||||
tick_sched_handle(ts, regs);
|
||||
|
||||
/* No need to reprogram if we are running tickless */
|
||||
if (unlikely(ts->tick_stopped))
|
||||
if (unlikely(ts->tick_stopped)) {
|
||||
/*
|
||||
* The clockevent device is not reprogrammed, so change the
|
||||
* clock event device to ONESHOT_STOPPED to avoid spurious
|
||||
* interrupts on devices which might not be truly one shot.
|
||||
*/
|
||||
tick_program_event(KTIME_MAX, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
|
||||
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
|
||||
|
|
|
@ -429,6 +429,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
|
|||
memcpy(base + 1, base, sizeof(*base));
|
||||
}
|
||||
|
||||
static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
|
||||
{
|
||||
u64 delta, cycles = tk_clock_read(tkr);
|
||||
|
||||
delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
|
||||
return timekeeping_delta_to_ns(tkr, delta);
|
||||
}
|
||||
|
||||
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
|
||||
{
|
||||
struct tk_read_base *tkr;
|
||||
|
@ -439,12 +447,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
|
|||
seq = raw_read_seqcount_latch(&tkf->seq);
|
||||
tkr = tkf->base + (seq & 0x01);
|
||||
now = ktime_to_ns(tkr->base);
|
||||
|
||||
now += timekeeping_delta_to_ns(tkr,
|
||||
clocksource_delta(
|
||||
tk_clock_read(tkr),
|
||||
tkr->cycle_last,
|
||||
tkr->mask));
|
||||
now += fast_tk_get_delta_ns(tkr);
|
||||
} while (read_seqcount_latch_retry(&tkf->seq, seq));
|
||||
|
||||
return now;
|
||||
|
@ -528,10 +531,27 @@ u64 notrace ktime_get_boot_fast_ns(void)
|
|||
{
|
||||
struct timekeeper *tk = &tk_core.timekeeper;
|
||||
|
||||
return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
|
||||
return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
|
||||
|
||||
/**
|
||||
* ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
|
||||
*
|
||||
* The same limitations as described for ktime_get_boot_fast_ns() apply. The
|
||||
* mono time and the TAI offset are not read atomically which may yield wrong
|
||||
* readouts. However, an update of the TAI offset is a rare event, e.g., caused
|
||||
* by settime or adjtimex with an offset. The user of this function has to deal
|
||||
* with the possibility of wrong timestamps in post processing.
|
||||
*/
|
||||
u64 notrace ktime_get_tai_fast_ns(void)
|
||||
{
|
||||
struct timekeeper *tk = &tk_core.timekeeper;
|
||||
|
||||
return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
|
||||
|
||||
static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
|
||||
{
|
||||
struct tk_read_base *tkr;
|
||||
|
@ -543,10 +563,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
|
|||
tkr = tkf->base + (seq & 0x01);
|
||||
basem = ktime_to_ns(tkr->base);
|
||||
baser = ktime_to_ns(tkr->base_real);
|
||||
|
||||
delta = timekeeping_delta_to_ns(tkr,
|
||||
clocksource_delta(tk_clock_read(tkr),
|
||||
tkr->cycle_last, tkr->mask));
|
||||
delta = fast_tk_get_delta_ns(tkr);
|
||||
} while (read_seqcount_latch_retry(&tkf->seq, seq));
|
||||
|
||||
if (mono)
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/sysctl.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/unistd.h>
|
||||
|
@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work);
|
|||
static DECLARE_WORK(timer_update_work, timer_update_keys);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned int sysctl_timer_migration = 1;
|
||||
static unsigned int sysctl_timer_migration = 1;
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
|
||||
|
||||
|
@ -234,7 +235,42 @@ static void timers_update_migration(void)
|
|||
else
|
||||
static_branch_disable(&timers_migration_enabled);
|
||||
}
|
||||
#else
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static int timer_migration_handler(struct ctl_table *table, int write,
|
||||
void *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&timer_keys_mutex);
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
if (!ret && write)
|
||||
timers_update_migration();
|
||||
mutex_unlock(&timer_keys_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct ctl_table timer_sysctl[] = {
|
||||
{
|
||||
.procname = "timer_migration",
|
||||
.data = &sysctl_timer_migration,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = timer_migration_handler,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static int __init timer_sysctl_init(void)
|
||||
{
|
||||
register_sysctl("kernel", timer_sysctl);
|
||||
return 0;
|
||||
}
|
||||
device_initcall(timer_sysctl_init);
|
||||
#endif /* CONFIG_SYSCTL */
|
||||
#else /* CONFIG_SMP */
|
||||
static inline void timers_update_migration(void) { }
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
|
@ -251,19 +287,6 @@ void timers_update_nohz(void)
|
|||
schedule_work(&timer_update_work);
|
||||
}
|
||||
|
||||
int timer_migration_handler(struct ctl_table *table, int write,
|
||||
void *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&timer_keys_mutex);
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
if (!ret && write)
|
||||
timers_update_migration();
|
||||
mutex_unlock(&timer_keys_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool is_timers_nohz_active(void)
|
||||
{
|
||||
return static_branch_unlikely(&timers_nohz_active);
|
||||
|
@ -502,7 +525,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
|
|||
*
|
||||
* Round up with level granularity to prevent this.
|
||||
*/
|
||||
expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
|
||||
expires = (expires >> LVL_SHIFT(lvl)) + 1;
|
||||
*bucket_expiry = expires << LVL_SHIFT(lvl);
|
||||
return LVL_OFFS(lvl) + (expires & LVL_MASK);
|
||||
}
|
||||
|
@ -615,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
|
|||
|
||||
static const struct debug_obj_descr timer_debug_descr;
|
||||
|
||||
struct timer_hint {
|
||||
void (*function)(struct timer_list *t);
|
||||
long offset;
|
||||
};
|
||||
|
||||
#define TIMER_HINT(fn, container, timr, hintfn) \
|
||||
{ \
|
||||
.function = fn, \
|
||||
.offset = offsetof(container, hintfn) - \
|
||||
offsetof(container, timr) \
|
||||
}
|
||||
|
||||
static const struct timer_hint timer_hints[] = {
|
||||
TIMER_HINT(delayed_work_timer_fn,
|
||||
struct delayed_work, timer, work.func),
|
||||
TIMER_HINT(kthread_delayed_work_timer_fn,
|
||||
struct kthread_delayed_work, timer, work.func),
|
||||
};
|
||||
|
||||
static void *timer_debug_hint(void *addr)
|
||||
{
|
||||
return ((struct timer_list *) addr)->function;
|
||||
struct timer_list *timer = addr;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
|
||||
if (timer_hints[i].function == timer->function) {
|
||||
void (**fn)(void) = addr + timer_hints[i].offset;
|
||||
|
||||
return *fn;
|
||||
}
|
||||
}
|
||||
|
||||
return timer->function;
|
||||
}
|
||||
|
||||
static bool timer_is_static_object(void *addr)
|
||||
|
@ -1953,6 +2006,7 @@ int timers_prepare_cpu(unsigned int cpu)
|
|||
base = per_cpu_ptr(&timer_bases[b], cpu);
|
||||
base->clk = jiffies;
|
||||
base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
|
||||
base->next_expiry_recalc = false;
|
||||
base->timers_pending = false;
|
||||
base->is_idle = false;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue