From 2731aa7d65dbb31c6dad14347c37d522bb3bc7c6 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen
Date: Tue, 5 Apr 2022 21:17:31 +0200
Subject: [PATCH 01/14] timers: Initialize base::next_expiry_recalc in
 timers_prepare_cpu()

When base::next_expiry_recalc is not initialized to false during CPU
bringup in HOTPLUG_CPU and is accidentally true while no timer has been
queued in the meantime, the loop through the wheel to find
__next_timer_interrupt() might be done for nothing.

Therefore initialize base::next_expiry_recalc to false in
timers_prepare_cpu().

Signed-off-by: Anna-Maria Behnsen
Signed-off-by: Thomas Gleixner
Reviewed-by: Frederic Weisbecker
Link: https://lore.kernel.org/r/20220405191732.7438-2-anna-maria@linutronix.de
---
 kernel/time/timer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 9dd2a39cb3b0..204d6cd83d0e 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1953,6 +1953,7 @@ int timers_prepare_cpu(unsigned int cpu)
 		base = per_cpu_ptr(&timer_bases[b], cpu);
 		base->clk = jiffies;
 		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+		base->next_expiry_recalc = false;
 		base->timers_pending = false;
 		base->is_idle = false;
 	}

From a2026e44eff5d74a83d7ffee6325a007bef85385 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 4 Apr 2022 16:47:55 +0200
Subject: [PATCH 02/14] timers: Simplify calc_index()

The level granularity round up of calc_index() does:

	(x + (1 << n)) >> n

which is obviously equivalent to

	(x >> n) + 1

but compilers can't figure that out despite the fact that the input
range is known to not cause an overflow. Nor is it intuitive to read.

Just write out the obvious.

Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/87h778j46c.ffs@tglx
---
 kernel/time/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 204d6cd83d0e..60aebf2b7f0a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -502,7 +502,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
 	 *
 	 * Round up with level granularity to prevent this.
 	 */
-	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+	expires = (expires >> LVL_SHIFT(lvl)) + 1;
 	*bucket_expiry = expires << LVL_SHIFT(lvl);
 	return LVL_OFFS(lvl) + (expires & LVL_MASK);
 }

From 2966a9918dfab183a1cb6be6794981ebe2abff83 Mon Sep 17 00:00:00 2001
From: Jakob Koschel
Date: Thu, 31 Mar 2022 23:57:07 +0200
Subject: [PATCH 03/14] clockevents: Use dedicated list iterator variable

To move the list iterator variable into the list_for_each_entry_*()
macro in the future, its use after the loop body has to be avoided. To
ensure the list iterator variable is *never* used after the loop, it
was concluded to use a separate iterator variable.
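The shape of the conversion, as a minimal sketch on a made-up foo list
(the struct and names are illustrative, not taken from this patch):

	struct foo *found = NULL, *iter;

	list_for_each_entry(iter, &foo_list, list) {
		if (iter->id == id) {
			found = iter;	/* publish the match */
			break;		/* 'iter' is dead past this point */
		}
	}
	/* Only 'found' (NULL when nothing matched) is valid here */
	if (!found)
		return -ENODEV;

After the loop the iterator points at a bogus offset from the list head
when no entry matched; the dedicated 'found' variable makes that misuse
impossible.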
Signed-off-by: Jakob Koschel
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/
Link: https://lore.kernel.org/r/20220331215707.883957-1-jakobkoschel@gmail.com
---
 kernel/time/clockevents.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 003ccf338d20..5d85014d59b5 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev,
 {
 	char name[CS_NAME_LEN];
 	ssize_t ret = sysfs_get_uname(buf, name, count);
-	struct clock_event_device *ce;
+	struct clock_event_device *ce = NULL, *iter;
 
 	if (ret < 0)
 		return ret;
@@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev,
 	ret = -ENODEV;
 	mutex_lock(&clockevents_mutex);
 	raw_spin_lock_irq(&clockevents_lock);
-	list_for_each_entry(ce, &clockevent_devices, list) {
-		if (!strcmp(ce->name, name)) {
-			ret = __clockevents_try_unbind(ce, dev->id);
+	list_for_each_entry(iter, &clockevent_devices, list) {
+		if (!strcmp(iter->name, name)) {
+			ret = __clockevents_try_unbind(iter, dev->id);
+			ce = iter;
 			break;
 		}
 	}

From efaa0227f6c6a5073951b20cf2f8c63c4155306c Mon Sep 17 00:00:00 2001
From: tangmeng
Date: Tue, 15 Feb 2022 14:50:19 +0800
Subject: [PATCH 04/14] timers: Move timer sysctl into the timer code

This is part of the effort to reduce kernel/sysctl.c to only contain
the core logic.

Signed-off-by: tangmeng
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220215065019.7520-1-tangmeng@uniontech.com
---
 include/linux/timer.h |  8 -------
 kernel/sysctl.c       | 11 ---------
 kernel/time/timer.c   | 53 +++++++++++++++++++++++++++++++------------
 3 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index fda13c9d1256..648f00105f58 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -196,14 +196,6 @@ extern void init_timers(void);
 struct hrtimer;
 extern enum hrtimer_restart it_real_fn(struct hrtimer *);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-struct ctl_table;
-
-extern unsigned int sysctl_timer_migration;
-int timer_migration_handler(struct ctl_table *table, int write,
-			    void *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
 unsigned long __round_jiffies(unsigned long j, int cpu);
 unsigned long __round_jiffies_relative(unsigned long j, int cpu);
 unsigned long round_jiffies(unsigned long j);

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 830aaf8ca08e..5b7b1a82ae6a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-	{
-		.procname	= "timer_migration",
-		.data		= &sysctl_timer_migration,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= timer_migration_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
 #ifdef CONFIG_BPF_SYSCALL
 	{
 		.procname	= "unprivileged_bpf_disabled",

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 60aebf2b7f0a..ef082d43c307 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -44,6 +44,7 @@
 #include
 #include
 #include
+#include <linux/sysctl.h>
 #include
 #include
@@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work);
 static DECLARE_WORK(timer_update_work, timer_update_keys);
 
 #ifdef CONFIG_SMP
-unsigned int sysctl_timer_migration = 1;
+static unsigned int sysctl_timer_migration = 1;
 
 DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
 
@@ -234,7 +235,42 @@ static void timers_update_migration(void)
 	else
 		static_branch_disable(&timers_migration_enabled);
 }
-#else
+
+#ifdef CONFIG_SYSCTL
+static int timer_migration_handler(struct ctl_table *table, int write,
+				   void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	mutex_lock(&timer_keys_mutex);
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (!ret && write)
+		timers_update_migration();
+	mutex_unlock(&timer_keys_mutex);
+	return ret;
+}
+
+static struct ctl_table timer_sysctl[] = {
+	{
+		.procname	= "timer_migration",
+		.data		= &sysctl_timer_migration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= timer_migration_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{}
+};
+
+static int __init timer_sysctl_init(void)
+{
+	register_sysctl("kernel", timer_sysctl);
+	return 0;
+}
+device_initcall(timer_sysctl_init);
+#endif /* CONFIG_SYSCTL */
+#else /* CONFIG_SMP */
 static inline void timers_update_migration(void) { }
 #endif /* !CONFIG_SMP */
 
@@ -251,19 +287,6 @@ void timers_update_nohz(void)
 	schedule_work(&timer_update_work);
 }
 
-int timer_migration_handler(struct ctl_table *table, int write,
-			    void *buffer, size_t *lenp, loff_t *ppos)
-{
-	int ret;
-
-	mutex_lock(&timer_keys_mutex);
-	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-	if (!ret && write)
-		timers_update_migration();
-	mutex_unlock(&timer_keys_mutex);
-	return ret;
-}
-
 static inline bool is_timers_nohz_active(void)
 {
 	return static_branch_unlikely(&timers_nohz_active);

From 8afbcaf8690dac19ebf570a4e4fef9c59c75bf8e Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Thu, 10 Feb 2022 14:49:07 -0800
Subject: [PATCH 05/14] clocksource: Replace cpumask_weight() with
 cpumask_empty()

clocksource_verify_percpu() calls cpumask_weight() to check if any bit
of a given cpumask is set.

This can be done more efficiently with cpumask_empty() because
cpumask_empty() stops traversing the cpumask as soon as it finds the
first set bit, while cpumask_weight() counts all bits unconditionally.

Signed-off-by: Yury Norov
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220210224933.379149-24-yury.norov@gmail.com
---
 kernel/time/clocksource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 95d7ca35bdf2..cee5da1e54c4 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
 	cpus_read_lock();
 	preempt_disable();
 	clocksource_verify_choose_cpus();
-	if (cpumask_weight(&cpus_chosen) == 0) {
+	if (cpumask_empty(&cpus_chosen)) {
 		preempt_enable();
 		cpus_read_unlock();
 		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);

From fde33ca4cb2bae6472714123b451475fdfd9995c Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen
Date: Mon, 11 Apr 2022 16:01:15 +0200
Subject: [PATCH 06/14] tracing/timer: Add missing argument documentation of
 trace points

The documentation of the trace points timer_start, timer_expire_entry
and hrtimer_start always lacks the last argument. Add it to keep
implementation and documentation in sync.
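As a point of reference, the kernel-doc comment and the event
definition have to line up; an abridged sketch of the timer_start event
(assumed shape, trimmed from include/trace/events/timer.h):

	/**
	 * timer_start - called when the timer is started
	 * @timer:	pointer to struct timer_list
	 * @expires:	the timers expiry time
	 * @flags:	the timers flags
	 */
	TRACE_EVENT(timer_start,

		TP_PROTO(struct timer_list *timer,
			 unsigned long expires,
			 unsigned int flags),

		TP_ARGS(timer, expires, flags),
		/* assign/print sections omitted */
	);

Each @-line in the comment corresponds to one TP_PROTO argument; the
@flags line is the one this patch adds.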
Signed-off-by: Anna-Maria Behnsen
Signed-off-by: Thomas Gleixner
Acked-by: Steven Rostedt (Google)
Link: https://lore.kernel.org/r/20220411140115.24185-1-anna-maria@linutronix.de
---
 include/trace/events/timer.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 6ad031c71be7..2e713a7d9aa3 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init,
  * timer_start - called when the timer is started
  * @timer:	pointer to struct timer_list
  * @expires:	the timers expiry time
+ * @flags:	the timers flags
  */
 TRACE_EVENT(timer_start,
 
@@ -84,6 +85,7 @@ TRACE_EVENT(timer_start,
 /**
  * timer_expire_entry - called immediately before the timer callback
  * @timer:	pointer to struct timer_list
+ * @baseclk:	value of timer_base::clk when timer expires
  *
  * Allows to determine the timer latency.
  */
@@ -190,7 +192,8 @@ TRACE_EVENT(hrtimer_init,
 /**
  * hrtimer_start - called when the hrtimer is started
- * @hrtimer: pointer to struct hrtimer
+ * @hrtimer:	pointer to struct hrtimer
+ * @mode:	the hrtimers mode
  */
 TRACE_EVENT(hrtimer_start,

From 3dc6ffae2da201284cb24af66af77ee0bbb2efaa Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach
Date: Thu, 14 Apr 2022 11:18:03 +0200
Subject: [PATCH 07/14] timekeeping: Introduce fast accessor to clock tai

Introduce a fast/NMI safe accessor to clock tai for tracing. The Linux
kernel tracing infrastructure has support for using different clocks to
generate timestamps for trace events. Especially in TSN networks it's
useful to have TAI as trace clock, because the application scheduling
is done in accordance with the network time, which is based on TAI.
With a tai trace_clock in place, it becomes very convenient to
correlate network activity with Linux kernel application traces.

Use the same implementation as ktime_get_boot_fast_ns() does by reading
the monotonic time and adding the TAI offset. The same limitations as
for the fast boot implementation apply. The TAI offset may change at
run time e.g., by setting the time or using adjtimex() with an offset.
However, these kinds of offset changes are rare events. Nevertheless,
the user has to be aware and deal with it in post processing.

An alternative approach would be to use the same implementation as
ktime_get_real_fast_ns() does. However, this requires adding an
additional u64 member to the tk_read_base struct. This struct together
with a seqcount is designed to fit into a single cache line on 64 bit
architectures. Adding a new member would violate this constraint.

Signed-off-by: Kurt Kanzenbach
Signed-off-by: Thomas Gleixner
Cc: Steven Rostedt
Link: https://lore.kernel.org/r/20220414091805.89667-2-kurt@linutronix.de
---
 Documentation/core-api/timekeeping.rst |  1 +
 include/linux/timekeeping.h            |  1 +
 kernel/time/timekeeping.c              | 17 +++++++++++++++++
 3 files changed, 19 insertions(+)

diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 729e24864fe7..22ec68f24421 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
 .. c:function:: u64 ktime_get_mono_fast_ns( void )
 		u64 ktime_get_raw_fast_ns( void )
 		u64 ktime_get_boot_fast_ns( void )
+		u64 ktime_get_tai_fast_ns( void )
 		u64 ktime_get_real_fast_ns( void )
 
 These variants are safe to call from any context, including from

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 78a98bdff76d..fe1e467ba046 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void)
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
 extern u64 ktime_get_boot_fast_ns(void);
+extern u64 ktime_get_tai_fast_ns(void);
 extern u64 ktime_get_real_fast_ns(void);
 
 /*

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index dcdcb85121e4..2c22023fbf5f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -532,6 +532,23 @@ u64 notrace ktime_get_boot_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
+/**
+ * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
+ *
+ * The same limitations as described for ktime_get_boot_fast_ns() apply. The
+ * mono time and the TAI offset are not read atomically which may yield wrong
+ * readouts. However, an update of the TAI offset is an rare event e.g., caused
+ * by settime or adjtimex with an offset. The user of this function has to deal
+ * with the possibility of wrong timestamps in post processing.
+ */
+u64 notrace ktime_get_tai_fast_ns(void)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
+}
+EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
+
 static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 {
 	struct tk_read_base *tkr;

From 62c1256d544747b38e77ca9b5bfe3a26f9592576 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Sat, 23 Apr 2022 00:14:46 +1000
Subject: [PATCH 08/14] timers/nohz: Switch to ONESHOT_STOPPED in the low-res
 handler when the tick is stopped

When tick_nohz_stop_tick() stops the tick and high resolution timers
are disabled, the clock event device is not put into ONESHOT_STOPPED
mode. This can lead to spurious timer interrupts with some clock event
device drivers that don't shut down entirely after firing.

Eliminate these by putting the device into ONESHOT_STOPPED mode at
points where it is not being reprogrammed. When there are no timers
active, tick_program_event() with KTIME_MAX can be used to stop the
device. When there is a timer active, the device can be stopped at the
next tick (any new timer added by timers will reprogram the tick).
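Why programming KTIME_MAX stops the device: tick_program_event()
special-cases it and drops the device into ONESHOT_STOPPED. A rough,
abridged sketch of that logic (from kernel/time/tick-oneshot.c, details
may differ by kernel version):

	int tick_program_event(ktime_t expires, int force)
	{
		struct clock_event_device *dev =
			__this_cpu_read(tick_cpu_device.evtdev);

		if (unlikely(expires == KTIME_MAX)) {
			/* No further event expected: stop the device */
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
			dev->next_event = KTIME_MAX;
			return 0;
		}

		if (unlikely(clockevent_state_oneshot_stopped(dev))) {
			/* Bring a stopped device back to ONESHOT before use */
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
		}

		return clockevents_program_event(dev, expires, force);
	}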
Signed-off-by: Nicholas Piggin
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220422141446.915024-1-npiggin@gmail.com
---
 kernel/time/tick-sched.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2d76c91b85de..b1b105db31eb 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	if (unlikely(expires == KTIME_MAX)) {
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 			hrtimer_cancel(&ts->sched_timer);
+		else
+			tick_program_event(KTIME_MAX, 1);
 		return;
 	}
 
@@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 	tick_sched_do_timer(ts, now);
 	tick_sched_handle(ts, regs);
 
-	/* No need to reprogram if we are running tickless */
-	if (unlikely(ts->tick_stopped))
+	if (unlikely(ts->tick_stopped)) {
+		/*
+		 * The clockevent device is not reprogrammed, so change the
+		 * clock event device to ONESHOT_STOPPED to avoid spurious
+		 * interrupts on devices which might not be truly one shot.
+		 */
+		tick_program_event(KTIME_MAX, 1);
 		return;
+	}
 
 	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
 	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);

From eff4849f928f2b90402907e06a6de1619cf16b1a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 15 Apr 2022 11:19:35 +0200
Subject: [PATCH 09/14] timekeeping: Annotate ktime_get_boot_fast_ns() with
 data_race()

Accessing timekeeper::offset_boot in ktime_get_boot_fast_ns() is an
intended data race as the reader side cannot synchronize with a writer
and there is no space in struct tk_read_base of the NMI safe
timekeeper.

Mark it so.

Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220415091920.956045162@linutronix.de
---
 kernel/time/timekeeping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2c22023fbf5f..3479804ed5e6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -528,7 +528,7 @@ u64 notrace ktime_get_boot_fast_ns(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 
-	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);

From 90be8d6c1f91e1e5121c219726524c91b52bfc20 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 15 Apr 2022 11:19:38 +0200
Subject: [PATCH 10/14] timekeeping: Consolidate fast timekeeper

Provide an inline function which replaces the copy & pasta.
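The duplicated delta computation sits inside the same seqcount-latch
read loop in both callers; the idiom, sketched with the names the fast
timekeepers use (see the diff below for the real thing):

	do {
		seq = raw_read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);	/* pick even/odd copy */
		now = ktime_to_ns(tkr->base);
		now += fast_tk_get_delta_ns(tkr);	/* the new helper */
	} while (read_seqcount_latch_retry(&tkf->seq, seq));

The writer updates the two copies alternately, so an NMI-safe reader
always finds one consistent copy and retries if it raced with an
update.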
Signed-off-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20220415091921.072296632@linutronix.de
---
 kernel/time/timekeeping.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3479804ed5e6..8895ff20f82d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -429,6 +429,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
 	memcpy(base + 1, base, sizeof(*base));
 }
 
+static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
+{
+	u64 delta, cycles = tk_clock_read(tkr);
+
+	delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+	return timekeeping_delta_to_ns(tkr, delta);
+}
+
 static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
 	struct tk_read_base *tkr;
@@ -439,12 +447,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 		seq = raw_read_seqcount_latch(&tkf->seq);
 		tkr = tkf->base + (seq & 0x01);
 		now = ktime_to_ns(tkr->base);
-
-		now += timekeeping_delta_to_ns(tkr,
-				clocksource_delta(
-					tk_clock_read(tkr),
-					tkr->cycle_last,
-					tkr->mask));
+		now += fast_tk_get_delta_ns(tkr);
 	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	return now;
@@ -560,10 +563,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
 		tkr = tkf->base + (seq & 0x01);
 		basem = ktime_to_ns(tkr->base);
 		baser = ktime_to_ns(tkr->base_real);
-
-		delta = timekeeping_delta_to_ns(tkr,
-				clocksource_delta(tk_clock_read(tkr),
-				tkr->cycle_last, tkr->mask));
+		delta = fast_tk_get_delta_ns(tkr);
 	} while (read_seqcount_latch_retry(&tkf->seq, seq));
 
 	if (mono)

From 92067440f1311dfa4d77b57a9da6b3706f5da32e Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki"
Date: Sun, 24 Apr 2022 12:47:20 +0100
Subject: [PATCH 11/14] time/sched_clock: Round the frequency reported to
 nearest rather than down

The frequency reported for clock sources is rounded down, which gives
misleading figures, e.g.:

 I/O ASIC clock frequency 24999480Hz
 sched_clock: 32 bits at 24MHz, resolution 40ns, wraps every 85901132779ns
 MIPS counter frequency 59998512Hz
 sched_clock: 32 bits at 59MHz, resolution 16ns, wraps every 35792281591ns

Rounding to nearest is more adequate:

 I/O ASIC clock frequency 24999664Hz
 sched_clock: 32 bits at 25MHz, resolution 40ns, wraps every 85900499947ns
 MIPS counter frequency 59999728Hz
 sched_clock: 32 bits at 60MHz, resolution 16ns, wraps every 35791556599ns

Signed-off-by: Maciej W. Rozycki
Signed-off-by: Thomas Gleixner
Acked-by: John Stultz
Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204240055590.9383@angie.orcam.me.uk
---
 kernel/time/sched_clock.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index b1b9b12899f5..ee07f3ac1e5b 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <linux/math.h>
 #include
 #include
 #include
@@ -199,11 +200,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 	r = rate;
 	if (r >= 4000000) {
-		r /= 1000000;
+		r = DIV_ROUND_CLOSEST(r, 1000000);
 		r_unit = 'M';
 	} else {
 		if (r >= 1000) {
-			r /= 1000;
+			r = DIV_ROUND_CLOSEST(r, 1000);
 			r_unit = 'k';
 		} else {
 			r_unit = ' ';

From cc1b923a4e378c943386e7fe6205918d43e5fede Mon Sep 17 00:00:00 2001
Rozycki" Date: Sun, 24 Apr 2022 12:47:26 +0100 Subject: [PATCH 12/14] time/sched_clock: Use Hz as the unit for clock rate reporting below 4kHz The kernel uses kHz as the unit for clock rates reported between 1MHz (inclusive) and 4MHz (exclusive), e.g.: sched_clock: 64 bits at 1000kHz, resolution 1000ns, wraps every 2199023255500ns This reduces the amount of data lost due to rounding, but hasn't been replicated for the kHz range when support was added for proper reporting of sub-kHz clock rates. Take the same approach for rates between 1kHz (inclusive) and 4kHz (exclusive), which makes it consistent. Signed-off-by: Maciej W. Rozycki Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204240106380.9383@angie.orcam.me.uk --- kernel/time/sched_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index ee07f3ac1e5b..4a95c0be9daf 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -203,7 +203,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) r = DIV_ROUND_CLOSEST(r, 1000000); r_unit = 'M'; } else { - if (r >= 1000) { + if (r >= 4000) { r = DIV_ROUND_CLOSEST(r, 1000); r_unit = 'k'; } else { From f4b62e1e1137507268c2c63dc4e6da279dc58e9f Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 24 Apr 2022 12:47:30 +0100 Subject: [PATCH 13/14] time/sched_clock: Fix formatting of frequency reporting code Use flat rather than nested indentation for chained else/if clauses as per coding-style.rst: if (x == y) { .. } else if (x > y) { ... } else { .... } This also improves readability. Signed-off-by: Maciej W. Rozycki Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204240148220.9383@angie.orcam.me.uk --- kernel/time/sched_clock.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 4a95c0be9daf..8464c5acc913 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -202,13 +202,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) if (r >= 4000000) { r = DIV_ROUND_CLOSEST(r, 1000000); r_unit = 'M'; + } else if (r >= 4000) { + r = DIV_ROUND_CLOSEST(r, 1000); + r_unit = 'k'; } else { - if (r >= 4000) { - r = DIV_ROUND_CLOSEST(r, 1000); - r_unit = 'k'; - } else { - r_unit = ' '; - } + r_unit = ' '; } /* Calculate the ns resolution of this counter */ From 317f29c14d0cca09952f1022491454b23455ebcb Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 11 May 2022 13:19:51 -0700 Subject: [PATCH 14/14] timers: Provide a better debugobjects hint for delayed works With debugobjects enabled the timer hint for freeing of active timers embedded inside delayed works is always the same, i.e. the hint is delayed_work_timer_fn, even though the function the delayed work is going to run can be wildly different depending on what work was queued. Enabling workqueue debugobjects doesn't help either because the delayed work isn't considered active until it is actually queued to run on a workqueue. If the work is freed while the timer is pending the work isn't considered active so there is no information from workqueue debugobjects. Special case delayed works in the timer debugobjects hint logic so that the delayed work function is returned instead of the delayed_work_timer_fn. This will help to understand which delayed work was pending that got freed. 
Apply the same treatment for kthread_delayed_work because it follows
the same pattern.

Suggested-by: Thomas Gleixner
Signed-off-by: Stephen Boyd
Signed-off-by: Thomas Gleixner
Link: https://lore.kernel.org/r/20220511201951.42408-1-swboyd@chromium.org
---
 kernel/time/timer.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ef082d43c307..a0666d948147 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -638,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
 
 static const struct debug_obj_descr timer_debug_descr;
 
+struct timer_hint {
+	void	(*function)(struct timer_list *t);
+	long	offset;
+};
+
+#define TIMER_HINT(fn, container, timr, hintfn)			\
+	{							\
+		.function = fn,					\
+		.offset	  = offsetof(container, hintfn) -	\
+			    offsetof(container, timr)		\
+	}
+
+static const struct timer_hint timer_hints[] = {
+	TIMER_HINT(delayed_work_timer_fn,
+		   struct delayed_work, timer, work.func),
+	TIMER_HINT(kthread_delayed_work_timer_fn,
+		   struct kthread_delayed_work, timer, work.func),
+};
+
 static void *timer_debug_hint(void *addr)
 {
-	return ((struct timer_list *) addr)->function;
+	struct timer_list *timer = addr;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
+		if (timer_hints[i].function == timer->function) {
+			void (**fn)(void) = addr + timer_hints[i].offset;
+
+			return *fn;
+		}
+	}
+
+	return timer->function;
 }
 
 static bool timer_is_static_object(void *addr)
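The offset arithmetic TIMER_HINT() relies on can be exercised outside
the kernel; a minimal, self-contained sketch in plain C (mock struct
names, not the kernel's):

	#include <stddef.h>
	#include <stdio.h>

	typedef void (*fn_t)(void);

	struct work    { fn_t func; };		/* mimics work_struct */
	struct timer   { fn_t function; };	/* mimics timer_list */
	struct delayed { struct work work; struct timer timer; };

	static void real_callback(void) { }

	int main(void)
	{
		struct delayed dw = { .work.func = real_callback };
		void *timer_addr = &dw.timer;	/* all debugobjects sees */
		long off = offsetof(struct delayed, work.func) -
			   offsetof(struct delayed, timer);
		fn_t *hint = (fn_t *)((char *)timer_addr + off);

		/* Both lines print the same address */
		printf("real: %p\n", (void *)real_callback);
		printf("hint: %p\n", (void *)*hint);
		return 0;
	}

Compiled with any C compiler, the recovered hint matches the real
callback, which is what makes the improved debugobjects hint
trustworthy.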