2007-10-11 17:17:24 +08:00
|
|
|
/*
|
2007-10-13 09:10:53 +08:00
|
|
|
* check TSC synchronization.
|
2007-10-11 17:17:24 +08:00
|
|
|
*
|
|
|
|
* Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
|
|
|
|
*
|
|
|
|
* We check whether all boot CPUs have their TSC's synchronized,
|
|
|
|
* print a warning if not and turn off the TSC clock-source.
|
|
|
|
*
|
|
|
|
* The warp-check is point-to-point between two CPUs, the CPU
|
|
|
|
* initiating the bootup is the 'source CPU', the freshly booting
|
|
|
|
* CPU is the 'target CPU'.
|
|
|
|
*
|
|
|
|
* Only two CPUs may participate - they can enter in any order.
|
|
|
|
* ( The serial nature of the boot logic and the CPU hotplug lock
|
|
|
|
* protects against more than 2 CPUs entering this code. )
|
|
|
|
*/
|
2016-11-19 21:47:36 +08:00
|
|
|
#include <linux/topology.h>
|
2007-10-11 17:17:24 +08:00
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
#include <linux/nmi.h>
|
|
|
|
#include <asm/tsc.h>
|
|
|
|
|
2016-11-19 21:47:36 +08:00
|
|
|
struct tsc_adjust {
|
2016-11-19 21:47:37 +08:00
|
|
|
s64 bootval;
|
|
|
|
s64 adjusted;
|
|
|
|
unsigned long nextcheck;
|
|
|
|
bool warned;
|
2016-11-19 21:47:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
|
|
|
|
|
2017-10-13 00:32:02 +08:00
|
|
|
/*
|
|
|
|
* TSC's on different sockets may be reset asynchronously.
|
|
|
|
* This may cause the TSC ADJUST value on socket 0 to be NOT 0.
|
|
|
|
*/
|
|
|
|
bool __read_mostly tsc_async_resets;
|
|
|
|
|
|
|
|
void mark_tsc_async_resets(char *reason)
|
|
|
|
{
|
|
|
|
if (tsc_async_resets)
|
|
|
|
return;
|
|
|
|
tsc_async_resets = true;
|
|
|
|
pr_info("tsc: Marking TSC async resets true due to %s\n", reason);
|
|
|
|
}
|
|
|
|
|
2016-12-13 21:14:17 +08:00
|
|
|
/*
 * Verify that the TSC ADJUST MSR of the current CPU still holds the
 * value recorded in its per-CPU state, and write the recorded value
 * back if it does not.
 *
 * @resume: when true, the rate limit is bypassed and a mismatch is
 *          reported even if a warning was already printed on this CPU
 */
void tsc_verify_tsc_adjust(bool resume)
{
	struct tsc_adjust *state = this_cpu_ptr(&tsc_adjust);
	s64 msrval;

	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		return;

	/* Skip unnecessary error messages if TSC already unstable */
	if (check_tsc_unstable())
		return;

	/* Rate limit the MSR check; a resume always checks */
	if (!resume && time_before(jiffies, state->nextcheck))
		return;

	state->nextcheck = jiffies + HZ;

	rdmsrl(MSR_IA32_TSC_ADJUST, msrval);
	if (msrval == state->adjusted)
		return;

	/* Something changed the MSR behind our back: restore the recorded value */
	wrmsrl(MSR_IA32_TSC_ADJUST, state->adjusted);

	/* Complain once per CPU, and on every mismatch found on resume */
	if (resume || !state->warned) {
		pr_warn(FW_BUG "TSC ADJUST differs: CPU%u %lld --> %lld. Restoring\n",
			smp_processor_id(), state->adjusted, msrval);
		state->warned = true;
	}
}
|
|
|
|
|
2016-12-13 21:14:17 +08:00
|
|
|
/*
 * Establish the expected TSC ADJUST value for the first online CPU of a
 * package.
 *
 * @cur:     per-CPU state of the CPU being brought up
 * @bootval: TSC ADJUST value read from the MSR on that CPU
 * @cpu:     CPU number, used in the log messages only
 * @bootcpu: true when called for the boot CPU
 */
static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
				   unsigned int cpu, bool bootcpu)
{
	/*
	 * The first online CPU in a package takes its boot value as the
	 * adjustment value. The sync mechanism may change it later; if
	 * that fails, inconsistent boot values can still be reported.
	 *
	 * A non-zero value on the boot CPU is forced to 0. Non-boot CPUs
	 * are left alone because physical hotplug should have set the
	 * ADJUST register to a value > 0 so that the TSC is in sync with
	 * the already running CPUs.
	 *
	 * The boot CPU is left alone as well when sockets may be reset
	 * asynchronously to each other, because socket 0 can then have a
	 * valid non-zero TSC ADJUST value.
	 */
	if (bootcpu && bootval != 0) {
		if (unlikely(tsc_async_resets)) {
			pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n",
				cpu, bootval);
		} else {
			pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n",
				cpu, bootval);
			wrmsrl(MSR_IA32_TSC_ADJUST, 0);
			bootval = 0;
		}
	}

	cur->adjusted = bootval;
}
|
|
|
|
|
2016-11-19 21:47:36 +08:00
|
|
|
#ifndef CONFIG_SMP
|
2016-12-13 21:14:17 +08:00
|
|
|
/*
 * !CONFIG_SMP variant: record the boot-time TSC ADJUST value of the
 * current CPU and sanitize it.
 *
 * @bootcpu: passed through to tsc_sanitize_first_cpu()
 *
 * Always returns false.
 */
bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
{
	struct tsc_adjust *state;
	s64 msrval;

	/*
	 * Nothing to do without the MSR. Also skip unnecessary error
	 * messages if the TSC is already marked unstable.
	 */
	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST) || check_tsc_unstable())
		return false;

	state = this_cpu_ptr(&tsc_adjust);

	rdmsrl(MSR_IA32_TSC_ADJUST, msrval);
	state->bootval = msrval;
	state->nextcheck = jiffies + HZ;
	tsc_sanitize_first_cpu(state, msrval, smp_processor_id(), bootcpu);

	return false;
}
|
|
|
|
|
|
|
|
#else /* !CONFIG_SMP */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Store and check the TSC ADJUST MSR if available
|
|
|
|
*/
|
2016-12-13 21:14:17 +08:00
|
|
|
bool tsc_store_and_check_tsc_adjust(bool bootcpu)
|
2016-11-19 21:47:36 +08:00
|
|
|
{
|
|
|
|
struct tsc_adjust *ref, *cur = this_cpu_ptr(&tsc_adjust);
|
|
|
|
unsigned int refcpu, cpu = smp_processor_id();
|
2016-12-01 20:26:58 +08:00
|
|
|
struct cpumask *mask;
|
2016-11-19 21:47:36 +08:00
|
|
|
s64 bootval;
|
|
|
|
|
|
|
|
if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
|
2016-11-19 21:47:39 +08:00
|
|
|
return false;
|
2016-11-19 21:47:36 +08:00
|
|
|
|
|
|
|
rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
|
|
|
|
cur->bootval = bootval;
|
2016-11-19 21:47:37 +08:00
|
|
|
cur->nextcheck = jiffies + HZ;
|
|
|
|
cur->warned = false;
|
2016-11-19 21:47:36 +08:00
|
|
|
|
2017-10-13 00:32:02 +08:00
|
|
|
/*
|
|
|
|
* If a non-zero TSC value for socket 0 may be valid then the default
|
|
|
|
* adjusted value cannot assumed to be zero either.
|
|
|
|
*/
|
|
|
|
if (tsc_async_resets)
|
|
|
|
cur->adjusted = bootval;
|
|
|
|
|
2016-11-19 21:47:36 +08:00
|
|
|
/*
|
|
|
|
* Check whether this CPU is the first in a package to come up. In
|
|
|
|
* this case do not check the boot value against another package
|
2016-12-13 21:14:17 +08:00
|
|
|
* because the new package might have been physically hotplugged,
|
|
|
|
* where TSC_ADJUST is expected to be different. When called on the
|
|
|
|
* boot CPU topology_core_cpumask() might not be available yet.
|
2016-11-19 21:47:36 +08:00
|
|
|
*/
|
2016-12-01 20:26:58 +08:00
|
|
|
mask = topology_core_cpumask(cpu);
|
|
|
|
refcpu = mask ? cpumask_any_but(mask, cpu) : nr_cpu_ids;
|
2016-11-19 21:47:36 +08:00
|
|
|
|
|
|
|
if (refcpu >= nr_cpu_ids) {
|
2016-12-13 21:14:17 +08:00
|
|
|
tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(),
|
|
|
|
bootcpu);
|
2016-11-19 21:47:39 +08:00
|
|
|
return false;
|
2016-11-19 21:47:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
ref = per_cpu_ptr(&tsc_adjust, refcpu);
|
|
|
|
/*
|
|
|
|
* Compare the boot value and complain if it differs in the
|
|
|
|
* package.
|
|
|
|
*/
|
|
|
|
if (bootval != ref->bootval) {
|
2016-12-18 22:06:27 +08:00
|
|
|
pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n",
|
2016-11-19 21:47:36 +08:00
|
|
|
refcpu, ref->bootval, cpu, bootval);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* The TSC_ADJUST values in a package must be the same. If the boot
|
|
|
|
* value on this newly upcoming CPU differs from the adjustment
|
|
|
|
* value of the already online CPU in this package, set it to that
|
|
|
|
* adjusted value.
|
|
|
|
*/
|
|
|
|
if (bootval != ref->adjusted) {
|
|
|
|
pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n",
|
|
|
|
refcpu, ref->adjusted, cpu, bootval);
|
|
|
|
cur->adjusted = ref->adjusted;
|
|
|
|
wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
|
|
|
|
}
|
2016-11-19 21:47:39 +08:00
|
|
|
/*
|
|
|
|
* We have the TSCs forced to be in sync on this package. Skip sync
|
|
|
|
* test:
|
|
|
|
*/
|
|
|
|
return true;
|
2016-11-19 21:47:36 +08:00
|
|
|
}
|
|
|
|
|
2007-10-11 17:17:24 +08:00
|
|
|
/*
|
|
|
|
* Entry/exit counters that make sure that both CPUs
|
|
|
|
* run the measurement code at once:
|
|
|
|
*/
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c)
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
/* Incremented by each CPU when it is ready to start a measurement run */
static atomic_t start_count;
/* Incremented by each CPU once it is done with a measurement run */
static atomic_t stop_count;
/* Raised by the target CPU to tell the source CPU that the test is skipped */
static atomic_t skip_test;
/* Remaining test runs; written by the source CPU, read by the target CPU */
static atomic_t test_runs;
|
2007-10-11 17:17:24 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We use a raw spinlock in this exceptional case, because
|
|
|
|
* we want to have the fastest, inlined, non-debug version
|
|
|
|
* of a critical section, to be able to prove TSC time-warps:
|
|
|
|
*/
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c)
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
/* Serializes the TSC samples and the warp statistics shared by both CPUs */
static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
|
2009-05-07 15:12:50 +08:00
|
|
|
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c)
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
/* Most recent TSC value sampled under sync_lock, by either CPU */
static cycles_t last_tsc;
/* Largest backwards jump (previous sample minus current sample) seen so far */
static cycles_t max_warp;
/* Number of backwards jumps observed, summed over both CPUs */
static int nr_warps;
/* Number of times a CPU saw a warp after the other CPU had recorded one too */
static int random_warps;
|
2007-10-11 17:17:24 +08:00
|
|
|
|
|
|
|
/*
|
2015-06-26 00:44:09 +08:00
|
|
|
* TSC-warp measurement loop running on both CPUs. This is not called
|
|
|
|
* if there is no TSC.
|
2007-10-11 17:17:24 +08:00
|
|
|
*/
|
2016-11-19 21:47:41 +08:00
|
|
|
static cycles_t check_tsc_warp(unsigned int timeout)
|
2007-10-11 17:17:24 +08:00
|
|
|
{
|
2016-11-19 21:47:41 +08:00
|
|
|
cycles_t start, now, prev, end, cur_max_warp = 0;
|
2016-11-19 21:47:35 +08:00
|
|
|
int i, cur_warps = 0;
|
2007-10-11 17:17:24 +08:00
|
|
|
|
2015-06-26 00:44:09 +08:00
|
|
|
start = rdtsc_ordered();
|
2007-10-11 17:17:24 +08:00
|
|
|
/*
|
2012-02-07 10:32:20 +08:00
|
|
|
* The measurement runs for 'timeout' msecs:
|
2007-10-11 17:17:24 +08:00
|
|
|
*/
|
2012-02-07 10:32:20 +08:00
|
|
|
end = start + (cycles_t) tsc_khz * timeout;
|
2007-10-11 17:17:24 +08:00
|
|
|
now = start;
|
|
|
|
|
|
|
|
for (i = 0; ; i++) {
|
|
|
|
/*
|
|
|
|
* We take the global lock, measure TSC, save the
|
|
|
|
* previous TSC that was measured (possibly on
|
|
|
|
* another CPU) and update the previous TSC timestamp.
|
|
|
|
*/
|
2009-12-03 03:01:25 +08:00
|
|
|
arch_spin_lock(&sync_lock);
|
2007-10-11 17:17:24 +08:00
|
|
|
prev = last_tsc;
|
2015-06-26 00:44:09 +08:00
|
|
|
now = rdtsc_ordered();
|
2007-10-11 17:17:24 +08:00
|
|
|
last_tsc = now;
|
2009-12-03 03:01:25 +08:00
|
|
|
arch_spin_unlock(&sync_lock);
|
2007-10-11 17:17:24 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Be nice every now and then (and also check whether
|
2008-01-30 20:33:23 +08:00
|
|
|
* measurement is done [we also insert a 10 million
|
2007-10-11 17:17:24 +08:00
|
|
|
* loops safety exit, so we dont lock up in case the
|
|
|
|
* TSC readout is totally broken]):
|
|
|
|
*/
|
|
|
|
if (unlikely(!(i & 7))) {
|
2008-01-30 20:33:23 +08:00
|
|
|
if (now > end || i > 10000000)
|
2007-10-11 17:17:24 +08:00
|
|
|
break;
|
|
|
|
cpu_relax();
|
|
|
|
touch_nmi_watchdog();
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Outside the critical section we can now see whether
|
|
|
|
* we saw a time-warp of the TSC going backwards:
|
|
|
|
*/
|
|
|
|
if (unlikely(prev > now)) {
|
2009-12-03 03:01:25 +08:00
|
|
|
arch_spin_lock(&sync_lock);
|
2007-10-11 17:17:24 +08:00
|
|
|
max_warp = max(max_warp, prev - now);
|
2016-11-19 21:47:41 +08:00
|
|
|
cur_max_warp = max_warp;
|
2016-11-19 21:47:35 +08:00
|
|
|
/*
|
|
|
|
* Check whether this bounces back and forth. Only
|
|
|
|
* one CPU should observe time going backwards.
|
|
|
|
*/
|
|
|
|
if (cur_warps != nr_warps)
|
|
|
|
random_warps++;
|
2007-10-11 17:17:24 +08:00
|
|
|
nr_warps++;
|
2016-11-19 21:47:35 +08:00
|
|
|
cur_warps = nr_warps;
|
2009-12-03 03:01:25 +08:00
|
|
|
arch_spin_unlock(&sync_lock);
|
2007-10-11 17:17:24 +08:00
|
|
|
}
|
2008-01-30 20:33:24 +08:00
|
|
|
}
|
2008-07-09 00:51:56 +08:00
|
|
|
WARN(!(now-start),
|
|
|
|
"Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
|
2008-01-30 20:33:24 +08:00
|
|
|
now-start, end-start);
|
2016-11-19 21:47:41 +08:00
|
|
|
return cur_max_warp;
|
2007-10-11 17:17:24 +08:00
|
|
|
}
|
|
|
|
|
2012-02-07 10:32:20 +08:00
|
|
|
/*
 * Select the duration, in msecs, of the TSC-warp measurement loop for
 * @cpu.
 *
 * If the core cpumask of @cpu holds more than one CPU, a short timeout
 * of 2msec is used, as some information about this socket is already
 * available (and it grows with every further logical sibling).
 * Otherwise the full 20msec is used.
 *
 * Ideally the sync check could be skipped for the remaining siblings
 * once the first logical CPU of a socket passed it. But the TSC is
 * per-logical CPU and can be modified wrongly by the BIOS, so a shorter
 * test is still run to catch such errors. It also catches the case
 * where the cores of a socket are not all reset at the same time.
 */
static inline unsigned int loop_timeout(int cpu)
{
	if (cpumask_weight(topology_core_cpumask(cpu)) > 1)
		return 2;

	return 20;
}
|
|
|
|
|
2007-10-11 17:17:24 +08:00
|
|
|
/*
|
|
|
|
* Source CPU calls into this - it waits for the freshly booted
|
|
|
|
* target CPU to arrive and then starts the measurement:
|
|
|
|
*/
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c)
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
void check_tsc_sync_source(int cpu)
|
2007-10-11 17:17:24 +08:00
|
|
|
{
|
|
|
|
int cpus = 2;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No need to check if we already know that the TSC is not
|
2015-06-26 00:44:09 +08:00
|
|
|
* synchronized or if we have no TSC.
|
2007-10-11 17:17:24 +08:00
|
|
|
*/
|
|
|
|
if (unsynchronized_tsc())
|
|
|
|
return;
|
|
|
|
|
2016-11-19 21:47:43 +08:00
|
|
|
/*
|
|
|
|
* Set the maximum number of test runs to
|
|
|
|
* 1 if the CPU does not provide the TSC_ADJUST MSR
|
|
|
|
* 3 if the MSR is available, so the target can try to adjust
|
|
|
|
*/
|
|
|
|
if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
|
|
|
|
atomic_set(&test_runs, 1);
|
|
|
|
else
|
|
|
|
atomic_set(&test_runs, 3);
|
|
|
|
retry:
|
2007-10-11 17:17:24 +08:00
|
|
|
/*
|
2016-11-19 21:47:39 +08:00
|
|
|
* Wait for the target to start or to skip the test:
|
2007-10-11 17:17:24 +08:00
|
|
|
*/
|
2016-11-19 21:47:39 +08:00
|
|
|
while (atomic_read(&start_count) != cpus - 1) {
|
|
|
|
if (atomic_read(&skip_test) > 0) {
|
|
|
|
atomic_set(&skip_test, 0);
|
|
|
|
return;
|
|
|
|
}
|
2007-10-11 17:17:24 +08:00
|
|
|
cpu_relax();
|
2016-11-19 21:47:39 +08:00
|
|
|
}
|
|
|
|
|
2007-10-11 17:17:24 +08:00
|
|
|
/*
|
|
|
|
* Trigger the target to continue into the measurement too:
|
|
|
|
*/
|
|
|
|
atomic_inc(&start_count);
|
|
|
|
|
2012-02-07 10:32:20 +08:00
|
|
|
check_tsc_warp(loop_timeout(cpu));
|
2007-10-11 17:17:24 +08:00
|
|
|
|
|
|
|
while (atomic_read(&stop_count) != cpus-1)
|
|
|
|
cpu_relax();
|
|
|
|
|
2016-11-19 21:47:43 +08:00
|
|
|
/*
|
|
|
|
* If the test was successful set the number of runs to zero and
|
|
|
|
* stop. If not, decrement the number of runs an check if we can
|
|
|
|
* retry. In case of random warps no retry is attempted.
|
|
|
|
*/
|
|
|
|
if (!nr_warps) {
|
|
|
|
atomic_set(&test_runs, 0);
|
|
|
|
|
|
|
|
pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
|
|
|
|
smp_processor_id(), cpu);
|
|
|
|
|
|
|
|
} else if (atomic_dec_and_test(&test_runs) || random_warps) {
|
|
|
|
/* Force it to 0 if random warps brought us here */
|
|
|
|
atomic_set(&test_runs, 0);
|
|
|
|
|
2009-11-18 08:22:16 +08:00
|
|
|
pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
|
|
|
|
smp_processor_id(), cpu);
|
2009-05-07 15:12:50 +08:00
|
|
|
pr_warning("Measured %Ld cycles TSC warp between CPUs, "
|
|
|
|
"turning off TSC clock.\n", max_warp);
|
2016-11-19 21:47:35 +08:00
|
|
|
if (random_warps)
|
|
|
|
pr_warning("TSC warped randomly between CPUs\n");
|
2007-10-11 17:17:24 +08:00
|
|
|
mark_tsc_unstable("check_tsc_sync_source failed");
|
|
|
|
}
|
|
|
|
|
x86: fix: s2ram + P4 + tsc = annoyance
s2ram recently became useful here, except for the kernel's annoying
habit of disabling my P4's perfectly good TSC.
[ 107.894470] CPU 1 is now offline
[ 107.894474] SMP alternatives: switching to UP code
[ 107.895832] CPU0 attaching sched-domain:
[ 107.895836] domain 0: span 1
[ 107.895838] groups: 1
[ 107.896097] CPU1 is down
[ 3.726156] Intel machine check architecture supported.
[ 3.726165] Intel machine check reporting enabled on CPU#0.
[ 3.726167] CPU0: Intel P4/Xeon Extended MCE MSRs (12) available
[ 3.726170] CPU0: Thermal monitoring enabled
[ 3.726175] Back to C!
[ 3.726708] Force enabled HPET at resume
[ 3.726775] Enabling non-boot CPUs ...
[ 3.727049] CPU0 attaching NULL sched-domain.
[ 3.727165] SMP alternatives: switching to SMP code
[ 3.727858] Booting processor 1/1 eip 3000
[ 3.727862] CPU 1 irqstacks, hard=b042f000 soft=b042d000
[ 3.738173] Initializing CPU#1
[ 3.798912] Calibrating delay using timer specific routine.. 5986.12 BogoMIPS (lpj=2993061)
[ 3.798920] CPU: After generic identify, caps: bfebfbff 00000000 00000000 00000000 00004400 00000000 00000000 00000000
[ 3.798931] CPU: Trace cache: 12K uops, L1 D cache: 8K
[ 3.798934] CPU: L2 cache: 512K
[ 3.798936] CPU: Physical Processor ID: 0
[ 3.798938] CPU: After all inits, caps: bfebfbff 00000000 00000000 0000b080 00004400 00000000 00000000 00000000
[ 3.798946] Intel machine check architecture supported.
[ 3.798952] Intel machine check reporting enabled on CPU#1.
[ 3.798955] CPU1: Intel P4/Xeon Extended MCE MSRs (12) available
[ 3.798959] CPU1: Thermal monitoring enabled
[ 3.799161] CPU1: Intel(R) Pentium(R) 4 CPU 3.00GHz stepping 09
[ 3.799187] checking TSC synchronization [CPU#0 -> CPU#1]:
[ 3.819181] Measured 63588552840 cycles TSC warp between CPUs, turning off TSC clock.
[ 3.819184] Marking TSC unstable due to: check_tsc_sync_source failed.
If check_tsc_warp() is called after initial boot, and the TSC has in the
meantime been set (BIOS, user, silicon, elves) to a value lower than the
last stored/stale value, we blame the TSC. Reset to pristine condition
after every test.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:30:04 +08:00
|
|
|
/*
|
|
|
|
* Reset it - just in case we boot another CPU later:
|
|
|
|
*/
|
|
|
|
atomic_set(&start_count, 0);
|
2016-11-19 21:47:35 +08:00
|
|
|
random_warps = 0;
|
x86: fix: s2ram + P4 + tsc = annoyance
s2ram recently became useful here, except for the kernel's annoying
habit of disabling my P4's perfectly good TSC.
[ 107.894470] CPU 1 is now offline
[ 107.894474] SMP alternatives: switching to UP code
[ 107.895832] CPU0 attaching sched-domain:
[ 107.895836] domain 0: span 1
[ 107.895838] groups: 1
[ 107.896097] CPU1 is down
[ 3.726156] Intel machine check architecture supported.
[ 3.726165] Intel machine check reporting enabled on CPU#0.
[ 3.726167] CPU0: Intel P4/Xeon Extended MCE MSRs (12) available
[ 3.726170] CPU0: Thermal monitoring enabled
[ 3.726175] Back to C!
[ 3.726708] Force enabled HPET at resume
[ 3.726775] Enabling non-boot CPUs ...
[ 3.727049] CPU0 attaching NULL sched-domain.
[ 3.727165] SMP alternatives: switching to SMP code
[ 3.727858] Booting processor 1/1 eip 3000
[ 3.727862] CPU 1 irqstacks, hard=b042f000 soft=b042d000
[ 3.738173] Initializing CPU#1
[ 3.798912] Calibrating delay using timer specific routine.. 5986.12 BogoMIPS (lpj=2993061)
[ 3.798920] CPU: After generic identify, caps: bfebfbff 00000000 00000000 00000000 00004400 00000000 00000000 00000000
[ 3.798931] CPU: Trace cache: 12K uops, L1 D cache: 8K
[ 3.798934] CPU: L2 cache: 512K
[ 3.798936] CPU: Physical Processor ID: 0
[ 3.798938] CPU: After all inits, caps: bfebfbff 00000000 00000000 0000b080 00004400 00000000 00000000 00000000
[ 3.798946] Intel machine check architecture supported.
[ 3.798952] Intel machine check reporting enabled on CPU#1.
[ 3.798955] CPU1: Intel P4/Xeon Extended MCE MSRs (12) available
[ 3.798959] CPU1: Thermal monitoring enabled
[ 3.799161] CPU1: Intel(R) Pentium(R) 4 CPU 3.00GHz stepping 09
[ 3.799187] checking TSC synchronization [CPU#0 -> CPU#1]:
[ 3.819181] Measured 63588552840 cycles TSC warp between CPUs, turning off TSC clock.
[ 3.819184] Marking TSC unstable due to: check_tsc_sync_source failed.
If check_tsc_warp() is called after initial boot, and the TSC has in the
meantime been set (BIOS, user, silicon, elves) to a value lower than the
last stored/stale value, we blame the TSC. Reset to pristine condition
after every test.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 20:30:04 +08:00
|
|
|
nr_warps = 0;
|
|
|
|
max_warp = 0;
|
|
|
|
last_tsc = 0;
|
|
|
|
|
2007-10-11 17:17:24 +08:00
|
|
|
/*
|
|
|
|
* Let the target continue with the bootup:
|
|
|
|
*/
|
|
|
|
atomic_inc(&stop_count);
|
2016-11-19 21:47:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Retry, if there is a chance to do so.
|
|
|
|
*/
|
|
|
|
if (atomic_read(&test_runs) > 0)
|
|
|
|
goto retry;
|
2007-10-11 17:17:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Freshly booted CPUs call into this:
|
|
|
|
*/
|
x86: delete __cpuinit usage from all x86 files
The __cpuinit type of throwaway sections might have made sense
some time ago when RAM was more constrained, but now the savings
do not offset the cost and complications. For example, the fix in
commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time")
is a good example of the nasty type of bugs that can be created
with improper use of the various __init prefixes.
After a discussion on LKML[1] it was decided that cpuinit should go
the way of devinit and be phased out. Once all the users are gone,
we can then finally remove the macros themselves from linux/init.h.
Note that some harmless section mismatch warnings may result, since
notify_cpu_starting() and cpu_up() are arch independent (kernel/cpu.c)
are flagged as __cpuinit -- so if we remove the __cpuinit from
arch specific callers, we will also get section mismatch warnings.
As an intermediate step, we intend to turn the linux/init.h cpuinit
content into no-ops as early as possible, since that will get rid
of these warnings. In any case, they are temporary and harmless.
This removes all the arch/x86 uses of the __cpuinit macros from
all C files. x86 only had the one __CPUINIT used in assembly files,
and it wasn't paired off with a .previous or a __FINIT, so we can
delete it directly w/o any corresponding additional change there.
[1] https://lkml.org/lkml/2013/5/20/589
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
2013-06-19 06:23:59 +08:00
|
|
|
void check_tsc_sync_target(void)
|
2007-10-11 17:17:24 +08:00
|
|
|
{
|
2016-11-19 21:47:43 +08:00
|
|
|
struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
cycles_t cur_max_warp, gbl_max_warp;
|
2007-10-11 17:17:24 +08:00
|
|
|
int cpus = 2;
|
|
|
|
|
2015-06-26 00:44:09 +08:00
|
|
|
/* Also aborts if there is no TSC. */
|
2017-02-09 23:08:42 +08:00
|
|
|
if (unsynchronized_tsc())
|
2007-10-11 17:17:24 +08:00
|
|
|
return;
|
|
|
|
|
2016-11-19 21:47:39 +08:00
|
|
|
/*
|
|
|
|
* Store, verify and sanitize the TSC adjust register. If
|
|
|
|
* successful skip the test.
|
2017-02-09 23:08:42 +08:00
|
|
|
*
|
|
|
|
* The test is also skipped when the TSC is marked reliable. This
|
|
|
|
* is true for SoCs which have no fallback clocksource. On these
|
|
|
|
* SoCs the TSC is frequency synchronized, but still the TSC ADJUST
|
|
|
|
* register might have been wreckaged by the BIOS..
|
2016-11-19 21:47:39 +08:00
|
|
|
*/
|
2017-02-09 23:08:42 +08:00
|
|
|
if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
|
2016-11-19 21:47:39 +08:00
|
|
|
atomic_inc(&skip_test);
|
|
|
|
return;
|
|
|
|
}
|
2016-11-19 21:47:36 +08:00
|
|
|
|
2016-11-19 21:47:43 +08:00
|
|
|
retry:
|
2007-10-11 17:17:24 +08:00
|
|
|
/*
|
|
|
|
* Register this CPU's participation and wait for the
|
|
|
|
* source CPU to start the measurement:
|
|
|
|
*/
|
|
|
|
atomic_inc(&start_count);
|
|
|
|
while (atomic_read(&start_count) != cpus)
|
|
|
|
cpu_relax();
|
|
|
|
|
2016-11-19 21:47:43 +08:00
|
|
|
cur_max_warp = check_tsc_warp(loop_timeout(cpu));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Store the maximum observed warp value for a potential retry:
|
|
|
|
*/
|
|
|
|
gbl_max_warp = max_warp;
|
2007-10-11 17:17:24 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Ok, we are done:
|
|
|
|
*/
|
|
|
|
atomic_inc(&stop_count);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wait for the source CPU to print stuff:
|
|
|
|
*/
|
|
|
|
while (atomic_read(&stop_count) != cpus)
|
|
|
|
cpu_relax();
|
2016-11-19 21:47:40 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Reset it for the next sync test:
|
|
|
|
*/
|
|
|
|
atomic_set(&stop_count, 0);
|
2016-11-19 21:47:43 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check the number of remaining test runs. If not zero, the test
|
|
|
|
* failed and a retry with adjusted TSC is possible. If zero the
|
|
|
|
* test was either successful or failed terminally.
|
|
|
|
*/
|
|
|
|
if (!atomic_read(&test_runs))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the warp value of this CPU is 0, then the other CPU
|
|
|
|
* observed time going backwards so this TSC was ahead and
|
|
|
|
* needs to move backwards.
|
|
|
|
*/
|
|
|
|
if (!cur_max_warp)
|
|
|
|
cur_max_warp = -gbl_max_warp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add the result to the previous adjustment value.
|
|
|
|
*
|
|
|
|
* The adjustement value is slightly off by the overhead of the
|
|
|
|
* sync mechanism (observed values are ~200 TSC cycles), but this
|
|
|
|
* really depends on CPU, node distance and frequency. So
|
|
|
|
* compensating for this is hard to get right. Experiments show
|
|
|
|
* that the warp is not longer detectable when the observed warp
|
|
|
|
* value is used. In the worst case the adjustment needs to go
|
|
|
|
* through a 3rd run for fine tuning.
|
|
|
|
*/
|
|
|
|
cur->adjusted += cur_max_warp;
|
2016-12-18 22:09:29 +08:00
|
|
|
|
2016-11-19 21:47:43 +08:00
|
|
|
pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
|
|
|
|
cpu, cur_max_warp, cur->adjusted);
|
|
|
|
|
|
|
|
wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted);
|
|
|
|
goto retry;
|
|
|
|
|
2007-10-11 17:17:24 +08:00
|
|
|
}
|
2016-11-19 21:47:36 +08:00
|
|
|
|
|
|
|
#endif /* CONFIG_SMP */
|