From 81e6737518a1e392ead4e568a4ee70bb7c371458 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 23 Jun 2020 14:31:31 +0800 Subject: [PATCH 1/4] PM: s2idle: Clear _TIF_POLLING_NRFLAG before suspend to idle Suspend to idle was found to not work on Goldmont CPU recently. The issue happens due to: 1. On Goldmont the CPU in idle can only be woken up via IPIs, not POLLING mode, due to commit 08e237fa56a1 ("x86/cpu: Add workaround for MONITOR instruction erratum on Goldmont based CPUs") 2. When the CPU is entering suspend to idle process, the _TIF_POLLING_NRFLAG remains on, because cpuidle_enter_s2idle() doesn't match call_cpuidle() exactly. 3. Commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") makes use of _TIF_POLLING_NRFLAG to avoid sending IPIs to idle CPUs. 4. As a result, some IPIs related functions might not work well during suspend to idle on Goldmont. For example, one suspected victim: tick_unfreeze() -> timekeeping_resume() -> hrtimers_resume() -> clock_was_set() -> on_each_cpu() might wait forever, because the IPIs will not be sent to the CPUs which are sleeping with _TIF_POLLING_NRFLAG set, and Goldmont CPU could not be woken up by only setting _TIF_NEED_RESCHED on the monitor address. To avoid that, clear the _TIF_POLLING_NRFLAG flag before invoking enter_s2idle_proper() in cpuidle_enter_s2idle() in analogy with the call_cpuidle() code flow. Fixes: b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") Suggested-by: Peter Zijlstra (Intel) Suggested-by: Rafael J. Wysocki Reported-by: kbuild test robot Signed-off-by: Chen Yu [ rjw: Subject / changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index c149d9e20dfd..e092789187c6 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -186,7 +187,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) * be frozen safely. */ index = find_deepest_state(drv, dev, U64_MAX, 0, true); - if (index > 0) + if (index > 0 && !current_clr_polling_and_test()) enter_s2idle_proper(drv, dev, index); return index; From 589bab6bb30c8cd43a7da6dcf43cc3a586cc2e25 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 12 Jun 2020 11:09:57 -0700 Subject: [PATCH 2/4] cpufreq: intel_pstate: Add one more OOB control bit Add one more bit for OOB (Out Of Band) enabling of P-states. If OOB handling of P-states is enabled, intel_pstate shouldn't load. Currently, only "BIT(8) == 1" of the MSR MSR_MISC_PWR_MGMT is considered as OOB, but "BIT(18) == 1" needs to be taken into consideration as OOB condition too. Signed-off-by: Srinivas Pandruvada [ rjw: Add an empty code line, edit subject and changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8e23a698ce04..e771e8b4f99f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2677,6 +2677,8 @@ static struct acpi_platform_list plat_info[] __initdata = { { } /* End */ }; +#define BITMASK_OOB (BIT(8) | BIT(18)) + static bool __init intel_pstate_platform_pwr_mgmt_exists(void) { const struct x86_cpu_id *id; @@ -2686,8 +2688,9 @@ static bool __init intel_pstate_platform_pwr_mgmt_exists(void) id = x86_match_cpu(intel_pstate_cpu_oob_ids); if (id) { rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr); - if (misc_pwr & (1 << 8)) { - pr_debug("Bit 8 in the MISC_PWR_MGMT MSR set\n"); + if (misc_pwr & BITMASK_OOB) { + pr_debug("Bit 8 or 18 in the MISC_PWR_MGMT MSR set\n"); + pr_debug("P states are controlled in Out of Band mode by the firmware/hardware\n"); return true; } } From 0659d4205db4d0225efd8f83291cbacfcdea030c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 21 Jun 2020 10:18:54 +0200 Subject: [PATCH 3/4] PM: sleep: core: mark 2 functions as __init to save some memory 'early_resume_init()' and 'late_resume_init() 'are only called respectively via 'early_resume_init' and 'late_resume_init'. They can be marked as __init to save a few bytes of memory. Signed-off-by: Christophe JAILLET [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 977d27bd1a22..a97f33d0c59f 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -265,14 +265,14 @@ static struct notifier_block pm_trace_nb = { .notifier_call = pm_trace_notify, }; -static int early_resume_init(void) +static int __init early_resume_init(void) { hash_value_early_read = read_magic_time(); register_pm_notifier(&pm_trace_nb); return 0; } -static int late_resume_init(void) +static int __init late_resume_init(void) { unsigned int val = hash_value_early_read; unsigned int user, file, dev; From 10e8b11eb3195e11450c509d4dd3984d707a4167 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Jun 2020 13:52:53 +0200 Subject: [PATCH 4/4] cpuidle: Rearrange s2idle-specific idle state entry code Implement call_cpuidle_s2idle() in analogy with call_cpuidle() for the s2idle-specific idle state entry and invoke it from cpuidle_idle_call() to make the s2idle-specific idle entry code path look more similar to the "regular" idle entry one. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Chen Yu --- drivers/cpuidle/cpuidle.c | 6 +++--- kernel/sched/idle.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index e092789187c6..87197319ab06 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -187,9 +186,10 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) * be frozen safely. */ index = find_deepest_state(drv, dev, U64_MAX, 0, true); - if (index > 0 && !current_clr_polling_and_test()) + if (index > 0) { enter_s2idle_proper(drv, dev, index); - + local_irq_enable(); + } return index; } #endif /* CONFIG_SUSPEND */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 05deb81bb3e3..1ae95b9150d3 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -96,6 +96,15 @@ void __cpuidle default_idle_call(void) } } +static int call_cpuidle_s2idle(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + if (current_clr_polling_and_test()) + return -EBUSY; + + return cpuidle_enter_s2idle(drv, dev); +} + static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, int next_state) { @@ -171,11 +180,9 @@ static void cpuidle_idle_call(void) if (idle_should_enter_s2idle()) { rcu_idle_enter(); - entered_state = cpuidle_enter_s2idle(drv, dev); - if (entered_state > 0) { - local_irq_enable(); + entered_state = call_cpuidle_s2idle(drv, dev); + if (entered_state > 0) goto exit_idle; - } rcu_idle_exit();