From 0b07ee944701dabcddc294d903b5e8e21c2c5d95 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Jul 2019 13:06:17 +0530 Subject: [PATCH 01/27] PM / QOS: Pass request type to dev_pm_qos_{add|remove}_notifier() In order to use the same set of routines to register notifiers for different request types, update the existing dev_pm_qos_{add|remove}_notifier() routines with an additional parameter: request-type. For now, it only supports resume-latency request type but will be extended to frequency limit (min/max) constraints later on. Reviewed-by: Matthias Kaehlcke Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/power/pm_qos_interface.txt | 12 +++++++----- drivers/base/power/domain.c | 8 +++++--- drivers/base/power/qos.c | 14 ++++++++++++-- include/linux/pm_qos.h | 12 ++++++++---- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 19c5f7b1a7ba..ec7d662d1707 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -164,12 +164,14 @@ directory. Notification mechanisms: The per-device PM QoS framework has a per-device notification tree. -int dev_pm_qos_add_notifier(device, notifier): -Adds a notification callback function for the device. -The callback is called when the aggregated value of the device constraints list -is changed (for resume latency device PM QoS only). +int dev_pm_qos_add_notifier(device, notifier, type): +Adds a notification callback function for the device for a particular request +type. -int dev_pm_qos_remove_notifier(device, notifier): +The callback is called when the aggregated value of the device constraints list +is changed. + +int dev_pm_qos_remove_notifier(device, notifier, type): Removes the notification callback function for the device. 
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 33c30c1e6a30..b063bc41b0a9 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1536,7 +1536,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (ret) genpd_free_dev_data(dev, gpd_data); else - dev_pm_qos_add_notifier(dev, &gpd_data->nb); + dev_pm_qos_add_notifier(dev, &gpd_data->nb, + DEV_PM_QOS_RESUME_LATENCY); return ret; } @@ -1569,7 +1570,8 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, pdd = dev->power.subsys_data->domain_data; gpd_data = to_gpd_data(pdd); - dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + dev_pm_qos_remove_notifier(dev, &gpd_data->nb, + DEV_PM_QOS_RESUME_LATENCY); genpd_lock(genpd); @@ -1597,7 +1599,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, out: genpd_unlock(genpd); - dev_pm_qos_add_notifier(dev, &gpd_data->nb); + dev_pm_qos_add_notifier(dev, &gpd_data->nb, DEV_PM_QOS_RESUME_LATENCY); return ret; } diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 6c91f8df1d59..cfd463212513 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -467,6 +467,7 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); * * @dev: target device for the constraint * @notifier: notifier block managed by caller. + * @type: request type. * * Will register the notifier into a notification chain that gets called * upon changes to the target value for the device. @@ -474,10 +475,14 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); * If the device's constraints object doesn't exist when this routine is called, * it will be created (or error code will be returned if that fails). 
*/ -int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) +int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, + enum dev_pm_qos_req_type type) { int ret = 0; + if (WARN_ON(type != DEV_PM_QOS_RESUME_LATENCY)) + return -EINVAL; + mutex_lock(&dev_pm_qos_mtx); if (IS_ERR(dev->power.qos)) @@ -500,15 +505,20 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier); * * @dev: target device for the constraint * @notifier: notifier block to be removed. + * @type: request type. * * Will remove the notifier from the notification chain that gets called * upon changes to the target value. */ int dev_pm_qos_remove_notifier(struct device *dev, - struct notifier_block *notifier) + struct notifier_block *notifier, + enum dev_pm_qos_req_type type) { int retval = 0; + if (WARN_ON(type != DEV_PM_QOS_RESUME_LATENCY)) + return -EINVAL; + mutex_lock(&dev_pm_qos_mtx); /* Silently return if the constraints object is not present. */ diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 6ea1ae373d77..58e8749ceac5 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -146,9 +146,11 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value); int dev_pm_qos_remove_request(struct dev_pm_qos_request *req); int dev_pm_qos_add_notifier(struct device *dev, - struct notifier_block *notifier); + struct notifier_block *notifier, + enum dev_pm_qos_req_type type); int dev_pm_qos_remove_notifier(struct device *dev, - struct notifier_block *notifier); + struct notifier_block *notifier, + enum dev_pm_qos_req_type type); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, @@ -202,10 +204,12 @@ static inline int dev_pm_qos_update_request(struct dev_pm_qos_request *req, static inline int dev_pm_qos_remove_request(struct 
dev_pm_qos_request *req) { return 0; } static inline int dev_pm_qos_add_notifier(struct device *dev, - struct notifier_block *notifier) + struct notifier_block *notifier, + enum dev_pm_qos_req_type type) { return 0; } static inline int dev_pm_qos_remove_notifier(struct device *dev, - struct notifier_block *notifier) + struct notifier_block *notifier, + enum dev_pm_qos_req_type type) { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { From 8262331eaaf751076fb2c781f492bafd8344591d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Jul 2019 13:06:18 +0530 Subject: [PATCH 02/27] PM / QOS: Rename __dev_pm_qos_read_value() and dev_pm_qos_raw_read_value() dev_pm_qos_read_value() will soon need to support more constraint types (min/max frequency) and will have another argument to it, i.e. the type of the constraint. While that is fine for the existing users of dev_pm_qos_read_value(), it is not optimal for the callers of __dev_pm_qos_read_value() and dev_pm_qos_raw_read_value(), as all the callers of these two routines are only looking for the resume latency constraint. Let's make these two routines care only about the resume latency constraint and rename them to __dev_pm_qos_resume_latency() and dev_pm_qos_raw_resume_latency(). Suggested-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain_governor.c | 2 +- drivers/base/power/qos.c | 13 +++++++++---- drivers/base/power/runtime.c | 2 +- drivers/cpuidle/governor.c | 2 +- include/linux/pm_qos.h | 8 ++++---- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 3838045c9277..20e56a5be01f 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -66,7 +66,7 @@ static bool default_suspend_ok(struct device *dev) td->constraint_changed = false; td->cached_suspend_ok = false; td->effective_constraint_ns = 0; - constraint_ns = __dev_pm_qos_read_value(dev); + constraint_ns = __dev_pm_qos_resume_latency(dev); spin_unlock_irqrestore(&dev->power.lock, flags); diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index cfd463212513..7a0d197f0809 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -90,16 +90,16 @@ enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) EXPORT_SYMBOL_GPL(dev_pm_qos_flags); /** - * __dev_pm_qos_read_value - Get PM QoS constraint for a given device. + * __dev_pm_qos_resume_latency - Get resume latency constraint for a given device. * @dev: Device to get the PM QoS constraint value for. * * This routine must be called with dev->power.lock held. 
*/ -s32 __dev_pm_qos_read_value(struct device *dev) +s32 __dev_pm_qos_resume_latency(struct device *dev) { lockdep_assert_held(&dev->power.lock); - return dev_pm_qos_raw_read_value(dev); + return dev_pm_qos_raw_resume_latency(dev); } /** @@ -112,7 +112,12 @@ s32 dev_pm_qos_read_value(struct device *dev) s32 ret; spin_lock_irqsave(&dev->power.lock, flags); - ret = __dev_pm_qos_read_value(dev); + + if (IS_ERR_OR_NULL(dev->power.qos)) + ret = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + else + ret = pm_qos_read_value(&dev->power.qos->resume_latency); + spin_unlock_irqrestore(&dev->power.lock, flags); return ret; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 952a1e7057c7..b75335508d2c 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -275,7 +275,7 @@ static int rpm_check_suspend_allowed(struct device *dev) || (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; - else if (__dev_pm_qos_read_value(dev) == 0) + else if (__dev_pm_qos_resume_latency(dev) == 0) retval = -EPERM; else if (dev->power.runtime_status == RPM_SUSPENDED) retval = 1; diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 9fddf828a76f..2e3e14192bee 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -110,7 +110,7 @@ int cpuidle_governor_latency_req(unsigned int cpu) { int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); struct device *device = get_cpu_device(cpu); - int device_req = dev_pm_qos_raw_read_value(device); + int device_req = dev_pm_qos_raw_resume_latency(device); return device_req < global_req ? 
device_req : global_req; } diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 58e8749ceac5..5e09d4980786 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -139,7 +139,7 @@ s32 pm_qos_read_value(struct pm_qos_constraints *c); #ifdef CONFIG_PM enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask); -s32 __dev_pm_qos_read_value(struct device *dev); +s32 __dev_pm_qos_resume_latency(struct device *dev); s32 dev_pm_qos_read_value(struct device *dev); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value); @@ -176,7 +176,7 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) return dev->power.qos->flags_req->data.flr.flags; } -static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) { return IS_ERR_OR_NULL(dev->power.qos) ? 
PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : @@ -189,7 +189,7 @@ static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, static inline enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) { return PM_QOS_FLAGS_UNDEFINED; } -static inline s32 __dev_pm_qos_read_value(struct device *dev) +static inline s32 __dev_pm_qos_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline s32 dev_pm_qos_read_value(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } @@ -245,7 +245,7 @@ static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } -static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } From 2a79ea5ec53973c8711b54d33ace5c77659dc8f8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Jul 2019 13:06:19 +0530 Subject: [PATCH 03/27] PM / QOS: Pass request type to dev_pm_qos_read_value() In order to allow dev_pm_qos_read_value() to read values for different QoS requests, pass request type as a parameter to these routines. For now, it only supports resume-latency request type but will be extended to frequency limit (min/max) constraints later on. Reviewed-by: Matthias Kaehlcke Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- Documentation/power/pm_qos_interface.txt | 2 +- drivers/base/power/domain_governor.c | 2 +- drivers/base/power/qos.c | 17 ++++++++++++----- include/linux/pm_qos.h | 16 +++++++++++++--- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index ec7d662d1707..cfcb1df39799 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -123,7 +123,7 @@ Will remove the element. After removal it will update the aggregate target and call the notification trees if the target was changed as a result of removing the request. -s32 dev_pm_qos_read_value(device): +s32 dev_pm_qos_read_value(device, type): Returns the aggregated value for a given device's constraints list. enum pm_qos_flags_status dev_pm_qos_flags(device, mask) diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 20e56a5be01f..daa8c7689f7e 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -33,7 +33,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data) * take its current PM QoS constraint (that's the only thing * known at this point anyway). */ - constraint_ns = dev_pm_qos_read_value(dev); + constraint_ns = dev_pm_qos_read_value(dev, DEV_PM_QOS_RESUME_LATENCY); constraint_ns *= NSEC_PER_USEC; } diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 7a0d197f0809..2461fed0efa0 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -105,18 +105,25 @@ s32 __dev_pm_qos_resume_latency(struct device *dev) /** * dev_pm_qos_read_value - Get PM QoS constraint for a given device (locked). * @dev: Device to get the PM QoS constraint value for. + * @type: QoS request type. 
*/ -s32 dev_pm_qos_read_value(struct device *dev) +s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) { + struct dev_pm_qos *qos = dev->power.qos; unsigned long flags; s32 ret; spin_lock_irqsave(&dev->power.lock, flags); - if (IS_ERR_OR_NULL(dev->power.qos)) - ret = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; - else - ret = pm_qos_read_value(&dev->power.qos->resume_latency); + switch (type) { + case DEV_PM_QOS_RESUME_LATENCY: + ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT + : pm_qos_read_value(&qos->resume_latency); + break; + default: + WARN_ON(1); + ret = 0; + } spin_unlock_irqrestore(&dev->power.lock, flags); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 5e09d4980786..9a21b7ba72ae 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -140,7 +140,7 @@ s32 pm_qos_read_value(struct pm_qos_constraints *c); enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask); s32 __dev_pm_qos_resume_latency(struct device *dev); -s32 dev_pm_qos_read_value(struct device *dev); +s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, s32 value); int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value); @@ -191,8 +191,18 @@ static inline enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, { return PM_QOS_FLAGS_UNDEFINED; } static inline s32 __dev_pm_qos_resume_latency(struct device *dev) { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } -static inline s32 dev_pm_qos_read_value(struct device *dev) - { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } +static inline s32 dev_pm_qos_read_value(struct device *dev, + enum dev_pm_qos_req_type type) +{ + switch (type) { + case DEV_PM_QOS_RESUME_LATENCY: + return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + default: + WARN_ON(1); + 
return 0; + } +} + static inline int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, From 208637b37824c8956fe28d277835a403ee35fa84 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Jul 2019 13:06:20 +0530 Subject: [PATCH 04/27] PM / QoS: Add support for MIN/MAX frequency constraints This patch introduces the min-frequency and max-frequency device constraints, which will be used by the cpufreq core to begin with. Reviewed-by: Matthias Kaehlcke Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 111 ++++++++++++++++++++++++++++++++++----- include/linux/pm_qos.h | 12 +++++ 2 files changed, 109 insertions(+), 14 deletions(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 2461fed0efa0..6c90fd7e2ff8 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -120,6 +120,14 @@ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type) ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : pm_qos_read_value(&qos->resume_latency); break; + case DEV_PM_QOS_MIN_FREQUENCY: + ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE + : pm_qos_read_value(&qos->min_frequency); + break; + case DEV_PM_QOS_MAX_FREQUENCY: + ret = IS_ERR_OR_NULL(qos) ? 
PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE + : pm_qos_read_value(&qos->max_frequency); + break; default: WARN_ON(1); ret = 0; @@ -161,6 +169,14 @@ static int apply_constraint(struct dev_pm_qos_request *req, req->dev->power.set_latency_tolerance(req->dev, value); } break; + case DEV_PM_QOS_MIN_FREQUENCY: + ret = pm_qos_update_target(&qos->min_frequency, + &req->data.pnode, action, value); + break; + case DEV_PM_QOS_MAX_FREQUENCY: + ret = pm_qos_update_target(&qos->max_frequency, + &req->data.pnode, action, value); + break; case DEV_PM_QOS_FLAGS: ret = pm_qos_update_flags(&qos->flags, &req->data.flr, action, value); @@ -189,12 +205,11 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) if (!qos) return -ENOMEM; - n = kzalloc(sizeof(*n), GFP_KERNEL); + n = kzalloc(3 * sizeof(*n), GFP_KERNEL); if (!n) { kfree(qos); return -ENOMEM; } - BLOCKING_INIT_NOTIFIER_HEAD(n); c = &qos->resume_latency; plist_head_init(&c->list); @@ -203,6 +218,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) c->no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; c->type = PM_QOS_MIN; c->notifiers = n; + BLOCKING_INIT_NOTIFIER_HEAD(n); c = &qos->latency_tolerance; plist_head_init(&c->list); @@ -211,6 +227,24 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; c->type = PM_QOS_MIN; + c = &qos->min_frequency; + plist_head_init(&c->list); + c->target_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; + c->default_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; + c->no_constraint_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; + c->type = PM_QOS_MAX; + c->notifiers = ++n; + BLOCKING_INIT_NOTIFIER_HEAD(n); + + c = &qos->max_frequency; + plist_head_init(&c->list); + c->target_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; + c->default_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; + c->no_constraint_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; + c->type = PM_QOS_MIN; + c->notifiers = ++n; + 
BLOCKING_INIT_NOTIFIER_HEAD(n); + INIT_LIST_HEAD(&qos->flags.list); spin_lock_irq(&dev->power.lock); @@ -264,11 +298,25 @@ void dev_pm_qos_constraints_destroy(struct device *dev) apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } + c = &qos->latency_tolerance; plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } + + c = &qos->min_frequency; + plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { + apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } + + c = &qos->max_frequency; + plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { + apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); + memset(req, 0, sizeof(*req)); + } + f = &qos->flags; list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) { apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); @@ -380,6 +428,8 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, switch(req->type) { case DEV_PM_QOS_RESUME_LATENCY: case DEV_PM_QOS_LATENCY_TOLERANCE: + case DEV_PM_QOS_MIN_FREQUENCY: + case DEV_PM_QOS_MAX_FREQUENCY: curr_value = req->data.pnode.prio; break; case DEV_PM_QOS_FLAGS: @@ -492,9 +542,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, { int ret = 0; - if (WARN_ON(type != DEV_PM_QOS_RESUME_LATENCY)) - return -EINVAL; - mutex_lock(&dev_pm_qos_mtx); if (IS_ERR(dev->power.qos)) @@ -502,10 +549,28 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier, else if (!dev->power.qos) ret = dev_pm_qos_constraints_allocate(dev); - if (!ret) + if (ret) + goto unlock; + + switch (type) { + case DEV_PM_QOS_RESUME_LATENCY: ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers, notifier); + break; + case DEV_PM_QOS_MIN_FREQUENCY: + ret = 
blocking_notifier_chain_register(dev->power.qos->min_frequency.notifiers, + notifier); + break; + case DEV_PM_QOS_MAX_FREQUENCY: + ret = blocking_notifier_chain_register(dev->power.qos->max_frequency.notifiers, + notifier); + break; + default: + WARN_ON(1); + ret = -EINVAL; + } +unlock: mutex_unlock(&dev_pm_qos_mtx); return ret; } @@ -526,20 +591,35 @@ int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier, enum dev_pm_qos_req_type type) { - int retval = 0; - - if (WARN_ON(type != DEV_PM_QOS_RESUME_LATENCY)) - return -EINVAL; + int ret = 0; mutex_lock(&dev_pm_qos_mtx); /* Silently return if the constraints object is not present. */ - if (!IS_ERR_OR_NULL(dev->power.qos)) - retval = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers, - notifier); + if (IS_ERR_OR_NULL(dev->power.qos)) + goto unlock; + switch (type) { + case DEV_PM_QOS_RESUME_LATENCY: + ret = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers, + notifier); + break; + case DEV_PM_QOS_MIN_FREQUENCY: + ret = blocking_notifier_chain_unregister(dev->power.qos->min_frequency.notifiers, + notifier); + break; + case DEV_PM_QOS_MAX_FREQUENCY: + ret = blocking_notifier_chain_unregister(dev->power.qos->max_frequency.notifiers, + notifier); + break; + default: + WARN_ON(1); + ret = -EINVAL; + } + +unlock: mutex_unlock(&dev_pm_qos_mtx); - return retval; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier); @@ -599,6 +679,9 @@ static void __dev_pm_qos_drop_user_request(struct device *dev, req = dev->power.qos->flags_req; dev->power.qos->flags_req = NULL; break; + default: + WARN_ON(1); + return; } __dev_pm_qos_remove_request(req); kfree(req); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 9a21b7ba72ae..2aebbc5b9950 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -40,6 +40,8 @@ enum pm_qos_flags_status { #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define 
PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 +#define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE 0 +#define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE (-1) #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) @@ -58,6 +60,8 @@ struct pm_qos_flags_request { enum dev_pm_qos_req_type { DEV_PM_QOS_RESUME_LATENCY = 1, DEV_PM_QOS_LATENCY_TOLERANCE, + DEV_PM_QOS_MIN_FREQUENCY, + DEV_PM_QOS_MAX_FREQUENCY, DEV_PM_QOS_FLAGS, }; @@ -99,10 +103,14 @@ struct pm_qos_flags { struct dev_pm_qos { struct pm_qos_constraints resume_latency; struct pm_qos_constraints latency_tolerance; + struct pm_qos_constraints min_frequency; + struct pm_qos_constraints max_frequency; struct pm_qos_flags flags; struct dev_pm_qos_request *resume_latency_req; struct dev_pm_qos_request *latency_tolerance_req; struct dev_pm_qos_request *flags_req; + struct dev_pm_qos_request *min_frequency_req; + struct dev_pm_qos_request *max_frequency_req; }; /* Action requested to pm_qos_update_target */ @@ -197,6 +205,10 @@ static inline s32 dev_pm_qos_read_value(struct device *dev, switch (type) { case DEV_PM_QOS_RESUME_LATENCY: return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + case DEV_PM_QOS_MIN_FREQUENCY: + return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; + case DEV_PM_QOS_MAX_FREQUENCY: + return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; default: WARN_ON(1); return 0; From 67d874c3b2c69d65274fa5ce44ba939788d5729d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 8 Jul 2019 16:27:52 +0530 Subject: [PATCH 05/27] cpufreq: Register notifiers with the PM QoS framework Register notifiers for min/max frequency constraints with the PM QoS framework. The constraints are also taken into consideration in cpufreq_set_policy(). This also relocates cpufreq_policy_put_kobj() as it is required to be called from cpufreq_policy_alloc() now. 
refresh_frequency_limits() is updated to avoid calling cpufreq_set_policy() for inactive policies and handle_update() is updated to have proper locking in place. No constraints are added until now though. Reviewed-by: Matthias Kaehlcke Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar Tested-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 183 +++++++++++++++++++++++++++----------- include/linux/cpufreq.h | 3 + 2 files changed, 132 insertions(+), 54 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ceb57af15ca0..b96ef6db1bfe 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -999,7 +1000,7 @@ static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) { struct device *dev = get_cpu_device(cpu); - if (!dev) + if (unlikely(!dev)) return; if (cpumask_test_and_set_cpu(cpu, policy->real_cpus)) @@ -1117,14 +1118,16 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp static void refresh_frequency_limits(struct cpufreq_policy *policy) { - struct cpufreq_policy new_policy = *policy; + struct cpufreq_policy new_policy; - pr_debug("updating policy for CPU %u\n", policy->cpu); + if (!policy_is_inactive(policy)) { + new_policy = *policy; + pr_debug("updating policy for CPU %u\n", policy->cpu); - new_policy.min = policy->user_policy.min; - new_policy.max = policy->user_policy.max; - - cpufreq_set_policy(policy, &new_policy); + new_policy.min = policy->user_policy.min; + new_policy.max = policy->user_policy.max; + cpufreq_set_policy(policy, &new_policy); + } } static void handle_update(struct work_struct *work) @@ -1133,60 +1136,27 @@ static void handle_update(struct work_struct *work) container_of(work, struct cpufreq_policy, update); pr_debug("handle_update for cpu %u called\n", policy->cpu); + down_write(&policy->rwsem); 
refresh_frequency_limits(policy); + up_write(&policy->rwsem); } -static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) +static int cpufreq_notifier_min(struct notifier_block *nb, unsigned long freq, + void *data) { - struct cpufreq_policy *policy; - int ret; + struct cpufreq_policy *policy = container_of(nb, struct cpufreq_policy, nb_min); - policy = kzalloc(sizeof(*policy), GFP_KERNEL); - if (!policy) - return NULL; + schedule_work(&policy->update); + return 0; +} - if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) - goto err_free_policy; +static int cpufreq_notifier_max(struct notifier_block *nb, unsigned long freq, + void *data) +{ + struct cpufreq_policy *policy = container_of(nb, struct cpufreq_policy, nb_max); - if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) - goto err_free_cpumask; - - if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) - goto err_free_rcpumask; - - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, - cpufreq_global_kobject, "policy%u", cpu); - if (ret) { - pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); - /* - * The entire policy object will be freed below, but the extra - * memory allocated for the kobject name needs to be freed by - * releasing the kobject. 
- */ - kobject_put(&policy->kobj); - goto err_free_real_cpus; - } - - INIT_LIST_HEAD(&policy->policy_list); - init_rwsem(&policy->rwsem); - spin_lock_init(&policy->transition_lock); - init_waitqueue_head(&policy->transition_wait); - init_completion(&policy->kobj_unregister); - INIT_WORK(&policy->update, handle_update); - - policy->cpu = cpu; - return policy; - -err_free_real_cpus: - free_cpumask_var(policy->real_cpus); -err_free_rcpumask: - free_cpumask_var(policy->related_cpus); -err_free_cpumask: - free_cpumask_var(policy->cpus); -err_free_policy: - kfree(policy); - - return NULL; + schedule_work(&policy->update); + return 0; } static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) @@ -1211,8 +1181,90 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy) pr_debug("wait complete\n"); } +static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) +{ + struct cpufreq_policy *policy; + struct device *dev = get_cpu_device(cpu); + int ret; + + if (!dev) + return NULL; + + policy = kzalloc(sizeof(*policy), GFP_KERNEL); + if (!policy) + return NULL; + + if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) + goto err_free_policy; + + if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) + goto err_free_cpumask; + + if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) + goto err_free_rcpumask; + + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, + cpufreq_global_kobject, "policy%u", cpu); + if (ret) { + dev_err(dev, "%s: failed to init policy->kobj: %d\n", __func__, ret); + /* + * The entire policy object will be freed below, but the extra + * memory allocated for the kobject name needs to be freed by + * releasing the kobject. 
+ */ + kobject_put(&policy->kobj); + goto err_free_real_cpus; + } + + policy->nb_min.notifier_call = cpufreq_notifier_min; + policy->nb_max.notifier_call = cpufreq_notifier_max; + + ret = dev_pm_qos_add_notifier(dev, &policy->nb_min, + DEV_PM_QOS_MIN_FREQUENCY); + if (ret) { + dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n", + ret, cpumask_pr_args(policy->cpus)); + goto err_kobj_remove; + } + + ret = dev_pm_qos_add_notifier(dev, &policy->nb_max, + DEV_PM_QOS_MAX_FREQUENCY); + if (ret) { + dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n", + ret, cpumask_pr_args(policy->cpus)); + goto err_min_qos_notifier; + } + + INIT_LIST_HEAD(&policy->policy_list); + init_rwsem(&policy->rwsem); + spin_lock_init(&policy->transition_lock); + init_waitqueue_head(&policy->transition_wait); + init_completion(&policy->kobj_unregister); + INIT_WORK(&policy->update, handle_update); + + policy->cpu = cpu; + return policy; + +err_min_qos_notifier: + dev_pm_qos_remove_notifier(dev, &policy->nb_min, + DEV_PM_QOS_MIN_FREQUENCY); +err_kobj_remove: + cpufreq_policy_put_kobj(policy); +err_free_real_cpus: + free_cpumask_var(policy->real_cpus); +err_free_rcpumask: + free_cpumask_var(policy->related_cpus); +err_free_cpumask: + free_cpumask_var(policy->cpus); +err_free_policy: + kfree(policy); + + return NULL; +} + static void cpufreq_policy_free(struct cpufreq_policy *policy) { + struct device *dev = get_cpu_device(policy->cpu); unsigned long flags; int cpu; @@ -1224,6 +1276,11 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) per_cpu(cpufreq_cpu_data, cpu) = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + dev_pm_qos_remove_notifier(dev, &policy->nb_max, + DEV_PM_QOS_MAX_FREQUENCY); + dev_pm_qos_remove_notifier(dev, &policy->nb_min, + DEV_PM_QOS_MIN_FREQUENCY); + cpufreq_policy_put_kobj(policy); free_cpumask_var(policy->real_cpus); free_cpumask_var(policy->related_cpus); @@ -2283,6 +2340,8 @@ int cpufreq_set_policy(struct cpufreq_policy 
*policy, struct cpufreq_policy *new_policy) { struct cpufreq_governor *old_gov; + struct device *cpu_dev = get_cpu_device(policy->cpu); + unsigned long min, max; int ret; pr_debug("setting new policy for CPU %u: %u - %u kHz\n", @@ -2297,11 +2356,27 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, if (new_policy->min > new_policy->max) return -EINVAL; + /* + * PM QoS framework collects all the requests from users and provide us + * the final aggregated value here. + */ + min = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MIN_FREQUENCY); + max = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MAX_FREQUENCY); + + if (min > new_policy->min) + new_policy->min = min; + if (max < new_policy->max) + new_policy->max = max; + /* verify the cpu speed can be set within this limit */ ret = cpufreq_driver->verify(new_policy); if (ret) return ret; + /* + * The notifier-chain shall be removed once all the users of + * CPUFREQ_ADJUST are moved to use the QoS framework. + */ /* adjust if necessary - all reasons */ blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_ADJUST, new_policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a1467aa7f58b..95425941f46d 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -147,6 +147,9 @@ struct cpufreq_policy { /* Pointer to the cooling device if used for thermal mitigation */ struct thermal_cooling_device *cdev; + + struct notifier_block nb_min; + struct notifier_block nb_max; }; struct cpufreq_freqs { From c57b25bdf7cd374af106992356536bf5df7c255b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Jul 2019 13:06:22 +0530 Subject: [PATCH 06/27] cpufreq: intel_pstate: Reuse refresh_frequency_limits() The implementation of intel_pstate_update_max_freq() is quite similar to refresh_frequency_limits(), let's reuse it.
Finding minimum of policy->user_policy.max and policy->cpuinfo.max_freq in intel_pstate_update_max_freq() is redundant as cpufreq_set_policy() will call the ->verify() callback of intel-pstate driver, which will do this comparison anyway and so dropping it from intel_pstate_update_max_freq() doesn't harm. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 ++- drivers/cpufreq/intel_pstate.c | 7 +------ include/linux/cpufreq.h | 1 + 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b96ef6db1bfe..79bac52919a5 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1116,7 +1116,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp return ret; } -static void refresh_frequency_limits(struct cpufreq_policy *policy) +void refresh_frequency_limits(struct cpufreq_policy *policy) { struct cpufreq_policy new_policy; @@ -1129,6 +1129,7 @@ static void refresh_frequency_limits(struct cpufreq_policy *policy) cpufreq_set_policy(policy, &new_policy); } } +EXPORT_SYMBOL(refresh_frequency_limits); static void handle_update(struct work_struct *work) { diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f2ff5de988c1..cc27d4c59dca 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -898,7 +898,6 @@ static void intel_pstate_update_policies(void) static void intel_pstate_update_max_freq(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu); - struct cpufreq_policy new_policy; struct cpudata *cpudata; if (!policy) @@ -908,11 +907,7 @@ static void intel_pstate_update_max_freq(unsigned int cpu) policy->cpuinfo.max_freq = global.turbo_disabled_mf ? 
cpudata->pstate.max_freq : cpudata->pstate.turbo_freq; - memcpy(&new_policy, policy, sizeof(*policy)); - new_policy.max = min(policy->user_policy.max, policy->cpuinfo.max_freq); - new_policy.min = min(policy->user_policy.min, new_policy.max); - - cpufreq_set_policy(policy, &new_policy); + refresh_frequency_limits(policy); cpufreq_cpu_release(policy); } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 95425941f46d..1fa37b675a80 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -207,6 +207,7 @@ void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); int cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_policy *new_policy); +void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); bool have_governor_per_policy(void); From 18c49926c4bf4915e5194d1de3299c0537229f9f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 5 Jul 2019 16:21:24 +0530 Subject: [PATCH 07/27] cpufreq: Add QoS requests for userspace constraints This implements QoS requests to manage userspace configuration of min and max frequency. Reviewed-by: Matthias Kaehlcke Reviewed-by: Ulf Hansson Signed-off-by: Viresh Kumar Tested-by: syzbot Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 98 ++++++++++++++++++++++----------------- include/linux/cpufreq.h | 8 +--- 2 files changed, 57 insertions(+), 49 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 79bac52919a5..99aa7d20b458 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -718,23 +718,15 @@ static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) static ssize_t store_##file_name \ (struct cpufreq_policy *policy, const char *buf, size_t count) \ { \ - int ret, temp; \ - struct cpufreq_policy new_policy; \ + unsigned long val; \ + int ret; \ \ - memcpy(&new_policy, policy, sizeof(*policy)); \ - new_policy.min = policy->user_policy.min; \ - new_policy.max = policy->user_policy.max; \ - \ - ret = sscanf(buf, "%u", &new_policy.object); \ + ret = sscanf(buf, "%lu", &val); \ if (ret != 1) \ return -EINVAL; \ \ - temp = new_policy.object; \ - ret = cpufreq_set_policy(policy, &new_policy); \ - if (!ret) \ - policy->user_policy.object = temp; \ - \ - return ret ? ret : count; \ + ret = dev_pm_qos_update_request(policy->object##_freq_req, val);\ + return ret >= 0 ? 
count : ret; \ } store_one(scaling_min_freq, min); @@ -1124,8 +1116,6 @@ void refresh_frequency_limits(struct cpufreq_policy *policy) new_policy = *policy; pr_debug("updating policy for CPU %u\n", policy->cpu); - new_policy.min = policy->user_policy.min; - new_policy.max = policy->user_policy.max; cpufreq_set_policy(policy, &new_policy); } } @@ -1281,6 +1271,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) DEV_PM_QOS_MAX_FREQUENCY); dev_pm_qos_remove_notifier(dev, &policy->nb_min, DEV_PM_QOS_MIN_FREQUENCY); + dev_pm_qos_remove_request(policy->max_freq_req); + dev_pm_qos_remove_request(policy->min_freq_req); + kfree(policy->min_freq_req); cpufreq_policy_put_kobj(policy); free_cpumask_var(policy->real_cpus); @@ -1359,16 +1352,50 @@ static int cpufreq_online(unsigned int cpu) cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); if (new_policy) { - policy->user_policy.min = policy->min; - policy->user_policy.max = policy->max; + struct device *dev = get_cpu_device(cpu); for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; add_cpu_dev_symlink(policy, j); } - } else { - policy->min = policy->user_policy.min; - policy->max = policy->user_policy.max; + + policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req), + GFP_KERNEL); + if (!policy->min_freq_req) + goto out_destroy_policy; + + ret = dev_pm_qos_add_request(dev, policy->min_freq_req, + DEV_PM_QOS_MIN_FREQUENCY, + policy->min); + if (ret < 0) { + /* + * So we don't call dev_pm_qos_remove_request() for an + * uninitialized request. + */ + kfree(policy->min_freq_req); + policy->min_freq_req = NULL; + + dev_err(dev, "Failed to add min-freq constraint (%d)\n", + ret); + goto out_destroy_policy; + } + + /* + * This must be initialized right here to avoid calling + * dev_pm_qos_remove_request() on uninitialized request in case + * of errors. 
+ */ + policy->max_freq_req = policy->min_freq_req + 1; + + ret = dev_pm_qos_add_request(dev, policy->max_freq_req, + DEV_PM_QOS_MAX_FREQUENCY, + policy->max); + if (ret < 0) { + policy->max_freq_req = NULL; + dev_err(dev, "Failed to add max-freq constraint (%d)\n", + ret); + goto out_destroy_policy; + } } if (cpufreq_driver->get && has_target()) { @@ -2342,7 +2369,6 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, { struct cpufreq_governor *old_gov; struct device *cpu_dev = get_cpu_device(policy->cpu); - unsigned long min, max; int ret; pr_debug("setting new policy for CPU %u: %u - %u kHz\n", @@ -2350,24 +2376,12 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); - /* - * This check works well when we store new min/max freq attributes, - * because new_policy is a copy of policy with one field updated. - */ - if (new_policy->min > new_policy->max) - return -EINVAL; - /* * PM QoS framework collects all the requests from users and provide us * the final aggregated value here. */ - min = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MIN_FREQUENCY); - max = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MAX_FREQUENCY); - - if (min > new_policy->min) - new_policy->min = min; - if (max < new_policy->max) - new_policy->max = max; + new_policy->min = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MIN_FREQUENCY); + new_policy->max = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MAX_FREQUENCY); /* verify the cpu speed can be set within this limit */ ret = cpufreq_driver->verify(new_policy); @@ -2456,10 +2470,9 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, * @cpu: CPU to re-evaluate the policy for. 
* * Update the current frequency for the cpufreq policy of @cpu and use - * cpufreq_set_policy() to re-apply the min and max limits saved in the - * user_policy sub-structure of that policy, which triggers the evaluation - * of policy notifiers and the cpufreq driver's ->verify() callback for the - * policy in question, among other things. + * cpufreq_set_policy() to re-apply the min and max limits, which triggers the + * evaluation of policy notifiers and the cpufreq driver's ->verify() callback + * for the policy in question, among other things. */ void cpufreq_update_policy(unsigned int cpu) { @@ -2519,10 +2532,9 @@ static int cpufreq_boost_set_sw(int state) break; } - down_write(&policy->rwsem); - policy->user_policy.max = policy->max; - cpufreq_governor_limits(policy); - up_write(&policy->rwsem); + ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max); + if (ret) + break; } return ret; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1fa37b675a80..afc683021ac5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -50,11 +50,6 @@ struct cpufreq_cpuinfo { unsigned int transition_latency; }; -struct cpufreq_user_policy { - unsigned int min; /* in kHz */ - unsigned int max; /* in kHz */ -}; - struct cpufreq_policy { /* CPUs sharing clock, require sw coordination */ cpumask_var_t cpus; /* Online CPUs only */ @@ -84,7 +79,8 @@ struct cpufreq_policy { struct work_struct update; /* if update_policy() needs to be * called, but you're in IRQ context */ - struct cpufreq_user_policy user_policy; + struct dev_pm_qos_request *min_freq_req; + struct dev_pm_qos_request *max_freq_req; struct cpufreq_frequency_table *freq_table; enum cpufreq_table_sorting freq_table_sorted; From 5b8010ba70d5aa5b321d3e69b5b31cc3db857d5e Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 8 Jul 2019 11:03:08 +0800 Subject: [PATCH 08/27] cpufreq: imx-cpufreq-dt: Add i.MX8MN support i.MX8MN is a new SoC of i.MX8M series, it also uses speed grading 
and market segment fuses for OPP definitions, add support for this SoC. Signed-off-by: Anson Huang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/imx-cpufreq-dt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx-cpufreq-dt.c b/drivers/cpufreq/imx-cpufreq-dt.c index b54fd26ea7df..4f85f3112784 100644 --- a/drivers/cpufreq/imx-cpufreq-dt.c +++ b/drivers/cpufreq/imx-cpufreq-dt.c @@ -44,10 +44,11 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev) * According to datasheet minimum speed grading is not supported for * consumer parts so clamp to 1 to avoid warning for "no OPPs" * - * Applies to 8mq and 8mm. + * Applies to i.MX8M series SoCs. */ if (mkt_segment == 0 && speed_grade == 0 && ( of_machine_is_compatible("fsl,imx8mm") || + of_machine_is_compatible("fsl,imx8mn") || of_machine_is_compatible("fsl,imx8mq"))) speed_grade = 1; From f7c4e0c89bbd0f008b33d9dce02e207d9dea9f54 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:22 +0800 Subject: [PATCH 09/27] intel_rapl: use reg instead of msr To support both MSR and MMIO Interface, use 'reg' to describe RAPL registers instead of 'msr'. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J.
Wysocki --- drivers/powercap/intel_rapl.c | 98 +++++++++++++++++------------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 8692f6b79f93..45d5f22d00e8 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -83,13 +83,13 @@ enum rapl_domain_type { RAPL_DOMAIN_MAX, }; -enum rapl_domain_msr_id { - RAPL_DOMAIN_MSR_LIMIT, - RAPL_DOMAIN_MSR_STATUS, - RAPL_DOMAIN_MSR_PERF, - RAPL_DOMAIN_MSR_POLICY, - RAPL_DOMAIN_MSR_INFO, - RAPL_DOMAIN_MSR_MAX, +enum rapl_domain_reg_id { + RAPL_DOMAIN_REG_LIMIT, + RAPL_DOMAIN_REG_STATUS, + RAPL_DOMAIN_REG_PERF, + RAPL_DOMAIN_REG_POLICY, + RAPL_DOMAIN_REG_INFO, + RAPL_DOMAIN_REG_MAX, }; /* per domain data, some are optional */ @@ -154,7 +154,7 @@ struct rapl_package; struct rapl_domain { const char *name; enum rapl_domain_type id; - int msrs[RAPL_DOMAIN_MSR_MAX]; + int regs[RAPL_DOMAIN_REG_MAX]; struct powercap_zone power_zone; struct rapl_domain_data rdd; struct rapl_power_limit rpl[NR_POWER_LIMITS]; @@ -216,7 +216,7 @@ struct rapl_primitive_info { const char *name; u64 mask; int shift; - enum rapl_domain_msr_id id; + enum rapl_domain_reg_id id; enum unit_type unit; u32 flag; }; @@ -642,11 +642,11 @@ static void rapl_init_domains(struct rapl_package *rp) case BIT(RAPL_DOMAIN_PACKAGE): rd->name = rapl_domain_names[RAPL_DOMAIN_PACKAGE]; rd->id = RAPL_DOMAIN_PACKAGE; - rd->msrs[0] = MSR_PKG_POWER_LIMIT; - rd->msrs[1] = MSR_PKG_ENERGY_STATUS; - rd->msrs[2] = MSR_PKG_PERF_STATUS; - rd->msrs[3] = 0; - rd->msrs[4] = MSR_PKG_POWER_INFO; + rd->regs[0] = MSR_PKG_POWER_LIMIT; + rd->regs[1] = MSR_PKG_ENERGY_STATUS; + rd->regs[2] = MSR_PKG_PERF_STATUS; + rd->regs[3] = 0; + rd->regs[4] = MSR_PKG_POWER_INFO; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; @@ -655,33 +655,33 @@ static void rapl_init_domains(struct rapl_package *rp) case BIT(RAPL_DOMAIN_PP0): rd->name = 
rapl_domain_names[RAPL_DOMAIN_PP0]; rd->id = RAPL_DOMAIN_PP0; - rd->msrs[0] = MSR_PP0_POWER_LIMIT; - rd->msrs[1] = MSR_PP0_ENERGY_STATUS; - rd->msrs[2] = 0; - rd->msrs[3] = MSR_PP0_POLICY; - rd->msrs[4] = 0; + rd->regs[0] = MSR_PP0_POWER_LIMIT; + rd->regs[1] = MSR_PP0_ENERGY_STATUS; + rd->regs[2] = 0; + rd->regs[3] = MSR_PP0_POLICY; + rd->regs[4] = 0; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; break; case BIT(RAPL_DOMAIN_PP1): rd->name = rapl_domain_names[RAPL_DOMAIN_PP1]; rd->id = RAPL_DOMAIN_PP1; - rd->msrs[0] = MSR_PP1_POWER_LIMIT; - rd->msrs[1] = MSR_PP1_ENERGY_STATUS; - rd->msrs[2] = 0; - rd->msrs[3] = MSR_PP1_POLICY; - rd->msrs[4] = 0; + rd->regs[0] = MSR_PP1_POWER_LIMIT; + rd->regs[1] = MSR_PP1_ENERGY_STATUS; + rd->regs[2] = 0; + rd->regs[3] = MSR_PP1_POLICY; + rd->regs[4] = 0; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; break; case BIT(RAPL_DOMAIN_DRAM): rd->name = rapl_domain_names[RAPL_DOMAIN_DRAM]; rd->id = RAPL_DOMAIN_DRAM; - rd->msrs[0] = MSR_DRAM_POWER_LIMIT; - rd->msrs[1] = MSR_DRAM_ENERGY_STATUS; - rd->msrs[2] = MSR_DRAM_PERF_STATUS; - rd->msrs[3] = 0; - rd->msrs[4] = MSR_DRAM_POWER_INFO; + rd->regs[0] = MSR_DRAM_POWER_LIMIT; + rd->regs[1] = MSR_DRAM_ENERGY_STATUS; + rd->regs[2] = MSR_DRAM_PERF_STATUS; + rd->regs[3] = 0; + rd->regs[4] = MSR_DRAM_POWER_INFO; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->domain_energy_unit = @@ -736,37 +736,37 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, static struct rapl_primitive_info rpi[] = { /* name, mask, shift, msr index, unit divisor */ PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, - RAPL_DOMAIN_MSR_STATUS, ENERGY_UNIT, 0), + RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, - RAPL_DOMAIN_MSR_LIMIT, POWER_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, - RAPL_DOMAIN_MSR_LIMIT, POWER_UNIT, 0), + 
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(FW_LOCK, POWER_PP_LOCK, 31, - RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, - RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, - RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, - RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, - RAPL_DOMAIN_MSR_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, - RAPL_DOMAIN_MSR_LIMIT, TIME_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, - RAPL_DOMAIN_MSR_LIMIT, TIME_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, - 0, RAPL_DOMAIN_MSR_INFO, POWER_UNIT, 0), + 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, - RAPL_DOMAIN_MSR_INFO, POWER_UNIT, 0), + RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, - RAPL_DOMAIN_MSR_INFO, POWER_UNIT, 0), + RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, - RAPL_DOMAIN_MSR_INFO, TIME_UNIT, 0), + RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, - RAPL_DOMAIN_MSR_PERF, TIME_UNIT, 0), + RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, - RAPL_DOMAIN_MSR_POLICY, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), /* non-hardware */ PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, 
RAPL_PRIMITIVE_DERIVED), @@ -798,7 +798,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd, if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY) return -EINVAL; - msr = rd->msrs[rp->id]; + msr = rd->regs[rp->id]; if (!msr) return -EINVAL; @@ -874,7 +874,7 @@ static int rapl_write_data_raw(struct rapl_domain *rd, memset(&ma, 0, sizeof(ma)); - ma.msr_no = rd->msrs[rp->id]; + ma.msr_no = rd->regs[rp->id]; ma.clear_mask = rp->mask; ma.set_mask = bits; @@ -1282,8 +1282,8 @@ static int __init rapl_register_psys(void) rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM]; rd->id = RAPL_DOMAIN_PLATFORM; - rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT; - rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS; + rd->regs[0] = MSR_PLATFORM_POWER_LIMIT; + rd->regs[1] = MSR_PLATFORM_ENERGY_STATUS; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; From 8310e8202f24d674b6b2bd341af15d72299f696d Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:23 +0800 Subject: [PATCH 10/27] intel_rapl: remove hardcoded register index enum rapl_domain_reg_id is defined for the RAPL registers for each RAPL domain, thus use it whenever possible. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl.c | 44 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 45d5f22d00e8..9be9f20ff056 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -642,11 +642,11 @@ static void rapl_init_domains(struct rapl_package *rp) case BIT(RAPL_DOMAIN_PACKAGE): rd->name = rapl_domain_names[RAPL_DOMAIN_PACKAGE]; rd->id = RAPL_DOMAIN_PACKAGE; - rd->regs[0] = MSR_PKG_POWER_LIMIT; - rd->regs[1] = MSR_PKG_ENERGY_STATUS; - rd->regs[2] = MSR_PKG_PERF_STATUS; - rd->regs[3] = 0; - rd->regs[4] = MSR_PKG_POWER_INFO; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PKG_POWER_LIMIT; + rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_PKG_ENERGY_STATUS; + rd->regs[RAPL_DOMAIN_REG_PERF] = MSR_PKG_PERF_STATUS; + rd->regs[RAPL_DOMAIN_REG_POLICY] = 0; + rd->regs[RAPL_DOMAIN_REG_INFO] = MSR_PKG_POWER_INFO; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; @@ -655,33 +655,33 @@ static void rapl_init_domains(struct rapl_package *rp) case BIT(RAPL_DOMAIN_PP0): rd->name = rapl_domain_names[RAPL_DOMAIN_PP0]; rd->id = RAPL_DOMAIN_PP0; - rd->regs[0] = MSR_PP0_POWER_LIMIT; - rd->regs[1] = MSR_PP0_ENERGY_STATUS; - rd->regs[2] = 0; - rd->regs[3] = MSR_PP0_POLICY; - rd->regs[4] = 0; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PP0_POWER_LIMIT; + rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_PP0_ENERGY_STATUS; + rd->regs[RAPL_DOMAIN_REG_PERF] = 0; + rd->regs[RAPL_DOMAIN_REG_POLICY] = MSR_PP0_POLICY; + rd->regs[RAPL_DOMAIN_REG_INFO] = 0; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; break; case BIT(RAPL_DOMAIN_PP1): rd->name = rapl_domain_names[RAPL_DOMAIN_PP1]; rd->id = RAPL_DOMAIN_PP1; - rd->regs[0] = MSR_PP1_POWER_LIMIT; - rd->regs[1] = MSR_PP1_ENERGY_STATUS; - rd->regs[2] = 0; - rd->regs[3] = MSR_PP1_POLICY; - rd->regs[4] = 0; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PP1_POWER_LIMIT; + 
rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_PP1_ENERGY_STATUS; + rd->regs[RAPL_DOMAIN_REG_PERF] = 0; + rd->regs[RAPL_DOMAIN_REG_POLICY] = MSR_PP1_POLICY; + rd->regs[RAPL_DOMAIN_REG_INFO] = 0; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; break; case BIT(RAPL_DOMAIN_DRAM): rd->name = rapl_domain_names[RAPL_DOMAIN_DRAM]; rd->id = RAPL_DOMAIN_DRAM; - rd->regs[0] = MSR_DRAM_POWER_LIMIT; - rd->regs[1] = MSR_DRAM_ENERGY_STATUS; - rd->regs[2] = MSR_DRAM_PERF_STATUS; - rd->regs[3] = 0; - rd->regs[4] = MSR_DRAM_POWER_INFO; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_DRAM_POWER_LIMIT; + rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_DRAM_ENERGY_STATUS; + rd->regs[RAPL_DOMAIN_REG_PERF] = MSR_DRAM_PERF_STATUS; + rd->regs[RAPL_DOMAIN_REG_POLICY] = 0; + rd->regs[RAPL_DOMAIN_REG_INFO] = MSR_DRAM_POWER_INFO; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->domain_energy_unit = @@ -1282,8 +1282,8 @@ static int __init rapl_register_psys(void) rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM]; rd->id = RAPL_DOMAIN_PLATFORM; - rd->regs[0] = MSR_PLATFORM_POWER_LIMIT; - rd->regs[1] = MSR_PLATFORM_ENERGY_STATUS; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PLATFORM_POWER_LIMIT; + rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_PLATFORM_ENERGY_STATUS; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; From ff956826a403f5cf189978d5ff6b3eb53aa11610 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:24 +0800 Subject: [PATCH 11/27] intel_rapl: introduce intel_rapl.h Create a new header file for the common definitions that might be used by different RAPL Interface. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- MAINTAINERS | 1 + drivers/powercap/intel_rapl.c | 101 +----------------------------- include/linux/intel_rapl.h | 113 ++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 99 deletions(-) create mode 100644 include/linux/intel_rapl.h diff --git a/MAINTAINERS b/MAINTAINERS index 28a36f1efe02..9ded49d371da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12679,6 +12679,7 @@ F: drivers/base/power/ F: include/linux/pm.h F: include/linux/pm_* F: include/linux/powercap.h +F: include/linux/intel_rapl.h F: drivers/powercap/ F: kernel/configs/nopm.config diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 9be9f20ff056..adb35ec9f939 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -18,8 +18,9 @@ #include #include #include -#include +#include +#include #include #include #include @@ -74,59 +75,9 @@ enum unit_type { TIME_UNIT, }; -enum rapl_domain_type { - RAPL_DOMAIN_PACKAGE, /* entire package/socket */ - RAPL_DOMAIN_PP0, /* core power plane */ - RAPL_DOMAIN_PP1, /* graphics uncore */ - RAPL_DOMAIN_DRAM,/* DRAM control_type */ - RAPL_DOMAIN_PLATFORM, /* PSys control_type */ - RAPL_DOMAIN_MAX, -}; - -enum rapl_domain_reg_id { - RAPL_DOMAIN_REG_LIMIT, - RAPL_DOMAIN_REG_STATUS, - RAPL_DOMAIN_REG_PERF, - RAPL_DOMAIN_REG_POLICY, - RAPL_DOMAIN_REG_INFO, - RAPL_DOMAIN_REG_MAX, -}; - /* per domain data, some are optional */ -enum rapl_primitives { - ENERGY_COUNTER, - POWER_LIMIT1, - POWER_LIMIT2, - FW_LOCK, - - PL1_ENABLE, /* power limit 1, aka long term */ - PL1_CLAMP, /* allow frequency to go below OS request */ - PL2_ENABLE, /* power limit 2, aka short term, instantaneous */ - PL2_CLAMP, - - TIME_WINDOW1, /* long term */ - TIME_WINDOW2, /* short term */ - THERMAL_SPEC_POWER, - MAX_POWER, - - MIN_POWER, - MAX_TIME_WINDOW, - THROTTLED_TIME, - PRIORITY_LEVEL, - - /* below are not raw primitive data */ - AVERAGE_POWER, - NR_RAPL_PRIMITIVES, -}; - #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) 
-/* Can be expanded to include events, etc.*/ -struct rapl_domain_data { - u64 primitives[NR_RAPL_PRIMITIVES]; - unsigned long timestamp; -}; - struct msrl_action { u32 msr_no; u64 clear_mask; @@ -138,60 +89,12 @@ struct msrl_action { #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) #define DOMAIN_STATE_BIOS_LOCKED BIT(2) -#define NR_POWER_LIMITS (2) -struct rapl_power_limit { - struct powercap_zone_constraint *constraint; - int prim_id; /* primitive ID used to enable */ - struct rapl_domain *domain; - const char *name; - u64 last_power_limit; -}; - static const char pl1_name[] = "long_term"; static const char pl2_name[] = "short_term"; -struct rapl_package; -struct rapl_domain { - const char *name; - enum rapl_domain_type id; - int regs[RAPL_DOMAIN_REG_MAX]; - struct powercap_zone power_zone; - struct rapl_domain_data rdd; - struct rapl_power_limit rpl[NR_POWER_LIMITS]; - u64 attr_map; /* track capabilities */ - unsigned int state; - unsigned int domain_energy_unit; - struct rapl_package *rp; -}; #define power_zone_to_rapl_domain(_zone) \ container_of(_zone, struct rapl_domain, power_zone) -/* maximum rapl package domain name: package-%d-die-%d */ -#define PACKAGE_DOMAIN_NAME_LENGTH 30 - - -/* Each rapl package contains multiple domains, these are the common - * data across RAPL domains within a package. - */ -struct rapl_package { - unsigned int id; /* logical die id, equals physical 1-die systems */ - unsigned int nr_domains; - unsigned long domain_map; /* bit map of active domains */ - unsigned int power_unit; - unsigned int energy_unit; - unsigned int time_unit; - struct rapl_domain *domains; /* array of domains, sized at runtime */ - struct powercap_zone *power_zone; /* keep track of parent zone */ - unsigned long power_limit_irq; /* keep track of package power limit - * notify interrupt enable status. 
- */ - struct list_head plist; - int lead_cpu; /* one active cpu per package for access */ - /* Track active cpus */ - struct cpumask cpumask; - char name[PACKAGE_DOMAIN_NAME_LENGTH]; -}; - struct rapl_defaults { u8 floor_freq_reg_addr; int (*check_unit)(struct rapl_package *rp, int cpu); diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h new file mode 100644 index 000000000000..94716036d829 --- /dev/null +++ b/include/linux/intel_rapl.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data types and headers for RAPL support + * + * Copyright (C) 2019 Intel Corporation. + * + * Author: Zhang Rui + */ + +#ifndef __INTEL_RAPL_H__ +#define __INTEL_RAPL_H__ + +#include +#include + +enum rapl_domain_type { + RAPL_DOMAIN_PACKAGE, /* entire package/socket */ + RAPL_DOMAIN_PP0, /* core power plane */ + RAPL_DOMAIN_PP1, /* graphics uncore */ + RAPL_DOMAIN_DRAM, /* DRAM control_type */ + RAPL_DOMAIN_PLATFORM, /* PSys control_type */ + RAPL_DOMAIN_MAX, +}; + +enum rapl_domain_reg_id { + RAPL_DOMAIN_REG_LIMIT, + RAPL_DOMAIN_REG_STATUS, + RAPL_DOMAIN_REG_PERF, + RAPL_DOMAIN_REG_POLICY, + RAPL_DOMAIN_REG_INFO, + RAPL_DOMAIN_REG_MAX, +}; + +struct rapl_package; + +enum rapl_primitives { + ENERGY_COUNTER, + POWER_LIMIT1, + POWER_LIMIT2, + FW_LOCK, + + PL1_ENABLE, /* power limit 1, aka long term */ + PL1_CLAMP, /* allow frequency to go below OS request */ + PL2_ENABLE, /* power limit 2, aka short term, instantaneous */ + PL2_CLAMP, + + TIME_WINDOW1, /* long term */ + TIME_WINDOW2, /* short term */ + THERMAL_SPEC_POWER, + MAX_POWER, + + MIN_POWER, + MAX_TIME_WINDOW, + THROTTLED_TIME, + PRIORITY_LEVEL, + + /* below are not raw primitive data */ + AVERAGE_POWER, + NR_RAPL_PRIMITIVES, +}; + +struct rapl_domain_data { + u64 primitives[NR_RAPL_PRIMITIVES]; + unsigned long timestamp; +}; + +#define NR_POWER_LIMITS (2) +struct rapl_power_limit { + struct powercap_zone_constraint *constraint; + int prim_id; /* primitive ID used to enable */ + struct 
rapl_domain *domain; + const char *name; + u64 last_power_limit; +}; + +struct rapl_package; + +struct rapl_domain { + const char *name; + enum rapl_domain_type id; + int regs[RAPL_DOMAIN_REG_MAX]; + struct powercap_zone power_zone; + struct rapl_domain_data rdd; + struct rapl_power_limit rpl[NR_POWER_LIMITS]; + u64 attr_map; /* track capabilities */ + unsigned int state; + unsigned int domain_energy_unit; + struct rapl_package *rp; +}; + +/* maximum rapl package domain name: package-%d-die-%d */ +#define PACKAGE_DOMAIN_NAME_LENGTH 30 + +struct rapl_package { + unsigned int id; /* logical die id, equals physical 1-die systems */ + unsigned int nr_domains; + unsigned long domain_map; /* bit map of active domains */ + unsigned int power_unit; + unsigned int energy_unit; + unsigned int time_unit; + struct rapl_domain *domains; /* array of domains, sized at runtime */ + struct powercap_zone *power_zone; /* keep track of parent zone */ + unsigned long power_limit_irq; /* keep track of package power limit + * notify interrupt enable status. + */ + struct list_head plist; + int lead_cpu; /* one active cpu per package for access */ + /* Track active cpus */ + struct cpumask cpumask; + char name[PACKAGE_DOMAIN_NAME_LENGTH]; +}; + +#endif /* __INTEL_RAPL_H__ */ From 7ebf8eff63b4f349e7b2ded6aa5036d94bdf94b9 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:25 +0800 Subject: [PATCH 12/27] intel_rapl: introduce struct rapl_if_private Introduce a new structure, rapl_if_private, to save the private data for different RAPL Interface. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl.c | 59 +++++++++++++++++------------------ include/linux/intel_rapl.h | 15 +++++++++ 2 files changed, 44 insertions(+), 30 deletions(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index adb35ec9f939..e05d92d67525 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -75,6 +75,9 @@ enum unit_type { TIME_UNIT, }; +/* private data for RAPL MSR Interface */ +static struct rapl_if_priv rapl_msr_priv; + /* per domain data, some are optional */ #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) @@ -155,17 +158,14 @@ static const char * const rapl_domain_names[] = { "psys", }; -static struct powercap_control_type *control_type; /* PowerCap Controller */ -static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */ - /* caller to ensure CPU hotplug lock is held */ -static struct rapl_package *rapl_find_package_domain(int cpu) +static struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv) { int id = topology_logical_die_id(cpu); struct rapl_package *rp; list_for_each_entry(rp, &rapl_packages, plist) { - if (rp->id == id) + if (rp->id == id && rp->priv->control_type == priv->control_type) return rp; } @@ -1090,12 +1090,12 @@ static void rapl_update_domain_data(struct rapl_package *rp) static void rapl_unregister_powercap(void) { - if (platform_rapl_domain) { - powercap_unregister_zone(control_type, - &platform_rapl_domain->power_zone); - kfree(platform_rapl_domain); + if (&rapl_msr_priv.platform_rapl_domain) { + powercap_unregister_zone(rapl_msr_priv.control_type, + &rapl_msr_priv.platform_rapl_domain->power_zone); + kfree(rapl_msr_priv.platform_rapl_domain); } - powercap_unregister_control_type(control_type); + powercap_unregister_control_type(rapl_msr_priv.control_type); } static int rapl_package_register_powercap(struct rapl_package *rp) @@ -1113,7 +1113,7 @@ static int rapl_package_register_powercap(struct rapl_package *rp) nr_pl = 
find_nr_power_limit(rd); pr_debug("register package domain %s\n", rp->name); power_zone = powercap_register_zone(&rd->power_zone, - control_type, + rp->priv->control_type, rp->name, NULL, &zone_ops[rd->id], nr_pl, @@ -1140,7 +1140,7 @@ static int rapl_package_register_powercap(struct rapl_package *rp) /* number of power limits per domain varies */ nr_pl = find_nr_power_limit(rd); power_zone = powercap_register_zone(&rd->power_zone, - control_type, rd->name, + rp->priv->control_type, rd->name, rp->power_zone, &zone_ops[rd->id], nr_pl, &constraint_ops); @@ -1161,7 +1161,7 @@ static int rapl_package_register_powercap(struct rapl_package *rp) */ while (--rd >= rp->domains) { pr_debug("unregister %s domain %s\n", rp->name, rd->name); - powercap_unregister_zone(control_type, &rd->power_zone); + powercap_unregister_zone(rp->priv->control_type, &rd->power_zone); } return ret; @@ -1191,9 +1191,9 @@ static int __init rapl_register_psys(void) rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; rd->rpl[1].name = pl2_name; - rd->rp = rapl_find_package_domain(0); + rd->rp = rapl_find_package_domain(0, &rapl_msr_priv); - power_zone = powercap_register_zone(&rd->power_zone, control_type, + power_zone = powercap_register_zone(&rd->power_zone, rapl_msr_priv.control_type, "psys", NULL, &zone_ops[RAPL_DOMAIN_PLATFORM], 2, &constraint_ops); @@ -1203,17 +1203,17 @@ static int __init rapl_register_psys(void) return PTR_ERR(power_zone); } - platform_rapl_domain = rd; + rapl_msr_priv.platform_rapl_domain = rd; return 0; } static int __init rapl_register_powercap(void) { - control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); - if (IS_ERR(control_type)) { + rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); + if (IS_ERR(rapl_msr_priv.control_type)) { pr_debug("failed to register powercap control_type.\n"); - return PTR_ERR(control_type); + return PTR_ERR(rapl_msr_priv.control_type); } return 0; } @@ -1338,16 +1338,16 @@ static 
void rapl_remove_package(struct rapl_package *rp) } pr_debug("remove package, undo power limit on %s: %s\n", rp->name, rd->name); - powercap_unregister_zone(control_type, &rd->power_zone); + powercap_unregister_zone(rp->priv->control_type, &rd->power_zone); } /* do parent zone last */ - powercap_unregister_zone(control_type, &rd_package->power_zone); + powercap_unregister_zone(rp->priv->control_type, &rd_package->power_zone); list_del(&rp->plist); kfree(rp); } /* called from CPU hotplug notifier, hotplug lock held */ -static struct rapl_package *rapl_add_package(int cpu) +static struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) { int id = topology_logical_die_id(cpu); struct rapl_package *rp; @@ -1361,6 +1361,7 @@ static struct rapl_package *rapl_add_package(int cpu) /* add the new package to the list */ rp->id = id; rp->lead_cpu = cpu; + rp->priv = priv; if (topology_max_die_per_package() > 1) snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, @@ -1399,9 +1400,9 @@ static int rapl_cpu_online(unsigned int cpu) { struct rapl_package *rp; - rp = rapl_find_package_domain(cpu); + rp = rapl_find_package_domain(cpu, &rapl_msr_priv); if (!rp) { - rp = rapl_add_package(cpu); + rp = rapl_add_package(cpu, &rapl_msr_priv); if (IS_ERR(rp)) return PTR_ERR(rp); } @@ -1414,7 +1415,7 @@ static int rapl_cpu_down_prep(unsigned int cpu) struct rapl_package *rp; int lead_cpu; - rp = rapl_find_package_domain(cpu); + rp = rapl_find_package_domain(cpu, &rapl_msr_priv); if (!rp) return 0; @@ -1427,8 +1428,6 @@ static int rapl_cpu_down_prep(unsigned int cpu) return 0; } -static enum cpuhp_state pcap_rapl_online; - static void power_limit_state_save(void) { struct rapl_package *rp; @@ -1538,7 +1537,7 @@ static int __init rapl_init(void) rapl_cpu_online, rapl_cpu_down_prep); if (ret < 0) goto err_unreg; - pcap_rapl_online = ret; + rapl_msr_priv.pcap_rapl_online = ret; /* Don't bail out if PSys is not supported */ rapl_register_psys(); @@ -1550,7 +1549,7 @@ static int 
__init rapl_init(void) return 0; err_unreg_all: - cpuhp_remove_state(pcap_rapl_online); + cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); err_unreg: rapl_unregister_powercap(); @@ -1560,7 +1559,7 @@ static int __init rapl_init(void) static void __exit rapl_exit(void) { unregister_pm_notifier(&rapl_pm_notifier); - cpuhp_remove_state(pcap_rapl_online); + cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); rapl_unregister_powercap(); } diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 94716036d829..7bf1683e4a63 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -88,6 +88,20 @@ struct rapl_domain { struct rapl_package *rp; }; +/** + * struct rapl_if_priv: private data for different RAPL interfaces + * @control_type: Each RAPL interface must have its own powercap + * control type. + * @platform_rapl_domain: Optional. Some RAPL interface may have platform + * level RAPL control. + * @pcap_rapl_online: CPU hotplug state for each RAPL interface. + */ +struct rapl_if_priv { + struct powercap_control_type *control_type; + struct rapl_domain *platform_rapl_domain; + enum cpuhp_state pcap_rapl_online; +}; + /* maximum rapl package domain name: package-%d-die-%d */ #define PACKAGE_DOMAIN_NAME_LENGTH 30 @@ -108,6 +122,7 @@ struct rapl_package { /* Track active cpus */ struct cpumask cpumask; char name[PACKAGE_DOMAIN_NAME_LENGTH]; + struct rapl_if_priv *priv; }; #endif /* __INTEL_RAPL_H__ */ From 7fde2712a7adab721eaabafbd8ff93dff3262d35 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:26 +0800 Subject: [PATCH 13/27] intel_rapl: abstract register address MSR and MMIO RAPL interfaces have different sets of registers, thus the RAPL register address should be obtained from the interface specific structure, i.e. struct rapl_if_priv, instead. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J.
Wysocki --- drivers/powercap/intel_rapl.c | 73 ++++++++++++++++------------------- include/linux/intel_rapl.h | 4 ++ 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index e05d92d67525..9f22aed49f24 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -76,7 +76,19 @@ enum unit_type { }; /* private data for RAPL MSR Interface */ -static struct rapl_if_priv rapl_msr_priv; +static struct rapl_if_priv rapl_msr_priv = { + .reg_unit = MSR_RAPL_POWER_UNIT, + .regs[RAPL_DOMAIN_PACKAGE] = { + MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO }, + .regs[RAPL_DOMAIN_PP0] = { + MSR_PP0_POWER_LIMIT, MSR_PP0_ENERGY_STATUS, 0, MSR_PP0_POLICY, 0 }, + .regs[RAPL_DOMAIN_PP1] = { + MSR_PP1_POWER_LIMIT, MSR_PP1_ENERGY_STATUS, 0, MSR_PP1_POLICY, 0 }, + .regs[RAPL_DOMAIN_DRAM] = { + MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO }, + .regs[RAPL_DOMAIN_PLATFORM] = { + MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0}, +}; /* per domain data, some are optional */ #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) @@ -541,15 +553,17 @@ static void rapl_init_domains(struct rapl_package *rp) for (i = 0; i < RAPL_DOMAIN_MAX; i++) { unsigned int mask = rp->domain_map & (1 << i); + + rd->regs[RAPL_DOMAIN_REG_LIMIT] = rp->priv->regs[i][RAPL_DOMAIN_REG_LIMIT]; + rd->regs[RAPL_DOMAIN_REG_STATUS] = rp->priv->regs[i][RAPL_DOMAIN_REG_STATUS]; + rd->regs[RAPL_DOMAIN_REG_PERF] = rp->priv->regs[i][RAPL_DOMAIN_REG_PERF]; + rd->regs[RAPL_DOMAIN_REG_POLICY] = rp->priv->regs[i][RAPL_DOMAIN_REG_POLICY]; + rd->regs[RAPL_DOMAIN_REG_INFO] = rp->priv->regs[i][RAPL_DOMAIN_REG_INFO]; + switch (mask) { case BIT(RAPL_DOMAIN_PACKAGE): rd->name = rapl_domain_names[RAPL_DOMAIN_PACKAGE]; rd->id = RAPL_DOMAIN_PACKAGE; - rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PKG_POWER_LIMIT; - rd->regs[RAPL_DOMAIN_REG_STATUS] = 
MSR_PKG_ENERGY_STATUS; - rd->regs[RAPL_DOMAIN_REG_PERF] = MSR_PKG_PERF_STATUS; - rd->regs[RAPL_DOMAIN_REG_POLICY] = 0; - rd->regs[RAPL_DOMAIN_REG_INFO] = MSR_PKG_POWER_INFO; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; @@ -558,33 +572,18 @@ static void rapl_init_domains(struct rapl_package *rp) case BIT(RAPL_DOMAIN_PP0): rd->name = rapl_domain_names[RAPL_DOMAIN_PP0]; rd->id = RAPL_DOMAIN_PP0; - rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PP0_POWER_LIMIT; - rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_PP0_ENERGY_STATUS; - rd->regs[RAPL_DOMAIN_REG_PERF] = 0; - rd->regs[RAPL_DOMAIN_REG_POLICY] = MSR_PP0_POLICY; - rd->regs[RAPL_DOMAIN_REG_INFO] = 0; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; break; case BIT(RAPL_DOMAIN_PP1): rd->name = rapl_domain_names[RAPL_DOMAIN_PP1]; rd->id = RAPL_DOMAIN_PP1; - rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PP1_POWER_LIMIT; - rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_PP1_ENERGY_STATUS; - rd->regs[RAPL_DOMAIN_REG_PERF] = 0; - rd->regs[RAPL_DOMAIN_REG_POLICY] = MSR_PP1_POLICY; - rd->regs[RAPL_DOMAIN_REG_INFO] = 0; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; break; case BIT(RAPL_DOMAIN_DRAM): rd->name = rapl_domain_names[RAPL_DOMAIN_DRAM]; rd->id = RAPL_DOMAIN_DRAM; - rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_DRAM_POWER_LIMIT; - rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_DRAM_ENERGY_STATUS; - rd->regs[RAPL_DOMAIN_REG_PERF] = MSR_DRAM_PERF_STATUS; - rd->regs[RAPL_DOMAIN_REG_POLICY] = 0; - rd->regs[RAPL_DOMAIN_REG_INFO] = MSR_DRAM_POWER_INFO; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->domain_energy_unit = @@ -806,9 +805,9 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) u64 msr_val; u32 value; - if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) { + if (rdmsrl_safe_on_cpu(cpu, rp->priv->reg_unit, &msr_val)) { pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", - MSR_RAPL_POWER_UNIT, cpu); + rp->priv->reg_unit, cpu); return 
-ENODEV; } @@ -832,9 +831,9 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) u64 msr_val; u32 value; - if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) { + if (rdmsrl_safe_on_cpu(cpu, rp->priv->reg_unit, &msr_val)) { pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", - MSR_RAPL_POWER_UNIT, cpu); + rp->priv->reg_unit, cpu); return -ENODEV; } value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; @@ -1173,10 +1172,10 @@ static int __init rapl_register_psys(void) struct powercap_zone *power_zone; u64 val; - if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val) + if (rdmsrl_safe_on_cpu(0, rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS], &val) || !val) return -ENODEV; - if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val) + if (rdmsrl_safe_on_cpu(0, rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT], &val) || !val) return -ENODEV; rd = kzalloc(sizeof(*rd), GFP_KERNEL); @@ -1185,8 +1184,8 @@ static int __init rapl_register_psys(void) rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM]; rd->id = RAPL_DOMAIN_PLATFORM; - rd->regs[RAPL_DOMAIN_REG_LIMIT] = MSR_PLATFORM_POWER_LIMIT; - rd->regs[RAPL_DOMAIN_REG_STATUS] = MSR_PLATFORM_ENERGY_STATUS; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; + rd->regs[RAPL_DOMAIN_REG_STATUS] = rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; @@ -1218,23 +1217,17 @@ static int __init rapl_register_powercap(void) return 0; } -static int rapl_check_domain(int cpu, int domain) +static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) { - unsigned msr; + u32 reg; u64 val = 0; switch (domain) { case RAPL_DOMAIN_PACKAGE: - msr = MSR_PKG_ENERGY_STATUS; - break; case RAPL_DOMAIN_PP0: - msr = MSR_PP0_ENERGY_STATUS; - break; case RAPL_DOMAIN_PP1: - msr = 
MSR_PP1_ENERGY_STATUS; - break; case RAPL_DOMAIN_DRAM: - msr = MSR_DRAM_ENERGY_STATUS; + reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS]; break; case RAPL_DOMAIN_PLATFORM: /* PSYS(PLATFORM) is not a CPU domain, so avoid printng error */ @@ -1246,7 +1239,7 @@ static int rapl_check_domain(int cpu, int domain) /* make sure domain counters are available and contains non-zero * values, otherwise skip it. */ - if (rdmsrl_safe_on_cpu(cpu, msr, &val) || !val) + if (rdmsrl_safe_on_cpu(cpu, reg, &val) || !val) return -ENODEV; return 0; @@ -1293,7 +1286,7 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu) for (i = 0; i < RAPL_DOMAIN_MAX; i++) { /* use physical package id to read counters */ - if (!rapl_check_domain(cpu, i)) { + if (!rapl_check_domain(cpu, i, rp)) { rp->domain_map |= 1 << i; pr_info("Found RAPL domain %s\n", rapl_domain_names[i]); } diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 7bf1683e4a63..ec2c9e83274f 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -95,11 +95,15 @@ struct rapl_domain { * @platform_rapl_domain: Optional. Some RAPL interface may have platform * level RAPL control. * @pcap_rapl_online: CPU hotplug state for each RAPL interface. + * @reg_unit: Register for getting energy/power/time unit. + * @regs: Register sets for different RAPL Domains. 
*/ struct rapl_if_priv { struct powercap_control_type *control_type; struct rapl_domain *platform_rapl_domain; enum cpuhp_state pcap_rapl_online; + u32 reg_unit; + u32 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX]; }; /* maximum rapl package domain name: package-%d-die-%d */ From beea8df821d928e7755917da6c1e45d6afde5148 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:27 +0800 Subject: [PATCH 14/27] intel_rapl: abstract register access operations MSR and MMIO RAPL interfaces have different ways to access the registers, thus in order to abstract the register access operations, two callbacks, .read_raw()/.write_raw() are introduced, and they should be implemented by MSR RAPL and MMIO RAPL interface drivers respectively. This patch implements them for the MSR I/F only. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl.c | 110 ++++++++++++++++++---------------- include/linux/intel_rapl.h | 13 ++++ 2 files changed, 70 insertions(+), 53 deletions(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 9f22aed49f24..d3b9d1cf4d48 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -93,13 +93,6 @@ static struct rapl_if_priv rapl_msr_priv = { /* per domain data, some are optional */ #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) -struct msrl_action { - u32 msr_no; - u64 clear_mask; - u64 set_mask; - int err; -}; - #define DOMAIN_STATE_INACTIVE BIT(0) #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) #define DOMAIN_STATE_BIOS_LOCKED BIT(2) @@ -692,16 +685,16 @@ static int rapl_read_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, bool xlate, u64 *data) { - u64 value, final; - u32 msr; + u64 value; struct rapl_primitive_info *rp = &rpi[prim]; + struct reg_action ra; int cpu; if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY) return -EINVAL; - msr = rd->regs[rp->id]; - if (!msr) + ra.reg =
rd->regs[rp->id]; + if (!ra.reg) return -EINVAL; cpu = rd->rp->lead_cpu; @@ -717,47 +710,23 @@ static int rapl_read_data_raw(struct rapl_domain *rd, return 0; } - if (rdmsrl_safe_on_cpu(cpu, msr, &value)) { - pr_debug("failed to read msr 0x%x on cpu %d\n", msr, cpu); + ra.mask = rp->mask; + + if (rd->rp->priv->read_raw(cpu, &ra)) { + pr_debug("failed to read reg 0x%x on cpu %d\n", ra.reg, cpu); return -EIO; } - final = value & rp->mask; - final = final >> rp->shift; + value = ra.value >> rp->shift; + if (xlate) - *data = rapl_unit_xlate(rd, rp->unit, final, 0); + *data = rapl_unit_xlate(rd, rp->unit, value, 0); else - *data = final; + *data = value; return 0; } - -static int msrl_update_safe(u32 msr_no, u64 clear_mask, u64 set_mask) -{ - int err; - u64 val; - - err = rdmsrl_safe(msr_no, &val); - if (err) - goto out; - - val &= ~clear_mask; - val |= set_mask; - - err = wrmsrl_safe(msr_no, val); - -out: - return err; -} - -static void msrl_update_func(void *info) -{ - struct msrl_action *ma = info; - - ma->err = msrl_update_safe(ma->msr_no, ma->clear_mask, ma->set_mask); -} - /* Similar use of primitive info in the read counterpart */ static int rapl_write_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, @@ -766,7 +735,7 @@ static int rapl_write_data_raw(struct rapl_domain *rd, struct rapl_primitive_info *rp = &rpi[prim]; int cpu; u64 bits; - struct msrl_action ma; + struct reg_action ra; int ret; cpu = rd->rp->lead_cpu; @@ -774,17 +743,13 @@ static int rapl_write_data_raw(struct rapl_domain *rd, bits <<= rp->shift; bits &= rp->mask; - memset(&ma, 0, sizeof(ma)); + memset(&ra, 0, sizeof(ra)); - ma.msr_no = rd->regs[rp->id]; - ma.clear_mask = rp->mask; - ma.set_mask = bits; + ra.reg = rd->regs[rp->id]; + ra.mask = rp->mask; + ra.value = bits; - ret = smp_call_function_single(cpu, msrl_update_func, &ma, 1); - if (ret) - WARN_ON_ONCE(ret); - else - ret = ma.err; + ret = rd->rp->priv->write_raw(cpu, &ra); return ret; } @@ -1507,6 +1472,43 @@ static struct 
notifier_block rapl_pm_notifier = { .notifier_call = rapl_pm_callback, }; +static int rapl_msr_read_raw(int cpu, struct reg_action *ra) +{ + if (rdmsrl_safe_on_cpu(cpu, ra->reg, &ra->value)) { + pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg, cpu); + return -EIO; + } + ra->value &= ra->mask; + return 0; +} + +static void rapl_msr_update_func(void *info) +{ + struct reg_action *ra = info; + u64 val; + + ra->err = rdmsrl_safe(ra->reg, &val); + if (ra->err) + return; + + val &= ~ra->mask; + val |= ra->value; + + ra->err = wrmsrl_safe(ra->reg, val); +} + + +static int rapl_msr_write_raw(int cpu, struct reg_action *ra) +{ + int ret; + + ret = smp_call_function_single(cpu, rapl_msr_update_func, ra, 1); + if (WARN_ON_ONCE(ret)) + return ret; + + return ra->err; +} + static int __init rapl_init(void) { const struct x86_cpu_id *id; @@ -1522,6 +1524,8 @@ static int __init rapl_init(void) rapl_defaults = (struct rapl_defaults *)id->driver_data; + rapl_msr_priv.read_raw = rapl_msr_read_raw; + rapl_msr_priv.write_raw = rapl_msr_write_raw; ret = rapl_register_powercap(); if (ret) return ret; diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index ec2c9e83274f..ff215d64d114 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -88,6 +88,13 @@ struct rapl_domain { struct rapl_package *rp; }; +struct reg_action { + u32 reg; + u64 mask; + u64 value; + int err; +}; + /** * struct rapl_if_priv: private data for different RAPL interfaces * @control_type: Each RAPL interface must have its own powercap @@ -97,6 +104,10 @@ struct rapl_domain { * @pcap_rapl_online: CPU hotplug state for each RAPL interface. * @reg_unit: Register for getting energy/power/time unit. * @regs: Register sets for different RAPL Domains. + * @read_raw: Callback for reading RAPL interface specific + * registers. + * @write_raw: Callback for writing RAPL interface specific + * registers. 
*/ struct rapl_if_priv { struct powercap_control_type *control_type; @@ -104,6 +115,8 @@ struct rapl_if_priv { enum cpuhp_state pcap_rapl_online; u32 reg_unit; u32 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX]; + int (*read_raw)(int cpu, struct reg_action *ra); + int (*write_raw)(int cpu, struct reg_action *ra); }; /* maximum rapl package domain name: package-%d-die-%d */ From 8a00676cd690941c9a18bd390c3b2cade631c516 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:28 +0800 Subject: [PATCH 15/27] intel_rapl: cleanup some functions Previously, there were three functions: rapl_register_psys(), which registers platform rapl domain. rapl_register_powercap(), which registers powercap control type. rapl_unregister_powercap(), which unregisters platform rapl domain and powercap control type. This is confusing as the function names do not describe what they do clearly. With this patch, the three functions are removed, and two new functions rapl_add_platform_domain()/rapl_remove_platform_domain() are introduced instead, and they do exactly what their function names describe. Plus, as part of the common code, hardcoded MSR accesses in these functions are converted to follow the abstracted register access. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J.
Wysocki --- drivers/powercap/intel_rapl.c | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index d3b9d1cf4d48..7a97d331bcdc 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -1052,16 +1052,6 @@ static void rapl_update_domain_data(struct rapl_package *rp) } -static void rapl_unregister_powercap(void) -{ - if (&rapl_msr_priv.platform_rapl_domain) { - powercap_unregister_zone(rapl_msr_priv.control_type, - &rapl_msr_priv.platform_rapl_domain->power_zone); - kfree(rapl_msr_priv.platform_rapl_domain); - } - powercap_unregister_control_type(rapl_msr_priv.control_type); -} - static int rapl_package_register_powercap(struct rapl_package *rp) { struct rapl_domain *rd; @@ -1131,16 +1121,23 @@ static int rapl_package_register_powercap(struct rapl_package *rp) return ret; } -static int __init rapl_register_psys(void) +static int __init rapl_add_platform_domain(struct rapl_if_priv *priv) { struct rapl_domain *rd; struct powercap_zone *power_zone; - u64 val; + struct reg_action ra; + int ret; - if (rdmsrl_safe_on_cpu(0, rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS], &val) || !val) + ra.reg = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; + ra.mask = ~0; + ret = priv->read_raw(0, &ra); + if (ret || !ra.value) return -ENODEV; - if (rdmsrl_safe_on_cpu(0, rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT], &val) || !val) + ra.reg = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; + ra.mask = ~0; + ret = priv->read_raw(0, &ra); + if (ret || !ra.value) return -ENODEV; rd = kzalloc(sizeof(*rd), GFP_KERNEL); @@ -1149,15 +1146,15 @@ static int __init rapl_register_psys(void) rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM]; rd->id = RAPL_DOMAIN_PLATFORM; - rd->regs[RAPL_DOMAIN_REG_LIMIT] = rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; - rd->regs[RAPL_DOMAIN_REG_STATUS] = 
rapl_msr_priv.regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; + rd->regs[RAPL_DOMAIN_REG_STATUS] = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; rd->rpl[1].name = pl2_name; - rd->rp = rapl_find_package_domain(0, &rapl_msr_priv); + rd->rp = rapl_find_package_domain(0, priv); - power_zone = powercap_register_zone(&rd->power_zone, rapl_msr_priv.control_type, + power_zone = powercap_register_zone(&rd->power_zone, priv->control_type, "psys", NULL, &zone_ops[RAPL_DOMAIN_PLATFORM], 2, &constraint_ops); @@ -1167,19 +1164,18 @@ static int __init rapl_register_psys(void) return PTR_ERR(power_zone); } - rapl_msr_priv.platform_rapl_domain = rd; + priv->platform_rapl_domain = rd; return 0; } -static int __init rapl_register_powercap(void) +static void rapl_remove_platform_domain(struct rapl_if_priv *priv) { - rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); - if (IS_ERR(rapl_msr_priv.control_type)) { - pr_debug("failed to register powercap control_type.\n"); - return PTR_ERR(rapl_msr_priv.control_type); + if (priv->platform_rapl_domain) { + powercap_unregister_zone(priv->control_type, + &priv->platform_rapl_domain->power_zone); + kfree(priv->platform_rapl_domain); } - return 0; } static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) @@ -1526,9 +1522,12 @@ static int __init rapl_init(void) rapl_msr_priv.read_raw = rapl_msr_read_raw; rapl_msr_priv.write_raw = rapl_msr_write_raw; - ret = rapl_register_powercap(); - if (ret) - return ret; + + rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); + if (IS_ERR(rapl_msr_priv.control_type)) { + pr_debug("failed to register powercap control_type.\n"); + return PTR_ERR(rapl_msr_priv.control_type); + } ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, 
"powercap/rapl:online", rapl_cpu_online, rapl_cpu_down_prep); @@ -1537,7 +1536,7 @@ static int __init rapl_init(void) rapl_msr_priv.pcap_rapl_online = ret; /* Don't bail out if PSys is not supported */ - rapl_register_psys(); + rapl_add_platform_domain(&rapl_msr_priv); ret = register_pm_notifier(&rapl_pm_notifier); if (ret) @@ -1549,7 +1548,7 @@ static int __init rapl_init(void) cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); err_unreg: - rapl_unregister_powercap(); + powercap_unregister_control_type(rapl_msr_priv.control_type); return ret; } @@ -1557,7 +1556,8 @@ static void __exit rapl_exit(void) { unregister_pm_notifier(&rapl_pm_notifier); cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); - rapl_unregister_powercap(); + rapl_remove_platform_domain(&rapl_msr_priv); + powercap_unregister_control_type(rapl_msr_priv.control_type); } module_init(rapl_init); From 1193b1658d16f03cdb2edbac5f2a796ccca225af Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:29 +0800 Subject: [PATCH 16/27] intel_rapl: cleanup hardcoded MSR access There are still some places in the common code that have hardcoded MSR access, convert them to follow the abstracted register access. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl.c | 38 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 7a97d331bcdc..aa54c06ed518 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -767,22 +767,24 @@ static int rapl_write_data_raw(struct rapl_domain *rd, */ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) { - u64 msr_val; + struct reg_action ra; u32 value; - if (rdmsrl_safe_on_cpu(cpu, rp->priv->reg_unit, &msr_val)) { - pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", + ra.reg = rp->priv->reg_unit; + ra.mask = ~0; + if (rp->priv->read_raw(cpu, &ra)) { + pr_err("Failed to read power unit REG 0x%x on CPU %d, exit.\n", rp->priv->reg_unit, cpu); return -ENODEV; } - value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; + value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); - value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; + value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; rp->power_unit = 1000000 / (1 << value); - value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; + value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; rp->time_unit = 1000000 / (1 << value); pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n", @@ -793,21 +795,24 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) { - u64 msr_val; + struct reg_action ra; u32 value; - if (rdmsrl_safe_on_cpu(cpu, rp->priv->reg_unit, &msr_val)) { - pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", + ra.reg = rp->priv->reg_unit; + ra.mask = ~0; + if (rp->priv->read_raw(cpu, &ra)) { + pr_err("Failed to read power unit REG 0x%x on CPU %d, exit.\n", rp->priv->reg_unit, cpu); return -ENODEV; } - value = (msr_val & ENERGY_UNIT_MASK) >> 
ENERGY_UNIT_OFFSET; + + value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value; - value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; + value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; rp->power_unit = (1 << value) * 1000; - value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; + value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; rp->time_unit = 1000000 / (1 << value); pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n", @@ -1180,15 +1185,14 @@ static void rapl_remove_platform_domain(struct rapl_if_priv *priv) static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) { - u32 reg; - u64 val = 0; + struct reg_action ra; switch (domain) { case RAPL_DOMAIN_PACKAGE: case RAPL_DOMAIN_PP0: case RAPL_DOMAIN_PP1: case RAPL_DOMAIN_DRAM: - reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS]; + ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS]; break; case RAPL_DOMAIN_PLATFORM: /* PSYS(PLATFORM) is not a CPU domain, so avoid printng error */ @@ -1200,7 +1204,9 @@ static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) /* make sure domain counters are available and contains non-zero * values, otherwise skip it. */ - if (rdmsrl_safe_on_cpu(cpu, reg, &val) || !val) + + ra.mask = ~0; + if (rp->priv->read_raw(cpu, &ra) || !ra.value) return -ENODEV; return 0; From 3382388d714891fc0f575926189f33d22e7c960b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:30 +0800 Subject: [PATCH 17/27] intel_rapl: abstract RAPL common code Split intel_rapl.c to intel_rapl_common.c and intel_rapl_msr.c, where intel_rapl_common.c contains the common code that can be used by both MSR and MMIO interface. intel_rapl_msr.c contains the implementation of RAPL MSR interface. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/Kconfig | 11 +- drivers/powercap/Makefile | 3 +- .../{intel_rapl.c => intel_rapl_common.c} | 525 +++++++----------- drivers/powercap/intel_rapl_msr.c | 163 ++++++ include/linux/intel_rapl.h | 7 + 5 files changed, 389 insertions(+), 320 deletions(-) rename drivers/powercap/{intel_rapl.c => intel_rapl_common.c} (74%) create mode 100644 drivers/powercap/intel_rapl_msr.c diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 42d3798c88f0..dc1c1381d7fa 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -16,14 +16,17 @@ menuconfig POWERCAP if POWERCAP # Client driver configurations go here. +config INTEL_RAPL_CORE + tristate + config INTEL_RAPL - tristate "Intel RAPL Support" + tristate "Intel RAPL Support via MSR Interface" depends on X86 && IOSF_MBI - default n + select INTEL_RAPL_CORE ---help--- This enables support for the Intel Running Average Power Limit (RAPL) - technology which allows power limits to be enforced and monitored on - modern Intel processors (Sandy Bridge and later). + technology via MSR interface, which allows power limits to be enforced + and monitored on modern Intel processors (Sandy Bridge and later). In RAPL, the platform level settings are divided into domains for fine grained control. 
These domains include processor package, DRAM diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index 81c8ccaba6e7..7255c94ec61c 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_POWERCAP) += powercap_sys.o -obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o +obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o +obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o obj-$(CONFIG_IDLE_INJECT) += idle_inject.o diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl_common.c similarity index 74% rename from drivers/powercap/intel_rapl.c rename to drivers/powercap/intel_rapl_common.c index aa54c06ed518..34a82531a7cf 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Intel Running Average Power Limit (RAPL) Driver - * Copyright (c) 2013, Intel Corporation. + * Common code for Intel Running Average Power Limit (RAPL) support. + * Copyright (c) 2019, Intel Corporation. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -18,10 +18,10 @@ #include #include #include +#include #include -#include -#include +#include #include #include @@ -62,34 +62,19 @@ #define PP_POLICY_MASK 0x1F /* Non HW constants */ -#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ +#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ #define RAPL_PRIMITIVE_DUMMY BIT(2) #define TIME_WINDOW_MAX_MSEC 40000 #define TIME_WINDOW_MIN_MSEC 250 -#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */ +#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */ enum unit_type { - ARBITRARY_UNIT, /* no translation */ + ARBITRARY_UNIT, /* no translation */ POWER_UNIT, ENERGY_UNIT, TIME_UNIT, }; -/* private data for RAPL MSR Interface */ -static struct rapl_if_priv rapl_msr_priv = { - .reg_unit = MSR_RAPL_POWER_UNIT, - .regs[RAPL_DOMAIN_PACKAGE] = { - MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO }, - .regs[RAPL_DOMAIN_PP0] = { - MSR_PP0_POWER_LIMIT, MSR_PP0_ENERGY_STATUS, 0, MSR_PP0_POLICY, 0 }, - .regs[RAPL_DOMAIN_PP1] = { - MSR_PP1_POWER_LIMIT, MSR_PP1_ENERGY_STATUS, 0, MSR_PP1_POLICY, 0 }, - .regs[RAPL_DOMAIN_DRAM] = { - MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO }, - .regs[RAPL_DOMAIN_PLATFORM] = { - MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0}, -}; - /* per domain data, some are optional */ #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) @@ -108,7 +93,7 @@ struct rapl_defaults { int (*check_unit)(struct rapl_package *rp, int cpu); void (*set_floor_freq)(struct rapl_domain *rd, bool mode); u64 (*compute_time_window)(struct rapl_package *rp, u64 val, - bool to_raw); + bool to_raw); unsigned int dram_domain_energy_unit; }; static struct rapl_defaults *rapl_defaults; @@ -143,19 +128,20 @@ struct rapl_primitive_info { static void rapl_init_domains(struct rapl_package *rp); static int rapl_read_data_raw(struct 
rapl_domain *rd, - enum rapl_primitives prim, - bool xlate, u64 *data); + enum rapl_primitives prim, + bool xlate, u64 *data); static int rapl_write_data_raw(struct rapl_domain *rd, - enum rapl_primitives prim, - unsigned long long value); + enum rapl_primitives prim, + unsigned long long value); static u64 rapl_unit_xlate(struct rapl_domain *rd, - enum unit_type type, u64 value, - int to_raw); + enum unit_type type, u64 value, int to_raw); static void package_power_limit_irq_save(struct rapl_package *rp); +static int rapl_init_core(void); +static void rapl_remove_core(void); -static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ +static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ -static const char * const rapl_domain_names[] = { +static const char *const rapl_domain_names[] = { "package", "core", "uncore", @@ -163,21 +149,8 @@ static const char * const rapl_domain_names[] = { "psys", }; -/* caller to ensure CPU hotplug lock is held */ -static struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv) -{ - int id = topology_logical_die_id(cpu); - struct rapl_package *rp; - - list_for_each_entry(rp, &rapl_packages, plist) { - if (rp->id == id && rp->priv->control_type == priv->control_type) - return rp; - } - - return NULL; -} - -static int get_energy_counter(struct powercap_zone *power_zone, u64 *energy_raw) +static int get_energy_counter(struct powercap_zone *power_zone, + u64 *energy_raw) { struct rapl_domain *rd; u64 energy_now; @@ -276,50 +249,49 @@ static int get_domain_enable(struct powercap_zone *power_zone, bool *mode) static const struct powercap_zone_ops zone_ops[] = { /* RAPL_DOMAIN_PACKAGE */ { - .get_energy_uj = get_energy_counter, - .get_max_energy_range_uj = get_max_energy_counter, - .release = release_zone, - .set_enable = set_domain_enable, - .get_enable = get_domain_enable, - }, + .get_energy_uj = get_energy_counter, + .get_max_energy_range_uj = get_max_energy_counter, + .release = release_zone, 
+ .set_enable = set_domain_enable, + .get_enable = get_domain_enable, + }, /* RAPL_DOMAIN_PP0 */ { - .get_energy_uj = get_energy_counter, - .get_max_energy_range_uj = get_max_energy_counter, - .release = release_zone, - .set_enable = set_domain_enable, - .get_enable = get_domain_enable, - }, + .get_energy_uj = get_energy_counter, + .get_max_energy_range_uj = get_max_energy_counter, + .release = release_zone, + .set_enable = set_domain_enable, + .get_enable = get_domain_enable, + }, /* RAPL_DOMAIN_PP1 */ { - .get_energy_uj = get_energy_counter, - .get_max_energy_range_uj = get_max_energy_counter, - .release = release_zone, - .set_enable = set_domain_enable, - .get_enable = get_domain_enable, - }, + .get_energy_uj = get_energy_counter, + .get_max_energy_range_uj = get_max_energy_counter, + .release = release_zone, + .set_enable = set_domain_enable, + .get_enable = get_domain_enable, + }, /* RAPL_DOMAIN_DRAM */ { - .get_energy_uj = get_energy_counter, - .get_max_energy_range_uj = get_max_energy_counter, - .release = release_zone, - .set_enable = set_domain_enable, - .get_enable = get_domain_enable, - }, + .get_energy_uj = get_energy_counter, + .get_max_energy_range_uj = get_max_energy_counter, + .release = release_zone, + .set_enable = set_domain_enable, + .get_enable = get_domain_enable, + }, /* RAPL_DOMAIN_PLATFORM */ { - .get_energy_uj = get_energy_counter, - .get_max_energy_range_uj = get_max_energy_counter, - .release = release_zone, - .set_enable = set_domain_enable, - .get_enable = get_domain_enable, - }, + .get_energy_uj = get_energy_counter, + .get_max_energy_range_uj = get_max_energy_counter, + .release = release_zone, + .set_enable = set_domain_enable, + .get_enable = get_domain_enable, + }, }; - /* * Constraint index used by powercap can be different than power limit (PL) - * index in that some PLs maybe missing due to non-existant MSRs. So we + * index in that some PLs maybe missing due to non-existent MSRs. 
So we * need to convert here by finding the valid PLs only (name populated). */ static int contraint_to_pl(struct rapl_domain *rd, int cid) @@ -338,7 +310,7 @@ static int contraint_to_pl(struct rapl_domain *rd, int cid) } static int set_power_limit(struct powercap_zone *power_zone, int cid, - u64 power_limit) + u64 power_limit) { struct rapl_domain *rd; struct rapl_package *rp; @@ -356,8 +328,8 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid, rp = rd->rp; if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { - dev_warn(&power_zone->dev, "%s locked by BIOS, monitoring only\n", - rd->name); + dev_warn(&power_zone->dev, + "%s locked by BIOS, monitoring only\n", rd->name); ret = -EACCES; goto set_exit; } @@ -380,7 +352,7 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid, } static int get_current_power_limit(struct powercap_zone *power_zone, int cid, - u64 *data) + u64 *data) { struct rapl_domain *rd; u64 val; @@ -419,7 +391,7 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid, } static int set_time_window(struct powercap_zone *power_zone, int cid, - u64 window) + u64 window) { struct rapl_domain *rd; int ret = 0; @@ -449,7 +421,8 @@ static int set_time_window(struct powercap_zone *power_zone, int cid, return ret; } -static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data) +static int get_time_window(struct powercap_zone *power_zone, int cid, + u64 *data) { struct rapl_domain *rd; u64 val; @@ -484,7 +457,8 @@ static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data) return ret; } -static const char *get_constraint_name(struct powercap_zone *power_zone, int cid) +static const char *get_constraint_name(struct powercap_zone *power_zone, + int cid) { struct rapl_domain *rd; int id; @@ -497,9 +471,7 @@ static const char *get_constraint_name(struct powercap_zone *power_zone, int cid return NULL; } - -static int get_max_power(struct powercap_zone *power_zone, int id, 
- u64 *data) +static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data) { struct rapl_domain *rd; u64 val; @@ -547,11 +519,16 @@ static void rapl_init_domains(struct rapl_package *rp) for (i = 0; i < RAPL_DOMAIN_MAX; i++) { unsigned int mask = rp->domain_map & (1 << i); - rd->regs[RAPL_DOMAIN_REG_LIMIT] = rp->priv->regs[i][RAPL_DOMAIN_REG_LIMIT]; - rd->regs[RAPL_DOMAIN_REG_STATUS] = rp->priv->regs[i][RAPL_DOMAIN_REG_STATUS]; - rd->regs[RAPL_DOMAIN_REG_PERF] = rp->priv->regs[i][RAPL_DOMAIN_REG_PERF]; - rd->regs[RAPL_DOMAIN_REG_POLICY] = rp->priv->regs[i][RAPL_DOMAIN_REG_POLICY]; - rd->regs[RAPL_DOMAIN_REG_INFO] = rp->priv->regs[i][RAPL_DOMAIN_REG_INFO]; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = + rp->priv->regs[i][RAPL_DOMAIN_REG_LIMIT]; + rd->regs[RAPL_DOMAIN_REG_STATUS] = + rp->priv->regs[i][RAPL_DOMAIN_REG_STATUS]; + rd->regs[RAPL_DOMAIN_REG_PERF] = + rp->priv->regs[i][RAPL_DOMAIN_REG_PERF]; + rd->regs[RAPL_DOMAIN_REG_POLICY] = + rp->priv->regs[i][RAPL_DOMAIN_REG_POLICY]; + rd->regs[RAPL_DOMAIN_REG_INFO] = + rp->priv->regs[i][RAPL_DOMAIN_REG_INFO]; switch (mask) { case BIT(RAPL_DOMAIN_PACKAGE): @@ -580,7 +557,7 @@ static void rapl_init_domains(struct rapl_package *rp) rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->domain_energy_unit = - rapl_defaults->dram_domain_energy_unit; + rapl_defaults->dram_domain_energy_unit; if (rd->domain_energy_unit) pr_info("DRAM domain energy unit %dpj\n", rd->domain_energy_unit); @@ -594,7 +571,7 @@ static void rapl_init_domains(struct rapl_package *rp) } static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, - u64 value, int to_raw) + u64 value, int to_raw) { u64 units = 1; struct rapl_package *rp = rd->rp; @@ -631,40 +608,40 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, static struct rapl_primitive_info rpi[] = { /* name, mask, shift, msr index, unit divisor */ PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, - RAPL_DOMAIN_REG_STATUS, 
ENERGY_UNIT, 0), + RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, - RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, - RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(FW_LOCK, POWER_PP_LOCK, 31, - RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, - RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, - RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, - RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, - RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, - RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, - RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), + RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, - 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), + 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, - RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), + RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, - RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), + RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, - RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), + RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, - 
RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), + RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, - RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), + RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), /* non-hardware */ PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, - RAPL_PRIMITIVE_DERIVED), + RAPL_PRIMITIVE_DERIVED), {NULL, 0, 0, 0}, }; @@ -682,8 +659,7 @@ static struct rapl_primitive_info rpi[] = { * 63-------------------------- 31--------------------------- 0 */ static int rapl_read_data_raw(struct rapl_domain *rd, - enum rapl_primitives prim, - bool xlate, u64 *data) + enum rapl_primitives prim, bool xlate, u64 *data) { u64 value; struct rapl_primitive_info *rp = &rpi[prim]; @@ -699,7 +675,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd, cpu = rd->rp->lead_cpu; - /* special-case package domain, which uses a different bit*/ + /* special-case package domain, which uses a different bit */ if (prim == FW_LOCK && rd->id == RAPL_DOMAIN_PACKAGE) { rp->mask = POWER_PACKAGE_LOCK; rp->shift = 63; @@ -729,8 +705,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd, /* Similar use of primitive info in the read counterpart */ static int rapl_write_data_raw(struct rapl_domain *rd, - enum rapl_primitives prim, - unsigned long long value) + enum rapl_primitives prim, + unsigned long long value) { struct rapl_primitive_info *rp = &rpi[prim]; int cpu; @@ -774,7 +750,7 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) ra.mask = ~0; if (rp->priv->read_raw(cpu, &ra)) { pr_err("Failed to read power unit REG 0x%x on CPU %d, exit.\n", - rp->priv->reg_unit, cpu); + rp->priv->reg_unit, cpu); return -ENODEV; } @@ -788,7 +764,7 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) rp->time_unit = 1000000 / (1 << value); pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n", - rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); + rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); return 0; 
} @@ -802,7 +778,7 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) ra.mask = ~0; if (rp->priv->read_raw(cpu, &ra)) { pr_err("Failed to read power unit REG 0x%x on CPU %d, exit.\n", - rp->priv->reg_unit, cpu); + rp->priv->reg_unit, cpu); return -ENODEV; } @@ -816,7 +792,7 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) rp->time_unit = 1000000 / (1 << value); pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n", - rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); + rp->name, rp->energy_unit, rp->time_unit, rp->power_unit); return 0; } @@ -836,7 +812,6 @@ static void power_limit_irq_save_cpu(void *info) wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); } - /* REVISIT: * When package power limit is set artificially low by RAPL, LVT * thermal interrupt for package power limit should be ignored @@ -920,9 +895,9 @@ static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) } static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, - bool to_raw) + bool to_raw) { - u64 f, y; /* fraction and exp. used for time unit */ + u64 f, y; /* fraction and exp. used for time unit */ /* * Special processing based on 2^Y*(1+F/4), refer @@ -942,7 +917,7 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, } static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, - bool to_raw) + bool to_raw) { /* * Atom time unit encoding is straight forward val * time_unit, @@ -950,8 +925,8 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, */ if (!to_raw) return (value) ? 
value *= rp->time_unit : rp->time_unit; - else - value = div64_u64(value, rp->time_unit); + + value = div64_u64(value, rp->time_unit); return value; } @@ -999,43 +974,44 @@ static const struct rapl_defaults rapl_defaults_cht = { }; static const struct x86_cpu_id rapl_ids[] __initconst = { - INTEL_CPU_FAM6(SANDYBRIDGE, rapl_defaults_core), - INTEL_CPU_FAM6(SANDYBRIDGE_X, rapl_defaults_core), + INTEL_CPU_FAM6(SANDYBRIDGE, rapl_defaults_core), + INTEL_CPU_FAM6(SANDYBRIDGE_X, rapl_defaults_core), - INTEL_CPU_FAM6(IVYBRIDGE, rapl_defaults_core), - INTEL_CPU_FAM6(IVYBRIDGE_X, rapl_defaults_core), + INTEL_CPU_FAM6(IVYBRIDGE, rapl_defaults_core), + INTEL_CPU_FAM6(IVYBRIDGE_X, rapl_defaults_core), - INTEL_CPU_FAM6(HASWELL_CORE, rapl_defaults_core), - INTEL_CPU_FAM6(HASWELL_ULT, rapl_defaults_core), - INTEL_CPU_FAM6(HASWELL_GT3E, rapl_defaults_core), - INTEL_CPU_FAM6(HASWELL_X, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(HASWELL_CORE, rapl_defaults_core), + INTEL_CPU_FAM6(HASWELL_ULT, rapl_defaults_core), + INTEL_CPU_FAM6(HASWELL_GT3E, rapl_defaults_core), + INTEL_CPU_FAM6(HASWELL_X, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(BROADWELL_CORE, rapl_defaults_core), - INTEL_CPU_FAM6(BROADWELL_GT3E, rapl_defaults_core), - INTEL_CPU_FAM6(BROADWELL_XEON_D, rapl_defaults_core), - INTEL_CPU_FAM6(BROADWELL_X, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(BROADWELL_CORE, rapl_defaults_core), + INTEL_CPU_FAM6(BROADWELL_GT3E, rapl_defaults_core), + INTEL_CPU_FAM6(BROADWELL_XEON_D, rapl_defaults_core), + INTEL_CPU_FAM6(BROADWELL_X, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(SKYLAKE_DESKTOP, rapl_defaults_core), - INTEL_CPU_FAM6(SKYLAKE_MOBILE, rapl_defaults_core), - INTEL_CPU_FAM6(SKYLAKE_X, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(KABYLAKE_MOBILE, rapl_defaults_core), - INTEL_CPU_FAM6(KABYLAKE_DESKTOP, rapl_defaults_core), - INTEL_CPU_FAM6(CANNONLAKE_MOBILE, rapl_defaults_core), - INTEL_CPU_FAM6(ICELAKE_MOBILE, rapl_defaults_core), + INTEL_CPU_FAM6(SKYLAKE_DESKTOP, 
rapl_defaults_core), + INTEL_CPU_FAM6(SKYLAKE_MOBILE, rapl_defaults_core), + INTEL_CPU_FAM6(SKYLAKE_X, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(KABYLAKE_MOBILE, rapl_defaults_core), + INTEL_CPU_FAM6(KABYLAKE_DESKTOP, rapl_defaults_core), + INTEL_CPU_FAM6(CANNONLAKE_MOBILE, rapl_defaults_core), + INTEL_CPU_FAM6(ICELAKE_MOBILE, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt), - INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, rapl_defaults_tng), - INTEL_CPU_FAM6(ATOM_AIRMONT_MID, rapl_defaults_ann), - INTEL_CPU_FAM6(ATOM_GOLDMONT, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_GOLDMONT_X, rapl_defaults_core), - INTEL_CPU_FAM6(ATOM_TREMONT_X, rapl_defaults_core), + INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt), + INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), + INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, rapl_defaults_tng), + INTEL_CPU_FAM6(ATOM_AIRMONT_MID, rapl_defaults_ann), + INTEL_CPU_FAM6(ATOM_GOLDMONT, rapl_defaults_core), + INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, rapl_defaults_core), + INTEL_CPU_FAM6(ATOM_GOLDMONT_X, rapl_defaults_core), + INTEL_CPU_FAM6(ATOM_TREMONT_X, rapl_defaults_core), - INTEL_CPU_FAM6(XEON_PHI_KNL, rapl_defaults_hsw_server), - INTEL_CPU_FAM6(XEON_PHI_KNM, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(XEON_PHI_KNL, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(XEON_PHI_KNM, rapl_defaults_hsw_server), {} }; + MODULE_DEVICE_TABLE(x86cpu, rapl_ids); /* Read once for all raw primitive data for domains */ @@ -1051,7 +1027,7 @@ static void rapl_update_domain_data(struct rapl_package *rp) for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) { if (!rapl_read_data_raw(&rp->domains[dmn], prim, rpi[prim].unit, &val)) - rp->domains[dmn].rdd.primitives[prim] = val; + rp->domains[dmn].rdd.primitives[prim] = val; } } @@ -1066,20 +1042,18 @@ static int rapl_package_register_powercap(struct rapl_package *rp) /* Update the domain data of the new 
package */ rapl_update_domain_data(rp); - /* first we register package domain as the parent zone*/ + /* first we register package domain as the parent zone */ for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { if (rd->id == RAPL_DOMAIN_PACKAGE) { nr_pl = find_nr_power_limit(rd); pr_debug("register package domain %s\n", rp->name); power_zone = powercap_register_zone(&rd->power_zone, - rp->priv->control_type, - rp->name, NULL, - &zone_ops[rd->id], - nr_pl, - &constraint_ops); + rp->priv->control_type, rp->name, + NULL, &zone_ops[rd->id], nr_pl, + &constraint_ops); if (IS_ERR(power_zone)) { pr_debug("failed to register power zone %s\n", - rp->name); + rp->name); return PTR_ERR(power_zone); } /* track parent zone in per package/socket data */ @@ -1092,21 +1066,21 @@ static int rapl_package_register_powercap(struct rapl_package *rp) pr_err("no package domain found, unknown topology!\n"); return -ENODEV; } - /* now register domains as children of the socket/package*/ + /* now register domains as children of the socket/package */ for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { if (rd->id == RAPL_DOMAIN_PACKAGE) continue; /* number of power limits per domain varies */ nr_pl = find_nr_power_limit(rd); power_zone = powercap_register_zone(&rd->power_zone, - rp->priv->control_type, rd->name, - rp->power_zone, - &zone_ops[rd->id], nr_pl, - &constraint_ops); + rp->priv->control_type, + rd->name, rp->power_zone, + &zone_ops[rd->id], nr_pl, + &constraint_ops); if (IS_ERR(power_zone)) { pr_debug("failed to register power_zone, %s:%s\n", - rp->name, rd->name); + rp->name, rd->name); ret = PTR_ERR(power_zone); goto err_cleanup; } @@ -1120,13 +1094,14 @@ static int rapl_package_register_powercap(struct rapl_package *rp) */ while (--rd >= rp->domains) { pr_debug("unregister %s domain %s\n", rp->name, rd->name); - powercap_unregister_zone(rp->priv->control_type, &rd->power_zone); + powercap_unregister_zone(rp->priv->control_type, + &rd->power_zone); } 
return ret; } -static int __init rapl_add_platform_domain(struct rapl_if_priv *priv) +int rapl_add_platform_domain(struct rapl_if_priv *priv) { struct rapl_domain *rd; struct powercap_zone *power_zone; @@ -1151,8 +1126,10 @@ static int __init rapl_add_platform_domain(struct rapl_if_priv *priv) rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM]; rd->id = RAPL_DOMAIN_PLATFORM; - rd->regs[RAPL_DOMAIN_REG_LIMIT] = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; - rd->regs[RAPL_DOMAIN_REG_STATUS] = priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; + rd->regs[RAPL_DOMAIN_REG_LIMIT] = + priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT]; + rd->regs[RAPL_DOMAIN_REG_STATUS] = + priv->regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS]; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; rd->rpl[1].prim_id = PL2_ENABLE; @@ -1173,15 +1150,17 @@ static int __init rapl_add_platform_domain(struct rapl_if_priv *priv) return 0; } +EXPORT_SYMBOL_GPL(rapl_add_platform_domain); -static void rapl_remove_platform_domain(struct rapl_if_priv *priv) +void rapl_remove_platform_domain(struct rapl_if_priv *priv) { if (priv->platform_rapl_domain) { powercap_unregister_zone(priv->control_type, - &priv->platform_rapl_domain->power_zone); + &priv->platform_rapl_domain->power_zone); kfree(priv->platform_rapl_domain); } } +EXPORT_SYMBOL_GPL(rapl_remove_platform_domain); static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) { @@ -1212,13 +1191,12 @@ static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp) return 0; } - /* * Check if power limits are available. Two cases when they are not available: * 1. Locked by BIOS, in this case we still provide read-only access so that * users can see what limit is set by the BIOS. * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not - * exist at all. In this case, we do not show the contraints in powercap. + * exist at all. 
In this case, we do not show the constraints in powercap. * * Called after domains are detected and initialized. */ @@ -1235,9 +1213,10 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd) rd->state |= DOMAIN_STATE_BIOS_LOCKED; } } - /* check if power limit MSRs exists, otherwise domain is monitoring only */ + /* check if power limit MSR exists, otherwise domain is monitoring only */ for (i = 0; i < NR_POWER_LIMITS; i++) { int prim = rd->rpl[i].prim_id; + if (rapl_read_data_raw(rd, prim, false, &val64)) rd->rpl[i].name = NULL; } @@ -1258,7 +1237,7 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu) pr_info("Found RAPL domain %s\n", rapl_domain_names[i]); } } - rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); + rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); if (!rp->nr_domains) { pr_debug("no valid rapl domains found in %s\n", rp->name); return -ENODEV; @@ -1266,7 +1245,7 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu) pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name); rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain), - GFP_KERNEL); + GFP_KERNEL); if (!rp->domains) return -ENOMEM; @@ -1279,7 +1258,7 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu) } /* called from CPU hotplug notifier, hotplug lock held */ -static void rapl_remove_package(struct rapl_package *rp) +void rapl_remove_package(struct rapl_package *rp) { struct rapl_domain *rd, *rd_package = NULL; @@ -1298,22 +1277,47 @@ static void rapl_remove_package(struct rapl_package *rp) } pr_debug("remove package, undo power limit on %s: %s\n", rp->name, rd->name); - powercap_unregister_zone(rp->priv->control_type, &rd->power_zone); + powercap_unregister_zone(rp->priv->control_type, + &rd->power_zone); } /* do parent zone last */ - powercap_unregister_zone(rp->priv->control_type, &rd_package->power_zone); + powercap_unregister_zone(rp->priv->control_type, + &rd_package->power_zone); 
list_del(&rp->plist); + if (list_empty(&rapl_packages)) + rapl_remove_core(); kfree(rp); } +EXPORT_SYMBOL_GPL(rapl_remove_package); + +/* caller to ensure CPU hotplug lock is held */ +struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv) +{ + int id = topology_logical_die_id(cpu); + struct rapl_package *rp; + + list_for_each_entry(rp, &rapl_packages, plist) { + if (rp->id == id + && rp->priv->control_type == priv->control_type) + return rp; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(rapl_find_package_domain); /* called from CPU hotplug notifier, hotplug lock held */ -static struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) +struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) { int id = topology_logical_die_id(cpu); struct rapl_package *rp; struct cpuinfo_x86 *c = &cpu_data(cpu); int ret; + ret = rapl_init_core(); + if (ret) + return ERR_PTR(ret); + rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); if (!rp) return ERR_PTR(-ENOMEM); @@ -1325,14 +1329,13 @@ static struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) if (topology_max_die_per_package() > 1) snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, - "package-%d-die-%d", c->phys_proc_id, c->cpu_die_id); + "package-%d-die-%d", c->phys_proc_id, c->cpu_die_id); else snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", - c->phys_proc_id); + c->phys_proc_id); /* check if the package contains valid domains */ - if (rapl_detect_domains(rp, cpu) || - rapl_defaults->check_unit(rp, cpu)) { + if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) { ret = -ENODEV; goto err_free_package; } @@ -1348,45 +1351,7 @@ static struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) kfree(rp); return ERR_PTR(ret); } - -/* Handles CPU hotplug on multi-socket systems. - * If a CPU goes online as the first CPU of the physical package - * we add the RAPL package to the system. 
Similarly, when the last - * CPU of the package is removed, we remove the RAPL package and its - * associated domains. Cooling devices are handled accordingly at - * per-domain level. - */ -static int rapl_cpu_online(unsigned int cpu) -{ - struct rapl_package *rp; - - rp = rapl_find_package_domain(cpu, &rapl_msr_priv); - if (!rp) { - rp = rapl_add_package(cpu, &rapl_msr_priv); - if (IS_ERR(rp)) - return PTR_ERR(rp); - } - cpumask_set_cpu(cpu, &rp->cpumask); - return 0; -} - -static int rapl_cpu_down_prep(unsigned int cpu) -{ - struct rapl_package *rp; - int lead_cpu; - - rp = rapl_find_package_domain(cpu, &rapl_msr_priv); - if (!rp) - return 0; - - cpumask_clear_cpu(cpu, &rp->cpumask); - lead_cpu = cpumask_first(&rp->cpumask); - if (lead_cpu >= nr_cpu_ids) - rapl_remove_package(rp); - else if (rp->lead_cpu == cpu) - rp->lead_cpu = lead_cpu; - return 0; -} +EXPORT_SYMBOL_GPL(rapl_add_package); static void power_limit_state_save(void) { @@ -1404,17 +1369,15 @@ static void power_limit_state_save(void) switch (rd->rpl[i].prim_id) { case PL1_ENABLE: ret = rapl_read_data_raw(rd, - POWER_LIMIT1, - true, - &rd->rpl[i].last_power_limit); + POWER_LIMIT1, true, + &rd->rpl[i].last_power_limit); if (ret) rd->rpl[i].last_power_limit = 0; break; case PL2_ENABLE: ret = rapl_read_data_raw(rd, - POWER_LIMIT2, - true, - &rd->rpl[i].last_power_limit); + POWER_LIMIT2, true, + &rd->rpl[i].last_power_limit); if (ret) rd->rpl[i].last_power_limit = 0; break; @@ -1440,15 +1403,13 @@ static void power_limit_state_restore(void) switch (rd->rpl[i].prim_id) { case PL1_ENABLE: if (rd->rpl[i].last_power_limit) - rapl_write_data_raw(rd, - POWER_LIMIT1, - rd->rpl[i].last_power_limit); + rapl_write_data_raw(rd, POWER_LIMIT1, + rd->rpl[i].last_power_limit); break; case PL2_ENABLE: if (rd->rpl[i].last_power_limit) - rapl_write_data_raw(rd, - POWER_LIMIT2, - rd->rpl[i].last_power_limit); + rapl_write_data_raw(rd, POWER_LIMIT2, + rd->rpl[i].last_power_limit); break; } } @@ -1457,7 +1418,7 @@ static void 
power_limit_state_restore(void) } static int rapl_pm_callback(struct notifier_block *nb, - unsigned long mode, void *_unused) + unsigned long mode, void *_unused) { switch (mode) { case PM_SUSPEND_PREPARE: @@ -1474,101 +1435,35 @@ static struct notifier_block rapl_pm_notifier = { .notifier_call = rapl_pm_callback, }; -static int rapl_msr_read_raw(int cpu, struct reg_action *ra) -{ - if (rdmsrl_safe_on_cpu(cpu, ra->reg, &ra->value)) { - pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg, cpu); - return -EIO; - } - ra->value &= ra->mask; - return 0; -} - -static void rapl_msr_update_func(void *info) -{ - struct reg_action *ra = info; - u64 val; - - ra->err = rdmsrl_safe(ra->reg, &val); - if (ra->err) - return; - - val &= ~ra->mask; - val |= ra->value; - - ra->err = wrmsrl_safe(ra->reg, val); -} - - -static int rapl_msr_write_raw(int cpu, struct reg_action *ra) -{ - int ret; - - ret = smp_call_function_single(cpu, rapl_msr_update_func, ra, 1); - if (WARN_ON_ONCE(ret)) - return ret; - - return ra->err; -} - -static int __init rapl_init(void) +static int rapl_init_core(void) { const struct x86_cpu_id *id; int ret; + if (rapl_defaults) + return 0; + id = x86_match_cpu(rapl_ids); if (!id) { pr_err("driver does not support CPU family %d model %d\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } rapl_defaults = (struct rapl_defaults *)id->driver_data; - rapl_msr_priv.read_raw = rapl_msr_read_raw; - rapl_msr_priv.write_raw = rapl_msr_write_raw; - - rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); - if (IS_ERR(rapl_msr_priv.control_type)) { - pr_debug("failed to register powercap control_type.\n"); - return PTR_ERR(rapl_msr_priv.control_type); - } - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online", - rapl_cpu_online, rapl_cpu_down_prep); - if (ret < 0) - goto err_unreg; - rapl_msr_priv.pcap_rapl_online = ret; - - /* Don't bail out if PSys is not 
supported */ - rapl_add_platform_domain(&rapl_msr_priv); - ret = register_pm_notifier(&rapl_pm_notifier); - if (ret) - goto err_unreg_all; return 0; - -err_unreg_all: - cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); - -err_unreg: - powercap_unregister_control_type(rapl_msr_priv.control_type); - return ret; } -static void __exit rapl_exit(void) +static void rapl_remove_core(void) { unregister_pm_notifier(&rapl_pm_notifier); - cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); - rapl_remove_platform_domain(&rapl_msr_priv); - powercap_unregister_control_type(rapl_msr_priv.control_type); + rapl_defaults = NULL; } -module_init(rapl_init); -module_exit(rapl_exit); - -MODULE_DESCRIPTION("Driver for Intel RAPL (Running Average Power Limit)"); +MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code"); MODULE_AUTHOR("Jacob Pan "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c new file mode 100644 index 000000000000..89645222e3e0 --- /dev/null +++ b/drivers/powercap/intel_rapl_msr.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel Running Average Power Limit (RAPL) Driver via MSR interface + * Copyright (c) 2019, Intel Corporation. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Local defines */ +#define MSR_PLATFORM_POWER_LIMIT 0x0000065C + +/* private data for RAPL MSR Interface */ +static struct rapl_if_priv rapl_msr_priv = { + .reg_unit = MSR_RAPL_POWER_UNIT, + .regs[RAPL_DOMAIN_PACKAGE] = { + MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO }, + .regs[RAPL_DOMAIN_PP0] = { + MSR_PP0_POWER_LIMIT, MSR_PP0_ENERGY_STATUS, 0, MSR_PP0_POLICY, 0 }, + .regs[RAPL_DOMAIN_PP1] = { + MSR_PP1_POWER_LIMIT, MSR_PP1_ENERGY_STATUS, 0, MSR_PP1_POLICY, 0 }, + .regs[RAPL_DOMAIN_DRAM] = { + MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO }, + .regs[RAPL_DOMAIN_PLATFORM] = { + MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0}, +}; + +/* Handles CPU hotplug on multi-socket systems. + * If a CPU goes online as the first CPU of the physical package + * we add the RAPL package to the system. Similarly, when the last + * CPU of the package is removed, we remove the RAPL package and its + * associated domains. Cooling devices are handled accordingly at + * per-domain level. 
+ */ +static int rapl_cpu_online(unsigned int cpu) +{ + struct rapl_package *rp; + + rp = rapl_find_package_domain(cpu, &rapl_msr_priv); + if (!rp) { + rp = rapl_add_package(cpu, &rapl_msr_priv); + if (IS_ERR(rp)) + return PTR_ERR(rp); + } + cpumask_set_cpu(cpu, &rp->cpumask); + return 0; +} + +static int rapl_cpu_down_prep(unsigned int cpu) +{ + struct rapl_package *rp; + int lead_cpu; + + rp = rapl_find_package_domain(cpu, &rapl_msr_priv); + if (!rp) + return 0; + + cpumask_clear_cpu(cpu, &rp->cpumask); + lead_cpu = cpumask_first(&rp->cpumask); + if (lead_cpu >= nr_cpu_ids) + rapl_remove_package(rp); + else if (rp->lead_cpu == cpu) + rp->lead_cpu = lead_cpu; + return 0; +} + +static int rapl_msr_read_raw(int cpu, struct reg_action *ra) +{ + if (rdmsrl_safe_on_cpu(cpu, ra->reg, &ra->value)) { + pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg, cpu); + return -EIO; + } + ra->value &= ra->mask; + return 0; +} + +static void rapl_msr_update_func(void *info) +{ + struct reg_action *ra = info; + u64 val; + + ra->err = rdmsrl_safe(ra->reg, &val); + if (ra->err) + return; + + val &= ~ra->mask; + val |= ra->value; + + ra->err = wrmsrl_safe(ra->reg, val); +} + +static int rapl_msr_write_raw(int cpu, struct reg_action *ra) +{ + int ret; + + ret = smp_call_function_single(cpu, rapl_msr_update_func, ra, 1); + if (WARN_ON_ONCE(ret)) + return ret; + + return ra->err; +} + +static int __init rapl_msr_init(void) +{ + int ret; + + rapl_msr_priv.read_raw = rapl_msr_read_raw; + rapl_msr_priv.write_raw = rapl_msr_write_raw; + + rapl_msr_priv.control_type = powercap_register_control_type(NULL, "intel-rapl", NULL); + if (IS_ERR(rapl_msr_priv.control_type)) { + pr_debug("failed to register powercap control_type.\n"); + return PTR_ERR(rapl_msr_priv.control_type); + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online", + rapl_cpu_online, rapl_cpu_down_prep); + if (ret < 0) + goto out; + rapl_msr_priv.pcap_rapl_online = ret; + + /* Don't bail out if PSys is not 
supported */ + rapl_add_platform_domain(&rapl_msr_priv); + + return 0; + +out: + if (ret) + powercap_unregister_control_type(rapl_msr_priv.control_type); + return ret; +} + +static void __exit rapl_msr_exit(void) +{ + cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); + rapl_remove_platform_domain(&rapl_msr_priv); + powercap_unregister_control_type(rapl_msr_priv.control_type); +} + +module_init(rapl_msr_init); +module_exit(rapl_msr_exit); + +MODULE_DESCRIPTION("Driver for Intel RAPL (Running Average Power Limit) control via MSR interface"); +MODULE_AUTHOR("Zhang Rui "); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index ff215d64d114..9579f458fe4d 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -142,4 +142,11 @@ struct rapl_package { struct rapl_if_priv *priv; }; +struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv); +struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv); +void rapl_remove_package(struct rapl_package *rp); + +int rapl_add_platform_domain(struct rapl_if_priv *priv); +void rapl_remove_platform_domain(struct rapl_if_priv *priv); + #endif /* __INTEL_RAPL_H__ */ From d978e755aabe215cb67bf713e103ed3916ec306d Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:31 +0800 Subject: [PATCH 18/27] intel_rapl: support 64 bit register RAPL MMIO interface uses 64 bit registers, thus force use 64 bit register for all the RAPL code. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl_common.c | 6 +++--- drivers/powercap/intel_rapl_msr.c | 11 +++++++---- include/linux/intel_rapl.h | 8 ++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 34a82531a7cf..8e4de036f6d0 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -689,7 +689,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd, ra.mask = rp->mask; if (rd->rp->priv->read_raw(cpu, &ra)) { - pr_debug("failed to read reg 0x%x on cpu %d\n", ra.reg, cpu); + pr_debug("failed to read reg 0x%llx on cpu %d\n", ra.reg, cpu); return -EIO; } @@ -749,7 +749,7 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) ra.reg = rp->priv->reg_unit; ra.mask = ~0; if (rp->priv->read_raw(cpu, &ra)) { - pr_err("Failed to read power unit REG 0x%x on CPU %d, exit.\n", + pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n", rp->priv->reg_unit, cpu); return -ENODEV; } @@ -777,7 +777,7 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) ra.reg = rp->priv->reg_unit; ra.mask = ~0; if (rp->priv->read_raw(cpu, &ra)) { - pr_err("Failed to read power unit REG 0x%x on CPU %d, exit.\n", + pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n", rp->priv->reg_unit, cpu); return -ENODEV; } diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 89645222e3e0..6cd8a8fb9238 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -84,8 +84,10 @@ static int rapl_cpu_down_prep(unsigned int cpu) static int rapl_msr_read_raw(int cpu, struct reg_action *ra) { - if (rdmsrl_safe_on_cpu(cpu, ra->reg, &ra->value)) { - pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg, cpu); + u32 msr = (u32)ra->reg; + + if (rdmsrl_safe_on_cpu(cpu, msr, &ra->value)) { + pr_debug("failed to read msr 0x%x on cpu %d\n", msr, cpu); return -EIO; } ra->value 
&= ra->mask; @@ -95,16 +97,17 @@ static int rapl_msr_read_raw(int cpu, struct reg_action *ra) static void rapl_msr_update_func(void *info) { struct reg_action *ra = info; + u32 msr = (u32)ra->reg; u64 val; - ra->err = rdmsrl_safe(ra->reg, &val); + ra->err = rdmsrl_safe(msr, &val); if (ra->err) return; val &= ~ra->mask; val |= ra->value; - ra->err = wrmsrl_safe(ra->reg, val); + ra->err = wrmsrl_safe(msr, val); } static int rapl_msr_write_raw(int cpu, struct reg_action *ra) diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 9579f458fe4d..649e19981eb0 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -78,7 +78,7 @@ struct rapl_package; struct rapl_domain { const char *name; enum rapl_domain_type id; - int regs[RAPL_DOMAIN_REG_MAX]; + u64 regs[RAPL_DOMAIN_REG_MAX]; struct powercap_zone power_zone; struct rapl_domain_data rdd; struct rapl_power_limit rpl[NR_POWER_LIMITS]; @@ -89,7 +89,7 @@ struct rapl_domain { }; struct reg_action { - u32 reg; + u64 reg; u64 mask; u64 value; int err; @@ -113,8 +113,8 @@ struct rapl_if_priv { struct powercap_control_type *control_type; struct rapl_domain *platform_rapl_domain; enum cpuhp_state pcap_rapl_online; - u32 reg_unit; - u32 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX]; + u64 reg_unit; + u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX]; int (*read_raw)(int cpu, struct reg_action *ra); int (*write_raw)(int cpu, struct reg_action *ra); }; From 0c2ddedd8bcb88c4100acb9e0fc5ac8752d09501 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:32 +0800 Subject: [PATCH 19/27] intel_rapl: support two power limits for every RAPL domain RAPL MSR interface supports 2 power limits for package domain, and 1 power limit for other domains, while RAPL MMIO interface supports 2 power limits for both package and dram domains. And when 2 power limits are supported, the FW_LOCK bit is in bit 63 of the register, instead of bit 31. 
Remove the assumption that only package domain supports 2 power limits. And allow the RAPL interface driver to specify the number of power limits supported, for every single RAPL domain it owns. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 70 ++++++++++------------------ drivers/powercap/intel_rapl_msr.c | 1 + include/linux/intel_rapl.h | 2 + 3 files changed, 27 insertions(+), 46 deletions(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 8e4de036f6d0..db8df19d8133 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -38,8 +38,8 @@ #define POWER_LIMIT2_MASK (0x7FFFULL<<32) #define POWER_LIMIT2_ENABLE BIT_ULL(47) #define POWER_LIMIT2_CLAMP BIT_ULL(48) -#define POWER_PACKAGE_LOCK BIT_ULL(63) -#define POWER_PP_LOCK BIT(31) +#define POWER_HIGH_LOCK BIT_ULL(63) +#define POWER_LOW_LOCK BIT(31) #define TIME_WINDOW1_MASK (0x7FULL<<17) #define TIME_WINDOW2_MASK (0x7FULL<<49) @@ -513,60 +513,38 @@ static const struct powercap_zone_constraint_ops constraint_ops = { /* called after domain detection and package level data are set */ static void rapl_init_domains(struct rapl_package *rp) { - int i; + enum rapl_domain_type i; + enum rapl_domain_reg_id j; struct rapl_domain *rd = rp->domains; for (i = 0; i < RAPL_DOMAIN_MAX; i++) { unsigned int mask = rp->domain_map & (1 << i); - rd->regs[RAPL_DOMAIN_REG_LIMIT] = - rp->priv->regs[i][RAPL_DOMAIN_REG_LIMIT]; - rd->regs[RAPL_DOMAIN_REG_STATUS] = - rp->priv->regs[i][RAPL_DOMAIN_REG_STATUS]; - rd->regs[RAPL_DOMAIN_REG_PERF] = - rp->priv->regs[i][RAPL_DOMAIN_REG_PERF]; - rd->regs[RAPL_DOMAIN_REG_POLICY] = - rp->priv->regs[i][RAPL_DOMAIN_REG_POLICY]; - rd->regs[RAPL_DOMAIN_REG_INFO] = - rp->priv->regs[i][RAPL_DOMAIN_REG_INFO]; + if (!mask) + continue; - switch (mask) { - case BIT(RAPL_DOMAIN_PACKAGE): - rd->name = 
rapl_domain_names[RAPL_DOMAIN_PACKAGE]; - rd->id = RAPL_DOMAIN_PACKAGE; - rd->rpl[0].prim_id = PL1_ENABLE; - rd->rpl[0].name = pl1_name; + rd->rp = rp; + rd->name = rapl_domain_names[i]; + rd->id = i; + rd->rpl[0].prim_id = PL1_ENABLE; + rd->rpl[0].name = pl1_name; + /* some domain may support two power limits */ + if (rp->priv->limits[i] == 2) { rd->rpl[1].prim_id = PL2_ENABLE; rd->rpl[1].name = pl2_name; - break; - case BIT(RAPL_DOMAIN_PP0): - rd->name = rapl_domain_names[RAPL_DOMAIN_PP0]; - rd->id = RAPL_DOMAIN_PP0; - rd->rpl[0].prim_id = PL1_ENABLE; - rd->rpl[0].name = pl1_name; - break; - case BIT(RAPL_DOMAIN_PP1): - rd->name = rapl_domain_names[RAPL_DOMAIN_PP1]; - rd->id = RAPL_DOMAIN_PP1; - rd->rpl[0].prim_id = PL1_ENABLE; - rd->rpl[0].name = pl1_name; - break; - case BIT(RAPL_DOMAIN_DRAM): - rd->name = rapl_domain_names[RAPL_DOMAIN_DRAM]; - rd->id = RAPL_DOMAIN_DRAM; - rd->rpl[0].prim_id = PL1_ENABLE; - rd->rpl[0].name = pl1_name; + } + + for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) + rd->regs[j] = rp->priv->regs[i][j]; + + if (i == RAPL_DOMAIN_DRAM) { rd->domain_energy_unit = rapl_defaults->dram_domain_energy_unit; if (rd->domain_energy_unit) pr_info("DRAM domain energy unit %dpj\n", rd->domain_energy_unit); - break; - } - if (mask) { - rd->rp = rp; - rd++; } + rd++; } } @@ -613,7 +591,7 @@ static struct rapl_primitive_info rpi[] = { RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), - PRIMITIVE_INFO_INIT(FW_LOCK, POWER_PP_LOCK, 31, + PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), @@ -675,9 +653,9 @@ static int rapl_read_data_raw(struct rapl_domain *rd, cpu = rd->rp->lead_cpu; - /* special-case package domain, which uses a different bit */ - if (prim == FW_LOCK && rd->id == RAPL_DOMAIN_PACKAGE) { - rp->mask = POWER_PACKAGE_LOCK; + /* 
domain with 2 limits has different bit */ + if (prim == FW_LOCK && rd->rp->priv->limits[rd->id] == 2) { + rp->mask = POWER_HIGH_LOCK; rp->shift = 63; } /* non-hardware data are collected by the polling thread */ diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 6cd8a8fb9238..bc14a4579acb 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -41,6 +41,7 @@ static struct rapl_if_priv rapl_msr_priv = { MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO }, .regs[RAPL_DOMAIN_PLATFORM] = { MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0}, + .limits[RAPL_DOMAIN_PACKAGE] = 2, }; /* Handles CPU hotplug on multi-socket systems. diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 649e19981eb0..0c179d92d110 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -104,6 +104,7 @@ struct reg_action { * @pcap_rapl_online: CPU hotplug state for each RAPL interface. * @reg_unit: Register for getting energy/power/time unit. * @regs: Register sets for different RAPL Domains. + * @limits: Number of power limits supported by each domain. * @read_raw: Callback for reading RAPL interface specific * registers. * @write_raw: Callback for writing RAPL interface specific @@ -115,6 +116,7 @@ struct rapl_if_priv { enum cpuhp_state pcap_rapl_online; u64 reg_unit; u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX]; + int limits[RAPL_DOMAIN_MAX]; int (*read_raw)(int cpu, struct reg_action *ra); int (*write_raw)(int cpu, struct reg_action *ra); }; From 555c45fe0d04bd817e245a125d242b6a86af4593 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:33 +0800 Subject: [PATCH 20/27] int340X/processor_thermal_device: add support for MMIO RAPL Introduce MMIO RAPL support as Intel processor_thermal device exposes the capability to do RAPL control via MMIO registers. 
Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/int340x_thermal/Kconfig | 6 + .../processor_thermal_device.c | 173 +++++++++++++++++- 2 files changed, 173 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/Kconfig b/drivers/thermal/intel/int340x_thermal/Kconfig index 5333e018c88c..797907542e43 100644 --- a/drivers/thermal/intel/int340x_thermal/Kconfig +++ b/drivers/thermal/intel/int340x_thermal/Kconfig @@ -40,4 +40,10 @@ config INT3406_THERMAL brightness in order to address a thermal condition or to reduce power consumed by display device. +config PROC_THERMAL_MMIO_RAPL + bool + depends on 64BIT + depends on POWERCAP + select INTEL_RAPL_CORE + default y endif diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index 53c84fa498ce..06c7ab317dee 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "int340x_thermal_zone.h" #include "../intel_soc_dts_iosf.h" @@ -37,6 +39,8 @@ /* GeminiLake thermal reporting device */ #define PCI_DEVICE_ID_PROC_GLK_THERMAL 0x318C +#define DRV_NAME "proc_thermal" + struct power_config { u32 index; u32 min_uw; @@ -52,6 +56,7 @@ struct proc_thermal_device { struct power_config power_limits[2]; struct int34x_thermal_zone *int340x_zone; struct intel_soc_dts_sensors *soc_dts; + void __iomem *mmio_base; }; enum proc_thermal_emum_mode_type { @@ -60,6 +65,12 @@ enum proc_thermal_emum_mode_type { PROC_THERMAL_PLATFORM_DEV }; +struct rapl_mmio_regs { + u64 reg_unit; + u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX]; + int limits[RAPL_DOMAIN_MAX]; +}; + /* * We can have only one type of enumeration, PCI or Platform, * not both. 
So we don't need instance specific data. @@ -367,8 +378,151 @@ static irqreturn_t proc_thermal_pci_msi_irq(int irq, void *devid) return IRQ_HANDLED; } +#ifdef CONFIG_PROC_THERMAL_MMIO_RAPL + +#define MCHBAR 0 + +/* RAPL Support via MMIO interface */ +static struct rapl_if_priv rapl_mmio_priv; + +static int rapl_mmio_cpu_online(unsigned int cpu) +{ + struct rapl_package *rp; + + /* mmio rapl supports package 0 only for now */ + if (topology_physical_package_id(cpu)) + return 0; + + rp = rapl_find_package_domain(cpu, &rapl_mmio_priv); + if (!rp) { + rp = rapl_add_package(cpu, &rapl_mmio_priv); + if (IS_ERR(rp)) + return PTR_ERR(rp); + } + cpumask_set_cpu(cpu, &rp->cpumask); + return 0; +} + +static int rapl_mmio_cpu_down_prep(unsigned int cpu) +{ + struct rapl_package *rp; + int lead_cpu; + + rp = rapl_find_package_domain(cpu, &rapl_mmio_priv); + if (!rp) + return 0; + + cpumask_clear_cpu(cpu, &rp->cpumask); + lead_cpu = cpumask_first(&rp->cpumask); + if (lead_cpu >= nr_cpu_ids) + rapl_remove_package(rp); + else if (rp->lead_cpu == cpu) + rp->lead_cpu = lead_cpu; + return 0; +} + +static int rapl_mmio_read_raw(int cpu, struct reg_action *ra) +{ + if (!ra->reg) + return -EINVAL; + + ra->value = readq((void __iomem *)ra->reg); + ra->value &= ra->mask; + return 0; +} + +static int rapl_mmio_write_raw(int cpu, struct reg_action *ra) +{ + u64 val; + + if (!ra->reg) + return -EINVAL; + + val = readq((void __iomem *)ra->reg); + val &= ~ra->mask; + val |= ra->value; + writeq(val, (void __iomem *)ra->reg); + return 0; +} + +static int proc_thermal_rapl_add(struct pci_dev *pdev, + struct proc_thermal_device *proc_priv, + struct rapl_mmio_regs *rapl_regs) +{ + enum rapl_domain_reg_id reg; + enum rapl_domain_type domain; + int ret; + + if (!rapl_regs) + return 0; + + ret = pcim_iomap_regions(pdev, 1 << MCHBAR, DRV_NAME); + if (ret) { + dev_err(&pdev->dev, "cannot reserve PCI memory region\n"); + return -ENOMEM; + } + + proc_priv->mmio_base = pcim_iomap_table(pdev)[MCHBAR]; + + 
for (domain = RAPL_DOMAIN_PACKAGE; domain < RAPL_DOMAIN_MAX; domain++) { + for (reg = RAPL_DOMAIN_REG_LIMIT; reg < RAPL_DOMAIN_REG_MAX; reg++) + if (rapl_regs->regs[domain][reg]) + rapl_mmio_priv.regs[domain][reg] = + (u64)proc_priv->mmio_base + + rapl_regs->regs[domain][reg]; + rapl_mmio_priv.limits[domain] = rapl_regs->limits[domain]; + } + rapl_mmio_priv.reg_unit = (u64)proc_priv->mmio_base + rapl_regs->reg_unit; + + rapl_mmio_priv.read_raw = rapl_mmio_read_raw; + rapl_mmio_priv.write_raw = rapl_mmio_write_raw; + + rapl_mmio_priv.control_type = powercap_register_control_type(NULL, "intel-rapl-mmio", NULL); + if (IS_ERR(rapl_mmio_priv.control_type)) { + pr_debug("failed to register powercap control_type.\n"); + return PTR_ERR(rapl_mmio_priv.control_type); + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online", + rapl_mmio_cpu_online, rapl_mmio_cpu_down_prep); + if (ret < 0) { + powercap_unregister_control_type(rapl_mmio_priv.control_type); + return ret; + } + rapl_mmio_priv.pcap_rapl_online = ret; + + return 0; +} + +static void proc_thermal_rapl_remove(void) +{ + cpuhp_remove_state(rapl_mmio_priv.pcap_rapl_online); + powercap_unregister_control_type(rapl_mmio_priv.control_type); +} + +static const struct rapl_mmio_regs rapl_mmio_hsw = { + .reg_unit = 0x5938, + .regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930}, + .regs[RAPL_DOMAIN_DRAM] = { 0x58e0, 0x58e8, 0x58ec, 0, 0}, + .limits[RAPL_DOMAIN_PACKAGE] = 2, + .limits[RAPL_DOMAIN_DRAM] = 2, +}; + +#else + +static int proc_thermal_rapl_add(struct pci_dev *pdev, + struct proc_thermal_device *proc_priv, + struct rapl_mmio_regs *rapl_regs) +{ + return 0; +} +static void proc_thermal_rapl_remove(void) {} +static const struct rapl_mmio_regs rapl_mmio_hsw; + +#endif /* CONFIG_MMIO_RAPL */ + static int proc_thermal_pci_probe(struct pci_dev *pdev, - const struct pci_device_id *unused) + const struct pci_device_id *id) { struct proc_thermal_device *proc_priv; int ret; @@ -378,15 +532,21 
@@ static int proc_thermal_pci_probe(struct pci_dev *pdev, return -ENODEV; } - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret < 0) { dev_err(&pdev->dev, "error: could not enable device\n"); return ret; } ret = proc_thermal_add(&pdev->dev, &proc_priv); + if (ret) + return ret; + + ret = proc_thermal_rapl_add(pdev, proc_priv, + (struct rapl_mmio_regs *)id->driver_data); if (ret) { - pci_disable_device(pdev); + dev_err(&pdev->dev, "failed to add RAPL MMIO interface\n"); + proc_thermal_remove(proc_priv); return ret; } @@ -439,14 +599,15 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); } } + proc_thermal_rapl_remove(); proc_thermal_remove(proc_priv); - pci_disable_device(pdev); } static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BDW_THERMAL)}, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)}, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL)}, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL), + .driver_data = (kernel_ulong_t)&rapl_mmio_hsw, }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)}, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT0_THERMAL)}, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT1_THERMAL)}, @@ -461,7 +622,7 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { MODULE_DEVICE_TABLE(pci, proc_thermal_pci_ids); static struct pci_driver proc_thermal_pci_driver = { - .name = "proc_thermal", + .name = DRV_NAME, .probe = proc_thermal_pci_probe, .remove = proc_thermal_pci_remove, .id_table = proc_thermal_pci_ids, From abcfaeb3f5dc8bded4ba446eb2fb017a7a41d9bc Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:34 +0800 Subject: [PATCH 21/27] intel_rapl: Fix module autoloading issue intel_rapl driver used to have a list of cpuids, which is used to 1. check if the processor support RAPL MSRs 2. 
do some cpu model specific setting 3. module autoloading Now, the cpu model specific settings are moved to intel_rapl_common.c as part of the common code, because the setup is also needed by RAPL MMIO interface on those platforms. But removing the cpuid list from intel_rapl MSR interface driver means that the driver can no longer be loaded automatically. Maintaining another copy of the cpuid list in intel_rapl_msr.c does not make sense because it increases the complexity when enabling RAPL support on a new cpu model. Fix the problem by creating an "intel_rapl_msr" platform device in the common code, and make RAPL MSR interface driver (intel_rapl_msr.c) probe the platform device directly. Reviewed-by: Pandruvada, Srinivas Tested-by: Pandruvada, Srinivas Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 45 +++++++++++++++++----------- drivers/powercap/intel_rapl_msr.c | 24 ++++++++++++--- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index db8df19d8133..f1b7bcc32891 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -18,10 +18,11 @@ #include #include #include -#include #include - #include +#include + +#include #include #include @@ -136,8 +137,6 @@ static int rapl_write_data_raw(struct rapl_domain *rd, static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, u64 value, int to_raw); static void package_power_limit_irq_save(struct rapl_package *rp); -static int rapl_init_core(void); -static void rapl_remove_core(void); static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ @@ -1262,8 +1261,6 @@ void rapl_remove_package(struct rapl_package *rp) powercap_unregister_zone(rp->priv->control_type, &rd_package->power_zone); list_del(&rp->plist); - if (list_empty(&rapl_packages)) - rapl_remove_core(); kfree(rp); } EXPORT_SYMBOL_GPL(rapl_remove_package); @@ -1292,10 
+1289,6 @@ struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv) struct cpuinfo_x86 *c = &cpu_data(cpu); int ret; - ret = rapl_init_core(); - if (ret) - return ERR_PTR(ret); - rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); if (!rp) return ERR_PTR(-ENOMEM); @@ -1413,14 +1406,13 @@ static struct notifier_block rapl_pm_notifier = { .notifier_call = rapl_pm_callback, }; -static int rapl_init_core(void) +static struct platform_device *rapl_msr_platdev; + +static int __init rapl_init(void) { const struct x86_cpu_id *id; int ret; - if (rapl_defaults) - return 0; - id = x86_match_cpu(rapl_ids); if (!id) { pr_err("driver does not support CPU family %d model %d\n", @@ -1432,16 +1424,35 @@ static int rapl_init_core(void) rapl_defaults = (struct rapl_defaults *)id->driver_data; ret = register_pm_notifier(&rapl_pm_notifier); + if (ret) + return ret; - return 0; + rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0); + if (!rapl_msr_platdev) { + ret = -ENOMEM; + goto end; + } + + ret = platform_device_add(rapl_msr_platdev); + if (ret) + platform_device_put(rapl_msr_platdev); + +end: + if (ret) + unregister_pm_notifier(&rapl_pm_notifier); + + return ret; } -static void rapl_remove_core(void) +static void __exit rapl_exit(void) { + platform_device_unregister(rapl_msr_platdev); unregister_pm_notifier(&rapl_pm_notifier); - rapl_defaults = NULL; } +module_init(rapl_init); +module_exit(rapl_exit); + MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code"); MODULE_AUTHOR("Jacob Pan "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index bc14a4579acb..d5487965bdfe 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -122,7 +123,7 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra) return ra->err; } -static int __init rapl_msr_init(void) +static int 
rapl_msr_probe(struct platform_device *pdev) { int ret; @@ -152,15 +153,30 @@ static int __init rapl_msr_init(void) return ret; } -static void __exit rapl_msr_exit(void) +static int rapl_msr_remove(struct platform_device *pdev) { cpuhp_remove_state(rapl_msr_priv.pcap_rapl_online); rapl_remove_platform_domain(&rapl_msr_priv); powercap_unregister_control_type(rapl_msr_priv.control_type); + return 0; } -module_init(rapl_msr_init); -module_exit(rapl_msr_exit); +static const struct platform_device_id rapl_msr_ids[] = { + { .name = "intel_rapl_msr", }, + {} +}; +MODULE_DEVICE_TABLE(platform, rapl_msr_ids); + +static struct platform_driver intel_rapl_msr_driver = { + .probe = rapl_msr_probe, + .remove = rapl_msr_remove, + .id_table = rapl_msr_ids, + .driver = { + .name = "intel_rapl_msr", + }, +}; + +module_platform_driver(intel_rapl_msr_driver); MODULE_DESCRIPTION("Driver for Intel RAPL (Running Average Power Limit) control via MSR interface"); MODULE_AUTHOR("Zhang Rui "); From 0ab74bcd1b50821391b264150d26b7f03ba6740b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:35 +0800 Subject: [PATCH 22/27] powercap/intel_rapl: add support for IceLake desktop Add IceLake desktop support in intel_rapl driver Signed-off-by: Gayatri Kammela Signed-off-by: Joe Konno Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index f1b7bcc32891..e9e2342616c6 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -974,6 +974,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { INTEL_CPU_FAM6(KABYLAKE_DESKTOP, rapl_defaults_core), INTEL_CPU_FAM6(CANNONLAKE_MOBILE, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_MOBILE, rapl_defaults_core), + INTEL_CPU_FAM6(ICELAKE_DESKTOP, rapl_defaults_core), INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt), INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), From cceb1d9dfa680e8b0f5d70d87c2ee25903070b96 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:36 +0800 Subject: [PATCH 23/27] powercap/intel_rapl: add support for ICX Add ICX support in intel_rapl driver Signed-off-by: Jacob Pan Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Zhang Rui Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index e9e2342616c6..3a5440d90017 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -975,6 +975,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { INTEL_CPU_FAM6(CANNONLAKE_MOBILE, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_MOBILE, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_DESKTOP, rapl_defaults_core), + INTEL_CPU_FAM6(ICELAKE_X, rapl_defaults_hsw_server), INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt), INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), From 3231a21d5ca6f6baea95588406775304f35a203e Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 10 Jul 2019 21:44:37 +0800 Subject: [PATCH 24/27] powercap/intel_rapl: add support for ICX-D Add ICX-D support in intel_rapl driver Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 3a5440d90017..b624a88b2c25 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -976,6 +976,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { INTEL_CPU_FAM6(ICELAKE_MOBILE, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_DESKTOP, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_X, rapl_defaults_hsw_server), + INTEL_CPU_FAM6(ICELAKE_XEON_D, rapl_defaults_hsw_server), INTEL_CPU_FAM6(ATOM_SILVERMONT, rapl_defaults_byt), INTEL_CPU_FAM6(ATOM_AIRMONT, rapl_defaults_cht), From 2e3f45004715085ad53fb0098ae671194157eca4 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Wed, 10 Jul 2019 21:44:38 +0800 Subject: [PATCH 25/27] powercap/rapl: Add Ice Lake NNPI support to RAPL driver Enables support for ICL-NNPI, which is a neural network processor for deep learning inference. 
From RAPL point of view it is same as Ice Lake Mobile processor. Link: https://lkml.org/lkml/2019/6/5/1034 Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index b624a88b2c25..9fd6dd342169 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -975,6 +975,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { INTEL_CPU_FAM6(CANNONLAKE_MOBILE, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_MOBILE, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_DESKTOP, rapl_defaults_core), + INTEL_CPU_FAM6(ICELAKE_NNPI, rapl_defaults_core), INTEL_CPU_FAM6(ICELAKE_X, rapl_defaults_hsw_server), INTEL_CPU_FAM6(ICELAKE_XEON_D, rapl_defaults_hsw_server), From 8da04e05cdfc715d414a1c5f8318c03030eb68fb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 15 Jul 2019 09:56:30 +1000 Subject: [PATCH 26/27] intel_rapl: need linux/cpuhotplug.h for enum cpuhp_state Fixes: 7ebf8eff63b4 ("intel_rapl: introduce struct rapl_if_private") Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki --- include/linux/intel_rapl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 0c179d92d110..efb3ce892c20 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -12,6 +12,7 @@ #include #include +#include enum rapl_domain_type { RAPL_DOMAIN_PACKAGE, /* entire package/socket */ From c4dcc8a162784c1f827c7f6d8409598f19708fe6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 16 Jul 2019 09:36:08 +0530 Subject: [PATCH 27/27] cpufreq: Make cpufreq_generic_init() return void It always returns 0 (success) and its return type should really be void. Over that, many drivers have added error handling code based on its return value, which is not required at all. 
Change its return type to void and update all the callers. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/bmips-cpufreq.c | 17 ++++++----------- drivers/cpufreq/cpufreq.c | 4 +--- drivers/cpufreq/davinci-cpufreq.c | 3 ++- drivers/cpufreq/imx6q-cpufreq.c | 6 ++---- drivers/cpufreq/kirkwood-cpufreq.c | 3 ++- drivers/cpufreq/loongson1-cpufreq.c | 8 +++----- drivers/cpufreq/loongson2_cpufreq.c | 3 ++- drivers/cpufreq/maple-cpufreq.c | 3 ++- drivers/cpufreq/omap-cpufreq.c | 15 +++++---------- drivers/cpufreq/pasemi-cpufreq.c | 3 ++- drivers/cpufreq/pmac32-cpufreq.c | 3 ++- drivers/cpufreq/pmac64-cpufreq.c | 3 ++- drivers/cpufreq/s3c2416-cpufreq.c | 9 ++------- drivers/cpufreq/s3c64xx-cpufreq.c | 15 +++------------ drivers/cpufreq/s5pv210-cpufreq.c | 3 ++- drivers/cpufreq/sa1100-cpufreq.c | 3 ++- drivers/cpufreq/sa1110-cpufreq.c | 3 ++- drivers/cpufreq/spear-cpufreq.c | 3 ++- drivers/cpufreq/tegra20-cpufreq.c | 8 +------- include/linux/cpufreq.h | 2 +- 20 files changed, 46 insertions(+), 71 deletions(-) diff --git a/drivers/cpufreq/bmips-cpufreq.c b/drivers/cpufreq/bmips-cpufreq.c index 56a4ebbf00e0..f7c23fa468f0 100644 --- a/drivers/cpufreq/bmips-cpufreq.c +++ b/drivers/cpufreq/bmips-cpufreq.c @@ -131,23 +131,18 @@ static int bmips_cpufreq_exit(struct cpufreq_policy *policy) static int bmips_cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; - int ret; freq_table = bmips_cpufreq_get_freq_table(policy); if (IS_ERR(freq_table)) { - ret = PTR_ERR(freq_table); - pr_err("%s: couldn't determine frequency table (%d).\n", - BMIPS_CPUFREQ_NAME, ret); - return ret; + pr_err("%s: couldn't determine frequency table (%ld).\n", + BMIPS_CPUFREQ_NAME, PTR_ERR(freq_table)); + return PTR_ERR(freq_table); } - ret = cpufreq_generic_init(policy, freq_table, TRANSITION_LATENCY); - if (ret) - bmips_cpufreq_exit(policy); - else - pr_info("%s: registered\n", BMIPS_CPUFREQ_NAME); + cpufreq_generic_init(policy, freq_table, 
TRANSITION_LATENCY); + pr_info("%s: registered\n", BMIPS_CPUFREQ_NAME); - return ret; + return 0; } static struct cpufreq_driver bmips_cpufreq_driver = { diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 99aa7d20b458..efab334d6ab2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(arch_set_freq_scale); * - set policies transition latency * - policy->cpus with all possible CPUs */ -int cpufreq_generic_init(struct cpufreq_policy *policy, +void cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency) { @@ -174,8 +174,6 @@ int cpufreq_generic_init(struct cpufreq_policy *policy, * share the clock and voltage and clock. */ cpumask_setall(policy->cpus); - - return 0; } EXPORT_SYMBOL_GPL(cpufreq_generic_init); diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 940fe85db97a..664fa4ab9d1c 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -93,7 +93,8 @@ static int davinci_cpu_init(struct cpufreq_policy *policy) * Setting the latency to 2000 us to accommodate addition of drivers * to pre/post change notification list. 
*/ - return cpufreq_generic_init(policy, freq_table, 2000 * 1000); + cpufreq_generic_init(policy, freq_table, 2000 * 1000); + return 0; } static struct cpufreq_driver davinci_driver = { diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 3e17560b1efe..91ea95c97bb2 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -193,14 +193,12 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) static int imx6q_cpufreq_init(struct cpufreq_policy *policy) { - int ret; - policy->clk = clks[ARM].clk; - ret = cpufreq_generic_init(policy, freq_table, transition_latency); + cpufreq_generic_init(policy, freq_table, transition_latency); policy->suspend_freq = max_freq; dev_pm_opp_of_register_em(policy->cpus); - return ret; + return 0; } static struct cpufreq_driver imx6q_cpufreq_driver = { diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index 7ab564c1f7ae..cb74bdc5baaa 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -85,7 +85,8 @@ static int kirkwood_cpufreq_target(struct cpufreq_policy *policy, /* Module init and exit code */ static int kirkwood_cpufreq_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, kirkwood_freq_table, 5000); + cpufreq_generic_init(policy, kirkwood_freq_table, 5000); + return 0; } static struct cpufreq_driver kirkwood_cpufreq_driver = { diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c index 21c9ce8526c0..0ea88778882a 100644 --- a/drivers/cpufreq/loongson1-cpufreq.c +++ b/drivers/cpufreq/loongson1-cpufreq.c @@ -81,7 +81,7 @@ static int ls1x_cpufreq_init(struct cpufreq_policy *policy) struct device *cpu_dev = get_cpu_device(policy->cpu); struct cpufreq_frequency_table *freq_tbl; unsigned int pll_freq, freq; - int steps, i, ret; + int steps, i; pll_freq = clk_get_rate(cpufreq->pll_clk) / 1000; @@ -103,11 +103,9 @@ static int 
ls1x_cpufreq_init(struct cpufreq_policy *policy) freq_tbl[i].frequency = CPUFREQ_TABLE_END; policy->clk = cpufreq->clk; - ret = cpufreq_generic_init(policy, freq_tbl, 0); - if (ret) - kfree(freq_tbl); + cpufreq_generic_init(policy, freq_tbl, 0); - return ret; + return 0; } static int ls1x_cpufreq_exit(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index da344696beed..890813e0bb76 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -95,7 +95,8 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy) } policy->clk = cpuclk; - return cpufreq_generic_init(policy, &loongson2_clockmod_table[0], 0); + cpufreq_generic_init(policy, &loongson2_clockmod_table[0], 0); + return 0; } static int loongson2_cpufreq_exit(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index a94355723ef8..a03cd3ad170f 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -143,7 +143,8 @@ static unsigned int maple_cpufreq_get_speed(unsigned int cpu) static int maple_cpufreq_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, maple_cpu_freqs, 12000); + cpufreq_generic_init(policy, maple_cpu_freqs, 12000); + return 0; } static struct cpufreq_driver maple_cpufreq_driver = { diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 68052b74d28f..edda20119cfd 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -125,23 +125,18 @@ static int omap_cpu_init(struct cpufreq_policy *policy) dev_err(mpu_dev, "%s: cpu%d: failed creating freq table[%d]\n", __func__, policy->cpu, result); - goto fail; + clk_put(policy->clk); + return result; } } atomic_inc_return(&freq_table_users); /* FIXME: what's the actual transition time? 
*/ - result = cpufreq_generic_init(policy, freq_table, 300 * 1000); - if (!result) { - dev_pm_opp_of_register_em(policy->cpus); - return 0; - } + cpufreq_generic_init(policy, freq_table, 300 * 1000); + dev_pm_opp_of_register_em(policy->cpus); - freq_table_free(); -fail: - clk_put(policy->clk); - return result; + return 0; } static int omap_cpu_exit(struct cpufreq_policy *policy) diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 6b1e4abe3248..93f39a1d4c3d 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -196,7 +196,8 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cur = pas_freqs[cur_astate].frequency; ppc_proc_freq = policy->cur * 1000ul; - return cpufreq_generic_init(policy, pas_freqs, get_gizmo_latency()); + cpufreq_generic_init(policy, pas_freqs, get_gizmo_latency()); + return 0; out_unmap_sdcpwr: iounmap(sdcpwr_mapbase); diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 9b4ce2eb8222..bc7fc930294e 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -376,7 +376,8 @@ static int pmac_cpufreq_target( struct cpufreq_policy *policy, static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, pmac_cpu_freqs, transition_latency); + cpufreq_generic_init(policy, pmac_cpu_freqs, transition_latency); + return 0; } static u32 read_gpio(struct device_node *np) diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 1d32a863332d..045881494cc9 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -324,7 +324,8 @@ static unsigned int g5_cpufreq_get_speed(unsigned int cpu) static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, g5_cpu_freqs, transition_latency); + cpufreq_generic_init(policy, g5_cpu_freqs, transition_latency); + return 0; } static struct 
cpufreq_driver g5_cpufreq_driver = { diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c index 5b2db3c6568f..124a4c68c5ec 100644 --- a/drivers/cpufreq/s3c2416-cpufreq.c +++ b/drivers/cpufreq/s3c2416-cpufreq.c @@ -450,21 +450,16 @@ static int s3c2416_cpufreq_driver_init(struct cpufreq_policy *policy) /* Datasheet says PLL stabalisation time must be at least 300us, * so but add some fudge. (reference in LOCKCON0 register description) */ - ret = cpufreq_generic_init(policy, s3c_freq->freq_table, + cpufreq_generic_init(policy, s3c_freq->freq_table, (500 * 1000) + s3c_freq->regulator_latency); - if (ret) - goto err_freq_table; - register_reboot_notifier(&s3c2416_cpufreq_reboot_notifier); return 0; -err_freq_table: #ifdef CONFIG_ARM_S3C2416_CPUFREQ_VCORESCALE - regulator_put(s3c_freq->vddarm); err_vddarm: -#endif clk_put(s3c_freq->armclk); +#endif err_armclk: clk_put(s3c_freq->hclk); err_hclk: diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c index 0cb9040eca49..40aafa8299a0 100644 --- a/drivers/cpufreq/s3c64xx-cpufreq.c +++ b/drivers/cpufreq/s3c64xx-cpufreq.c @@ -147,7 +147,6 @@ static void s3c64xx_cpufreq_config_regulator(void) static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy) { - int ret; struct cpufreq_frequency_table *freq; if (policy->cpu != 0) @@ -168,8 +167,7 @@ static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy) #ifdef CONFIG_REGULATOR vddarm = regulator_get(NULL, "vddarm"); if (IS_ERR(vddarm)) { - ret = PTR_ERR(vddarm); - pr_err("Failed to obtain VDDARM: %d\n", ret); + pr_err("Failed to obtain VDDARM: %ld\n", PTR_ERR(vddarm)); pr_err("Only frequency scaling available\n"); vddarm = NULL; } else { @@ -199,16 +197,9 @@ static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy) * the PLLs, which we don't currently) is ~300us worst case, * but add some fudge. 
*/ - ret = cpufreq_generic_init(policy, s3c64xx_freq_table, + cpufreq_generic_init(policy, s3c64xx_freq_table, (500 * 1000) + regulator_latency); - if (ret != 0) { - pr_err("Failed to configure frequency table: %d\n", - ret); - regulator_put(vddarm); - clk_put(policy->clk); - } - - return ret; + return 0; } static struct cpufreq_driver s3c64xx_cpufreq_driver = { diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index c7b7d1e65b08..0663cc935fa6 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -544,7 +544,8 @@ static int s5pv210_cpu_init(struct cpufreq_policy *policy) s5pv210_dram_conf[1].freq = clk_get_rate(dmc1_clk); policy->suspend_freq = SLEEP_FREQ; - return cpufreq_generic_init(policy, s5pv210_freq_table, 40000); + cpufreq_generic_init(policy, s5pv210_freq_table, 40000); + return 0; out_dmc1: clk_put(dmc0_clk); diff --git a/drivers/cpufreq/sa1100-cpufreq.c b/drivers/cpufreq/sa1100-cpufreq.c index ab5cab93e638..5c075ef6adc0 100644 --- a/drivers/cpufreq/sa1100-cpufreq.c +++ b/drivers/cpufreq/sa1100-cpufreq.c @@ -181,7 +181,8 @@ static int sa1100_target(struct cpufreq_policy *policy, unsigned int ppcr) static int __init sa1100_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, sa11x0_freq_table, 0); + cpufreq_generic_init(policy, sa11x0_freq_table, 0); + return 0; } static struct cpufreq_driver sa1100_driver __refdata = { diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c index 66e5fb088ecc..1057d7f65118 100644 --- a/drivers/cpufreq/sa1110-cpufreq.c +++ b/drivers/cpufreq/sa1110-cpufreq.c @@ -306,7 +306,8 @@ static int sa1110_target(struct cpufreq_policy *policy, unsigned int ppcr) static int __init sa1110_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, sa11x0_freq_table, 0); + cpufreq_generic_init(policy, sa11x0_freq_table, 0); + return 0; } /* sa1110_driver needs __refdata because it must remain after 
init registers diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 4074e2615522..73bd8dc47074 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -153,8 +153,9 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy, static int spear_cpufreq_init(struct cpufreq_policy *policy) { policy->clk = spear_cpufreq.clk; - return cpufreq_generic_init(policy, spear_cpufreq.freq_tbl, + cpufreq_generic_init(policy, spear_cpufreq.freq_tbl, spear_cpufreq.transition_latency); + return 0; } static struct cpufreq_driver spear_cpufreq_driver = { diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index 3c32cc7b0671..f84ecd22f488 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -118,17 +118,11 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index) static int tegra_cpu_init(struct cpufreq_policy *policy) { struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data(); - int ret; clk_prepare_enable(cpufreq->cpu_clk); /* FIXME: what's the actual transition time? */ - ret = cpufreq_generic_init(policy, freq_table, 300 * 1000); - if (ret) { - clk_disable_unprepare(cpufreq->cpu_clk); - return ret; - } - + cpufreq_generic_init(policy, freq_table, 300 * 1000); policy->clk = cpufreq->cpu_clk; policy->suspend_freq = freq_table[0].frequency; return 0; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index afc683021ac5..441ff15b7768 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -995,7 +995,7 @@ extern struct freq_attr *cpufreq_generic_attr[]; int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy); unsigned int cpufreq_generic_get(unsigned int cpu); -int cpufreq_generic_init(struct cpufreq_policy *policy, +void cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); #endif /* _LINUX_CPUFREQ_H */