mirror of https://gitee.com/openkylin/linux.git
drm/amdkfd: Add thermal throttling SMI event
Add support for reporting thermal throttling events through SMI. Also, add a counter to count the number of throttling interrupts observed and report the count in the SMI event message. Signed-off-by: Mukul Joshi <mukul.joshi@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
df9c8d1aa2
commit
2c2b0d880f
|
@ -789,4 +789,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
|
|||
/*
 * Stub variant of kgd2kfd_set_sram_ecc_flag(): the body is empty, so a call
 * has no effect on @kfd.
 * NOTE(review): the #endif a few lines below suggests this is the fallback
 * compiled when the real implementation is configured out - confirm against
 * the enclosing #if, which is outside this hunk.
 */
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * Stub variant of kgd2kfd_smi_event_throttle(): the body is empty, so both
 * @kfd and @throttle_bitmask are ignored and no SMI event is produced.
 * NOTE(review): the #endif that follows suggests this is the fallback
 * compiled when the real implementation is configured out - confirm against
 * the enclosing #if, which is outside this hunk.
 */
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -270,5 +270,6 @@ int kgd2kfd_resume_mm(struct mm_struct *mm);
|
|||
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
|
||||
struct dma_fence *fence);
|
||||
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
|
||||
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
|
||||
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "cwsr_trap_handler.h"
|
||||
#include "kfd_iommu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "kfd_smi_events.h"
|
||||
|
||||
#define MQD_SIZE_ALIGNED 768
|
||||
|
||||
|
@ -1245,6 +1246,12 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)
|
|||
WARN_ONCE(count < 0, "Compute profile ref. count error");
|
||||
}
|
||||
|
||||
/*
 * kgd2kfd_smi_event_throttle - pass a thermal throttling event on to KFD SMI
 * @kfd: KFD device, may be NULL
 * @throttle_bitmask: throttle status bits forwarded unchanged
 *
 * Does nothing when @kfd is NULL; otherwise hands the bitmask to
 * kfd_smi_event_update_thermal_throttling().
 */
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
{
	if (!kfd)
		return;

	kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}
|
||||
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
||||
/* This function will send a package to HIQ to hang the HWS
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <linux/wait.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_vm.h"
|
||||
#include "kfd_priv.h"
|
||||
#include "kfd_smi_events.h"
|
||||
|
@ -148,6 +149,54 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void add_event_to_kfifo(struct kfd_dev *dev, unsigned long long smi_event,
|
||||
char *event_msg, int len)
|
||||
{
|
||||
struct kfd_smi_client *client;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
|
||||
if (!(READ_ONCE(client->events) & smi_event))
|
||||
continue;
|
||||
spin_lock(&client->lock);
|
||||
if (kfifo_avail(&client->fifo) >= len) {
|
||||
kfifo_in(&client->fifo, event_msg, len);
|
||||
wake_up_all(&client->wait_queue);
|
||||
} else {
|
||||
pr_debug("smi_event(EventID: %llu): no space left\n",
|
||||
smi_event);
|
||||
}
|
||||
spin_unlock(&client->lock);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
|
||||
uint32_t throttle_bitmask)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
|
||||
/*
|
||||
* ThermalThrottle msg = throttle_bitmask(8):
|
||||
* thermal_interrupt_count(16):
|
||||
* 16 bytes event + 1 byte space + 8 byte throttle_bitmask +
|
||||
* 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n +
|
||||
* 1 byte \0 = 44
|
||||
*/
|
||||
char fifo_in[44];
|
||||
int len;
|
||||
|
||||
if (list_empty(&dev->smi_clients))
|
||||
return;
|
||||
|
||||
len = snprintf(fifo_in, 44, "%x %x:%llx\n",
|
||||
KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
|
||||
atomic64_read(&adev->smu.throttle_int_counter));
|
||||
|
||||
add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
|
||||
}
|
||||
|
||||
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;
|
||||
|
@ -156,7 +205,6 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
|
|||
/* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43
|
||||
*/
|
||||
char fifo_in[43];
|
||||
struct kfd_smi_client *client;
|
||||
int len;
|
||||
|
||||
if (list_empty(&dev->smi_clients))
|
||||
|
@ -171,22 +219,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
|
|||
len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
|
||||
task_info.pid, task_info.task_name);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
list_for_each_entry_rcu(client, &dev->smi_clients, list) {
|
||||
if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT))
|
||||
continue;
|
||||
spin_lock(&client->lock);
|
||||
if (kfifo_avail(&client->fifo) >= len) {
|
||||
kfifo_in(&client->fifo, fifo_in, len);
|
||||
wake_up_all(&client->wait_queue);
|
||||
}
|
||||
else
|
||||
pr_debug("smi_event(vmfault): no space left\n");
|
||||
spin_unlock(&client->lock);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
|
||||
}
|
||||
|
||||
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
|
||||
|
|
|
@ -25,5 +25,7 @@
|
|||
|
||||
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
|
||||
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
|
||||
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
|
||||
uint32_t throttle_bitmask);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -640,6 +640,7 @@ static int smu_sw_init(void *handle)
|
|||
mutex_init(&smu->message_lock);
|
||||
|
||||
INIT_WORK(&smu->throttling_logging_work, smu_throttling_logging_work_fn);
|
||||
atomic64_set(&smu->throttle_int_counter, 0);
|
||||
smu->watermarks_bitmap = 0;
|
||||
smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
|
||||
smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
|
||||
|
|
|
@ -2251,6 +2251,7 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu)
|
|||
|
||||
dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n",
|
||||
log_buf);
|
||||
kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status);
|
||||
}
|
||||
|
||||
static const struct pptable_funcs arcturus_ppt_funcs = {
|
||||
|
|
|
@ -446,6 +446,7 @@ struct smu_context
|
|||
bool dc_controlled_by_gpio;
|
||||
|
||||
struct work_struct throttling_logging_work;
|
||||
atomic64_t throttle_int_counter;
|
||||
};
|
||||
|
||||
struct i2c_adapter;
|
||||
|
|
|
@ -1311,6 +1311,11 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
|
|||
smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
|
||||
break;
|
||||
case 0x7:
|
||||
/*
|
||||
* Increment the throttle interrupt counter
|
||||
*/
|
||||
atomic64_inc(&smu->throttle_int_counter);
|
||||
|
||||
if (!atomic_read(&adev->throttling_logging_enabled))
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -450,7 +450,8 @@ struct kfd_ioctl_import_dmabuf_args {
|
|||
* KFD SMI(System Management Interface) events
|
||||
*/
|
||||
/* Event type (defined by bitmask) */
|
||||
#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001
|
||||
#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001
|
||||
#define KFD_SMI_EVENT_THERMAL_THROTTLE 0x0000000000000002
|
||||
|
||||
struct kfd_ioctl_smi_events_args {
|
||||
__u32 gpuid; /* to KFD */
|
||||
|
|
Loading…
Reference in New Issue