drm/amdgpu/ras: fix and update the documentation for RAS
Add new sections to amdgpu.rst, fix up formatting issues, add additional documentation to each section. Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
a667b75c1e
commit
f77c7109c0
|
@ -79,12 +79,32 @@ AMDGPU XGMI Support
|
||||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
|
||||||
:internal:
|
:internal:
|
||||||
|
|
||||||
AMDGPU RAS debugfs control interface
|
AMDGPU RAS Support
|
||||||
====================================
|
==================
|
||||||
|
|
||||||
|
RAS debugfs/sysfs Control and Error Injection Interfaces
|
||||||
|
--------------------------------------------------------
|
||||||
|
|
||||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||||
:doc: AMDGPU RAS debugfs control interface
|
:doc: AMDGPU RAS debugfs control interface
|
||||||
|
|
||||||
|
RAS Error Count sysfs Interface
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||||
|
:doc: AMDGPU RAS sysfs Error Count Interface
|
||||||
|
|
||||||
|
RAS EEPROM debugfs Interface
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||||
|
:doc: AMDGPU RAS debugfs EEPROM table reset interface
|
||||||
|
|
||||||
|
RAS VRAM Bad Pages sysfs Interface
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||||
|
:doc: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
|
||||||
|
|
||||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||||
:internal:
|
:internal:
|
||||||
|
|
|
@ -310,7 +310,18 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
|
||||||
/**
|
/**
|
||||||
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
|
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
|
||||||
*
|
*
|
||||||
* Usage: echo 1 > ../ras/ras_eeprom_reset will reset EEPROM table to 0 entries.
|
* Some boards contain an EEPROM which is used to persistently store a list of
|
||||||
|
* bad pages containing ECC errors detected in vram. This interface provides
|
||||||
|
* a way to reset the EEPROM, e.g., after testing error injection.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
*
|
||||||
|
* .. code-block:: bash
|
||||||
|
*
|
||||||
|
* echo 1 > ../ras/ras_eeprom_reset
|
||||||
|
*
|
||||||
|
* will reset EEPROM table to 0 entries.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
|
static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
|
||||||
size_t size, loff_t *pos)
|
size_t size, loff_t *pos)
|
||||||
|
@ -337,6 +348,27 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
|
||||||
.llseek = default_llseek
|
.llseek = default_llseek
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: AMDGPU RAS sysfs Error Count Interface
|
||||||
|
*
|
||||||
|
* It allows the user to read the error count for each IP block on the gpu through
|
||||||
|
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
|
||||||
|
*
|
||||||
|
* It outputs multiple lines which report the uncorrected (ue) and corrected
|
||||||
|
* (ce) error counts.
|
||||||
|
*
|
||||||
|
* The format of each line is as follows:
|
||||||
|
*
|
||||||
|
* [ce|ue]: count
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
*
|
||||||
|
* .. code-block:: bash
|
||||||
|
*
|
||||||
|
* ue: 0
|
||||||
|
* ce: 1
|
||||||
|
*
|
||||||
|
*/
|
||||||
static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
|
static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
|
||||||
struct device_attribute *attr, char *buf)
|
struct device_attribute *attr, char *buf)
|
||||||
{
|
{
|
||||||
|
@ -781,8 +813,8 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
* DOC: ras sysfs gpu_vram_bad_pages interface
|
* DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
|
||||||
*
|
*
|
||||||
* It allows user to read the bad pages of vram on the gpu through
|
* It allows user to read the bad pages of vram on the gpu through
|
||||||
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
|
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
|
||||||
|
@ -794,14 +826,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
|
||||||
*
|
*
|
||||||
* gpu pfn and gpu page size are printed in hex format.
|
* gpu pfn and gpu page size are printed in hex format.
|
||||||
* flags can be one of below character,
|
* flags can be one of below character,
|
||||||
|
*
|
||||||
* R: reserved, this gpu page is reserved and not able to use.
|
* R: reserved, this gpu page is reserved and not able to use.
|
||||||
|
*
|
||||||
* P: pending for reserve, this gpu page is marked as bad, will be reserved
|
* P: pending for reserve, this gpu page is marked as bad, will be reserved
|
||||||
* in next window of page_reserve.
|
* in next window of page_reserve.
|
||||||
|
*
|
||||||
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
|
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
|
||||||
*
|
*
|
||||||
* examples:
|
* Examples:
|
||||||
|
*
|
||||||
|
* .. code-block:: bash
|
||||||
|
*
|
||||||
* 0x00000001 : 0x00001000 : R
|
* 0x00000001 : 0x00001000 : R
|
||||||
* 0x00000002 : 0x00001000 : P
|
* 0x00000002 : 0x00001000 : P
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
|
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
|
||||||
|
|
Loading…
Reference in New Issue